317 files changed, 80193 insertions, 0 deletions
diff --git a/third_party/bearssl/LICENSE.txt b/third_party/bearssl/LICENSE.txt
new file mode 100644
index 0000000..0885020
--- /dev/null
+++ b/third_party/bearssl/LICENSE.txt
@@ -0,0 +1,21 @@
+Copyright (c) 2016 Thomas Pornin <[email protected]>
+
+Permission is hereby granted, free of charge, to any person obtaining 
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be 
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/third_party/bearssl/README.txt b/third_party/bearssl/README.txt
new file mode 100644
index 0000000..0cb5288
--- /dev/null
+++ b/third_party/bearssl/README.txt
@@ -0,0 +1,136 @@
+# Documentation
+
+The most up-to-date documentation is supposed to be available on the
+[BearSSL Web site](https://www.bearssl.org/).
+
+# Disclaimer
+
+BearSSL is considered beta-level software. Most planned functionalities
+are implemented; new evolution may still break both source and binary
+compatibility.
+
+Using BearSSL for production purposes would be a relatively bold but not
+utterly crazy move. BearSSL is free, open-source software, provided
+without any guarantee of fitness or reliability. That being said, it
+appears to behave properly, and only minor issues have been found (and
+fixed) so far. You are encouraged to inspect its API and code for
+learning, testing and possibly contributing.
+
+The usage license is spelled out in the `LICENSE.txt` file. This is the
+"MIT license". It can be summarised in the following way:
+
+ - You can use and reuse the library as you wish, and modify it, and
+   integrate it in your own code, and distribute it as is or in any
+   modified form, and so on.
+
+ - The only obligation that the license terms put upon you is that you
+   acknowledge and make it clear that if anything breaks, it is not my
+   fault, and I am not liable for anything, regardless of the type and
+   amount of collateral damage. The license terms say that the copyright
+   notice "shall be included in all copies or substantial portions of
+   the Software": this is how the disclaimer is "made explicit".
+   Basically, I have put it in every source file, so just keep it there.
+
+# Installation
+
+Right now, BearSSL is a simple library, along with a few test and debug
+command-line tools. There is no installer yet. The library _can_ be
+compiled as a shared library on some systems, but since the binary API
+is not fully stabilised, it is not a very good idea to do that right
+now.
+
+To compile the code, just type `make`. This will try to use sane
+"default" values. On a Windows system with Visual Studio, run a console
+with the environment initialised for a specific version of the C compiler,
+and type `nmake`.
+
+To override the default settings, create a custom configuration file in
+the `conf` directory, and invoke `make` (or `nmake`) with an explicit
+`CONF=` parameter. For instance, to use the provided `samd20.mk`
+configuration file (that targets cross-compilation for an Atmel board
+that features a Cortex-M0+ CPU), type:
+
+    make CONF=samd20
+
+The `conf/samd20.mk` file includes the `Unix.mk` file and then overrides
+some of the parameters, including the destination directory. Any custom
+configuration can be made the same way.
+
+Some compile-time options can be set through macros, either on the
+compiler command-line, or in the `src/config.h` file. See the comments
+in that file. Some settings are autodetected but they can still be
+explicitly overridden.
+
+When compilation is done, the library (static and DLL, when appropriate)
+and the command-line tools can be found in the designated build
+directory (by default named `build`). The public headers (to be used
+by applications linked against BearSSL) are in the `inc/` directory.
+
+To run the tests:
+
+  - `testcrypto all` runs the cryptographic tests (test vectors on all
+    implemented cryptographic functions). It can be slow. You can also
+    run a selection of the tests by providing their names (run
+    `testcrypto` without any parameter to see the available names).
+
+  - `testspeed all` runs a number of performance benchmarks, there again
+    on cryptographic functions. It gives a taste of how things go on the
+    current platform. As for `testcrypto`, specific named benchmarks can
+    be executed.
+
+  - `testx509` runs X.509 validation tests. The test certificates are
+    all in `test/x509/`.
+
+The `brssl` command-line tool produced in the build directory is a
+stand-alone binary. It can exercise some of the functionalities of
+BearSSL, in particular running a test SSL client or server. It is not
+meant for production purposes (e.g. the SSL client has a mode where it
+disregards the inability to validate the server's certificate, which is
+inherently unsafe, but convenient for debug).
+
+**Using the library** means writing some application code that invokes
+it, and linking with the static library. The header files are all in the
+`inc` directory; copy them wherever makes sense (e.g. in the
+`/usr/local/include` directory). The library itself (`libbearssl.a`) is
+what you link against.
+
+Alternatively, you may want to copy the source files directly into your
+own application code. This will make integrating later versions of
+BearSSL more difficult. If you still want to go down that road, then
+simply copy all the `*.h` and `*.c` files from the `src` and `inc`
+directories into your application source code. In the BearSSL source
+archive, the source files are segregated into various sub-directories,
+but this is for my convenience only. There is no technical requirement
+for that, and all files can be dumped together in a simple directory.
+
+Dependencies are simple and systematic:
+
+  - Each `*.c` file includes `inner.h`
+  - `inner.h` includes `config.h` and `bearssl.h`
+  - `bearssl.h` includes the other `bearssl_*.h`
+
+# Versioning
+
+I follow this simple version numbering scheme:
+
+ - Version numbers are `x.y` or `x.y.z` where `x`, `y` and `z` are
+   decimal integers (possibly greater than 10). When the `.z` part is
+   missing, it is equivalent to `.0`.
+
+ - Backward compatibility is maintained, at both source and binary levels,
+   for each major version: this means that if some application code was
+   designed for version `x.y`, then it should compile, link and run
+   properly with any version `x.y'` for any `y'` greater than `y`.
+
+   The major version `0` is an exception. You shall not expect that any
+   version that starts with `0.` offers any kind of compatibility,
+   either source or binary, with any other `0.` version. (Of course I
+   will try to maintain some decent level of source compatibility, but I
+   make no promise in that respect. Since the API uses caller-allocated
+   context structures, I already know that binary compatibility _will_
+   be broken.)
+
+ - Sub-versions (the `y` part) are about added functionality. That is,
+   it can be expected that `1.3` will contain some extra functions when
+   compared to `1.2`. The next version level (the `z` part) is for
+   bugfixes that do not add any functionality.
diff --git a/third_party/bearssl/inc/bearssl.h b/third_party/bearssl/inc/bearssl.h
new file mode 100644
index 0000000..310edb2
--- /dev/null
+++ b/third_party/bearssl/inc/bearssl.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef BR_BEARSSL_H__
+#define BR_BEARSSL_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+/** \mainpage BearSSL API
+ *
+ * # API Layout
+ *
+ * The functions and structures defined by the BearSSL API are located
+ * in various header files:
+ *
+ * | Header file     | Elements                                          |
+ * | :-------------- | :------------------------------------------------ |
+ * | bearssl_hash.h  | Hash functions                                    |
+ * | bearssl_hmac.h  | HMAC                                              |
+ * | bearssl_kdf.h   | Key Derivation Functions                          |
+ * | bearssl_rand.h  | Pseudorandom byte generators                      |
+ * | bearssl_prf.h   | PRF implementations (for SSL/TLS)                 |
+ * | bearssl_block.h | Symmetric encryption                              |
+ * | bearssl_aead.h  | AEAD algorithms (combined encryption + MAC)       |
+ * | bearssl_rsa.h   | RSA encryption and signatures                     |
+ * | bearssl_ec.h    | Elliptic curves support (including ECDSA)         |
+ * | bearssl_ssl.h   | SSL/TLS engine interface                          |
+ * | bearssl_x509.h  | X.509 certificate decoding and validation         |
+ * | bearssl_pem.h   | Base64/PEM decoding support functions             |
+ *
+ * Applications using BearSSL are supposed to simply include `bearssl.h`
+ * as follows:
+ *
+ *     #include <bearssl.h>
+ *
+ * The `bearssl.h` file itself includes all the other header files. It is
+ * possible to include specific header files, but it has no practical
+ * advantage for the application. The API is split into separate
+ * header files only for documentation convenience.
+ *
+ *
+ * # Conventions
+ *
+ * ## MUST and SHALL
+ *
+ * In all descriptions, the usual "MUST", "SHALL", "MAY",... terminology
+ * is used. Failure to meet requirements expressed with a "MUST" or
+ * "SHALL" implies undefined behaviour, which means that segmentation
+ * faults, buffer overflows, and other similar adverse events, may occur.
+ *
+ * In general, BearSSL is not very forgiving of programming errors, and
+ * does not include many failsafes or error reporting when the problem
+ * does not arise from external transient conditions, and can be fixed
+ * only in the application code. This is done so in order to make the
+ * total code footprint lighter.
+ *
+ *
+ * ## `NULL` values
+ *
+ * Function parameters with a pointer type shall not be `NULL` unless
+ * explicitly authorised by the documentation. As an exception, when
+ * the pointer aims at a sequence of bytes and is accompanied with
+ * a length parameter, and the length is zero (meaning that there is
+ * no byte at all to retrieve), then the pointer may be `NULL` even if
+ * not explicitly allowed.
+ *
+ *
+ * ## Memory Allocation
+ *
+ * BearSSL does not perform dynamic memory allocation. This implies that
+ * for any functionality that requires a non-transient state, the caller
+ * is responsible for allocating the relevant context structure. Such
+ * allocation can be done in any appropriate area, including static data
+ * segments, the heap, and the stack, provided that proper alignment is
+ * respected. The header files define these context structures
+ * (including size and contents), so the C compiler should handle
+ * alignment automatically.
+ *
+ * Since there is no dynamic resource allocation, there is also nothing to
+ * release. When the calling code is done with a BearSSL feature, it
+ * may simply release the context structures it allocated itself, with
+ * no "close function" to call. If the context structures were allocated
+ * on the stack (as local variables), then even that release operation is
+ * implicit.
+ *
+ *
+ * ## Structure Contents
+ *
+ * Except when explicitly indicated, structure contents are opaque: they
+ * are included in the header files so that calling code may know the
+ * structure sizes and alignment requirements, but callers SHALL NOT
+ * access individual fields directly. For fields that are supposed to
+ * be read from or written to, the API defines accessor functions (the
+ * simplest of these accessor functions are defined as `static inline`
+ * functions, and the C compiler will optimise them away).
+ *
+ *
+ * # API Usage
+ *
+ * BearSSL usage for running a SSL/TLS client or server is described
+ * on the [BearSSL Web site](https://www.bearssl.org/api1.html). The
+ * BearSSL source archive also comes with sample code.
+ */
+
+#include "bearssl_hash.h"
+#include "bearssl_hmac.h"
+#include "bearssl_kdf.h"
+#include "bearssl_rand.h"
+#include "bearssl_prf.h"
+#include "bearssl_block.h"
+#include "bearssl_aead.h"
+#include "bearssl_rsa.h"
+#include "bearssl_ec.h"
+#include "bearssl_ssl.h"
+#include "bearssl_x509.h"
+#include "bearssl_pem.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Type for a configuration option.
+ *
+ * A "configuration option" is a value that is selected when the BearSSL
+ * library itself is compiled. Most options are boolean; their value is
+ * then either 1 (option is enabled) or 0 (option is disabled). Some
+ * options have other integer values. Option names correspond to macro
+ * names. Some of the options can be explicitly set in the internal
+ * `"config.h"` file.
+ */
+typedef struct {
+	/** \brief Configurable option name (matches the macro name). */
+	const char *name;
+	/** \brief Configurable option value (1 or 0 for boolean options). */
+	long value;
+} br_config_option;
+
+/** \brief Get configuration report.
+ *
+ * This function returns compiled configuration options, each as a
+ * 'long' value. Names match internal macro names, in particular those
+ * that can be set in the `"config.h"` inner file. For boolean options,
+ * the numerical value is 1 if enabled, 0 if disabled. For maximum
+ * key sizes, values are expressed in bits.
+ *
+ * The returned array is terminated by an entry whose `name` is `NULL`.
+ *
+ * \return  the configuration report.
+ */
+const br_config_option *br_get_config(void);
+
+/* ======================================================================= */
+
+/** \brief Version feature: support for time callback. */
+#define BR_FEATURE_X509_TIME_CALLBACK   1
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/third_party/bearssl/inc/bearssl_aead.h b/third_party/bearssl/inc/bearssl_aead.h
new file mode 100644
index 0000000..8e35a1f
--- /dev/null
+++ b/third_party/bearssl/inc/bearssl_aead.h
@@ -0,0 +1,1059 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef BR_BEARSSL_AEAD_H__
+#define BR_BEARSSL_AEAD_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "bearssl_block.h"
+#include "bearssl_hash.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \file bearssl_aead.h
+ *
+ * # Authenticated Encryption with Additional Data
+ *
+ * This file documents the API for AEAD encryption.
+ *
+ *
+ * ## Procedural API
+ *
+ * An AEAD algorithm processes messages and provides confidentiality
+ * (encryption) and checked integrity (MAC). It uses the following
+ * parameters:
+ *
+ *   - A symmetric key. Exact size depends on the AEAD algorithm.
+ *
+ *   - A nonce (IV). Size depends on the AEAD algorithm; for most
+ *     algorithms, it is crucial for security that any given nonce
+ *     value is never used twice for the same key and distinct
+ *     messages.
+ *
+ *   - Data to encrypt and protect.
+ *
+ *   - Additional authenticated data, which is covered by the MAC but
+ *     otherwise left untouched (i.e. not encrypted).
+ *
+ * The AEAD algorithm encrypts the data, and produces an authentication
+ * tag. It is assumed that the encrypted data, the tag, the additional
+ * authenticated data and the nonce are sent to the receiver; the
+ * additional data and the nonce may be implicit (e.g. using elements of
+ * the underlying transport protocol, such as record sequence numbers).
+ * The receiver will recompute the tag value and compare it with the one
+ * received; if they match, then the data is correct, and can be
+ * decrypted and used; otherwise, at least one of the elements was
+ * altered in transit, normally leading to wholesale rejection of the
+ * complete message.
+ *
+ * For each AEAD algorithm, identified by a symbolic name (hereafter
+ * denoted as "`xxx`"), the following functions are defined:
+ *
+ *   - `br_xxx_init()`
+ *
+ *     Initialise the AEAD algorithm, on a provided context structure.
+ *     Exact parameters depend on the algorithm, and may include
+ *     pointers to extra implementations and context structures. The
+ *     secret key is provided at this point, either directly or
+ *     indirectly.
+ *
+ *   - `br_xxx_reset()`
+ *
+ *     Start a new AEAD computation. The nonce value is provided as
+ *     parameter to this function.
+ *
+ *   - `br_xxx_aad_inject()`
+ *
+ *     Inject some additional authenticated data. Additional data may
+ *     be provided in several chunks of arbitrary length.
+ *
+ *   - `br_xxx_flip()`
+ *
+ *     This function MUST be called after injecting all additional
+ *     authenticated data, and before beginning to encrypt the plaintext
+ *     (or decrypt the ciphertext).
+ *
+ *   - `br_xxx_run()`
+ *
+ *     Process some plaintext (to encrypt) or ciphertext (to decrypt).
+ *     Encryption/decryption is done in place. Data may be provided in
+ *     several chunks of arbitrary length.
+ *
+ *   - `br_xxx_get_tag()`
+ *
+ *     Compute the authentication tag. All message data (encrypted or
+ *     decrypted) must have been injected at that point. Also, this
+ *     call may modify internal context elements, so it may be called
+ *     only once for a given AEAD computation.
+ *
+ *   - `br_xxx_check_tag()`
+ *
+ *     An alternative to `br_xxx_get_tag()`, meant to be used by the
+ *     receiver: the authentication tag is internally recomputed, and
+ *     compared with the one provided as parameter.
+ *
+ * This API makes the following assumptions on the AEAD algorithm:
+ *
+ *   - Encryption does not expand the size of the ciphertext; there is
+ *     no padding. This is true of most modern AEAD modes such as GCM.
+ *
+ *   - The additional authenticated data must be processed first,
+ *     before the encrypted/decrypted data.
+ *
+ *   - Nonce, plaintext and additional authenticated data all consist
+ *     in an integral number of bytes. There is no provision to use
+ *     elements whose length in bits is not a multiple of 8.
+ *
+ * Each AEAD algorithm has its own requirements and limits on the sizes
+ * of additional data and plaintext. This API does not provide any
+ * way to report invalid usage; it is up to the caller to ensure that
+ * the provided key, nonce, and data elements all fit the algorithm's
+ * requirements.
+ *
+ *
+ * ## Object-Oriented API
+ *
+ * Each context structure begins with a field (called `vtable`) that
+ * points to an instance of a structure that references the relevant
+ * functions through pointers. Each such structure contains the
+ * following:
+ *
+ *   - `reset`
+ *
+ *     Pointer to the reset function, that allows starting a new
+ *     computation.
+ *
+ *   - `aad_inject`
+ *
+ *     Pointer to the additional authenticated data injection function.
+ *
+ *   - `flip`
+ *
+ *     Pointer to the function that transitions from additional data
+ *     to main message data processing.
+ *
+ *   - `get_tag`
+ *
+ *     Pointer to the function that computes and returns the tag.
+ *
+ *   - `check_tag`
+ *
+ *     Pointer to the function that computes and verifies the tag against
+ *     a received value.
+ *
+ * Note that there is no OOP method for context initialisation: the
+ * various AEAD algorithms have different requirements that would not
+ * map well to a single initialisation API.
+ *
+ * The OOP API is not provided for CCM, due to its specific requirements
+ * (length of plaintext must be known in advance).
+ */
+
+/**
+ * \brief Class type of an AEAD algorithm.
+ */
+typedef struct br_aead_class_ br_aead_class;
+struct br_aead_class_ {
+
+	/**
+	 * \brief Size (in bytes) of authentication tags created by
+	 * this AEAD algorithm.
+	 */
+	size_t tag_size;
+
+	/**
+	 * \brief Reset an AEAD context.
+	 *
+	 * This function resets an already initialised AEAD context for
+	 * a new computation run. Implementations and keys are
+	 * conserved. This function can be called at any time; it
+	 * cancels any ongoing AEAD computation that uses the provided
+	 * context structure.
+	 *
+	 * The provided IV is a _nonce_. Each AEAD algorithm has its
+	 * own requirements on IV size and contents; for most of them,
+	 * it is crucial to security that each nonce value is used
+	 * only once for a given secret key.
+	 *
+	 * \param cc    AEAD context structure.
+	 * \param iv    AEAD nonce to use.
+	 * \param len   AEAD nonce length (in bytes).
+	 */
+	void (*reset)(const br_aead_class **cc, const void *iv, size_t len);
+
+	/**
+	 * \brief Inject additional authenticated data.
+	 *
+	 * The provided data is injected into a running AEAD
+	 * computation. Additional data must be injected _before_ the
+	 * call to `flip()`. Additional data can be injected in several
+	 * chunks of arbitrary length.
+	 *
+	 * \param cc     AEAD context structure.
+	 * \param data   pointer to additional authenticated data.
+	 * \param len    length of additional authenticated data (in bytes).
+	 */
+	void (*aad_inject)(const br_aead_class **cc,
+		const void *data, size_t len);
+
+	/**
+	 * \brief Finish injection of additional authenticated data.
+	 *
+	 * This function MUST be called before beginning the actual
+	 * encryption or decryption (with `run()`), even if no
+	 * additional authenticated data was injected. No additional
+	 * authenticated data may be injected after this function call.
+	 *
+	 * \param cc   AEAD context structure.
+	 */
+	void (*flip)(const br_aead_class **cc);
+
+	/**
+	 * \brief Encrypt or decrypt some data.
+	 *
+	 * Data encryption or decryption can be done after `flip()` has
+	 * been called on the context. If `encrypt` is non-zero, then
+	 * the provided data shall be plaintext, and it is encrypted in
+	 * place. Otherwise, the data shall be ciphertext, and it is
+	 * decrypted in place.
+	 *
+	 * Data may be provided in several chunks of arbitrary length.
+	 *
+	 * \param cc        AEAD context structure.
+	 * \param encrypt   non-zero for encryption, zero for decryption.
+	 * \param data      data to encrypt or decrypt.
+	 * \param len       data length (in bytes).
+	 */
+	void (*run)(const br_aead_class **cc, int encrypt,
+		void *data, size_t len);
+
+	/**
+	 * \brief Compute authentication tag.
+	 *
+	 * Compute the AEAD authentication tag. The tag length depends
+	 * on the AEAD algorithm; it is written in the provided `tag`
+	 * buffer. This call terminates the AEAD run: no data may be
+	 * processed with that AEAD context afterwards, until `reset()`
+	 * is called to initiate a new AEAD run.
+	 *
+	 * The tag value must normally be sent along with the encrypted
+	 * data. When decrypting, the tag value must be recomputed and
+	 * compared with the received tag: if the two tag values differ,
+	 * then either the tag or the encrypted data was altered in
+	 * transit. As an alternative to this function, the
+	 * `check_tag()` function may be used to compute and check the
+	 * tag value.
+	 *
+	 * Tag length depends on the AEAD algorithm.
+	 *
+	 * \param cc    AEAD context structure.
+	 * \param tag   destination buffer for the tag.
+	 */
+	void (*get_tag)(const br_aead_class **cc, void *tag);
+
+	/**
+	 * \brief Compute and check authentication tag.
+	 *
+	 * This function is an alternative to `get_tag()`, and is
+	 * normally used on the receiving end (i.e. when decrypting
+	 * messages). The tag value is recomputed and compared with the
+	 * provided tag value. If they match, 1 is returned; on
+	 * mismatch, 0 is returned. A returned value of 0 means that the
+	 * data or the tag was altered in transit, normally leading to
+	 * wholesale rejection of the complete message.
+	 *
+	 * Tag length depends on the AEAD algorithm.
+	 *
+	 * \param cc    AEAD context structure.
+	 * \param tag   tag value to compare with.
+	 * \return  1 on success (exact match of tag value), 0 otherwise.
+	 */
+	uint32_t (*check_tag)(const br_aead_class **cc, const void *tag);
+
+	/**
+	 * \brief Compute authentication tag (with truncation).
+	 *
+	 * This function is similar to `get_tag()`, except that the tag
+	 * length is provided. Some AEAD algorithms allow several tag
+	 * lengths, usually by truncating the normal tag. Shorter tags
+	 * mechanically increase success probability of forgeries.
+	 * The range of allowed tag lengths depends on the algorithm.
+	 *
+	 * \param cc    AEAD context structure.
+	 * \param tag   destination buffer for the tag.
+	 * \param len   tag length (in bytes).
+	 */
+	void (*get_tag_trunc)(const br_aead_class **cc, void *tag, size_t len);
+
+	/**
+	 * \brief Compute and check authentication tag (with truncation).
+	 *
+	 * This function is similar to `check_tag()` except that it
+	 * works over an explicit tag length. See `get_tag_trunc()` for
+	 * a discussion of explicit tag lengths; the range of allowed tag
+	 * lengths depends on the algorithm.
+	 *
+	 * \param cc    AEAD context structure.
+	 * \param tag   tag value to compare with.
+	 * \param len   tag length (in bytes).
+	 * \return  1 on success (exact match of tag value), 0 otherwise.
+	 */
+	uint32_t (*check_tag_trunc)(const br_aead_class **cc,
+		const void *tag, size_t len);
+};
+
+/**
+ * \brief Context structure for GCM.
+ *
+ * GCM is an AEAD mode that combines a block cipher in CTR mode with a
+ * MAC based on GHASH, to provide authenticated encryption:
+ *
+ *   - Any block cipher with 16-byte blocks can be used with GCM.
+ *
+ *   - The nonce can have any length, from 0 up to 2^64-1 bits; however,
+ *     96-bit nonces (12 bytes) are recommended (nonces with a length
+ *     distinct from 12 bytes are internally hashed, which risks reusing
+ *     a nonce value with a small but not always negligible probability).
+ *
+ *   - Additional authenticated data may have length up to 2^64-1 bits.
+ *
+ *   - Message length may range up to 2^39-256 bits at most.
+ *
+ *   - The authentication tag has length 16 bytes.
+ *
+ * The GCM initialisation function receives as parameter an
+ * _initialised_ block cipher implementation context, with the secret
+ * key already set. A pointer to that context will be kept within the
+ * GCM context structure. It is up to the caller to allocate and
+ * initialise that block cipher context.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_aead_class *vtable;
+
+#ifndef BR_DOXYGEN_IGNORE
+	const br_block_ctr_class **bctx; /* CTR context given to br_gcm_init() */
+	br_ghash gh;                     /* GHASH impl. given to br_gcm_init() */
+	unsigned char h[16];
+	unsigned char j0_1[12];
+	unsigned char buf[16];
+	unsigned char y[16];
+	uint32_t j0_2, jc;
+	uint64_t count_aad, count_ctr; /* NOTE(review): presumably AAD/data lengths */
+#endif
+} br_gcm_context;
+
+/**
+ * \brief Initialise a GCM context.
+ *
+ * A block cipher implementation, with its initialised context structure,
+ * is provided. The block cipher MUST use 16-byte blocks in CTR mode,
+ * and its secret key MUST have been already set in the provided context.
+ * A GHASH implementation must also be provided. The parameters are linked
+ * in the GCM context.
+ *
+ * After this function has been called, the `br_gcm_reset()` function must
+ * be called, to provide the IV for GCM computation.
+ *
+ * \param ctx    GCM context structure.
+ * \param bctx   block cipher context (already initialised with secret key).
+ * \param gh     GHASH implementation.
+ */
+void br_gcm_init(br_gcm_context *ctx,
+	const br_block_ctr_class **bctx, br_ghash gh);
+
+/**
+ * \brief Reset a GCM context.
+ *
+ * This function resets an already initialised GCM context for a new
+ * computation run. Implementations and keys are conserved. This function
+ * can be called at any time; it cancels any ongoing GCM computation that
+ * uses the provided context structure.
+ *
+ * The provided IV is a _nonce_. It is critical to GCM security that IV
+ * values are not repeated for the same encryption key. IV can have
+ * arbitrary length (up to 2^64-1 bits), but the "normal" length is
+ * 96 bits (12 bytes).
+ *
+ * \param ctx   GCM context structure.
+ * \param iv    GCM nonce to use.
+ * \param len   GCM nonce length (in bytes).
+ */
+void br_gcm_reset(br_gcm_context *ctx, const void *iv, size_t len);
+
+/**
+ * \brief Inject additional authenticated data into GCM.
+ *
+ * The provided data is injected into a running GCM computation. Additional
+ * data must be injected _before_ the call to `br_gcm_flip()`.
+ * Additional data can be injected in several chunks of arbitrary length;
+ * the maximum total size of additional authenticated data is 2^64-1
+ * bits.
+ *
+ * \param ctx    GCM context structure.
+ * \param data   pointer to additional authenticated data.
+ * \param len    length of additional authenticated data (in bytes).
+ */
+void br_gcm_aad_inject(br_gcm_context *ctx, const void *data, size_t len);
+
+/**
+ * \brief Finish injection of additional authenticated data into GCM.
+ *
+ * This function MUST be called before beginning the actual encryption
+ * or decryption (with `br_gcm_run()`), even if no additional authenticated
+ * data was injected. No additional authenticated data may be injected
+ * after this function call.
+ *
+ * \param ctx   GCM context structure.
+ */
+void br_gcm_flip(br_gcm_context *ctx);
+
+/**
+ * \brief Encrypt or decrypt some data with GCM.
+ *
+ * Data encryption or decryption can be done after `br_gcm_flip()`
+ * has been called on the context. If `encrypt` is non-zero, then the
+ * provided data shall be plaintext, and it is encrypted in place.
+ * Otherwise, the data shall be ciphertext, and it is decrypted in place.
+ *
+ * Data may be provided in several chunks of arbitrary length. The maximum
+ * total length for data is 2^39-256 bits, i.e. about 65 gigabytes.
+ *
+ * \param ctx       GCM context structure.
+ * \param encrypt   non-zero for encryption, zero for decryption.
+ * \param data      data to encrypt or decrypt.
+ * \param len       data length (in bytes).
+ */
+void br_gcm_run(br_gcm_context *ctx, int encrypt, void *data, size_t len);
+
+/**
+ * \brief Compute GCM authentication tag.
+ *
+ * Compute the GCM authentication tag. The tag is a 16-byte value which
+ * is written in the provided `tag` buffer. This call terminates the
+ * GCM run: no data may be processed with that GCM context afterwards,
+ * until `br_gcm_reset()` is called to initiate a new GCM run.
+ *
+ * The tag value must normally be sent along with the encrypted data.
+ * When decrypting, the tag value must be recomputed and compared with
+ * the received tag: if the two tag values differ, then either the tag
+ * or the encrypted data was altered in transit. As an alternative to
+ * this function, the `br_gcm_check_tag()` function can be used to
+ * compute and check the tag value.
+ *
+ * \param ctx   GCM context structure.
+ * \param tag   destination buffer for the tag (16 bytes).
+ */
+void br_gcm_get_tag(br_gcm_context *ctx, void *tag);
+
+/**
+ * \brief Compute and check GCM authentication tag.
+ *
+ * This function is an alternative to `br_gcm_get_tag()`, normally used
+ * on the receiving end (i.e. when decrypting value). The tag value is
+ * recomputed and compared with the provided tag value. If they match, 1
+ * is returned; on mismatch, 0 is returned. A returned value of 0 means
+ * that the data or the tag was altered in transit, normally leading to
+ * wholesale rejection of the complete message.
+ *
+ * \param ctx   GCM context structure.
+ * \param tag   tag value to compare with (16 bytes).
+ * \return  1 on success (exact match of tag value), 0 otherwise.
+ */
+uint32_t br_gcm_check_tag(br_gcm_context *ctx, const void *tag);
+
+/**
+ * \brief Compute GCM authentication tag (with truncation).
+ *
+ * This function is similar to `br_gcm_get_tag()`, except that it allows
+ * the tag to be truncated to a smaller length. The intended tag length
+ * is provided as `len` (in bytes); it MUST be no more than 16, but
+ * it may be smaller. Note that decreasing tag length mechanically makes
+ * forgeries easier; NIST SP 800-38D specifies that the tag length shall
+ * lie between 12 and 16 bytes (inclusive), but may be truncated down to
+ * 4 or 8 bytes, for specific applications that can tolerate it. It must
+ * also be noted that successful forgeries leak information on the
+ * authentication key, making subsequent forgeries easier. Therefore,
+ * tag truncation, and in particular truncation to sizes lower than 12
+ * bytes, shall be envisioned only with great care.
+ *
+ * The tag is written in the provided `tag` buffer. This call terminates
+ * the GCM run: no data may be processed with that GCM context
+ * afterwards, until `br_gcm_reset()` is called to initiate a new GCM
+ * run.
+ *
+ * The tag value must normally be sent along with the encrypted data.
+ * When decrypting, the tag value must be recomputed and compared with
+ * the received tag: if the two tag values differ, then either the tag
+ * or the encrypted data was altered in transit. As an alternative to
+ * this function, the `br_gcm_check_tag_trunc()` function can be used to
+ * compute and check the tag value.
+ *
+ * \param ctx   GCM context structure.
+ * \param tag   destination buffer for the tag.
+ * \param len   tag length (16 bytes or less).
+ */
+void br_gcm_get_tag_trunc(br_gcm_context *ctx, void *tag, size_t len);
+
+/**
+ * \brief Compute and check GCM authentication tag (with truncation).
+ *
+ * This function is an alternative to `br_gcm_get_tag_trunc()`, normally used
+ * on the receiving end (i.e. when decrypting value). The tag value is
+ * recomputed and compared with the provided tag value. If they match, 1
+ * is returned; on mismatch, 0 is returned. A returned value of 0 means
+ * that the data or the tag was altered in transit, normally leading to
+ * wholesale rejection of the complete message.
+ *
+ * Tag length MUST be 16 bytes or less. The normal GCM tag length is 16
+ * bytes. See `br_gcm_get_tag_trunc()` for some discussion on the potential
+ * perils of truncating authentication tags.
+ *
+ * \param ctx   GCM context structure.
+ * \param tag   tag value to compare with.
+ * \param len   tag length (in bytes).
+ * \return  1 on success (exact match of tag value), 0 otherwise.
+ */
+uint32_t br_gcm_check_tag_trunc(br_gcm_context *ctx,
+	const void *tag, size_t len);
+
+/**
+ * \brief Class instance for GCM.
+ */
+extern const br_aead_class br_gcm_vtable;
+
+/**
+ * \brief Context structure for EAX.
+ *
+ * EAX is an AEAD mode that combines a block cipher in CTR mode with
+ * CBC-MAC using the same block cipher and the same key, to provide
+ * authenticated encryption:
+ *
+ *   - Any block cipher with 16-byte blocks can be used with EAX
+ *     (technically, other block sizes are defined as well, but this
+ *     is not implemented by these functions; shorter blocks also
+ *     imply numerous security issues).
+ *
+ *   - The nonce can have any length, as long as nonce values are
+ *     not reused (thus, if nonces are randomly selected, the nonce
+ *     size should be such that reuse probability is negligible).
+ *
+ *   - Additional authenticated data length is unlimited.
+ *
+ *   - Message length is unlimited.
+ *
+ *   - The authentication tag has length 16 bytes.
+ *
+ * The EAX initialisation function receives as parameter an
+ * _initialised_ block cipher implementation context, with the secret
+ * key already set. A pointer to that context will be kept within the
+ * EAX context structure. It is up to the caller to allocate and
+ * initialise that block cipher context.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_aead_class *vtable;
+
+#ifndef BR_DOXYGEN_IGNORE
+	const br_block_ctrcbc_class **bctx;	/* caller-owned block cipher context; only the pointer is kept */
+	unsigned char L2[16];
+	unsigned char L4[16];
+	unsigned char nonce[16];
+	unsigned char head[16];	/* AAD MAC; br_eax_get_aad_mac() copies this out */
+	unsigned char ctr[16];
+	unsigned char cbcmac[16];
+	unsigned char buf[16];
+	size_t ptr;
+#endif
+} br_eax_context;
+
+/**
+ * \brief EAX captured state.
+ *
+ * Some internal values computed by EAX may be captured at various
+ * points, and reused for another EAX run with the same secret key,
+ * for lower per-message overhead. Captured values do not depend on
+ * the nonce.
+ */
+typedef struct {
+#ifndef BR_DOXYGEN_IGNORE
+	unsigned char st[3][16];	/* st[1] is filled with the AAD MAC by br_eax_get_aad_mac() */
+#endif
+} br_eax_state;
+
+/**
+ * \brief Initialize an EAX context.
+ *
+ * A block cipher implementation, with its initialised context
+ * structure, is provided. The block cipher MUST use 16-byte blocks in
+ * CTR + CBC-MAC mode, and its secret key MUST have been already set in
+ * the provided context. The parameters are linked in the EAX context.
+ *
+ * After this function has been called, the `br_eax_reset()` function must
+ * be called, to provide the nonce for EAX computation.
+ *
+ * \param ctx    EAX context structure.
+ * \param bctx   block cipher context (already initialised with secret key).
+ */
+void br_eax_init(br_eax_context *ctx, const br_block_ctrcbc_class **bctx);
+
+/**
+ * \brief Capture pre-AAD state.
+ *
+ * This function precomputes key-dependent data, and stores it in the
+ * provided `st` structure. This structure should then be used with
+ * `br_eax_reset_pre_aad()`, or updated with `br_eax_get_aad_mac()`
+ * and then used with `br_eax_reset_post_aad()`.
+ *
+ * The EAX context structure is unmodified by this call.
+ *
+ * \param ctx   EAX context structure.
+ * \param st    recipient for captured state.
+ */
+void br_eax_capture(const br_eax_context *ctx, br_eax_state *st);
+
+/**
+ * \brief Reset an EAX context.
+ *
+ * This function resets an already initialised EAX context for a new
+ * computation run. Implementations and keys are conserved. This function
+ * can be called at any time; it cancels any ongoing EAX computation that
+ * uses the provided context structure.
+ *
+ * It is critical to EAX security that nonce values are not repeated for
+ * the same encryption key. Nonces can have arbitrary length. If nonces
+ * are randomly generated, then a nonce length of at least 128 bits (16
+ * bytes) is recommended, to make nonce reuse probability sufficiently
+ * low.
+ *
+ * \param ctx     EAX context structure.
+ * \param nonce   EAX nonce to use.
+ * \param len     EAX nonce length (in bytes).
+ */
+void br_eax_reset(br_eax_context *ctx, const void *nonce, size_t len);
+
+/**
+ * \brief Reset an EAX context with a pre-AAD captured state.
+ *
+ * This function is an alternative to `br_eax_reset()`, that reuses a
+ * previously captured state structure for lower per-message overhead.
+ * The state should have been populated with `br_eax_capture()`
+ * but not updated with `br_eax_get_aad_mac()`.
+ *
+ * After this function is called, additional authenticated data MUST
+ * be injected. At least one byte of additional authenticated data
+ * MUST be provided with `br_eax_aad_inject()`; computation result will
+ * be incorrect if `br_eax_flip()` is called right away.
+ *
+ * After injection of the AAD and call to `br_eax_flip()`, at least
+ * one message byte must be provided. Empty messages are not supported
+ * with this reset mode.
+ *
+ * \param ctx     EAX context structure.
+ * \param st      pre-AAD captured state.
+ * \param nonce   EAX nonce to use.
+ * \param len     EAX nonce length (in bytes).
+ */
+void br_eax_reset_pre_aad(br_eax_context *ctx, const br_eax_state *st,
+	const void *nonce, size_t len);
+
+/**
+ * \brief Reset an EAX context with a post-AAD captured state.
+ *
+ * This function is an alternative to `br_eax_reset()`, that reuses a
+ * previously captured state structure for lower per-message overhead.
+ * The state should have been populated with `br_eax_capture()`
+ * and then updated with `br_eax_get_aad_mac()`.
+ *
+ * After this function is called, message data MUST be injected. The
+ * `br_eax_flip()` function MUST NOT be called. At least one byte of
+ * message data MUST be provided with `br_eax_run()`; empty messages
+ * are not supported with this reset mode.
+ *
+ * \param ctx     EAX context structure.
+ * \param st      post-AAD captured state.
+ * \param nonce   EAX nonce to use.
+ * \param len     EAX nonce length (in bytes).
+ */
+void br_eax_reset_post_aad(br_eax_context *ctx, const br_eax_state *st,
+	const void *nonce, size_t len);
+
+/**
+ * \brief Inject additional authenticated data into EAX.
+ *
+ * The provided data is injected into a running EAX computation. Additional
+ * data must be injected _before_ the call to `br_eax_flip()`.
+ * Additional data can be injected in several chunks of arbitrary length;
+ * the total amount of additional authenticated data is unlimited.
+ *
+ * \param ctx    EAX context structure.
+ * \param data   pointer to additional authenticated data.
+ * \param len    length of additional authenticated data (in bytes).
+ */
+void br_eax_aad_inject(br_eax_context *ctx, const void *data, size_t len);
+
+/**
+ * \brief Finish injection of additional authenticated data into EAX.
+ *
+ * This function MUST be called before beginning the actual encryption
+ * or decryption (with `br_eax_run()`), even if no additional authenticated
+ * data was injected. No additional authenticated data may be injected
+ * after this function call.
+ *
+ * \param ctx   EAX context structure.
+ */
+void br_eax_flip(br_eax_context *ctx);
+
+/**
+ * \brief Obtain a copy of the MAC on additional authenticated data.
+ *
+ * This function may be called only after `br_eax_flip()`; it copies the
+ * AAD-specific MAC value into the provided state. The MAC value depends
+ * on the secret key and the additional data itself, but not on the
+ * nonce. The updated state `st` is meant to be used as parameter for a
+ * further `br_eax_reset_post_aad()` call.
+ *
+ * \param ctx   EAX context structure.
+ * \param st    captured state to update.
+ */
+static inline void
+br_eax_get_aad_mac(const br_eax_context *ctx, br_eax_state *st)
+{
+	memcpy(st->st[1], ctx->head, sizeof ctx->head);	/* slot 1 of the captured state <- AAD MAC held in ctx->head */
+}
+
+/**
+ * \brief Encrypt or decrypt some data with EAX.
+ *
+ * Data encryption or decryption can be done after `br_eax_flip()`
+ * has been called on the context. If `encrypt` is non-zero, then the
+ * provided data shall be plaintext, and it is encrypted in place.
+ * Otherwise, the data shall be ciphertext, and it is decrypted in place.
+ *
+ * Data may be provided in several chunks of arbitrary length.
+ *
+ * \param ctx       EAX context structure.
+ * \param encrypt   non-zero for encryption, zero for decryption.
+ * \param data      data to encrypt or decrypt.
+ * \param len       data length (in bytes).
+ */
+void br_eax_run(br_eax_context *ctx, int encrypt, void *data, size_t len);
+
+/**
+ * \brief Compute EAX authentication tag.
+ *
+ * Compute the EAX authentication tag. The tag is a 16-byte value which
+ * is written in the provided `tag` buffer. This call terminates the
+ * EAX run: no data may be processed with that EAX context afterwards,
+ * until `br_eax_reset()` is called to initiate a new EAX run.
+ *
+ * The tag value must normally be sent along with the encrypted data.
+ * When decrypting, the tag value must be recomputed and compared with
+ * the received tag: if the two tag values differ, then either the tag
+ * or the encrypted data was altered in transit. As an alternative to
+ * this function, the `br_eax_check_tag()` function can be used to
+ * compute and check the tag value.
+ *
+ * \param ctx   EAX context structure.
+ * \param tag   destination buffer for the tag (16 bytes).
+ */
+void br_eax_get_tag(br_eax_context *ctx, void *tag);
+
+/**
+ * \brief Compute and check EAX authentication tag.
+ *
+ * This function is an alternative to `br_eax_get_tag()`, normally used
+ * on the receiving end (i.e. when decrypting value). The tag value is
+ * recomputed and compared with the provided tag value. If they match, 1
+ * is returned; on mismatch, 0 is returned. A returned value of 0 means
+ * that the data or the tag was altered in transit, normally leading to
+ * wholesale rejection of the complete message.
+ *
+ * \param ctx   EAX context structure.
+ * \param tag   tag value to compare with (16 bytes).
+ * \return  1 on success (exact match of tag value), 0 otherwise.
+ */
+uint32_t br_eax_check_tag(br_eax_context *ctx, const void *tag);
+
+/**
+ * \brief Compute EAX authentication tag (with truncation).
+ *
+ * This function is similar to `br_eax_get_tag()`, except that it allows
+ * the tag to be truncated to a smaller length. The intended tag length
+ * is provided as `len` (in bytes); it MUST be no more than 16, but
+ * it may be smaller. Note that decreasing tag length mechanically makes
+ * forgeries easier; for GCM, NIST SP 800-38D specifies that the tag length
+ * shall lie between 12 and 16 bytes (inclusive), but may be truncated down
+ * to 4 or 8 bytes, for specific applications that can tolerate it. It must
+ * also be noted that successful forgeries leak information on the
+ * authentication key, making subsequent forgeries easier. Therefore,
+ * tag truncation, and in particular truncation to sizes lower than 12
+ * bytes, shall be envisioned only with great care.
+ *
+ * The tag is written in the provided `tag` buffer. This call terminates
+ * the EAX run: no data may be processed with that EAX context
+ * afterwards, until `br_eax_reset()` is called to initiate a new EAX
+ * run.
+ *
+ * The tag value must normally be sent along with the encrypted data.
+ * When decrypting, the tag value must be recomputed and compared with
+ * the received tag: if the two tag values differ, then either the tag
+ * or the encrypted data was altered in transit. As an alternative to
+ * this function, the `br_eax_check_tag_trunc()` function can be used to
+ * compute and check the tag value.
+ *
+ * \param ctx   EAX context structure.
+ * \param tag   destination buffer for the tag.
+ * \param len   tag length (16 bytes or less).
+ */
+void br_eax_get_tag_trunc(br_eax_context *ctx, void *tag, size_t len);
+
+/**
+ * \brief Compute and check EAX authentication tag (with truncation).
+ *
+ * This function is an alternative to `br_eax_get_tag_trunc()`, normally used
+ * on the receiving end (i.e. when decrypting value). The tag value is
+ * recomputed and compared with the provided tag value. If they match, 1
+ * is returned; on mismatch, 0 is returned. A returned value of 0 means
+ * that the data or the tag was altered in transit, normally leading to
+ * wholesale rejection of the complete message.
+ *
+ * Tag length MUST be 16 bytes or less. The normal EAX tag length is 16
+ * bytes. See `br_eax_get_tag_trunc()` for some discussion on the potential
+ * perils of truncating authentication tags.
+ *
+ * \param ctx   EAX context structure.
+ * \param tag   tag value to compare with.
+ * \param len   tag length (in bytes).
+ * \return  1 on success (exact match of tag value), 0 otherwise.
+ */
+uint32_t br_eax_check_tag_trunc(br_eax_context *ctx,
+	const void *tag, size_t len);
+
+/**
+ * \brief Class instance for EAX.
+ */
+extern const br_aead_class br_eax_vtable;
+
+/**
+ * \brief Context structure for CCM.
+ *
+ * CCM is an AEAD mode that combines a block cipher in CTR mode with
+ * CBC-MAC using the same block cipher and the same key, to provide
+ * authenticated encryption:
+ *
+ *   - Any block cipher with 16-byte blocks can be used with CCM
+ *     (technically, other block sizes are defined as well, but this
+ *     is not implemented by these functions; shorter blocks also
+ *     imply numerous security issues).
+ *
+ *   - The authentication tag length, and plaintext length, MUST be
+ *     known when starting processing data. Plaintext and ciphertext
+ *     can still be provided by chunks, but the total size must match
+ *     the value provided upon initialisation.
+ *
+ *   - The nonce length is constrained between 7 and 13 bytes (inclusive).
+ *     Furthermore, the plaintext length, when encoded, must fit over
+ *     15-nonceLen bytes; thus, if the nonce has length 13 bytes, then
+ *     the plaintext length cannot exceed 65535 bytes.
+ *
+ *   - Additional authenticated data length is practically unlimited
+ *     (formal limit is at 2^64 bytes).
+ *
+ *   - The authentication tag has length 4 to 16 bytes (even values only).
+ *
+ * The CCM initialisation function receives as parameter an
+ * _initialised_ block cipher implementation context, with the secret
+ * key already set. A pointer to that context will be kept within the
+ * CCM context structure. It is up to the caller to allocate and
+ * initialise that block cipher context.
+ */
+typedef struct {
+#ifndef BR_DOXYGEN_IGNORE
+	const br_block_ctrcbc_class **bctx;	/* caller-owned block cipher context; only the pointer is kept */
+	unsigned char ctr[16];
+	unsigned char cbcmac[16];
+	unsigned char tagmask[16];
+	unsigned char buf[16];
+	size_t ptr;
+	size_t tag_len;	/* presumably the tag length passed to br_ccm_reset() -- TODO confirm */
+#endif
+} br_ccm_context;
+
+/**
+ * \brief Initialize a CCM context.
+ *
+ * A block cipher implementation, with its initialised context
+ * structure, is provided. The block cipher MUST use 16-byte blocks in
+ * CTR + CBC-MAC mode, and its secret key MUST have been already set in
+ * the provided context. The parameters are linked in the CCM context.
+ *
+ * After this function has been called, the `br_ccm_reset()` function must
+ * be called, to provide the nonce for CCM computation.
+ *
+ * \param ctx    CCM context structure.
+ * \param bctx   block cipher context (already initialised with secret key).
+ */
+void br_ccm_init(br_ccm_context *ctx, const br_block_ctrcbc_class **bctx);
+
+/**
+ * \brief Reset a CCM context.
+ *
+ * This function resets an already initialised CCM context for a new
+ * computation run. Implementations and keys are conserved. This function
+ * can be called at any time; it cancels any ongoing CCM computation that
+ * uses the provided context structure.
+ *
+ * The `aad_len` parameter contains the total length, in bytes, of the
+ * additional authenticated data. It may be zero. That length MUST be
+ * exact.
+ *
+ * The `data_len` parameter contains the total length, in bytes, of the
+ * data that will be injected (plaintext or ciphertext). That length MUST
+ * be exact. Moreover, that length MUST be less than 2^(8*(15-nonce_len)).
+ *
+ * The nonce length (`nonce_len`), in bytes, must be in the 7..13 range
+ * (inclusive).
+ *
+ * The tag length (`tag_len`), in bytes, must be in the 4..16 range, and
+ * be an even integer. Short tags mechanically allow for higher forgery
+ * probabilities; hence, tag sizes smaller than 12 bytes shall be used only
+ * with care.
+ *
+ * It is critical to CCM security that nonce values are not repeated for
+ * the same encryption key. Random generation of nonces is not generally
+ * recommended, due to the relatively small maximum nonce value.
+ *
+ * Returned value is 1 on success, 0 on error. An error is reported if
+ * the tag or nonce length is out of range, or if the
+ * plaintext/ciphertext length cannot be encoded with the specified
+ * nonce length.
+ *
+ * \param ctx         CCM context structure.
+ * \param nonce       CCM nonce to use.
+ * \param nonce_len   CCM nonce length (in bytes, 7 to 13).
+ * \param aad_len     additional authenticated data length (in bytes).
+ * \param data_len    plaintext/ciphertext length (in bytes).
+ * \param tag_len     tag length (in bytes).
+ * \return  1 on success, 0 on error.
+ */
+int br_ccm_reset(br_ccm_context *ctx, const void *nonce, size_t nonce_len,
+	uint64_t aad_len, uint64_t data_len, size_t tag_len);
+
+/**
+ * \brief Inject additional authenticated data into CCM.
+ *
+ * The provided data is injected into a running CCM computation. Additional
+ * data must be injected _before_ the call to `br_ccm_flip()`.
+ * Additional data can be injected in several chunks of arbitrary length,
+ * but the total amount MUST exactly match the value which was provided
+ * to `br_ccm_reset()`.
+ *
+ * \param ctx    CCM context structure.
+ * \param data   pointer to additional authenticated data.
+ * \param len    length of additional authenticated data (in bytes).
+ */
+void br_ccm_aad_inject(br_ccm_context *ctx, const void *data, size_t len);
+
+/**
+ * \brief Finish injection of additional authenticated data into CCM.
+ *
+ * This function MUST be called before beginning the actual encryption
+ * or decryption (with `br_ccm_run()`), even if no additional authenticated
+ * data was injected. No additional authenticated data may be injected
+ * after this function call.
+ *
+ * \param ctx   CCM context structure.
+ */
+void br_ccm_flip(br_ccm_context *ctx);
+
+/**
+ * \brief Encrypt or decrypt some data with CCM.
+ *
+ * Data encryption or decryption can be done after `br_ccm_flip()`
+ * has been called on the context. If `encrypt` is non-zero, then the
+ * provided data shall be plaintext, and it is encrypted in place.
+ * Otherwise, the data shall be ciphertext, and it is decrypted in place.
+ *
+ * Data may be provided in several chunks of arbitrary length, provided
+ * that the total length exactly matches the length provided to the
+ * `br_ccm_reset()` call.
+ *
+ * \param ctx       CCM context structure.
+ * \param encrypt   non-zero for encryption, zero for decryption.
+ * \param data      data to encrypt or decrypt.
+ * \param len       data length (in bytes).
+ */
+void br_ccm_run(br_ccm_context *ctx, int encrypt, void *data, size_t len);
+
+/**
+ * \brief Compute CCM authentication tag.
+ *
+ * Compute the CCM authentication tag. This call terminates the CCM
+ * run: all data must have been injected with `br_ccm_run()` (in zero,
+ * one or more successive calls). After this function has been called,
+ * no more data can be processed; a `br_ccm_reset()` call is required
+ * to start a new message.
+ *
+ * The tag length was provided upon context initialisation (last call
+ * to `br_ccm_reset()`); it is returned by this function.
+ *
+ * The tag value must normally be sent along with the encrypted data.
+ * When decrypting, the tag value must be recomputed and compared with
+ * the received tag: if the two tag values differ, then either the tag
+ * or the encrypted data was altered in transit. As an alternative to
+ * this function, the `br_ccm_check_tag()` function can be used to
+ * compute and check the tag value.
+ *
+ * \param ctx   CCM context structure.
+ * \param tag   destination buffer for the tag (up to 16 bytes).
+ * \return  the tag length (in bytes).
+ */
+size_t br_ccm_get_tag(br_ccm_context *ctx, void *tag);
+
+/**
+ * \brief Compute and check CCM authentication tag.
+ *
+ * This function is an alternative to `br_ccm_get_tag()`, normally used
+ * on the receiving end (i.e. when decrypting value). The tag value is
+ * recomputed and compared with the provided tag value. If they match, 1
+ * is returned; on mismatch, 0 is returned. A returned value of 0 means
+ * that the data or the tag was altered in transit, normally leading to
+ * wholesale rejection of the complete message.
+ *
+ * \param ctx   CCM context structure.
+ * \param tag   tag value to compare with (up to 16 bytes).
+ * \return  1 on success (exact match of tag value), 0 otherwise.
+ */
+uint32_t br_ccm_check_tag(br_ccm_context *ctx, const void *tag);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/third_party/bearssl/inc/bearssl_block.h b/third_party/bearssl/inc/bearssl_block.h
new file mode 100644
index 0000000..683a490
--- /dev/null
+++ b/third_party/bearssl/inc/bearssl_block.h
@@ -0,0 +1,2618 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef BR_BEARSSL_BLOCK_H__
+#define BR_BEARSSL_BLOCK_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \file bearssl_block.h
+ *
+ * # Block Ciphers and Symmetric Ciphers
+ *
+ * This file documents the API for block ciphers and other symmetric
+ * ciphers.
+ *
+ *
+ * ## Procedural API
+ *
+ * For a block cipher implementation, up to three separate sets of
+ * functions are provided, for CBC encryption, CBC decryption, and CTR
+ * encryption/decryption. Each set has its own context structure,
+ * initialised with the encryption key.
+ *
+ * For CBC encryption and decryption, the data to encrypt or decrypt is
+ * referenced as a sequence of blocks. The implementations assume that
+ * there is no partial block; no padding is applied or removed. The
+ * caller is responsible for handling any kind of padding.
+ *
+ * Functions for CTR encryption are defined only for block ciphers with
+ * blocks of 16 bytes or more (i.e. AES, but not DES/3DES).
+ *
+ * Each implemented block cipher is identified by an "internal name"
+ * from which are derived the names of structures and functions that
+ * implement the cipher. For the block cipher of internal name "`xxx`",
+ * the following are defined:
+ *
+ *   - `br_xxx_BLOCK_SIZE`
+ *
+ *     A macro that evaluates to the block size (in bytes) of the
+ *     cipher. For all implemented block ciphers, this value is a
+ *     power of two.
+ *
+ *   - `br_xxx_cbcenc_keys`
+ *
+ *     Context structure that contains the subkeys resulting from the key
+ *     expansion. These subkeys are appropriate for CBC encryption. The
+ *     structure first field is called `vtable` and points to the
+ *     appropriate OOP structure.
+ *
+ *   - `br_xxx_cbcenc_init(br_xxx_cbcenc_keys *ctx, const void *key, size_t len)`
+ *
+ *     Perform key expansion: subkeys for CBC encryption are computed and
+ *     written in the provided context structure. The key length MUST be
+ *     adequate for the implemented block cipher. This function also sets
+ *     the `vtable` field.
+ *
+ *   - `br_xxx_cbcenc_run(const br_xxx_cbcenc_keys *ctx, void *iv, void *data, size_t len)`
+ *
+ *     Perform CBC encryption of `len` bytes, in place. The encrypted data
+ *     replaces the cleartext. `len` MUST be a multiple of the block length
+ *     (if it is not, the function may loop forever or overflow a buffer).
+ *     The IV is provided with the `iv` pointer; it is also updated with
+ *     a copy of the last encrypted block.
+ *
+ *   - `br_xxx_cbcdec_keys`
+ *
+ *     Context structure that contains the subkeys resulting from the key
+ *     expansion. These subkeys are appropriate for CBC decryption. The
+ *     structure first field is called `vtable` and points to the
+ *     appropriate OOP structure.
+ *
+ *   - `br_xxx_cbcdec_init(br_xxx_cbcdec_keys *ctx, const void *key, size_t len)`
+ *
+ *     Perform key expansion: subkeys for CBC decryption are computed and
+ *     written in the provided context structure. The key length MUST be
+ *     adequate for the implemented block cipher. This function also sets
+ *     the `vtable` field.
+ *
+ *   - `br_xxx_cbcdec_run(const br_xxx_cbcdec_keys *ctx, void *iv, void *data, size_t len)`
+ *
+ *     Perform CBC decryption of `len` bytes, in place. The decrypted data
+ *     replaces the ciphertext. `len` MUST be a multiple of the block length
+ *     (if it is not, the function may loop forever or overflow a buffer).
+ *     The IV is provided with the `iv` pointer; it is also updated with
+ *     a copy of the last _encrypted_ block.
+ *
+ *   - `br_xxx_ctr_keys`
+ *
+ *     Context structure that contains the subkeys resulting from the key
+ *     expansion. These subkeys are appropriate for CTR encryption and
+ *     decryption. The structure first field is called `vtable` and
+ *     points to the appropriate OOP structure.
+ *
+ *   - `br_xxx_ctr_init(br_xxx_ctr_keys *ctx, const void *key, size_t len)`
+ *
+ *     Perform key expansion: subkeys for CTR encryption and decryption
+ *     are computed and written in the provided context structure. The
+ *     key length MUST be adequate for the implemented block cipher. This
+ *     function also sets the `vtable` field.
+ *
+ *   - `br_xxx_ctr_run(const br_xxx_ctr_keys *ctx, const void *iv, uint32_t cc, void *data, size_t len)` (returns `uint32_t`)
+ *
+ *     Perform CTR encryption/decryption of some data. Processing is done
+ *     "in place" (the output data replaces the input data). This function
+ *     implements the "standard incrementing function" from NIST SP800-38A,
+ *     annex B: the IV length shall be 4 bytes less than the block size
+ *     (i.e. 12 bytes for AES) and the counter is the 32-bit value starting
+ *     with `cc`. The data length (`len`) is not necessarily a multiple of
+ *     the block size. The new counter value is returned, which supports
+ *     chunked processing, provided that each chunk length (except possibly
+ *     the last one) is a multiple of the block size.
+ *
+ *   - `br_xxx_ctrcbc_keys`
+ *
+ *     Context structure that contains the subkeys resulting from the
+ *     key expansion. These subkeys are appropriate for doing combined
+ *     CTR encryption/decryption and CBC-MAC, as used in the CCM and EAX
+ *     authenticated encryption modes. The structure first field is
+ *     called `vtable` and points to the appropriate OOP structure.
+ *
+ *   - `br_xxx_ctrcbc_init(br_xxx_ctrcbc_keys *ctx, const void *key, size_t len)`
+ *
+ *     Perform key expansion: subkeys for combined CTR
+ *     encryption/decryption and CBC-MAC are computed and written in the
+ *     provided context structure. The key length MUST be adequate for
+ *     the implemented block cipher. This function also sets the
+ *     `vtable` field.
+ *
+ *   - `br_xxx_ctrcbc_encrypt(const br_xxx_ctrcbc_keys *ctx, void *ctr, void *cbcmac, void *data, size_t len)`
+ *
+ *     Perform CTR encryption of some data, and CBC-MAC. Processing is
+ *     done "in place" (the output data replaces the input data). This
+ *     function applies CTR encryption on the data, using a full
+ *     block-size counter (i.e. for 128-bit blocks, the counter is
+ *     incremented as a 128-bit value). The 'ctr' array contains the
+ *     initial value for the counter (used in the first block) and it is
+ *     updated with the new value after data processing. The 'cbcmac'
+ *     value shall point to a block-sized value which is used as IV for
+ *     CBC-MAC, computed over the encrypted data (output of CTR
+ *     encryption); the resulting CBC-MAC is written over 'cbcmac' on
+ *     output.
+ *
+ *     The data length MUST be a multiple of the block size.
+ *
+ *   - `br_xxx_ctrcbc_decrypt(const br_xxx_ctrcbc_keys *ctx, void *ctr, void *cbcmac, void *data, size_t len)`
+ *
+ *     Perform CTR decryption of some data, and CBC-MAC. Processing is
+ *     done "in place" (the output data replaces the input data). This
+ *     function applies CTR decryption on the data, using a full
+ *     block-size counter (i.e. for 128-bit blocks, the counter is
+ *     incremented as a 128-bit value). The 'ctr' array contains the
+ *     initial value for the counter (used in the first block) and it is
+ *     updated with the new value after data processing. The 'cbcmac'
+ *     value shall point to a block-sized value which is used as IV for
+ *     CBC-MAC, computed over the encrypted data (input of CTR
+ *     decryption); the resulting CBC-MAC is written over 'cbcmac' on
+ *     output.
+ *
+ *     The data length MUST be a multiple of the block size.
+ *
+ *   - `br_xxx_ctrcbc_ctr(const br_xxx_ctrcbc_keys *ctx, void *ctr, void *data, size_t len)`
+ *
+ *     Perform CTR encryption or decryption of the provided data. The
+ *     data is processed "in place" (the output data replaces the input
+ *     data). A full block-sized counter is applied (i.e. for 128-bit
+ *     blocks, the counter is incremented as a 128-bit value). The 'ctr'
+ *     array contains the initial value for the counter (used in the
+ *     first block), and it is updated with the new value after data
+ *     processing.
+ *
+ *     The data length MUST be a multiple of the block size.
+ *
+ *   - `br_xxx_ctrcbc_mac(const br_xxx_ctrcbc_keys *ctx, void *cbcmac, const void *data, size_t len)`
+ *
+ *     Compute CBC-MAC over the provided data. The IV for CBC-MAC is
+ *     provided as 'cbcmac'; the output is written over the same array.
+ *     The data itself is untouched. The data length MUST be a multiple
+ *     of the block size.
+ *
+ *
+ * It shall be noted that the key expansion functions return `void`. If
+ * the provided key length is not allowed, then there will be no error
+ * reporting; implementations need not validate the key length, thus an
+ * invalid key length may result in undefined behaviour (e.g. buffer
+ * overflow).
+ *
+ * Subkey structures contain no interior pointer, and no external
+ * resources are allocated upon key expansion. They can thus be
+ * discarded without any explicit deallocation.
+ *
+ *
+ * ## Object-Oriented API
+ *
+ * Each context structure begins with a field (called `vtable`) that
+ * points to an instance of a structure that references the relevant
+ * functions through pointers. Each such structure contains the
+ * following:
+ *
+ *   - `context_size`
+ *
+ *     The size (in bytes) of the context structure for subkeys.
+ *
+ *   - `block_size`
+ *
+ *     The cipher block size (in bytes).
+ *
+ *   - `log_block_size`
+ *
+ *     The base-2 logarithm of cipher block size (e.g. 4 for blocks
+ *     of 16 bytes).
+ *
+ *   - `init`
+ *
+ *     Pointer to the key expansion function.
+ *
+ *   - `run`
+ *
+ *     Pointer to the encryption/decryption function.
+ *
+ * For combined CTR/CBC-MAC encryption, the `vtable` has a slightly
+ * different structure:
+ *
+ *   - `context_size`
+ *
+ *     The size (in bytes) of the context structure for subkeys.
+ *
+ *   - `block_size`
+ *
+ *     The cipher block size (in bytes).
+ *
+ *   - `log_block_size`
+ *
+ *     The base-2 logarithm of cipher block size (e.g. 4 for blocks
+ *     of 16 bytes).
+ *
+ *   - `init`
+ *
+ *     Pointer to the key expansion function.
+ *
+ *   - `encrypt`
+ *
+ *     Pointer to the CTR encryption + CBC-MAC function.
+ *
+ *   - `decrypt`
+ *
+ *     Pointer to the CTR decryption + CBC-MAC function.
+ *
+ *   - `ctr`
+ *
+ *     Pointer to the CTR encryption/decryption function.
+ *
+ *   - `mac`
+ *
+ *     Pointer to the CBC-MAC function.
+ *
+ * For block cipher "`xxx`", static, constant instances of these
+ * structures are defined, under the names:
+ *
+ *   - `br_xxx_cbcenc_vtable`
+ *   - `br_xxx_cbcdec_vtable`
+ *   - `br_xxx_ctr_vtable`
+ *   - `br_xxx_ctrcbc_vtable`
+ *
+ *
+ * ## Implemented Block Ciphers
+ * 
+ * Provided implementations are:
+ *
+ * | Name      | Function | Block Size (bytes) | Key lengths (bytes) |
+ * | :-------- | :------- | :----------------: | :-----------------: |
+ * | aes_big   | AES      |        16          | 16, 24 and 32       |
+ * | aes_small | AES      |        16          | 16, 24 and 32       |
+ * | aes_ct    | AES      |        16          | 16, 24 and 32       |
+ * | aes_ct64  | AES      |        16          | 16, 24 and 32       |
+ * | aes_x86ni | AES      |        16          | 16, 24 and 32       |
+ * | aes_pwr8  | AES      |        16          | 16, 24 and 32       |
+ * | des_ct    | DES/3DES |         8          | 8, 16 and 24        |
+ * | des_tab   | DES/3DES |         8          | 8, 16 and 24        |
+ *
+ * **Note:** DES/3DES nominally uses keys of 64, 128 and 192 bits (i.e. 8,
+ * 16 and 24 bytes), but some of the bits are ignored by the algorithm, so
+ * the _effective_ key lengths, from a security point of view, are 56,
+ * 112 and 168 bits, respectively.
+ *
+ * `aes_big` is a "classical" AES implementation, using tables. It
+ * is fast but not constant-time, since it makes data-dependent array
+ * accesses.
+ *
+ * `aes_small` is an AES implementation optimized for code size. It
+ * is substantially slower than `aes_big`; it is not constant-time
+ * either.
+ *
+ * `aes_ct` is a constant-time implementation of AES; its code is about
+ * as big as that of `aes_big`, while its performance is comparable to
+ * that of `aes_small`. However, it is constant-time. This
+ * implementation should thus be considered to be the "default" AES in
+ * BearSSL, to be used unless the operational context guarantees that a
+ * non-constant-time implementation is safe, or an architecture-specific
+ * constant-time implementation can be used (e.g. using dedicated
+ * hardware opcodes).
+ *
+ * `aes_ct64` is another constant-time implementation of AES. It is
+ * similar to `aes_ct` but uses 64-bit values. On 32-bit machines,
+ * `aes_ct64` is not faster than `aes_ct`, often a bit slower, and has
+ * a larger footprint; however, on 64-bit architectures, `aes_ct64`
+ * is typically twice faster than `aes_ct` for modes that allow parallel
+ * operations (i.e. CTR, and CBC decryption, but not CBC encryption).
+ *
+ * `aes_x86ni` exists only on x86 architectures (32-bit and 64-bit). It
+ * uses the AES-NI opcodes when available.
+ *
+ * `aes_pwr8` exists only on PowerPC / POWER architectures (32-bit and
+ * 64-bit, both little-endian and big-endian). It uses the AES opcodes
+ * present in POWER8 and later.
+ *
+ * `des_tab` is a classic, table-based implementation of DES/3DES. It
+ * is not constant-time.
+ *
+ * `des_ct` is a constant-time implementation of DES/3DES. It is
+ * substantially slower than `des_tab`.
+ *
+ * ## ChaCha20 and Poly1305
+ *
+ * ChaCha20 is a stream cipher. Poly1305 is a MAC algorithm. They
+ * are described in [RFC 7539](https://tools.ietf.org/html/rfc7539).
+ *
+ * Two function pointer types are defined:
+ *
+ *   - `br_chacha20_run` describes a function that implements ChaCha20
+ *     only.
+ *
+ *   - `br_poly1305_run` describes an implementation of Poly1305,
+ *     in the AEAD combination with ChaCha20 specified in RFC 7539
+ *     (the ChaCha20 implementation is provided as a function pointer).
+ *
+ * `chacha20_ct` is a straightforward implementation of ChaCha20 in
+ * plain C; it is constant-time, small, and reasonably fast.
+ *
+ * `chacha20_sse2` leverages SSE2 opcodes (on x86 architectures that
+ * support these opcodes). It is faster than `chacha20_ct`.
+ *
+ * `poly1305_ctmul` is an implementation of the ChaCha20+Poly1305 AEAD
+ * construction, where the Poly1305 part is performed with mixed 32-bit
+ * multiplications (operands are 32-bit, result is 64-bit).
+ *
+ * `poly1305_ctmul32` implements ChaCha20+Poly1305 using pure 32-bit
+ * multiplications (32-bit operands, 32-bit result). It is slower than
+ * `poly1305_ctmul`, except on some specific architectures such as
+ * the ARM Cortex M0+.
+ *
+ * `poly1305_ctmulq` implements ChaCha20+Poly1305 with mixed 64-bit
+ * multiplications (operands are 64-bit, result is 128-bit) on 64-bit
+ * platforms that support such operations.
+ *
+ * `poly1305_i15` implements ChaCha20+Poly1305 with the generic "i15"
+ * big integer implementation. It is meant mostly for testing purposes,
+ * although it can help with saving a few hundred bytes of code footprint
+ * on systems where code size is scarce.
+ */
+
+/**
+ * \brief Class type for CBC encryption implementations.
+ *
+ * A `br_block_cbcenc_class` instance points to the functions implementing
+ * a specific block cipher, when used in CBC mode for encrypting data.
+ */
+typedef struct br_block_cbcenc_class_ br_block_cbcenc_class;
+struct br_block_cbcenc_class_ {
+	/**
+	 * \brief Size (in bytes) of the context structure appropriate
+	 * for containing subkeys.
+	 */
+	size_t context_size;
+
+	/**
+	 * \brief Size of individual blocks (in bytes).
+	 */
+	unsigned block_size;
+
+	/**
+	 * \brief Base-2 logarithm of the size of individual blocks,
+	 * expressed in bytes.
+	 */
+	unsigned log_block_size;
+
+	/**
+	 * \brief Initialisation function.
+	 *
+	 * This function sets the `vtable` field in the context structure.
+	 * The key length MUST be one of the key lengths supported by
+	 * the implementation.
+	 *
+	 * \param ctx       context structure to initialise.
+	 * \param key       secret key.
+	 * \param key_len   key length (in bytes).
+	 */
+	void (*init)(const br_block_cbcenc_class **ctx,
+		const void *key, size_t key_len);
+
+	/**
+	 * \brief Run the CBC encryption.
+	 *
+	 * The `iv` parameter points to the IV for this run; it is
+	 * updated with a copy of the last encrypted block. The data
+	 * is encrypted "in place"; its length (`len`) MUST be a
+	 * multiple of the block size.
+	 *
+	 * \param ctx    context structure (already initialised).
+	 * \param iv     IV for CBC encryption (updated).
+	 * \param data   data to encrypt.
+	 * \param len    data length (in bytes, multiple of block size).
+	 */
+	void (*run)(const br_block_cbcenc_class *const *ctx,
+		void *iv, void *data, size_t len);
+};
+
+/**
+ * \brief Class type for CBC decryption implementations.
+ *
+ * A `br_block_cbcdec_class` instance points to the functions implementing
+ * a specific block cipher, when used in CBC mode for decrypting data.
+ */
+typedef struct br_block_cbcdec_class_ br_block_cbcdec_class;
+struct br_block_cbcdec_class_ {
+	/**
+	 * \brief Size (in bytes) of the context structure appropriate
+	 * for containing subkeys.
+	 */
+	size_t context_size;
+
+	/**
+	 * \brief Size of individual blocks (in bytes).
+	 */
+	unsigned block_size;
+
+	/**
+	 * \brief Base-2 logarithm of the size of individual blocks,
+	 * expressed in bytes.
+	 */
+	unsigned log_block_size;
+
+	/**
+	 * \brief Initialisation function.
+	 *
+	 * This function sets the `vtable` field in the context structure.
+	 * The key length MUST be one of the key lengths supported by
+	 * the implementation.
+	 *
+	 * \param ctx       context structure to initialise.
+	 * \param key       secret key.
+	 * \param key_len   key length (in bytes).
+	 */
+	void (*init)(const br_block_cbcdec_class **ctx,
+		const void *key, size_t key_len);
+
+	/**
+	 * \brief Run the CBC decryption.
+	 *
+	 * The `iv` parameter points to the IV for this run; it is
+	 * updated with a copy of the last encrypted block. The data
+	 * is decrypted "in place"; its length (`len`) MUST be a
+	 * multiple of the block size.
+	 *
+	 * \param ctx    context structure (already initialised).
+	 * \param iv     IV for CBC decryption (updated).
+	 * \param data   data to decrypt.
+	 * \param len    data length (in bytes, multiple of block size).
+	 */
+	void (*run)(const br_block_cbcdec_class *const *ctx,
+		void *iv, void *data, size_t len);
+};
+
+/**
+ * \brief Class type for CTR encryption/decryption implementations.
+ *
+ * A `br_block_ctr_class` instance points to the functions implementing
+ * a specific block cipher, when used in CTR mode for encrypting or
+ * decrypting data.
+ */
+typedef struct br_block_ctr_class_ br_block_ctr_class;
+struct br_block_ctr_class_ {
+	/**
+	 * \brief Size (in bytes) of the context structure appropriate
+	 * for containing subkeys.
+	 */
+	size_t context_size;
+
+	/**
+	 * \brief Size of individual blocks (in bytes).
+	 */
+	unsigned block_size;
+
+	/**
+	 * \brief Base-2 logarithm of the size of individual blocks,
+	 * expressed in bytes.
+	 */
+	unsigned log_block_size;
+
+	/**
+	 * \brief Initialisation function.
+	 *
+	 * This function sets the `vtable` field in the context structure.
+	 * The key length MUST be one of the key lengths supported by
+	 * the implementation.
+	 *
+	 * \param ctx       context structure to initialise.
+	 * \param key       secret key.
+	 * \param key_len   key length (in bytes).
+	 */
+	void (*init)(const br_block_ctr_class **ctx,
+		const void *key, size_t key_len);
+
+	/**
+	 * \brief Run the CTR encryption or decryption.
+	 *
+	 * The `iv` parameter points to the IV for this run; its
+	 * length is exactly 4 bytes less than the block size (e.g.
+	 * 12 bytes for AES/CTR). The IV is combined with a 32-bit
+	 * block counter to produce the block value which is processed
+	 * with the block cipher.
+	 *
+	 * The data to encrypt or decrypt is updated "in place". Its
+	 * length (`len` bytes) is not required to be a multiple of
+	 * the block size; if the final block is partial, then the
+	 * corresponding key stream bits are dropped.
+	 *
+	 * The resulting counter value is returned.
+	 *
+	 * \param ctx    context structure (already initialised).
+	 * \param iv     IV for CTR encryption/decryption.
+	 * \param cc     initial value for the block counter.
+	 * \param data   data to encrypt or decrypt.
+	 * \param len    data length (in bytes).
+	 * \return  the new block counter value.
+	 */
+	uint32_t (*run)(const br_block_ctr_class *const *ctx,
+		const void *iv, uint32_t cc, void *data, size_t len);
+};
+
+/**
+ * \brief Class type for combined CTR and CBC-MAC implementations.
+ *
+ * A `br_block_ctrcbc_class` instance points to the functions implementing
+ * a specific block cipher, when used in CTR mode for encrypting or
+ * decrypting data, along with CBC-MAC.
+ */
+typedef struct br_block_ctrcbc_class_ br_block_ctrcbc_class;
+struct br_block_ctrcbc_class_ {
+	/**
+	 * \brief Size (in bytes) of the context structure appropriate
+	 * for containing subkeys.
+	 */
+	size_t context_size;
+
+	/**
+	 * \brief Size of individual blocks (in bytes).
+	 */
+	unsigned block_size;
+
+	/**
+	 * \brief Base-2 logarithm of the size of individual blocks,
+	 * expressed in bytes.
+	 */
+	unsigned log_block_size;
+
+	/**
+	 * \brief Initialisation function.
+	 *
+	 * This function sets the `vtable` field in the context structure.
+	 * The key length MUST be one of the key lengths supported by
+	 * the implementation.
+	 *
+	 * \param ctx       context structure to initialise.
+	 * \param key       secret key.
+	 * \param key_len   key length (in bytes).
+	 */
+	void (*init)(const br_block_ctrcbc_class **ctx,
+		const void *key, size_t key_len);
+
+	/**
+	 * \brief Run the CTR encryption + CBC-MAC.
+	 *
+	 * The `ctr` parameter points to the counter; its length shall
+	 * be equal to the block size. It is updated by this function
+	 * as encryption proceeds.
+	 *
+	 * The `cbcmac` parameter points to the IV for CBC-MAC. The MAC
+	 * is computed over the encrypted data (output of CTR
+	 * encryption). Its length shall be equal to the block size. The
+	 * computed CBC-MAC value is written over the `cbcmac` array.
+	 *
+	 * The data to encrypt is updated "in place". Its length (`len`
+	 * bytes) MUST be a multiple of the block size.
+	 *
+	 * \param ctx      context structure (already initialised).
+	 * \param ctr      counter for CTR encryption (initial and final).
+	 * \param cbcmac   IV and output buffer for CBC-MAC.
+	 * \param data     data to encrypt.
+	 * \param len      data length (in bytes).
+	 */
+	void (*encrypt)(const br_block_ctrcbc_class *const *ctx,
+		void *ctr, void *cbcmac, void *data, size_t len);
+
+	/**
+	 * \brief Run the CTR decryption + CBC-MAC.
+	 *
+	 * The `ctr` parameter points to the counter; its length shall
+	 * be equal to the block size. It is updated by this function
+	 * as decryption proceeds.
+	 *
+	 * The `cbcmac` parameter points to the IV for CBC-MAC. The MAC
+	 * is computed over the encrypted data (i.e. before CTR
+	 * decryption). Its length shall be equal to the block size. The
+	 * computed CBC-MAC value is written over the `cbcmac` array.
+	 *
+	 * The data to decrypt is updated "in place". Its length (`len`
+	 * bytes) MUST be a multiple of the block size.
+	 *
+	 * \param ctx      context structure (already initialised).
+	 * \param ctr      counter for CTR decryption (initial and final).
+	 * \param cbcmac   IV and output buffer for CBC-MAC.
+	 * \param data     data to decrypt.
+	 * \param len      data length (in bytes).
+	 */
+	void (*decrypt)(const br_block_ctrcbc_class *const *ctx,
+		void *ctr, void *cbcmac, void *data, size_t len);
+
+	/**
+	 * \brief Run the CTR encryption/decryption only.
+	 *
+	 * The `ctr` parameter points to the counter; its length shall
+	 * be equal to the block size. It is updated by this function
+	 * as encryption/decryption proceeds.
+	 *
+	 * The data to encrypt or decrypt is updated "in place". Its
+	 * length (`len` bytes) MUST be a multiple of the block size.
+	 *
+	 * \param ctx      context structure (already initialised).
+	 * \param ctr      counter for CTR (initial and final).
+	 * \param data     data to encrypt or decrypt.
+	 * \param len      data length (in bytes).
+	 */
+	void (*ctr)(const br_block_ctrcbc_class *const *ctx,
+		void *ctr, void *data, size_t len);
+
+	/**
+	 * \brief Run the CBC-MAC only.
+	 *
+	 * The `cbcmac` parameter points to the IV for CBC-MAC; its
+	 * length shall be equal to the block size. The MAC is computed
+	 * over the provided data, as is (no CTR processing). The
+	 * computed CBC-MAC value is written over the `cbcmac` array.
+	 *
+	 * The data is unmodified. Its length (`len` bytes) MUST be a
+	 * multiple of the block size.
+	 *
+	 * \param ctx      context structure (already initialised).
+	 * \param cbcmac   IV and output buffer for CBC-MAC.
+	 * \param data     data to MAC.
+	 * \param len      data length (in bytes).
+	 */
+	void (*mac)(const br_block_ctrcbc_class *const *ctx,
+		void *cbcmac, const void *data, size_t len);
+};
+
+/*
+ * Traditional, table-based AES implementation. It is fast, but uses
+ * internal tables (in particular a 1 kB table for encryption, another
+ * 1 kB table for decryption, and a 256-byte table for key schedule),
+ * and it is not constant-time. In contexts where cache-timing attacks
+ * apply, this implementation may leak the secret key.
+ */
+
+/** \brief AES block size (16 bytes). */
+#define br_aes_big_BLOCK_SIZE   16
+
+/**
+ * \brief Context for AES subkeys (`aes_big` implementation, CBC encryption).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_cbcenc_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint32_t skey[60];
+	unsigned num_rounds;
+#endif
+} br_aes_big_cbcenc_keys;
+
+/**
+ * \brief Context for AES subkeys (`aes_big` implementation, CBC decryption).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_cbcdec_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint32_t skey[60];
+	unsigned num_rounds;
+#endif
+} br_aes_big_cbcdec_keys;
+
+/**
+ * \brief Context for AES subkeys (`aes_big` implementation, CTR encryption
+ * and decryption).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_ctr_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint32_t skey[60];
+	unsigned num_rounds;
+#endif
+} br_aes_big_ctr_keys;
+
+/**
+ * \brief Context for AES subkeys (`aes_big` implementation, CTR encryption
+ * and decryption + CBC-MAC).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_ctrcbc_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint32_t skey[60];
+	unsigned num_rounds;
+#endif
+} br_aes_big_ctrcbc_keys;
+
+/**
+ * \brief Class instance for AES CBC encryption (`aes_big` implementation).
+ */
+extern const br_block_cbcenc_class br_aes_big_cbcenc_vtable;
+
+/**
+ * \brief Class instance for AES CBC decryption (`aes_big` implementation).
+ */
+extern const br_block_cbcdec_class br_aes_big_cbcdec_vtable;
+
+/**
+ * \brief Class instance for AES CTR encryption and decryption
+ * (`aes_big` implementation).
+ */
+extern const br_block_ctr_class br_aes_big_ctr_vtable;
+
+/**
+ * \brief Class instance for AES CTR encryption/decryption + CBC-MAC
+ * (`aes_big` implementation).
+ */
+extern const br_block_ctrcbc_class br_aes_big_ctrcbc_vtable;
+
+/**
+ * \brief Context initialisation (key schedule) for AES CBC encryption
+ * (`aes_big` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_big_cbcenc_init(br_aes_big_cbcenc_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief Context initialisation (key schedule) for AES CBC decryption
+ * (`aes_big` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_big_cbcdec_init(br_aes_big_cbcdec_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief Context initialisation (key schedule) for AES CTR encryption
+ * and decryption (`aes_big` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_big_ctr_init(br_aes_big_ctr_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief Context initialisation (key schedule) for AES CTR + CBC-MAC
+ * (`aes_big` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_big_ctrcbc_init(br_aes_big_ctrcbc_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief CBC encryption with AES (`aes_big` implementation).
+ *
+ * \param ctx    context (already initialised).
+ * \param iv     IV (updated).
+ * \param data   data to encrypt (updated).
+ * \param len    data length (in bytes, MUST be multiple of 16).
+ */
+void br_aes_big_cbcenc_run(const br_aes_big_cbcenc_keys *ctx, void *iv,
+	void *data, size_t len);
+
+/**
+ * \brief CBC decryption with AES (`aes_big` implementation).
+ *
+ * \param ctx    context (already initialised).
+ * \param iv     IV (updated).
+ * \param data   data to decrypt (updated).
+ * \param len    data length (in bytes, MUST be multiple of 16).
+ */
+void br_aes_big_cbcdec_run(const br_aes_big_cbcdec_keys *ctx, void *iv,
+	void *data, size_t len);
+
+/**
+ * \brief CTR encryption and decryption with AES (`aes_big` implementation).
+ *
+ * \param ctx    context (already initialised).
+ * \param iv     IV (constant, 12 bytes).
+ * \param cc     initial block counter value.
+ * \param data   data to encrypt or decrypt (updated).
+ * \param len    data length (in bytes).
+ * \return  new block counter value.
+ */
+uint32_t br_aes_big_ctr_run(const br_aes_big_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len);
+
+/**
+ * \brief CTR encryption + CBC-MAC with AES (`aes_big` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to encrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_big_ctrcbc_encrypt(const br_aes_big_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len);
+
+/**
+ * \brief CTR decryption + CBC-MAC with AES (`aes_big` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to decrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_big_ctrcbc_decrypt(const br_aes_big_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len);
+
+/**
+ * \brief CTR encryption/decryption with AES (`aes_big` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param data     data to encrypt or decrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_big_ctrcbc_ctr(const br_aes_big_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len);
+
+/**
+ * \brief CBC-MAC with AES (`aes_big` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to MAC (unmodified).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_big_ctrcbc_mac(const br_aes_big_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len);
+
+/*
+ * AES implementation optimized for size. It is slower than the
+ * traditional table-based AES implementation, but requires much less
+ * code. It still uses data-dependent table accesses (albeit within a
+ * much smaller 256-byte table), which makes it conceptually vulnerable
+ * to cache-timing attacks.
+ */
+
+/** \brief AES block size (16 bytes). */
+#define br_aes_small_BLOCK_SIZE   16
+
+/**
+ * \brief Context for AES subkeys (`aes_small` implementation, CBC encryption).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_cbcenc_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint32_t skey[60];
+	unsigned num_rounds;
+#endif
+} br_aes_small_cbcenc_keys;
+
+/**
+ * \brief Context for AES subkeys (`aes_small` implementation, CBC decryption).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_cbcdec_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint32_t skey[60];
+	unsigned num_rounds;
+#endif
+} br_aes_small_cbcdec_keys;
+
+/**
+ * \brief Context for AES subkeys (`aes_small` implementation, CTR encryption
+ * and decryption).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_ctr_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint32_t skey[60];
+	unsigned num_rounds;
+#endif
+} br_aes_small_ctr_keys;
+
+/**
+ * \brief Context for AES subkeys (`aes_small` implementation, CTR encryption
+ * and decryption + CBC-MAC).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_ctrcbc_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint32_t skey[60];
+	unsigned num_rounds;
+#endif
+} br_aes_small_ctrcbc_keys;
+
+/**
+ * \brief Class instance for AES CBC encryption (`aes_small` implementation).
+ */
+extern const br_block_cbcenc_class br_aes_small_cbcenc_vtable;
+
+/**
+ * \brief Class instance for AES CBC decryption (`aes_small` implementation).
+ */
+extern const br_block_cbcdec_class br_aes_small_cbcdec_vtable;
+
+/**
+ * \brief Class instance for AES CTR encryption and decryption
+ * (`aes_small` implementation).
+ */
+extern const br_block_ctr_class br_aes_small_ctr_vtable;
+
+/**
+ * \brief Class instance for AES CTR encryption/decryption + CBC-MAC
+ * (`aes_small` implementation).
+ */
+extern const br_block_ctrcbc_class br_aes_small_ctrcbc_vtable;
+
+/**
+ * \brief Context initialisation (key schedule) for AES CBC encryption
+ * (`aes_small` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_small_cbcenc_init(br_aes_small_cbcenc_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief Context initialisation (key schedule) for AES CBC decryption
+ * (`aes_small` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_small_cbcdec_init(br_aes_small_cbcdec_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief Context initialisation (key schedule) for AES CTR encryption
+ * and decryption (`aes_small` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_small_ctr_init(br_aes_small_ctr_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief Context initialisation (key schedule) for AES CTR + CBC-MAC
+ * (`aes_small` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_small_ctrcbc_init(br_aes_small_ctrcbc_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief CBC encryption with AES (`aes_small` implementation).
+ *
+ * \param ctx    context (already initialised).
+ * \param iv     IV (updated).
+ * \param data   data to encrypt (updated).
+ * \param len    data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_small_cbcenc_run(const br_aes_small_cbcenc_keys *ctx, void *iv,
+	void *data, size_t len);
+
+/**
+ * \brief CBC decryption with AES (`aes_small` implementation).
+ *
+ * \param ctx    context (already initialised).
+ * \param iv     IV (updated).
+ * \param data   data to decrypt (updated).
+ * \param len    data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_small_cbcdec_run(const br_aes_small_cbcdec_keys *ctx, void *iv,
+	void *data, size_t len);
+
+/**
+ * \brief CTR encryption and decryption with AES (`aes_small` implementation).
+ *
+ * \param ctx    context (already initialised).
+ * \param iv     IV (constant, 12 bytes).
+ * \param cc     initial block counter value.
+ * \param data   data to encrypt or decrypt (updated).
+ * \param len    data length (in bytes).
+ * \return  new block counter value.
+ */
+uint32_t br_aes_small_ctr_run(const br_aes_small_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len);
+
+/**
+ * \brief CTR encryption + CBC-MAC with AES (`aes_small` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to encrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_small_ctrcbc_encrypt(const br_aes_small_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len);
+
+/**
+ * \brief CTR decryption + CBC-MAC with AES (`aes_small` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to decrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_small_ctrcbc_decrypt(const br_aes_small_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len);
+
+/**
+ * \brief CTR encryption/decryption with AES (`aes_small` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param data     data to encrypt or decrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_small_ctrcbc_ctr(const br_aes_small_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len);
+
+/**
+ * \brief CBC-MAC with AES (`aes_small` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to MAC (unmodified).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_small_ctrcbc_mac(const br_aes_small_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len);
+
+/*
+ * Constant-time AES implementation. Its size is similar to that of
+ * 'aes_big', and its performance is similar to that of 'aes_small' (faster
+ * decryption, slower encryption). However, it is constant-time, i.e.
+ * immune to cache-timing and similar attacks.
+ */
+
+/** \brief AES block size (16 bytes). */
+#define br_aes_ct_BLOCK_SIZE   16
+
+/**
+ * \brief Context for AES subkeys (`aes_ct` implementation, CBC encryption).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_cbcenc_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint32_t skey[60];
+	unsigned num_rounds;
+#endif
+} br_aes_ct_cbcenc_keys;
+
+/**
+ * \brief Context for AES subkeys (`aes_ct` implementation, CBC decryption).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_cbcdec_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint32_t skey[60];
+	unsigned num_rounds;
+#endif
+} br_aes_ct_cbcdec_keys;
+
+/**
+ * \brief Context for AES subkeys (`aes_ct` implementation, CTR encryption
+ * and decryption).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_ctr_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint32_t skey[60];
+	unsigned num_rounds;
+#endif
+} br_aes_ct_ctr_keys;
+
+/**
+ * \brief Context for AES subkeys (`aes_ct` implementation, CTR encryption
+ * and decryption + CBC-MAC).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_ctrcbc_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint32_t skey[60];
+	unsigned num_rounds;
+#endif
+} br_aes_ct_ctrcbc_keys;
+
+/**
+ * \brief Class instance for AES CBC encryption (`aes_ct` implementation).
+ */
+extern const br_block_cbcenc_class br_aes_ct_cbcenc_vtable;
+
+/**
+ * \brief Class instance for AES CBC decryption (`aes_ct` implementation).
+ */
+extern const br_block_cbcdec_class br_aes_ct_cbcdec_vtable;
+
+/**
+ * \brief Class instance for AES CTR encryption and decryption
+ * (`aes_ct` implementation).
+ */
+extern const br_block_ctr_class br_aes_ct_ctr_vtable;
+
+/**
+ * \brief Class instance for AES CTR encryption/decryption + CBC-MAC
+ * (`aes_ct` implementation).
+ */
+extern const br_block_ctrcbc_class br_aes_ct_ctrcbc_vtable;
+
+/**
+ * \brief Context initialisation (key schedule) for AES CBC encryption
+ * (`aes_ct` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_ct_cbcenc_init(br_aes_ct_cbcenc_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief Context initialisation (key schedule) for AES CBC decryption
+ * (`aes_ct` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_ct_cbcdec_init(br_aes_ct_cbcdec_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief Context initialisation (key schedule) for AES CTR encryption
+ * and decryption (`aes_ct` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_ct_ctr_init(br_aes_ct_ctr_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief Context initialisation (key schedule) for AES CTR + CBC-MAC
+ * (`aes_ct` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_ct_ctrcbc_init(br_aes_ct_ctrcbc_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief CBC encryption with AES (`aes_ct` implementation).
+ *
+ * \param ctx    context (already initialised).
+ * \param iv     IV (updated).
+ * \param data   data to encrypt (updated).
+ * \param len    data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_ct_cbcenc_run(const br_aes_ct_cbcenc_keys *ctx, void *iv,
+	void *data, size_t len);
+
+/**
+ * \brief CBC decryption with AES (`aes_ct` implementation).
+ *
+ * \param ctx    context (already initialised).
+ * \param iv     IV (updated).
+ * \param data   data to decrypt (updated).
+ * \param len    data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_ct_cbcdec_run(const br_aes_ct_cbcdec_keys *ctx, void *iv,
+	void *data, size_t len);
+
+/**
+ * \brief CTR encryption and decryption with AES (`aes_ct` implementation).
+ *
+ * \param ctx    context (already initialised).
+ * \param iv     IV (constant, 12 bytes).
+ * \param cc     initial block counter value.
+ * \param data   data to encrypt or decrypt (updated).
+ * \param len    data length (in bytes).
+ * \return  new block counter value.
+ */
+uint32_t br_aes_ct_ctr_run(const br_aes_ct_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len);
+
+/**
+ * \brief CTR encryption + CBC-MAC with AES (`aes_ct` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to encrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_ct_ctrcbc_encrypt(const br_aes_ct_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len);
+
+/**
+ * \brief CTR decryption + CBC-MAC with AES (`aes_ct` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to decrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_ct_ctrcbc_decrypt(const br_aes_ct_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len);
+
+/**
+ * \brief CTR encryption/decryption with AES (`aes_ct` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param data     data to encrypt or decrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_ct_ctrcbc_ctr(const br_aes_ct_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len);
+
+/**
+ * \brief CBC-MAC with AES (`aes_ct` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to MAC (unmodified).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_ct_ctrcbc_mac(const br_aes_ct_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len);
+
+/*
+ * 64-bit constant-time AES implementation. It is similar to 'aes_ct'
+ * but uses 64-bit registers, making it about twice as fast as 'aes_ct'
+ * on 64-bit platforms, while remaining constant-time and with a similar
+ * code size. (The doubling in performance is only for CBC decryption
+ * and CTR mode; CBC encryption is non-parallel and cannot benefit from
+ * the larger registers.)
+ */
+
+/** \brief AES block size (16 bytes). */
+#define br_aes_ct64_BLOCK_SIZE   16
+
+/**
+ * \brief Context for AES subkeys (`aes_ct64` implementation, CBC encryption).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_cbcenc_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint64_t skey[30];
+	unsigned num_rounds;
+#endif
+} br_aes_ct64_cbcenc_keys;
+
+/**
+ * \brief Context for AES subkeys (`aes_ct64` implementation, CBC decryption).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_cbcdec_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint64_t skey[30];
+	unsigned num_rounds;
+#endif
+} br_aes_ct64_cbcdec_keys;
+
+/**
+ * \brief Context for AES subkeys (`aes_ct64` implementation, CTR encryption
+ * and decryption).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_ctr_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint64_t skey[30];
+	unsigned num_rounds;
+#endif
+} br_aes_ct64_ctr_keys;
+
+/**
+ * \brief Context for AES subkeys (`aes_ct64` implementation, CTR encryption
+ * and decryption + CBC-MAC).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_ctrcbc_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint64_t skey[30];
+	unsigned num_rounds;
+#endif
+} br_aes_ct64_ctrcbc_keys;
+
+/**
+ * \brief Class instance for AES CBC encryption (`aes_ct64` implementation).
+ */
+extern const br_block_cbcenc_class br_aes_ct64_cbcenc_vtable;
+
+/**
+ * \brief Class instance for AES CBC decryption (`aes_ct64` implementation).
+ */
+extern const br_block_cbcdec_class br_aes_ct64_cbcdec_vtable;
+
+/**
+ * \brief Class instance for AES CTR encryption and decryption
+ * (`aes_ct64` implementation).
+ */
+extern const br_block_ctr_class br_aes_ct64_ctr_vtable;
+
+/**
+ * \brief Class instance for AES CTR encryption/decryption + CBC-MAC
+ * (`aes_ct64` implementation).
+ */
+extern const br_block_ctrcbc_class br_aes_ct64_ctrcbc_vtable;
+
+/**
+ * \brief Context initialisation (key schedule) for AES CBC encryption
+ * (`aes_ct64` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_ct64_cbcenc_init(br_aes_ct64_cbcenc_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief Context initialisation (key schedule) for AES CBC decryption
+ * (`aes_ct64` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_ct64_cbcdec_init(br_aes_ct64_cbcdec_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief Context initialisation (key schedule) for AES CTR encryption
+ * and decryption (`aes_ct64` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_ct64_ctr_init(br_aes_ct64_ctr_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief Context initialisation (key schedule) for AES CTR + CBC-MAC
+ * (`aes_ct64` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_ct64_ctrcbc_init(br_aes_ct64_ctrcbc_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief CBC encryption with AES (`aes_ct64` implementation).
+ *
+ * \param ctx    context (already initialised).
+ * \param iv     IV (updated).
+ * \param data   data to encrypt (updated).
+ * \param len    data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_ct64_cbcenc_run(const br_aes_ct64_cbcenc_keys *ctx, void *iv,
+	void *data, size_t len);
+
+/**
+ * \brief CBC decryption with AES (`aes_ct64` implementation).
+ *
+ * \param ctx    context (already initialised).
+ * \param iv     IV (updated).
+ * \param data   data to decrypt (updated).
+ * \param len    data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_ct64_cbcdec_run(const br_aes_ct64_cbcdec_keys *ctx, void *iv,
+	void *data, size_t len);
+
+/**
+ * \brief CTR encryption and decryption with AES (`aes_ct64` implementation).
+ *
+ * \param ctx    context (already initialised).
+ * \param iv     IV (constant, 12 bytes).
+ * \param cc     initial block counter value.
+ * \param data   data to encrypt or decrypt (updated).
+ * \param len    data length (in bytes).
+ * \return  new block counter value.
+ */
+uint32_t br_aes_ct64_ctr_run(const br_aes_ct64_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len);
+
+/**
+ * \brief CTR encryption + CBC-MAC with AES (`aes_ct64` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to encrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_ct64_ctrcbc_encrypt(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len);
+
+/**
+ * \brief CTR decryption + CBC-MAC with AES (`aes_ct64` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to decrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_ct64_ctrcbc_decrypt(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len);
+
+/**
+ * \brief CTR encryption/decryption with AES (`aes_ct64` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param data     data to encrypt or decrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_ct64_ctrcbc_ctr(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len);
+
+/**
+ * \brief CBC-MAC with AES (`aes_ct64` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to MAC (unmodified).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_ct64_ctrcbc_mac(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len);
+
+/*
+ * AES implementation using AES-NI opcodes (x86 platform).
+ */
+
+/** \brief AES block size (16 bytes). */
+#define br_aes_x86ni_BLOCK_SIZE   16
+
+/**
+ * \brief Context for AES subkeys (`aes_x86ni` implementation, CBC encryption).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_cbcenc_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	union {
+		unsigned char skni[16 * 15];
+	} skey;
+	unsigned num_rounds;
+#endif
+} br_aes_x86ni_cbcenc_keys;
+
+/**
+ * \brief Context for AES subkeys (`aes_x86ni` implementation, CBC decryption).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_cbcdec_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	union {
+		unsigned char skni[16 * 15];
+	} skey;
+	unsigned num_rounds;
+#endif
+} br_aes_x86ni_cbcdec_keys;
+
+/**
+ * \brief Context for AES subkeys (`aes_x86ni` implementation, CTR encryption
+ * and decryption).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_ctr_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	union {
+		unsigned char skni[16 * 15];
+	} skey;
+	unsigned num_rounds;
+#endif
+} br_aes_x86ni_ctr_keys;
+
+/**
+ * \brief Context for AES subkeys (`aes_x86ni` implementation, CTR encryption
+ * and decryption + CBC-MAC).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_ctrcbc_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	union {
+		unsigned char skni[16 * 15];
+	} skey;
+	unsigned num_rounds;
+#endif
+} br_aes_x86ni_ctrcbc_keys;
+
+/**
+ * \brief Class instance for AES CBC encryption (`aes_x86ni` implementation).
+ *
+ * Since this implementation might be omitted from the library, or the
+ * AES opcode unavailable on the current CPU, a pointer to this class
+ * instance should be obtained through `br_aes_x86ni_cbcenc_get_vtable()`.
+ */
+extern const br_block_cbcenc_class br_aes_x86ni_cbcenc_vtable;
+
+/**
+ * \brief Class instance for AES CBC decryption (`aes_x86ni` implementation).
+ *
+ * Since this implementation might be omitted from the library, or the
+ * AES opcode unavailable on the current CPU, a pointer to this class
+ * instance should be obtained through `br_aes_x86ni_cbcdec_get_vtable()`.
+ */
+extern const br_block_cbcdec_class br_aes_x86ni_cbcdec_vtable;
+
+/**
+ * \brief Class instance for AES CTR encryption and decryption
+ * (`aes_x86ni` implementation).
+ *
+ * Since this implementation might be omitted from the library, or the
+ * AES opcode unavailable on the current CPU, a pointer to this class
+ * instance should be obtained through `br_aes_x86ni_ctr_get_vtable()`.
+ */
+extern const br_block_ctr_class br_aes_x86ni_ctr_vtable;
+
+/**
+ * \brief Class instance for AES CTR encryption/decryption + CBC-MAC
+ * (`aes_x86ni` implementation).
+ *
+ * Since this implementation might be omitted from the library, or the
+ * AES opcode unavailable on the current CPU, a pointer to this class
+ * instance should be obtained through `br_aes_x86ni_ctrcbc_get_vtable()`.
+ */
+extern const br_block_ctrcbc_class br_aes_x86ni_ctrcbc_vtable;
+
+/**
+ * \brief Context initialisation (key schedule) for AES CBC encryption
+ * (`aes_x86ni` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_x86ni_cbcenc_init(br_aes_x86ni_cbcenc_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief Context initialisation (key schedule) for AES CBC decryption
+ * (`aes_x86ni` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_x86ni_cbcdec_init(br_aes_x86ni_cbcdec_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief Context initialisation (key schedule) for AES CTR encryption
+ * and decryption (`aes_x86ni` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_x86ni_ctr_init(br_aes_x86ni_ctr_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief Context initialisation (key schedule) for AES CTR + CBC-MAC
+ * (`aes_x86ni` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_x86ni_ctrcbc_init(br_aes_x86ni_ctrcbc_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief CBC encryption with AES (`aes_x86ni` implementation).
+ *
+ * \param ctx    context (already initialised).
+ * \param iv     IV (updated).
+ * \param data   data to encrypt (updated).
+ * \param len    data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_x86ni_cbcenc_run(const br_aes_x86ni_cbcenc_keys *ctx, void *iv,
+	void *data, size_t len);
+
+/**
+ * \brief CBC decryption with AES (`aes_x86ni` implementation).
+ *
+ * \param ctx    context (already initialised).
+ * \param iv     IV (updated).
+ * \param data   data to decrypt (updated).
+ * \param len    data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_x86ni_cbcdec_run(const br_aes_x86ni_cbcdec_keys *ctx, void *iv,
+	void *data, size_t len);
+
+/**
+ * \brief CTR encryption and decryption with AES (`aes_x86ni` implementation).
+ *
+ * \param ctx    context (already initialised).
+ * \param iv     IV (constant, 12 bytes).
+ * \param cc     initial block counter value.
+ * \param data   data to encrypt or decrypt (updated).
+ * \param len    data length (in bytes).
+ * \return  new block counter value.
+ */
+uint32_t br_aes_x86ni_ctr_run(const br_aes_x86ni_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len);
+
+/**
+ * \brief CTR encryption + CBC-MAC with AES (`aes_x86ni` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to encrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_x86ni_ctrcbc_encrypt(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len);
+
+/**
+ * \brief CTR decryption + CBC-MAC with AES (`aes_x86ni` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to decrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_x86ni_ctrcbc_decrypt(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len);
+
+/**
+ * \brief CTR encryption/decryption with AES (`aes_x86ni` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param data     data to encrypt or decrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_x86ni_ctrcbc_ctr(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len);
+
+/**
+ * \brief CBC-MAC with AES (`aes_x86ni` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to MAC (unmodified).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_x86ni_ctrcbc_mac(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len);
+
+/**
+ * \brief Obtain the `aes_x86ni` AES-CBC (encryption) implementation, if
+ * available.
+ *
+ * This function returns a pointer to `br_aes_x86ni_cbcenc_vtable`, if
+ * that implementation was compiled in the library _and_ the x86 AES
+ * opcodes are available on the currently running CPU. If either of
+ * these conditions is not met, then this function returns `NULL`.
+ *
+ * \return  the `aes_x86ni` AES-CBC (encryption) implementation, or `NULL`.
+ */
+const br_block_cbcenc_class *br_aes_x86ni_cbcenc_get_vtable(void);
+
+/**
+ * \brief Obtain the `aes_x86ni` AES-CBC (decryption) implementation, if
+ * available.
+ *
+ * This function returns a pointer to `br_aes_x86ni_cbcdec_vtable`, if
+ * that implementation was compiled in the library _and_ the x86 AES
+ * opcodes are available on the currently running CPU. If either of
+ * these conditions is not met, then this function returns `NULL`.
+ *
+ * \return  the `aes_x86ni` AES-CBC (decryption) implementation, or `NULL`.
+ */
+const br_block_cbcdec_class *br_aes_x86ni_cbcdec_get_vtable(void);
+
+/**
+ * \brief Obtain the `aes_x86ni` AES-CTR implementation, if available.
+ *
+ * This function returns a pointer to `br_aes_x86ni_ctr_vtable`, if
+ * that implementation was compiled in the library _and_ the x86 AES
+ * opcodes are available on the currently running CPU. If either of
+ * these conditions is not met, then this function returns `NULL`.
+ *
+ * \return  the `aes_x86ni` AES-CTR implementation, or `NULL`.
+ */
+const br_block_ctr_class *br_aes_x86ni_ctr_get_vtable(void);
+
+/**
+ * \brief Obtain the `aes_x86ni` AES-CTR + CBC-MAC implementation, if
+ * available.
+ *
+ * This function returns a pointer to `br_aes_x86ni_ctrcbc_vtable`, if
+ * that implementation was compiled in the library _and_ the x86 AES
+ * opcodes are available on the currently running CPU. If either of
+ * these conditions is not met, then this function returns `NULL`.
+ *
+ * \return  the `aes_x86ni` AES-CTR + CBC-MAC implementation, or `NULL`.
+ */
+const br_block_ctrcbc_class *br_aes_x86ni_ctrcbc_get_vtable(void);
+
+/*
+ * AES implementation using POWER8 opcodes.
+ */
+
+/** \brief AES block size (16 bytes). */
+#define br_aes_pwr8_BLOCK_SIZE   16
+
+/**
+ * \brief Context for AES subkeys (`aes_pwr8` implementation, CBC encryption).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_cbcenc_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	union {
+		unsigned char skni[16 * 15];
+	} skey;
+	unsigned num_rounds;
+#endif
+} br_aes_pwr8_cbcenc_keys;
+
+/**
+ * \brief Context for AES subkeys (`aes_pwr8` implementation, CBC decryption).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_cbcdec_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	union {
+		unsigned char skni[16 * 15];
+	} skey;
+	unsigned num_rounds;
+#endif
+} br_aes_pwr8_cbcdec_keys;
+
+/**
+ * \brief Context for AES subkeys (`aes_pwr8` implementation, CTR encryption
+ * and decryption).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_ctr_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	union {
+		unsigned char skni[16 * 15];
+	} skey;
+	unsigned num_rounds;
+#endif
+} br_aes_pwr8_ctr_keys;
+
+/**
+ * \brief Context for AES subkeys (`aes_pwr8` implementation, CTR encryption
+ * and decryption + CBC-MAC).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_ctrcbc_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	union {
+		unsigned char skni[16 * 15];
+	} skey;
+	unsigned num_rounds;
+#endif
+} br_aes_pwr8_ctrcbc_keys;
+
+/**
+ * \brief Class instance for AES CBC encryption (`aes_pwr8` implementation).
+ *
+ * Since this implementation might be omitted from the library, or the
+ * AES opcode unavailable on the current CPU, a pointer to this class
+ * instance should be obtained through `br_aes_pwr8_cbcenc_get_vtable()`.
+ */
+extern const br_block_cbcenc_class br_aes_pwr8_cbcenc_vtable;
+
+/**
+ * \brief Class instance for AES CBC decryption (`aes_pwr8` implementation).
+ *
+ * Since this implementation might be omitted from the library, or the
+ * AES opcode unavailable on the current CPU, a pointer to this class
+ * instance should be obtained through `br_aes_pwr8_cbcdec_get_vtable()`.
+ */
+extern const br_block_cbcdec_class br_aes_pwr8_cbcdec_vtable;
+
+/**
+ * \brief Class instance for AES CTR encryption and decryption
+ * (`aes_pwr8` implementation).
+ *
+ * Since this implementation might be omitted from the library, or the
+ * AES opcode unavailable on the current CPU, a pointer to this class
+ * instance should be obtained through `br_aes_pwr8_ctr_get_vtable()`.
+ */
+extern const br_block_ctr_class br_aes_pwr8_ctr_vtable;
+
+/**
+ * \brief Class instance for AES CTR encryption/decryption + CBC-MAC
+ * (`aes_pwr8` implementation).
+ *
+ * Since this implementation might be omitted from the library, or the
+ * AES opcode unavailable on the current CPU, a pointer to this class
+ * instance should be obtained through `br_aes_pwr8_ctrcbc_get_vtable()`.
+ */
+extern const br_block_ctrcbc_class br_aes_pwr8_ctrcbc_vtable;
+
+/**
+ * \brief Context initialisation (key schedule) for AES CBC encryption
+ * (`aes_pwr8` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_pwr8_cbcenc_init(br_aes_pwr8_cbcenc_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief Context initialisation (key schedule) for AES CBC decryption
+ * (`aes_pwr8` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_pwr8_cbcdec_init(br_aes_pwr8_cbcdec_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief Context initialisation (key schedule) for AES CTR encryption
+ * and decryption (`aes_pwr8` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_pwr8_ctr_init(br_aes_pwr8_ctr_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief Context initialisation (key schedule) for AES CTR + CBC-MAC
+ * (`aes_pwr8` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_pwr8_ctrcbc_init(br_aes_pwr8_ctrcbc_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief CBC encryption with AES (`aes_pwr8` implementation).
+ *
+ * \param ctx    context (already initialised).
+ * \param iv     IV (updated).
+ * \param data   data to encrypt (updated).
+ * \param len    data length (in bytes, MUST be multiple of 16).
+ */
+void br_aes_pwr8_cbcenc_run(const br_aes_pwr8_cbcenc_keys *ctx, void *iv,
+	void *data, size_t len);
+
+/**
+ * \brief CBC decryption with AES (`aes_pwr8` implementation).
+ *
+ * \param ctx    context (already initialised).
+ * \param iv     IV (updated).
+ * \param data   data to decrypt (updated).
+ * \param len    data length (in bytes, MUST be multiple of 16).
+ */
+void br_aes_pwr8_cbcdec_run(const br_aes_pwr8_cbcdec_keys *ctx, void *iv,
+	void *data, size_t len);
+
+/**
+ * \brief CTR encryption and decryption with AES (`aes_pwr8` implementation).
+ *
+ * \param ctx    context (already initialised).
+ * \param iv     IV (constant, 12 bytes).
+ * \param cc     initial block counter value.
+ * \param data   data to encrypt or decrypt (updated).
+ * \param len    data length (in bytes).
+ * \return  new block counter value.
+ */
+uint32_t br_aes_pwr8_ctr_run(const br_aes_pwr8_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len);
+
+/**
+ * \brief CTR encryption + CBC-MAC with AES (`aes_pwr8` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to encrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_pwr8_ctrcbc_encrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len);
+
+/**
+ * \brief CTR decryption + CBC-MAC with AES (`aes_pwr8` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to decrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_pwr8_ctrcbc_decrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len);
+
+/**
+ * \brief CTR encryption/decryption with AES (`aes_pwr8` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param data     data to encrypt or decrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_pwr8_ctrcbc_ctr(const br_aes_pwr8_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len);
+
+/**
+ * \brief CBC-MAC with AES (`aes_pwr8` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to MAC (unmodified).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_pwr8_ctrcbc_mac(const br_aes_pwr8_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len);
+
+/**
+ * \brief Obtain the `aes_pwr8` AES-CBC (encryption) implementation, if
+ * available.
+ *
+ * This function returns a pointer to `br_aes_pwr8_cbcenc_vtable`, if
+ * that implementation was compiled in the library _and_ the POWER8
+ * crypto opcodes are available on the currently running CPU. If either
+ * of these conditions is not met, then this function returns `NULL`.
+ *
+ * \return  the `aes_pwr8` AES-CBC (encryption) implementation, or `NULL`.
+ */
+const br_block_cbcenc_class *br_aes_pwr8_cbcenc_get_vtable(void);
+
+/**
+ * \brief Obtain the `aes_pwr8` AES-CBC (decryption) implementation, if
+ * available.
+ *
+ * This function returns a pointer to `br_aes_pwr8_cbcdec_vtable`, if
+ * that implementation was compiled in the library _and_ the POWER8
+ * crypto opcodes are available on the currently running CPU. If either
+ * of these conditions is not met, then this function returns `NULL`.
+ *
+ * \return  the `aes_pwr8` AES-CBC (decryption) implementation, or `NULL`.
+ */
+const br_block_cbcdec_class *br_aes_pwr8_cbcdec_get_vtable(void);
+
+/**
+ * \brief Obtain the `aes_pwr8` AES-CTR implementation, if available.
+ *
+ * This function returns a pointer to `br_aes_pwr8_ctr_vtable`, if that
+ * implementation was compiled in the library _and_ the POWER8 crypto
+ * opcodes are available on the currently running CPU. If either of
+ * these conditions is not met, then this function returns `NULL`.
+ *
+ * \return  the `aes_pwr8` AES-CTR implementation, or `NULL`.
+ */
+const br_block_ctr_class *br_aes_pwr8_ctr_get_vtable(void);
+
+/**
+ * \brief Obtain the `aes_pwr8` AES-CTR + CBC-MAC implementation, if
+ * available.
+ *
+ * This function returns a pointer to `br_aes_pwr8_ctrcbc_vtable`, if
+ * that implementation was compiled in the library _and_ the POWER8 AES
+ * opcodes are available on the currently running CPU. If either of
+ * these conditions is not met, then this function returns `NULL`.
+ *
+ * \return  the `aes_pwr8` AES-CTR + CBC-MAC implementation, or `NULL`.
+ */
+const br_block_ctrcbc_class *br_aes_pwr8_ctrcbc_get_vtable(void);
+
+/**
+ * \brief Aggregate structure large enough to be used as context for
+ * subkeys (CBC encryption) for all AES implementations.
+ */
+typedef union {
+	const br_block_cbcenc_class *vtable;
+	br_aes_big_cbcenc_keys c_big;
+	br_aes_small_cbcenc_keys c_small;
+	br_aes_ct_cbcenc_keys c_ct;
+	br_aes_ct64_cbcenc_keys c_ct64;
+	br_aes_x86ni_cbcenc_keys c_x86ni;
+	br_aes_pwr8_cbcenc_keys c_pwr8;
+} br_aes_gen_cbcenc_keys;
+
+/**
+ * \brief Aggregate structure large enough to be used as context for
+ * subkeys (CBC decryption) for all AES implementations.
+ */
+typedef union {
+	const br_block_cbcdec_class *vtable;
+	br_aes_big_cbcdec_keys c_big;
+	br_aes_small_cbcdec_keys c_small;
+	br_aes_ct_cbcdec_keys c_ct;
+	br_aes_ct64_cbcdec_keys c_ct64;
+	br_aes_x86ni_cbcdec_keys c_x86ni;
+	br_aes_pwr8_cbcdec_keys c_pwr8;
+} br_aes_gen_cbcdec_keys;
+
+/**
+ * \brief Aggregate structure large enough to be used as context for
+ * subkeys (CTR encryption and decryption) for all AES implementations.
+ */
+typedef union {
+	const br_block_ctr_class *vtable;
+	br_aes_big_ctr_keys c_big;
+	br_aes_small_ctr_keys c_small;
+	br_aes_ct_ctr_keys c_ct;
+	br_aes_ct64_ctr_keys c_ct64;
+	br_aes_x86ni_ctr_keys c_x86ni;
+	br_aes_pwr8_ctr_keys c_pwr8;
+} br_aes_gen_ctr_keys;
+
+/**
+ * \brief Aggregate structure large enough to be used as context for
+ * subkeys (CTR encryption/decryption + CBC-MAC) for all AES implementations.
+ */
+typedef union {
+	const br_block_ctrcbc_class *vtable;
+	br_aes_big_ctrcbc_keys c_big;
+	br_aes_small_ctrcbc_keys c_small;
+	br_aes_ct_ctrcbc_keys c_ct;
+	br_aes_ct64_ctrcbc_keys c_ct64;
+	br_aes_x86ni_ctrcbc_keys c_x86ni;
+	br_aes_pwr8_ctrcbc_keys c_pwr8;
+} br_aes_gen_ctrcbc_keys;
+
+/*
+ * Traditional, table-based implementation for DES/3DES. Since tables are
+ * used, cache-timing attacks are conceptually possible.
+ */
+
+/** \brief DES/3DES block size (8 bytes). */
+#define br_des_tab_BLOCK_SIZE   8
+
+/**
+ * \brief Context for DES subkeys (`des_tab` implementation, CBC encryption).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_cbcenc_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint32_t skey[96];
+	unsigned num_rounds;
+#endif
+} br_des_tab_cbcenc_keys;
+
+/**
+ * \brief Context for DES subkeys (`des_tab` implementation, CBC decryption).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_cbcdec_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint32_t skey[96];
+	unsigned num_rounds;
+#endif
+} br_des_tab_cbcdec_keys;
+
+/**
+ * \brief Class instance for DES CBC encryption (`des_tab` implementation).
+ */
+extern const br_block_cbcenc_class br_des_tab_cbcenc_vtable;
+
+/**
+ * \brief Class instance for DES CBC decryption (`des_tab` implementation).
+ */
+extern const br_block_cbcdec_class br_des_tab_cbcdec_vtable;
+
+/**
+ * \brief Context initialisation (key schedule) for DES CBC encryption
+ * (`des_tab` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_des_tab_cbcenc_init(br_des_tab_cbcenc_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief Context initialisation (key schedule) for DES CBC decryption
+ * (`des_tab` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_des_tab_cbcdec_init(br_des_tab_cbcdec_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief CBC encryption with DES (`des_tab` implementation).
+ *
+ * \param ctx    context (already initialised).
+ * \param iv     IV (updated).
+ * \param data   data to encrypt (updated).
+ * \param len    data length (in bytes, MUST be multiple of 8).
+ */
+void br_des_tab_cbcenc_run(const br_des_tab_cbcenc_keys *ctx, void *iv,
+	void *data, size_t len);
+
+/**
+ * \brief CBC decryption with DES (`des_tab` implementation).
+ *
+ * \param ctx    context (already initialised).
+ * \param iv     IV (updated).
+ * \param data   data to decrypt (updated).
+ * \param len    data length (in bytes, MUST be multiple of 8).
+ */
+void br_des_tab_cbcdec_run(const br_des_tab_cbcdec_keys *ctx, void *iv,
+	void *data, size_t len);
+
+/*
+ * Constant-time implementation for DES/3DES. It is substantially slower
+ * (by a factor of about 4x), but also immune to cache-timing attacks.
+ */
+
+/** \brief DES/3DES block size (8 bytes). */
+#define br_des_ct_BLOCK_SIZE   8
+
+/**
+ * \brief Context for DES subkeys (`des_ct` implementation, CBC encryption).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_cbcenc_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint32_t skey[96];
+	unsigned num_rounds;
+#endif
+} br_des_ct_cbcenc_keys;
+
+/**
+ * \brief Context for DES subkeys (`des_ct` implementation, CBC decryption).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_cbcdec_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint32_t skey[96];
+	unsigned num_rounds;
+#endif
+} br_des_ct_cbcdec_keys;
+
+/**
+ * \brief Class instance for DES CBC encryption (`des_ct` implementation).
+ */
+extern const br_block_cbcenc_class br_des_ct_cbcenc_vtable;
+
+/**
+ * \brief Class instance for DES CBC decryption (`des_ct` implementation).
+ */
+extern const br_block_cbcdec_class br_des_ct_cbcdec_vtable;
+
+/**
+ * \brief Context initialisation (key schedule) for DES CBC encryption
+ * (`des_ct` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_des_ct_cbcenc_init(br_des_ct_cbcenc_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief Context initialisation (key schedule) for DES CBC decryption
+ * (`des_ct` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_des_ct_cbcdec_init(br_des_ct_cbcdec_keys *ctx,
+	const void *key, size_t len);
+
+/**
+ * \brief CBC encryption with DES (`des_ct` implementation).
+ *
+ * \param ctx    context (already initialised).
+ * \param iv     IV (updated).
+ * \param data   data to encrypt (updated).
+ * \param len    data length (in bytes, MUST be multiple of 8).
+ */
+void br_des_ct_cbcenc_run(const br_des_ct_cbcenc_keys *ctx, void *iv,
+	void *data, size_t len);
+
+/**
+ * \brief CBC decryption with DES (`des_ct` implementation).
+ *
+ * \param ctx    context (already initialised).
+ * \param iv     IV (updated).
+ * \param data   data to decrypt (updated).
+ * \param len    data length (in bytes, MUST be multiple of 8).
+ */
+void br_des_ct_cbcdec_run(const br_des_ct_cbcdec_keys *ctx, void *iv,
+	void *data, size_t len);
+
+/*
+ * These structures are large enough to accommodate subkeys for all
+ * DES/3DES implementations.
+ */
+
+/**
+ * \brief Aggregate structure large enough to be used as context for
+ * subkeys (CBC encryption) for all DES implementations.
+ */
+typedef union {
+	const br_block_cbcenc_class *vtable;
+	br_des_tab_cbcenc_keys tab;
+	br_des_ct_cbcenc_keys ct;
+} br_des_gen_cbcenc_keys;
+
+/**
+ * \brief Aggregate structure large enough to be used as context for
+ * subkeys (CBC decryption) for all DES implementations.
+ */
+typedef union {
+	const br_block_cbcdec_class *vtable;
+	br_des_tab_cbcdec_keys c_tab;
+	br_des_ct_cbcdec_keys c_ct;
+} br_des_gen_cbcdec_keys;
+
+/**
+ * \brief Type for a ChaCha20 implementation.
+ *
+ * An implementation follows the description in RFC 7539:
+ *
+ *   - Key is 256 bits (`key` points to exactly 32 bytes).
+ *
+ *   - IV is 96 bits (`iv` points to exactly 12 bytes).
+ *
+ *   - Block counter is a 32-bit value that starts at `cc`; the
+ *     resulting counter value is returned.
+ *
+ * Data (pointed to by `data`, of length `len`) is encrypted/decrypted
+ * in place. If `len` is not a multiple of 64, then the excess bytes from
+ * the last block processing are dropped (therefore, "chunked" processing
+ * works only as long as each non-final chunk has a length multiple of 64).
+ *
+ * \param key    secret key (32 bytes).
+ * \param iv     IV (12 bytes).
+ * \param cc     initial counter value.
+ * \param data   data to encrypt or decrypt.
+ * \param len    data length (in bytes).
+ */
+typedef uint32_t (*br_chacha20_run)(const void *key,
+	const void *iv, uint32_t cc, void *data, size_t len);
+
+/**
+ * \brief ChaCha20 implementation (straightforward C code, constant-time).
+ *
+ * \see br_chacha20_run
+ *
+ * \param key    secret key (32 bytes).
+ * \param iv     IV (12 bytes).
+ * \param cc     initial counter value.
+ * \param data   data to encrypt or decrypt.
+ * \param len    data length (in bytes).
+ */
+uint32_t br_chacha20_ct_run(const void *key,
+	const void *iv, uint32_t cc, void *data, size_t len);
+
+/**
+ * \brief ChaCha20 implementation (SSE2 code, constant-time).
+ *
+ * This implementation is available only on x86 platforms, depending on
+ * compiler support. Moreover, in 32-bit mode, it might not actually run,
+ * if the underlying hardware does not implement the SSE2 opcodes (in
+ * 64-bit mode, SSE2 is part of the ABI, so if the code could be compiled
+ * at all, then it can run). Use `br_chacha20_sse2_get()` to safely obtain
+ * a pointer to that function.
+ *
+ * \see br_chacha20_run
+ *
+ * \param key    secret key (32 bytes).
+ * \param iv     IV (12 bytes).
+ * \param cc     initial counter value.
+ * \param data   data to encrypt or decrypt.
+ * \param len    data length (in bytes).
+ */
+uint32_t br_chacha20_sse2_run(const void *key,
+	const void *iv, uint32_t cc, void *data, size_t len);
+
+/**
+ * \brief Obtain the `sse2` ChaCha20 implementation, if available.
+ *
+ * This function returns a pointer to `br_chacha20_sse2_run`, if
+ * that implementation was compiled in the library _and_ the SSE2
+ * opcodes are available on the currently running CPU. If either of
+ * these conditions is not met, then this function returns `0`.
+ *
+ * \return  the `sse2` ChaCha20 implementation, or `0`.
+ */
+br_chacha20_run br_chacha20_sse2_get(void);
+
+/**
+ * \brief Type for a ChaCha20+Poly1305 AEAD implementation.
+ *
+ * The provided data is encrypted or decrypted with ChaCha20. The
+ * authentication tag is computed on the concatenation of the
+ * additional data and the ciphertext, with the padding and lengths
+ * as described in RFC 7539 (section 2.8).
+ *
+ * After decryption, the caller is responsible for checking that the
+ * computed tag matches the expected value.
+ *
+ * \param key       secret key (32 bytes).
+ * \param iv        nonce (12 bytes).
+ * \param data      data to encrypt or decrypt.
+ * \param len       data length (in bytes).
+ * \param aad       additional authenticated data.
+ * \param aad_len   length of additional authenticated data (in bytes).
+ * \param tag       output buffer for the authentication tag.
+ * \param ichacha   implementation of ChaCha20.
+ * \param encrypt   non-zero for encryption, zero for decryption.
+ */
+typedef void (*br_poly1305_run)(const void *key, const void *iv,
+	void *data, size_t len, const void *aad, size_t aad_len,
+	void *tag, br_chacha20_run ichacha, int encrypt);
+
+/**
+ * \brief ChaCha20+Poly1305 AEAD implementation (mixed 32-bit multiplications).
+ *
+ * \see br_poly1305_run
+ *
+ * \param key       secret key (32 bytes).
+ * \param iv        nonce (12 bytes).
+ * \param data      data to encrypt or decrypt.
+ * \param len       data length (in bytes).
+ * \param aad       additional authenticated data.
+ * \param aad_len   length of additional authenticated data (in bytes).
+ * \param tag       output buffer for the authentication tag.
+ * \param ichacha   implementation of ChaCha20.
+ * \param encrypt   non-zero for encryption, zero for decryption.
+ */
+void br_poly1305_ctmul_run(const void *key, const void *iv,
+	void *data, size_t len, const void *aad, size_t aad_len,
+	void *tag, br_chacha20_run ichacha, int encrypt);
+
+/**
+ * \brief ChaCha20+Poly1305 AEAD implementation (pure 32-bit multiplications).
+ *
+ * \see br_poly1305_run
+ *
+ * \param key       secret key (32 bytes).
+ * \param iv        nonce (12 bytes).
+ * \param data      data to encrypt or decrypt.
+ * \param len       data length (in bytes).
+ * \param aad       additional authenticated data.
+ * \param aad_len   length of additional authenticated data (in bytes).
+ * \param tag       output buffer for the authentication tag.
+ * \param ichacha   implementation of ChaCha20.
+ * \param encrypt   non-zero for encryption, zero for decryption.
+ */
+void br_poly1305_ctmul32_run(const void *key, const void *iv,
+	void *data, size_t len, const void *aad, size_t aad_len,
+	void *tag, br_chacha20_run ichacha, int encrypt);
+
+/**
+ * \brief ChaCha20+Poly1305 AEAD implementation (i15).
+ *
+ * This implementation relies on the generic big integer code "i15"
+ * (which uses pure 32-bit multiplications). As such, it may save a
+ * little code footprint in a context where "i15" is already included
+ * (e.g. for elliptic curves or for RSA); however, it is also
+ * substantially slower than the ctmul and ctmul32 implementations.
+ *
+ * \see br_poly1305_run
+ *
+ * \param key       secret key (32 bytes).
+ * \param iv        nonce (12 bytes).
+ * \param data      data to encrypt or decrypt.
+ * \param len       data length (in bytes).
+ * \param aad       additional authenticated data.
+ * \param aad_len   length of additional authenticated data (in bytes).
+ * \param tag       output buffer for the authentication tag.
+ * \param ichacha   implementation of ChaCha20.
+ * \param encrypt   non-zero for encryption, zero for decryption.
+ */
+void br_poly1305_i15_run(const void *key, const void *iv,
+	void *data, size_t len, const void *aad, size_t aad_len,
+	void *tag, br_chacha20_run ichacha, int encrypt);
+
+/**
+ * \brief ChaCha20+Poly1305 AEAD implementation (ctmulq).
+ *
+ * This implementation uses 64-bit multiplications (result over 128 bits).
+ * It is available only on platforms that offer such a primitive (in
+ * practice, 64-bit architectures). Use `br_poly1305_ctmulq_get()` to
+ * dynamically obtain a pointer to that function, or 0 if not supported.
+ *
+ * \see br_poly1305_run
+ *
+ * \param key       secret key (32 bytes).
+ * \param iv        nonce (12 bytes).
+ * \param data      data to encrypt or decrypt.
+ * \param len       data length (in bytes).
+ * \param aad       additional authenticated data.
+ * \param aad_len   length of additional authenticated data (in bytes).
+ * \param tag       output buffer for the authentication tag.
+ * \param ichacha   implementation of ChaCha20.
+ * \param encrypt   non-zero for encryption, zero for decryption.
+ */
+void br_poly1305_ctmulq_run(const void *key, const void *iv,
+	void *data, size_t len, const void *aad, size_t aad_len,
+	void *tag, br_chacha20_run ichacha, int encrypt);
+
+/**
+ * \brief Get the ChaCha20+Poly1305 "ctmulq" implementation, if available.
+ *
+ * This function returns a pointer to the `br_poly1305_ctmulq_run()`
+ * function if supported on the current platform; otherwise, it returns 0.
+ *
+ * \return  the ctmulq ChaCha20+Poly1305 implementation, or 0.
+ */
+br_poly1305_run br_poly1305_ctmulq_get(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/third_party/bearssl/inc/bearssl_ec.h b/third_party/bearssl/inc/bearssl_ec.h
new file mode 100644
index 0000000..acd3a2b
--- /dev/null
+++ b/third_party/bearssl/inc/bearssl_ec.h
@@ -0,0 +1,967 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef BR_BEARSSL_EC_H__
+#define BR_BEARSSL_EC_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "bearssl_rand.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \file bearssl_ec.h
+ *
+ * # Elliptic Curves
+ *
+ * This file documents the EC implementations provided with BearSSL, and
+ * ECDSA.
+ *
+ * ## Elliptic Curve API
+ *
+ * Only "named curves" are supported. Each EC implementation supports
+ * one or several named curves, identified by symbolic identifiers.
+ * These identifiers are small integers, that correspond to the values
+ * registered by the
+ * [IANA](http://www.iana.org/assignments/tls-parameters/tls-parameters.xhtml#tls-parameters-8).
+ *
+ * Since all currently defined elliptic curve identifiers are in the 0..31
+ * range, it is convenient to encode support of some curves in a 32-bit
+ * word, such that bit x corresponds to curve of identifier x.
+ *
+ * An EC implementation is incarnated by a `br_ec_impl` instance, that
+ * offers the following fields:
+ *
+ *   - `supported_curves`
+ *
+ *      A 32-bit word that documents the identifiers of the curves supported
+ *      by this implementation.
+ *
+ *   - `generator()`
+ *
+ *      Callback method that returns a pointer to the conventional generator
+ *      point for that curve.
+ *
+ *   - `order()`
+ *
+ *      Callback method that returns a pointer to the subgroup order for
+ *      that curve. That value uses unsigned big-endian encoding.
+ *
+ *   - `xoff()`
+ *
+ *      Callback method that returns the offset and length of the X
+ *      coordinate in an encoded point.
+ *
+ *   - `mul()`
+ *
+ *      Multiply a curve point with an integer.
+ *
+ *   - `mulgen()`
+ *
+ *      Multiply the curve generator with an integer. This may be faster
+ *      than the generic `mul()`.
+ *
+ *   - `muladd()`
+ *
+ *      Multiply two curve points by two integers, and return the sum of
+ *      the two products.
+ *
+ * All curve points are represented in uncompressed format. The `mul()`
+ * and `muladd()` methods take care to validate that the provided points
+ * are really part of the relevant curve subgroup.
+ *
+ * For all point multiplication functions, the following holds:
+ *
+ *   - Functions validate that the provided points are valid members
+ *     of the relevant curve subgroup. An error is reported if that is
+ *     not the case.
+ *
+ *   - Processing is constant-time, even if the point operands are not
+ *     valid. This holds for both the source and resulting points, and
+ *     the multipliers (integers). Only the byte length of the provided
+ *     multiplier arrays (not their actual value length in bits) may
+ *     leak through timing-based side channels.
+ *
+ *   - The multipliers (integers) MUST be lower than the subgroup order.
+ *     If this property is not met, then the result is indeterminate,
+ *     but an error value is not necessarily returned.
+ * 
+ *
+ * ## ECDSA
+ *
+ * ECDSA signatures have two standard formats, called "raw" and "asn1".
+ * Internally, such a signature is a pair of modular integers `(r,s)`.
+ * The "raw" format is the concatenation of the unsigned big-endian
+ * encodings of these two integers, possibly left-padded with zeros so
+ * that they have the same encoded length. The "asn1" format is the
+ * DER encoding of an ASN.1 structure that contains the two integer
+ * values:
+ *
+ *     ECDSASignature ::= SEQUENCE {
+ *         r   INTEGER,
+ *         s   INTEGER
+ *     }
+ *
+ * In general, in all of X.509 and SSL/TLS, the "asn1" format is used.
+ * BearSSL offers ECDSA implementations for both formats; conversion
+ * functions between the two formats are also provided. Conversion of a
+ * "raw" format signature into "asn1" may enlarge a signature by no more
+ * than 9 bytes for all supported curves; conversely, conversion of an
+ * "asn1" signature to "raw" may expand the signature but the "raw"
+ * length will never be more than twice the "asn1" length
+ * (and usually it will be shorter).
+ *
+ * Note that for a given signature, the "raw" format is not fully
+ * deterministic, in that it does not enforce a minimal common length.
+ */
+
+/*
+ * Standard curve IDs. These IDs are equal to the numerical
+ * identifiers assigned to these curves for TLS:
+ *    http://www.iana.org/assignments/tls-parameters/tls-parameters.xhtml#tls-parameters-8
+ */
+
+/** \brief Identifier for named curve sect163k1. */
+#define BR_EC_sect163k1           1
+
+/** \brief Identifier for named curve sect163r1. */
+#define BR_EC_sect163r1           2
+
+/** \brief Identifier for named curve sect163r2. */
+#define BR_EC_sect163r2           3
+
+/** \brief Identifier for named curve sect193r1. */
+#define BR_EC_sect193r1           4
+
+/** \brief Identifier for named curve sect193r2. */
+#define BR_EC_sect193r2           5
+
+/** \brief Identifier for named curve sect233k1. */
+#define BR_EC_sect233k1           6
+
+/** \brief Identifier for named curve sect233r1. */
+#define BR_EC_sect233r1           7
+
+/** \brief Identifier for named curve sect239k1. */
+#define BR_EC_sect239k1           8
+
+/** \brief Identifier for named curve sect283k1. */
+#define BR_EC_sect283k1           9
+
+/** \brief Identifier for named curve sect283r1. */
+#define BR_EC_sect283r1          10
+
+/** \brief Identifier for named curve sect409k1. */
+#define BR_EC_sect409k1          11
+
+/** \brief Identifier for named curve sect409r1. */
+#define BR_EC_sect409r1          12
+
+/** \brief Identifier for named curve sect571k1. */
+#define BR_EC_sect571k1          13
+
+/** \brief Identifier for named curve sect571r1. */
+#define BR_EC_sect571r1          14
+
+/** \brief Identifier for named curve secp160k1. */
+#define BR_EC_secp160k1          15
+
+/** \brief Identifier for named curve secp160r1. */
+#define BR_EC_secp160r1          16
+
+/** \brief Identifier for named curve secp160r2. */
+#define BR_EC_secp160r2          17
+
+/** \brief Identifier for named curve secp192k1. */
+#define BR_EC_secp192k1          18
+
+/** \brief Identifier for named curve secp192r1. */
+#define BR_EC_secp192r1          19
+
+/** \brief Identifier for named curve secp224k1. */
+#define BR_EC_secp224k1          20
+
+/** \brief Identifier for named curve secp224r1. */
+#define BR_EC_secp224r1          21
+
+/** \brief Identifier for named curve secp256k1. */
+#define BR_EC_secp256k1          22
+
+/** \brief Identifier for named curve secp256r1. */
+#define BR_EC_secp256r1          23
+
+/** \brief Identifier for named curve secp384r1. */
+#define BR_EC_secp384r1          24
+
+/** \brief Identifier for named curve secp521r1. */
+#define BR_EC_secp521r1          25
+
+/** \brief Identifier for named curve brainpoolP256r1. */
+#define BR_EC_brainpoolP256r1    26
+
+/** \brief Identifier for named curve brainpoolP384r1. */
+#define BR_EC_brainpoolP384r1    27
+
+/** \brief Identifier for named curve brainpoolP512r1. */
+#define BR_EC_brainpoolP512r1    28
+
+/** \brief Identifier for named curve Curve25519. */
+#define BR_EC_curve25519         29
+
+/** \brief Identifier for named curve Curve448. */
+#define BR_EC_curve448           30
+
+/**
+ * \brief Structure for an EC public key.
+ */
+typedef struct {
+	/** \brief Identifier for the curve used by this key. */
+	int curve;
+	/** \brief Public curve point (uncompressed format). */
+	unsigned char *q;
+	/** \brief Length of public curve point (in bytes). */
+	size_t qlen;
+} br_ec_public_key;
+
+/**
+ * \brief Structure for an EC private key.
+ *
+ * The private key is an integer modulo the curve subgroup order. The
+ * encoding below tolerates extra leading zeros. In general, it is
+ * recommended that the private key has the same length as the curve
+ * subgroup order.
+ */
+typedef struct {
+	/** \brief Identifier for the curve used by this key. */
+	int curve;
+	/** \brief Private key (integer, unsigned big-endian encoding). */
+	unsigned char *x;
+	/** \brief Private key length (in bytes). */
+	size_t xlen;
+} br_ec_private_key;
+
+/**
+ * \brief Type for an EC implementation.
+ */
+typedef struct {
+	/**
+	 * \brief Supported curves.
+	 *
+	 * This word is a bitfield: bit `x` is set if the curve of ID `x`
+	 * is supported. E.g. an implementation supporting both NIST P-256
+	 * (secp256r1, ID 23) and NIST P-384 (secp384r1, ID 24) will have
+	 * value `0x01800000` in this field.
+	 */
+	uint32_t supported_curves;
+
+	/**
+	 * \brief Get the conventional generator.
+	 *
+	 * This function returns the conventional generator (encoded
+	 * curve point) for the specified curve. This function MUST NOT
+	 * be called if the curve is not supported.
+	 *
+	 * \param curve   curve identifier.
+	 * \param len     receiver for the encoded generator length (in bytes).
+	 * \return  the encoded generator.
+	 */
+	const unsigned char *(*generator)(int curve, size_t *len);
+
+	/**
+	 * \brief Get the subgroup order.
+	 *
+	 * This function returns the order of the subgroup generated by
+	 * the conventional generator, for the specified curve. Unsigned
+	 * big-endian encoding is used. This function MUST NOT be called
+	 * if the curve is not supported.
+	 *
+	 * \param curve   curve identifier.
+	 * \param len     receiver for the encoded order length (in bytes).
+	 * \return  the encoded order.
+	 */
+	const unsigned char *(*order)(int curve, size_t *len);
+
+	/**
+	 * \brief Get the offset and length for the X coordinate.
+	 *
+	 * This function returns the offset and length (in bytes) of
+	 * the X coordinate in an encoded non-zero point.
+	 *
+	 * \param curve   curve identifier.
+	 * \param len     receiver for the X coordinate length (in bytes).
+	 * \return  the offset for the X coordinate (in bytes).
+	 */
+	size_t (*xoff)(int curve, size_t *len);
+
+	/**
+	 * \brief Multiply a curve point by an integer.
+	 *
+	 * The source point is provided in array `G` (of size `Glen` bytes);
+	 * the multiplication result is written over it. The multiplier
+	 * `x` (of size `xlen` bytes) uses unsigned big-endian encoding.
+	 *
+	 * Rules:
+	 *
+	 *   - The specified curve MUST be supported.
+	 *
+	 *   - The source point must be a valid point on the relevant curve
+	 *     subgroup (and not the "point at infinity" either). If this is
+	 *     not the case, then this function returns an error (0).
+	 *
+	 *   - The multiplier integer MUST be non-zero and less than the
+	 *     curve subgroup order. If this property does not hold, then
+	 *     the result is indeterminate and an error code is not
+	 *     guaranteed.
+	 *
+	 * Returned value is 1 on success, 0 on error. On error, the
+	 * contents of `G` are indeterminate.
+	 *
+	 * \param G       point to multiply.
+	 * \param Glen    length of the encoded point (in bytes).
+	 * \param x       multiplier (unsigned big-endian).
+	 * \param xlen    multiplier length (in bytes).
+	 * \param curve   curve identifier.
+	 * \return  1 on success, 0 on error.
+	 */
+	uint32_t (*mul)(unsigned char *G, size_t Glen,
+		const unsigned char *x, size_t xlen, int curve);
+
+	/**
+	 * \brief Multiply the generator by an integer.
+	 *
+	 * The multiplier MUST be non-zero and less than the curve
+	 * subgroup order. Results are indeterminate if this property
+	 * does not hold.
+	 *
+	 * \param R       output buffer for the point.
+	 * \param x       multiplier (unsigned big-endian).
+	 * \param xlen    multiplier length (in bytes).
+	 * \param curve   curve identifier.
+	 * \return  encoded result point length (in bytes).
+	 */
+	size_t (*mulgen)(unsigned char *R,
+		const unsigned char *x, size_t xlen, int curve);
+
+	/**
+	 * \brief Multiply two points by two integers and add the
+	 * results.
+	 *
+	 * The point `x*A + y*B` is computed and written back in the `A`
+	 * array.
+	 *
+	 * Rules:
+	 *
+	 *   - The specified curve MUST be supported.
+	 *
+	 *   - The source points (`A` and `B`)  must be valid points on
+	 *     the relevant curve subgroup (and not the "point at
+	 *     infinity" either). If this is not the case, then this
+	 *     function returns an error (0).
+	 *
+	 *   - If the `B` pointer is `NULL`, then the conventional
+	 *     subgroup generator is used. With some implementations,
+	 *     this may be faster than providing a pointer to the
+	 *     generator.
+	 *
+	 *   - The multiplier integers (`x` and `y`) MUST be non-zero
+	 *     and less than the curve subgroup order. If either integer
+	 *     is zero, then an error is reported, but if one of them is
+	 *     not lower than the subgroup order, then the result is
+	 *     indeterminate and an error code is not guaranteed.
+	 *
+	 *   - If the final result is the point at infinity, then an
+	 *     error is returned.
+	 *
+	 * Returned value is 1 on success, 0 on error. On error, the
+	 * contents of `A` are indeterminate.
+	 *
+	 * \param A       first point to multiply.
+	 * \param B       second point to multiply (`NULL` for the generator).
+	 * \param len     common length of the encoded points (in bytes).
+	 * \param x       multiplier for `A` (unsigned big-endian).
+	 * \param xlen    length of multiplier for `A` (in bytes).
+	 * \param y       multiplier for `B` (unsigned big-endian).
+	 * \param ylen    length of multiplier for `B` (in bytes).
+	 * \param curve   curve identifier.
+	 * \return  1 on success, 0 on error.
+	 */
+	uint32_t (*muladd)(unsigned char *A, const unsigned char *B, size_t len,
+		const unsigned char *x, size_t xlen,
+		const unsigned char *y, size_t ylen, int curve);
+} br_ec_impl;
+
+/**
+ * \brief EC implementation "i31".
+ *
+ * This implementation internally uses generic code for modular integers,
+ * with a representation as sequences of 31-bit words. It supports secp256r1,
+ * secp384r1 and secp521r1 (aka NIST curves P-256, P-384 and P-521).
+ */
+extern const br_ec_impl br_ec_prime_i31;
+
+/**
+ * \brief EC implementation "i15".
+ *
+ * This implementation internally uses generic code for modular integers,
+ * with a representation as sequences of 15-bit words. It supports secp256r1,
+ * secp384r1 and secp521r1 (aka NIST curves P-256, P-384 and P-521).
+ */
+extern const br_ec_impl br_ec_prime_i15;
+
+/**
+ * \brief EC implementation "m15" for P-256.
+ *
+ * This implementation uses specialised code for curve secp256r1 (also
+ * known as NIST P-256), with optional Karatsuba decomposition, and fast
+ * modular reduction thanks to the field modulus special format. Only
+ * 32-bit multiplications are used (with 32-bit results, not 64-bit).
+ */
+extern const br_ec_impl br_ec_p256_m15;
+
+/**
+ * \brief EC implementation "m31" for P-256.
+ *
+ * This implementation uses specialised code for curve secp256r1 (also
+ * known as NIST P-256), relying on multiplications of 31-bit values
+ * (MUL31).
+ */
+extern const br_ec_impl br_ec_p256_m31;
+
+/**
+ * \brief EC implementation "m62" (specialised code) for P-256.
+ *
+ * This implementation uses custom code relying on multiplication of
+ * integers up to 64 bits, with a 128-bit result. This implementation is
+ * defined only on platforms that offer the 64x64->128 multiplication
+ * support; use `br_ec_p256_m62_get()` to dynamically obtain a pointer
+ * to that implementation.
+ */
+extern const br_ec_impl br_ec_p256_m62;
+
+/**
+ * \brief Get the "m62" implementation of P-256, if available.
+ *
+ * \return  the implementation, or 0.
+ */
+const br_ec_impl *br_ec_p256_m62_get(void);
+
+/**
+ * \brief EC implementation "m64" (specialised code) for P-256.
+ *
+ * This implementation uses custom code relying on multiplication of
+ * integers up to 64 bits, with a 128-bit result. This implementation is
+ * defined only on platforms that offer the 64x64->128 multiplication
+ * support; use `br_ec_p256_m64_get()` to dynamically obtain a pointer
+ * to that implementation.
+ */
+extern const br_ec_impl br_ec_p256_m64;
+
+/**
+ * \brief Get the "m64" implementation of P-256, if available.
+ *
+ * \return  the implementation, or 0.
+ */
+const br_ec_impl *br_ec_p256_m64_get(void);
+
+/**
+ * \brief EC implementation "i15" (generic code) for Curve25519.
+ *
+ * This implementation uses the generic code for modular integers (with
+ * 15-bit words) to support Curve25519. Due to the specificities of the
+ * curve definition, the following applies:
+ *
+ *   - `muladd()` is not implemented (the function returns 0 systematically).
+ *   - `order()` returns 2^255-1, since the point multiplication algorithm
+ *     accepts any 32-byte integer as input (it clears the top bit and low
+ *     three bits systematically).
+ */
+extern const br_ec_impl br_ec_c25519_i15;
+
+/**
+ * \brief EC implementation "i31" (generic code) for Curve25519.
+ *
+ * This implementation uses the generic code for modular integers (with
+ * 31-bit words) to support Curve25519. Due to the specificities of the
+ * curve definition, the following applies:
+ *
+ *   - `muladd()` is not implemented (the function returns 0 systematically).
+ *   - `order()` returns 2^255-1, since the point multiplication algorithm
+ *     accepts any 32-byte integer as input (it clears the top bit and low
+ *     three bits systematically).
+ */
+extern const br_ec_impl br_ec_c25519_i31;
+
+/**
+ * \brief EC implementation "m15" (specialised code) for Curve25519.
+ *
+ * This implementation uses custom code relying on multiplication of
+ * integers up to 15 bits. Due to the specificities of the curve
+ * definition, the following applies:
+ *
+ *   - `muladd()` is not implemented (the function returns 0 systematically).
+ *   - `order()` returns 2^255-1, since the point multiplication algorithm
+ *     accepts any 32-byte integer as input (it clears the top bit and low
+ *     three bits systematically).
+ */
+extern const br_ec_impl br_ec_c25519_m15;
+
+/**
+ * \brief EC implementation "m31" (specialised code) for Curve25519.
+ *
+ * This implementation uses custom code relying on multiplication of
+ * integers up to 31 bits. Due to the specificities of the curve
+ * definition, the following applies:
+ *
+ *   - `muladd()` is not implemented (the function returns 0 systematically).
+ *   - `order()` returns 2^255-1, since the point multiplication algorithm
+ *     accepts any 32-byte integer as input (it clears the top bit and low
+ *     three bits systematically).
+ */
+extern const br_ec_impl br_ec_c25519_m31;
+
+/**
+ * \brief EC implementation "m62" (specialised code) for Curve25519.
+ *
+ * This implementation uses custom code relying on multiplication of
+ * integers up to 62 bits, with a 124-bit result. This implementation is
+ * defined only on platforms that offer the 64x64->128 multiplication
+ * support; use `br_ec_c25519_m62_get()` to dynamically obtain a pointer
+ * to that implementation. Due to the specificities of the curve
+ * definition, the following applies:
+ *
+ *   - `muladd()` is not implemented (the function returns 0 systematically).
+ *   - `order()` returns 2^255-1, since the point multiplication algorithm
+ *     accepts any 32-byte integer as input (it clears the top bit and low
+ *     three bits systematically).
+ */
+extern const br_ec_impl br_ec_c25519_m62;
+
+/**
+ * \brief Get the "m62" implementation of Curve25519, if available.
+ *
+ * \return  the implementation, or 0.
+ */
+const br_ec_impl *br_ec_c25519_m62_get(void);
+
+/**
+ * \brief EC implementation "m64" (specialised code) for Curve25519.
+ *
+ * This implementation uses custom code relying on multiplication of
+ * integers up to 64 bits, with a 128-bit result. This implementation is
+ * defined only on platforms that offer the 64x64->128 multiplication
+ * support; use `br_ec_c25519_m64_get()` to dynamically obtain a pointer
+ * to that implementation. Due to the specificities of the curve
+ * definition, the following applies:
+ *
+ *   - `muladd()` is not implemented (the function returns 0 systematically).
+ *   - `order()` returns 2^255-1, since the point multiplication algorithm
+ *     accepts any 32-byte integer as input (it clears the top bit and low
+ *     three bits systematically).
+ */
+extern const br_ec_impl br_ec_c25519_m64;
+
+/**
+ * \brief Get the "m64" implementation of Curve25519, if available.
+ *
+ * \return  the implementation, or 0.
+ */
+const br_ec_impl *br_ec_c25519_m64_get(void);
+
+/**
+ * \brief Aggregate EC implementation "m15".
+ *
+ * This implementation is a wrapper for:
+ *
+ *   - `br_ec_c25519_m15` for Curve25519
+ *   - `br_ec_p256_m15` for NIST P-256
+ *   - `br_ec_prime_i15` for other curves (NIST P-384 and NIST P-521)
+ */
+extern const br_ec_impl br_ec_all_m15;
+
+/**
+ * \brief Aggregate EC implementation "m31".
+ *
+ * This implementation is a wrapper for:
+ *
+ *   - `br_ec_c25519_m31` for Curve25519
+ *   - `br_ec_p256_m31` for NIST P-256
+ *   - `br_ec_prime_i31` for other curves (NIST P-384 and NIST P-521)
+ */
+extern const br_ec_impl br_ec_all_m31;
+
+/**
+ * \brief Get the "default" EC implementation for the current system.
+ *
+ * This returns a pointer to the preferred implementation on the
+ * current system.
+ *
+ * \return  the default EC implementation.
+ */
+const br_ec_impl *br_ec_get_default(void);
+
+/**
+ * \brief Convert a signature from "raw" to "asn1".
+ *
+ * Conversion is done "in place" and the new length is returned.
+ * Conversion may enlarge the signature, but by no more than 9 bytes.
+ * On error, 0 is returned (error conditions include an odd raw
+ * signature length, or an oversized integer).
+ *
+ * \param sig       signature to convert.
+ * \param sig_len   signature length (in bytes).
+ * \return  the new signature length, or 0 on error.
+ */
+size_t br_ecdsa_raw_to_asn1(void *sig, size_t sig_len);
+
+/**
+ * \brief Convert a signature from "asn1" to "raw".
+ *
+ * Conversion is done "in place" and the new length is returned.
+ * Conversion may enlarge the signature, but the new signature length
+ * will be less than twice the source length. On error, 0 is
+ * returned (error conditions include an invalid ASN.1 structure or an
+ * oversized integer).
+ *
+ * \param sig       signature to convert.
+ * \param sig_len   signature length (in bytes).
+ * \return  the new signature length, or 0 on error.
+ */
+size_t br_ecdsa_asn1_to_raw(void *sig, size_t sig_len);
+
+/**
+ * \brief Type for an ECDSA signer function.
+ *
+ * A pointer to the EC implementation is provided. The hash value is
+ * assumed to have the length inferred from the designated hash function
+ * class.
+ *
+ * Signature is written in the buffer pointed to by `sig`, and the length
+ * (in bytes) is returned. On error, nothing is written in the buffer,
+ * and 0 is returned. This function returns 0 if the specified curve is
+ * not supported by the provided EC implementation.
+ *
+ * The signature format is either "raw" or "asn1", depending on the
+ * implementation; maximum length is predictable from the implemented
+ * curve:
+ *
+ * | curve      | raw | asn1 |
+ * | :--------- | --: | ---: |
+ * | NIST P-256 |  64 |   72 |
+ * | NIST P-384 |  96 |  104 |
+ * | NIST P-521 | 132 |  139 |
+ *
+ * \param impl         EC implementation to use.
+ * \param hf           hash function used to process the data.
+ * \param hash_value   signed data (hashed).
+ * \param sk           EC private key.
+ * \param sig          destination buffer.
+ * \return  the signature length (in bytes), or 0 on error.
+ */
+typedef size_t (*br_ecdsa_sign)(const br_ec_impl *impl,
+	const br_hash_class *hf, const void *hash_value,
+	const br_ec_private_key *sk, void *sig);
+
+/**
+ * \brief Type for an ECDSA signature verification function.
+ *
+ * A pointer to the EC implementation is provided. The hashed value,
+ * computed over the purportedly signed data, is also provided with
+ * its length.
+ *
+ * The signature format is either "raw" or "asn1", depending on the
+ * implementation.
+ *
+ * Returned value is 1 on success (valid signature), 0 on error. This
+ * function returns 0 if the specified curve is not supported by the
+ * provided EC implementation.
+ *
+ * \param impl       EC implementation to use.
+ * \param hash       signed data (hashed).
+ * \param hash_len   hash value length (in bytes).
+ * \param pk         EC public key.
+ * \param sig        signature.
+ * \param sig_len    signature length (in bytes).
+ * \return  1 on success, 0 on error.
+ */
+typedef uint32_t (*br_ecdsa_vrfy)(const br_ec_impl *impl,
+	const void *hash, size_t hash_len,
+	const br_ec_public_key *pk, const void *sig, size_t sig_len);
+
+/**
+ * \brief ECDSA signature generator, "i31" implementation, "asn1" format.
+ *
+ * \see br_ecdsa_sign()
+ *
+ * \param impl         EC implementation to use.
+ * \param hf           hash function used to process the data.
+ * \param hash_value   signed data (hashed).
+ * \param sk           EC private key.
+ * \param sig          destination buffer.
+ * \return  the signature length (in bytes), or 0 on error.
+ */
+size_t br_ecdsa_i31_sign_asn1(const br_ec_impl *impl,
+	const br_hash_class *hf, const void *hash_value,
+	const br_ec_private_key *sk, void *sig);
+
+/**
+ * \brief ECDSA signature generator, "i31" implementation, "raw" format.
+ *
+ * \see br_ecdsa_sign()
+ *
+ * \param impl         EC implementation to use.
+ * \param hf           hash function used to process the data.
+ * \param hash_value   signed data (hashed).
+ * \param sk           EC private key.
+ * \param sig          destination buffer.
+ * \return  the signature length (in bytes), or 0 on error.
+ */
+size_t br_ecdsa_i31_sign_raw(const br_ec_impl *impl,
+	const br_hash_class *hf, const void *hash_value,
+	const br_ec_private_key *sk, void *sig);
+
+/**
+ * \brief ECDSA signature verifier, "i31" implementation, "asn1" format.
+ *
+ * \see br_ecdsa_vrfy()
+ *
+ * \param impl       EC implementation to use.
+ * \param hash       signed data (hashed).
+ * \param hash_len   hash value length (in bytes).
+ * \param pk         EC public key.
+ * \param sig        signature.
+ * \param sig_len    signature length (in bytes).
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_ecdsa_i31_vrfy_asn1(const br_ec_impl *impl,
+	const void *hash, size_t hash_len,
+	const br_ec_public_key *pk, const void *sig, size_t sig_len);
+
+/**
+ * \brief ECDSA signature verifier, "i31" implementation, "raw" format.
+ *
+ * \see br_ecdsa_vrfy()
+ *
+ * \param impl       EC implementation to use.
+ * \param hash       signed data (hashed).
+ * \param hash_len   hash value length (in bytes).
+ * \param pk         EC public key.
+ * \param sig        signature.
+ * \param sig_len    signature length (in bytes).
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_ecdsa_i31_vrfy_raw(const br_ec_impl *impl,
+	const void *hash, size_t hash_len,
+	const br_ec_public_key *pk, const void *sig, size_t sig_len);
+
+/**
+ * \brief ECDSA signature generator, "i15" implementation, "asn1" format.
+ *
+ * \see br_ecdsa_sign()
+ *
+ * \param impl         EC implementation to use.
+ * \param hf           hash function used to process the data.
+ * \param hash_value   signed data (hashed).
+ * \param sk           EC private key.
+ * \param sig          destination buffer.
+ * \return  the signature length (in bytes), or 0 on error.
+ */
+size_t br_ecdsa_i15_sign_asn1(const br_ec_impl *impl,
+	const br_hash_class *hf, const void *hash_value,
+	const br_ec_private_key *sk, void *sig);
+
+/**
+ * \brief ECDSA signature generator, "i15" implementation, "raw" format.
+ *
+ * \see br_ecdsa_sign()
+ *
+ * \param impl         EC implementation to use.
+ * \param hf           hash function used to process the data.
+ * \param hash_value   signed data (hashed).
+ * \param sk           EC private key.
+ * \param sig          destination buffer.
+ * \return  the signature length (in bytes), or 0 on error.
+ */
+size_t br_ecdsa_i15_sign_raw(const br_ec_impl *impl,
+	const br_hash_class *hf, const void *hash_value,
+	const br_ec_private_key *sk, void *sig);
+
+/**
+ * \brief ECDSA signature verifier, "i15" implementation, "asn1" format.
+ *
+ * \see br_ecdsa_vrfy()
+ *
+ * \param impl       EC implementation to use.
+ * \param hash       signed data (hashed).
+ * \param hash_len   hash value length (in bytes).
+ * \param pk         EC public key.
+ * \param sig        signature.
+ * \param sig_len    signature length (in bytes).
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_ecdsa_i15_vrfy_asn1(const br_ec_impl *impl,
+	const void *hash, size_t hash_len,
+	const br_ec_public_key *pk, const void *sig, size_t sig_len);
+
+/**
+ * \brief ECDSA signature verifier, "i15" implementation, "raw" format.
+ *
+ * \see br_ecdsa_vrfy()
+ *
+ * \param impl       EC implementation to use.
+ * \param hash       signed data (hashed).
+ * \param hash_len   hash value length (in bytes).
+ * \param pk         EC public key.
+ * \param sig        signature.
+ * \param sig_len    signature length (in bytes).
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_ecdsa_i15_vrfy_raw(const br_ec_impl *impl,
+	const void *hash, size_t hash_len,
+	const br_ec_public_key *pk, const void *sig, size_t sig_len);
+
+/**
+ * \brief Get "default" ECDSA implementation (signer, asn1 format).
+ *
+ * This returns the preferred implementation of ECDSA signature generation
+ * ("asn1" output format) on the current system.
+ *
+ * \return  the default implementation.
+ */
+br_ecdsa_sign br_ecdsa_sign_asn1_get_default(void);
+
+/**
+ * \brief Get "default" ECDSA implementation (signer, raw format).
+ *
+ * This returns the preferred implementation of ECDSA signature generation
+ * ("raw" output format) on the current system.
+ *
+ * \return  the default implementation.
+ */
+br_ecdsa_sign br_ecdsa_sign_raw_get_default(void);
+
+/**
+ * \brief Get "default" ECDSA implementation (verifier, asn1 format).
+ *
+ * This returns the preferred implementation of ECDSA signature verification
+ * ("asn1" signature format) on the current system.
+ *
+ * \return  the default implementation.
+ */
+br_ecdsa_vrfy br_ecdsa_vrfy_asn1_get_default(void);
+
+/**
+ * \brief Get "default" ECDSA implementation (verifier, raw format).
+ *
+ * This returns the preferred implementation of ECDSA signature verification
+ * ("raw" signature format) on the current system.
+ *
+ * \return  the default implementation.
+ */
+br_ecdsa_vrfy br_ecdsa_vrfy_raw_get_default(void);
+
+/**
+ * \brief Maximum size for EC private key element buffer.
+ *
+ * This is the largest number of bytes that `br_ec_keygen()` may need or
+ * ever return.
+ */
+#define BR_EC_KBUF_PRIV_MAX_SIZE   72
+
+/**
+ * \brief Maximum size for EC public key element buffer.
+ *
+ * This is the largest number of bytes that `br_ec_compute_pub()` may
+ * need or ever return.
+ */
+#define BR_EC_KBUF_PUB_MAX_SIZE    145
+
+/**
+ * \brief Generate a new EC private key.
+ *
+ * If the specified `curve` is not supported by the elliptic curve
+ * implementation (`impl`), then this function returns zero.
+ *
+ * The `sk` structure fields are set to the new private key data. In
+ * particular, `sk.x` is made to point to the provided key buffer (`kbuf`),
+ * in which the actual private key data is written. That buffer is assumed
+ * to be large enough. The `BR_EC_KBUF_PRIV_MAX_SIZE` defines the maximum
+ * size for all supported curves.
+ *
+ * The number of bytes used in `kbuf` is returned. If `kbuf` is `NULL`, then
+ * the private key is not actually generated, and `sk` may also be `NULL`;
+ * the minimum length for `kbuf` is still computed and returned.
+ *
+ * If `sk` is `NULL` but `kbuf` is not `NULL`, then the private key is
+ * still generated and stored in `kbuf`.
+ *
+ * \param rng_ctx   source PRNG context (already initialized).
+ * \param impl      the elliptic curve implementation.
+ * \param sk        the private key structure to fill, or `NULL`.
+ * \param kbuf      the key element buffer, or `NULL`.
+ * \param curve     the curve identifier.
+ * \return  the key data length (in bytes), or zero.
+ */
+size_t br_ec_keygen(const br_prng_class **rng_ctx,
+	const br_ec_impl *impl, br_ec_private_key *sk,
+	void *kbuf, int curve);
+
+/**
+ * \brief Compute EC public key from EC private key.
+ *
+ * This function uses the provided elliptic curve implementation (`impl`)
+ * to compute the public key corresponding to the private key held in `sk`.
+ * The public key point is written into `kbuf`, which is then linked from
+ * the `*pk` structure. The size of the public key point, i.e. the number
+ * of bytes used in `kbuf`, is returned.
+ *
+ * If `kbuf` is `NULL`, then the public key point is NOT computed, and
+ * the public key structure `*pk` is unmodified (`pk` may be `NULL` in
+ * that case). The size of the public key point is still returned.
+ *
+ * If `pk` is `NULL` but `kbuf` is not `NULL`, then the public key
+ * point is computed and stored in `kbuf`, and its size is returned.
+ *
+ * If the curve used by the private key is not supported by the curve
+ * implementation, then this function returns zero.
+ *
+ * The private key MUST be valid. An off-range private key value is not
+ * necessarily detected, and leads to unpredictable results.
+ *
+ * \param impl   the elliptic curve implementation.
+ * \param pk     the public key structure to fill (or `NULL`).
+ * \param kbuf   the public key point buffer (or `NULL`).
+ * \param sk     the source private key.
+ * \return  the public key point length (in bytes), or zero.
+ */
+size_t br_ec_compute_pub(const br_ec_impl *impl, br_ec_public_key *pk,
+	void *kbuf, const br_ec_private_key *sk);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/third_party/bearssl/inc/bearssl_hash.h b/third_party/bearssl/inc/bearssl_hash.h
new file mode 100644
index 0000000..ca4fa26
--- /dev/null
+++ b/third_party/bearssl/inc/bearssl_hash.h
@@ -0,0 +1,1346 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef BR_BEARSSL_HASH_H__
+#define BR_BEARSSL_HASH_H__
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \file bearssl_hash.h
+ *
+ * # Hash Functions
+ *
+ * This file documents the API for hash functions.
+ *
+ *
+ * ## Procedural API
+ *
+ * For each implemented hash function, of name "`xxx`", the following
+ * elements are defined:
+ *
+ *   - `br_xxx_vtable`
+ *
+ *     An externally defined instance of `br_hash_class`.
+ *
+ *   - `br_xxx_SIZE`
+ *
+ *     A macro that evaluates to the output size (in bytes) of the
+ *     hash function.
+ *
+ *   - `br_xxx_ID`
+ *
+ *     A macro that evaluates to a symbolic identifier for the hash
+ *     function. Such identifiers are used with HMAC and signature
+ *     algorithm implementations.
+ *
+ *     NOTE: for the "standard" hash functions defined in [the TLS
+ *     standard](https://tools.ietf.org/html/rfc5246#section-7.4.1.4.1),
+ *     the symbolic identifiers match the constants used in TLS, i.e.
+ *     1 to 6 for MD5, SHA-1, SHA-224, SHA-256, SHA-384 and SHA-512,
+ *     respectively.
+ *
+ *   - `br_xxx_context`
+ *
+ *     Context for an ongoing computation. It is allocated by the
+ *     caller, and a pointer to it is passed to all functions. A
+ *     context contains no interior pointer, so it can be moved around
+ *     and cloned (with a simple `memcpy()` or equivalent) in order to
+ *     capture the function state at some point. Computations that use
+ *     distinct context structures are independent of each other. The
+ *     first field of `br_xxx_context` is always a pointer to the
+ *     `br_xxx_vtable` structure; `br_xxx_init()` sets that pointer.
+ *
+ *   - `br_xxx_init(br_xxx_context *ctx)`
+ *
+ *     Initialise the provided context. Previous contents of the structure
+ *     are ignored. This call resets the context to the start of a new
+ *     hash computation; it also sets the first field of the context
+ *     structure (called `vtable`) to a pointer to the statically
+ *     allocated constant `br_xxx_vtable` structure.
+ *
+ *   - `br_xxx_update(br_xxx_context *ctx, const void *data, size_t len)`
+ *
+ *     Add some more bytes to the hash computation represented by the
+ *     provided context.
+ *
+ *   - `br_xxx_out(const br_xxx_context *ctx, void *out)`
+ *
+ *     Complete the hash computation and write the result in the provided
+ *     buffer. The output buffer MUST be large enough to accommodate the
+ *     result. The context is NOT modified by this operation, so this
+ *     function can be used to get a "partial hash" while still keeping
+ *     the possibility of adding more bytes to the input.
+ *
+ *   - `br_xxx_state(const br_xxx_context *ctx, void *out)`
+ *
+ *     Get a copy of the "current state" for the computation so far. For
+ *     MD functions (MD5, SHA-1, SHA-2 family), this is the running state
+ *     resulting from the processing of the last complete input block.
+ *     Returned value is the current input length (in bytes).
+ *
+ *   - `br_xxx_set_state(br_xxx_context *ctx, const void *stb, uint64_t count)`
+ *
+ *     Set the internal state to the provided values. The 'stb' and
+ *     'count' values shall match that which was obtained from
+ *     `br_xxx_state()`. This restores the hash state only if the state
+ *     values were at an appropriate block boundary. This does NOT set
+ *     the `vtable` pointer in the context.
+ *
+ * Context structures can be discarded without any explicit deallocation.
+ * Hash function implementations are purely software and don't reserve
+ * any resources outside of the context structure itself.
+ *
+ *
+ * ## Object-Oriented API
+ *
+ * For each hash function that follows the procedural API described
+ * above, an object-oriented API is also provided. In that API, function
+ * pointers from the vtable (`br_xxx_vtable`) are used. The vtable
+ * incarnates object-oriented programming. An introduction on the OOP
+ * concept used here can be read on the BearSSL Web site:<br />
+ * &nbsp;&nbsp;&nbsp;[https://www.bearssl.org/oop.html](https://www.bearssl.org/oop.html)
+ *
+ * The vtable offers functions called `init()`, `update()`, `out()`,
+ * `state()` and `set_state()`, which are in fact the functions from
+ * the procedural API. That vtable also contains two informative fields:
+ *
+ *   - `context_size`
+ *
+ *     The size of the context structure (`br_xxx_context`), in bytes.
+ *     This can be used by generic implementations to perform dynamic
+ *     context allocation.
+ *
+ *   - `desc`
+ *
+ *     A "descriptor" field that encodes some information on the hash
+ *     function: symbolic identifier, output size, state size,
+ *     internal block size, details on the padding.
+ *
+ * Users of this object-oriented API (in particular generic HMAC
+ * implementations) may make the following assumptions:
+ *
+ *   - Hash output size is no more than 64 bytes.
+ *   - Hash internal state size is no more than 64 bytes.
+ *   - Internal block size is a power of two, no less than 16 and no more
+ *     than 256.
+ *
+ *
+ * ## Implemented Hash Functions
+ *
+ * Implemented hash functions are:
+ *
+ * | Function  | Name    | Output length | State length |
+ * | :-------- | :------ | :-----------: | :----------: |
+ * | MD5       | md5     |     16        |     16       |
+ * | SHA-1     | sha1    |     20        |     20       |
+ * | SHA-224   | sha224  |     28        |     32       |
+ * | SHA-256   | sha256  |     32        |     32       |
+ * | SHA-384   | sha384  |     48        |     64       |
+ * | SHA-512   | sha512  |     64        |     64       |
+ * | MD5+SHA-1 | md5sha1 |     36        |     36       |
+ *
+ * (MD5+SHA-1 is the concatenation of MD5 and SHA-1 computed over the
+ * same input; in the implementation, the internal data buffer is
+ * shared, thus making it more memory-efficient than separate MD5 and
+ * SHA-1. It can be useful in implementing SSL 3.0, TLS 1.0 and TLS
+ * 1.1.)
+ *
+ *
+ * ## Multi-Hasher
+ *
+ * An aggregate hasher is provided, that can compute several standard
+ * hash functions in parallel. It uses `br_multihash_context` and a
+ * procedural API. It is configured with the implementations (the vtables)
+ * that it should use; it will then compute all these hash functions in
+ * parallel, on the same input. It is meant to be used in cases when the
+ * hash of an object will be used, but the exact hash function is not
+ * known yet (typically, streamed processing on X.509 certificates).
+ *
+ * Only the standard hash functions (MD5, SHA-1, SHA-224, SHA-256, SHA-384
+ * and SHA-512) are supported by the multi-hasher.
+ *
+ *
+ * ## GHASH
+ *
+ * GHASH is not a generic hash function; it is a _universal_ hash function,
+ * which, as the name does not say, means that it CANNOT be used in most
+ * places where a hash function is needed. GHASH is used within the GCM
+ * encryption mode, to provide the checked integrity functionality.
+ *
+ * A GHASH implementation is basically a function that uses the type defined
+ * in this file under the name `br_ghash`:
+ *
+ *     typedef void (*br_ghash)(void *y, const void *h, const void *data, size_t len);
+ *
+ * The `y` pointer refers to a 16-byte value which is used as input, and
+ * receives the output of the GHASH invocation. `h` is a 16-byte secret
+ * value (that serves as key). `data` and `len` define the input data.
+ *
+ * Three GHASH implementations are provided, all constant-time, based on
+ * the use of integer multiplications with appropriate masking to cancel
+ * carry propagation.
+ */
+
+/**
+ * \brief Class type for hash function implementations.
+ *
+ * A `br_hash_class` instance references the methods implementing a hash
+ * function. Constant instances of this structure are defined for each
+ * implemented hash function. Such instances are also called "vtables".
+ *
+ * Vtables are used to support object-oriented programming, as
+ * described on [the BearSSL Web site](https://www.bearssl.org/oop.html).
+ */
+typedef struct br_hash_class_ br_hash_class;
+struct br_hash_class_ {
+	/**
+	 * \brief Size (in bytes) of the context structure appropriate for
+	 * computing this hash function.
+	 */
+	size_t context_size;
+
+	/**
+	 * \brief Descriptor word that contains information about the hash
+	 * function.
+	 *
+	 * For each word `xxx` described below, use `BR_HASHDESC_xxx_OFF`
+	 * and `BR_HASHDESC_xxx_MASK` to access the specific value, as
+	 * follows:
+	 *
+	 *     (hf->desc >> BR_HASHDESC_xxx_OFF) & BR_HASHDESC_xxx_MASK
+	 *
+	 * The defined elements are:
+	 *
+	 *  - `ID`: the symbolic identifier for the function, as defined
+	 *    in [TLS](https://tools.ietf.org/html/rfc5246#section-7.4.1.4.1)
+	 *    (MD5 = 1, SHA-1 = 2,...).
+	 *
+	 *  - `OUT`: hash output size, in bytes.
+	 *
+	 *  - `STATE`: internal running state size, in bytes.
+	 *
+	 *  - `LBLEN`: base-2 logarithm for the internal block size, as
+	 *    defined for HMAC processing (this is 6 for MD5, SHA-1, SHA-224
+	 *    and SHA-256, since these functions use 64-byte blocks; for
+	 *    SHA-384 and SHA-512, this is 7, corresponding to their
+	 *    128-byte blocks).
+	 *
+	 * The descriptor may contain a few other flags.
+	 */
+	uint32_t desc;
+
+	/**
+	 * \brief Initialisation method.
+	 *
+	 * This method takes as parameter a pointer to a context area,
+	 * that it initialises. The first field of the context is set
+	 * to this vtable; other elements are initialised for a new hash
+	 * computation.
+	 *
+	 * \param ctx   pointer to (the first field of) the context.
+	 */
+	void (*init)(const br_hash_class **ctx);
+
+	/**
+	 * \brief Data injection method.
+	 *
+	 * The `len` bytes starting at address `data` are injected into
+	 * the running hash computation incarnated by the specified
+	 * context. The context is updated accordingly.
+	 *
+	 * It is allowed to have `len == 0`, in which case `data` is
+	 * ignored (and could be `NULL`), and nothing happens.
+	 *
+	 * \param ctx    pointer to (the first field of) the context.
+	 * \param data   pointer to the first data byte to inject.
+	 * \param len    number of bytes to inject.
+	 */
+	void (*update)(const br_hash_class **ctx, const void *data, size_t len);
+
+	/**
+	 * \brief Produce hash output.
+	 *
+	 * The hash output corresponding to all data bytes injected in the
+	 * context since the last `init()` call is computed, and written
+	 * in the buffer pointed to by `dst`. The hash output size depends
+	 * on the implemented hash function (e.g. 16 bytes for MD5).
+	 * The context is _not_ modified by this call, so further bytes
+	 * may be afterwards injected to continue the current computation.
+	 *
+	 * \param ctx   pointer to (the first field of) the context.
+	 * \param dst   destination buffer for the hash output.
+	 */
+	void (*out)(const br_hash_class *const *ctx, void *dst);
+
+	/**
+	 * \brief Get running state.
+	 *
+	 * This method saves the current running state into the `dst`
+	 * buffer. What constitutes the "running state" depends on the
+	 * hash function; for Merkle-Damgård hash functions (like
+	 * MD5 or SHA-1), this is the output obtained after processing
+	 * each block. The number of bytes injected so far is returned.
+	 * The context is not modified by this call.
+	 *
+	 * \param ctx   pointer to (the first field of) the context.
+	 * \param dst   destination buffer for the state.
+	 * \return  the injected total byte length.
+	 */
+	uint64_t (*state)(const br_hash_class *const *ctx, void *dst);
+
+	/**
+	 * \brief Set running state.
+	 *
+	 * This method replaces the running state for the function.
+	 *
+	 * \param ctx     pointer to (the first field of) the context.
+	 * \param stb     source buffer for the state.
+	 * \param count   injected total byte length.
+	 */
+	void (*set_state)(const br_hash_class **ctx,
+		const void *stb, uint64_t count);
+};
+
+#ifndef BR_DOXYGEN_IGNORE
+#define BR_HASHDESC_ID(id)           ((uint32_t)(id) << BR_HASHDESC_ID_OFF)
+#define BR_HASHDESC_ID_OFF           0     /* ID: bits 0..7 of desc */
+#define BR_HASHDESC_ID_MASK          0xFF
+
+#define BR_HASHDESC_OUT(size)        ((uint32_t)(size) << BR_HASHDESC_OUT_OFF)
+#define BR_HASHDESC_OUT_OFF          8     /* OUT: bits 8..14 of desc */
+#define BR_HASHDESC_OUT_MASK         0x7F
+
+#define BR_HASHDESC_STATE(size)      ((uint32_t)(size) << BR_HASHDESC_STATE_OFF)
+#define BR_HASHDESC_STATE_OFF        15    /* STATE: bits 15..22 of desc */
+#define BR_HASHDESC_STATE_MASK       0xFF
+
+#define BR_HASHDESC_LBLEN(ls)        ((uint32_t)(ls) << BR_HASHDESC_LBLEN_OFF)
+#define BR_HASHDESC_LBLEN_OFF        23    /* LBLEN: bits 23..26 of desc */
+#define BR_HASHDESC_LBLEN_MASK       0x0F
+
+#define BR_HASHDESC_MD_PADDING       ((uint32_t)1 << 28)  /* flag, bit 28; NOTE(review): presumably padding-related, confirm */
+#define BR_HASHDESC_MD_PADDING_128   ((uint32_t)1 << 29)  /* flag, bit 29 */
+#define BR_HASHDESC_MD_PADDING_BE    ((uint32_t)1 << 30)  /* flag, bit 30 */
+#endif
+
+/*
+ * Specific hash functions.
+ *
+ * Rules for contexts:
+ * -- No interior pointer.
+ * -- No pointer to external dynamically allocated resources.
+ * -- First field is called 'vtable' and is a pointer to a
+ *    const-qualified br_hash_class instance (pointer is set by init()).
+ * -- SHA-224 and SHA-256 contexts are identical.
+ * -- SHA-384 and SHA-512 contexts are identical.
+ *
+ * Thus, contexts can be moved and cloned to capture the hash function
+ * current state; and there is no need for any explicit "release" function.
+ */
+
+/**
+ * \brief Symbolic identifier for MD5.
+ */
+#define br_md5_ID     1
+
+/**
+ * \brief MD5 output size (in bytes).
+ */
+#define br_md5_SIZE   16
+
+/**
+ * \brief Constant vtable for MD5.
+ */
+extern const br_hash_class br_md5_vtable;
+
+/**
+ * \brief MD5 context.
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/**
+	 * \brief Pointer to vtable for this context.
+	 */
+	const br_hash_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	unsigned char buf[64];
+	uint64_t count;
+	uint32_t val[4];
+#endif
+} br_md5_context;
+
+/**
+ * \brief MD5 context initialisation.
+ *
+ * This function initialises or resets a context for a new MD5
+ * computation. It also sets the vtable pointer.
+ *
+ * \param ctx   pointer to the context structure.
+ */
+void br_md5_init(br_md5_context *ctx);
+
+/**
+ * \brief Inject some data bytes in a running MD5 computation.
+ *
+ * The provided context is updated with some data bytes. If the number
+ * of bytes (`len`) is zero, then the data pointer (`data`) is ignored
+ * and may be `NULL`, and this function does nothing.
+ *
+ * \param ctx    pointer to the context structure.
+ * \param data   pointer to the injected data.
+ * \param len    injected data length (in bytes).
+ */
+void br_md5_update(br_md5_context *ctx, const void *data, size_t len);
+
+/**
+ * \brief Compute MD5 output.
+ *
+ * The MD5 output for the concatenation of all bytes injected in the
+ * provided context since the last initialisation or reset call, is
+ * computed and written in the buffer pointed to by `out`. The context
+ * itself is not modified, so extra bytes may be injected afterwards
+ * to continue that computation.
+ *
+ * \param ctx   pointer to the context structure.
+ * \param out   destination buffer for the hash output.
+ */
+void br_md5_out(const br_md5_context *ctx, void *out);
+
+/**
+ * \brief Save MD5 running state.
+ *
+ * The running state for MD5 (output of the last internal block
+ * processing) is written in the buffer pointed to by `out`. The
+ * number of bytes injected since the last initialisation or reset
+ * call is returned. The context is not modified.
+ *
+ * \param ctx   pointer to the context structure.
+ * \param out   destination buffer for the running state.
+ * \return  the injected total byte length.
+ */
+uint64_t br_md5_state(const br_md5_context *ctx, void *out);
+
+/**
+ * \brief Restore MD5 running state.
+ *
+ * The running state for MD5 is set to the provided values.
+ *
+ * \param ctx     pointer to the context structure.
+ * \param stb     source buffer for the running state.
+ * \param count   the injected total byte length.
+ */
+void br_md5_set_state(br_md5_context *ctx, const void *stb, uint64_t count);
+
+/**
+ * \brief Symbolic identifier for SHA-1.
+ */
+#define br_sha1_ID     2
+
+/**
+ * \brief SHA-1 output size (in bytes).
+ */
+#define br_sha1_SIZE   20
+
+/**
+ * \brief Constant vtable for SHA-1.
+ */
+extern const br_hash_class br_sha1_vtable;
+
+/**
+ * \brief SHA-1 context.
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/**
+	 * \brief Pointer to vtable for this context.
+	 */
+	const br_hash_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	unsigned char buf[64];
+	uint64_t count;
+	uint32_t val[5];
+#endif
+} br_sha1_context;
+
+/**
+ * \brief SHA-1 context initialisation.
+ *
+ * This function initialises or resets a context for a new SHA-1
+ * computation. It also sets the vtable pointer.
+ *
+ * \param ctx   pointer to the context structure.
+ */
+void br_sha1_init(br_sha1_context *ctx);
+
+/**
+ * \brief Inject some data bytes in a running SHA-1 computation.
+ *
+ * The provided context is updated with some data bytes. If the number
+ * of bytes (`len`) is zero, then the data pointer (`data`) is ignored
+ * and may be `NULL`, and this function does nothing.
+ *
+ * \param ctx    pointer to the context structure.
+ * \param data   pointer to the injected data.
+ * \param len    injected data length (in bytes).
+ */
+void br_sha1_update(br_sha1_context *ctx, const void *data, size_t len);
+
+/**
+ * \brief Compute SHA-1 output.
+ *
+ * The SHA-1 output for the concatenation of all bytes injected in the
+ * provided context since the last initialisation or reset call, is
+ * computed and written in the buffer pointed to by `out`. The context
+ * itself is not modified, so extra bytes may be injected afterwards
+ * to continue that computation.
+ *
+ * \param ctx   pointer to the context structure.
+ * \param out   destination buffer for the hash output.
+ */
+void br_sha1_out(const br_sha1_context *ctx, void *out);
+
+/**
+ * \brief Save SHA-1 running state.
+ *
+ * The running state for SHA-1 (output of the last internal block
+ * processing) is written in the buffer pointed to by `out`. The
+ * number of bytes injected since the last initialisation or reset
+ * call is returned. The context is not modified.
+ *
+ * \param ctx   pointer to the context structure.
+ * \param out   destination buffer for the running state.
+ * \return  the injected total byte length.
+ */
+uint64_t br_sha1_state(const br_sha1_context *ctx, void *out);
+
+/**
+ * \brief Restore SHA-1 running state.
+ *
+ * The running state for SHA-1 is set to the provided values.
+ *
+ * \param ctx     pointer to the context structure.
+ * \param stb     source buffer for the running state.
+ * \param count   the injected total byte length.
+ */
+void br_sha1_set_state(br_sha1_context *ctx, const void *stb, uint64_t count);
+
+/**
+ * \brief Symbolic identifier for SHA-224.
+ */
+#define br_sha224_ID     3
+
+/**
+ * \brief SHA-224 output size (in bytes).
+ */
+#define br_sha224_SIZE   28
+
+/**
+ * \brief Constant vtable for SHA-224.
+ */
+extern const br_hash_class br_sha224_vtable;
+
+/**
+ * \brief SHA-224 context.
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/**
+	 * \brief Pointer to vtable for this context.
+	 */
+	const br_hash_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	unsigned char buf[64];
+	uint64_t count;
+	uint32_t val[8];
+#endif
+} br_sha224_context;
+
+/**
+ * \brief SHA-224 context initialisation.
+ *
+ * This function initialises or resets a context for a new SHA-224
+ * computation. It also sets the vtable pointer.
+ *
+ * \param ctx   pointer to the context structure.
+ */
+void br_sha224_init(br_sha224_context *ctx);
+
+/**
+ * \brief Inject some data bytes in a running SHA-224 computation.
+ *
+ * The provided context is updated with some data bytes. If the number
+ * of bytes (`len`) is zero, then the data pointer (`data`) is ignored
+ * and may be `NULL`, and this function does nothing.
+ *
+ * \param ctx    pointer to the context structure.
+ * \param data   pointer to the injected data.
+ * \param len    injected data length (in bytes).
+ */
+void br_sha224_update(br_sha224_context *ctx, const void *data, size_t len);
+
+/**
+ * \brief Compute SHA-224 output.
+ *
+ * The SHA-224 output for the concatenation of all bytes injected in the
+ * provided context since the last initialisation or reset call, is
+ * computed and written in the buffer pointed to by `out`. The context
+ * itself is not modified, so extra bytes may be injected afterwards
+ * to continue that computation.
+ *
+ * \param ctx   pointer to the context structure.
+ * \param out   destination buffer for the hash output.
+ */
+void br_sha224_out(const br_sha224_context *ctx, void *out);
+
+/**
+ * \brief Save SHA-224 running state.
+ *
+ * The running state for SHA-224 (output of the last internal block
+ * processing) is written in the buffer pointed to by `out`. The
+ * number of bytes injected since the last initialisation or reset
+ * call is returned. The context is not modified.
+ *
+ * \param ctx   pointer to the context structure.
+ * \param out   destination buffer for the running state.
+ * \return  the injected total byte length.
+ */
+uint64_t br_sha224_state(const br_sha224_context *ctx, void *out);
+
+/**
+ * \brief Restore SHA-224 running state.
+ *
+ * The running state for SHA-224 is set to the provided values.
+ *
+ * \param ctx     pointer to the context structure.
+ * \param stb     source buffer for the running state.
+ * \param count   the injected total byte length.
+ */
+void br_sha224_set_state(br_sha224_context *ctx,
+	const void *stb, uint64_t count);
+
+/**
+ * \brief Symbolic identifier for SHA-256.
+ */
+#define br_sha256_ID     4
+
+/**
+ * \brief SHA-256 output size (in bytes).
+ */
+#define br_sha256_SIZE   32
+
+/**
+ * \brief Constant vtable for SHA-256.
+ */
+extern const br_hash_class br_sha256_vtable;
+
+#ifdef BR_DOXYGEN_IGNORE
+/**
+ * \brief SHA-256 context.
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/**
+	 * \brief Pointer to vtable for this context.
+	 */
+	const br_hash_class *vtable;
+} br_sha256_context;
+#else
+typedef br_sha224_context br_sha256_context;
+#endif
+
+/**
+ * \brief SHA-256 context initialisation.
+ *
+ * This function initialises or resets a context for a new SHA-256
+ * computation. It also sets the vtable pointer.
+ *
+ * \param ctx   pointer to the context structure.
+ */
+void br_sha256_init(br_sha256_context *ctx);
+
+#ifdef BR_DOXYGEN_IGNORE
+/**
+ * \brief Inject some data bytes in a running SHA-256 computation.
+ *
+ * The provided context is updated with some data bytes. If the number
+ * of bytes (`len`) is zero, then the data pointer (`data`) is ignored
+ * and may be `NULL`, and this function does nothing.
+ *
+ * \param ctx    pointer to the context structure.
+ * \param data   pointer to the injected data.
+ * \param len    injected data length (in bytes).
+ */
+void br_sha256_update(br_sha256_context *ctx, const void *data, size_t len);
+#else
+#define br_sha256_update      br_sha224_update
+#endif
+
+/**
+ * \brief Compute SHA-256 output.
+ *
+ * The SHA-256 output for the concatenation of all bytes injected in the
+ * provided context since the last initialisation or reset call, is
+ * computed and written in the buffer pointed to by `out`. The context
+ * itself is not modified, so extra bytes may be injected afterwards
+ * to continue that computation.
+ *
+ * \param ctx   pointer to the context structure.
+ * \param out   destination buffer for the hash output.
+ */
+void br_sha256_out(const br_sha256_context *ctx, void *out);
+
+#ifdef BR_DOXYGEN_IGNORE
+/**
+ * \brief Save SHA-256 running state.
+ *
+ * The running state for SHA-256 (output of the last internal block
+ * processing) is written in the buffer pointed to by `out`. The
+ * number of bytes injected since the last initialisation or reset
+ * call is returned. The context is not modified.
+ *
+ * \param ctx   pointer to the context structure.
+ * \param out   destination buffer for the running state.
+ * \return  the injected total byte length.
+ */
+uint64_t br_sha256_state(const br_sha256_context *ctx, void *out);
+#else
+#define br_sha256_state       br_sha224_state
+#endif
+
+#ifdef BR_DOXYGEN_IGNORE
+/**
+ * \brief Restore SHA-256 running state.
+ *
+ * The running state for SHA-256 is set to the provided values.
+ *
+ * \param ctx     pointer to the context structure.
+ * \param stb     source buffer for the running state.
+ * \param count   the injected total byte length.
+ */
+void br_sha256_set_state(br_sha256_context *ctx,
+	const void *stb, uint64_t count);
+#else
+#define br_sha256_set_state   br_sha224_set_state
+#endif
+
+/**
+ * \brief Symbolic identifier for SHA-384.
+ */
+#define br_sha384_ID     5
+
+/**
+ * \brief SHA-384 output size (in bytes).
+ */
+#define br_sha384_SIZE   48
+
+/**
+ * \brief Constant vtable for SHA-384.
+ */
+extern const br_hash_class br_sha384_vtable;
+
+/**
+ * \brief SHA-384 context.
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/**
+	 * \brief Pointer to vtable for this context.
+	 */
+	const br_hash_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	unsigned char buf[128];
+	uint64_t count;
+	uint64_t val[8];
+#endif
+} br_sha384_context;
+
+/**
+ * \brief SHA-384 context initialisation.
+ *
+ * This function initialises or resets a context for a new SHA-384
+ * computation. It also sets the vtable pointer.
+ *
+ * \param ctx   pointer to the context structure.
+ */
+void br_sha384_init(br_sha384_context *ctx);
+
+/**
+ * \brief Inject some data bytes in a running SHA-384 computation.
+ *
+ * The provided context is updated with some data bytes. If the number
+ * of bytes (`len`) is zero, then the data pointer (`data`) is ignored
+ * and may be `NULL`, and this function does nothing.
+ *
+ * \param ctx    pointer to the context structure.
+ * \param data   pointer to the injected data.
+ * \param len    injected data length (in bytes).
+ */
+void br_sha384_update(br_sha384_context *ctx, const void *data, size_t len);
+
+/**
+ * \brief Compute SHA-384 output.
+ *
+ * The SHA-384 output for the concatenation of all bytes injected in the
+ * provided context since the last initialisation or reset call, is
+ * computed and written in the buffer pointed to by `out`. The context
+ * itself is not modified, so extra bytes may be injected afterwards
+ * to continue that computation.
+ *
+ * \param ctx   pointer to the context structure.
+ * \param out   destination buffer for the hash output.
+ */
+void br_sha384_out(const br_sha384_context *ctx, void *out);
+
+/**
+ * \brief Save SHA-384 running state.
+ *
+ * The running state for SHA-384 (output of the last internal block
+ * processing) is written in the buffer pointed to by `out`. The
+ * number of bytes injected since the last initialisation or reset
+ * call is returned. The context is not modified.
+ *
+ * \param ctx   pointer to the context structure.
+ * \param out   destination buffer for the running state.
+ * \return  the injected total byte length.
+ */
+uint64_t br_sha384_state(const br_sha384_context *ctx, void *out);
+
+/**
+ * \brief Restore SHA-384 running state.
+ *
+ * The running state for SHA-384 is set to the provided values.
+ *
+ * \param ctx     pointer to the context structure.
+ * \param stb     source buffer for the running state.
+ * \param count   the injected total byte length.
+ */
+void br_sha384_set_state(br_sha384_context *ctx,
+	const void *stb, uint64_t count);
+
+/**
+ * \brief Symbolic identifier for SHA-512.
+ */
+#define br_sha512_ID     6
+
+/**
+ * \brief SHA-512 output size (in bytes).
+ */
+#define br_sha512_SIZE   64
+
+/**
+ * \brief Constant vtable for SHA-512.
+ */
+extern const br_hash_class br_sha512_vtable;
+
+#ifdef BR_DOXYGEN_IGNORE
+/**
+ * \brief SHA-512 context.
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/**
+	 * \brief Pointer to vtable for this context.
+	 */
+	const br_hash_class *vtable;
+} br_sha512_context;
+#else
+typedef br_sha384_context br_sha512_context;
+#endif
+
+/**
+ * \brief SHA-512 context initialisation.
+ *
+ * This function initialises or resets a context for a new SHA-512
+ * computation. It also sets the vtable pointer.
+ *
+ * \param ctx   pointer to the context structure.
+ */
+void br_sha512_init(br_sha512_context *ctx);
+
+#ifdef BR_DOXYGEN_IGNORE
+/**
+ * \brief Inject some data bytes in a running SHA-512 computation.
+ *
+ * The provided context is updated with some data bytes. If the number
+ * of bytes (`len`) is zero, then the data pointer (`data`) is ignored
+ * and may be `NULL`, and this function does nothing.
+ *
+ * \param ctx    pointer to the context structure.
+ * \param data   pointer to the injected data.
+ * \param len    injected data length (in bytes).
+ */
+void br_sha512_update(br_sha512_context *ctx, const void *data, size_t len);
+#else
+#define br_sha512_update   br_sha384_update
+#endif
+
+/**
+ * \brief Compute SHA-512 output.
+ *
+ * The SHA-512 output for the concatenation of all bytes injected in the
+ * provided context since the last initialisation or reset call, is
+ * computed and written in the buffer pointed to by `out`. The context
+ * itself is not modified, so extra bytes may be injected afterwards
+ * to continue that computation.
+ *
+ * \param ctx   pointer to the context structure.
+ * \param out   destination buffer for the hash output.
+ */
+void br_sha512_out(const br_sha512_context *ctx, void *out);
+
+#ifdef BR_DOXYGEN_IGNORE
+/**
+ * \brief Save SHA-512 running state.
+ *
+ * The running state for SHA-512 (output of the last internal block
+ * processing) is written in the buffer pointed to by `out`. The
+ * number of bytes injected since the last initialisation or reset
+ * call is returned. The context is not modified.
+ *
+ * \param ctx   pointer to the context structure.
+ * \param out   destination buffer for the running state.
+ * \return  the injected total byte length.
+ */
+uint64_t br_sha512_state(const br_sha512_context *ctx, void *out);
+#else
+#define br_sha512_state   br_sha384_state
+#endif
+
+#ifdef BR_DOXYGEN_IGNORE
+/**
+ * \brief Restore SHA-512 running state.
+ *
+ * The running state for SHA-512 is set to the provided values.
+ *
+ * \param ctx     pointer to the context structure.
+ * \param stb     source buffer for the running state.
+ * \param count   the injected total byte length.
+ */
+void br_sha512_set_state(br_sha512_context *ctx,
+	const void *stb, uint64_t count);
+#else
+#define br_sha512_set_state   br_sha384_set_state
+#endif
+
+/*
+ * "md5sha1" is a special hash function that computes both MD5 and SHA-1
+ * on the same input, and produces a 36-byte output (MD5 and SHA-1
+ * concatenation, in that order). State size is also 36 bytes.
+ */
+
+/**
+ * \brief Symbolic identifier for MD5+SHA-1.
+ *
+ * MD5+SHA-1 is the concatenation of MD5 and SHA-1, computed over the
+ * same input. It is not one of the functions identified in TLS, so
+ * we give it a symbolic identifier of value 0.
+ */
+#define br_md5sha1_ID     0
+
+/**
+ * \brief MD5+SHA-1 output size (in bytes).
+ */
+#define br_md5sha1_SIZE   36
+
+/**
+ * \brief Constant vtable for MD5+SHA-1.
+ */
+extern const br_hash_class br_md5sha1_vtable;
+
+/**
+ * \brief MD5+SHA-1 context.
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/**
+	 * \brief Pointer to vtable for this context.
+	 */
+	const br_hash_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	unsigned char buf[64];
+	uint64_t count;
+	uint32_t val_md5[4];
+	uint32_t val_sha1[5];
+#endif
+} br_md5sha1_context;
+
+/**
+ * \brief MD5+SHA-1 context initialisation.
+ *
+ * This function initialises or resets a context for a new MD5+SHA-1
+ * computation. It also sets the vtable pointer.
+ *
+ * \param ctx   pointer to the context structure.
+ */
+void br_md5sha1_init(br_md5sha1_context *ctx);
+
+/**
+ * \brief Inject some data bytes in a running MD5+SHA-1 computation.
+ *
+ * The provided context is updated with some data bytes. If the number
+ * of bytes (`len`) is zero, then the data pointer (`data`) is ignored
+ * and may be `NULL`, and this function does nothing.
+ *
+ * \param ctx    pointer to the context structure.
+ * \param data   pointer to the injected data.
+ * \param len    injected data length (in bytes).
+ */
+void br_md5sha1_update(br_md5sha1_context *ctx, const void *data, size_t len);
+
+/**
+ * \brief Compute MD5+SHA-1 output.
+ *
+ * The MD5+SHA-1 output for the concatenation of all bytes injected in the
+ * provided context since the last initialisation or reset call, is
+ * computed and written in the buffer pointed to by `out`. The context
+ * itself is not modified, so extra bytes may be injected afterwards
+ * to continue that computation.
+ *
+ * \param ctx   pointer to the context structure.
+ * \param out   destination buffer for the hash output.
+ */
+void br_md5sha1_out(const br_md5sha1_context *ctx, void *out);
+
+/**
+ * \brief Save MD5+SHA-1 running state.
+ *
+ * The running state for MD5+SHA-1 (output of the last internal block
+ * processing) is written in the buffer pointed to by `out`. The
+ * number of bytes injected since the last initialisation or reset
+ * call is returned. The context is not modified.
+ *
+ * \param ctx   pointer to the context structure.
+ * \param out   destination buffer for the running state.
+ * \return  the injected total byte length.
+ */
+uint64_t br_md5sha1_state(const br_md5sha1_context *ctx, void *out);
+
+/**
+ * \brief Restore MD5+SHA-1 running state.
+ *
+ * The running state for MD5+SHA-1 is set to the provided values.
+ *
+ * \param ctx     pointer to the context structure.
+ * \param stb     source buffer for the running state.
+ * \param count   the injected total byte length.
+ */
+void br_md5sha1_set_state(br_md5sha1_context *ctx,
+	const void *stb, uint64_t count);
+
+/**
+ * \brief Aggregate context for configurable hash function support.
+ *
+ * The `br_hash_compat_context` type is a type which is large enough to
+ * serve as context for all standard hash functions defined above.
+ */
+typedef union {
+	const br_hash_class *vtable;
+	br_md5_context md5;
+	br_sha1_context sha1;
+	br_sha224_context sha224;
+	br_sha256_context sha256;
+	br_sha384_context sha384;
+	br_sha512_context sha512;
+	br_md5sha1_context md5sha1;
+} br_hash_compat_context;
+
+/*
+ * The multi-hasher is a construct that handles hashing of the same input
+ * data with several hash functions, with a single shared input buffer.
+ * It can handle MD5, SHA-1, SHA-224, SHA-256, SHA-384 and SHA-512
+ * simultaneously, though which functions are activated depends on
+ * the set implementation pointers.
+ */
+
+/**
+ * \brief Multi-hasher context structure.
+ *
+ * The multi-hasher runs up to six hash functions in the standard TLS list
+ * (MD5, SHA-1, SHA-224, SHA-256, SHA-384 and SHA-512) in parallel, over
+ * the same input.
+ *
+ * The multi-hasher does _not_ follow the OOP structure with a vtable.
+ * Instead, it is configured with the vtables of the hash functions it
+ * should run. Structure fields are not supposed to be accessed directly.
+ */
+typedef struct {
+#ifndef BR_DOXYGEN_IGNORE
+	unsigned char buf[128];
+	uint64_t count;
+	uint32_t val_32[25];
+	uint64_t val_64[16];
+	const br_hash_class *impl[6];
+#endif
+} br_multihash_context;
+
+/**
+ * \brief Clear a multi-hasher context.
+ *
+ * This should always be called once on a given context, _before_ setting
+ * the implementation pointers.
+ *
+ * \param ctx   the multi-hasher context.
+ */
+void br_multihash_zero(br_multihash_context *ctx);
+
+/**
+ * \brief Set a hash function implementation.
+ *
+ * Implementations shall be set _after_ clearing the context (with
+ * `br_multihash_zero()`) but _before_ initialising the computation
+ * (with `br_multihash_init()`). The hash function implementation
+ * MUST be one of the standard hash functions (MD5, SHA-1, SHA-224,
+ * SHA-256, SHA-384 or SHA-512); it may also be `NULL` to remove
+ * an implementation from the multi-hasher.
+ *
+ * \param ctx    the multi-hasher context.
+ * \param id     the hash function symbolic identifier (1 to 6).
+ * \param impl   the hash function vtable, or `NULL`.
+ */
+static inline void
+br_multihash_setimpl(br_multihash_context *ctx,
+	int id, const br_hash_class *impl)
+{
+	/*
+	 * This code relies on hash function IDs being values 1 to 6,
+	 * in the MD5 to SHA-512 order; `id` is not range-checked.
+	 */
+	ctx->impl[id - 1] = impl;
+}
+
+/**
+ * \brief Get a hash function implementation.
+ *
+ * This function returns the currently configured vtable for a given
+ * hash function (by symbolic ID). If no such function was configured in
+ * the provided multi-hasher context, then this function returns `NULL`.
+ *
+ * \param ctx    the multi-hasher context.
+ * \param id     the hash function symbolic identifier (1 to 6).
+ * \return  the hash function vtable, or `NULL`.
+ */
+static inline const br_hash_class *
+br_multihash_getimpl(const br_multihash_context *ctx, int id)
+{
+	return ctx->impl[id - 1];
+}
+
+/**
+ * \brief Reset a multi-hasher context.
+ *
+ * This function prepares the context for a new hashing computation,
+ * for all implementations configured at that point.
+ *
+ * \param ctx    the multi-hasher context.
+ */
+void br_multihash_init(br_multihash_context *ctx);
+
+/**
+ * \brief Inject some data bytes in a running multi-hashing computation.
+ *
+ * The provided context is updated with some data bytes. If the number
+ * of bytes (`len`) is zero, then the data pointer (`data`) is ignored
+ * and may be `NULL`, and this function does nothing.
+ *
+ * \param ctx    pointer to the context structure.
+ * \param data   pointer to the injected data.
+ * \param len    injected data length (in bytes).
+ */
+void br_multihash_update(br_multihash_context *ctx,
+	const void *data, size_t len);
+
+/**
+ * \brief Compute a hash output from a multi-hasher.
+ *
+ * The hash output for the concatenation of all bytes injected in the
+ * provided context since the last initialisation or reset call, is
+ * computed and written in the buffer pointed to by `dst`. The hash
+ * function to use is identified by `id` and must be one of the standard
+ * hash functions. If that hash function was indeed configured in the
+ * multi-hasher context, the corresponding hash value is written in
+ * `dst` and its length (in bytes) is returned. If the hash function
+ * was _not_ configured, then nothing is written in `dst` and 0 is
+ * returned.
+ *
+ * The context itself is not modified, so extra bytes may be injected
+ * afterwards to continue the hash computations.
+ *
+ * \param ctx   pointer to the context structure.
+ * \param id    the hash function symbolic identifier.
+ * \param dst   destination buffer for the hash output.
+ * \return  the hash output length (in bytes), or 0.
+ */
+size_t br_multihash_out(const br_multihash_context *ctx, int id, void *dst);
+
+/**
+ * \brief Type for a GHASH implementation.
+ *
+ * GHASH is a sort of keyed hash meant to be used to implement GCM in
+ * combination with a block cipher (with 16-byte blocks).
+ *
+ * The `y` array has length 16 bytes and is used for input and output; in
+ * a complete GHASH run, it starts with an all-zero value. `h` is a 16-byte
+ * value that serves as key (it is derived from the encryption key in GCM,
+ * using the block cipher). The data length (`len`) is expressed in bytes.
+ * The `y` array is updated.
+ *
+ * If the data length is not a multiple of 16, then the data is implicitly
+ * padded with zeros up to the next multiple of 16. Thus, when using GHASH
+ * in GCM, this method may be called twice, for the associated data and
+ * for the ciphertext, respectively; the zero-padding implements exactly
+ * the GCM rules.
+ *
+ * \param y      the array to update.
+ * \param h      the GHASH key.
+ * \param data   the input data (may be `NULL` if `len` is zero).
+ * \param len    the input data length (in bytes).
+ */
+typedef void (*br_ghash)(void *y, const void *h, const void *data, size_t len);
+
+/**
+ * \brief GHASH implementation using multiplications (mixed 32-bit).
+ *
+ * This implementation uses multiplications of 32-bit values, with a
+ * 64-bit result. It is constant-time (if multiplications are
+ * constant-time).
+ *
+ * \param y      the array to update.
+ * \param h      the GHASH key.
+ * \param data   the input data (may be `NULL` if `len` is zero).
+ * \param len    the input data length (in bytes).
+ */
+void br_ghash_ctmul(void *y, const void *h, const void *data, size_t len);
+
+/**
+ * \brief GHASH implementation using multiplications (strict 32-bit).
+ *
+ * This implementation uses multiplications of 32-bit values, with a
+ * 32-bit result. It is usually somewhat slower than `br_ghash_ctmul()`,
+ * but it is expected to be faster on architectures for which the
+ * 32-bit multiplication opcode does not yield the upper 32 bits of the
+ * product. It is constant-time (if multiplications are constant-time).
+ *
+ * \param y      the array to update.
+ * \param h      the GHASH key.
+ * \param data   the input data (may be `NULL` if `len` is zero).
+ * \param len    the input data length (in bytes).
+ */
+void br_ghash_ctmul32(void *y, const void *h, const void *data, size_t len);
+
+/**
+ * \brief GHASH implementation using multiplications (64-bit).
+ *
+ * This implementation uses multiplications of 64-bit values, with a
+ * 64-bit result. It is constant-time (if multiplications are
+ * constant-time). It is substantially faster than `br_ghash_ctmul()`
+ * and `br_ghash_ctmul32()` on most 64-bit architectures.
+ *
+ * \param y      the array to update.
+ * \param h      the GHASH key.
+ * \param data   the input data (may be `NULL` if `len` is zero).
+ * \param len    the input data length (in bytes).
+ */
+void br_ghash_ctmul64(void *y, const void *h, const void *data, size_t len);
+
+/**
+ * \brief GHASH implementation using the `pclmulqdq` opcode (part of the
+ * AES-NI instructions).
+ *
+ * This implementation is available only on x86 platforms where the
+ * compiler supports the relevant intrinsic functions. Even if the
+ * compiler supports these functions, the local CPU might not support
+ * the `pclmulqdq` opcode, meaning that a call will fail with an
+ * illegal instruction exception. To safely obtain a pointer to this
+ * function when supported (or 0 otherwise), use `br_ghash_pclmul_get()`.
+ *
+ * \param y      the array to update.
+ * \param h      the GHASH key.
+ * \param data   the input data (may be `NULL` if `len` is zero).
+ * \param len    the input data length (in bytes).
+ */
+void br_ghash_pclmul(void *y, const void *h, const void *data, size_t len);
+
+/**
+ * \brief Obtain the `pclmul` GHASH implementation, if available.
+ *
+ * If the `pclmul` implementation was compiled in the library (depending
+ * on the compiler abilities) _and_ the local CPU appears to support the
+ * opcode, then this function will return a pointer to the
+ * `br_ghash_pclmul()` function. Otherwise, it will return `0`.
+ *
+ * \return  the `pclmul` GHASH implementation, or `0`.
+ */
+br_ghash br_ghash_pclmul_get(void);
+
+/**
+ * \brief GHASH implementation using the POWER8 opcodes.
+ *
+ * This implementation is available only on POWER8 platforms (and later).
+ * To safely obtain a pointer to this function when supported (or 0
+ * otherwise), use `br_ghash_pwr8_get()`.
+ *
+ * \param y      the array to update.
+ * \param h      the GHASH key.
+ * \param data   the input data (may be `NULL` if `len` is zero).
+ * \param len    the input data length (in bytes).
+ */
+void br_ghash_pwr8(void *y, const void *h, const void *data, size_t len);
+
+/**
+ * \brief Obtain the `pwr8` GHASH implementation, if available.
+ *
+ * If the `pwr8` implementation was compiled in the library (depending
+ * on the compiler abilities) _and_ the local CPU appears to support the
+ * opcode, then this function will return a pointer to the
+ * `br_ghash_pwr8()` function. Otherwise, it will return `0`.
+ *
+ * \return  the `pwr8` GHASH implementation, or `0`.
+ */
+br_ghash br_ghash_pwr8_get(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/third_party/bearssl/inc/bearssl_hmac.h b/third_party/bearssl/inc/bearssl_hmac.h
new file mode 100644
index 0000000..4dc01ca
--- /dev/null
+++ b/third_party/bearssl/inc/bearssl_hmac.h
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef BR_BEARSSL_HMAC_H__
+#define BR_BEARSSL_HMAC_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "bearssl_hash.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \file bearssl_hmac.h
+ *
+ * # HMAC
+ *
+ * HMAC is initialized with a key and an underlying hash function; it
+ * then fills a "key context". That context contains the processed
+ * key.
+ *
+ * With the key context, a HMAC context can be initialized to process
+ * the input bytes and obtain the MAC output. The key context is not
+ * modified during that process, and can be reused.
+ *
+ * IMPORTANT: HMAC shall be used only with functions that have the
+ * following properties:
+ *
+ *   - hash output size does not exceed 64 bytes;
+ *   - hash internal state size does not exceed 64 bytes;
+ *   - internal block length is a power of 2 between 16 and 256 bytes.
+ */
+
+/**
+ * \brief HMAC key context.
+ *
+ * The HMAC key context is initialised with a hash function implementation
+ * and a secret key. Contents are opaque (callers should not access them
+ * directly). The caller is responsible for allocating the context where
+ * appropriate. Context initialisation and usage incurs no dynamic
+ * allocation, so there is no release function.
+ */
+typedef struct {
+#ifndef BR_DOXYGEN_IGNORE
+	const br_hash_class *dig_vtable;
+	unsigned char ksi[64], kso[64];
+#endif
+} br_hmac_key_context;
+
+/**
+ * \brief HMAC key context initialisation.
+ *
+ * Initialise the key context with the provided key, using the hash function
+ * identified by `digest_vtable`. This supports arbitrary key lengths.
+ *
+ * \param kc              HMAC key context to initialise.
+ * \param digest_vtable   pointer to the hash function implementation vtable.
+ * \param key             pointer to the HMAC secret key.
+ * \param key_len         HMAC secret key length (in bytes).
+ */
+void br_hmac_key_init(br_hmac_key_context *kc,
+	const br_hash_class *digest_vtable, const void *key, size_t key_len);
+
+/**
+ * \brief Get the underlying hash function.
+ *
+ * This function returns a pointer to the implementation vtable of the
+ * hash function used for this HMAC key context.
+ *
+ * \param kc   HMAC key context.
+ * \return  the hash function implementation.
+ */
+static inline const br_hash_class *br_hmac_key_get_digest(
+	const br_hmac_key_context *kc)
+{
+	return kc->dig_vtable;
+}
+
+/**
+ * \brief HMAC computation context.
+ *
+ * The HMAC computation context maintains the state for a single HMAC
+ * computation. It is modified as input bytes are injected. The context
+ * is caller-allocated and has no release function since it does not
+ * dynamically allocate external resources. Its contents are opaque.
+ */
+typedef struct {
+#ifndef BR_DOXYGEN_IGNORE
+	br_hash_compat_context dig;
+	unsigned char kso[64];
+	size_t out_len;
+#endif
+} br_hmac_context;
+
+/**
+ * \brief HMAC computation initialisation.
+ *
+ * Initialise a HMAC context with a key context. The key context is
+ * unmodified. Relevant data from the key context is immediately copied;
+ * the key context can thus be independently reused, modified or released
+ * without impacting this HMAC computation.
+ *
+ * An explicit output length can be specified; the actual output length
+ * will be the minimum of that value and the natural HMAC output length.
+ * If `out_len` is 0, then the natural HMAC output length is selected. The
+ * "natural output length" is the output length of the underlying hash
+ * function.
+ *
+ * \param ctx       HMAC context to initialise.
+ * \param kc        HMAC key context (already initialised with the key).
+ * \param out_len   HMAC output length (0 to select "natural length").
+ */
+void br_hmac_init(br_hmac_context *ctx,
+	const br_hmac_key_context *kc, size_t out_len);
+
+/**
+ * \brief Get the HMAC output size.
+ *
+ * The HMAC output size is the number of bytes that will actually be
+ * produced with `br_hmac_out()` with the provided context. This function
+ * MUST NOT be called on a non-initialised HMAC computation context.
+ * The returned value is the minimum of the HMAC natural length (output
+ * size of the underlying hash function) and the `out_len` parameter which
+ * was used with the last `br_hmac_init()` call on that context (if the
+ * initialisation `out_len` parameter was 0, then this function will
+ * return the HMAC natural length). The context is only read, not modified.
+ *
+ * \param ctx   the (already initialised) HMAC computation context.
+ * \return  the HMAC actual output size.
+ */
+static inline size_t
+br_hmac_size(const br_hmac_context *ctx)
+{
+	return ctx->out_len;
+}
+
+/**
+ * \brief Get the underlying hash function.
+ *
+ * This function returns a pointer to the implementation vtable of the
+ * hash function used for this HMAC context.
+ *
+ * \param hc   HMAC context.
+ * \return  the hash function implementation.
+ */
+static inline const br_hash_class *br_hmac_get_digest(
+	const br_hmac_context *hc)
+{
+	return hc->dig.vtable;
+}
+
+/**
+ * \brief Inject some bytes in HMAC.
+ *
+ * The provided `len` bytes are injected as extra input in the HMAC
+ * computation incarnated by the `ctx` HMAC context. It is acceptable
+ * that `len` is zero, in which case `data` is ignored (and may be
+ * `NULL`) and this function does nothing.
+ */
+void br_hmac_update(br_hmac_context *ctx, const void *data, size_t len);
+
+/**
+ * \brief Compute the HMAC output.
+ *
+ * The destination buffer MUST be large enough to accommodate the result;
+ * its length is at most the "natural length" of HMAC (i.e. the output
+ * length of the underlying hash function). The context is NOT modified;
+ * further bytes may be processed. Thus, "partial HMAC" values can be
+ * efficiently obtained.
+ *
+ * Returned value is the output length (in bytes).
+ *
+ * \param ctx   HMAC computation context.
+ * \param out   destination buffer for the HMAC output.
+ * \return  the produced value length (in bytes).
+ */
+size_t br_hmac_out(const br_hmac_context *ctx, void *out);
+
+/**
+ * \brief Constant-time HMAC computation.
+ *
+ * This function computes the HMAC output in constant time. Some extra
+ * input bytes are processed, then the output is computed. The extra
+ * input consists in the `len` bytes pointed to by `data`. The `len`
+ * parameter must lie between `min_len` and `max_len` (inclusive);
+ * `max_len` bytes are actually read from `data`. Computing time (and
+ * memory access pattern) will not depend upon the data byte contents or
+ * the value of `len`.
+ *
+ * The output is written in the `out` buffer, that MUST be large enough
+ * to receive it.
+ *
+ * The difference `max_len - min_len` MUST be less than 2<sup>30</sup>
+ * (i.e. about one gigabyte).
+ *
+ * This function computes the output properly only if the underlying
+ * hash function uses MD padding (i.e. MD5, SHA-1, SHA-224, SHA-256,
+ * SHA-384 or SHA-512).
+ *
+ * The provided context is NOT modified.
+ *
+ * \param ctx       the (already initialised) HMAC computation context.
+ * \param data      the extra input bytes.
+ * \param len       the extra input length (in bytes).
+ * \param min_len   minimum extra input length (in bytes).
+ * \param max_len   maximum extra input length (in bytes).
+ * \param out       destination buffer for the HMAC output.
+ * \return  the produced value length (in bytes).
+ */
+size_t br_hmac_outCT(const br_hmac_context *ctx,
+	const void *data, size_t len, size_t min_len, size_t max_len,
+	void *out);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/third_party/bearssl/inc/bearssl_kdf.h b/third_party/bearssl/inc/bearssl_kdf.h
new file mode 100644
index 0000000..955b843
--- /dev/null
+++ b/third_party/bearssl/inc/bearssl_kdf.h
@@ -0,0 +1,284 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef BR_BEARSSL_KDF_H__
+#define BR_BEARSSL_KDF_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "bearssl_hash.h"
+#include "bearssl_hmac.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \file bearssl_kdf.h
+ *
+ * # Key Derivation Functions
+ *
+ * KDF are functions that take a variable length input, and provide a
+ * variable length output, meant to be used to derive subkeys from a
+ * master key.
+ *
+ * ## HKDF
+ *
+ * HKDF is a KDF defined by [RFC 5869](https://tools.ietf.org/html/rfc5869).
+ * It is based on HMAC, itself using an underlying hash function. Any
+ * hash function can be used, as long as it is compatible with the rules
+ * for the HMAC implementation (i.e. output size is 64 bytes or less, hash
+ * internal state size is 64 bytes or less, and the internal block length is
+ * a power of 2 between 16 and 256 bytes). HKDF has two phases:
+ *
+ *  - HKDF-Extract: the input data is ingested, along with a "salt" value.
+ *
+ *  - HKDF-Expand: the output is produced, from the result of processing
+ *    the input and salt, and using an extra non-secret parameter called
+ *    "info".
+ *
+ * The "salt" and "info" strings are non-secret and can be empty. Their role
+ * is normally to bind the input and output, respectively, to conventional
+ * identifiers that qualify them within the used protocol or application.
+ *
+ * The implementation defined in this file uses the following functions:
+ *
+ *  - `br_hkdf_init()`: initialize an HKDF context, with a hash function,
+ *    and the salt. This starts the HKDF-Extract process.
+ *
+ *  - `br_hkdf_inject()`: inject more input bytes. This function may be
+ *    called repeatedly if the input data is provided by chunks.
+ *
+ *  - `br_hkdf_flip()`: end the HKDF-Extract process, and start the
+ *    HKDF-Expand process.
+ *
+ *  - `br_hkdf_produce()`: get the next bytes of output. This function
+ *    may be called several times to obtain the full output by chunks.
+ *    For correct HKDF processing, the same "info" string must be
+ *    provided for each call.
+ *
+ * Note that the HKDF total output size (the number of bytes that
+ * HKDF-Expand is willing to produce) is limited: if the hash output size
+ * is _n_ bytes, then the maximum output size is _255*n_.
+ *
+ * ## SHAKE
+ *
+ * SHAKE is defined in
+ * [FIPS 202](https://csrc.nist.gov/publications/detail/fips/202/final)
+ * under two versions: SHAKE128 and SHAKE256, offering an alleged
+ * "security level" of 128 and 256 bits, respectively (SHAKE128 is
+ * about 20 to 25% faster than SHAKE256). SHAKE internally relies on
+ * the Keccak family of sponge functions, not on any externally provided
+ * hash function. Contrary to HKDF, SHAKE does not have a concept of
+ * either a "salt" or an "info" string. The API consists in four
+ * functions:
+ *
+ *  - `br_shake_init()`: initialize a SHAKE context for a given
+ *    security level.
+ *
+ *  - `br_shake_inject()`: inject more input bytes. This function may be
+ *    called repeatedly if the input data is provided by chunks.
+ *
+ *  - `br_shake_flip()`: end the data injection process, and start the
+ *    data production process.
+ *
+ *  - `br_shake_produce()`: get the next bytes of output. This function
+ *    may be called several times to obtain the full output by chunks.
+ */
+
+/**
+ * \brief HKDF context.
+ *
+ * The HKDF context is initialized with a hash function implementation
+ * and a salt value. Contents are opaque (callers should not access them
+ * directly). The caller is responsible for allocating the context where
+ * appropriate. Context initialisation and usage incurs no dynamic
+ * allocation, so there is no release function.
+ */
+typedef struct {
+#ifndef BR_DOXYGEN_IGNORE
+	union {
+		br_hmac_context hmac_ctx;
+		br_hmac_key_context prk_ctx;
+	} u;
+	unsigned char buf[64];
+	size_t ptr;
+	size_t dig_len;
+	unsigned chunk_num;
+#endif
+} br_hkdf_context;
+
+/**
+ * \brief HKDF context initialization.
+ *
+ * The underlying hash function and salt value are provided. Arbitrary
+ * salt lengths can be used.
+ *
+ * HKDF makes a difference between a salt of length zero, and an
+ * absent salt (the latter being equivalent to a salt consisting of
+ * bytes of value zero, of the same length as the hash function output).
+ * If `salt_len` is zero, then this function assumes that the salt is
+ * present but of length zero. To specify an _absent_ salt, use
+ * `BR_HKDF_NO_SALT` as `salt` parameter (`salt_len` is then ignored).
+ *
+ * \param hc              HKDF context to initialise.
+ * \param digest_vtable   pointer to the hash function implementation vtable.
+ * \param salt            HKDF-Extract salt.
+ * \param salt_len        HKDF-Extract salt length (in bytes).
+ */
+void br_hkdf_init(br_hkdf_context *hc, const br_hash_class *digest_vtable,
+	const void *salt, size_t salt_len);
+
+/**
+ * \brief The special "absent salt" value for HKDF.
+ */
+#define BR_HKDF_NO_SALT   (&br_hkdf_no_salt)
+
+#ifndef BR_DOXYGEN_IGNORE
+extern const unsigned char br_hkdf_no_salt;
+#endif
+
+/**
+ * \brief HKDF input injection (HKDF-Extract).
+ *
+ * This function injects some more input bytes ("key material") into
+ * HKDF. This function may be called several times, after `br_hkdf_init()`
+ * but before `br_hkdf_flip()`.
+ *
+ * \param hc        HKDF context.
+ * \param ikm       extra input bytes.
+ * \param ikm_len   number of extra input bytes.
+ */
+void br_hkdf_inject(br_hkdf_context *hc, const void *ikm, size_t ikm_len);
+
+/**
+ * \brief HKDF switch to the HKDF-Expand phase.
+ *
+ * This call terminates the HKDF-Extract process (input injection), and
+ * starts the HKDF-Expand process (output production).
+ *
+ * \param hc   HKDF context.
+ */
+void br_hkdf_flip(br_hkdf_context *hc);
+
+/**
+ * \brief HKDF output production (HKDF-Expand).
+ *
+ * Produce more output bytes from the current state. This function may be
+ * called several times, but only after `br_hkdf_flip()`.
+ *
+ * Returned value is the number of actually produced bytes. The total
+ * output length is limited to 255 times the output length of the
+ * underlying hash function.
+ *
+ * \param hc         HKDF context.
+ * \param info       application specific information string.
+ * \param info_len   application specific information string length (in bytes).
+ * \param out        destination buffer for the HKDF output.
+ * \param out_len    the length of the requested output (in bytes).
+ * \return  the produced output length (in bytes).
+ */
+size_t br_hkdf_produce(br_hkdf_context *hc,
+	const void *info, size_t info_len, void *out, size_t out_len);
+
+/**
+ * \brief SHAKE context.
+ *
+ * The SHAKE context is initialized with a "security level". The internal
+ * notion is called "capacity"; the capacity is twice the security level
+ * (for instance, SHAKE128 has capacity 256).
+ *
+ * The caller is responsible for allocating the context where
+ * appropriate. Context initialisation and usage incurs no dynamic
+ * allocation, so there is no release function.
+ */
+typedef struct {
+#ifndef BR_DOXYGEN_IGNORE
+	unsigned char dbuf[200];
+	size_t dptr;
+	size_t rate;
+	uint64_t A[25];
+#endif
+} br_shake_context;
+
+/**
+ * \brief SHAKE context initialization.
+ *
+ * The context is initialized for the provided "security level".
+ * Internally, this sets the "capacity" to twice the security level;
+ * thus, for SHAKE128, the `security_level` parameter should be 128,
+ * which corresponds to a 256-bit capacity.
+ *
+ * Allowed security levels are all multiples of 32, from 32 to 768,
+ * inclusive. Larger security levels imply lower performance; levels
+ * beyond 256 bits don't make much sense. Standard levels are 128
+ * and 256 bits (for SHAKE128 and SHAKE256, respectively).
+ *
+ * \param sc               SHAKE context to initialise.
+ * \param security_level   security level (in bits).
+ */
+void br_shake_init(br_shake_context *sc, int security_level);
+
+/**
+ * \brief SHAKE input injection.
+ *
+ * This function injects some more input bytes ("key material") into
+ * SHAKE. This function may be called several times, after `br_shake_init()`
+ * but before `br_shake_flip()`.
+ *
+ * \param sc     SHAKE context.
+ * \param data   extra input bytes.
+ * \param len    number of extra input bytes.
+ */
+void br_shake_inject(br_shake_context *sc, const void *data, size_t len);
+
+/**
+ * \brief SHAKE switch to production phase.
+ *
+ * This call terminates the input injection process, and starts the
+ * output production process.
+ *
+ * \param sc   SHAKE context.
+ */
+void br_shake_flip(br_shake_context *sc);
+
+/**
+ * \brief SHAKE output production.
+ *
+ * Produce more output bytes from the current state. This function may be
+ * called several times, but only after `br_shake_flip()`.
+ *
+ * There is no practical limit to the number of bytes that may be produced.
+ *
+ * \param sc    SHAKE context.
+ * \param out   destination buffer for the SHAKE output.
+ * \param len   the length of the requested output (in bytes).
+ */
+void br_shake_produce(br_shake_context *sc, void *out, size_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/third_party/bearssl/inc/bearssl_pem.h b/third_party/bearssl/inc/bearssl_pem.h
new file mode 100644
index 0000000..8dba582
--- /dev/null
+++ b/third_party/bearssl/inc/bearssl_pem.h
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef BR_BEARSSL_PEM_H__
+#define BR_BEARSSL_PEM_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \file bearssl_pem.h
+ *
+ * # PEM Support
+ *
+ * PEM is a traditional encoding layer used to store binary objects (in
+ * particular X.509 certificates, and private keys) in text files. While
+ * the acronym comes from an old, defunct standard ("Privacy Enhanced
+ * Mail"), the format has been reused, with some variations, by many
+ * systems, and is a _de facto_ standard, even though it is not, actually,
+ * specified in all clarity anywhere.
+ *
+ * ## Format Details
+ *
+ * BearSSL contains a generic, streamed PEM decoder, which handles the
+ * following format:
+ *
+ *   - The input source (a sequence of bytes) is assumed to be the
+ *     encoding of a text file in an ASCII-compatible charset. This
+ *     includes ISO-8859-1, Windows-1252, and UTF-8 encodings. Each
+ *     line ends on a newline character (U+000A LINE FEED). The
+ *     U+000D CARRIAGE RETURN characters are ignored, so the code
+ *     accepts both Windows-style and Unix-style line endings.
+ *
+ *   - Each object begins with a banner that occurs at the start of
+ *     a line; the first banner characters are "`-----BEGIN `" (five
+ *     dashes, the word "BEGIN", and a space). The banner matching is
+ *     not case-sensitive.
+ *
+ *   - The _object name_ consists in the characters that follow the
+ *     banner start sequence, up to the end of the line, but without
+ *     trailing dashes (in "normal" PEM, there are five trailing
+ *     dashes, but this implementation is not picky about these dashes).
+ *     The BearSSL decoder normalises the name characters to uppercase
+ *     (for ASCII letters only) and accepts names up to 127 characters.
+ *
+ *   - The object ends with a banner that again occurs at the start of
+ *     a line, and starts with "`-----END `" (again case-insensitive).
+ *
+ *   - Between that start and end banner, only Base64 data shall occur.
+ *     Base64 converts each sequence of three bytes into four
+ *     characters; the four characters are ASCII letters, digits, "`+`"
+ *     or "`/`" signs, and one or two "`=`" signs may occur in the last
+ *     quartet. Whitespace is ignored (whitespace is any ASCII character
+ *     of code 32 or less, so control characters are whitespace) and
+ *     lines may have arbitrary length; the only restriction is that the
+ *     four characters of a quartet must appear on the same line (no
+ *     line break inside a quartet).
+ *
+ *   - A single file may contain more than one PEM object. Bytes that
+ *     occur between objects are ignored.
+ *
+ *
+ * ## PEM Decoder API
+ *
+ * The PEM decoder offers a state-machine API. The caller allocates a
+ * decoder context, then injects source bytes. Source bytes are pushed
+ * with `br_pem_decoder_push()`. The decoder stops accepting bytes when
+ * it reaches an "event", which is either the start of an object, the
+ * end of an object, or a decoding error within an object.
+ *
+ * The `br_pem_decoder_event()` function is used to obtain the current
+ * event; it also clears it, thus allowing the decoder to accept more
+ * bytes. When an object start event is raised, the decoder context
+ * offers the found object name (normalised to ASCII uppercase).
+ *
+ * When an object is reached, the caller must set an appropriate callback
+ * function, which will receive (by chunks) the decoded object data.
+ *
+ * Since the decoder context makes no dynamic allocation, it requires
+ * no explicit deallocation.
+ */
+
+/**
+ * \brief PEM decoder context.
+ *
+ * Contents are opaque (they should not be accessed directly).
+ */
+typedef struct {
+#ifndef BR_DOXYGEN_IGNORE
+	/* CPU for the T0 virtual machine. */
+	struct {
+		uint32_t *dp;             /* presumably the data stack pointer -- confirm */
+		uint32_t *rp;             /* presumably the return stack pointer -- confirm */
+		const unsigned char *ip;  /* presumably the instruction pointer -- confirm */
+	} cpu;
+	uint32_t dp_stack[32];  /* NOTE(review): looks like backing storage for `cpu.dp` */
+	uint32_t rp_stack[32];  /* NOTE(review): looks like backing storage for `cpu.rp` */
+	int err;
+
+	const unsigned char *hbuf;  /* presumably the not-yet-consumed input -- confirm */
+	size_t hlen;                /* presumably the length of `hbuf`, in bytes -- confirm */
+
+	void (*dest)(void *dest_ctx, const void *src, size_t len);  /* set by br_pem_decoder_setdest() */
+	void *dest_ctx;  /* opaque pointer stored along with `dest` */
+
+	unsigned char event;  /* presumably the value reported by br_pem_decoder_event() -- confirm */
+	char name[128];  /* object name, as returned by br_pem_decoder_name() */
+	unsigned char buf[255];
+	size_t ptr;  /* NOTE(review): looks like the fill index for `buf` -- confirm */
+#endif
+} br_pem_decoder_context;
+
+/**
+ * \brief Initialise a PEM decoder structure.
+ *
+ * \param ctx   decoder context to initialise.
+ */
+void br_pem_decoder_init(br_pem_decoder_context *ctx);
+
+/**
+ * \brief Push some bytes into the decoder.
+ *
+ * Returned value is the number of bytes actually consumed; this may be
+ * less than the number of provided bytes if an event is raised. When an
+ * event is raised, it must be read (with `br_pem_decoder_event()`);
+ * until the event is read, this function will return 0.
+ *
+ * \param ctx    decoder context.
+ * \param data   new data bytes.
+ * \param len    number of new data bytes.
+ * \return  the number of bytes actually received (may be less than `len`).
+ */
+size_t br_pem_decoder_push(br_pem_decoder_context *ctx,
+	const void *data, size_t len);
+
+/**
+ * \brief Set the receiver for decoded data.
+ *
+ * When an object is entered, the provided function (with opaque context
+ * pointer) will be called repeatedly with successive chunks of decoded
+ * data for that object. If `dest` is set to 0, then decoded data is
+ * simply ignored. The receiver can be set at any time, but, in practice,
+ * it should be set immediately after receiving a "start of object"
+ * event.
+ *
+ * \param ctx        decoder context.
+ * \param dest       callback for receiving decoded data (or 0 to ignore it).
+ * \param dest_ctx   opaque context pointer for the `dest` callback.
+ */
+static inline void
+br_pem_decoder_setdest(br_pem_decoder_context *ctx,
+	void (*dest)(void *dest_ctx, const void *src, size_t len),
+	void *dest_ctx)
+{
+	ctx->dest = dest;
+	ctx->dest_ctx = dest_ctx;
+}
+
+/**
+ * \brief Get the last event.
+ *
+ * If an event was raised, then this function returns the event value, and
+ * also clears it, thereby allowing the decoder to proceed. If no event
+ * was raised since the last call to `br_pem_decoder_event()`, then this
+ * function returns 0.
+ *
+ * \param ctx   decoder context.
+ * \return  the raised event, or 0.
+ */
+int br_pem_decoder_event(br_pem_decoder_context *ctx);
+
+/**
+ * \brief Event: start of object.
+ *
+ * This event is raised when the start of a new object has been detected.
+ * The object name (normalised to uppercase) can be accessed with
+ * `br_pem_decoder_name()`.
+ */
+#define BR_PEM_BEGIN_OBJ   1
+
+/**
+ * \brief Event: end of object.
+ *
+ * This event is raised when the end of the current object is reached
+ * (normally, i.e. with no decoding error).
+ */
+#define BR_PEM_END_OBJ     2
+
+/**
+ * \brief Event: decoding error.
+ *
+ * This event is raised when decoding fails within an object.
+ * This formally closes the current object and brings the decoder back
+ * to the "out of any object" state. The offending line in the source
+ * is consumed.
+ */
+#define BR_PEM_ERROR       3
+
+/**
+ * \brief Get the name of the encountered object.
+ *
+ * The encountered object name is defined only when the "start of object"
+ * event is raised. That name is normalised to uppercase (for ASCII letters
+ * only) and does not include trailing dashes.
+ *
+ * \param ctx   decoder context.
+ * \return  the current object name (pointer into the context itself).
+ */
+static inline const char *
+br_pem_decoder_name(br_pem_decoder_context *ctx)
+{
+	return ctx->name;  /* no copy is made: the storage belongs to `ctx` */
+}
+
+/**
+ * \brief Encode an object in PEM.
+ *
+ * This function encodes the provided binary object (`data`, of length `len`
+ * bytes) into PEM. The `banner` text will be included in the header and
+ * footer (e.g. use `"CERTIFICATE"` to get a `"BEGIN CERTIFICATE"` header).
+ *
+ * The length (in characters) of the PEM output is returned; that length
+ * does NOT include the terminating zero, that this function nevertheless
+ * adds. If using the returned value for allocation purposes, the allocated
+ * buffer size MUST be at least one byte larger than the returned size.
+ *
+ * If `dest` is `NULL`, then the encoding does not happen; however, the
+ * length of the encoded object is still computed and returned.
+ *
+ * The `data` pointer may be `NULL` only if `len` is zero (when encoding
+ * an object of length zero, which is not very useful), or when `dest`
+ * is `NULL` (in that case, source data bytes are ignored).
+ *
+ * Some `flags` can be specified to alter the encoding behaviour:
+ *
+ *   - If `BR_PEM_LINE64` is set, then line-breaking will occur after
+ *     every 64 characters of output, instead of the default of 76.
+ *
+ *   - If `BR_PEM_CRLF` is set, then end-of-line sequence will use
+ *     CR+LF instead of a single LF.
+ *
+ * The `data` and `dest` buffers may overlap, in which case the source
+ * binary data is destroyed in the process. Note that the PEM-encoded output
+ * is always larger than the source binary.
+ *
+ * \param dest     the destination buffer (or `NULL`).
+ * \param data     the source buffer (can be `NULL` in some cases).
+ * \param len      the source length (in bytes).
+ * \param banner   the PEM banner expression.
+ * \param flags    the behavioural flags.
+ * \return  the PEM object length (in characters), EXCLUDING the final zero.
+ */
+size_t br_pem_encode(void *dest, const void *data, size_t len,
+	const char *banner, unsigned flags);
+
+/**
+ * \brief PEM encoding flag: split lines at 64 characters.
+ */
+#define BR_PEM_LINE64   0x0001
+
+/**
+ * \brief PEM encoding flag: use CR+LF line endings.
+ */
+#define BR_PEM_CRLF     0x0002
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/third_party/bearssl/inc/bearssl_prf.h b/third_party/bearssl/inc/bearssl_prf.h
new file mode 100644
index 0000000..fdf608c
--- /dev/null
+++ b/third_party/bearssl/inc/bearssl_prf.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef BR_BEARSSL_PRF_H__
+#define BR_BEARSSL_PRF_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \file bearssl_prf.h
+ *
+ * # The TLS PRF
+ *
+ * The "PRF" is the pseudorandom function used internally during the
+ * SSL/TLS handshake, notably to expand negotiated shared secrets into
+ * the symmetric encryption keys that will be used to process the
+ * application data.
+ *
+ * TLS 1.0 and 1.1 define a PRF that is based on both MD5 and SHA-1. This
+ * is implemented by the `br_tls10_prf()` function.
+ *
+ * TLS 1.2 redefines the PRF, using an explicit hash function. The
+ * `br_tls12_sha256_prf()` and `br_tls12_sha384_prf()` functions apply that
+ * PRF with, respectively, SHA-256 and SHA-384. Most standard cipher suites
+ * rely on the SHA-256 based PRF, but some use SHA-384.
+ *
+ * The PRF always uses as input three parameters: a "secret" (some
+ * bytes), a "label" (ASCII string), and a "seed" (again some bytes). An
+ * arbitrary output length can be produced. The "seed" is provided as an
+ * arbitrary number of binary chunks, that gets internally concatenated.
+ */
+
+/**
+ * \brief Type for a seed chunk.
+ *
+ * Each chunk may have an arbitrary length, and may be empty (no byte at
+ * all). If the chunk length is zero, then the pointer to the chunk data
+ * may be `NULL`.
+ */
+typedef struct {
+	/**
+	 * \brief Pointer to the chunk data.
+	 */
+	const void *data;
+
+	/**
+	 * \brief Chunk length (in bytes).
+	 */
+	size_t len;
+} br_tls_prf_seed_chunk;
+
+/**
+ * \brief PRF implementation for TLS 1.0 and 1.1.
+ *
+ * This PRF is the one specified by TLS 1.0 and 1.1. It internally uses
+ * MD5 and SHA-1.
+ *
+ * \param dst          destination buffer.
+ * \param len          output length (in bytes).
+ * \param secret       secret value (key) for this computation.
+ * \param secret_len   length of "secret" (in bytes).
+ * \param label        PRF label (zero-terminated ASCII string).
+ * \param seed_num     number of seed chunks.
+ * \param seed         seed chunks for this computation (usually non-secret).
+ */
+void br_tls10_prf(void *dst, size_t len,
+	const void *secret, size_t secret_len, const char *label,
+	size_t seed_num, const br_tls_prf_seed_chunk *seed);
+
+/**
+ * \brief PRF implementation for TLS 1.2, with SHA-256.
+ *
+ * This PRF is the one specified by TLS 1.2, when the underlying hash
+ * function is SHA-256.
+ *
+ * \param dst          destination buffer.
+ * \param len          output length (in bytes).
+ * \param secret       secret value (key) for this computation.
+ * \param secret_len   length of "secret" (in bytes).
+ * \param label        PRF label (zero-terminated ASCII string).
+ * \param seed_num     number of seed chunks.
+ * \param seed         seed chunks for this computation (usually non-secret).
+ */
+void br_tls12_sha256_prf(void *dst, size_t len,
+	const void *secret, size_t secret_len, const char *label,
+	size_t seed_num, const br_tls_prf_seed_chunk *seed);
+
+/**
+ * \brief PRF implementation for TLS 1.2, with SHA-384.
+ *
+ * This PRF is the one specified by TLS 1.2, when the underlying hash
+ * function is SHA-384.
+ *
+ * \param dst          destination buffer.
+ * \param len          output length (in bytes).
+ * \param secret       secret value (key) for this computation.
+ * \param secret_len   length of "secret" (in bytes).
+ * \param label        PRF label (zero-terminated ASCII string).
+ * \param seed_num     number of seed chunks.
+ * \param seed         seed chunks for this computation (usually non-secret).
+ */
+void br_tls12_sha384_prf(void *dst, size_t len,
+	const void *secret, size_t secret_len, const char *label,
+	size_t seed_num, const br_tls_prf_seed_chunk *seed);
+
+/**
+ * \brief A convenient type name for a PRF implementation.
+ *
+ * \param dst          destination buffer.
+ * \param len          output length (in bytes).
+ * \param secret       secret value (key) for this computation.
+ * \param secret_len   length of "secret" (in bytes).
+ * \param label        PRF label (zero-terminated ASCII string).
+ * \param seed_num     number of seed chunks.
+ * \param seed         seed chunks for this computation (usually non-secret).
+ */
+typedef void (*br_tls_prf_impl)(void *dst, size_t len,
+	const void *secret, size_t secret_len, const char *label,
+	size_t seed_num, const br_tls_prf_seed_chunk *seed);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/third_party/bearssl/inc/bearssl_rand.h b/third_party/bearssl/inc/bearssl_rand.h
new file mode 100644
index 0000000..0a9f544
--- /dev/null
+++ b/third_party/bearssl/inc/bearssl_rand.h
@@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef BR_BEARSSL_RAND_H__
+#define BR_BEARSSL_RAND_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "bearssl_block.h"
+#include "bearssl_hash.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \file bearssl_rand.h
+ *
+ * # Pseudo-Random Generators
+ *
+ * A PRNG is a state-based engine that outputs pseudo-random bytes on
+ * demand. It is initialized with an initial seed, and additional seed
+ * bytes can be added afterwards. Bytes produced depend on the seeds and
+ * also on the exact sequence of calls (including sizes requested for
+ * each call).
+ *
+ *
+ * ## Procedural and OOP API
+ *
+ * For the PRNG of name "`xxx`", two APIs are provided. The _procedural_
+ * API defines a context structure `br_xxx_context` and three functions:
+ *
+ *   - `br_xxx_init()`
+ *
+ *     Initialise the context with an initial seed.
+ *
+ *   - `br_xxx_generate()`
+ *
+ *     Produce some pseudo-random bytes.
+ *
+ *   - `br_xxx_update()`
+ *
+ *     Inject some additional seed.
+ *
+ * The initialisation function sets the first context field (`vtable`)
+ * to a pointer to the vtable that supports the OOP API. The OOP API
+ * provides access to the same functions through function pointers,
+ * named `init()`, `generate()` and `update()`.
+ *
+ * Note that the context initialisation method may accept additional
+ * parameters, provided as a 'const void *' pointer at API level. These
+ * additional parameters depend on the implemented PRNG.
+ *
+ *
+ * ## HMAC_DRBG
+ *
+ * HMAC_DRBG is defined in [NIST SP 800-90A Revision
+ * 1](http://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-90Ar1.pdf).
+ * It uses HMAC repeatedly, over some configurable underlying hash
+ * function. In BearSSL, it is implemented under the "`hmac_drbg`" name.
+ * The "extra parameters" pointer for context initialisation should be
+ * set to a pointer to the vtable for the underlying hash function (e.g.
+ * pointer to `br_sha256_vtable` to use HMAC_DRBG with SHA-256).
+ *
+ * According to the NIST standard, each request shall produce up to
+ * 2<sup>19</sup> bits (i.e. 64 kB of data); moreover, the context shall
+ * be reseeded at least once every 2<sup>48</sup> requests. This
+ * implementation does not maintain the reseed counter (the threshold is
+ * too high to be reached in practice) and does not object to producing
+ * more than 64 kB in a single request; thus, the code cannot fail,
+ * which corresponds to the fact that the API has no room for error
+ * codes. However, this implies that requesting more than 64 kB in one
+ * `generate()` request, or making more than 2<sup>48</sup> requests
+ * without reseeding, is formally out of NIST specification. There is
+ * no currently known security penalty for exceeding the NIST limits,
+ * and, in any case, HMAC_DRBG usage in implementing SSL/TLS always
+ * stays much below these thresholds.
+ *
+ *
+ * ## AESCTR_DRBG
+ *
+ * AESCTR_DRBG is a custom PRNG based on AES-128 in CTR mode. This is
+ * meant to be used only in situations where you are desperate for
+ * speed, and have a hardware-optimized AES/CTR implementation. Whether
+ * this will yield perceptible improvements depends on what you use the
+ * pseudorandom bytes for, and how many you want; for instance, RSA key
+ * pair generation uses a substantial amount of randomness, and using
+ * AESCTR_DRBG instead of HMAC_DRBG yields a 15 to 20% increase in key
+ * generation speed on a recent x86 CPU (Intel Core i7-6567U at 3.30 GHz).
+ *
+ * Internally, it uses CTR mode with successive counter values, starting
+ * at zero (counter value expressed over 128 bits, big-endian convention).
+ * The counter is not allowed to reach 32768; thus, every 32768*16 bytes
+ * at most, the `update()` function is run (on an empty seed, if none is
+ * provided). The `update()` function computes the new AES-128 key by
+ * applying a custom hash function to the concatenation of a state-dependent
+ * word (encryption of an all-one block with the current key) and the new
+ * seed. The custom hash function uses Hirose's construction over AES-256;
+ * see the comments in `aesctr_drbg.c` for details.
+ *
+ * This DRBG does not follow an existing standard, and thus should be
+ * considered as inadequate for production use until it has been properly
+ * analysed.
+ */
+
+/**
+ * \brief Class type for PRNG implementations.
+ *
+ * A `br_prng_class` instance references the methods implementing a PRNG.
+ * Constant instances of this structure are defined for each implemented
+ * PRNG. Such instances are also called "vtables".
+ */
+typedef struct br_prng_class_ br_prng_class;
+struct br_prng_class_ {
+	/**
+	 * \brief Size (in bytes) of the context structure appropriate for
+	 * running this PRNG.
+	 */
+	size_t context_size;
+
+	/**
+	 * \brief Initialisation method.
+	 *
+	 * The context to initialise is provided as a pointer to its
+	 * first field (the vtable pointer); this function sets that
+	 * first field to a pointer to the vtable.
+	 *
+	 * The extra parameters depend on the implementation; each
+	 * implementation defines what kind of extra parameters it
+	 * expects (if any).
+	 *
+	 * Requirements on the initial seed depend on the implemented
+	 * PRNG.
+	 *
+	 * \param ctx        PRNG context to initialise.
+	 * \param params     extra parameters for the PRNG.
+	 * \param seed       initial seed.
+	 * \param seed_len   initial seed length (in bytes).
+	 */
+	void (*init)(const br_prng_class **ctx, const void *params,
+		const void *seed, size_t seed_len);
+
+	/**
+	 * \brief Random bytes generation.
+	 *
+	 * This method produces `len` pseudorandom bytes, in the `out`
+	 * buffer. The context is updated accordingly.
+	 *
+	 * \param ctx   PRNG context.
+	 * \param out   output buffer.
+	 * \param len   number of pseudorandom bytes to produce.
+	 */
+	void (*generate)(const br_prng_class **ctx, void *out, size_t len);
+
+	/**
+	 * \brief Inject additional seed bytes.
+	 *
+	 * The provided seed bytes are added into the PRNG internal
+	 * entropy pool.
+	 *
+	 * \param ctx        PRNG context.
+	 * \param seed       additional seed.
+	 * \param seed_len   additional seed length (in bytes).
+	 */
+	void (*update)(const br_prng_class **ctx,
+		const void *seed, size_t seed_len);
+};
+
+/**
+ * \brief Context for HMAC_DRBG.
+ *
+ * The context contents are opaque, except the first field, which
+ * supports OOP.
+ */
+typedef struct {
+	/**
+	 * \brief Pointer to the vtable.
+	 *
+	 * This field is set with the initialisation method/function.
+	 */
+	const br_prng_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	unsigned char K[64];  /* presumably the HMAC_DRBG "Key" state value -- confirm */
+	unsigned char V[64];  /* presumably the HMAC_DRBG "V" state value -- confirm */
+	const br_hash_class *digest_class;  /* returned by br_hmac_drbg_get_hash() */
+#endif
+} br_hmac_drbg_context;
+
+/**
+ * \brief Statically allocated, constant vtable for HMAC_DRBG.
+ */
+extern const br_prng_class br_hmac_drbg_vtable;
+
+/**
+ * \brief HMAC_DRBG initialisation.
+ *
+ * The context to initialise is provided as a pointer to its first field
+ * (the vtable pointer); this function sets that first field to a
+ * pointer to the vtable.
+ *
+ * The `seed` value is what is called, in NIST terminology, the
+ * concatenation of the "seed", "nonce" and "personalization string", in
+ * that order.
+ *
+ * The `digest_class` parameter defines the underlying hash function.
+ * Formally, the NIST standard specifies that the hash function shall
+ * be only SHA-1 or one of the SHA-2 functions. This implementation also
+ * works with any other implemented hash function (such as MD5), but
+ * this is non-standard and therefore not recommended.
+ *
+ * \param ctx            HMAC_DRBG context to initialise.
+ * \param digest_class   vtable for the underlying hash function.
+ * \param seed           initial seed.
+ * \param seed_len       initial seed length (in bytes).
+ */
+void br_hmac_drbg_init(br_hmac_drbg_context *ctx,
+	const br_hash_class *digest_class, const void *seed, size_t seed_len);
+
+/**
+ * \brief Random bytes generation with HMAC_DRBG.
+ *
+ * This method produces `len` pseudorandom bytes, in the `out`
+ * buffer. The context is updated accordingly. Formally, requesting
+ * more than 65536 bytes in one request falls out of specification
+ * limits (but it won't fail).
+ *
+ * \param ctx   HMAC_DRBG context.
+ * \param out   output buffer.
+ * \param len   number of pseudorandom bytes to produce.
+ */
+void br_hmac_drbg_generate(br_hmac_drbg_context *ctx, void *out, size_t len);
+
+/**
+ * \brief Inject additional seed bytes in HMAC_DRBG.
+ *
+ * The provided seed bytes are added into the HMAC_DRBG internal
+ * entropy pool. The process does not _replace_ existing entropy,
+ * thus pushing non-random bytes (i.e. bytes which are known to the
+ * attackers) does not degrade the overall quality of generated bytes.
+ *
+ * \param ctx        HMAC_DRBG context.
+ * \param seed       additional seed.
+ * \param seed_len   additional seed length (in bytes).
+ */
+void br_hmac_drbg_update(br_hmac_drbg_context *ctx,
+	const void *seed, size_t seed_len);
+
+/**
+ * \brief Get the hash function implementation used by a given instance of
+ * HMAC_DRBG.
+ *
+ * This call MUST NOT be performed on a context which was not
+ * previously initialised.
+ *
+ * \param ctx   HMAC_DRBG context.
+ * \return  the hash function vtable.
+ */
+static inline const br_hash_class *
+br_hmac_drbg_get_hash(const br_hmac_drbg_context *ctx)
+{
+	return ctx->digest_class;
+}
+
+/**
+ * \brief Type for a provider of entropy seeds.
+ *
+ * A "seeder" is a function that is able to obtain random values from
+ * some source and inject them as entropy seed in a PRNG. A seeder
+ * shall guarantee that the total entropy of the injected seed is large
+ * enough to seed a PRNG for purposes of cryptographic key generation
+ * (i.e. at least 128 bits).
+ *
+ * A seeder may report a failure to obtain adequate entropy. Seeders
+ * shall endeavour to fix transient errors themselves by trying again;
+ * thus, callers may consider reported errors as permanent.
+ *
+ * \param ctx   PRNG context to seed.
+ * \return  1 on success, 0 on error.
+ */
+typedef int (*br_prng_seeder)(const br_prng_class **ctx);
+
+/**
+ * \brief Get a seeder backed by the operating system or hardware.
+ *
+ * Get a seeder that feeds on RNG facilities provided by the current
+ * operating system or hardware. If no such facility is known, then 0
+ * is returned.
+ *
+ * If `name` is not `NULL`, then `*name` is set to a symbolic string
+ * that identifies the seeder implementation. If no seeder is returned
+ * and `name` is not `NULL`, then `*name` is set to a pointer to the
+ * constant string `"none"`.
+ *
+ * \param name   receiver for seeder name, or `NULL`.
+ * \return  the system seeder, if available, or 0.
+ */
+br_prng_seeder br_prng_seeder_system(const char **name);
+
+/**
+ * \brief Context for AESCTR_DRBG.
+ *
+ * The context contents are opaque, except the first field, which
+ * supports OOP.
+ */
+typedef struct {
+	/**
+	 * \brief Pointer to the vtable.
+	 *
+	 * This field is set with the initialisation method/function.
+	 */
+	const br_prng_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	br_aes_gen_ctr_keys sk;
+	uint32_t cc;
+#endif
+} br_aesctr_drbg_context;
+
+/**
+ * \brief Statically allocated, constant vtable for AESCTR_DRBG.
+ */
+extern const br_prng_class br_aesctr_drbg_vtable;
+
+/**
+ * \brief AESCTR_DRBG initialisation.
+ *
+ * The context to initialise is provided as a pointer to its first field
+ * (the vtable pointer); this function sets that first field to a
+ * pointer to the vtable.
+ *
+ * The internal AES key is first set to the all-zero key; then, the
+ * `br_aesctr_drbg_update()` function is called with the provided `seed`.
+ * The call is performed even if the seed length (`seed_len`) is zero.
+ *
+ * The `aesctr` parameter defines the underlying AES/CTR implementation.
+ *
+ * \param ctx        AESCTR_DRBG context to initialise.
+ * \param aesctr     vtable for the AES/CTR implementation.
+ * \param seed       initial seed (can be `NULL` if `seed_len` is zero).
+ * \param seed_len   initial seed length (in bytes).
+ */
+void br_aesctr_drbg_init(br_aesctr_drbg_context *ctx,
+	const br_block_ctr_class *aesctr, const void *seed, size_t seed_len);
+
+/**
+ * \brief Random bytes generation with AESCTR_DRBG.
+ *
+ * This method produces `len` pseudorandom bytes, in the `out`
+ * buffer. The context is updated accordingly.
+ *
+ * \param ctx   AESCTR_DRBG context.
+ * \param out   output buffer.
+ * \param len   number of pseudorandom bytes to produce.
+ */
+void br_aesctr_drbg_generate(br_aesctr_drbg_context *ctx,
+	void *out, size_t len);
+
+/**
+ * \brief Inject additional seed bytes in AESCTR_DRBG.
+ *
+ * The provided seed bytes are added into the AESCTR_DRBG internal
+ * entropy pool. The process does not _replace_ existing entropy,
+ * thus pushing non-random bytes (i.e. bytes which are known to the
+ * attackers) does not degrade the overall quality of generated bytes.
+ *
+ * \param ctx        AESCTR_DRBG context.
+ * \param seed       additional seed.
+ * \param seed_len   additional seed length (in bytes).
+ */
+void br_aesctr_drbg_update(br_aesctr_drbg_context *ctx,
+	const void *seed, size_t seed_len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/third_party/bearssl/inc/bearssl_rsa.h b/third_party/bearssl/inc/bearssl_rsa.h
new file mode 100644
index 0000000..0a069fd
--- /dev/null
+++ b/third_party/bearssl/inc/bearssl_rsa.h
@@ -0,0 +1,1655 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef BR_BEARSSL_RSA_H__
+#define BR_BEARSSL_RSA_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "bearssl_hash.h"
+#include "bearssl_rand.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \file bearssl_rsa.h
+ *
+ * # RSA
+ *
+ * This file documents the RSA implementations provided with BearSSL.
+ * Note that the SSL engine accesses these implementations through a
+ * configurable API, so it is possible to, for instance, run a SSL
+ * server which uses a RSA engine which is not based on this code.
+ *
+ * ## Key Elements
+ *
+ * RSA public and private keys consist in lists of big integers. All
+ * such integers are represented with big-endian unsigned notation:
+ * first byte is the most significant, and the value is positive (so
+ * there is no dedicated "sign bit"). Public and private key structures
+ * thus contain, for each such integer, a pointer to the first value byte
+ * (`unsigned char *`), and a length (`size_t`) which is the number of
+ * relevant bytes. As a general rule, minimal-length encoding is not
+ * enforced: values may have extra leading bytes of value 0.
+ *
+ * RSA public keys consist in two integers:
+ *
+ *   - the modulus (`n`);
+ *   - the public exponent (`e`).
+ *
+ * RSA private keys, as defined in
+ * [PKCS#1](https://tools.ietf.org/html/rfc3447), contain eight integers:
+ *
+ *   - the modulus (`n`);
+ *   - the public exponent (`e`);
+ *   - the private exponent (`d`);
+ *   - the first prime factor (`p`);
+ *   - the second prime factor (`q`);
+ *   - the first reduced exponent (`dp`, which is `d` modulo `p-1`);
+ *   - the second reduced exponent (`dq`, which is `d` modulo `q-1`);
+ *   - the CRT coefficient (`iq`, the inverse of `q` modulo `p`).
+ *
+ * However, the implementations defined in BearSSL use only five of
+ * these integers: `p`, `q`, `dp`, `dq` and `iq`.
+ *
+ * ## Security Features and Limitations
+ *
+ * The implementations contained in BearSSL have the following limitations
+ * and features:
+ *
+ *   - They are constant-time. This means that the execution time and
+ *     memory access pattern may depend on the _lengths_ of the private
+ *     key components, but not on their value, nor on the value of
+ *     the operand. Note that this property is not achieved through
+ *     random masking, but "true" constant-time code.
+ *
+ *   - They support only private keys with two prime factors. RSA private
+ *     keys with three or more prime factors are allowed by PKCS#1, but
+ *     rarely used; they may offer faster operations, at the expense of
+ *     more code and potentially a reduction in security if there are
+ *     "too many" prime factors.
+ *
+ *   - The public exponent may have arbitrary length. Of course, it is
+ *     a good idea to keep public exponents small, so that public key
+ *     operations are fast; but, contrary to some widely deployed
+ *     implementations, BearSSL has no problem with public exponents
+ *     longer than 32 bits.
+ *
+ *   - The two prime factors of the modulus need not have the same length
+ *     (but severely imbalanced factor lengths might reduce security).
+ *     Similarly, there is no requirement that the first factor (`p`)
+ *     be greater than the second factor (`q`).
+ *
+ *   - Prime factors and modulus must be smaller than a compile-time limit.
+ *     This is made necessary by the use of fixed-size stack buffers, and
+ *     the limit has been adjusted to keep stack usage under 2 kB for the
+ *     RSA operations. Currently, the maximum modulus size is 4096 bits,
+ *     and the maximum prime factor size is 2080 bits.
+ *
+ *   - The RSA functions themselves do not enforce lower size limits,
+ *     except that which is absolutely necessary for the operation to
+ *     mathematically make sense (e.g. a PKCS#1 v1.5 signature with
+ *     SHA-1 requires a modulus of at least 361 bits). It is up to users
+ *     of this code to enforce size limitations when appropriate (e.g.
+ *     the X.509 validation engine, by default, rejects RSA keys of
+ *     less than 1017 bits).
+ *
+ *   - Within the size constraints expressed above, arbitrary bit lengths
+ *     are supported. There is no requirement that prime factors or
+ *     modulus have a size multiple of 8 or 16.
+ *
+ *   - When verifying PKCS#1 v1.5 signatures, both variants of the hash
+ *     function identifying header (with and without the ASN.1 NULL) are
+ *     supported. When producing such signatures, the variant with the
+ *     ASN.1 NULL is used.
+ *
+ * ## Implementations
+ *
+ * Four RSA implementations are included:
+ *
+ *   - The **i32** implementation internally represents big integers
+ *     as arrays of 32-bit integers. It is perfunctory and portable,
+ *     but not very efficient.
+ *
+ *   - The **i31** implementation uses 32-bit integers, each containing
+ *     31 bits worth of integer data. The i31 implementation is somewhat
+ *     faster than the i32 implementation (the reduced integer size makes
+ *     carry propagation easier) for a similar code footprint, but uses
+ *     very slightly larger stack buffers (about 4% bigger).
+ *
+ *   - The **i62** implementation is similar to the i31 implementation,
+ *     except that it internally leverages the 64x64->128 multiplication
+ *     opcode. This implementation is available only on architectures
+ *     where such an opcode exists. It is much faster than i31.
+ *
+ *   - The **i15** implementation uses 16-bit integers, each containing
+ *     15 bits worth of integer data. Multiplication results fit on
+ *     32 bits, so this won't use the "widening" multiplication routine
+ *     on ARM Cortex M0/M0+, for much better performance and constant-time
+ *     execution.
+ */
+
+/**
+ * \brief RSA public key.
+ *
+ * The structure references the modulus and the public exponent. Both
+ * integers use unsigned big-endian representation; extra leading bytes
+ * of value 0 are allowed.
+ */
+typedef struct {
+	/** \brief Modulus. */
+	unsigned char *n;
+	/** \brief Modulus length (in bytes). */
+	size_t nlen;
+	/** \brief Public exponent. */
+	unsigned char *e;
+	/** \brief Public exponent length (in bytes). */
+	size_t elen;
+} br_rsa_public_key;
+
+/**
+ * \brief RSA private key.
+ *
+ * The structure references the private factors, reduced private
+ * exponents, and CRT coefficient. It also contains the bit length of
+ * the modulus. The big integers use unsigned big-endian representation;
+ * extra leading bytes of value 0 are allowed. However, the modulus bit
+ * length (`n_bitlen`) MUST be exact.
+ */
+typedef struct {
+	/** \brief Modulus bit length (in bits, exact value). */
+	uint32_t n_bitlen;
+	/** \brief First prime factor. */
+	unsigned char *p;
+	/** \brief First prime factor length (in bytes). */
+	size_t plen;
+	/** \brief Second prime factor. */
+	unsigned char *q;
+	/** \brief Second prime factor length (in bytes). */
+	size_t qlen;
+	/** \brief First reduced private exponent. */
+	unsigned char *dp;
+	/** \brief First reduced private exponent length (in bytes). */
+	size_t dplen;
+	/** \brief Second reduced private exponent. */
+	unsigned char *dq;
+	/** \brief Second reduced private exponent length (in bytes). */
+	size_t dqlen;
+	/** \brief CRT coefficient. */
+	unsigned char *iq;
+	/** \brief CRT coefficient length (in bytes). */
+	size_t iqlen;
+} br_rsa_private_key;
+
+/**
+ * \brief Type for a RSA public key engine.
+ *
+ * The public key engine performs the modular exponentiation of the
+ * provided value with the public exponent. The value is modified in
+ * place.
+ *
+ * The value length (`xlen`) is verified to have _exactly_ the same
+ * length as the modulus (actual modulus length, without extra leading
+ * zeros in the modulus representation in memory). If the length does
+ * not match, then this function returns 0 and `x[]` is unmodified.
+ *
+ * If `xlen` is correct, then `x[]` is modified. Returned value is 1
+ * on success, 0 on error. Error conditions include an oversized `x[]`
+ * (the array has the same length as the modulus, but the numerical value
+ * is not lower than the modulus) and an invalid modulus (e.g. an even
+ * integer). If an error is reported, then the new contents of `x[]` are
+ * unspecified.
+ *
+ * \param x      operand to exponentiate.
+ * \param xlen   length of the operand (in bytes).
+ * \param pk     RSA public key.
+ * \return  1 on success, 0 on error.
+ */
+typedef uint32_t (*br_rsa_public)(unsigned char *x, size_t xlen,
+	const br_rsa_public_key *pk);
+
+/**
+ * \brief Type for a RSA signature verification engine (PKCS#1 v1.5).
+ *
+ * Parameters are:
+ *
+ *   - The signature itself. The provided array is NOT modified.
+ *
+ *   - The encoded OID for the hash function. The provided array must begin
+ *     with a single byte that contains the length of the OID value (in
+ *     bytes), followed by exactly that many bytes. This parameter may
+ *     also be `NULL`, in which case the raw hash value should be used
+ *     with the PKCS#1 v1.5 "type 1" padding (as used in SSL/TLS up
+ *     to TLS-1.1, with a 36-byte hash value).
+ *
+ *   - The hash output length, in bytes.
+ *
+ *   - The public key.
+ *
+ *   - An output buffer for the hash value. The caller must still compare
+ *     it with the hash of the data over which the signature is computed.
+ *
+ * **Constraints:**
+ *
+ *   - Hash length MUST be no more than 64 bytes.
+ *
+ *   - OID value length MUST be no more than 32 bytes (i.e. `hash_oid[0]`
+ *     must have a value in the 0..32 range, inclusive).
+ *
+ * This function verifies that the signature length (`xlen`) matches the
+ * modulus length (this function returns 0 on mismatch). If the modulus
+ * size exceeds the maximum supported RSA size, then the function also
+ * returns 0.
+ *
+ * Returned value is 1 on success, 0 on error.
+ *
+ * Implementations of this type need not be constant-time.
+ *
+ * \param x          signature buffer.
+ * \param xlen       signature length (in bytes).
+ * \param hash_oid   encoded hash algorithm OID (or `NULL`).
+ * \param hash_len   expected hash value length (in bytes).
+ * \param pk         RSA public key.
+ * \param hash_out   output buffer for the hash value.
+ * \return  1 on success, 0 on error.
+ */
+typedef uint32_t (*br_rsa_pkcs1_vrfy)(const unsigned char *x, size_t xlen,
+	const unsigned char *hash_oid, size_t hash_len,
+	const br_rsa_public_key *pk, unsigned char *hash_out);
+
+/**
+ * \brief Type for a RSA signature verification engine (PSS).
+ *
+ * Parameters are:
+ *
+ *   - The signature itself. The provided array is NOT modified.
+ *
+ *   - The hash function which was used to hash the message.
+ *
+ *   - The hash function to use with MGF1 within the PSS padding. This
+ *     is not necessarily the same hash function as the one which was
+ *     used to hash the signed message.
+ *
+ *   - The hashed message (as an array of bytes).
+ *
+ *   - The PSS salt length (in bytes).
+ *
+ *   - The public key.
+ *
+ * **Constraints:**
+ *
+ *   - Hash message length MUST be no more than 64 bytes.
+ *
+ * Note that, contrary to PKCS#1 v1.5 signature, the hash value of the
+ * signed data cannot be extracted from the signature; it must be
+ * provided to the verification function.
+ *
+ * This function verifies that the signature length (`xlen`) matches the
+ * modulus length (this function returns 0 on mismatch). If the modulus
+ * size exceeds the maximum supported RSA size, then the function also
+ * returns 0.
+ *
+ * Returned value is 1 on success, 0 on error.
+ *
+ * Implementations of this type need not be constant-time.
+ *
+ * \param x          signature buffer.
+ * \param xlen       signature length (in bytes).
+ * \param hf_data    hash function applied on the message.
+ * \param hf_mgf1    hash function to use with MGF1.
+ * \param hash       hash value of the signed message.
+ * \param salt_len   PSS salt length (in bytes).
+ * \param pk         RSA public key.
+ * \return  1 on success, 0 on error.
+ */
+typedef uint32_t (*br_rsa_pss_vrfy)(const unsigned char *x, size_t xlen,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1, 
+	const void *hash, size_t salt_len, const br_rsa_public_key *pk);
+
+/**
+ * \brief Type for a RSA encryption engine (OAEP).
+ *
+ * Parameters are:
+ *
+ *   - A source of random bytes. The source must be already initialized.
+ *
+ *   - A hash function, used internally with the mask generation function
+ *     (MGF1).
+ *
+ *   - A label. The `label` pointer may be `NULL` if `label_len` is zero
+ *     (an empty label, which is the default in PKCS#1 v2.2).
+ *
+ *   - The public key.
+ *
+ *   - The destination buffer. Its maximum length (in bytes) is provided;
+ *     if that length is lower than the public key length, then an error
+ *     is reported.
+ *
+ *   - The source message.
+ *
+ * The encrypted message output has exactly the same length as the modulus
+ * (mathematical length, in bytes, not counting extra leading zeros in the
+ * modulus representation in the public key).
+ *
+ * The source message (`src`, length `src_len`) may overlap with the
+ * destination buffer (`dst`, length `dst_max_len`).
+ *
+ * This function returns the actual encrypted message length, in bytes;
+ * on error, zero is returned. An error is reported if the output buffer
+ * is not large enough, or the public key is invalid, or the public key
+ * modulus exceeds the maximum supported RSA size.
+ *
+ * \param rnd           source of random bytes.
+ * \param dig           hash function to use with MGF1.
+ * \param label         label value (may be `NULL` if `label_len` is zero).
+ * \param label_len     label length, in bytes.
+ * \param pk            RSA public key.
+ * \param dst           destination buffer.
+ * \param dst_max_len   destination buffer length (maximum encrypted data size).
+ * \param src           message to encrypt.
+ * \param src_len       source message length (in bytes).
+ * \return  encrypted message length (in bytes), or 0 on error.
+ */
+typedef size_t (*br_rsa_oaep_encrypt)(
+	const br_prng_class **rnd, const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_public_key *pk,
+	void *dst, size_t dst_max_len,
+	const void *src, size_t src_len);
+
+/**
+ * \brief Type for a RSA private key engine.
+ *
+ * The `x[]` buffer is modified in place, and its length is inferred from
+ * the modulus length (`x[]` is assumed to have a length of
+ * `(sk->n_bitlen+7)/8` bytes).
+ *
+ * Returned value is 1 on success, 0 on error.
+ *
+ * \param x    operand to exponentiate.
+ * \param sk   RSA private key.
+ * \return  1 on success, 0 on error.
+ */
+typedef uint32_t (*br_rsa_private)(unsigned char *x,
+	const br_rsa_private_key *sk);
+
+/**
+ * \brief Type for a RSA signature generation engine (PKCS#1 v1.5).
+ *
+ * Parameters are:
+ *
+ *   - The encoded OID for the hash function. The provided array must begin
+ *     with a single byte that contains the length of the OID value (in
+ *     bytes), followed by exactly that many bytes. This parameter may
+ *     also be `NULL`, in which case the raw hash value should be used
+ *     with the PKCS#1 v1.5 "type 1" padding (as used in SSL/TLS up
+ *     to TLS-1.1, with a 36-byte hash value).
+ *
+ *   - The hash value computed over the data to sign (its length is
+ *     expressed in bytes).
+ *
+ *   - The RSA private key.
+ *
+ *   - The output buffer, that receives the signature.
+ *
+ * Returned value is 1 on success, 0 on error. Error conditions include
+ * a too small modulus for the provided hash OID and value, or some
+ * invalid key parameters. The signature length is exactly
+ * `(sk->n_bitlen+7)/8` bytes.
+ *
+ * This function is expected to be constant-time with regards to the
+ * private key bytes (lengths of the modulus and the individual factors
+ * may leak, though) and to the hashed data.
+ *
+ * \param hash_oid   encoded hash algorithm OID (or `NULL`).
+ * \param hash       hash value.
+ * \param hash_len   hash value length (in bytes).
+ * \param sk         RSA private key.
+ * \param x          output buffer for the signature value.
+ * \return  1 on success, 0 on error.
+ */
+typedef uint32_t (*br_rsa_pkcs1_sign)(const unsigned char *hash_oid,
+	const unsigned char *hash, size_t hash_len,
+	const br_rsa_private_key *sk, unsigned char *x);
+
+/**
+ * \brief Type for a RSA signature generation engine (PSS).
+ *
+ * Parameters are:
+ *
+ *   - An initialized PRNG for salt generation. If the salt length is
+ *     zero (`salt_len` parameter), then the PRNG is optional (this is
+ *     not the typical case, as the security proof of RSA/PSS is
+ *     tighter when a non-empty salt is used).
+ *
+ *   - The hash function which was used to hash the message.
+ *
+ *   - The hash function to use with MGF1 within the PSS padding. This
+ *     is not necessarily the same function as the one used to hash the
+ *     message.
+ *
+ *   - The hashed message.
+ *
+ *   - The salt length, in bytes.
+ *
+ *   - The RSA private key.
+ *
+ *   - The output buffer, that receives the signature.
+ *
+ * Returned value is 1 on success, 0 on error. Error conditions include
+ * a too small modulus for the provided hash and salt lengths, or some
+ * invalid key parameters. The signature length is exactly
+ * `(sk->n_bitlen+7)/8` bytes.
+ *
+ * This function is expected to be constant-time with regards to the
+ * private key bytes (lengths of the modulus and the individual factors
+ * may leak, though) and to the hashed data.
+ *
+ * \param rng        PRNG for salt generation (`NULL` if `salt_len` is zero).
+ * \param hf_data    hash function used to hash the signed data.
+ * \param hf_mgf1    hash function to use with MGF1.
+ * \param hash_value hashed message.
+ * \param salt_len   salt length (in bytes).
+ * \param sk         RSA private key.
+ * \param x          output buffer for the signature value.
+ * \return  1 on success, 0 on error.
+ */
+typedef uint32_t (*br_rsa_pss_sign)(const br_prng_class **rng,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const unsigned char *hash_value, size_t salt_len,
+	const br_rsa_private_key *sk, unsigned char *x);
+
+/**
+ * \brief Encoded OID for SHA-1 (in RSA PKCS#1 signatures).
+ */
+#define BR_HASH_OID_SHA1     \
+	((const unsigned char *)"\x05\x2B\x0E\x03\x02\x1A")
+
+/**
+ * \brief Encoded OID for SHA-224 (in RSA PKCS#1 signatures).
+ */
+#define BR_HASH_OID_SHA224   \
+	((const unsigned char *)"\x09\x60\x86\x48\x01\x65\x03\x04\x02\x04")
+
+/**
+ * \brief Encoded OID for SHA-256 (in RSA PKCS#1 signatures).
+ */
+#define BR_HASH_OID_SHA256   \
+	((const unsigned char *)"\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01")
+
+/**
+ * \brief Encoded OID for SHA-384 (in RSA PKCS#1 signatures).
+ */
+#define BR_HASH_OID_SHA384   \
+	((const unsigned char *)"\x09\x60\x86\x48\x01\x65\x03\x04\x02\x02")
+
+/**
+ * \brief Encoded OID for SHA-512 (in RSA PKCS#1 signatures).
+ */
+#define BR_HASH_OID_SHA512   \
+	((const unsigned char *)"\x09\x60\x86\x48\x01\x65\x03\x04\x02\x03")
+
+/**
+ * \brief Type for a RSA decryption engine (OAEP).
+ *
+ * Parameters are:
+ *
+ *   - A hash function, used internally with the mask generation function
+ *     (MGF1).
+ *
+ *   - A label. The `label` pointer may be `NULL` if `label_len` is zero
+ *     (an empty label, which is the default in PKCS#1 v2.2).
+ *
+ *   - The private key.
+ *
+ *   - The source and destination buffer. The buffer initially contains
+ *     the encrypted message; the buffer contents are altered, and the
+ *     decrypted message is written at the start of that buffer
+ *     (decrypted message is always shorter than the encrypted message).
+ *
+ * If decryption fails in any way, then `*len` is unmodified, and the
+ * function returns 0. Otherwise, `*len` is set to the decrypted message
+ * length, and 1 is returned. The implementation is responsible for
+ * checking that the input message length matches the key modulus length,
+ * and that the padding is correct.
+ *
+ * Implementations MUST use constant-time check of the validity of the
+ * OAEP padding, at least until the leading byte and hash value have
+ * been checked. Whether overall decryption worked, and the length of
+ * the decrypted message, may leak.
+ *
+ * \param dig         hash function to use with MGF1.
+ * \param label       label value (may be `NULL` if `label_len` is zero).
+ * \param label_len   label length, in bytes.
+ * \param sk          RSA private key.
+ * \param data        input/output buffer.
+ * \param len         encrypted/decrypted message length.
+ * \return  1 on success, 0 on error.
+ */
+typedef uint32_t (*br_rsa_oaep_decrypt)(
+	const br_hash_class *dig, const void *label, size_t label_len,
+	const br_rsa_private_key *sk, void *data, size_t *len);
+
+/*
+ * RSA "i32" engine. Integers are internally represented as arrays of
+ * 32-bit integers, and the core multiplication primitive is the
+ * 32x32->64 multiplication.
+ */
+
+/**
+ * \brief RSA public key engine "i32".
+ *
+ * \see br_rsa_public
+ *
+ * \param x      operand to exponentiate.
+ * \param xlen   length of the operand (in bytes).
+ * \param pk     RSA public key.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i32_public(unsigned char *x, size_t xlen,
+	const br_rsa_public_key *pk);
+
+/**
+ * \brief RSA signature verification engine "i32" (PKCS#1 v1.5 signatures).
+ *
+ * \see br_rsa_pkcs1_vrfy
+ *
+ * \param x          signature buffer.
+ * \param xlen       signature length (in bytes).
+ * \param hash_oid   encoded hash algorithm OID (or `NULL`).
+ * \param hash_len   expected hash value length (in bytes).
+ * \param pk         RSA public key.
+ * \param hash_out   output buffer for the hash value.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i32_pkcs1_vrfy(const unsigned char *x, size_t xlen,
+	const unsigned char *hash_oid, size_t hash_len,
+	const br_rsa_public_key *pk, unsigned char *hash_out);
+
+/**
+ * \brief RSA signature verification engine "i32" (PSS signatures).
+ *
+ * \see br_rsa_pss_vrfy
+ *
+ * \param x          signature buffer.
+ * \param xlen       signature length (in bytes).
+ * \param hf_data    hash function applied on the message.
+ * \param hf_mgf1    hash function to use with MGF1.
+ * \param hash       hash value of the signed message.
+ * \param salt_len   PSS salt length (in bytes).
+ * \param pk         RSA public key.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i32_pss_vrfy(const unsigned char *x, size_t xlen,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1, 
+	const void *hash, size_t salt_len, const br_rsa_public_key *pk);
+
+/**
+ * \brief RSA private key engine "i32".
+ *
+ * \see br_rsa_private
+ *
+ * \param x    operand to exponentiate.
+ * \param sk   RSA private key.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i32_private(unsigned char *x,
+	const br_rsa_private_key *sk);
+
+/**
+ * \brief RSA signature generation engine "i32" (PKCS#1 v1.5 signatures).
+ *
+ * \see br_rsa_pkcs1_sign
+ *
+ * \param hash_oid   encoded hash algorithm OID (or `NULL`).
+ * \param hash       hash value.
+ * \param hash_len   hash value length (in bytes).
+ * \param sk         RSA private key.
+ * \param x          output buffer for the signature value.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i32_pkcs1_sign(const unsigned char *hash_oid,
+	const unsigned char *hash, size_t hash_len,
+	const br_rsa_private_key *sk, unsigned char *x);
+
+/**
+ * \brief RSA signature generation engine "i32" (PSS signatures).
+ *
+ * \see br_rsa_pss_sign
+ *
+ * \param rng        PRNG for salt generation (`NULL` if `salt_len` is zero).
+ * \param hf_data    hash function used to hash the signed data.
+ * \param hf_mgf1    hash function to use with MGF1.
+ * \param hash_value hashed message.
+ * \param salt_len   salt length (in bytes).
+ * \param sk         RSA private key.
+ * \param x          output buffer for the signature value.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i32_pss_sign(const br_prng_class **rng,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const unsigned char *hash_value, size_t salt_len,
+	const br_rsa_private_key *sk, unsigned char *x);
+
+/*
+ * RSA "i31" engine. Similar to i32, but only 31 bits are used per 32-bit
+ * word. This uses slightly more stack space (about 4% more) and code
+ * space, but it is quite a bit faster.
+ */
+
+/**
+ * \brief RSA public key engine "i31".
+ *
+ * \see br_rsa_public
+ *
+ * \param x      operand to exponentiate.
+ * \param xlen   length of the operand (in bytes).
+ * \param pk     RSA public key.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i31_public(unsigned char *x, size_t xlen,
+	const br_rsa_public_key *pk);
+
+/**
+ * \brief RSA signature verification engine "i31" (PKCS#1 v1.5 signatures).
+ *
+ * \see br_rsa_pkcs1_vrfy
+ *
+ * \param x          signature buffer.
+ * \param xlen       signature length (in bytes).
+ * \param hash_oid   encoded hash algorithm OID (or `NULL`).
+ * \param hash_len   expected hash value length (in bytes).
+ * \param pk         RSA public key.
+ * \param hash_out   output buffer for the hash value.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i31_pkcs1_vrfy(const unsigned char *x, size_t xlen,
+	const unsigned char *hash_oid, size_t hash_len,
+	const br_rsa_public_key *pk, unsigned char *hash_out);
+
+/**
+ * \brief RSA signature verification engine "i31" (PSS signatures).
+ *
+ * \see br_rsa_pss_vrfy
+ *
+ * \param x          signature buffer.
+ * \param xlen       signature length (in bytes).
+ * \param hf_data    hash function applied on the message.
+ * \param hf_mgf1    hash function to use with MGF1.
+ * \param hash       hash value of the signed message.
+ * \param salt_len   PSS salt length (in bytes).
+ * \param pk         RSA public key.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i31_pss_vrfy(const unsigned char *x, size_t xlen,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1, 
+	const void *hash, size_t salt_len, const br_rsa_public_key *pk);
+
+/**
+ * \brief RSA private key engine "i31".
+ *
+ * \see br_rsa_private
+ *
+ * \param x    operand to exponentiate.
+ * \param sk   RSA private key.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i31_private(unsigned char *x,
+	const br_rsa_private_key *sk);
+
+/**
+ * \brief RSA signature generation engine "i31" (PKCS#1 v1.5 signatures).
+ *
+ * \see br_rsa_pkcs1_sign
+ *
+ * \param hash_oid   encoded hash algorithm OID (or `NULL`).
+ * \param hash       hash value.
+ * \param hash_len   hash value length (in bytes).
+ * \param sk         RSA private key.
+ * \param x          output buffer for the signature value.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i31_pkcs1_sign(const unsigned char *hash_oid,
+	const unsigned char *hash, size_t hash_len,
+	const br_rsa_private_key *sk, unsigned char *x);
+
+/**
+ * \brief RSA signature generation engine "i31" (PSS signatures).
+ *
+ * \see br_rsa_pss_sign
+ *
+ * \param rng        PRNG for salt generation (`NULL` if `salt_len` is zero).
+ * \param hf_data    hash function used to hash the signed data.
+ * \param hf_mgf1    hash function to use with MGF1.
+ * \param hash_value hashed message.
+ * \param salt_len   salt length (in bytes).
+ * \param sk         RSA private key.
+ * \param x          output buffer for the signature value.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i31_pss_sign(const br_prng_class **rng,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const unsigned char *hash_value, size_t salt_len,
+	const br_rsa_private_key *sk, unsigned char *x);
+
+/*
+ * RSA "i62" engine. Similar to i31, but internal computations use
+ * 64x64->128 multiplications. This is available only on architectures
+ * that offer such an opcode.
+ */
+
+/**
+ * \brief RSA public key engine "i62".
+ *
+ * This function is defined only on architectures that offer a 64x64->128
+ * opcode. Use `br_rsa_i62_public_get()` to dynamically obtain a pointer
+ * to that function.
+ *
+ * \see br_rsa_public
+ *
+ * \param x      operand to exponentiate.
+ * \param xlen   length of the operand (in bytes).
+ * \param pk     RSA public key.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i62_public(unsigned char *x, size_t xlen,
+	const br_rsa_public_key *pk);
+
+/**
+ * \brief RSA signature verification engine "i62" (PKCS#1 v1.5 signatures).
+ *
+ * This function is defined only on architectures that offer a 64x64->128
+ * opcode. Use `br_rsa_i62_pkcs1_vrfy_get()` to dynamically obtain a pointer
+ * to that function.
+ *
+ * \see br_rsa_pkcs1_vrfy
+ *
+ * \param x          signature buffer.
+ * \param xlen       signature length (in bytes).
+ * \param hash_oid   encoded hash algorithm OID (or `NULL`).
+ * \param hash_len   expected hash value length (in bytes).
+ * \param pk         RSA public key.
+ * \param hash_out   output buffer for the hash value.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i62_pkcs1_vrfy(const unsigned char *x, size_t xlen,
+	const unsigned char *hash_oid, size_t hash_len,
+	const br_rsa_public_key *pk, unsigned char *hash_out);
+
+/**
+ * \brief RSA signature verification engine "i62" (PSS signatures).
+ *
+ * This function is defined only on architectures that offer a 64x64->128
+ * opcode. Use `br_rsa_i62_pss_vrfy_get()` to dynamically obtain a pointer
+ * to that function.
+ *
+ * \see br_rsa_pss_vrfy
+ *
+ * \param x          signature buffer.
+ * \param xlen       signature length (in bytes).
+ * \param hf_data    hash function applied on the message.
+ * \param hf_mgf1    hash function to use with MGF1.
+ * \param hash       hash value of the signed message.
+ * \param salt_len   PSS salt length (in bytes).
+ * \param pk         RSA public key.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i62_pss_vrfy(const unsigned char *x, size_t xlen,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1, 
+	const void *hash, size_t salt_len, const br_rsa_public_key *pk);
+
+/**
+ * \brief RSA private key engine "i62".
+ *
+ * This function is defined only on architectures that offer a 64x64->128
+ * opcode. Use `br_rsa_i62_private_get()` to dynamically obtain a pointer
+ * to that function.
+ *
+ * \see br_rsa_private
+ *
+ * \param x    operand to exponentiate.
+ * \param sk   RSA private key.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i62_private(unsigned char *x,
+	const br_rsa_private_key *sk);
+
+/**
+ * \brief RSA signature generation engine "i62" (PKCS#1 v1.5 signatures).
+ *
+ * This function is defined only on architecture that offer a 64x64->128
+ * opcode. Use `br_rsa_i62_pkcs1_sign_get()` to dynamically obtain a pointer
+ * to that function.
+ *
+ * \see br_rsa_pkcs1_sign
+ *
+ * \param hash_oid   encoded hash algorithm OID (or `NULL`).
+ * \param hash       hash value.
+ * \param hash_len   hash value length (in bytes).
+ * \param sk         RSA private key.
+ * \param x          output buffer for the signature value.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i62_pkcs1_sign(const unsigned char *hash_oid,
+	const unsigned char *hash, size_t hash_len,
+	const br_rsa_private_key *sk, unsigned char *x);
+
+/**
+ * \brief RSA signature generation engine "i62" (PSS signatures).
+ *
+ * This function is defined only on architectures that offer a 64x64->128
+ * opcode. Use `br_rsa_i62_pss_sign_get()` to dynamically obtain a pointer
+ * to that function.
+ *
+ * \see br_rsa_pss_sign
+ *
+ * \param rng        PRNG for salt generation (`NULL` if `salt_len` is zero).
+ * \param hf_data    hash function used to hash the signed data.
+ * \param hf_mgf1    hash function to use with MGF1.
+ * \param hash_value   hashed message.
+ * \param salt_len   salt length (in bytes).
+ * \param sk         RSA private key.
+ * \param x          output buffer for the signature value.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i62_pss_sign(const br_prng_class **rng,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const unsigned char *hash_value, size_t salt_len,
+	const br_rsa_private_key *sk, unsigned char *x);
+
+/**
+ * \brief Get the RSA "i62" implementation (public key operations),
+ * if available.
+ *
+ * \return  the implementation, or 0.
+ */
+br_rsa_public br_rsa_i62_public_get(void);
+
+/**
+ * \brief Get the RSA "i62" implementation (PKCS#1 v1.5 signature verification),
+ * if available.
+ *
+ * \return  the implementation, or 0.
+ */
+br_rsa_pkcs1_vrfy br_rsa_i62_pkcs1_vrfy_get(void);
+
+/**
+ * \brief Get the RSA "i62" implementation (PSS signature verification),
+ * if available.
+ *
+ * \return  the implementation, or 0.
+ */
+br_rsa_pss_vrfy br_rsa_i62_pss_vrfy_get(void);
+
+/**
+ * \brief Get the RSA "i62" implementation (private key operations),
+ * if available.
+ *
+ * \return  the implementation, or 0.
+ */
+br_rsa_private br_rsa_i62_private_get(void);
+
+/**
+ * \brief Get the RSA "i62" implementation (PKCS#1 v1.5 signature generation),
+ * if available.
+ *
+ * \return  the implementation, or 0.
+ */
+br_rsa_pkcs1_sign br_rsa_i62_pkcs1_sign_get(void);
+
+/**
+ * \brief Get the RSA "i62" implementation (PSS signature generation),
+ * if available.
+ *
+ * \return  the implementation, or 0.
+ */
+br_rsa_pss_sign br_rsa_i62_pss_sign_get(void);
+
+/**
+ * \brief Get the RSA "i62" implementation (OAEP encryption),
+ * if available.
+ *
+ * \return  the implementation, or 0.
+ */
+br_rsa_oaep_encrypt br_rsa_i62_oaep_encrypt_get(void);
+
+/**
+ * \brief Get the RSA "i62" implementation (OAEP decryption),
+ * if available.
+ *
+ * \return  the implementation, or 0.
+ */
+br_rsa_oaep_decrypt br_rsa_i62_oaep_decrypt_get(void);
+
+/*
+ * RSA "i15" engine. Integers are represented as 15-bit integers, so
+ * the code uses only 32-bit multiplication (no 64-bit result), which
+ * is vastly faster (and constant-time) on the ARM Cortex M0/M0+.
+ */
+
+/**
+ * \brief RSA public key engine "i15".
+ *
+ * \see br_rsa_public
+ *
+ * \param x      operand to exponentiate.
+ * \param xlen   length of the operand (in bytes).
+ * \param pk     RSA public key.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i15_public(unsigned char *x, size_t xlen,
+	const br_rsa_public_key *pk);
+
+/**
+ * \brief RSA signature verification engine "i15" (PKCS#1 v1.5 signatures).
+ *
+ * \see br_rsa_pkcs1_vrfy
+ *
+ * \param x          signature buffer.
+ * \param xlen       signature length (in bytes).
+ * \param hash_oid   encoded hash algorithm OID (or `NULL`).
+ * \param hash_len   expected hash value length (in bytes).
+ * \param pk         RSA public key.
+ * \param hash_out   output buffer for the hash value.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i15_pkcs1_vrfy(const unsigned char *x, size_t xlen,
+	const unsigned char *hash_oid, size_t hash_len,
+	const br_rsa_public_key *pk, unsigned char *hash_out);
+
+/**
+ * \brief RSA signature verification engine "i15" (PSS signatures).
+ *
+ * \see br_rsa_pss_vrfy
+ *
+ * \param x          signature buffer.
+ * \param xlen       signature length (in bytes).
+ * \param hf_data    hash function applied on the message.
+ * \param hf_mgf1    hash function to use with MGF1.
+ * \param hash       hash value of the signed message.
+ * \param salt_len   PSS salt length (in bytes).
+ * \param pk         RSA public key.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i15_pss_vrfy(const unsigned char *x, size_t xlen,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1, 
+	const void *hash, size_t salt_len, const br_rsa_public_key *pk);
+
+/**
+ * \brief RSA private key engine "i15".
+ *
+ * \see br_rsa_private
+ *
+ * \param x    operand to exponentiate.
+ * \param sk   RSA private key.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i15_private(unsigned char *x,
+	const br_rsa_private_key *sk);
+
+/**
+ * \brief RSA signature generation engine "i15" (PKCS#1 v1.5 signatures).
+ *
+ * \see br_rsa_pkcs1_sign
+ *
+ * \param hash_oid   encoded hash algorithm OID (or `NULL`).
+ * \param hash       hash value.
+ * \param hash_len   hash value length (in bytes).
+ * \param sk         RSA private key.
+ * \param x          output buffer for the signature value.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i15_pkcs1_sign(const unsigned char *hash_oid,
+	const unsigned char *hash, size_t hash_len,
+	const br_rsa_private_key *sk, unsigned char *x);
+
+/**
+ * \brief RSA signature generation engine "i15" (PSS signatures).
+ *
+ * \see br_rsa_pss_sign
+ *
+ * \param rng        PRNG for salt generation (`NULL` if `salt_len` is zero).
+ * \param hf_data    hash function used to hash the signed data.
+ * \param hf_mgf1    hash function to use with MGF1.
+ * \param hash_value   hashed message.
+ * \param salt_len   salt length (in bytes).
+ * \param sk         RSA private key.
+ * \param x          output buffer for the signature value.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i15_pss_sign(const br_prng_class **rng,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const unsigned char *hash_value, size_t salt_len,
+	const br_rsa_private_key *sk, unsigned char *x);
+
+/**
+ * \brief Get "default" RSA implementation (public-key operations).
+ *
+ * This returns the preferred implementation of RSA (public-key operations)
+ * on the current system.
+ *
+ * \return  the default implementation.
+ */
+br_rsa_public br_rsa_public_get_default(void);
+
+/**
+ * \brief Get "default" RSA implementation (private-key operations).
+ *
+ * This returns the preferred implementation of RSA (private-key operations)
+ * on the current system.
+ *
+ * \return  the default implementation.
+ */
+br_rsa_private br_rsa_private_get_default(void);
+
+/**
+ * \brief Get "default" RSA implementation (PKCS#1 v1.5 signature verification).
+ *
+ * This returns the preferred implementation of RSA (signature verification)
+ * on the current system.
+ *
+ * \return  the default implementation.
+ */
+br_rsa_pkcs1_vrfy br_rsa_pkcs1_vrfy_get_default(void);
+
+/**
+ * \brief Get "default" RSA implementation (PSS signature verification).
+ *
+ * This returns the preferred implementation of RSA (signature verification)
+ * on the current system.
+ *
+ * \return  the default implementation.
+ */
+br_rsa_pss_vrfy br_rsa_pss_vrfy_get_default(void);
+
+/**
+ * \brief Get "default" RSA implementation (PKCS#1 v1.5 signature generation).
+ *
+ * This returns the preferred implementation of RSA (signature generation)
+ * on the current system.
+ *
+ * \return  the default implementation.
+ */
+br_rsa_pkcs1_sign br_rsa_pkcs1_sign_get_default(void);
+
+/**
+ * \brief Get "default" RSA implementation (PSS signature generation).
+ *
+ * This returns the preferred implementation of RSA (signature generation)
+ * on the current system.
+ *
+ * \return  the default implementation.
+ */
+br_rsa_pss_sign br_rsa_pss_sign_get_default(void);
+
+/**
+ * \brief Get "default" RSA implementation (OAEP encryption).
+ *
+ * This returns the preferred implementation of RSA (OAEP encryption)
+ * on the current system.
+ *
+ * \return  the default implementation.
+ */
+br_rsa_oaep_encrypt br_rsa_oaep_encrypt_get_default(void);
+
+/**
+ * \brief Get "default" RSA implementation (OAEP decryption).
+ *
+ * This returns the preferred implementation of RSA (OAEP decryption)
+ * on the current system.
+ *
+ * \return  the default implementation.
+ */
+br_rsa_oaep_decrypt br_rsa_oaep_decrypt_get_default(void);
+
+/**
+ * \brief RSA decryption helper, for SSL/TLS.
+ *
+ * This function performs the RSA decryption for a RSA-based key exchange
+ * in a SSL/TLS server. The provided RSA engine is used. The `data`
+ * parameter points to the value to decrypt, of length `len` bytes. On
+ * success, the 48-byte pre-master secret is copied into `data`, starting
+ * at the first byte of that buffer; on error, the contents of `data`
+ * become indeterminate.
+ *
+ * This function first checks that the provided value length (`len`) is
+ * not lower than 59 bytes, and matches the RSA modulus length; if either
+ * of these properties is not met, then this function returns 0 and the
+ * buffer is unmodified.
+ *
+ * Otherwise, decryption and then padding verification are performed, both
+ * in constant-time. A decryption error, or a bad padding, or an
+ * incorrect decrypted value length are reported with a returned value of
+ * 0; on success, 1 is returned. The caller (SSL server engine) is supposed
+ * to proceed with a random pre-master secret in case of error.
+ *
+ * \param core   RSA private key engine.
+ * \param sk     RSA private key.
+ * \param data   input/output buffer.
+ * \param len    length (in bytes) of the data to decrypt.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_ssl_decrypt(br_rsa_private core, const br_rsa_private_key *sk,
+	unsigned char *data, size_t len);
+
+/**
+ * \brief RSA encryption (OAEP) with the "i15" engine.
+ *
+ * \see br_rsa_oaep_encrypt
+ *
+ * \param rnd           source of random bytes.
+ * \param dig           hash function to use with MGF1.
+ * \param label         label value (may be `NULL` if `label_len` is zero).
+ * \param label_len     label length, in bytes.
+ * \param pk            RSA public key.
+ * \param dst           destination buffer.
+ * \param dst_max_len   destination buffer length (maximum encrypted data size).
+ * \param src           message to encrypt.
+ * \param src_len       source message length (in bytes).
+ * \return  encrypted message length (in bytes), or 0 on error.
+ */
+size_t br_rsa_i15_oaep_encrypt(
+	const br_prng_class **rnd, const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_public_key *pk,
+	void *dst, size_t dst_max_len,
+	const void *src, size_t src_len);
+
+/**
+ * \brief RSA decryption (OAEP) with the "i15" engine.
+ *
+ * \see br_rsa_oaep_decrypt
+ *
+ * \param dig         hash function to use with MGF1.
+ * \param label       label value (may be `NULL` if `label_len` is zero).
+ * \param label_len   label length, in bytes.
+ * \param sk          RSA private key.
+ * \param data        input/output buffer.
+ * \param len         encrypted/decrypted message length.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i15_oaep_decrypt(
+	const br_hash_class *dig, const void *label, size_t label_len,
+	const br_rsa_private_key *sk, void *data, size_t *len);
+
+/**
+ * \brief RSA encryption (OAEP) with the "i31" engine.
+ *
+ * \see br_rsa_oaep_encrypt
+ *
+ * \param rnd           source of random bytes.
+ * \param dig           hash function to use with MGF1.
+ * \param label         label value (may be `NULL` if `label_len` is zero).
+ * \param label_len     label length, in bytes.
+ * \param pk            RSA public key.
+ * \param dst           destination buffer.
+ * \param dst_max_len   destination buffer length (maximum encrypted data size).
+ * \param src           message to encrypt.
+ * \param src_len       source message length (in bytes).
+ * \return  encrypted message length (in bytes), or 0 on error.
+ */
+size_t br_rsa_i31_oaep_encrypt(
+	const br_prng_class **rnd, const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_public_key *pk,
+	void *dst, size_t dst_max_len,
+	const void *src, size_t src_len);
+
+/**
+ * \brief RSA decryption (OAEP) with the "i31" engine.
+ *
+ * \see br_rsa_oaep_decrypt
+ *
+ * \param dig         hash function to use with MGF1.
+ * \param label       label value (may be `NULL` if `label_len` is zero).
+ * \param label_len   label length, in bytes.
+ * \param sk          RSA private key.
+ * \param data        input/output buffer.
+ * \param len         encrypted/decrypted message length.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i31_oaep_decrypt(
+	const br_hash_class *dig, const void *label, size_t label_len,
+	const br_rsa_private_key *sk, void *data, size_t *len);
+
+/**
+ * \brief RSA encryption (OAEP) with the "i32" engine.
+ *
+ * \see br_rsa_oaep_encrypt
+ *
+ * \param rnd           source of random bytes.
+ * \param dig           hash function to use with MGF1.
+ * \param label         label value (may be `NULL` if `label_len` is zero).
+ * \param label_len     label length, in bytes.
+ * \param pk            RSA public key.
+ * \param dst           destination buffer.
+ * \param dst_max_len   destination buffer length (maximum encrypted data size).
+ * \param src           message to encrypt.
+ * \param src_len       source message length (in bytes).
+ * \return  encrypted message length (in bytes), or 0 on error.
+ */
+size_t br_rsa_i32_oaep_encrypt(
+	const br_prng_class **rnd, const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_public_key *pk,
+	void *dst, size_t dst_max_len,
+	const void *src, size_t src_len);
+
+/**
+ * \brief RSA decryption (OAEP) with the "i32" engine.
+ *
+ * \see br_rsa_oaep_decrypt
+ *
+ * \param dig         hash function to use with MGF1.
+ * \param label       label value (may be `NULL` if `label_len` is zero).
+ * \param label_len   label length, in bytes.
+ * \param sk          RSA private key.
+ * \param data        input/output buffer.
+ * \param len         encrypted/decrypted message length.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i32_oaep_decrypt(
+	const br_hash_class *dig, const void *label, size_t label_len,
+	const br_rsa_private_key *sk, void *data, size_t *len);
+
+/**
+ * \brief RSA encryption (OAEP) with the "i62" engine.
+ *
+ * This function is defined only on architectures that offer a 64x64->128
+ * opcode. Use `br_rsa_i62_oaep_encrypt_get()` to dynamically obtain a pointer
+ * to that function.
+ *
+ * \see br_rsa_oaep_encrypt
+ *
+ * \param rnd           source of random bytes.
+ * \param dig           hash function to use with MGF1.
+ * \param label         label value (may be `NULL` if `label_len` is zero).
+ * \param label_len     label length, in bytes.
+ * \param pk            RSA public key.
+ * \param dst           destination buffer.
+ * \param dst_max_len   destination buffer length (maximum encrypted data size).
+ * \param src           message to encrypt.
+ * \param src_len       source message length (in bytes).
+ * \return  encrypted message length (in bytes), or 0 on error.
+ */
+size_t br_rsa_i62_oaep_encrypt(
+	const br_prng_class **rnd, const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_public_key *pk,
+	void *dst, size_t dst_max_len,
+	const void *src, size_t src_len);
+
+/**
+ * \brief RSA decryption (OAEP) with the "i62" engine.
+ *
+ * This function is defined only on architectures that offer a 64x64->128
+ * opcode. Use `br_rsa_i62_oaep_decrypt_get()` to dynamically obtain a pointer
+ * to that function.
+ *
+ * \see br_rsa_oaep_decrypt
+ *
+ * \param dig         hash function to use with MGF1.
+ * \param label       label value (may be `NULL` if `label_len` is zero).
+ * \param label_len   label length, in bytes.
+ * \param sk          RSA private key.
+ * \param data        input/output buffer.
+ * \param len         encrypted/decrypted message length.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i62_oaep_decrypt(
+	const br_hash_class *dig, const void *label, size_t label_len,
+	const br_rsa_private_key *sk, void *data, size_t *len);
+
+/**
+ * \brief Get buffer size to hold RSA private key elements.
+ *
+ * This macro returns the length (in bytes) of the buffer needed to
+ * receive the elements of a RSA private key, as generated by one of
+ * the `br_rsa_*_keygen()` functions. If the provided size is a constant
+ * expression, then the whole macro evaluates to a constant expression.
+ *
+ * \param size   target key size (modulus size, in bits)
+ * \return  the length of the private key buffer, in bytes.
+ */
+#define BR_RSA_KBUF_PRIV_SIZE(size)    (5 * (((size) + 15) >> 4))
+
+/**
+ * \brief Get buffer size to hold RSA public key elements.
+ *
+ * This macro returns the length (in bytes) of the buffer needed to
+ * receive the elements of a RSA public key, as generated by one of
+ * the `br_rsa_*_keygen()` functions. If the provided size is a constant
+ * expression, then the whole macro evaluates to a constant expression.
+ *
+ * \param size   target key size (modulus size, in bits)
+ * \return  the length of the public key buffer, in bytes.
+ */
+#define BR_RSA_KBUF_PUB_SIZE(size)     (4 + (((size) + 7) >> 3))
+
+/**
+ * \brief Type for RSA key pair generator implementation.
+ *
+ * This function generates a new RSA key pair whose modulus has bit
+ * length `size` bits. The private key elements are written in the
+ * `kbuf_priv` buffer, and pointer values and length fields to these
+ * elements are populated in the provided private key structure `sk`.
+ * Similarly, the public key elements are written in `kbuf_pub`, with
+ * pointers and lengths set in `pk`.
+ *
+ * If `pk` is `NULL`, then `kbuf_pub` may be `NULL`, and only the
+ * private key is set.
+ *
+ * If `pubexp` is not zero, then its value will be used as public
+ * exponent. Valid RSA public exponent values are odd integers
+ * greater than 1. If `pubexp` is zero, then the public exponent will
+ * have value 3.
+ *
+ * The provided PRNG (`rng_ctx`) must have already been initialized
+ * and seeded.
+ *
+ * Returned value is 1 on success, 0 on error. An error is reported
+ * if the requested size is outside of the supported key sizes, or
+ * if an invalid non-zero public exponent value is provided. Supported
+ * range starts at 512 bits, and up to an implementation-defined
+ * maximum (by default 4096 bits). Note that key sizes up to 768 bits
+ * have been broken in practice, and sizes lower than 2048 bits are
+ * usually considered to be weak and should not be used.
+ *
+ * \param rng_ctx     source PRNG context (already initialized)
+ * \param sk          RSA private key structure (destination)
+ * \param kbuf_priv   buffer for private key elements
+ * \param pk          RSA public key structure (destination), or `NULL`
+ * \param kbuf_pub    buffer for public key elements, or `NULL`
+ * \param size        target RSA modulus size (in bits)
+ * \param pubexp      public exponent to use, or zero
+ * \return  1 on success, 0 on error (invalid parameters)
+ */
+typedef uint32_t (*br_rsa_keygen)(
+	const br_prng_class **rng_ctx,
+	br_rsa_private_key *sk, void *kbuf_priv,
+	br_rsa_public_key *pk, void *kbuf_pub,
+	unsigned size, uint32_t pubexp);
+
+/**
+ * \brief RSA key pair generation with the "i15" engine.
+ *
+ * \see br_rsa_keygen
+ *
+ * \param rng_ctx     source PRNG context (already initialized)
+ * \param sk          RSA private key structure (destination)
+ * \param kbuf_priv   buffer for private key elements
+ * \param pk          RSA public key structure (destination), or `NULL`
+ * \param kbuf_pub    buffer for public key elements, or `NULL`
+ * \param size        target RSA modulus size (in bits)
+ * \param pubexp      public exponent to use, or zero
+ * \return  1 on success, 0 on error (invalid parameters)
+ */
+uint32_t br_rsa_i15_keygen(
+	const br_prng_class **rng_ctx,
+	br_rsa_private_key *sk, void *kbuf_priv,
+	br_rsa_public_key *pk, void *kbuf_pub,
+	unsigned size, uint32_t pubexp);
+
+/**
+ * \brief RSA key pair generation with the "i31" engine.
+ *
+ * \see br_rsa_keygen
+ *
+ * \param rng_ctx     source PRNG context (already initialized)
+ * \param sk          RSA private key structure (destination)
+ * \param kbuf_priv   buffer for private key elements
+ * \param pk          RSA public key structure (destination), or `NULL`
+ * \param kbuf_pub    buffer for public key elements, or `NULL`
+ * \param size        target RSA modulus size (in bits)
+ * \param pubexp      public exponent to use, or zero
+ * \return  1 on success, 0 on error (invalid parameters)
+ */
+uint32_t br_rsa_i31_keygen(
+	const br_prng_class **rng_ctx,
+	br_rsa_private_key *sk, void *kbuf_priv,
+	br_rsa_public_key *pk, void *kbuf_pub,
+	unsigned size, uint32_t pubexp);
+
+/**
+ * \brief RSA key pair generation with the "i62" engine.
+ *
+ * This function is defined only on architectures that offer a 64x64->128
+ * opcode. Use `br_rsa_i62_keygen_get()` to dynamically obtain a pointer
+ * to that function.
+ *
+ * \see br_rsa_keygen
+ *
+ * \param rng_ctx     source PRNG context (already initialized)
+ * \param sk          RSA private key structure (destination)
+ * \param kbuf_priv   buffer for private key elements
+ * \param pk          RSA public key structure (destination), or `NULL`
+ * \param kbuf_pub    buffer for public key elements, or `NULL`
+ * \param size        target RSA modulus size (in bits)
+ * \param pubexp      public exponent to use, or zero
+ * \return  1 on success, 0 on error (invalid parameters)
+ */
+uint32_t br_rsa_i62_keygen(
+	const br_prng_class **rng_ctx,
+	br_rsa_private_key *sk, void *kbuf_priv,
+	br_rsa_public_key *pk, void *kbuf_pub,
+	unsigned size, uint32_t pubexp);
+
+/**
+ * \brief Get the RSA "i62" implementation (key pair generation),
+ * if available.
+ *
+ * \return  the implementation, or 0.
+ */
+br_rsa_keygen br_rsa_i62_keygen_get(void);
+
+/**
+ * \brief Get "default" RSA implementation (key pair generation).
+ *
+ * This returns the preferred implementation of RSA (key pair generation)
+ * on the current system.
+ *
+ * \return  the default implementation.
+ */
+br_rsa_keygen br_rsa_keygen_get_default(void);
+
+/**
+ * \brief Type for a modulus computing function.
+ *
+ * Such a function computes the public modulus from the private key. The
+ * encoded modulus (unsigned big-endian) is written on `n`, and the size
+ * (in bytes) is returned. If `n` is `NULL`, then the size is returned but
+ * the modulus itself is not computed.
+ *
+ * If the key size exceeds an internal limit, 0 is returned.
+ *
+ * \param n    destination buffer (or `NULL`).
+ * \param sk   RSA private key.
+ * \return  the modulus length (in bytes), or 0.
+ */
+typedef size_t (*br_rsa_compute_modulus)(void *n, const br_rsa_private_key *sk);
+
+/**
+ * \brief Recompute RSA modulus ("i15" engine).
+ *
+ * \see br_rsa_compute_modulus
+ *
+ * \param n    destination buffer (or `NULL`).
+ * \param sk   RSA private key.
+ * \return  the modulus length (in bytes), or 0.
+ */
+size_t br_rsa_i15_compute_modulus(void *n, const br_rsa_private_key *sk);
+
+/**
+ * \brief Recompute RSA modulus ("i31" engine).
+ *
+ * \see br_rsa_compute_modulus
+ *
+ * \param n    destination buffer (or `NULL`).
+ * \param sk   RSA private key.
+ * \return  the modulus length (in bytes), or 0.
+ */
+size_t br_rsa_i31_compute_modulus(void *n, const br_rsa_private_key *sk);
+
+/**
+ * \brief Get "default" RSA implementation (recompute modulus).
+ *
+ * This returns the preferred implementation of RSA (recompute modulus)
+ * on the current system.
+ *
+ * \return  the default implementation.
+ */
+br_rsa_compute_modulus br_rsa_compute_modulus_get_default(void);
+
+/**
+ * \brief Type for a public exponent computing function.
+ *
+ * Such a function recomputes the public exponent from the private key.
+ * 0 is returned if any of the following occurs:
+ *
+ *   - Either `p` or `q` is not equal to 3 modulo 4.
+ *
+ *   - The public exponent does not fit on 32 bits.
+ *
+ *   - An internal limit is exceeded.
+ *
+ *   - The private key is invalid in some way.
+ *
+ * For all private keys produced by the key generator functions
+ * (`br_rsa_keygen` type), this function succeeds and returns the true
+ * public exponent. The public exponent is always an odd integer greater
+ * than 1.
+ *
+ * \return  the public exponent, or 0.
+ */
+typedef uint32_t (*br_rsa_compute_pubexp)(const br_rsa_private_key *sk);
+
+/**
+ * \brief Recompute RSA public exponent ("i15" engine).
+ *
+ * \see br_rsa_compute_pubexp
+ *
+ * \return  the public exponent, or 0.
+ */
+uint32_t br_rsa_i15_compute_pubexp(const br_rsa_private_key *sk);
+
+/**
+ * \brief Recompute RSA public exponent ("i31" engine).
+ *
+ * \see br_rsa_compute_pubexp
+ *
+ * \return  the public exponent, or 0.
+ */
+uint32_t br_rsa_i31_compute_pubexp(const br_rsa_private_key *sk);
+
+/**
+ * \brief Get "default" RSA implementation (recompute public exponent).
+ *
+ * This returns the preferred implementation of RSA (recompute public
+ * exponent) on the current system.
+ *
+ * \return  the default implementation.
+ */
+br_rsa_compute_pubexp br_rsa_compute_pubexp_get_default(void);
+
+/**
+ * \brief Type for a private exponent computing function.
+ *
+ * An RSA private key (`br_rsa_private_key`) contains two reduced
+ * private exponents, which are sufficient to perform private key
+ * operations. However, standard encoding formats for RSA private keys
+ * require also a copy of the complete private exponent (non-reduced),
+ * which this function recomputes.
+ *
+ * This function succeeds if all the following conditions hold:
+ *
+ *   - Both private factors `p` and `q` are equal to 3 modulo 4.
+ *
+ *   - The provided public exponent `pubexp` is correct, and, in particular,
+ *     is odd, relatively prime to `p-1` and `q-1`, and greater than 1.
+ *
+ *   - No internal storage limit is exceeded.
+ *
+ * For all private keys produced by the key generator functions
+ * (`br_rsa_keygen` type), this function succeeds. Note that the API
+ * restricts the public exponent to a maximum size of 32 bits.
+ *
+ * The encoded private exponent is written in `d` (unsigned big-endian
+ * convention), and the length (in bytes) is returned. If `d` is `NULL`,
+ * then the exponent is not written anywhere, but the length is still
+ * returned. On error, 0 is returned.
+ *
+ * Not all error conditions are detected when `d` is `NULL`; therefore, the
+ * returned value shall be checked also when actually producing the value.
+ *
+ * \param d        destination buffer (or `NULL`).
+ * \param sk       RSA private key.
+ * \param pubexp   the public exponent.
+ * \return  the private exponent length (in bytes), or 0.
+ */
+typedef size_t (*br_rsa_compute_privexp)(void *d,
+	const br_rsa_private_key *sk, uint32_t pubexp);
+
+/**
+ * \brief Recompute RSA private exponent ("i15" engine).
+ *
+ * \see br_rsa_compute_privexp
+ *
+ * \param d        destination buffer (or `NULL`).
+ * \param sk       RSA private key.
+ * \param pubexp   the public exponent.
+ * \return  the private exponent length (in bytes), or 0.
+ */
+size_t br_rsa_i15_compute_privexp(void *d,
+	const br_rsa_private_key *sk, uint32_t pubexp);
+
+/**
+ * \brief Recompute RSA private exponent ("i31" engine).
+ *
+ * \see br_rsa_compute_privexp
+ *
+ * \param d        destination buffer (or `NULL`).
+ * \param sk       RSA private key.
+ * \param pubexp   the public exponent.
+ * \return  the private exponent length (in bytes), or 0.
+ */
+size_t br_rsa_i31_compute_privexp(void *d,
+	const br_rsa_private_key *sk, uint32_t pubexp);
+
+/**
+ * \brief Get "default" RSA implementation (recompute private exponent).
+ *
+ * This returns the preferred implementation of RSA (recompute private
+ * exponent) on the current system.
+ *
+ * \return  the default implementation.
+ */
+br_rsa_compute_privexp br_rsa_compute_privexp_get_default(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/third_party/bearssl/inc/bearssl_ssl.h b/third_party/bearssl/inc/bearssl_ssl.h
new file mode 100644
index 0000000..e91df47
--- /dev/null
+++ b/third_party/bearssl/inc/bearssl_ssl.h
@@ -0,0 +1,4296 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef BR_BEARSSL_SSL_H__
+#define BR_BEARSSL_SSL_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "bearssl_block.h"
+#include "bearssl_hash.h"
+#include "bearssl_hmac.h"
+#include "bearssl_prf.h"
+#include "bearssl_rand.h"
+#include "bearssl_x509.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \file bearssl_ssl.h
+ *
+ * # SSL
+ *
+ * For an overview of the SSL/TLS API, see [the BearSSL Web
+ * site](https://www.bearssl.org/api1.html).
+ *
+ * The `BR_TLS_*` constants correspond to the standard cipher suites and
+ * their values in the [IANA
+ * registry](http://www.iana.org/assignments/tls-parameters/tls-parameters.xhtml#tls-parameters-4).
+ *
+ * The `BR_ALERT_*` constants are for standard TLS alert messages. When
+ * a fatal alert message is sent or received, then the SSL engine context
+ * status is set to the sum of that alert value (an integer in the 0..255
+ * range) and a fixed offset (`BR_ERR_SEND_FATAL_ALERT` for a sent alert,
+ * `BR_ERR_RECV_FATAL_ALERT` for a received alert).
+ */
+
+/** \brief Optimal input buffer size. */
+#define BR_SSL_BUFSIZE_INPUT    (16384 + 325)
+
+/** \brief Optimal output buffer size. */
+#define BR_SSL_BUFSIZE_OUTPUT   (16384 + 85)
+
+/** \brief Optimal buffer size for monodirectional engine
+    (shared input/output buffer). */
+#define BR_SSL_BUFSIZE_MONO     BR_SSL_BUFSIZE_INPUT
+
+/** \brief Optimal buffer size for bidirectional engine
+    (single buffer split into two separate input/output buffers). */
+#define BR_SSL_BUFSIZE_BIDI     (BR_SSL_BUFSIZE_INPUT + BR_SSL_BUFSIZE_OUTPUT)
+
+/*
+ * Constants for known SSL/TLS protocol versions (SSL 3.0, TLS 1.0, TLS 1.1
+ * and TLS 1.2). Note that though there is a constant for SSL 3.0, that
+ * protocol version is not actually supported.
+ */
+
+/** \brief Protocol version: SSL 3.0 (unsupported). */
+#define BR_SSL30   0x0300
+/** \brief Protocol version: TLS 1.0. */
+#define BR_TLS10   0x0301
+/** \brief Protocol version: TLS 1.1. */
+#define BR_TLS11   0x0302
+/** \brief Protocol version: TLS 1.2. */
+#define BR_TLS12   0x0303
+
+/*
+ * Error constants. They are used to report the reason why a context has
+ * been marked as failed.
+ *
+ * Implementation note: SSL-level error codes should be in the 1..31
+ * range. The 32..63 range is for certificate decoding and validation
+ * errors. Fatal alerts use the 256..511 (received) and 512..767 (sent) ranges.
+ */
+
+/** \brief SSL status: no error so far (0). */
+#define BR_ERR_OK                      0
+
+/** \brief SSL status: caller-provided parameter is incorrect. */
+#define BR_ERR_BAD_PARAM               1
+
+/** \brief SSL status: operation requested by the caller cannot be applied
+    with the current context state (e.g. reading data while outgoing data
+    is waiting to be sent). */
+#define BR_ERR_BAD_STATE               2
+
+/** \brief SSL status: incoming protocol or record version is unsupported. */
+#define BR_ERR_UNSUPPORTED_VERSION     3
+
+/** \brief SSL status: incoming record version does not match the expected
+    version. */
+#define BR_ERR_BAD_VERSION             4
+
+/** \brief SSL status: incoming record length is invalid. */
+#define BR_ERR_BAD_LENGTH              5
+
+/** \brief SSL status: incoming record is too large to be processed, or
+    buffer is too small for the handshake message to send. */
+#define BR_ERR_TOO_LARGE               6
+
+/** \brief SSL status: decryption found an invalid padding, or the record
+    MAC is not correct. */
+#define BR_ERR_BAD_MAC                 7
+
+/** \brief SSL status: no initial entropy was provided, and none can be
+    obtained from the OS. */
+#define BR_ERR_NO_RANDOM               8
+
+/** \brief SSL status: incoming record type is unknown. */
+#define BR_ERR_UNKNOWN_TYPE            9
+
+/** \brief SSL status: incoming record or message has wrong type with
+    regards to the current engine state. */
+#define BR_ERR_UNEXPECTED             10
+
+/** \brief SSL status: ChangeCipherSpec message from the peer has invalid
+    contents. */
+#define BR_ERR_BAD_CCS                12
+
+/** \brief SSL status: alert message from the peer has invalid contents
+    (odd length). */
+#define BR_ERR_BAD_ALERT              13
+
+/** \brief SSL status: incoming handshake message decoding failed. */
+#define BR_ERR_BAD_HANDSHAKE          14
+
+/** \brief SSL status: ServerHello contains a session ID which is larger
+    than 32 bytes. */
+#define BR_ERR_OVERSIZED_ID           15
+
+/** \brief SSL status: server wants to use a cipher suite that we did
+    not claim to support. This is also reported if we tried to advertise
+    a cipher suite that we do not support. */
+#define BR_ERR_BAD_CIPHER_SUITE       16
+
+/** \brief SSL status: server wants to use a compression that we did not
+    claim to support. */
+#define BR_ERR_BAD_COMPRESSION        17
+
+/** \brief SSL status: server's max fragment length does not match
+    client's. */
+#define BR_ERR_BAD_FRAGLEN            18
+
+/** \brief SSL status: secure renegotiation failed. */
+#define BR_ERR_BAD_SECRENEG           19
+
+/** \brief SSL status: server sent an extension type that we did not
+    announce, or used the same extension type several times in a single
+    ServerHello. */
+#define BR_ERR_EXTRA_EXTENSION        20
+
+/** \brief SSL status: invalid Server Name Indication contents (when
+    used by the server, this extension shall be empty). */
+#define BR_ERR_BAD_SNI                21
+
+/** \brief SSL status: invalid ServerHelloDone from the server (length
+    is not 0). */
+#define BR_ERR_BAD_HELLO_DONE         22
+
+/** \brief SSL status: internal limit exceeded (e.g. server's public key
+    is too large). */
+#define BR_ERR_LIMIT_EXCEEDED         23
+
+/** \brief SSL status: Finished message from peer does not match the
+    expected value. */
+#define BR_ERR_BAD_FINISHED           24
+
+/** \brief SSL status: session resumption attempt with distinct version
+    or cipher suite. */
+#define BR_ERR_RESUME_MISMATCH        25
+
+/** \brief SSL status: unsupported or invalid algorithm (ECDHE curve,
+    signature algorithm, hash function). */
+#define BR_ERR_INVALID_ALGORITHM      26
+
+/** \brief SSL status: invalid signature (on ServerKeyExchange from
+    server, or in CertificateVerify from client). */
+#define BR_ERR_BAD_SIGNATURE          27
+
+/** \brief SSL status: peer's public key does not have the proper type
+    or is not allowed for requested operation. */
+#define BR_ERR_WRONG_KEY_USAGE        28
+
+/** \brief SSL status: client did not send a certificate upon request,
+    or the client certificate could not be validated. */
+#define BR_ERR_NO_CLIENT_AUTH         29
+
+/** \brief SSL status: I/O error or premature close on underlying
+    transport stream. This error code is set only by the simplified
+    I/O API ("br_sslio_*"). */
+#define BR_ERR_IO                     31
+
+/** \brief SSL status: base value for a received fatal alert.
+
+    When a fatal alert is received from the peer, the alert value
+    is added to this constant. */
+#define BR_ERR_RECV_FATAL_ALERT      256
+
+/** \brief SSL status: base value for a sent fatal alert.
+
+    When a fatal alert is sent to the peer, the alert value is added
+    to this constant. */
+#define BR_ERR_SEND_FATAL_ALERT      512
+
+/* ===================================================================== */
+
+/**
+ * \brief Decryption engine for SSL.
+ *
+ * When processing incoming records, the SSL engine will use a decryption
+ * engine that uses a specific context structure, and has a set of
+ * methods (a vtable) that follows this template.
+ *
+ * The decryption engine is responsible for applying decryption, verifying
+ * MAC, and keeping track of the record sequence number.
+ */
+typedef struct br_sslrec_in_class_ br_sslrec_in_class;
+struct br_sslrec_in_class_ {
+	/**
+	 * \brief Context size (in bytes).
+	 */
+	size_t context_size;
+
+	/**
+	 * \brief Test validity of the incoming record length.
+	 *
+	 * This function returns 1 if the announced length for an
+	 * incoming record is valid, 0 otherwise.
+	 *
+	 * \param ctx          decryption engine context.
+	 * \param record_len   incoming record length.
+	 * \return  1 for a valid length, 0 otherwise.
+	 */
+	int (*check_length)(const br_sslrec_in_class *const *ctx,
+		size_t record_len);
+
+	/**
+	 * \brief Decrypt the incoming record.
+	 *
+	 * This function may assume that the record length is valid
+	 * (it has been previously tested with `check_length()`).
+	 * Decryption is done in place; `*len` is updated with the
+	 * cleartext length, and the address of the first plaintext
+	 * byte is returned. If the record is correct but empty, then
+	 * `*len` is set to 0 and a non-`NULL` pointer is returned.
+	 *
+	 * On decryption/MAC error, `NULL` is returned.
+	 *
+	 * \param ctx           decryption engine context.
+	 * \param record_type   record type (23 for application data, etc).
+	 * \param version       record version.
+	 * \param payload       address of encrypted payload.
+	 * \param len           pointer to payload length (updated).
+	 * \return  pointer to plaintext, or `NULL` on error.
+	 */
+	unsigned char *(*decrypt)(const br_sslrec_in_class **ctx,
+		int record_type, unsigned version,
+		void *payload, size_t *len);
+};
+
+/**
+ * \brief Encryption engine for SSL.
+ *
+ * When building outgoing records, the SSL engine will use an encryption
+ * engine that uses a specific context structure, and has a set of
+ * methods (a vtable) that follows this template.
+ *
+ * The encryption engine is responsible for applying encryption and MAC,
+ * and keeping track of the record sequence number.
+ */
+typedef struct br_sslrec_out_class_ br_sslrec_out_class;
+struct br_sslrec_out_class_ {
+	/**
+	 * \brief Context size (in bytes).
+	 */
+	size_t context_size;
+
+	/**
+	 * \brief Compute maximum plaintext sizes and offsets.
+	 *
+	 * When this function is called, the `*start` and `*end`
+	 * values contain offsets designating the free area in the
+	 * outgoing buffer for plaintext data; that free area is
+	 * preceded by a 5-byte space which will receive the record
+	 * header.
+	 *
+	 * The `max_plaintext()` function is responsible for adjusting
+	 * both `*start` and `*end` to make room for any record-specific
+	 * header, MAC, padding, and possible split.
+	 *
+	 * \param ctx     encryption engine context.
+	 * \param start   pointer to start of plaintext offset (updated).
+	 * \param end     pointer to end of plaintext offset (updated).
+	 */
+	void (*max_plaintext)(const br_sslrec_out_class *const *ctx,
+		size_t *start, size_t *end);
+
+	/**
+	 * \brief Perform record encryption.
+	 *
+	 * This function encrypts the record. The plaintext address and
+	 * length are provided. Returned value is the start of the
+	 * encrypted record (or sequence of records, if a split was
+	 * performed), _including_ the 5-byte header, and `*len` is
+	 * adjusted to the total size of the record(s), there again
+	 * including the header(s).
+	 *
+	 * \param ctx           encryption engine context.
+	 * \param record_type   record type (23 for application data, etc).
+	 * \param version       record version.
+	 * \param plaintext     address of plaintext.
+	 * \param len           pointer to plaintext length (updated).
+	 * \return  pointer to start of built record.
+	 */
+	unsigned char *(*encrypt)(const br_sslrec_out_class **ctx,
+		int record_type, unsigned version,
+		void *plaintext, size_t *len);
+};
+
+/**
+ * \brief Context for a no-encryption engine.
+ *
+ * The no-encryption engine processes outgoing records during the initial
+ * handshake, before encryption is applied.
+ */
+typedef struct {
+	/** \brief No-encryption engine vtable. */
+	const br_sslrec_out_class *vtable;
+} br_sslrec_out_clear_context;
+
+/** \brief Static, constant vtable for the no-encryption engine. */
+extern const br_sslrec_out_class br_sslrec_out_clear_vtable;
+
+/* ===================================================================== */
+
+/**
+ * \brief Record decryption engine class, for CBC mode.
+ *
+ * This class type extends the decryption engine class with an
+ * initialisation method that receives the parameters needed
+ * for CBC processing: block cipher implementation, block cipher key,
+ * HMAC parameters (hash function, key, MAC length), and IV. If the
+ * IV is `NULL`, then a per-record IV will be used (TLS 1.1+).
+ */
+typedef struct br_sslrec_in_cbc_class_ br_sslrec_in_cbc_class;
+struct br_sslrec_in_cbc_class_ {
+	/**
+	 * \brief Superclass, as first vtable field.
+	 */
+	br_sslrec_in_class inner;
+
+	/**
+	 * \brief Engine initialisation method.
+	 *
+	 * This method sets the vtable field in the context.
+	 *
+	 * \param ctx           context to initialise.
+	 * \param bc_impl       block cipher implementation (CBC decryption).
+	 * \param bc_key        block cipher key.
+	 * \param bc_key_len    block cipher key length (in bytes).
+	 * \param dig_impl      hash function for HMAC.
+	 * \param mac_key       HMAC key.
+	 * \param mac_key_len   HMAC key length (in bytes).
+	 * \param mac_out_len   HMAC output length (in bytes).
+	 * \param iv            initial IV (or `NULL`).
+	 */
+	void (*init)(const br_sslrec_in_cbc_class **ctx,
+		const br_block_cbcdec_class *bc_impl,
+		const void *bc_key, size_t bc_key_len,
+		const br_hash_class *dig_impl,
+		const void *mac_key, size_t mac_key_len, size_t mac_out_len,
+		const void *iv);
+};
+
+/**
+ * \brief Record encryption engine class, for CBC mode.
+ *
+ * This class type extends the encryption engine class with an
+ * initialisation method that receives the parameters needed
+ * for CBC processing: block cipher implementation, block cipher key,
+ * HMAC parameters (hash function, key, MAC length), and IV. If the
+ * IV is `NULL`, then a per-record IV will be used (TLS 1.1+).
+ */
+typedef struct br_sslrec_out_cbc_class_ br_sslrec_out_cbc_class;
+struct br_sslrec_out_cbc_class_ {
+	/**
+	 * \brief Superclass, as first vtable field.
+	 */
+	br_sslrec_out_class inner;
+
+	/**
+	 * \brief Engine initialisation method.
+	 *
+	 * This method sets the vtable field in the context.
+	 *
+	 * \param ctx           context to initialise.
+	 * \param bc_impl       block cipher implementation (CBC encryption).
+	 * \param bc_key        block cipher key.
+	 * \param bc_key_len    block cipher key length (in bytes).
+	 * \param dig_impl      hash function for HMAC.
+	 * \param mac_key       HMAC key.
+	 * \param mac_key_len   HMAC key length (in bytes).
+	 * \param mac_out_len   HMAC output length (in bytes).
+	 * \param iv            initial IV (or `NULL`).
+	 */
+	void (*init)(const br_sslrec_out_cbc_class **ctx,
+		const br_block_cbcenc_class *bc_impl,
+		const void *bc_key, size_t bc_key_len,
+		const br_hash_class *dig_impl,
+		const void *mac_key, size_t mac_key_len, size_t mac_out_len,
+		const void *iv);
+};
+
+/**
+ * \brief Context structure for decrypting incoming records with
+ * CBC + HMAC.
+ *
+ * The first field points to the vtable. The other fields are opaque
+ * and shall not be accessed directly.
+ */
+typedef struct {
+	/** \brief Pointer to vtable. */
+	const br_sslrec_in_cbc_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint64_t seq;
+	union {
+		const br_block_cbcdec_class *vtable;
+		br_aes_gen_cbcdec_keys aes;
+		br_des_gen_cbcdec_keys des;
+	} bc;
+	br_hmac_key_context mac;
+	size_t mac_len;
+	unsigned char iv[16];
+	int explicit_IV;
+#endif
+} br_sslrec_in_cbc_context;
+
+/**
+ * \brief Static, constant vtable for record decryption with CBC.
+ */
+extern const br_sslrec_in_cbc_class br_sslrec_in_cbc_vtable;
+
+/**
+ * \brief Context structure for encrypting outgoing records with
+ * CBC + HMAC.
+ *
+ * The first field points to the vtable. The other fields are opaque
+ * and shall not be accessed directly.
+ */
+typedef struct {
+	/** \brief Pointer to vtable. */
+	const br_sslrec_out_cbc_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint64_t seq;
+	union {
+		const br_block_cbcenc_class *vtable;
+		br_aes_gen_cbcenc_keys aes;
+		br_des_gen_cbcenc_keys des;
+	} bc;
+	br_hmac_key_context mac;
+	size_t mac_len;
+	unsigned char iv[16];
+	int explicit_IV;
+#endif
+} br_sslrec_out_cbc_context;
+
+/**
+ * \brief Static, constant vtable for record encryption with CBC.
+ */
+extern const br_sslrec_out_cbc_class br_sslrec_out_cbc_vtable;
+
+/* ===================================================================== */
+
+/**
+ * \brief Record decryption engine class, for GCM mode.
+ *
+ * This class type extends the decryption engine class with an
+ * initialisation method that receives the parameters needed
+ * for GCM processing: block cipher implementation, block cipher key,
+ * GHASH implementation, and 4-byte IV.
+ */
+typedef struct br_sslrec_in_gcm_class_ br_sslrec_in_gcm_class;
+struct br_sslrec_in_gcm_class_ {
+	/**
+	 * \brief Superclass, as first vtable field.
+	 */
+	br_sslrec_in_class inner;
+
+	/**
+	 * \brief Engine initialisation method.
+	 *
+	 * This method sets the vtable field in the context.
+	 *
+	 * \param ctx           context to initialise.
+	 * \param bc_impl       block cipher implementation (CTR).
+	 * \param key           block cipher key.
+	 * \param key_len       block cipher key length (in bytes).
+	 * \param gh_impl       GHASH implementation.
+	 * \param iv            static IV (4 bytes).
+	 */
+	void (*init)(const br_sslrec_in_gcm_class **ctx,
+		const br_block_ctr_class *bc_impl,
+		const void *key, size_t key_len,
+		br_ghash gh_impl,
+		const void *iv);
+};
+
+/**
+ * \brief Record encryption engine class, for GCM mode.
+ *
+ * This class type extends the encryption engine class with an
+ * initialisation method that receives the parameters needed
+ * for GCM processing: block cipher implementation, block cipher key,
+ * GHASH implementation, and 4-byte IV.
+ */
+typedef struct br_sslrec_out_gcm_class_ br_sslrec_out_gcm_class;
+struct br_sslrec_out_gcm_class_ {
+	/**
+	 * \brief Superclass, as first vtable field.
+	 */
+	br_sslrec_out_class inner;
+
+	/**
+	 * \brief Engine initialisation method.
+	 *
+	 * This method sets the vtable field in the context.
+	 *
+	 * \param ctx           context to initialise.
+	 * \param bc_impl       block cipher implementation (CTR).
+	 * \param key           block cipher key.
+	 * \param key_len       block cipher key length (in bytes).
+	 * \param gh_impl       GHASH implementation.
+	 * \param iv            static IV (4 bytes).
+	 */
+	void (*init)(const br_sslrec_out_gcm_class **ctx,
+		const br_block_ctr_class *bc_impl,
+		const void *key, size_t key_len,
+		br_ghash gh_impl,
+		const void *iv);
+};
+
+/**
+ * \brief Context structure for processing records with GCM.
+ *
+ * The same context structure is used for encrypting and decrypting.
+ *
+ * The first field points to the vtable. The other fields are opaque
+ * and shall not be accessed directly.
+ */
+typedef struct {
+	/** \brief Pointer to vtable. */
+	union {
+		const void *gen;
+		const br_sslrec_in_gcm_class *in;
+		const br_sslrec_out_gcm_class *out;
+	} vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint64_t seq;
+	union {
+		const br_block_ctr_class *vtable;
+		br_aes_gen_ctr_keys aes;
+	} bc;
+	br_ghash gh;
+	unsigned char iv[4];
+	unsigned char h[16];
+#endif
+} br_sslrec_gcm_context;
+
+/**
+ * \brief Static, constant vtable for record decryption with GCM.
+ */
+extern const br_sslrec_in_gcm_class br_sslrec_in_gcm_vtable;
+
+/**
+ * \brief Static, constant vtable for record encryption with GCM.
+ */
+extern const br_sslrec_out_gcm_class br_sslrec_out_gcm_vtable;
+
+/* ===================================================================== */
+
+/**
+ * \brief Record decryption engine class, for ChaCha20+Poly1305.
+ *
+ * This class type extends the decryption engine class with an
+ * initialisation method that receives the parameters needed
+ * for ChaCha20+Poly1305 processing: ChaCha20 implementation,
+ * Poly1305 implementation, key, and 12-byte IV.
+ */
+typedef struct br_sslrec_in_chapol_class_ br_sslrec_in_chapol_class;
+struct br_sslrec_in_chapol_class_ {
+	/**
+	 * \brief Superclass, as first vtable field.
+	 */
+	br_sslrec_in_class inner;
+
+	/**
+	 * \brief Engine initialisation method.
+	 *
+	 * This method sets the vtable field in the context.
+	 *
+	 * \param ctx           context to initialise.
+	 * \param ichacha       ChaCha20 implementation.
+	 * \param ipoly         Poly1305 implementation.
+	 * \param key           secret key (32 bytes).
+	 * \param iv            static IV (12 bytes).
+	 */
+	void (*init)(const br_sslrec_in_chapol_class **ctx,
+		br_chacha20_run ichacha,
+		br_poly1305_run ipoly,
+		const void *key, const void *iv);
+};
+
+/**
+ * \brief Record encryption engine class, for ChaCha20+Poly1305.
+ *
+ * This class type extends the encryption engine class with an
+ * initialisation method that receives the parameters needed
+ * for ChaCha20+Poly1305 processing: ChaCha20 implementation,
+ * Poly1305 implementation, key, and 12-byte IV.
+ */
+typedef struct br_sslrec_out_chapol_class_ br_sslrec_out_chapol_class;
+struct br_sslrec_out_chapol_class_ {
+	/**
+	 * \brief Superclass, as first vtable field.
+	 */
+	br_sslrec_out_class inner;
+
+	/**
+	 * \brief Engine initialisation method.
+	 *
+	 * This method sets the vtable field in the context.
+	 *
+	 * \param ctx           context to initialise.
+	 * \param ichacha       ChaCha20 implementation.
+	 * \param ipoly         Poly1305 implementation.
+	 * \param key           secret key (32 bytes).
+	 * \param iv            static IV (12 bytes).
+	 */
+	void (*init)(const br_sslrec_out_chapol_class **ctx,
+		br_chacha20_run ichacha,
+		br_poly1305_run ipoly,
+		const void *key, const void *iv);
+};
+
+/**
+ * \brief Context structure for processing records with ChaCha20+Poly1305.
+ *
+ * The same context structure is used for encrypting and decrypting.
+ *
+ * The first field points to the vtable. The other fields are opaque
+ * and shall not be accessed directly.
+ */
+typedef struct {
+	/** \brief Pointer to vtable. */
+	union {
+		const void *gen;
+		const br_sslrec_in_chapol_class *in;
+		const br_sslrec_out_chapol_class *out;
+	} vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint64_t seq;
+	unsigned char key[32];
+	unsigned char iv[12];
+	br_chacha20_run ichacha;
+	br_poly1305_run ipoly;
+#endif
+} br_sslrec_chapol_context;
+
+/**
+ * \brief Static, constant vtable for record decryption with ChaCha20+Poly1305.
+ */
+extern const br_sslrec_in_chapol_class br_sslrec_in_chapol_vtable;
+
+/**
+ * \brief Static, constant vtable for record encryption with ChaCha20+Poly1305.
+ */
+extern const br_sslrec_out_chapol_class br_sslrec_out_chapol_vtable;
+
+/* ===================================================================== */
+
+/**
+ * \brief Record decryption engine class, for CCM mode.
+ *
+ * This class type extends the decryption engine class with an
+ * initialisation method that receives the parameters needed
+ * for CCM processing: block cipher implementation, block cipher key,
+ * 4-byte IV, and tag length.
+ */
+typedef struct br_sslrec_in_ccm_class_ br_sslrec_in_ccm_class;
+struct br_sslrec_in_ccm_class_ {
+	/**
+	 * \brief Superclass, as first vtable field.
+	 */
+	br_sslrec_in_class inner;
+
+	/**
+	 * \brief Engine initialisation method.
+	 *
+	 * This method sets the vtable field in the context.
+	 *
+	 * \param ctx           context to initialise.
+	 * \param bc_impl       block cipher implementation (CTR+CBC).
+	 * \param key           block cipher key.
+	 * \param key_len       block cipher key length (in bytes).
+	 * \param iv            static IV (4 bytes).
+	 * \param tag_len       tag length (in bytes).
+	 */
+	void (*init)(const br_sslrec_in_ccm_class **ctx,
+		const br_block_ctrcbc_class *bc_impl,
+		const void *key, size_t key_len,
+		const void *iv, size_t tag_len);
+};
+
+/**
+ * \brief Record encryption engine class, for CCM mode.
+ *
+ * This class type extends the encryption engine class with an
+ * initialisation method that receives the parameters needed
+ * for CCM processing: block cipher implementation, block cipher key,
+ * 4-byte IV, and tag length.
+ */
+typedef struct br_sslrec_out_ccm_class_ br_sslrec_out_ccm_class;
+struct br_sslrec_out_ccm_class_ {
+	/**
+	 * \brief Superclass, as first vtable field.
+	 */
+	br_sslrec_out_class inner;
+
+	/**
+	 * \brief Engine initialisation method.
+	 *
+	 * This method sets the vtable field in the context.
+	 *
+	 * \param ctx           context to initialise.
+	 * \param bc_impl       block cipher implementation (CTR+CBC).
+	 * \param key           block cipher key.
+	 * \param key_len       block cipher key length (in bytes).
+	 * \param iv            static IV (4 bytes).
+	 * \param tag_len       tag length (in bytes).
+	 */
+	void (*init)(const br_sslrec_out_ccm_class **ctx,
+		const br_block_ctrcbc_class *bc_impl,
+		const void *key, size_t key_len,
+		const void *iv, size_t tag_len);
+};
+
+/**
+ * \brief Context structure for processing records with CCM.
+ *
+ * The same context structure is used for encrypting and decrypting.
+ *
+ * The first field points to the vtable. The other fields are opaque
+ * and shall not be accessed directly.
+ */
+typedef struct {
+	/** \brief Pointer to vtable. */
+	union {
+		const void *gen;
+		const br_sslrec_in_ccm_class *in;
+		const br_sslrec_out_ccm_class *out;
+	} vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint64_t seq;
+	union {
+		const br_block_ctrcbc_class *vtable;
+		br_aes_gen_ctrcbc_keys aes;
+	} bc;
+	unsigned char iv[4];
+	size_t tag_len;
+#endif
+} br_sslrec_ccm_context;
+
+/**
+ * \brief Static, constant vtable for record decryption with CCM.
+ */
+extern const br_sslrec_in_ccm_class br_sslrec_in_ccm_vtable;
+
+/**
+ * \brief Static, constant vtable for record encryption with CCM.
+ */
+extern const br_sslrec_out_ccm_class br_sslrec_out_ccm_vtable;
+
+/* ===================================================================== */
+
+/**
+ * \brief Type for session parameters, to be saved for session resumption.
+ */
+typedef struct {
+	/** \brief Session ID buffer. */
+	unsigned char session_id[32];
+	/** \brief Session ID length (in bytes, at most 32). */
+	unsigned char session_id_len;
+	/** \brief Protocol version. */
+	uint16_t version;
+	/** \brief Cipher suite. */
+	uint16_t cipher_suite;
+	/** \brief Master secret. */
+	unsigned char master_secret[48];
+} br_ssl_session_parameters;
+
+#ifndef BR_DOXYGEN_IGNORE
+/*
+ * Maximum number of cipher suites supported by a client or server.
+ */
+#define BR_MAX_CIPHER_SUITES   48
+#endif
+
+/**
+ * \brief Context structure for SSL engine.
+ *
+ * This structure is common to the client and server; both the client
+ * context (`br_ssl_client_context`) and the server context
+ * (`br_ssl_server_context`) include a `br_ssl_engine_context` as their
+ * first field.
+ *
+ * The engine context manages records, including alerts, closures, and
+ * transitions to new encryption/MAC algorithms. Processing of handshake
+ * records is delegated to externally provided code. This structure
+ * should not be used directly.
+ *
+ * Structure contents are opaque and shall not be accessed directly.
+ */
+typedef struct {
+#ifndef BR_DOXYGEN_IGNORE
+	/*
+	 * The error code. When non-zero, then the state is "failed" and
+	 * no I/O may occur until reset.
+	 */
+	int err;
+
+	/*
+	 * Configured I/O buffers. They are either disjoint, or identical.
+	 */
+	unsigned char *ibuf, *obuf;
+	size_t ibuf_len, obuf_len;
+
+	/*
+	 * Maximum fragment length applies to outgoing records; incoming
+	 * records can be processed as long as they fit in the input
+	 * buffer. It is guaranteed that incoming records at least as big
+	 * as max_frag_len can be processed.
+	 */
+	uint16_t max_frag_len;
+	unsigned char log_max_frag_len;
+	unsigned char peer_log_max_frag_len;
+
+	/*
+	 * Buffering management registers.
+	 */
+	size_t ixa, ixb, ixc;
+	size_t oxa, oxb, oxc;
+	unsigned char iomode;
+	unsigned char incrypt;
+
+	/*
+	 * Shutdown flag: when set to non-zero, incoming record bytes
+	 * will not be accepted anymore. This is used after a close_notify
+	 * has been received: afterwards, the engine no longer claims that
+	 * it could receive bytes from the transport medium.
+	 */
+	unsigned char shutdown_recv;
+
+	/*
+	 * 'record_type_in' is set to the incoming record type when the
+	 * record header has been received.
+	 * 'record_type_out' is used to make the next outgoing record
+	 * header when it is ready to go.
+	 */
+	unsigned char record_type_in, record_type_out;
+
+	/*
+	 * When a record is received, its version is extracted:
+	 * -- if 'version_in' is 0, then it is set to the received version;
+	 * -- otherwise, if the received version is not identical to
+	 *    the 'version_in' contents, then a failure is reported.
+	 *
+	 * This implements the SSL requirement that all records shall
+	 * use the negotiated protocol version, once decided (in the
+	 * ServerHello). It is up to the handshake handler to adjust this
+	 * field when necessary.
+	 */
+	uint16_t version_in;
+
+	/*
+	 * 'version_out' is used when the next outgoing record is ready
+	 * to go.
+	 */
+	uint16_t version_out;
+
+	/*
+	 * Record handler contexts.
+	 */
+	union {
+		const br_sslrec_in_class *vtable;
+		br_sslrec_in_cbc_context cbc;
+		br_sslrec_gcm_context gcm;
+		br_sslrec_chapol_context chapol;
+		br_sslrec_ccm_context ccm;
+	} in;
+	union {
+		const br_sslrec_out_class *vtable;
+		br_sslrec_out_clear_context clear;
+		br_sslrec_out_cbc_context cbc;
+		br_sslrec_gcm_context gcm;
+		br_sslrec_chapol_context chapol;
+		br_sslrec_ccm_context ccm;
+	} out;
+
+	/*
+	 * The "application data" flag. Value:
+	 *   0   handshake is in process, no application data acceptable
+	 *   1   application data can be sent and received
+	 *   2   closing, no application data can be sent, but some
+	 *       can still be received (and discarded)
+	 */
+	unsigned char application_data;
+
+	/*
+	 * Context RNG.
+	 *
+	 *   rng_init_done is initially 0. It is set to 1 when the
+	 *   basic structure of the RNG is set, and 2 when some
+	 *   entropy has been pushed in. The value 2 marks the RNG
+	 *   as "properly seeded".
+	 *
+	 *   rng_os_rand_done is initially 0. It is set to 1 when
+	 *   some seeding from the OS or hardware has been attempted.
+	 */
+	br_hmac_drbg_context rng;
+	int rng_init_done;
+	int rng_os_rand_done;
+
+	/*
+	 * Supported minimum and maximum versions, and cipher suites.
+	 */
+	uint16_t version_min;
+	uint16_t version_max;
+	uint16_t suites_buf[BR_MAX_CIPHER_SUITES];
+	unsigned char suites_num;
+
+	/*
+	 * For clients, the server name to send as a SNI extension. For
+	 * servers, the name received in the SNI extension (if any).
+	 */
+	char server_name[256];
+
+	/*
+	 * "Security parameters". These are filled by the handshake
+	 * handler, and used when switching encryption state.
+	 */
+	unsigned char client_random[32];
+	unsigned char server_random[32];
+	br_ssl_session_parameters session;
+
+	/*
+	 * ECDHE elements: curve and point from the peer. The server also
+	 * uses that buffer for the point to send to the client.
+	 */
+	unsigned char ecdhe_curve;
+	unsigned char ecdhe_point[133];
+	unsigned char ecdhe_point_len;
+
+	/*
+	 * Secure renegotiation (RFC 5746): 'reneg' can be:
+	 *   0   first handshake (server support is not known)
+	 *   1   peer does not support secure renegotiation
+	 *   2   peer supports secure renegotiation
+	 *
+	 * The saved_finished buffer contains the client and the
+	 * server "Finished" values from the last handshake, in
+	 * that order (12 bytes each).
+	 */
+	unsigned char reneg;
+	unsigned char saved_finished[24];
+
+	/*
+	 * Behavioural flags.
+	 */
+	uint32_t flags;
+
+	/*
+	 * Context variables for the handshake processor. The 'pad' must
+	 * be large enough to accommodate an RSA-encrypted pre-master
+	 * secret, or an RSA signature; since we want to support up to
+	 * RSA-4096, this means at least 512 bytes. (Other pad usages
+	 * require its length to be at least 256.)
+	 */
+	struct {
+		uint32_t *dp;
+		uint32_t *rp;
+		const unsigned char *ip;
+	} cpu;
+	uint32_t dp_stack[32];
+	uint32_t rp_stack[32];
+	unsigned char pad[512];
+	unsigned char *hbuf_in, *hbuf_out, *saved_hbuf_out;
+	size_t hlen_in, hlen_out;
+	void (*hsrun)(void *ctx);
+
+	/*
+	 * The 'action' value communicates OOB information between the
+	 * engine and the handshake processor.
+	 *
+	 * From the engine:
+	 *   0  invocation triggered by I/O
+	 *   1  invocation triggered by explicit close
+	 *   2  invocation triggered by explicit renegotiation
+	 */
+	unsigned char action;
+
+	/*
+	 * State for alert messages. Value is either 0, or the value of
+	 * the alert level byte (level is either 1 for warning, or 2 for
+	 * fatal; we convert all other values to 'fatal').
+	 */
+	unsigned char alert;
+
+	/*
+	 * Closure flags. This flag is set when a close_notify has been
+	 * received from the peer.
+	 */
+	unsigned char close_received;
+
+	/*
+	 * Multi-hasher for the handshake messages. The handshake handler
+	 * is responsible for resetting it when appropriate.
+	 */
+	br_multihash_context mhash;
+
+	/*
+	 * Pointer to the X.509 engine. The engine is supposed to be
+	 * already initialized. It is used to validate the peer's
+	 * certificate.
+	 */
+	const br_x509_class **x509ctx;
+
+	/*
+	 * Certificate chain to send. This is used by both client and
+	 * server, when they send their respective Certificate messages.
+	 * If chain_len is 0, then chain may be NULL.
+	 */
+	const br_x509_certificate *chain;
+	size_t chain_len;
+	const unsigned char *cert_cur;
+	size_t cert_len;
+
+	/*
+	 * List of supported protocol names (ALPN extension). If unset,
+	 * (number of names is 0), then:
+	 *  - the client sends no ALPN extension;
+	 *  - the server ignores any incoming ALPN extension.
+	 *
+	 * Otherwise:
+	 *  - the client sends an ALPN extension with all the names;
+	 *  - the server selects the first protocol in its list that
+	 *    the client also supports, or fails (fatal alert 120)
+	 *    if the client sends an ALPN extension and there is no
+	 *    match.
+	 *
+	 * The 'selected_protocol' field contains 1+n if the matching
+	 * name has index n in the list (the value is 0 if no match was
+	 * performed, e.g. the peer did not send an ALPN extension).
+	 */
+	const char **protocol_names;
+	uint16_t protocol_names_num;
+	uint16_t selected_protocol;
+
+	/*
+	 * Pointers to implementations; left to NULL for unsupported
+	 * functions. For the raw hash functions, implementations are
+	 * referenced from the multihasher (mhash field).
+	 */
+	br_tls_prf_impl prf10;
+	br_tls_prf_impl prf_sha256;
+	br_tls_prf_impl prf_sha384;
+	const br_block_cbcenc_class *iaes_cbcenc;
+	const br_block_cbcdec_class *iaes_cbcdec;
+	const br_block_ctr_class *iaes_ctr;
+	const br_block_ctrcbc_class *iaes_ctrcbc;
+	const br_block_cbcenc_class *ides_cbcenc;
+	const br_block_cbcdec_class *ides_cbcdec;
+	br_ghash ighash;
+	br_chacha20_run ichacha;
+	br_poly1305_run ipoly;
+	const br_sslrec_in_cbc_class *icbc_in;
+	const br_sslrec_out_cbc_class *icbc_out;
+	const br_sslrec_in_gcm_class *igcm_in;
+	const br_sslrec_out_gcm_class *igcm_out;
+	const br_sslrec_in_chapol_class *ichapol_in;
+	const br_sslrec_out_chapol_class *ichapol_out;
+	const br_sslrec_in_ccm_class *iccm_in;
+	const br_sslrec_out_ccm_class *iccm_out;
+	const br_ec_impl *iec;
+	br_rsa_pkcs1_vrfy irsavrfy;
+	br_ecdsa_vrfy iecdsa;
+#endif
+} br_ssl_engine_context;
+
+/**
+ * \brief Get currently defined engine behavioural flags.
+ *
+ * \param cc   SSL engine context (read only, not modified).
+ * \return  the current combination of `BR_OPT_*` flags.
+ */
+static inline uint32_t
+br_ssl_engine_get_flags(const br_ssl_engine_context *cc)
+{
+	return cc->flags;
+}
+
+/**
+ * \brief Set all engine behavioural flags.
+ *
+ * \param cc      SSL engine context.
+ * \param flags   new value for all flags.
+ */
+static inline void
+br_ssl_engine_set_all_flags(br_ssl_engine_context *cc, uint32_t flags)
+{
+	cc->flags = flags;
+}
+
+/**
+ * \brief Set some engine behavioural flags.
+ *
+ * The flags set in the `flags` parameter are set in the context; other
+ * flags are untouched.
+ *
+ * \param cc      SSL engine context.
+ * \param flags   additional set flags.
+ */
+static inline void
+br_ssl_engine_add_flags(br_ssl_engine_context *cc, uint32_t flags)
+{
+	cc->flags |= flags;
+}
+
+/**
+ * \brief Clear some engine behavioural flags.
+ *
+ * The flags set in the `flags` parameter are cleared from the context; other
+ * flags are untouched.
+ *
+ * \param cc      SSL engine context.
+ * \param flags   flags to remove.
+ */
+static inline void
+br_ssl_engine_remove_flags(br_ssl_engine_context *cc, uint32_t flags)
+{
+	cc->flags &= ~flags;
+}
+
+/**
+ * \brief Behavioural flag: enforce server preferences.
+ *
+ * If this flag is set, then the server will enforce its own cipher suite
+ * preference order; otherwise, it follows the client preferences.
+ */
+#define BR_OPT_ENFORCE_SERVER_PREFERENCES      ((uint32_t)1 << 0)
+
+/**
+ * \brief Behavioural flag: disable renegotiation.
+ *
+ * If this flag is set, then renegotiations are rejected unconditionally:
+ * they won't be honoured if asked for programmatically, and requests from
+ * the peer are rejected.
+ */
+#define BR_OPT_NO_RENEGOTIATION                ((uint32_t)1 << 1)
+
+/**
+ * \brief Behavioural flag: tolerate lack of client authentication.
+ *
+ * If this flag is set in a server and the server requests a client
+ * certificate, but the authentication fails (the client does not send
+ * a certificate, or the client's certificate chain cannot be validated),
+ * then the connection keeps on. Without this flag, a failed client
+ * authentication terminates the connection.
+ *
+ * Notes:
+ *
+ *   - If the client's certificate can be validated and its public key is
+ *     supported, then a wrong signature value terminates the connection
+ *     regardless of that flag.
+ *
+ *   - If using full-static ECDH, then a failure to validate the client's
+ *     certificate prevents the handshake from succeeding.
+ */
+#define BR_OPT_TOLERATE_NO_CLIENT_AUTH         ((uint32_t)1 << 2)
+
+/**
+ * \brief Behavioural flag: fail on application protocol mismatch.
+ *
+ * The ALPN extension ([RFC 7301](https://tools.ietf.org/html/rfc7301))
+ * allows the client to send a list of application protocol names, and
+ * the server to select one. A mismatch is one of the following occurrences:
+ *
+ *   - On the client: the client sends a list of names, the server
+ *     responds with a protocol name which is _not_ part of the list of
+ *     names sent by the client.
+ *
+ *   - On the server: the client sends a list of names, and the server
+ *     is also configured with a list of names, but there is no common
+ *     protocol name between the two lists.
+ *
+ * Normal behaviour in case of mismatch is to report no matching name
+ * (`br_ssl_engine_get_selected_protocol()` returns `NULL`) and carry on.
+ * If the flag is set, then a mismatch implies a protocol failure (if
+ * the mismatch is detected by the server, it will send a fatal alert).
+ *
+ * Note: even with this flag, `br_ssl_engine_get_selected_protocol()`
+ * may still return `NULL` if the client or the server does not send an
+ * ALPN extension at all.
+ */
+#define BR_OPT_FAIL_ON_ALPN_MISMATCH           ((uint32_t)1 << 3)
+
+/**
+ * \brief Set the minimum and maximum supported protocol versions.
+ *
+ * The two provided versions MUST be supported by the implementation
+ * (i.e. TLS 1.0, 1.1 and 1.2), and `version_max` MUST NOT be lower
+ * than `version_min`.
+ *
+ * \param cc            SSL engine context.
+ * \param version_min   minimum supported TLS version.
+ * \param version_max   maximum supported TLS version.
+ */
+static inline void
+br_ssl_engine_set_versions(br_ssl_engine_context *cc,
+	unsigned version_min, unsigned version_max)
+{
+	cc->version_min = (uint16_t)version_min;
+	cc->version_max = (uint16_t)version_max;
+}
+
+/**
+ * \brief Set the list of cipher suites advertised by this context.
+ *
+ * The provided array is copied into the context. It is the caller's
+ * responsibility to ensure that all provided suites will be supported
+ * by the context. The engine context has enough room to receive _all_
+ * suites supported by the implementation. The provided array MUST NOT
+ * contain duplicates.
+ *
+ * If the engine is for a client, the "signaling" pseudo-cipher suite
+ * `TLS_FALLBACK_SCSV` can be added at the end of the list, if the
+ * calling application is performing a voluntary downgrade (voluntary
+ * downgrades are not recommended, but if such a downgrade is done, then
+ * adding the fallback pseudo-suite is a good idea).
+ *
+ * \param cc           SSL engine context.
+ * \param suites       cipher suites.
+ * \param suites_num   number of cipher suites.
+ */
+void br_ssl_engine_set_suites(br_ssl_engine_context *cc,
+	const uint16_t *suites, size_t suites_num);
+
+/**
+ * \brief Set the X.509 engine.
+ *
+ * The caller shall ensure that the X.509 engine is properly initialised.
+ *
+ * \param cc        SSL engine context.
+ * \param x509ctx   X.509 certificate validation context.
+ */
+static inline void
+br_ssl_engine_set_x509(br_ssl_engine_context *cc, const br_x509_class **x509ctx)
+{
+	cc->x509ctx = x509ctx;
+}
+
+/**
+ * \brief Set the supported protocol names.
+ *
+ * Protocol names are part of the ALPN extension ([RFC
+ * 7301](https://tools.ietf.org/html/rfc7301)). Each protocol name is a
+ * character string, containing no more than 255 characters (256 with the
+ * terminating zero). When names are set, then:
+ *
+ *   - The client will send an ALPN extension, containing the names. If
+ *     the server responds with an ALPN extension, the client will verify
+ *     that the response contains one of its names, and report that name
+ *     through `br_ssl_engine_get_selected_protocol()`.
+ *
+ *   - The server will parse incoming ALPN extensions (from clients), and
+ *     try to find a common protocol; if none is found, the connection is
+ *     aborted with a fatal alert if `BR_OPT_FAIL_ON_ALPN_MISMATCH` is set.
+ *     On match, a response ALPN extension is sent, and the name is
+ *     reported through `br_ssl_engine_get_selected_protocol()`.
+ *
+ * The provided array is linked in, and must remain valid while the
+ * connection is live.
+ *
+ * Names MUST NOT be empty. Names MUST NOT be longer than 255 characters
+ * (excluding the terminating 0).
+ *
+ * \param ctx     SSL engine context.
+ * \param names   list of protocol names (zero-terminated).
+ * \param num     number of protocol names (MUST be 1 or more).
+ */
+static inline void
+br_ssl_engine_set_protocol_names(br_ssl_engine_context *ctx,
+	const char **names, size_t num)
+{
+	ctx->protocol_names = names;
+	ctx->protocol_names_num = (uint16_t)num;
+}
+
+/**
+ * \brief Get the selected protocol.
+ *
+ * If this context was initialised with a non-empty list of protocol
+ * names, and both client and server sent ALPN extensions during the
+ * handshake, and a common name was found, then that name is returned.
+ * Otherwise, `NULL` is returned. The returned pointer is one of the
+ * pointers provided with `br_ssl_engine_set_protocol_names()`.
+ *
+ * \param ctx   SSL engine context.
+ * \return  the selected protocol, or `NULL`.
+ */
+static inline const char *
+br_ssl_engine_get_selected_protocol(const br_ssl_engine_context *ctx)
+{
+	unsigned k = ctx->selected_protocol;
+
+	/* 0 and 0xFFFF yield NULL; a stale index past the list must too. */
+	return (k == 0 || k == 0xFFFF || k > ctx->protocol_names_num)
+		? NULL : ctx->protocol_names[k - 1];
+}
+
+/**
+ * \brief Set a hash function implementation (by ID).
+ *
+ * Hash functions set with this call will be used for SSL/TLS specific
+ * usages, not X.509 certificate validation. Only "standard" hash functions
+ * may be set (MD5, SHA-1, SHA-224, SHA-256, SHA-384, SHA-512). If `impl`
+ * is `NULL`, then the hash function support is removed, not added.
+ *
+ * \param ctx    SSL engine context.
+ * \param id     hash function identifier.
+ * \param impl   hash function implementation (or `NULL`).
+ */
+static inline void
+br_ssl_engine_set_hash(br_ssl_engine_context *ctx,
+	int id, const br_hash_class *impl)
+{
+	br_multihash_setimpl(&ctx->mhash, id, impl);
+}
+
+/**
+ * \brief Get a hash function implementation (by ID).
+ *
+ * This function retrieves a hash function implementation which was
+ * set with `br_ssl_engine_set_hash()`.
+ *
+ * \param ctx   SSL engine context.
+ * \param id    hash function identifier.
+ * \return  the hash function implementation (or `NULL`).
+ */
+static inline const br_hash_class *
+br_ssl_engine_get_hash(br_ssl_engine_context *ctx, int id)
+{
+	return br_multihash_getimpl(&ctx->mhash, id);
+}
+
+/**
+ * \brief Set the PRF implementation (for TLS 1.0 and 1.1).
+ *
+ * This function sets (or removes, if `impl` is `NULL`) the implementation
+ * for the PRF used in TLS 1.0 and 1.1.
+ *
+ * \param cc     SSL engine context.
+ * \param impl   PRF implementation (or `NULL`).
+ */
+static inline void
+br_ssl_engine_set_prf10(br_ssl_engine_context *cc, br_tls_prf_impl impl)
+{
+	cc->prf10 = impl;
+}
+
+/**
+ * \brief Set the PRF implementation with SHA-256 (for TLS 1.2).
+ *
+ * This function sets (or removes, if `impl` is `NULL`) the implementation
+ * for the SHA-256 variant of the PRF used in TLS 1.2.
+ *
+ * \param cc     SSL engine context.
+ * \param impl   PRF implementation (or `NULL`).
+ */
+static inline void
+br_ssl_engine_set_prf_sha256(br_ssl_engine_context *cc, br_tls_prf_impl impl)
+{
+	cc->prf_sha256 = impl;
+}
+
+/**
+ * \brief Set the PRF implementation with SHA-384 (for TLS 1.2).
+ *
+ * This function sets (or removes, if `impl` is `NULL`) the implementation
+ * for the SHA-384 variant of the PRF used in TLS 1.2.
+ *
+ * \param cc     SSL engine context.
+ * \param impl   PRF implementation (or `NULL`).
+ */
+static inline void
+br_ssl_engine_set_prf_sha384(br_ssl_engine_context *cc, br_tls_prf_impl impl)
+{
+	cc->prf_sha384 = impl;
+}
+
+/**
+ * \brief Set the AES/CBC implementations.
+ *
+ * \param cc         SSL engine context.
+ * \param impl_enc   AES/CBC encryption implementation (or `NULL`).
+ * \param impl_dec   AES/CBC decryption implementation (or `NULL`).
+ */
+static inline void
+br_ssl_engine_set_aes_cbc(br_ssl_engine_context *cc,
+	const br_block_cbcenc_class *impl_enc,
+	const br_block_cbcdec_class *impl_dec)
+{
+	cc->iaes_cbcenc = impl_enc;
+	cc->iaes_cbcdec = impl_dec;
+}
+
+/**
+ * \brief Set the "default" AES/CBC implementations.
+ *
+ * This function configures in the engine the AES implementations that
+ * should provide best runtime performance on the local system, while
+ * still being safe (in particular, constant-time). It also sets the
+ * handlers for CBC records.
+ *
+ * \param cc   SSL engine context.
+ */
+void br_ssl_engine_set_default_aes_cbc(br_ssl_engine_context *cc);
+
+/**
+ * \brief Set the AES/CTR implementation.
+ *
+ * \param cc     SSL engine context.
+ * \param impl   AES/CTR encryption/decryption implementation (or `NULL`).
+ */
+static inline void
+br_ssl_engine_set_aes_ctr(br_ssl_engine_context *cc,
+	const br_block_ctr_class *impl)
+{
+	cc->iaes_ctr = impl;
+}
+
+/**
+ * \brief Set the "default" implementations for AES/GCM (AES/CTR + GHASH).
+ *
+ * This function configures in the engine the AES/CTR and GHASH
+ * implementations that should provide best runtime performance on the local
+ * system, while still being safe (in particular, constant-time). It also
+ * sets the handlers for GCM records.
+ *
+ * \param cc   SSL engine context.
+ */
+void br_ssl_engine_set_default_aes_gcm(br_ssl_engine_context *cc);
+
+/**
+ * \brief Set the DES/CBC implementations.
+ *
+ * \param cc         SSL engine context.
+ * \param impl_enc   DES/CBC encryption implementation (or `NULL`).
+ * \param impl_dec   DES/CBC decryption implementation (or `NULL`).
+ */
+static inline void
+br_ssl_engine_set_des_cbc(br_ssl_engine_context *cc,
+	const br_block_cbcenc_class *impl_enc,
+	const br_block_cbcdec_class *impl_dec)
+{
+	cc->ides_cbcenc = impl_enc;
+	cc->ides_cbcdec = impl_dec;
+}
+
+/**
+ * \brief Set the "default" DES/CBC implementations.
+ *
+ * This function configures in the engine the DES implementations that
+ * should provide best runtime performance on the local system, while
+ * still being safe (in particular, constant-time). It also sets the
+ * handlers for CBC records.
+ *
+ * \param cc   SSL engine context.
+ */
+void br_ssl_engine_set_default_des_cbc(br_ssl_engine_context *cc);
+
+/**
+ * \brief Set the GHASH implementation (used in GCM mode).
+ *
+ * \param cc     SSL engine context.
+ * \param impl   GHASH implementation (or `NULL`).
+ */
+static inline void
+br_ssl_engine_set_ghash(br_ssl_engine_context *cc, br_ghash impl)
+{
+	cc->ighash = impl;
+}
+
+/**
+ * \brief Set the ChaCha20 implementation.
+ *
+ * \param cc        SSL engine context.
+ * \param ichacha   ChaCha20 implementation (or `NULL`).
+ */
+static inline void
+br_ssl_engine_set_chacha20(br_ssl_engine_context *cc,
+	br_chacha20_run ichacha)
+{
+	cc->ichacha = ichacha;
+}
+
+/**
+ * \brief Set the Poly1305 implementation.
+ *
+ * \param cc      SSL engine context.
+ * \param ipoly   Poly1305 implementation (or `NULL`).
+ */
+static inline void
+br_ssl_engine_set_poly1305(br_ssl_engine_context *cc,
+	br_poly1305_run ipoly)
+{
+	cc->ipoly = ipoly;
+}
+
+/**
+ * \brief Set the "default" ChaCha20 and Poly1305 implementations.
+ *
+ * This function configures in the engine the ChaCha20 and Poly1305
+ * implementations that should provide best runtime performance on the
+ * local system, while still being safe (in particular, constant-time).
+ * It also sets the handlers for ChaCha20+Poly1305 records.
+ *
+ * \param cc   SSL engine context.
+ */
+void br_ssl_engine_set_default_chapol(br_ssl_engine_context *cc);
+
+/**
+ * \brief Set the AES/CTR+CBC implementation.
+ *
+ * \param cc     SSL engine context.
+ * \param impl   AES/CTR+CBC encryption/decryption implementation (or `NULL`).
+ */
+static inline void
+br_ssl_engine_set_aes_ctrcbc(br_ssl_engine_context *cc,
+	const br_block_ctrcbc_class *impl)
+{
+	cc->iaes_ctrcbc = impl;
+}
+
+/**
+ * \brief Set the "default" implementations for AES/CCM.
+ *
+ * This function configures in the engine the AES/CTR+CBC
+ * implementation that should provide best runtime performance on the local
+ * system, while still being safe (in particular, constant-time). It also
+ * sets the handlers for CCM records.
+ *
+ * \param cc   SSL engine context.
+ */
+void br_ssl_engine_set_default_aes_ccm(br_ssl_engine_context *cc);
+
+/**
+ * \brief Set the record encryption and decryption engines for CBC + HMAC.
+ *
+ * \param cc         SSL engine context.
+ * \param impl_in    record CBC decryption implementation (or `NULL`).
+ * \param impl_out   record CBC encryption implementation (or `NULL`).
+ */
+static inline void
+br_ssl_engine_set_cbc(br_ssl_engine_context *cc,
+	const br_sslrec_in_cbc_class *impl_in,
+	const br_sslrec_out_cbc_class *impl_out)
+{
+	cc->icbc_in = impl_in;
+	cc->icbc_out = impl_out;
+}
+
+/**
+ * \brief Set the record encryption and decryption engines for GCM.
+ *
+ * \param cc         SSL engine context.
+ * \param impl_in    record GCM decryption implementation (or `NULL`).
+ * \param impl_out   record GCM encryption implementation (or `NULL`).
+ */
+static inline void
+br_ssl_engine_set_gcm(br_ssl_engine_context *cc,
+	const br_sslrec_in_gcm_class *impl_in,
+	const br_sslrec_out_gcm_class *impl_out)
+{
+	cc->igcm_in = impl_in;
+	cc->igcm_out = impl_out;
+}
+
+/**
+ * \brief Set the record encryption and decryption engines for CCM.
+ *
+ * \param cc         SSL engine context.
+ * \param impl_in    record CCM decryption implementation (or `NULL`).
+ * \param impl_out   record CCM encryption implementation (or `NULL`).
+ */
+static inline void
+br_ssl_engine_set_ccm(br_ssl_engine_context *cc,
+	const br_sslrec_in_ccm_class *impl_in,
+	const br_sslrec_out_ccm_class *impl_out)
+{
+	cc->iccm_in = impl_in;
+	cc->iccm_out = impl_out;
+}
+
+/**
+ * \brief Set the record encryption and decryption engines for
+ * ChaCha20+Poly1305.
+ *
+ * \param cc         SSL engine context.
+ * \param impl_in    record ChaCha20 decryption implementation (or `NULL`).
+ * \param impl_out   record ChaCha20 encryption implementation (or `NULL`).
+ */
+static inline void
+br_ssl_engine_set_chapol(br_ssl_engine_context *cc,
+	const br_sslrec_in_chapol_class *impl_in,
+	const br_sslrec_out_chapol_class *impl_out)
+{
+	cc->ichapol_in = impl_in;
+	cc->ichapol_out = impl_out;
+}
+
+/**
+ * \brief Set the EC implementation.
+ *
+ * The elliptic curve implementation will be used for ECDH and ECDHE
+ * cipher suites, and for ECDSA support.
+ *
+ * \param cc    SSL engine context.
+ * \param iec   EC implementation (or `NULL`).
+ */
+static inline void
+br_ssl_engine_set_ec(br_ssl_engine_context *cc, const br_ec_impl *iec)
+{
+	cc->iec = iec;
+}
+
+/**
+ * \brief Set the "default" EC implementation.
+ *
+ * This function sets the elliptic curve implementation for ECDH and
+ * ECDHE cipher suites, and for ECDSA support. It selects the fastest
+ * implementation on the current system.
+ *
+ * \param cc   SSL engine context.
+ */
+void br_ssl_engine_set_default_ec(br_ssl_engine_context *cc);
+
+/**
+ * \brief Get the EC implementation configured in the provided engine.
+ *
+ * \param cc   SSL engine context (read only, not modified).
+ * \return  the EC implementation.
+ */
+static inline const br_ec_impl *
+br_ssl_engine_get_ec(const br_ssl_engine_context *cc)
+{
+	return cc->iec;
+}
+
+/**
+ * \brief Set the RSA signature verification implementation.
+ *
+ * On the client, this is used to verify the server's signature on its
+ * ServerKeyExchange message (for ECDHE_RSA cipher suites). On the server,
+ * this is used to verify the client's CertificateVerify message (if a
+ * client certificate is requested, and that certificate contains an RSA key).
+ *
+ * \param cc         SSL engine context.
+ * \param irsavrfy   RSA signature verification implementation.
+ */
+static inline void
+br_ssl_engine_set_rsavrfy(br_ssl_engine_context *cc, br_rsa_pkcs1_vrfy irsavrfy)
+{
+	cc->irsavrfy = irsavrfy;
+}
+
+/**
+ * \brief Set the "default" RSA implementation (signature verification).
+ *
+ * This function sets the RSA implementation (signature verification)
+ * to the fastest implementation available on the current platform.
+ *
+ * \param cc   SSL engine context.
+ */
+void br_ssl_engine_set_default_rsavrfy(br_ssl_engine_context *cc);
+
+/**
+ * \brief Get the RSA implementation (signature verification) configured
+ * in the provided engine.
+ *
+ * \param cc   SSL engine context (read only, not modified).
+ * \return  the RSA signature verification implementation.
+ */
+static inline br_rsa_pkcs1_vrfy
+br_ssl_engine_get_rsavrfy(const br_ssl_engine_context *cc)
+{
+	return cc->irsavrfy;
+}
+
+/**
+ * \brief Set the ECDSA implementation (signature verification).
+ *
+ * On the client, this is used to verify the server's signature on its
+ * ServerKeyExchange message (for ECDHE_ECDSA cipher suites). On the server,
+ * this is used to verify the client's CertificateVerify message (if a
+ * client certificate is requested, that certificate contains an EC key,
+ * and full-static ECDH is not used).
+ *
+ * The ECDSA implementation will use the EC core implementation configured
+ * in the engine context.
+ *
+ * \param cc       SSL engine context.
+ * \param iecdsa   ECDSA verification implementation.
+ */
+static inline void
+br_ssl_engine_set_ecdsa(br_ssl_engine_context *cc, br_ecdsa_vrfy iecdsa)
+{
+	cc->iecdsa = iecdsa;
+}
+
+/**
+ * \brief Set the "default" ECDSA implementation (signature verification).
+ *
+ * This function sets the ECDSA implementation (signature verification)
+ * to the fastest implementation available on the current platform. This
+ * call also sets the elliptic curve implementation itself, there again
+ * to the fastest EC implementation available.
+ *
+ * \param cc   SSL engine context.
+ */
+void br_ssl_engine_set_default_ecdsa(br_ssl_engine_context *cc);
+
+/**
+ * \brief Get the ECDSA implementation (signature verification) configured
+ * in the provided engine.
+ *
+ * \param cc   SSL engine context (read only, not modified).
+ * \return  the ECDSA signature verification implementation.
+ */
+static inline br_ecdsa_vrfy
+br_ssl_engine_get_ecdsa(const br_ssl_engine_context *cc)
+{
+	return cc->iecdsa;
+}
+
+/**
+ * \brief Set the I/O buffer for the SSL engine.
+ *
+ * Once this call has been made, `br_ssl_client_reset()` or
+ * `br_ssl_server_reset()` MUST be called before using the context.
+ *
+ * The provided buffer will be used as long as the engine context is
+ * used. The caller is responsible for keeping it available.
+ *
+ * If `bidi` is 0, then the engine will operate in half-duplex mode
+ * (it won't be able to send data while there is unprocessed incoming
+ * data in the buffer, and it won't be able to receive data while there
+ * is unsent data in the buffer). The optimal buffer size in half-duplex
+ * mode is `BR_SSL_BUFSIZE_MONO`; if the buffer is larger, then extra
+ * bytes are ignored. If the buffer is smaller, then this limits the
+ * capacity of the engine to support all allowed record sizes.
+ *
+ * If `bidi` is 1, then the engine will split the buffer into two
+ * parts, for separate handling of outgoing and incoming data. This
+ * enables full-duplex processing, but requires more RAM. The optimal
+ * buffer size in full-duplex mode is `BR_SSL_BUFSIZE_BIDI`; if the
+ * buffer is larger, then extra bytes are ignored. If the buffer is
+ * smaller, then the split will favour the incoming part, so that
+ * interoperability is maximised.
+ *
+ * \param cc          SSL engine context
+ * \param iobuf       I/O buffer.
+ * \param iobuf_len   I/O buffer length (in bytes).
+ * \param bidi        non-zero for full-duplex mode.
+ */
+void br_ssl_engine_set_buffer(br_ssl_engine_context *cc,
+	void *iobuf, size_t iobuf_len, int bidi);
+
+/**
+ * \brief Set the I/O buffers for the SSL engine.
+ *
+ * Once this call has been made, `br_ssl_client_reset()` or
+ * `br_ssl_server_reset()` MUST be called before using the context.
+ *
+ * This function is similar to `br_ssl_engine_set_buffer()`, except
+ * that it enforces full-duplex mode, and the two I/O buffers are
+ * provided as separate chunks.
+ *
+ * The macros `BR_SSL_BUFSIZE_INPUT` and `BR_SSL_BUFSIZE_OUTPUT`
+ * evaluate to the optimal (maximum) sizes for the input and output
+ * buffer, respectively.
+ *
+ * \param cc         SSL engine context
+ * \param ibuf       input buffer.
+ * \param ibuf_len   input buffer length (in bytes).
+ * \param obuf       output buffer.
+ * \param obuf_len   output buffer length (in bytes).
+ */
+void br_ssl_engine_set_buffers_bidi(br_ssl_engine_context *cc,
+	void *ibuf, size_t ibuf_len, void *obuf, size_t obuf_len);
+
+/**
+ * \brief Inject some "initial entropy" in the context.
+ *
+ * This entropy will be added to what can be obtained from the
+ * underlying operating system, if that OS is supported.
+ *
+ * This function may be called several times; all injected entropy chunks
+ * are cumulatively mixed.
+ *
+ * If entropy gathering from the OS is supported and compiled in, then this
+ * step is optional. Otherwise, it is mandatory to inject randomness, and
+ * the caller MUST take care to push (as one or several successive calls)
+ * enough entropy to achieve cryptographic resistance (at least 80 bits,
+ * preferably 128 or more). The engine will report an error if no entropy
+ * was provided and none can be obtained from the OS.
+ *
+ * Take care that this function cannot assess the cryptographic quality of
+ * the provided bytes.
+ *
+ * In all generality, "entropy" must here be considered to mean "that
+ * which the attacker cannot predict". If your OS/architecture does not
+ * have a suitable source of randomness, then you can make do with the
+ * combination of a large enough secret value (possibly a copy of an
+ * asymmetric private key that you also store on the system) AND a
+ * non-repeating value (e.g. current time, provided that the local clock
+ * cannot be reset or altered by the attacker).
+ *
+ * \param cc     SSL engine context.
+ * \param data   extra entropy to inject.
+ * \param len    length of the extra data (in bytes).
+ */
+void br_ssl_engine_inject_entropy(br_ssl_engine_context *cc,
+	const void *data, size_t len);
+
+/**
+ * \brief Get the "server name" in this engine.
+ *
+ * For clients, this is the name provided with `br_ssl_client_reset()`;
+ * for servers, this is the name received from the client as part of the
+ * ClientHello message. If there is no such name (e.g. the client did
+ * not send an SNI extension) then the returned string is empty
+ * (returned pointer points to a byte of value 0).
+ *
+ * The returned pointer refers to a buffer inside the context, which may
+ * be overwritten as part of normal SSL activity (even within the same
+ * connection, if a renegotiation occurs).
+ *
+ * \param cc   SSL engine context.
+ * \return  the server name (possibly empty).
+ */
+static inline const char *
+br_ssl_engine_get_server_name(const br_ssl_engine_context *cc)
+{
+	return cc->server_name;
+}
+
+/**
+ * \brief Get the protocol version.
+ *
+ * This function returns the protocol version that is used by the
+ * engine. That value is set after sending (for a server) or receiving
+ * (for a client) the ServerHello message.
+ *
+ * \param cc   SSL engine context.
+ * \return  the protocol version.
+ */
+static inline unsigned
+br_ssl_engine_get_version(const br_ssl_engine_context *cc)
+{
+	return cc->session.version;
+}
+
+/**
+ * \brief Get a copy of the session parameters.
+ *
+ * The session parameters are filled during the handshake, so this
+ * function shall not be called before completion of the handshake.
+ * The initial handshake is completed when the context first allows
+ * application data to be injected.
+ *
+ * This function copies the current session parameters into the provided
+ * structure. Beware that the session parameters include the master
+ * secret, which is sensitive data, to handle with great care.
+ *
+ * \param cc   SSL engine context.
+ * \param pp   destination structure for the session parameters.
+ */
+static inline void
+br_ssl_engine_get_session_parameters(const br_ssl_engine_context *cc,
+	br_ssl_session_parameters *pp)
+{
+	*pp = cc->session;	/* whole-structure copy, master secret included */
+}
+
+/**
+ * \brief Set the session parameters to the provided values.
+ *
+ * This function is meant to be used in the client, before doing a new
+ * handshake; a session resumption will be attempted with these
+ * parameters. In the server, this function has no effect.
+ *
+ * \param cc   SSL engine context.
+ * \param pp   source structure for the session parameters.
+ */
+static inline void
+br_ssl_engine_set_session_parameters(br_ssl_engine_context *cc,
+	const br_ssl_session_parameters *pp)
+{
+	cc->session = *pp;	/* whole-structure copy into the context */
+}
+
+/**
+ * \brief Get identifier for the curve used for key exchange.
+ *
+ * If the cipher suite uses ECDHE, then this function returns the
+ * identifier for the curve used for transient parameters. This is
+ * defined during the course of the handshake, when the ServerKeyExchange
+ * is sent (on the server) or received (on the client). If the
+ * cipher suite does not use ECDHE (e.g. static ECDH, or RSA key
+ * exchange), then this value is indeterminate.
+ *
+ * \param cc   SSL engine context.
+ * \return  the ECDHE curve identifier.
+ */
+static inline int
+br_ssl_engine_get_ecdhe_curve(const br_ssl_engine_context *cc)
+{
+	return cc->ecdhe_curve;
+}
+
+/**
+ * \brief Get the current engine state.
+ *
+ * An SSL engine (client or server) has, at any time, a state which is
+ * the combination of zero, one or more of these flags:
+ *
+ *   - `BR_SSL_CLOSED`
+ *
+ *     Engine is finished, no more I/O (until next reset).
+ *
+ *   - `BR_SSL_SENDREC`
+ *
+ *     Engine has some bytes to send to the peer.
+ *
+ *   - `BR_SSL_RECVREC`
+ *
+ *     Engine expects some bytes from the peer.
+ *
+ *   - `BR_SSL_SENDAPP`
+ *
+ *     Engine may receive application data to send (or flush).
+ *
+ *   - `BR_SSL_RECVAPP`
+ *
+ *     Engine has obtained some application data from the peer,
+ *     that should be read by the caller.
+ *
+ * If no flag at all is set (state value is 0), then the engine is not
+ * fully initialised yet.
+ *
+ * The `BR_SSL_CLOSED` flag is exclusive; when it is set, no other flag
+ * is set. To distinguish between a normal closure and an error, use
+ * `br_ssl_engine_last_error()`.
+ *
+ * Generally speaking, `BR_SSL_SENDREC` and `BR_SSL_SENDAPP` are mutually
+ * exclusive: the input buffer, at any point, either accumulates
+ * plaintext data, or contains an assembled record that is being sent.
+ * Similarly, `BR_SSL_RECVREC` and `BR_SSL_RECVAPP` are mutually exclusive.
+ * This may change in a future library version.
+ *
+ * \param cc   SSL engine context.
+ * \return  the current engine state.
+ */
+unsigned br_ssl_engine_current_state(const br_ssl_engine_context *cc);
+
+/** \brief SSL engine state: closed or failed. */
+#define BR_SSL_CLOSED    0x0001
+/** \brief SSL engine state: record data is ready to be sent to the peer. */
+#define BR_SSL_SENDREC   0x0002
+/** \brief SSL engine state: engine may receive records from the peer. */
+#define BR_SSL_RECVREC   0x0004
+/** \brief SSL engine state: engine may accept application data to send. */
+#define BR_SSL_SENDAPP   0x0008
+/** \brief SSL engine state: engine has received application data. */
+#define BR_SSL_RECVAPP   0x0010
+
+/**
+ * \brief Get the engine error indicator.
+ *
+ * The error indicator is `BR_ERR_OK` (0) if no error was encountered
+ * since the last call to `br_ssl_client_reset()` or
+ * `br_ssl_server_reset()`. Other status values are "sticky": they
+ * remain set, and prevent all I/O activity, until cleared. Only the
+ * reset calls clear the error indicator.
+ *
+ * \param cc   SSL engine context.
+ * \return  `BR_ERR_OK` (0), or a non-zero error code.
+ */
+static inline int
+br_ssl_engine_last_error(const br_ssl_engine_context *cc)
+{
+	return cc->err;
+}
+
+/*
+ * There are four I/O operations, each identified by a symbolic name:
+ *
+ *   sendapp   inject application data in the engine
+ *   recvapp   retrieving application data from the engine
+ *   sendrec   sending records on the transport medium
+ *   recvrec   receiving records from the transport medium
+ *
+ * Terminology works thus: in a layered model where the SSL engine sits
+ * between the application and the network, "send" designates operations
+ * where bytes flow from application to network, and "recv" for the
+ * reverse operation. Application data (the plaintext that is to be
+ * conveyed through SSL) is "app", while encrypted records are "rec".
+ * Note that from the SSL engine point of view, "sendapp" and "recvrec"
+ * designate bytes that enter the engine ("inject" operation), while
+ * "recvapp" and "sendrec" designate bytes that exit the engine
+ * ("extract" operation).
+ *
+ * For the operation 'xxx', two functions are defined:
+ *
+ *   br_ssl_engine_xxx_buf
+ *      Returns a pointer and length to the buffer to use for that
+ *      operation. '*len' is set to the number of bytes that may be read
+ *      from the buffer (extract operation) or written to the buffer
+ *      (inject operation). If no byte may be exchanged for that operation
+ *      at that point, then '*len' is set to zero, and NULL is returned.
+ *      The engine state is unmodified by this call.
+ *
+ *   br_ssl_engine_xxx_ack
+ *      Informs the engine that 'len' bytes have been read from the buffer
+ *      (extract operation) or written to the buffer (inject operation).
+ *      The 'len' value MUST NOT be zero. The 'len' value MUST NOT exceed
+ *      that which was obtained from a preceding br_ssl_engine_xxx_buf()
+ *      call.
+ */
+
+/**
+ * \brief Get buffer for application data to send.
+ *
+ * If the engine is ready to accept application data to send to the
+ * peer, then this call returns a pointer to the buffer where such
+ * data shall be written, and its length is written in `*len`.
+ * Otherwise, `*len` is set to 0 and `NULL` is returned.
+ *
+ * \param cc    SSL engine context.
+ * \param len   receives the application data output buffer length, or 0.
+ * \return  the application data output buffer, or `NULL`.
+ */
+unsigned char *br_ssl_engine_sendapp_buf(
+	const br_ssl_engine_context *cc, size_t *len);
+
+/**
+ * \brief Inform the engine of some new application data.
+ *
+ * After writing `len` bytes in the buffer returned by
+ * `br_ssl_engine_sendapp_buf()`, the application shall call this
+ * function to trigger any relevant processing. The `len` parameter
+ * MUST NOT be 0, and MUST NOT exceed the value obtained in the
+ * `br_ssl_engine_sendapp_buf()` call.
+ *
+ * \param cc    SSL engine context.
+ * \param len   number of bytes pushed (not zero).
+ */
+void br_ssl_engine_sendapp_ack(br_ssl_engine_context *cc, size_t len);
+
+/**
+ * \brief Get buffer for received application data.
+ *
+ * If the engine has received application data from the peer, then this
+ * call returns a pointer to the buffer from where such data shall be
+ * read, and its length is written in `*len`. Otherwise, `*len` is set
+ * to 0 and `NULL` is returned.
+ *
+ * \param cc    SSL engine context.
+ * \param len   receives the application data input buffer length, or 0.
+ * \return  the application data input buffer, or `NULL`.
+ */
+unsigned char *br_ssl_engine_recvapp_buf(
+	const br_ssl_engine_context *cc, size_t *len);
+
+/**
+ * \brief Acknowledge some received application data.
+ *
+ * After reading `len` bytes from the buffer returned by
+ * `br_ssl_engine_recvapp_buf()`, the application shall call this
+ * function to trigger any relevant processing. The `len` parameter
+ * MUST NOT be 0, and MUST NOT exceed the value obtained in the
+ * `br_ssl_engine_recvapp_buf()` call.
+ *
+ * \param cc    SSL engine context.
+ * \param len   number of bytes read (not zero).
+ */
+void br_ssl_engine_recvapp_ack(br_ssl_engine_context *cc, size_t len);
+
+/**
+ * \brief Get buffer for record data to send.
+ *
+ * If the engine has prepared some records to send to the peer, then this
+ * call returns a pointer to the buffer from where such data shall be
+ * read, and its length is written in `*len`. Otherwise, `*len` is set
+ * to 0 and `NULL` is returned.
+ *
+ * \param cc    SSL engine context.
+ * \param len   receives the record data output buffer length, or 0.
+ * \return  the record data output buffer, or `NULL`.
+ */
+unsigned char *br_ssl_engine_sendrec_buf(
+	const br_ssl_engine_context *cc, size_t *len);
+
+/**
+ * \brief Acknowledge some sent record data.
+ *
+ * After reading `len` bytes from the buffer returned by
+ * `br_ssl_engine_sendrec_buf()`, the application shall call this
+ * function to trigger any relevant processing. The `len` parameter
+ * MUST NOT be 0, and MUST NOT exceed the value obtained in the
+ * `br_ssl_engine_sendrec_buf()` call.
+ *
+ * \param cc    SSL engine context.
+ * \param len   number of bytes read (not zero).
+ */
+void br_ssl_engine_sendrec_ack(br_ssl_engine_context *cc, size_t len);
+
+/**
+ * \brief Get buffer for incoming records.
+ *
+ * If the engine is ready to accept records from the peer, then this
+ * call returns a pointer to the buffer where such data shall be
+ * written, and its length is written in `*len`. Otherwise, `*len` is
+ * set to 0 and `NULL` is returned.
+ *
+ * \param cc    SSL engine context.
+ * \param len   receives the record data input buffer length, or 0.
+ * \return  the record data input buffer, or `NULL`.
+ */
+unsigned char *br_ssl_engine_recvrec_buf(
+	const br_ssl_engine_context *cc, size_t *len);
+
+/**
+ * \brief Inform the engine of some new record data.
+ *
+ * After writing `len` bytes in the buffer returned by
+ * `br_ssl_engine_recvrec_buf()`, the application shall call this
+ * function to trigger any relevant processing. The `len` parameter
+ * MUST NOT be 0, and MUST NOT exceed the value obtained in the
+ * `br_ssl_engine_recvrec_buf()` call.
+ *
+ * \param cc    SSL engine context.
+ * \param len   number of bytes pushed (not zero).
+ */
+void br_ssl_engine_recvrec_ack(br_ssl_engine_context *cc, size_t len);
+
+/**
+ * \brief Flush buffered application data.
+ *
+ * If some application data has been buffered in the engine, then wrap
+ * it into a record and mark it for sending. If no application data has
+ * been buffered but the engine would be ready to accept some, AND the
+ * `force` parameter is non-zero, then an empty record is assembled and
+ * marked for sending. In all other cases, this function does nothing.
+ *
+ * Empty records are technically legal, but not all existing SSL/TLS
+ * implementations support them. Empty records can be useful as a
+ * transparent "keep-alive" mechanism to maintain some low-level
+ * network activity.
+ *
+ * \param cc      SSL engine context.
+ * \param force   non-zero to force sending an empty record.
+ */
+void br_ssl_engine_flush(br_ssl_engine_context *cc, int force);
+
+/**
+ * \brief Initiate a closure.
+ *
+ * If, at that point, the context is open and in ready state, then a
+ * `close_notify` alert is assembled and marked for sending; this
+ * triggers the closure protocol. Otherwise, no such alert is assembled.
+ *
+ * \param cc   SSL engine context.
+ */
+void br_ssl_engine_close(br_ssl_engine_context *cc);
+
+/**
+ * \brief Initiate a renegotiation.
+ *
+ * If the engine is failed or closed, or if the peer is known not to
+ * support secure renegotiation (RFC 5746), or if renegotiations have
+ * been disabled with the `BR_OPT_NO_RENEGOTIATION` flag, or if there
+ * is buffered incoming application data, then this function returns 0
+ * and nothing else happens.
+ *
+ * Otherwise, this function returns 1, and a renegotiation attempt is
+ * triggered (if a handshake is already ongoing at that point, then
+ * no new handshake is triggered).
+ *
+ * \param cc   SSL engine context.
+ * \return  1 on success, 0 on error.
+ */
+int br_ssl_engine_renegotiate(br_ssl_engine_context *cc);
+
+/**
+ * \brief Export key material from a connected SSL engine (RFC 5705).
+ *
+ * This call computes a secret key of arbitrary length from the master
+ * secret of a connected SSL engine. If the provided context is not
+ * currently in "application data" state (initial handshake is not
+ * finished, another handshake is ongoing, or the connection failed or
+ * was closed), then this function returns 0. Otherwise, a secret key of
+ * length `len` bytes is computed and written in the buffer pointed to
+ * by `dst`, and 1 is returned.
+ *
+ * The computed key follows the specification described in RFC 5705.
+ * That RFC includes two key computations, with and without a "context
+ * value". If `context` is `NULL`, then the variant without context is
+ * used; otherwise, the `context_len` bytes located at the address
+ * pointed to by `context` are used in the computation. Note that it
+ * is possible to have a "with context" key with a context length of
+ * zero bytes, by setting `context` to a non-`NULL` value but
+ * `context_len` to 0.
+ *
+ * When context bytes are used, the context length MUST NOT exceed
+ * 65535 bytes.
+ *
+ * \param cc            SSL engine context.
+ * \param dst           destination buffer for exported key.
+ * \param len           exported key length (in bytes).
+ * \param label         disambiguation label.
+ * \param context       context value (or `NULL`).
+ * \param context_len   context length (in bytes).
+ * \return  1 on success, 0 on error.
+ */
+int br_ssl_key_export(br_ssl_engine_context *cc,
+	void *dst, size_t len, const char *label,
+	const void *context, size_t context_len);
+
+/*
+ * Pre-declaration for the SSL client context.
+ */
+typedef struct br_ssl_client_context_ br_ssl_client_context;
+
+/**
+ * \brief Type for the client certificate, if requested by the server.
+ */
+typedef struct {
+	/**
+	 * \brief Authentication type.
+	 *
+	 * This is either `BR_AUTH_RSA` (RSA signature), `BR_AUTH_ECDSA`
+	 * (ECDSA signature), or `BR_AUTH_ECDH` (static ECDH key exchange).
+	 */
+	int auth_type;
+
+	/**
+	 * \brief Hash function for computing the CertificateVerify.
+	 *
+	 * This is the symbolic identifier for the hash function that
+	 * will be used to produce the hash of handshake messages, to
+	 * be signed into the CertificateVerify. For full static ECDH
+	 * (client and server certificates are both EC in the same
+	 * curve, and static ECDH is used), this value is set to -1.
+	 *
+	 * Take care that with TLS 1.0 and 1.1, that value MUST match
+	 * the protocol requirements: value must be 0 (MD5+SHA-1) for
+	 * a RSA signature, or 2 (SHA-1) for an ECDSA signature. Only
+	 * TLS 1.2 allows for other hash functions.
+	 */
+	int hash_id;
+
+	/**
+	 * \brief Certificate chain to send to the server.
+	 *
+	 * This is an array of `br_x509_certificate` objects, each
+	 * normally containing a DER-encoded certificate. The client
+	 * code does not try to decode these elements. If there is no
+	 * chain to send to the server, then this pointer shall be
+	 * set to `NULL`.
+	 */
+	const br_x509_certificate *chain;
+
+	/**
+	 * \brief Certificate chain length (number of certificates).
+	 *
+	 * If there is no chain to send to the server, then this value
+	 * shall be set to 0.
+	 */
+	size_t chain_len;
+
+} br_ssl_client_certificate;
+
+/*
+ * Note: the constants below for signatures match the TLS constants.
+ */
+
+/** \brief Client authentication type: static ECDH. */
+#define BR_AUTH_ECDH    0
+/** \brief Client authentication type: RSA signature. */
+#define BR_AUTH_RSA     1
+/** \brief Client authentication type: ECDSA signature. */
+#define BR_AUTH_ECDSA   3
+
+/**
+ * \brief Class type for a certificate handler (client side).
+ *
+ * A certificate handler selects a client certificate chain to send to
+ * the server, upon explicit request from that server. It receives
+ * the list of trust anchor DN from the server, and supported types
+ * of certificates and signatures, and returns the chain to use. It
+ * is also invoked to perform the corresponding private key operation
+ * (a signature, or an ECDH computation).
+ *
+ * The SSL client engine will first push the trust anchor DN with
+ * `start_name_list()`, `start_name()`, `append_name()`, `end_name()`
+ * and `end_name_list()`. Then it will call `choose()`, to select the
+ * actual chain (and signature/hash algorithms). Finally, it will call
+ * either `do_sign()` or `do_keyx()`, depending on the algorithm choices.
+ */
+typedef struct br_ssl_client_certificate_class_ br_ssl_client_certificate_class;
+struct br_ssl_client_certificate_class_ {
+	/**
+	 * \brief Context size (in bytes).
+	 */
+	size_t context_size;
+
+	/**
+	 * \brief Begin reception of a list of trust anchor names. This
+	 * is called while parsing the incoming CertificateRequest.
+	 *
+	 * \param pctx   certificate handler context.
+	 */
+	void (*start_name_list)(const br_ssl_client_certificate_class **pctx);
+
+	/**
+	 * \brief Begin reception of a new trust anchor name.
+	 *
+	 * The total encoded name length is provided; it is less than
+	 * 65535 bytes.
+	 *
+	 * \param pctx   certificate handler context.
+	 * \param len    encoded name length (in bytes).
+	 */
+	void (*start_name)(const br_ssl_client_certificate_class **pctx,
+		size_t len);
+
+	/**
+	 * \brief Receive some more bytes for the current trust anchor name.
+	 *
+	 * The provided reference (`data`) points to a transient buffer
+	 * that may be reused as soon as this function returns. The chunk
+	 * length (`len`) is never zero.
+	 *
+	 * \param pctx   certificate handler context.
+	 * \param data   anchor name chunk.
+	 * \param len    anchor name chunk length (in bytes).
+	 */
+	void (*append_name)(const br_ssl_client_certificate_class **pctx,
+		const unsigned char *data, size_t len);
+
+	/**
+	 * \brief End current trust anchor name.
+	 *
+	 * This function is called when all the encoded anchor name data
+	 * has been provided.
+	 *
+	 * \param pctx   certificate handler context.
+	 */
+	void (*end_name)(const br_ssl_client_certificate_class **pctx);
+
+	/**
+	 * \brief End list of trust anchor names.
+	 *
+	 * This function is called when all the anchor names in the
+	 * CertificateRequest message have been obtained.
+	 *
+	 * \param pctx   certificate handler context.
+	 */
+	void (*end_name_list)(const br_ssl_client_certificate_class **pctx);
+
+	/**
+	 * \brief Select client certificate and algorithms.
+	 *
+	 * This callback function shall fill the provided `choices`
+	 * structure with the selected algorithms and certificate chain.
+	 * The `hash_id`, `chain` and `chain_len` fields must be set. If
+	 * the client cannot or does not wish to send a certificate,
+	 * then it shall set `chain` to `NULL` and `chain_len` to 0.
+	 *
+	 * The `auth_types` parameter describes the authentication types,
+	 * signature algorithms and hash functions that are supported by
+	 * both the client context and the server, and compatible with
+	 * the current protocol version. This is a bit field with the
+	 * following contents:
+	 *
+	 *   - If RSA signatures with hash function x are supported, then
+	 *     bit x is set.
+	 *
+	 *   - If ECDSA signatures with hash function x are supported,
+	 *     then bit 8+x is set.
+	 *
+	 *   - If static ECDH is supported, with a RSA-signed certificate,
+	 *     then bit 16 is set.
+	 *
+	 *   - If static ECDH is supported, with an ECDSA-signed certificate,
+	 *     then bit 17 is set.
+	 *
+	 * Notes:
+	 *
+	 *   - When using TLS 1.0 or 1.1, the hash function for RSA
+	 *     signatures is always the special MD5+SHA-1 (id 0), and the
+	 *     hash function for ECDSA signatures is always SHA-1 (id 2).
+	 *
+	 *   - When using TLS 1.2, the list of hash functions is trimmed
+	 *     down to include only hash functions that the client context
+	 *     can support. The actual server list can be obtained with
+	 *     `br_ssl_client_get_server_hashes()`; that list may be used
+	 *     to select the certificate chain to send to the server.
+	 *
+	 * \param pctx         certificate handler context.
+	 * \param cc           SSL client context.
+	 * \param auth_types   supported authentication types and algorithms.
+	 * \param choices      destination structure for the policy choices.
+	 */
+	void (*choose)(const br_ssl_client_certificate_class **pctx,
+		const br_ssl_client_context *cc, uint32_t auth_types,
+		br_ssl_client_certificate *choices);
+
+	/**
+	 * \brief Perform key exchange (client part).
+	 *
+	 * This callback is invoked in case of a full static ECDH key
+	 * exchange:
+	 *
+	 *   - the cipher suite uses `ECDH_RSA` or `ECDH_ECDSA`;
+	 *
+	 *   - the server requests a client certificate;
+	 *
+	 *   - the client has, and sends, a client certificate that
+	 *     uses an EC key in the same curve as the server's key,
+	 *     and chooses static ECDH (the `hash_id` field in the choice
+	 *     structure was set to -1).
+	 *
+	 * In that situation, this callback is invoked to compute the
+	 * client-side ECDH: the provided `data` (of length `*len` bytes)
+	 * is the server's public key point (as decoded from its
+	 * certificate), and the client shall multiply that point with
+	 * its own private key, and write back the X coordinate of the
+	 * resulting point in the same buffer, starting at offset 0.
+	 * The `*len` value shall be modified to designate the actual
+	 * length of the X coordinate.
+	 *
+	 * The callback must uphold the following:
+	 *
+	 *   - If the input array does not have the proper length for
+	 *     an encoded curve point, then an error (0) shall be reported.
+	 *
+	 *   - If the input array has the proper length, then processing
+	 *     MUST be constant-time, even if the data is not a valid
+	 *     encoded point.
+	 *
+	 *   - This callback MUST check that the input point is valid.
+	 *
+	 * Returned value is 1 on success, 0 on error.
+	 *
+	 * \param pctx   certificate handler context.
+	 * \param data   server public key point.
+	 * \param len    public key point length / X coordinate length.
+	 * \return  1 on success, 0 on error.
+	 */
+	uint32_t (*do_keyx)(const br_ssl_client_certificate_class **pctx,
+		unsigned char *data, size_t *len);
+
+	/**
+	 * \brief Perform a signature (client authentication).
+	 *
+	 * This callback is invoked when a client certificate was sent,
+	 * and static ECDH is not used. It shall compute a signature,
+	 * using the client's private key, over the provided hash value
+	 * (which is the hash of all previous handshake messages).
+	 *
+	 * On input, the hash value to sign is in `data`, of size
+	 * `hv_len`; the involved hash function is identified by
+	 * `hash_id`. The signature shall be computed and written
+	 * back into `data`; the total size of that buffer is `len`
+	 * bytes.
+	 *
+	 * This callback shall verify that the signature length does not
+	 * exceed `len` bytes, and abstain from writing the signature if
+	 * it does not fit.
+	 *
+	 * For RSA signatures, the `hash_id` may be 0, in which case
+	 * this is the special header-less signature specified in TLS 1.0
+	 * and 1.1, with a 36-byte hash value. Otherwise, normal PKCS#1
+	 * v1.5 signatures shall be computed.
+	 *
+	 * For ECDSA signatures, the signature value shall use the ASN.1
+	 * based encoding.
+	 *
+	 * Returned value is the signature length (in bytes), or 0 on error.
+	 *
+	 * \param pctx      certificate handler context.
+	 * \param hash_id   hash function identifier.
+	 * \param hv_len    hash value length (in bytes).
+	 * \param data      input/output buffer (hash value, then signature).
+	 * \param len       total buffer length (in bytes).
+	 * \return  signature length (in bytes) on success, or 0 on error.
+	 */
+	size_t (*do_sign)(const br_ssl_client_certificate_class **pctx,
+		int hash_id, size_t hv_len, unsigned char *data, size_t len);
+};
+
+/**
+ * \brief A single-chain RSA client certificate handler.
+ *
+ * This handler uses a single certificate chain, with an RSA
+ * signature. The list of trust anchor DN is ignored.
+ *
+ * Apart from the first field (vtable pointer), its contents are
+ * opaque and shall not be accessed directly.
+ */
+typedef struct {
+	/** \brief Pointer to vtable. */
+	const br_ssl_client_certificate_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	const br_x509_certificate *chain;
+	size_t chain_len;
+	const br_rsa_private_key *sk;
+	br_rsa_pkcs1_sign irsasign;
+#endif
+} br_ssl_client_certificate_rsa_context;
+
+/**
+ * \brief A single-chain EC client certificate handler.
+ *
+ * This handler uses a single certificate chain, with an EC
+ * private key. The list of trust anchor DN is ignored.
+ *
+ * This handler may support both static ECDH, and ECDSA signatures
+ * (either usage may be selectively disabled).
+ *
+ * Apart from the first field (vtable pointer), its contents are
+ * opaque and shall not be accessed directly.
+ */
+typedef struct {
+	/** \brief Pointer to vtable. */
+	const br_ssl_client_certificate_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	const br_x509_certificate *chain;
+	size_t chain_len;
+	const br_ec_private_key *sk;
+	unsigned allowed_usages;
+	unsigned issuer_key_type;
+	const br_multihash_context *mhash;
+	const br_ec_impl *iec;
+	br_ecdsa_sign iecdsa;
+#endif
+} br_ssl_client_certificate_ec_context;
+
+/**
+ * \brief Context structure for a SSL client.
+ *
+ * The first field (called `eng`) is the SSL engine; all functions that
+ * work on a `br_ssl_engine_context` structure shall take as parameter
+ * a pointer to that field. The other structure fields are opaque and
+ * must not be accessed directly.
+ */
+struct br_ssl_client_context_ {
+	/**
+	 * \brief The encapsulated engine context.
+	 */
+	br_ssl_engine_context eng;
+
+#ifndef BR_DOXYGEN_IGNORE
+	/*
+	 * Minimum ClientHello length; padding with an extension (RFC
+	 * 7685) is added if necessary to match at least that length.
+	 * Such padding is nominally unnecessary, but it has been used
+	 * to work around some server implementation bugs.
+	 */
+	uint16_t min_clienthello_len;
+
+	/*
+	 * Bit field for algorithms (hash + signature) supported by the
+	 * server when requesting a client certificate.
+	 */
+	uint32_t hashes;
+
+	/*
+	 * Server's public key curve.
+	 */
+	int server_curve;
+
+	/*
+	 * Context for certificate handler.
+	 */
+	const br_ssl_client_certificate_class **client_auth_vtable;
+
+	/*
+	 * Client authentication type.
+	 */
+	unsigned char auth_type;
+
+	/*
+	 * Hash function to use for the client signature. This is 0xFF
+	 * if static ECDH is used.
+	 */
+	unsigned char hash_id;
+
+	/*
+	 * For the core certificate handlers, thus avoiding (in most
+	 * cases) the need for an externally provided policy context.
+	 */
+	union {
+		const br_ssl_client_certificate_class *vtable;
+		br_ssl_client_certificate_rsa_context single_rsa;
+		br_ssl_client_certificate_ec_context single_ec;
+	} client_auth;
+
+	/*
+	 * Implementations.
+	 */
+	br_rsa_public irsapub;
+#endif
+};
+
+/**
+ * \brief Get the hash functions and signature algorithms supported by
+ * the server.
+ *
+ * This value is a bit field:
+ *
+ *   - If RSA (PKCS#1 v1.5) is supported with hash function of ID `x`,
+ *     then bit `x` is set (hash function ID is 0 for the special MD5+SHA-1,
+ *     or 2 to 6 for the SHA family).
+ *
+ *   - If ECDSA is supported with hash function of ID `x`, then bit `8+x`
+ *     is set.
+ *
+ *   - Newer algorithms are symbolic 16-bit identifiers that do not
+ *     represent signature algorithm and hash function separately. If
+ *     the TLS-level identifier is `0x0800+x` for a `x` in the 0..15
+ *     range, then bit `16+x` is set.
+ *
+ * "New algorithms" are currently defined only in draft documents, so
+ * this support is subject to possible change. Right now (early 2017),
+ * this maps ed25519 (EdDSA on Curve25519) to bit 23, and ed448 (EdDSA
+ * on Curve448) to bit 24. If the identifiers on the wire change in
+ * a future document, then the decoding mechanism in BearSSL will be
+ * amended to keep mapping ed25519 and ed448 on bits 23 and 24,
+ * respectively. Mapping of other new algorithms (e.g. RSA/PSS) is not
+ * guaranteed yet.
+ *
+ * \param cc   client context.
+ * \return  the server-supported hash functions and signature algorithms.
+ */
+static inline uint32_t
+br_ssl_client_get_server_hashes(const br_ssl_client_context *cc)
+{
+	return cc->hashes;
+}
+
+/**
+ * \brief Get the server key curve.
+ *
+ * This function returns the ID for the curve used by the server's public
+ * key (set when its chain is processed; 0 if the key is not an EC key).
+ *
+ * \param cc   client context.
+ * \return  the server's public key curve ID, or 0.
+ */
+static inline int
+br_ssl_client_get_server_curve(const br_ssl_client_context *cc)
+{
+	return cc->server_curve;
+}
+
+/*
+ * Each br_ssl_client_init_xxx() function sets the list of supported
+ * cipher suites and used implementations, as specified by the profile
+ * name 'xxx'. Defined profile names are:
+ *
+ *    full    all supported versions and suites; constant-time implementations
+ *    TODO: add other profiles
+ */
+
+/**
+ * \brief SSL client profile: full.
+ *
+ * This function initialises the provided SSL client context with
+ * all supported algorithms and cipher suites. It also initialises
+ * a companion X.509 validation engine with all supported algorithms,
+ * and the provided trust anchors; the X.509 engine will be used by
+ * the client context to validate the server's certificate.
+ *
+ * \param cc                  client context to initialise.
+ * \param xc                  X.509 validation context to initialise.
+ * \param trust_anchors       trust anchors to use.
+ * \param trust_anchors_num   number of trust anchors.
+ */
+void br_ssl_client_init_full(br_ssl_client_context *cc,
+	br_x509_minimal_context *xc,
+	const br_x509_trust_anchor *trust_anchors, size_t trust_anchors_num);
+
+/**
+ * \brief Clear the complete contents of a SSL client context.
+ *
+ * Everything is cleared, including the reference to the configured buffer,
+ * implementations, cipher suites and state. This is a preparatory step
+ * to assembling a custom profile.
+ *
+ * \param cc   client context to clear.
+ */
+void br_ssl_client_zero(br_ssl_client_context *cc);
+
+/**
+ * \brief Set an externally provided client certificate handler context.
+ *
+ * The handler's methods are invoked when the server requests a client
+ * certificate. Only the pointer is stored; the handler is not copied.
+ *
+ * \param cc     client context.
+ * \param pctx   certificate handler context (pointer to its vtable field).
+ */
+static inline void
+br_ssl_client_set_client_certificate(br_ssl_client_context *cc,
+	const br_ssl_client_certificate_class **pctx)
+{
+	cc->client_auth_vtable = pctx;
+}
+
+/**
+ * \brief Set the RSA public-key operations implementation.
+ *
+ * It is stored in the context, and used to encrypt the pre-master secret
+ * with the server's RSA public key (RSA-encryption cipher suites only).
+ *
+ * \param cc        client context.
+ * \param irsapub   RSA public-key encryption implementation.
+ */
+static inline void
+br_ssl_client_set_rsapub(br_ssl_client_context *cc, br_rsa_public irsapub)
+{
+	cc->irsapub = irsapub;
+}
+
+/**
+ * \brief Set the "default" RSA implementation for public-key operations.
+ *
+ * This sets the RSA implementation in the client context (for encrypting
+ * the pre-master secret, in `TLS_RSA_*` cipher suites) to the fastest
+ * available on the current platform.
+ *
+ * \param cc   client context.
+ */
+void br_ssl_client_set_default_rsapub(br_ssl_client_context *cc);
+
+/**
+ * \brief Set the minimum ClientHello length (RFC 7685 padding).
+ *
+ * If this value is set and the ClientHello would be shorter, then
+ * the Pad ClientHello extension will be added with enough padding bytes
+ * to reach the target size. Because of the extension header, the resulting
+ * size will sometimes be slightly more than `len` bytes if the target
+ * size cannot be exactly met.
+ *
+ * The target length relates to the _contents_ of the ClientHello, not
+ * counting its 4-byte header. For instance, if `len` is set to 512,
+ * then the padding will bring the ClientHello size to 516 bytes with its
+ * header, and 521 bytes when counting the 5-byte record header.
+ *
+ * \param cc    client context.
+ * \param len   minimum ClientHello length (in bytes, header excluded).
+ */
+static inline void
+br_ssl_client_set_min_clienthello_len(br_ssl_client_context *cc, uint16_t len)
+{
+	cc->min_clienthello_len = len;
+}
+
+/**
+ * \brief Prepare or reset a client context for a new connection.
+ *
+ * The `server_name` parameter is used to fill the SNI extension; the
+ * X.509 "minimal" engine will also match that name against the server
+ * names included in the server's certificate. If the parameter is
+ * `NULL` then no SNI extension will be sent, and the X.509 "minimal"
+ * engine (if used for server certificate validation) will not check
+ * presence of any specific name in the received certificate.
+ *
+ * Therefore, setting the `server_name` to `NULL` shall be reserved
+ * to cases where alternate or additional methods are used to ascertain
+ * that the right server public key is used (e.g. a "known key" model).
+ *
+ * If `resume_session` is non-zero and the context was previously used
+ * then the session parameters may be reused (depending on whether the
+ * server previously sent a non-empty session ID, and accepts the session
+ * resumption). The session parameters for session resumption can also
+ * be set explicitly with `br_ssl_engine_set_session_parameters()`.
+ *
+ * On failure, the context is marked as failed, and this function
+ * returns 0. A possible failure condition is when no initial entropy
+ * was injected, and none could be obtained from the OS (either OS
+ * randomness gathering is not supported, or it failed).
+ *
+ * \param cc               client context.
+ * \param server_name      target server name, or `NULL`.
+ * \param resume_session   non-zero to try session resumption.
+ * \return  0 on failure, 1 on success.
+ */
+int br_ssl_client_reset(br_ssl_client_context *cc,
+	const char *server_name, int resume_session);
+
+/**
+ * \brief Forget any session in the context.
+ *
+ * This means that the next handshake that uses this context will
+ * necessarily be a full handshake (this applies both to new connections
+ * and to renegotiations).
+ *
+ * \param cc   client context.
+ */
+static inline void
+br_ssl_client_forget_session(br_ssl_client_context *cc)
+{
+	cc->eng.session.session_id_len = 0; /* empty ID: nothing to resume */
+}
+
+/**
+ * \brief Set client certificate chain and key (single RSA case).
+ *
+ * This function sets a client certificate chain, that the client will
+ * send to the server whenever a client certificate is requested. This
+ * certificate uses an RSA public key; the corresponding private key is
+ * invoked for authentication. Trust anchor names sent by the server are
+ * ignored.
+ *
+ * The provided chain and private key are linked in the client context;
+ * they must remain valid as long as they may be used, i.e. normally
+ * for the duration of the connection, since they might be invoked
+ * again upon renegotiations.
+ *
+ * \param cc          SSL client context.
+ * \param chain       client certificate chain (SSL order: EE comes first).
+ * \param chain_len   client chain length (number of certificates).
+ * \param sk          client private key.
+ * \param irsasign    RSA signature implementation (PKCS#1 v1.5).
+ */
+void br_ssl_client_set_single_rsa(br_ssl_client_context *cc,
+	const br_x509_certificate *chain, size_t chain_len,
+	const br_rsa_private_key *sk, br_rsa_pkcs1_sign irsasign);
+
+/**
+ * \brief Set the client certificate chain and key (single EC case).
+ *
+ * This function sets a client certificate chain, that the client will
+ * send to the server whenever a client certificate is requested. This
+ * certificate uses an EC public key; the corresponding private key is
+ * invoked for authentication. Trust anchor names sent by the server are
+ * ignored.
+ *
+ * The provided chain and private key are linked in the client context;
+ * they must remain valid as long as they may be used, i.e. normally
+ * for the duration of the connection, since they might be invoked
+ * again upon renegotiations.
+ *
+ * The `allowed_usages` is a combination of usages, namely
+ * `BR_KEYTYPE_KEYX` and/or `BR_KEYTYPE_SIGN`. The `BR_KEYTYPE_KEYX`
+ * value allows full static ECDH, while the `BR_KEYTYPE_SIGN` value
+ * allows ECDSA signatures. If ECDSA signatures are used, then an ECDSA
+ * signature implementation must be provided; otherwise, the `iecdsa`
+ * parameter may be 0.
+ *
+ * The `cert_issuer_key_type` value is either `BR_KEYTYPE_RSA` or
+ * `BR_KEYTYPE_EC`; it is the type of the public key used by the CA
+ * that issued (signed) the client certificate. That value is used with
+ * full static ECDH: support of the certificate by the server depends
+ * on how the certificate was signed. (Note: when using TLS 1.2, this
+ * parameter is ignored; but its value matters for TLS 1.0 and 1.1.)
+ *
+ * \param cc                     SSL client context.
+ * \param chain                  client certificate chain to send.
+ * \param chain_len              chain length (number of certificates).
+ * \param sk                     client private key (EC).
+ * \param allowed_usages         allowed private key usages.
+ * \param cert_issuer_key_type   issuing CA's key type.
+ * \param iec                    EC core implementation.
+ * \param iecdsa                 ECDSA signature implementation ("asn1" format).
+ */
+void br_ssl_client_set_single_ec(br_ssl_client_context *cc,
+	const br_x509_certificate *chain, size_t chain_len,
+	const br_ec_private_key *sk, unsigned allowed_usages,
+	unsigned cert_issuer_key_type,
+	const br_ec_impl *iec, br_ecdsa_sign iecdsa);
+
+/**
+ * \brief Type for a "translated cipher suite", as an array of two
+ * 16-bit integers.
+ *
+ * The first element is the cipher suite identifier (as used on the wire).
+ * The second element is the concatenation of four 4-bit elements which
+ * characterise the cipher suite contents. In most to least significant
+ * order, these 4-bit elements are:
+ *
+ *   - Bits 12 to 15: key exchange + server key type
+ *
+ *     | val | symbolic constant        | suite type  | details                                          |
+ *     | :-- | :----------------------- | :---------- | :----------------------------------------------- |
+ *     |  0  | `BR_SSLKEYX_RSA`         | RSA         | RSA key exchange, key is RSA (encryption)        |
+ *     |  1  | `BR_SSLKEYX_ECDHE_RSA`   | ECDHE_RSA   | ECDHE key exchange, key is RSA (signature)       |
+ *     |  2  | `BR_SSLKEYX_ECDHE_ECDSA` | ECDHE_ECDSA | ECDHE key exchange, key is EC (signature)        |
+ *     |  3  | `BR_SSLKEYX_ECDH_RSA`    | ECDH_RSA    | Key is EC (key exchange), cert signed with RSA   |
+ *     |  4  | `BR_SSLKEYX_ECDH_ECDSA`  | ECDH_ECDSA  | Key is EC (key exchange), cert signed with ECDSA |
+ *
+ *   - Bits 8 to 11: symmetric encryption algorithm
+ *
+ *     | val | symbolic constant      | symmetric encryption | key strength (bits) |
+ *     | :-- | :--------------------- | :------------------- | :------------------ |
+ *     |  0  | `BR_SSLENC_3DES_CBC`   | 3DES/CBC             | 168                 |
+ *     |  1  | `BR_SSLENC_AES128_CBC` | AES-128/CBC          | 128                 |
+ *     |  2  | `BR_SSLENC_AES256_CBC` | AES-256/CBC          | 256                 |
+ *     |  3  | `BR_SSLENC_AES128_GCM` | AES-128/GCM          | 128                 |
+ *     |  4  | `BR_SSLENC_AES256_GCM` | AES-256/GCM          | 256                 |
+ *     |  5  | `BR_SSLENC_CHACHA20`   | ChaCha20/Poly1305    | 256                 |
+ *
+ *   - Bits 4 to 7: MAC algorithm
+ *
+ *     | val | symbolic constant  | MAC type     | details                               |
+ *     | :-- | :----------------- | :----------- | :------------------------------------ |
+ *     |  0  | `BR_SSLMAC_AEAD`   | AEAD         | No dedicated MAC (encryption is AEAD) |
+ *     |  2  | `BR_SSLMAC_SHA1`   | HMAC/SHA-1   | Value matches `br_sha1_ID`            |
+ *     |  4  | `BR_SSLMAC_SHA256` | HMAC/SHA-256 | Value matches `br_sha256_ID`          |
+ *     |  5  | `BR_SSLMAC_SHA384` | HMAC/SHA-384 | Value matches `br_sha384_ID`          |
+ *
+ *   - Bits 0 to 3: hash function for PRF when used with TLS-1.2
+ *
+ *     | val | symbolic constant  | hash function | details                              |
+ *     | :-- | :----------------- | :------------ | :----------------------------------- |
+ *     |  4  | `BR_SSLPRF_SHA256` | SHA-256       | Value matches `br_sha256_ID`         |
+ *     |  5  | `BR_SSLPRF_SHA384` | SHA-384       | Value matches `br_sha384_ID`         |
+ *
+ * For instance, cipher suite `TLS_RSA_WITH_AES_128_GCM_SHA256` has
+ * standard identifier 0x009C, and is translated to 0x0304, for, in
+ * that order: RSA key exchange (0), AES-128/GCM (3), AEAD integrity (0),
+ * SHA-256 in the TLS PRF (4).
+ */
+typedef uint16_t br_suite_translated[2];
+
+#ifndef BR_DOXYGEN_IGNORE
+/*
+ * Constants are already documented in the br_suite_translated type.
+ */
+
+#define BR_SSLKEYX_RSA           0
+#define BR_SSLKEYX_ECDHE_RSA     1
+#define BR_SSLKEYX_ECDHE_ECDSA   2
+#define BR_SSLKEYX_ECDH_RSA      3
+#define BR_SSLKEYX_ECDH_ECDSA    4
+
+#define BR_SSLENC_3DES_CBC       0
+#define BR_SSLENC_AES128_CBC     1
+#define BR_SSLENC_AES256_CBC     2
+#define BR_SSLENC_AES128_GCM     3
+#define BR_SSLENC_AES256_GCM     4
+#define BR_SSLENC_CHACHA20       5
+
+#define BR_SSLMAC_AEAD           0
+#define BR_SSLMAC_SHA1           br_sha1_ID
+#define BR_SSLMAC_SHA256         br_sha256_ID
+#define BR_SSLMAC_SHA384         br_sha384_ID
+
+#define BR_SSLPRF_SHA256         br_sha256_ID
+#define BR_SSLPRF_SHA384         br_sha384_ID
+
+#endif
+
+/*
+ * Pre-declaration for the SSL server context.
+ */
+typedef struct br_ssl_server_context_ br_ssl_server_context;
+
+/**
+ * \brief Type for the server policy choices, taken after analysis of
+ * the client message (ClientHello).
+ */
+typedef struct {
+	/**
+	 * \brief Cipher suite to use with that client.
+	 */
+	uint16_t cipher_suite;
+
+	/**
+	 * \brief Hash function or algorithm for signing the ServerKeyExchange.
+	 *
+	 * This parameter is ignored for `TLS_RSA_*` and `TLS_ECDH_*`
+	 * cipher suites; it is used only for `TLS_ECDHE_*` suites, in
+	 * which the server _signs_ the ephemeral EC Diffie-Hellman
+	 * parameters sent to the client.
+	 *
+	 * This identifier must be one of the following values:
+	 *
+	 *   - `0xFF00 + id`, where `id` is a hash function identifier
+	 *     (0 for MD5+SHA-1, or 2 to 6 for one of the SHA functions);
+	 *
+	 *   - a full 16-bit identifier, lower than `0xFF00`.
+	 *
+	 * If the first option is used, then the SSL engine will
+	 * compute the hash of the data that is to be signed, with the
+	 * designated hash function. The `do_sign()` method will be
+	 * invoked with that hash value provided in the `data`
+	 * buffer.
+	 *
+	 * If the second option is used, then the SSL engine will NOT
+	 * compute a hash on the data; instead, it will provide the
+	 * to-be-signed data itself in `data`, i.e. the concatenation of
+	 * the client random, server random, and encoded ECDH
+	 * parameters. Furthermore, with TLS-1.2 and later, the 16-bit
+	 * identifier will be used "as is" in the protocol, in the
+	 * SignatureAndHashAlgorithm; for instance, `0x0401` stands for
+	 * RSA PKCS#1 v1.5 signature (the `01`) with SHA-256 as hash
+	 * function (the `04`).
+	 *
+	 * Take care that with TLS 1.0 and 1.1, the hash function is
+	 * constrained by the protocol: RSA signature must use
+	 * MD5+SHA-1 (so use `0xFF00`), while ECDSA must use SHA-1
+	 * (`0xFF02`). Since TLS 1.0 and 1.1 don't include a
+	 * SignatureAndHashAlgorithm field in their ServerKeyExchange
+	 * messages, any value below `0xFF00` will be usable to send the
+	 * raw ServerKeyExchange data to the `do_sign()` callback, but
+	 * that callback must still follow the protocol requirements
+	 * when generating the signature.
+	 */
+	unsigned algo_id;
+
+	/**
+	 * \brief Certificate chain to send to the client.
+	 *
+	 * This is an array of `br_x509_certificate` objects, each
+	 * normally containing a DER-encoded certificate. The server
+	 * code does not try to decode these elements.
+	 */
+	const br_x509_certificate *chain;
+
+	/**
+	 * \brief Certificate chain length (number of certificates).
+	 */
+	size_t chain_len;
+
+} br_ssl_server_choices;
+
+/**
+ * \brief Class type for a policy handler (server side).
+ *
+ * A policy handler selects the policy parameters for a connection
+ * (cipher suite and other algorithms, and certificate chain to send to
+ * the client); it also performs the server-side computations involving
+ * its permanent private key.
+ *
+ * The SSL server engine will invoke first `choose()`, once the
+ * ClientHello message has been received, then either `do_keyx()`
+ * or `do_sign()`, depending on the cipher suite.
+ */
+typedef struct br_ssl_server_policy_class_ br_ssl_server_policy_class;
+struct br_ssl_server_policy_class_ {
+	/**
+	 * \brief Context size (in bytes).
+	 */
+	size_t context_size;
+
+	/**
+	 * \brief Select algorithms and certificates for this connection.
+	 *
+	 * This callback function shall fill the provided `choices`
+	 * structure with the policy choices for this connection. This
+	 * entails selecting the cipher suite, hash function for signing
+	 * the ServerKeyExchange (applicable only to ECDHE cipher suites),
+	 * and certificate chain to send.
+	 *
+	 * The callback receives a pointer to the server context that
+	 * contains the relevant data. In particular, the functions
+	 * `br_ssl_server_get_client_suites()`,
+	 * `br_ssl_server_get_client_hashes()` and
+	 * `br_ssl_server_get_client_curves()` can be used to obtain
+	 * the cipher suites, hash functions and elliptic curves
+	 * supported by both the client and server, respectively. The
+	 * `br_ssl_engine_get_version()` and `br_ssl_engine_get_server_name()`
+	 * functions yield the protocol version and requested server name
+	 * (SNI), respectively.
+	 *
+	 * This function may modify its context structure (`pctx`) in
+	 * arbitrary ways to keep track of its own choices.
+	 *
+	 * This function shall return 1 if appropriate policy choices
+	 * could be made, or 0 if this connection cannot be pursued.
+	 *
+	 * \param pctx      policy context.
+	 * \param cc        SSL server context.
+	 * \param choices   destination structure for the policy choices.
+	 * \return  1 on success, 0 on error.
+	 */
+	int (*choose)(const br_ssl_server_policy_class **pctx,
+		const br_ssl_server_context *cc,
+		br_ssl_server_choices *choices);
+
+	/**
+	 * \brief Perform key exchange (server part).
+	 *
+	 * This callback is invoked to perform the server-side cryptographic
+	 * operation for a key exchange that is not ECDHE. This callback
+	 * uses the private key.
+	 *
+	 * **For RSA key exchange**, the provided `data` (of length `*len`
+	 * bytes) shall be decrypted with the server's private key, and
+	 * the 48-byte premaster secret copied back to the first 48 bytes
+	 * of `data`.
+	 *
+	 *   - The caller makes sure that `*len` is at least 59 bytes.
+	 *
+	 *   - This callback MUST check that the provided length matches
+	 *     that of the key modulus; it shall report an error otherwise.
+	 *
+	 *   - If the length matches that of the RSA key modulus, then
+	 *     processing MUST be constant-time, even if decryption fails,
+	 *     or the padding is incorrect, or the plaintext message length
+	 *     is not exactly 48 bytes.
+	 *
+	 *   - This callback need not check the first two bytes of the
+	 *     obtained pre-master secret (the caller will do that).
+	 *
+	 *   - If an error is reported (0), then what the callback put
+	 *     in the first 48 bytes of `data` is unimportant (the caller
+	 *     will use random bytes instead).
+	 *
+	 * **For ECDH key exchange**, the provided `data` (of length `*len`
+	 * bytes) is the elliptic curve point from the client. The
+	 * callback shall multiply it with its private key, and store
+	 * the resulting X coordinate in `data`, starting at offset 0,
+	 * and set `*len` to the length of the X coordinate.
+	 *
+	 *   - If the input array does not have the proper length for
+	 *     an encoded curve point, then an error (0) shall be reported.
+	 *
+	 *   - If the input array has the proper length, then processing
+	 *     MUST be constant-time, even if the data is not a valid
+	 *     encoded point.
+	 *
+	 *   - This callback MUST check that the input point is valid.
+	 *
+	 * Returned value is 1 on success, 0 on error.
+	 *
+	 * \param pctx   policy context.
+	 * \param data   key exchange data from the client.
+	 * \param len    key exchange data length (in bytes).
+	 * \return  1 on success, 0 on error.
+	 */
+	uint32_t (*do_keyx)(const br_ssl_server_policy_class **pctx,
+		unsigned char *data, size_t *len);
+
+	/**
+	 * \brief Perform a signature (for a ServerKeyExchange message).
+	 *
+	 * This callback function is invoked for ECDHE cipher suites. On
+	 * input, the hash value or message to sign is in `data`, of
+	 * size `hv_len`; the involved hash function or algorithm is
+	 * identified by `algo_id`. The signature shall be computed and
+	 * written back into `data`; the total size of that buffer is
+	 * `len` bytes.
+	 *
+	 * This callback shall verify that the signature length does not
+	 * exceed `len` bytes, and abstain from writing the signature if
+	 * it does not fit.
+	 *
+	 * The `algo_id` value matches that which was written in the
+	 * `choices` structure by the `choose()` callback. This will be
+	 * one of the following:
+	 *
+	 *   - `0xFF00 + id` for a hash function identifier `id`. In
+	 *     that case, the `data` buffer contains a hash value
+	 *     already computed over the data that is to be signed,
+	 *     of length `hv_len`. The `id` may be 0 to designate the
+	 *     special MD5+SHA-1 concatenation (old-style RSA signing).
+	 *
+	 *   - Another value, lower than `0xFF00`. The `data` buffer
+	 *     then contains the raw, non-hashed data to be signed
+	 *     (concatenation of the client and server randoms and
+	 *     ECDH parameters). The callback is responsible to apply
+	 *     any relevant hashing as part of the signing process.
+	 *
+	 * Returned value is the signature length (in bytes), or 0 on error.
+	 *
+	 * \param pctx      policy context.
+	 * \param algo_id   hash function / algorithm identifier.
+	 * \param data      input/output buffer (message/hash, then signature).
+	 * \param hv_len    hash value or message length (in bytes).
+	 * \param len       total buffer length (in bytes).
+	 * \return  signature length (in bytes) on success, or 0 on error.
+	 */
+	size_t (*do_sign)(const br_ssl_server_policy_class **pctx,
+		unsigned algo_id,
+		unsigned char *data, size_t hv_len, size_t len);
+};
+
+/**
+ * \brief A single-chain RSA policy handler.
+ *
+ * This policy context uses a single certificate chain, and a RSA
+ * private key. The context can be restricted to only signatures or
+ * only key exchange.
+ *
+ * Apart from the first field (vtable pointer), its contents are
+ * opaque and shall not be accessed directly.
+ */
+typedef struct {
+	/** \brief Pointer to vtable. */
+	const br_ssl_server_policy_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	const br_x509_certificate *chain;
+	size_t chain_len;
+	const br_rsa_private_key *sk;
+	unsigned allowed_usages;
+	br_rsa_private irsacore;
+	br_rsa_pkcs1_sign irsasign;
+#endif
+} br_ssl_server_policy_rsa_context;
+
+/**
+ * \brief A single-chain EC policy handler.
+ *
+ * This policy context uses a single certificate chain, and an EC
+ * private key. The context can be restricted to only signatures or
+ * only key exchange.
+ *
+ * Due to how TLS is defined, this context must be made aware whether
+ * the server certificate was itself signed with RSA or ECDSA. The code
+ * does not try to decode the certificate to obtain that information.
+ *
+ * Apart from the first field (vtable pointer), its contents are
+ * opaque and shall not be accessed directly.
+ */
+typedef struct {
+	/** \brief Pointer to vtable. */
+	const br_ssl_server_policy_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	const br_x509_certificate *chain;
+	size_t chain_len;
+	const br_ec_private_key *sk;
+	unsigned allowed_usages;
+	unsigned cert_issuer_key_type;
+	const br_multihash_context *mhash;
+	const br_ec_impl *iec;
+	br_ecdsa_sign iecdsa;
+#endif
+} br_ssl_server_policy_ec_context;
+
+/**
+ * \brief Class type for a session parameter cache.
+ *
+ * Session parameters are saved in the cache with `save()`, and
+ * retrieved with `load()`. The cache implementation can apply any
+ * storage and eviction strategy that it sees fit. The SSL server
+ * context that performs the request is provided, so that its
+ * functionalities may be used by the implementation (e.g. hash
+ * functions or random number generation).
+ */
+typedef struct br_ssl_session_cache_class_ br_ssl_session_cache_class;
+struct br_ssl_session_cache_class_ {
+	/**
+	 * \brief Context size (in bytes).
+	 */
+	size_t context_size;
+
+	/**
+	 * \brief Record a session.
+	 *
+	 * This callback should record the provided session parameters.
+	 * The `params` structure is transient, so its contents shall
+	 * be copied into the cache. The session ID has been randomly
+	 * generated and always has length exactly 32 bytes.
+	 *
+	 * \param ctx          session cache context.
+	 * \param server_ctx   SSL server context.
+	 * \param params       session parameters to save.
+	 */
+	void (*save)(const br_ssl_session_cache_class **ctx,
+		br_ssl_server_context *server_ctx,
+		const br_ssl_session_parameters *params);
+
+	/**
+	 * \brief Lookup a session in the cache.
+	 *
+	 * The session ID to lookup is in `params` and always has length
+	 * exactly 32 bytes. If the session parameters are found in the
+	 * cache, then the parameters shall be copied into the `params`
+	 * structure. Returned value is 1 on successful lookup, 0
+	 * otherwise.
+	 *
+	 * \param ctx          session cache context.
+	 * \param server_ctx   SSL server context.
+	 * \param params       destination for session parameters.
+	 * \return  1 if found, 0 otherwise.
+	 */
+	int (*load)(const br_ssl_session_cache_class **ctx,
+		br_ssl_server_context *server_ctx,
+		br_ssl_session_parameters *params);
+};
+
+/**
+ * \brief Context for a basic cache system.
+ *
+ * The system stores session parameters in a buffer provided at
+ * initialisation time. Each entry uses exactly 100 bytes, and
+ * buffer sizes up to 4294967295 bytes are supported.
+ *
+ * Entries are evicted with a LRU (Least Recently Used) policy. A
+ * search tree is maintained to keep lookups fast even with large
+ * caches.
+ *
+ * Apart from the first field (vtable pointer), the structure
+ * contents are opaque and shall not be accessed directly.
+ */
+typedef struct {
+	/** \brief Pointer to vtable. */
+	const br_ssl_session_cache_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	unsigned char *store;
+	size_t store_len, store_ptr;
+	unsigned char index_key[32];
+	const br_hash_class *hash;
+	int init_done;
+	uint32_t head, tail, root;
+#endif
+} br_ssl_session_cache_lru;
+
+/**
+ * \brief Initialise a LRU session cache with the provided storage space.
+ *
+ * The provided storage space must remain valid as long as the cache
+ * is used. Arbitrary lengths are supported, up to 4294967295 bytes;
+ * each entry uses up exactly 100 bytes.
+ *
+ * \param cc          session cache context.
+ * \param store       storage space for cached entries.
+ * \param store_len   storage space length (in bytes).
+ */
+void br_ssl_session_cache_lru_init(br_ssl_session_cache_lru *cc,
+	unsigned char *store, size_t store_len);
+
+/**
+ * \brief Forget an entry in an LRU session cache.
+ *
+ * The session cache context must have been initialised. The entry
+ * with the provided session ID (of exactly 32 bytes) is looked for
+ * in the cache; if located, it is disabled.
+ *
+ * \param cc   session cache context.
+ * \param id   session ID to forget.
+ */
+void br_ssl_session_cache_lru_forget(
+	br_ssl_session_cache_lru *cc, const unsigned char *id);
+
+/**
+ * \brief Context structure for a SSL server.
+ *
+ * The first field (called `eng`) is the SSL engine; all functions that
+ * work on a `br_ssl_engine_context` structure shall take as parameter
+ * a pointer to that field. The other structure fields are opaque and
+ * must not be accessed directly.
+ */
+struct br_ssl_server_context_ {
+	/**
+	 * \brief The encapsulated engine context.
+	 */
+	br_ssl_engine_context eng;
+
+#ifndef BR_DOXYGEN_IGNORE
+	/*
+	 * Maximum version from the client.
+	 */
+	uint16_t client_max_version;
+
+	/*
+	 * Session cache.
+	 */
+	const br_ssl_session_cache_class **cache_vtable;
+
+	/*
+	 * Translated cipher suites supported by the client. The list
+	 * is trimmed to include only the cipher suites that the
+	 * server also supports; they are in the same order as in the
+	 * client message.
+	 */
+	br_suite_translated client_suites[BR_MAX_CIPHER_SUITES];
+	unsigned char client_suites_num;
+
+	/*
+	 * Hash functions supported by the client, with ECDSA and RSA
+	 * (bit mask). For hash function with id 'x', set bit index is
+	 * x for RSA, x+8 for ECDSA. For newer algorithms, with ID
+	 * 0x08**, bit 16+k is set for algorithm 0x0800+k.
+	 */
+	uint32_t hashes;
+
+	/*
+	 * Curves supported by the client (bit mask, for named curves).
+	 */
+	uint32_t curves;
+
+	/*
+	 * Context for chain handler.
+	 */
+	const br_ssl_server_policy_class **policy_vtable;
+	uint16_t sign_hash_id;
+
+	/*
+	 * For the core handlers, thus avoiding (in most cases) the
+	 * need for an externally provided policy context.
+	 */
+	union {
+		const br_ssl_server_policy_class *vtable;
+		br_ssl_server_policy_rsa_context single_rsa;
+		br_ssl_server_policy_ec_context single_ec;
+	} chain_handler;
+
+	/*
+	 * Buffer for the ECDHE private key.
+	 */
+	unsigned char ecdhe_key[70];
+	size_t ecdhe_key_len;
+
+	/*
+	 * Trust anchor names for client authentication. "ta_names" and
+	 * "tas" cannot be both non-NULL.
+	 */
+	const br_x500_name *ta_names;
+	const br_x509_trust_anchor *tas;
+	size_t num_tas;
+	size_t cur_dn_index;
+	const unsigned char *cur_dn;
+	size_t cur_dn_len;
+
+	/*
+	 * Buffer for the hash value computed over all handshake messages
+	 * prior to CertificateVerify, and identifier for the hash function.
+	 */
+	unsigned char hash_CV[64];
+	size_t hash_CV_len;
+	int hash_CV_id;
+
+	/*
+	 * Server-specific implementations.
+	 * (none for now)
+	 */
+#endif
+};
+
+/*
+ * Each br_ssl_server_init_xxx() function sets the list of supported
+ * cipher suites and used implementations, as specified by the profile
+ * name 'xxx'. Defined profile names are:
+ *
+ *    full_rsa    all supported algorithms, server key type is RSA
+ *    full_ec     all supported algorithms, server key type is EC
+ *    TODO: add other profiles
+ *
+ * Naming scheme for "minimal" profiles: min123
+ *
+ * -- character 1: key exchange
+ *      r = RSA
+ *      e = ECDHE_RSA
+ *      f = ECDHE_ECDSA
+ *      u = ECDH_RSA
+ *      v = ECDH_ECDSA
+ * -- character 2: version / PRF
+ *      0 = TLS 1.0 / 1.1 with MD5+SHA-1
+ *      2 = TLS 1.2 with SHA-256
+ *      3 = TLS 1.2 with SHA-384
+ * -- character 3: encryption
+ *      a = AES/CBC
+ *      d = 3DES/CBC
+ *      g = AES/GCM
+ *      c = ChaCha20+Poly1305
+ */
+
+/**
+ * \brief SSL server profile: full_rsa.
+ *
+ * This function initialises the provided SSL server context with
+ * all supported algorithms and cipher suites that rely on a RSA
+ * key pair.
+ *
+ * \param cc          server context to initialise.
+ * \param chain       server certificate chain.
+ * \param chain_len   certificate chain length (number of certificates).
+ * \param sk          RSA private key.
+ */
+void br_ssl_server_init_full_rsa(br_ssl_server_context *cc,
+	const br_x509_certificate *chain, size_t chain_len,
+	const br_rsa_private_key *sk);
+
+/**
+ * \brief SSL server profile: full_ec.
+ *
+ * This function initialises the provided SSL server context with
+ * all supported algorithms and cipher suites that rely on an EC
+ * key pair.
+ *
+ * The key type of the CA that issued the server's certificate must
+ * be provided, since it matters for ECDH cipher suites (ECDH_RSA
+ * suites require a RSA-powered CA). The key type is either
+ * `BR_KEYTYPE_RSA` or `BR_KEYTYPE_EC`.
+ *
+ * \param cc                     server context to initialise.
+ * \param chain                  server certificate chain.
+ * \param chain_len              chain length (number of certificates).
+ * \param cert_issuer_key_type   certificate issuer's key type.
+ * \param sk                     EC private key.
+ */
+void br_ssl_server_init_full_ec(br_ssl_server_context *cc,
+	const br_x509_certificate *chain, size_t chain_len,
+	unsigned cert_issuer_key_type, const br_ec_private_key *sk);
+
+/**
+ * \brief SSL server profile: minr2g.
+ *
+ * This profile uses only TLS_RSA_WITH_AES_128_GCM_SHA256. Server key is
+ * RSA, and RSA key exchange is used (not forward secure, but uses little
+ * CPU in the client).
+ *
+ * \param cc          server context to initialise.
+ * \param chain       server certificate chain.
+ * \param chain_len   certificate chain length (number of certificates).
+ * \param sk          RSA private key.
+ */
+void br_ssl_server_init_minr2g(br_ssl_server_context *cc,
+	const br_x509_certificate *chain, size_t chain_len,
+	const br_rsa_private_key *sk);
+
+/**
+ * \brief SSL server profile: mine2g.
+ *
+ * This profile uses only TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256. Server key
+ * is RSA, and ECDHE key exchange is used. This suite provides forward
+ * security, with a higher CPU expense on the client, and a somewhat
+ * larger code footprint (compared to "minr2g").
+ *
+ * \param cc          server context to initialise.
+ * \param chain       server certificate chain.
+ * \param chain_len   certificate chain length (number of certificates).
+ * \param sk          RSA private key.
+ */
+void br_ssl_server_init_mine2g(br_ssl_server_context *cc,
+	const br_x509_certificate *chain, size_t chain_len,
+	const br_rsa_private_key *sk);
+
+/**
+ * \brief SSL server profile: minf2g.
+ *
+ * This profile uses only TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256.
+ * Server key is EC, and ECDHE key exchange is used. This suite provides
+ * forward security, with a higher CPU expense on the client and server
+ * (by a factor of about 3 to 4), and a somewhat larger code footprint
+ * (compared to "minu2g" and "minv2g").
+ *
+ * \param cc          server context to initialise.
+ * \param chain       server certificate chain.
+ * \param chain_len   certificate chain length (number of certificates).
+ * \param sk          EC private key.
+ */
+void br_ssl_server_init_minf2g(br_ssl_server_context *cc,
+	const br_x509_certificate *chain, size_t chain_len,
+	const br_ec_private_key *sk);
+
+/**
+ * \brief SSL server profile: minu2g.
+ *
+ * This profile uses only TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256.
+ * Server key is EC, and ECDH key exchange is used; the issuing CA used
+ * a RSA key.
+ *
+ * The "minu2g" and "minv2g" profiles do not provide forward secrecy,
+ * but are the lightest on the server (for CPU usage), and are rather
+ * inexpensive on the client as well.
+ *
+ * \param cc          server context to initialise.
+ * \param chain       server certificate chain.
+ * \param chain_len   certificate chain length (number of certificates).
+ * \param sk          EC private key.
+ */
+void br_ssl_server_init_minu2g(br_ssl_server_context *cc,
+	const br_x509_certificate *chain, size_t chain_len,
+	const br_ec_private_key *sk);
+
+/**
+ * \brief SSL server profile: minv2g.
+ *
+ * This profile uses only TLS_ECDH_ECDSA_WITH_AES_128_GCM_SHA256.
+ * Server key is EC, and ECDH key exchange is used; the issuing CA used
+ * an EC key.
+ *
+ * The "minu2g" and "minv2g" profiles do not provide forward secrecy,
+ * but are the lightest on the server (for CPU usage), and are rather
+ * inexpensive on the client as well.
+ *
+ * \param cc          server context to initialise.
+ * \param chain       server certificate chain.
+ * \param chain_len   certificate chain length (number of certificates).
+ * \param sk          EC private key.
+ */
+void br_ssl_server_init_minv2g(br_ssl_server_context *cc,
+	const br_x509_certificate *chain, size_t chain_len,
+	const br_ec_private_key *sk);
+
+/**
+ * \brief SSL server profile: mine2c.
+ *
+ * This profile uses only TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256.
+ * Server key is RSA, and ECDHE key exchange is used. This suite
+ * provides forward secrecy.
+ *
+ * \param cc          server context to initialise.
+ * \param chain       server certificate chain.
+ * \param chain_len   certificate chain length (number of certificates).
+ * \param sk          RSA private key.
+ */
+void br_ssl_server_init_mine2c(br_ssl_server_context *cc,
+	const br_x509_certificate *chain, size_t chain_len,
+	const br_rsa_private_key *sk);
+
+/**
+ * \brief SSL server profile: minf2c.
+ *
+ * This profile uses only TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256.
+ * Server key is EC, and ECDHE key exchange is used. This suite provides
+ * forward secrecy.
+ *
+ * \param cc          server context to initialise.
+ * \param chain       server certificate chain.
+ * \param chain_len   certificate chain length (number of certificates).
+ * \param sk          EC private key.
+ */
+void br_ssl_server_init_minf2c(br_ssl_server_context *cc,
+	const br_x509_certificate *chain, size_t chain_len,
+	const br_ec_private_key *sk);
+
+/**
+ * \brief Get the supported client suites.
+ *
+ * This function shall be called only after the ClientHello has been
+ * processed, typically from the policy engine. The returned array
+ * contains the cipher suites that are supported by both the client
+ * and the server; these suites are in client preference order, unless
+ * the `BR_OPT_ENFORCE_SERVER_PREFERENCES` flag was set, in which case
+ * they are in server preference order.
+ *
+ * The suites are _translated_, which means that each suite is given
+ * as two 16-bit integers: the standard suite identifier, and its
+ * translated version, broken down into its individual components,
+ * as explained with the `br_suite_translated` type.
+ *
+ * The returned array is allocated in the context and will be rewritten
+ * by each handshake.
+ *
+ * \param cc    server context.
+ * \param num   receives the array size (number of suites).
+ * \return  the translated common cipher suites, in preference order.
+ */
+static inline const br_suite_translated *
+br_ssl_server_get_client_suites(const br_ssl_server_context *cc, size_t *num)
+{
+	*num = cc->client_suites_num;	/* stored unconditionally: num != NULL */
+	return cc->client_suites;	/* context's own storage, not a copy */
+}
+
+/**
+ * \brief Get the hash functions and signature algorithms supported by
+ * the client.
+ *
+ * This value is a bit field:
+ *
+ *   - If RSA (PKCS#1 v1.5) is supported with hash function of ID `x`,
+ *     then bit `x` is set (hash function ID is 0 for the special MD5+SHA-1,
+ *     or 2 to 6 for the SHA family).
+ *
+ *   - If ECDSA is supported with hash function of ID `x`, then bit `8+x`
+ *     is set.
+ *
+ *   - Newer algorithms are symbolic 16-bit identifiers that do not
+ *     represent signature algorithm and hash function separately. If
+ *     the TLS-level identifier is `0x0800+x` for a `x` in the 0..15
+ *     range, then bit `16+x` is set.
+ *
+ * "New algorithms" are currently defined only in draft documents, so
+ * this support is subject to possible change. Right now (early 2017),
+ * this maps ed25519 (EdDSA on Curve25519) to bit 23, and ed448 (EdDSA
+ * on Curve448) to bit 24. If the identifiers on the wire change in a
+ * future document, then the decoding mechanism in BearSSL will be
+ * amended to keep mapping ed25519 and ed448 on bits 23 and 24,
+ * respectively. Mapping of other new algorithms (e.g. RSA/PSS) is not
+ * guaranteed yet.
+ *
+ * \param cc   server context.
+ * \return  the client-supported hash functions and signature algorithms.
+ */
+static inline uint32_t
+br_ssl_server_get_client_hashes(const br_ssl_server_context *cc)
+{
+	return cc->hashes;	/* plain read of the stored bit field */
+}
+
+/**
+ * \brief Get the elliptic curves supported by the client.
+ *
+ * This is a bit field (bit x is set if curve of ID x is supported).
+ *
+ * \param cc   server context.
+ * \return  the client-supported elliptic curves.
+ */
+static inline uint32_t
+br_ssl_server_get_client_curves(const br_ssl_server_context *cc)
+{
+	return cc->curves;	/* plain read of the stored bit field */
+}
+
+/**
+ * \brief Clear the complete contents of a SSL server context.
+ *
+ * Everything is cleared, including the reference to the configured buffer,
+ * implementations, cipher suites and state. This is a preparatory step
+ * to assembling a custom profile.
+ *
+ * \param cc   server context to clear.
+ */
+void br_ssl_server_zero(br_ssl_server_context *cc);
+
+/**
+ * \brief Set an externally provided policy context.
+ *
+ * The policy context's methods are invoked to decide the cipher suite
+ * and certificate chain, and to perform operations involving the server's
+ * private key.
+ *
+ * \param cc     server context.
+ * \param pctx   policy context (pointer to its vtable field).
+ */
+static inline void
+br_ssl_server_set_policy(br_ssl_server_context *cc,
+	const br_ssl_server_policy_class **pctx)
+{
+	cc->policy_vtable = pctx;	/* pointer stored as-is, not copied */
+}
+
+/**
+ * \brief Set the server certificate chain and key (single RSA case).
+ *
+ * This function uses a policy context included in the server context.
+ * It configures use of a single server certificate chain with a RSA
+ * private key. The `allowed_usages` is a combination of usages, namely
+ * `BR_KEYTYPE_KEYX` and/or `BR_KEYTYPE_SIGN`; this enables or disables
+ * the corresponding cipher suites (i.e. `TLS_RSA_*` use the RSA key for
+ * key exchange, while `TLS_ECDHE_RSA_*` use the RSA key for signatures).
+ *
+ * \param cc               server context.
+ * \param chain            server certificate chain to send to the client.
+ * \param chain_len        chain length (number of certificates).
+ * \param sk               server private key (RSA).
+ * \param allowed_usages   allowed private key usages.
+ * \param irsacore         RSA core implementation.
+ * \param irsasign         RSA signature implementation (PKCS#1 v1.5).
+ */
+void br_ssl_server_set_single_rsa(br_ssl_server_context *cc,
+	const br_x509_certificate *chain, size_t chain_len,
+	const br_rsa_private_key *sk, unsigned allowed_usages,
+	br_rsa_private irsacore, br_rsa_pkcs1_sign irsasign);
+
+/**
+ * \brief Set the server certificate chain and key (single EC case).
+ *
+ * This function uses a policy context included in the server context.
+ * It configures use of a single server certificate chain with an EC
+ * private key. The `allowed_usages` is a combination of usages, namely
+ * `BR_KEYTYPE_KEYX` and/or `BR_KEYTYPE_SIGN`; this enables or disables
+ * the corresponding cipher suites (i.e. `TLS_ECDH_*` use the EC key for
+ * key exchange, while `TLS_ECDHE_ECDSA_*` use the EC key for signatures).
+ *
+ * In order to support `TLS_ECDH_*` cipher suites (non-ephemeral ECDH),
+ * the algorithm type of the key used by the issuing CA to sign the
+ * server's certificate must be provided, as `cert_issuer_key_type`
+ * parameter (this value is either `BR_KEYTYPE_RSA` or `BR_KEYTYPE_EC`).
+ *
+ * \param cc                     server context.
+ * \param chain                  server certificate chain to send.
+ * \param chain_len              chain length (number of certificates).
+ * \param sk                     server private key (EC).
+ * \param allowed_usages         allowed private key usages.
+ * \param cert_issuer_key_type   issuing CA's key type.
+ * \param iec                    EC core implementation.
+ * \param iecdsa                 ECDSA signature implementation ("asn1" format).
+ */
+void br_ssl_server_set_single_ec(br_ssl_server_context *cc,
+	const br_x509_certificate *chain, size_t chain_len,
+	const br_ec_private_key *sk, unsigned allowed_usages,
+	unsigned cert_issuer_key_type,
+	const br_ec_impl *iec, br_ecdsa_sign iecdsa);
+
+/**
+ * \brief Activate client certificate authentication.
+ *
+ * The trust anchor encoded X.500 names (DN) to send to the client are
+ * provided. A client certificate will be requested and validated through
+ * the X.509 validator configured in the SSL engine. If `num` is 0, then
+ * client certificate authentication is disabled.
+ *
+ * If the client does not send a certificate, or on validation failure,
+ * the handshake aborts. Unauthenticated clients can be tolerated by
+ * setting the `BR_OPT_TOLERATE_NO_CLIENT_AUTH` flag.
+ *
+ * The provided array is linked in, not copied, so that pointer must
+ * remain valid as long as anchor names may be used.
+ *
+ * \param cc         server context.
+ * \param ta_names   encoded trust anchor names.
+ * \param num        number of encoded trust anchor names.
+ */
+static inline void
+br_ssl_server_set_trust_anchor_names(br_ssl_server_context *cc,
+	const br_x500_name *ta_names, size_t num)
+{
+	cc->ta_names = ta_names;	/* linked in, not copied */
+	cc->tas = NULL;	/* clears the source set by the _alt variant */
+	cc->num_tas = num;	/* count field also written by the _alt variant */
+}
+
+/**
+ * \brief Activate client certificate authentication.
+ *
+ * This is a variant for `br_ssl_server_set_trust_anchor_names()`: the
+ * trust anchor names are provided not as an array of stand-alone names
+ * (`br_x500_name` structures), but as an array of trust anchors
+ * (`br_x509_trust_anchor` structures). The server engine itself will
+ * only use the `dn` field of each trust anchor. This is meant to allow
+ * defining a single array of trust anchors, to be used here and in the
+ * X.509 validation engine itself.
+ *
+ * The provided array is linked in, not copied, so that pointer must
+ * remain valid as long as anchor names may be used.
+ *
+ * \param cc    server context.
+ * \param tas   trust anchors (only names are used).
+ * \param num   number of trust anchors.
+ */
+static inline void
+br_ssl_server_set_trust_anchor_names_alt(br_ssl_server_context *cc,
+	const br_x509_trust_anchor *tas, size_t num)
+{
+	cc->ta_names = NULL;	/* clears the stand-alone names source */
+	cc->tas = tas;	/* linked in, not copied */
+	cc->num_tas = num;	/* count field also written by the names variant */
+}
+
+/**
+ * \brief Configure the cache for session parameters.
+ *
+ * The cache context is provided as a pointer to its first field (vtable
+ * pointer).
+ *
+ * \param cc       server context.
+ * \param vtable   session cache context.
+ */
+static inline void
+br_ssl_server_set_cache(br_ssl_server_context *cc,
+	const br_ssl_session_cache_class **vtable)
+{
+	cc->cache_vtable = vtable;	/* pointer stored as-is, not copied */
+}
+
+/**
+ * \brief Prepare or reset a server context for handling an incoming client.
+ *
+ * \param cc   server context.
+ * \return  1 on success, 0 on error.
+ */
+int br_ssl_server_reset(br_ssl_server_context *cc);
+
+/* ===================================================================== */
+
+/*
+ * Context for the simplified I/O context. The transport medium is accessed
+ * through the low_read() and low_write() callback functions, each with
+ * its own opaque context pointer.
+ *
+ *  low_read()    read some bytes, at most 'len' bytes, into data[]. The
+ *                returned value is the number of read bytes, or -1 on error.
+ *                The 'len' parameter is guaranteed never to exceed 20000,
+ *                so the length always fits in an 'int' on all platforms.
+ *
+ *  low_write()   write up to 'len' bytes, to be read from data[]. The
+ *                returned value is the number of written bytes, or -1 on
+ *                error. The 'len' parameter is guaranteed never to exceed
+ *                20000, so the length always fits in an 'int' on all
+ *                platforms.
+ *
+ * A socket closure (if the transport medium is a socket) should be reported
+ * as an error (-1). The callbacks shall endeavour to block until at least
+ * one byte can be read or written; a callback returning 0 at times is
+ * acceptable, but this normally leads to the callback being immediately
+ * called again, so the callback should at least always try to block for
+ * some time if no I/O can take place.
+ *
+ * The SSL engine naturally applies some buffering, so the callbacks need
+ * not apply buffers of their own.
+ */
+/**
+ * \brief Context structure for the simplified SSL I/O wrapper.
+ *
+ * This structure is initialised with `br_sslio_init()`. Its contents
+ * are opaque and shall not be accessed directly.
+ */
+typedef struct {
+#ifndef BR_DOXYGEN_IGNORE
+	br_ssl_engine_context *engine;	/* wrapped SSL engine */
+	int (*low_read)(void *read_context,
+		unsigned char *data, size_t len);	/* transport read callback */
+	void *read_context;	/* context pointer for low_read() */
+	int (*low_write)(void *write_context,
+		const unsigned char *data, size_t len);	/* transport write callback */
+	void *write_context;	/* context pointer for low_write() */
+#endif
+} br_sslio_context;
+
+/**
+ * \brief Initialise a simplified I/O wrapper context.
+ *
+ * The simplified I/O wrapper offers a simpler read/write API for a SSL
+ * engine (client or server), using the provided callback functions for
+ * reading data from, or writing data to, the transport medium.
+ *
+ * The callback functions have the following semantics:
+ *
+ *   - Each callback receives an opaque context value (of type `void *`)
+ *     that the callback may use arbitrarily (or possibly ignore).
+ *
+ *   - `low_read()` reads at least one byte, at most `len` bytes, from
+ *     the transport medium. Read bytes shall be written in `data`.
+ *
+ *   - `low_write()` writes at least one byte, at most `len` bytes, unto
+ *     the transport medium. The bytes to write are read from `data`.
+ *
+ *   - The `len` parameter is never zero, and is always lower than 20000.
+ *
+ *   - The number of processed bytes (read or written) is returned. Since
+ *     that number is less than 20000, it always fits on an `int`.
+ *
+ *   - On error, the callbacks return -1. Reaching end-of-stream is an
+ *     error. Errors are permanent: the SSL connection is terminated.
+ *
+ *   - Callbacks SHOULD NOT return 0. This is tolerated, as long as
+ *     callbacks endeavour to block for some non-negligible amount of
+ *     time until at least one byte can be sent or received (if a
+ *     callback returns 0, then the wrapper invokes it again
+ *     immediately).
+ *
+ *   - Callbacks MAY return as soon as at least one byte is processed;
+ *     they MAY also insist on reading or writing _all_ requested bytes.
+ *     Since SSL is a self-terminated protocol (each record has a length
+ *     header), this does not change semantics.
+ *
+ *   - Callbacks need not apply any buffering (for performance) since SSL
+ *     itself uses buffers.
+ *
+ * \param ctx             wrapper context to initialise.
+ * \param engine          SSL engine to wrap.
+ * \param low_read        callback for reading data from the transport.
+ * \param read_context    context pointer for `low_read()`.
+ * \param low_write       callback for writing data on the transport.
+ * \param write_context   context pointer for `low_write()`.
+ */
+void br_sslio_init(br_sslio_context *ctx,
+	br_ssl_engine_context *engine,
+	int (*low_read)(void *read_context,
+		unsigned char *data, size_t len),
+	void *read_context,
+	int (*low_write)(void *write_context,
+		const unsigned char *data, size_t len),
+	void *write_context);
+
+/**
+ * \brief Read some application data from a SSL connection.
+ *
+ * If `len` is zero, then this function returns 0 immediately. In
+ * all other cases, it never returns 0.
+ *
+ * This call returns only when at least one byte has been obtained.
+ * Returned value is the number of bytes read, or -1 on error. The
+ * number of bytes always fits on an 'int' (data from a single SSL/TLS
+ * record is returned).
+ *
+ * On error or SSL closure, this function returns -1. The caller should
+ * inspect the error status on the SSL engine to distinguish between
+ * normal closure and error.
+ *
+ * \param cc    SSL wrapper context.
+ * \param dst   destination buffer for application data.
+ * \param len   maximum number of bytes to obtain.
+ * \return  number of bytes obtained, or -1 on error.
+ */
+int br_sslio_read(br_sslio_context *cc, void *dst, size_t len);
+
+/**
+ * \brief Read application data from a SSL connection.
+ *
+ * This call returns only when _all_ requested `len` bytes are read,
+ * or an error is reached. Returned value is 0 on success, -1 on error.
+ * A normal (verified) SSL closure before that many bytes are obtained
+ * is reported as an error by this function.
+ *
+ * \param cc    SSL wrapper context.
+ * \param dst   destination buffer for application data.
+ * \param len   number of bytes to obtain.
+ * \return  0 on success, or -1 on error.
+ */
+int br_sslio_read_all(br_sslio_context *cc, void *dst, size_t len);
+
+/**
+ * \brief Write some application data unto a SSL connection.
+ *
+ * If `len` is zero, then this function returns 0 immediately. In
+ * all other cases, it never returns 0.
+ *
+ * This call returns only when at least one byte has been written.
+ * Returned value is the number of bytes written, or -1 on error. The
+ * number of bytes always fits on an 'int' (less than 20000).
+ *
+ * On error or SSL closure, this function returns -1. The caller should
+ * inspect the error status on the SSL engine to distinguish between
+ * normal closure and error.
+ *
+ * **Important:** SSL is buffered; a "written" byte is a byte that was
+ * injected into the wrapped SSL engine, but this does not necessarily mean
+ * that it has been scheduled for sending. Use `br_sslio_flush()` to
+ * ensure that all pending data has been sent to the transport medium.
+ *
+ * \param cc    SSL wrapper context.
+ * \param src   source buffer for application data.
+ * \param len   maximum number of bytes to write.
+ * \return  number of bytes written, or -1 on error.
+ */
+int br_sslio_write(br_sslio_context *cc, const void *src, size_t len);
+
+/**
+ * \brief Write application data unto a SSL connection.
+ *
+ * This call returns only when _all_ requested `len` bytes have been
+ * written, or an error is reached. Returned value is 0 on success, -1
+ * on error. A normal (verified) SSL closure before that many bytes are
+ * written is reported as an error by this function.
+ *
+ * **Important:** SSL is buffered; a "written" byte is a byte that was
+ * injected into the wrapped SSL engine, but this does not necessarily mean
+ * that it has been scheduled for sending. Use `br_sslio_flush()` to
+ * ensure that all pending data has been sent to the transport medium.
+ *
+ * \param cc    SSL wrapper context.
+ * \param src   source buffer for application data.
+ * \param len   number of bytes to write.
+ * \return  0 on success, or -1 on error.
+ */
+int br_sslio_write_all(br_sslio_context *cc, const void *src, size_t len);
+
+/**
+ * \brief Flush pending data.
+ *
+ * This call makes sure that any buffered application data in the
+ * provided context (including the wrapped SSL engine) has been sent
+ * to the transport medium (i.e. accepted by the `low_write()` callback
+ * method). If there is no such pending data, then this function does
+ * nothing (and returns a success, i.e. 0).
+ *
+ * If the underlying transport medium has its own buffers, then it is
+ * up to the caller to ensure the corresponding flushing.
+ *
+ * Returned value is 0 on success, -1 on error.
+ *
+ * \param cc    SSL wrapper context.
+ * \return  0 on success, or -1 on error.
+ */
+int br_sslio_flush(br_sslio_context *cc);
+
+/**
+ * \brief Close the SSL connection.
+ *
+ * This call runs the SSL closure protocol (sending a `close_notify`,
+ * receiving the response `close_notify`). When it returns, the SSL
+ * connection is finished. It is still up to the caller to manage the
+ * possible transport-level termination, if applicable (alternatively,
+ * the underlying transport stream may be reused for non-SSL messages).
+ *
+ * Returned value is 0 on success, -1 on error. A failure by the peer
+ * to process the complete closure protocol (i.e. sending back the
+ * `close_notify`) is an error.
+ *
+ * \param cc    SSL wrapper context.
+ * \return  0 on success, or -1 on error.
+ */
+int br_sslio_close(br_sslio_context *cc);
+
+/* ===================================================================== */
+
+/*
+ * Symbolic constants for cipher suites (16-bit standard identifiers).
+ */
+
+/* From RFC 5246 (TLS 1.2: RSA, DH, DHE and DH_anon suites) */
+#define BR_TLS_NULL_WITH_NULL_NULL                   0x0000
+#define BR_TLS_RSA_WITH_NULL_MD5                     0x0001
+#define BR_TLS_RSA_WITH_NULL_SHA                     0x0002
+#define BR_TLS_RSA_WITH_NULL_SHA256                  0x003B
+#define BR_TLS_RSA_WITH_RC4_128_MD5                  0x0004
+#define BR_TLS_RSA_WITH_RC4_128_SHA                  0x0005
+#define BR_TLS_RSA_WITH_3DES_EDE_CBC_SHA             0x000A
+#define BR_TLS_RSA_WITH_AES_128_CBC_SHA              0x002F
+#define BR_TLS_RSA_WITH_AES_256_CBC_SHA              0x0035
+#define BR_TLS_RSA_WITH_AES_128_CBC_SHA256           0x003C
+#define BR_TLS_RSA_WITH_AES_256_CBC_SHA256           0x003D
+#define BR_TLS_DH_DSS_WITH_3DES_EDE_CBC_SHA          0x000D
+#define BR_TLS_DH_RSA_WITH_3DES_EDE_CBC_SHA          0x0010
+#define BR_TLS_DHE_DSS_WITH_3DES_EDE_CBC_SHA         0x0013
+#define BR_TLS_DHE_RSA_WITH_3DES_EDE_CBC_SHA         0x0016
+#define BR_TLS_DH_DSS_WITH_AES_128_CBC_SHA           0x0030
+#define BR_TLS_DH_RSA_WITH_AES_128_CBC_SHA           0x0031
+#define BR_TLS_DHE_DSS_WITH_AES_128_CBC_SHA          0x0032
+#define BR_TLS_DHE_RSA_WITH_AES_128_CBC_SHA          0x0033
+#define BR_TLS_DH_DSS_WITH_AES_256_CBC_SHA           0x0036
+#define BR_TLS_DH_RSA_WITH_AES_256_CBC_SHA           0x0037
+#define BR_TLS_DHE_DSS_WITH_AES_256_CBC_SHA          0x0038
+#define BR_TLS_DHE_RSA_WITH_AES_256_CBC_SHA          0x0039
+#define BR_TLS_DH_DSS_WITH_AES_128_CBC_SHA256        0x003E
+#define BR_TLS_DH_RSA_WITH_AES_128_CBC_SHA256        0x003F
+#define BR_TLS_DHE_DSS_WITH_AES_128_CBC_SHA256       0x0040
+#define BR_TLS_DHE_RSA_WITH_AES_128_CBC_SHA256       0x0067
+#define BR_TLS_DH_DSS_WITH_AES_256_CBC_SHA256        0x0068
+#define BR_TLS_DH_RSA_WITH_AES_256_CBC_SHA256        0x0069
+#define BR_TLS_DHE_DSS_WITH_AES_256_CBC_SHA256       0x006A
+#define BR_TLS_DHE_RSA_WITH_AES_256_CBC_SHA256       0x006B
+#define BR_TLS_DH_anon_WITH_RC4_128_MD5              0x0018
+#define BR_TLS_DH_anon_WITH_3DES_EDE_CBC_SHA         0x001B
+#define BR_TLS_DH_anon_WITH_AES_128_CBC_SHA          0x0034
+#define BR_TLS_DH_anon_WITH_AES_256_CBC_SHA          0x003A
+#define BR_TLS_DH_anon_WITH_AES_128_CBC_SHA256       0x006C
+#define BR_TLS_DH_anon_WITH_AES_256_CBC_SHA256       0x006D
+
+/* From RFC 4492 (elliptic curve suites: ECDH, ECDHE, ECDH_anon) */
+#define BR_TLS_ECDH_ECDSA_WITH_NULL_SHA              0xC001
+#define BR_TLS_ECDH_ECDSA_WITH_RC4_128_SHA           0xC002
+#define BR_TLS_ECDH_ECDSA_WITH_3DES_EDE_CBC_SHA      0xC003
+#define BR_TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA       0xC004
+#define BR_TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA       0xC005
+#define BR_TLS_ECDHE_ECDSA_WITH_NULL_SHA             0xC006
+#define BR_TLS_ECDHE_ECDSA_WITH_RC4_128_SHA          0xC007
+#define BR_TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA     0xC008
+#define BR_TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA      0xC009
+#define BR_TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA      0xC00A
+#define BR_TLS_ECDH_RSA_WITH_NULL_SHA                0xC00B
+#define BR_TLS_ECDH_RSA_WITH_RC4_128_SHA             0xC00C
+#define BR_TLS_ECDH_RSA_WITH_3DES_EDE_CBC_SHA        0xC00D
+#define BR_TLS_ECDH_RSA_WITH_AES_128_CBC_SHA         0xC00E
+#define BR_TLS_ECDH_RSA_WITH_AES_256_CBC_SHA         0xC00F
+#define BR_TLS_ECDHE_RSA_WITH_NULL_SHA               0xC010
+#define BR_TLS_ECDHE_RSA_WITH_RC4_128_SHA            0xC011
+#define BR_TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA       0xC012
+#define BR_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA        0xC013
+#define BR_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA        0xC014
+#define BR_TLS_ECDH_anon_WITH_NULL_SHA               0xC015
+#define BR_TLS_ECDH_anon_WITH_RC4_128_SHA            0xC016
+#define BR_TLS_ECDH_anon_WITH_3DES_EDE_CBC_SHA       0xC017
+#define BR_TLS_ECDH_anon_WITH_AES_128_CBC_SHA        0xC018
+#define BR_TLS_ECDH_anon_WITH_AES_256_CBC_SHA        0xC019
+
+/* From RFC 5288 (AES-GCM suites with RSA, DH, DHE and DH_anon) */
+#define BR_TLS_RSA_WITH_AES_128_GCM_SHA256           0x009C
+#define BR_TLS_RSA_WITH_AES_256_GCM_SHA384           0x009D
+#define BR_TLS_DHE_RSA_WITH_AES_128_GCM_SHA256       0x009E
+#define BR_TLS_DHE_RSA_WITH_AES_256_GCM_SHA384       0x009F
+#define BR_TLS_DH_RSA_WITH_AES_128_GCM_SHA256        0x00A0
+#define BR_TLS_DH_RSA_WITH_AES_256_GCM_SHA384        0x00A1
+#define BR_TLS_DHE_DSS_WITH_AES_128_GCM_SHA256       0x00A2
+#define BR_TLS_DHE_DSS_WITH_AES_256_GCM_SHA384       0x00A3
+#define BR_TLS_DH_DSS_WITH_AES_128_GCM_SHA256        0x00A4
+#define BR_TLS_DH_DSS_WITH_AES_256_GCM_SHA384        0x00A5
+#define BR_TLS_DH_anon_WITH_AES_128_GCM_SHA256       0x00A6
+#define BR_TLS_DH_anon_WITH_AES_256_GCM_SHA384       0x00A7
+
+/* From RFC 5289 (elliptic curve suites with SHA-256/384: CBC and GCM) */
+#define BR_TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256   0xC023
+#define BR_TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384   0xC024
+#define BR_TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA256    0xC025
+#define BR_TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384    0xC026
+#define BR_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256     0xC027
+#define BR_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384     0xC028
+#define BR_TLS_ECDH_RSA_WITH_AES_128_CBC_SHA256      0xC029
+#define BR_TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384      0xC02A
+#define BR_TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256   0xC02B
+#define BR_TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384   0xC02C
+#define BR_TLS_ECDH_ECDSA_WITH_AES_128_GCM_SHA256    0xC02D
+#define BR_TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384    0xC02E
+#define BR_TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256     0xC02F
+#define BR_TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384     0xC030
+#define BR_TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256      0xC031
+#define BR_TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384      0xC032
+
+/* From RFC 6655 (RSA) and RFC 7251 (ECDHE_ECDSA): AES-CCM suites */
+#define BR_TLS_RSA_WITH_AES_128_CCM                  0xC09C
+#define BR_TLS_RSA_WITH_AES_256_CCM                  0xC09D
+#define BR_TLS_RSA_WITH_AES_128_CCM_8                0xC0A0
+#define BR_TLS_RSA_WITH_AES_256_CCM_8                0xC0A1
+#define BR_TLS_ECDHE_ECDSA_WITH_AES_128_CCM          0xC0AC
+#define BR_TLS_ECDHE_ECDSA_WITH_AES_256_CCM          0xC0AD
+#define BR_TLS_ECDHE_ECDSA_WITH_AES_128_CCM_8        0xC0AE
+#define BR_TLS_ECDHE_ECDSA_WITH_AES_256_CCM_8        0xC0AF
+
+/* From RFC 7905 (ChaCha20-Poly1305 suites) */
+#define BR_TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256     0xCCA8
+#define BR_TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256   0xCCA9
+#define BR_TLS_DHE_RSA_WITH_CHACHA20_POLY1305_SHA256       0xCCAA
+#define BR_TLS_PSK_WITH_CHACHA20_POLY1305_SHA256           0xCCAB
+#define BR_TLS_ECDHE_PSK_WITH_CHACHA20_POLY1305_SHA256     0xCCAC
+#define BR_TLS_DHE_PSK_WITH_CHACHA20_POLY1305_SHA256       0xCCAD
+#define BR_TLS_RSA_PSK_WITH_CHACHA20_POLY1305_SHA256       0xCCAE
+
+/* From RFC 7507 (signalling value, not an actual cipher suite) */
+#define BR_TLS_FALLBACK_SCSV                         0x5600
+
+/*
+ * Symbolic constants for alerts (TLS alert description codes).
+ */
+#define BR_ALERT_CLOSE_NOTIFY                0
+#define BR_ALERT_UNEXPECTED_MESSAGE         10
+#define BR_ALERT_BAD_RECORD_MAC             20
+#define BR_ALERT_RECORD_OVERFLOW            22
+#define BR_ALERT_DECOMPRESSION_FAILURE      30
+#define BR_ALERT_HANDSHAKE_FAILURE          40
+#define BR_ALERT_BAD_CERTIFICATE            42
+#define BR_ALERT_UNSUPPORTED_CERTIFICATE    43
+#define BR_ALERT_CERTIFICATE_REVOKED        44
+#define BR_ALERT_CERTIFICATE_EXPIRED        45
+#define BR_ALERT_CERTIFICATE_UNKNOWN        46
+#define BR_ALERT_ILLEGAL_PARAMETER          47
+#define BR_ALERT_UNKNOWN_CA                 48
+#define BR_ALERT_ACCESS_DENIED              49
+#define BR_ALERT_DECODE_ERROR               50
+#define BR_ALERT_DECRYPT_ERROR              51
+#define BR_ALERT_PROTOCOL_VERSION           70
+#define BR_ALERT_INSUFFICIENT_SECURITY      71
+#define BR_ALERT_INTERNAL_ERROR             80
+#define BR_ALERT_USER_CANCELED              90
+#define BR_ALERT_NO_RENEGOTIATION          100
+#define BR_ALERT_UNSUPPORTED_EXTENSION     110
+#define BR_ALERT_NO_APPLICATION_PROTOCOL   120
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/third_party/bearssl/inc/bearssl_x509.h b/third_party/bearssl/inc/bearssl_x509.h
new file mode 100644
index 0000000..7668e1d
--- /dev/null
+++ b/third_party/bearssl/inc/bearssl_x509.h
@@ -0,0 +1,1474 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef BR_BEARSSL_X509_H__
+#define BR_BEARSSL_X509_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "bearssl_ec.h"
+#include "bearssl_hash.h"
+#include "bearssl_rsa.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \file bearssl_x509.h
+ *
+ * # X.509 Certificate Chain Processing
+ *
+ * An X.509 processing engine receives an X.509 chain, chunk by chunk,
+ * as received from a SSL/TLS client or server (the client receives the
+ * server's certificate chain, and the server receives the client's
+ * certificate chain if it requested a client certificate). The chain
+ * is thus injected in the engine in SSL order (end-entity first).
+ *
+ * The engine's job is to return the public key to use for SSL/TLS.
+ * How exactly that key is obtained and verified is entirely up to the
+ * engine.
+ *
+ * **The "known key" engine** returns a public key which is already known
+ * from out-of-band information (e.g. the client _remembers_ the key from
+ * a previous connection, as in the usual SSH model). This is the simplest
+ * engine since it simply ignores the chain, thereby avoiding the need
+ * for any decoding logic.
+ *
+ * **The "minimal" engine** implements minimal X.509 decoding and chain
+ * validation:
+ *
+ *   - The provided chain should validate "as is". There is no attempt
+ *     at reordering, skipping or downloading extra certificates.
+ *
+ *   - X.509 v1, v2 and v3 certificates are supported.
+ *
+ *   - Trust anchors are a DN and a public key. Each anchor is either a
+ *     "CA" anchor, or a non-CA.
+ *
+ *   - If the end-entity certificate matches a non-CA anchor (subject DN
+ *     is equal to the non-CA name, and public key is also identical to
+ *     the anchor key), then this is a _direct trust_ case and the
+ *     remaining certificates are ignored.
+ *
+ *   - Unless direct trust is applied, the chain must be verifiable up to
+ *     a certificate whose issuer DN matches the DN from a "CA" trust anchor,
+ *     and whose signature is verifiable against that anchor's public key.
+ *     Subsequent certificates in the chain are ignored.
+ *
+ *   - The engine verifies subject/issuer DN matching, and enforces
+ *     processing of Basic Constraints and Key Usage extensions. The
+ *     Authority Key Identifier, Subject Key Identifier, Issuer Alt Name,
+ *     Subject Directory Attribute, CRL Distribution Points, Freshest CRL,
+ *     Authority Info Access and Subject Info Access extensions are
+ *     ignored. The Subject Alt Name is decoded for the end-entity
+ *     certificate under some conditions (see below). Other extensions
+ *     are ignored if non-critical, or imply chain rejection if critical.
+ *
+ *   - The Subject Alt Name extension is parsed for names of type `dNSName`
+ *     when decoding the end-entity certificate, and only if there is a
+ *     server name to match. If there is no SAN extension, then the
+ *     Common Name from the subjectDN is used. That name matching is
+ *     case-insensitive and honours a single starting wildcard (i.e. if
+ *     the name in the certificate starts with "`*.`" then this matches
+ *     any word as first element). Note: this name matching is performed
+ *     also in the "direct trust" model.
+ *
+ *   - DN matching is byte-to-byte equality (a future version might
+ *     include some limited processing for case-insensitive matching and
+ *     whitespace normalisation).
+ *
+ *   - Successful validation produces a public key type but also a set
+ *     of allowed usages (`BR_KEYTYPE_KEYX` and/or `BR_KEYTYPE_SIGN`).
+ *     The caller is responsible for checking that the key type and
+ *     usages are compatible with the expected values (e.g. with the
+ *     selected cipher suite, when the client validates the server's
+ *     certificate).
+ *
+ * **Important caveats:**
+ *
+ *   - The "minimal" engine does not check revocation status. The relevant
+ *     extensions are ignored, and CRL or OCSP responses are not gathered
+ *     or checked.
+ *
+ *   - The "minimal" engine does not currently support Name Constraints
+ *     (some basic functionality to handle sub-domains may be added in a
+ *     later version).
+ *
+ *   - The decoder is not "validating" in the sense that it won't reject
+ *     some certificates with invalid field values when these fields are
+ *     not actually processed.
+ */
+
+/*
+ * X.509 error codes are in the 32..63 range.
+ */
+
+/** \brief X.509 status: validation was successful; this is not actually
+    an error. */
+#define BR_ERR_X509_OK                    32
+
+/** \brief X.509 status: invalid value in an ASN.1 structure. */
+#define BR_ERR_X509_INVALID_VALUE         33
+
+/** \brief X.509 status: truncated certificate. */
+#define BR_ERR_X509_TRUNCATED             34
+
+/** \brief X.509 status: empty certificate chain (no certificate at all). */
+#define BR_ERR_X509_EMPTY_CHAIN           35
+
+/** \brief X.509 status: decoding error: inner element extends beyond
+    outer element size. */
+#define BR_ERR_X509_INNER_TRUNC           36
+
+/** \brief X.509 status: decoding error: unsupported tag class (application
+    or private). */
+#define BR_ERR_X509_BAD_TAG_CLASS         37
+
+/** \brief X.509 status: decoding error: unsupported tag value. */
+#define BR_ERR_X509_BAD_TAG_VALUE         38
+
+/** \brief X.509 status: decoding error: indefinite length. */
+#define BR_ERR_X509_INDEFINITE_LENGTH     39
+
+/** \brief X.509 status: decoding error: extraneous element. */
+#define BR_ERR_X509_EXTRA_ELEMENT         40
+
+/** \brief X.509 status: decoding error: unexpected element. */
+#define BR_ERR_X509_UNEXPECTED            41
+
+/** \brief X.509 status: decoding error: expected constructed element, but
+    is primitive. */
+#define BR_ERR_X509_NOT_CONSTRUCTED       42
+
+/** \brief X.509 status: decoding error: expected primitive element, but
+    is constructed. */
+#define BR_ERR_X509_NOT_PRIMITIVE         43
+
+/** \brief X.509 status: decoding error: BIT STRING length is not multiple
+    of 8. */
+#define BR_ERR_X509_PARTIAL_BYTE          44
+
+/** \brief X.509 status: decoding error: BOOLEAN value has invalid length. */
+#define BR_ERR_X509_BAD_BOOLEAN           45
+
+/** \brief X.509 status: decoding error: value is off-limits. */
+#define BR_ERR_X509_OVERFLOW              46
+
+/** \brief X.509 status: invalid distinguished name. */
+#define BR_ERR_X509_BAD_DN                47
+
+/** \brief X.509 status: invalid date/time representation. */
+#define BR_ERR_X509_BAD_TIME              48
+
+/** \brief X.509 status: certificate contains unsupported features that
+    cannot be ignored. */
+#define BR_ERR_X509_UNSUPPORTED           49
+
+/** \brief X.509 status: key or signature size exceeds internal limits. */
+#define BR_ERR_X509_LIMIT_EXCEEDED        50
+
+/** \brief X.509 status: key type does not match that which was expected. */
+#define BR_ERR_X509_WRONG_KEY_TYPE        51
+
+/** \brief X.509 status: signature is invalid. */
+#define BR_ERR_X509_BAD_SIGNATURE         52
+
+/** \brief X.509 status: validation time is unknown. */
+#define BR_ERR_X509_TIME_UNKNOWN          53
+
+/** \brief X.509 status: certificate is expired or not yet valid. */
+#define BR_ERR_X509_EXPIRED               54
+
+/** \brief X.509 status: issuer/subject DN mismatch in the chain. */
+#define BR_ERR_X509_DN_MISMATCH           55
+
+/** \brief X.509 status: expected server name was not found in the chain. */
+#define BR_ERR_X509_BAD_SERVER_NAME       56
+
+/** \brief X.509 status: unknown critical extension in certificate. */
+#define BR_ERR_X509_CRITICAL_EXTENSION    57
+
+/** \brief X.509 status: not a CA, or path length constraint violation */
+#define BR_ERR_X509_NOT_CA                58
+
+/** \brief X.509 status: Key Usage extension prohibits intended usage. */
+#define BR_ERR_X509_FORBIDDEN_KEY_USAGE   59
+
+/** \brief X.509 status: public key found in certificate is too small. */
+#define BR_ERR_X509_WEAK_PUBLIC_KEY       60
+
+/** \brief X.509 status: chain could not be linked to a trust anchor. */
+#define BR_ERR_X509_NOT_TRUSTED           62
+
+/**
+ * \brief Aggregate structure for public keys.
+ */
+typedef struct {
+	/** \brief Key type: `BR_KEYTYPE_RSA` or `BR_KEYTYPE_EC` */
+	unsigned char key_type;
+	/** \brief Actual public key. */
+	union {
+		/** \brief RSA public key. */
+		br_rsa_public_key rsa;
+		/** \brief EC public key. */
+		br_ec_public_key ec;
+	} key;
+} br_x509_pkey;
+
+/**
+ * \brief Distinguished Name (X.500) structure.
+ *
+ * The DN is DER-encoded.
+ */
+typedef struct {
+	/** \brief Encoded DN data. */
+	unsigned char *data;
+	/** \brief Encoded DN length (in bytes). */
+	size_t len;
+} br_x500_name;
+
+/**
+ * \brief Trust anchor structure.
+ */
+typedef struct {
+	/** \brief Encoded DN (X.500 name). */
+	br_x500_name dn;
+	/** \brief Anchor flags (e.g. `BR_X509_TA_CA`). */
+	unsigned flags;
+	/** \brief Anchor public key. */
+	br_x509_pkey pkey;
+} br_x509_trust_anchor;
+
+/**
+ * \brief Trust anchor flag: CA.
+ *
+ * A "CA" anchor is deemed fit to verify signatures on certificates.
+ * A "non-CA" anchor is accepted only for direct trust (server's
+ * certificate name and key match the anchor).
+ */
+#define BR_X509_TA_CA        0x0001
+
+/*
+ * Key type: combination of a basic key type (low 4 bits) and some
+ * optional flags.
+ *
+ * For a public key, the basic key type only is set.
+ *
+ * For an expected key type, the flags indicate the intended purpose(s)
+ * for the key; the basic key type may be set to 0 to indicate that any
+ * key type compatible with the indicated purpose is acceptable.
+ */
+/** \brief Key type: algorithm is RSA. */
+#define BR_KEYTYPE_RSA    1
+/** \brief Key type: algorithm is EC. */
+#define BR_KEYTYPE_EC     2
+
+/**
+ * \brief Key type: usage is "key exchange".
+ *
+ * This value is combined (with bitwise OR) with the algorithm
+ * (`BR_KEYTYPE_RSA` or `BR_KEYTYPE_EC`) when informing the X.509
+ * validation engine that it should find a public key of that type,
+ * fit for key exchanges (e.g. `TLS_RSA_*` and `TLS_ECDH_*` cipher
+ * suites).
+ */
+#define BR_KEYTYPE_KEYX   0x10
+
+/**
+ * \brief Key type: usage is "signature".
+ *
+ * This value is combined (with bitwise OR) with the algorithm
+ * (`BR_KEYTYPE_RSA` or `BR_KEYTYPE_EC`) when informing the X.509
+ * validation engine that it should find a public key of that type,
+ * fit for signatures (e.g. `TLS_ECDHE_*` cipher suites).
+ */
+#define BR_KEYTYPE_SIGN   0x20
+
+/*
+ * start_chain   Called when a new chain is started. If 'server_name'
+ *               is not NULL and non-empty, then it is a name that
+ *               should be looked for in the EE certificate (in the
+ *               SAN extension as dNSName, or in the subjectDN's CN
+ *               if there is no SAN extension).
+ *               The caller ensures that the provided 'server_name'
+ *               pointer remains valid throughout validation.
+ *
+ * start_cert    Begins a new certificate in the chain. The provided
+ *               length is in bytes; this is the total certificate length.
+ *
+ * append        Get some additional bytes for the current certificate.
+ *
+ * end_cert      Ends the current certificate.
+ *
+ * end_chain     Called at the end of the chain. Returned value is
+ *               0 on success, or a non-zero error code.
+ *
+ * get_pkey      Returns the EE certificate public key.
+ *
+ * For a complete chain, start_chain() and end_chain() are always
+ * called. For each certificate, start_cert(), some append() calls, then
+ * end_cert() are called, in that order. There may be no append() call
+ * at all if the certificate is empty (which is not valid but may happen
+ * if the peer sends exactly that).
+ *
+ * get_pkey() shall return a pointer to a structure that is valid as
+ * long as a new chain is not started. This may be a sub-structure
+ * within the context for the engine. This function MAY return a valid
+ * pointer to a public key even in some cases of validation failure,
+ * depending on the validation engine.
+ */
+
+/**
+ * \brief Class type for an X.509 engine.
+ *
+ * A certificate chain validation uses a caller-allocated context, which
+ * contains the running state for that validation. Methods are called
+ * in due order:
+ *
+ *   - `start_chain()` is called at the start of the validation.
+ *   - Certificates are processed one by one, in SSL order (end-entity
+ *     comes first). For each certificate, the following methods are
+ *     called:
+ *
+ *       - `start_cert()` at the beginning of the certificate.
+ *       - `append()` is called zero, one or more times, to provide
+ *         the certificate (possibly in chunks).
+ *       - `end_cert()` at the end of the certificate.
+ *
+ *   - `end_chain()` is called when the last certificate in the chain
+ *     was processed.
+ *   - `get_pkey()` is called after chain processing, if the chain
+ *     validation was successful.
+ *
+ * A context structure may be reused; the `start_chain()` method shall
+ * ensure (re)initialisation.
+ */
+typedef struct br_x509_class_ br_x509_class;
+struct br_x509_class_ {
+	/**
+	 * \brief X.509 context size, in bytes.
+	 */
+	size_t context_size;
+
+	/**
+	 * \brief Start a new chain.
+	 *
+	 * This method shall set the vtable (first field) of the context
+	 * structure.
+	 *
+	 * The `server_name`, if not `NULL`, will be considered as a
+	 * fully qualified domain name, to be matched against the `dNSName`
+	 * elements of the end-entity certificate's SAN extension (if there
+	 * is no SAN, then the Common Name from the subjectDN will be used).
+	 * If `server_name` is `NULL` then no such matching is performed.
+	 *
+	 * \param ctx           validation context.
+	 * \param server_name   server name to match (or `NULL`).
+	 */
+	void (*start_chain)(const br_x509_class **ctx,
+		const char *server_name);
+
+	/**
+	 * \brief Start a new certificate.
+	 *
+	 * \param ctx      validation context.
+	 * \param length   new certificate length (in bytes).
+	 */
+	void (*start_cert)(const br_x509_class **ctx, uint32_t length);
+
+	/**
+	 * \brief Receive some bytes for the current certificate.
+	 *
+	 * This function may be called several times in succession for
+	 * a given certificate. The caller guarantees that for each
+	 * call, `len` is not zero, and the sum of all chunk lengths
+	 * for a certificate matches the total certificate length which
+	 * was provided in the previous `start_cert()` call.
+	 *
+	 * If the new certificate is empty (no byte at all) then this
+	 * function won't be called at all.
+	 *
+	 * \param ctx   validation context.
+	 * \param buf   certificate data chunk.
+	 * \param len   certificate data chunk length (in bytes).
+	 */
+	void (*append)(const br_x509_class **ctx,
+		const unsigned char *buf, size_t len);
+
+	/**
+	 * \brief Finish the current certificate.
+	 *
+	 * This function is called when the end of the current certificate
+	 * is reached.
+	 *
+	 * \param ctx   validation context.
+	 */
+	void (*end_cert)(const br_x509_class **ctx);
+
+	/**
+	 * \brief Finish the chain.
+	 *
+	 * This function is called at the end of the chain. It shall
+	 * return either 0 if the validation was successful, or a
+	 * non-zero error code. The `BR_ERR_X509_*` constants are
+	 * error codes, though other values may be possible.
+	 *
+	 * \param ctx   validation context.
+	 * \return  0 on success, or a non-zero error code.
+	 */
+	unsigned (*end_chain)(const br_x509_class **ctx);
+
+	/**
+	 * \brief Get the resulting end-entity public key.
+	 *
+	 * The decoded public key is returned. The returned pointer
+	 * may be valid only as long as the context structure is
+	 * unmodified, i.e. it may cease to be valid if the context
+	 * is released or reused.
+	 *
+	 * This function _may_ return `NULL` if the validation failed.
+	 * However, returning a public key does not mean that the
+	 * validation was wholly successful; some engines may return
+	 * a decoded public key even if the chain did not end on a
+	 * trusted anchor.
+	 *
+	 * If validation succeeded and `usages` is not `NULL`, then
+	 * `*usages` is filled with a combination of `BR_KEYTYPE_SIGN`
+	 * and/or `BR_KEYTYPE_KEYX` that specifies the validated key
+	 * usage types. It is the caller's responsibility to check
+	 * that value against the intended use of the public key.
+	 *
+	 * \param ctx   validation context.
+	 * \return  the end-entity public key, or `NULL`.
+	 */
+	const br_x509_pkey *(*get_pkey)(
+		const br_x509_class *const *ctx, unsigned *usages);
+};
+
+/**
+ * \brief The "known key" X.509 engine structure.
+ *
+ * The structure contents are opaque (they shall not be accessed directly),
+ * except for the first field (the vtable).
+ *
+ * The "known key" engine returns an externally configured public key,
+ * and totally ignores the certificate contents.
+ */
+typedef struct {
+	/** \brief Reference to the context vtable. */
+	const br_x509_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	br_x509_pkey pkey;
+	unsigned usages;
+#endif
+} br_x509_knownkey_context;
+
+/**
+ * \brief Class instance for the "known key" X.509 engine.
+ */
+extern const br_x509_class br_x509_knownkey_vtable;
+
+/**
+ * \brief Initialize a "known key" X.509 engine with a known RSA public key.
+ *
+ * The `usages` parameter indicates the allowed key usages for that key
+ * (`BR_KEYTYPE_KEYX` and/or `BR_KEYTYPE_SIGN`).
+ *
+ * The provided pointers are linked in, not copied, so they must remain
+ * valid while the public key may be in usage.
+ *
+ * \param ctx      context to initialise.
+ * \param pk       known public key.
+ * \param usages   allowed key usages.
+ */
+void br_x509_knownkey_init_rsa(br_x509_knownkey_context *ctx,
+	const br_rsa_public_key *pk, unsigned usages);
+
+/**
+ * \brief Initialize a "known key" X.509 engine with a known EC public key.
+ *
+ * The `usages` parameter indicates the allowed key usages for that key
+ * (`BR_KEYTYPE_KEYX` and/or `BR_KEYTYPE_SIGN`).
+ *
+ * The provided pointers are linked in, not copied, so they must remain
+ * valid while the public key may be in usage.
+ *
+ * \param ctx      context to initialise.
+ * \param pk       known public key.
+ * \param usages   allowed key usages.
+ */
+void br_x509_knownkey_init_ec(br_x509_knownkey_context *ctx,
+	const br_ec_public_key *pk, unsigned usages);
+
+#ifndef BR_DOXYGEN_IGNORE
+/*
+ * The minimal X.509 engine has some state buffers which must be large
+ * enough to simultaneously accommodate:
+ * -- the public key extracted from the current certificate;
+ * -- the signature on the current certificate or on the previous
+ *    certificate;
+ * -- the public key extracted from the EE certificate.
+ *
+ * We store public key elements in their raw unsigned big-endian
+ * encoding. We want to support up to RSA-4096 with a short (up to 64
+ * bits) public exponent, thus a buffer for a public key must have
+ * length at least 520 bytes. Similarly, a RSA-4096 signature has length
+ * 512 bytes.
+ *
+ * Though RSA public exponents can formally be as large as the modulus
+ * (mathematically, even larger exponents would work, but PKCS#1 forbids
+ * them), exponents that do not fit on 32 bits are extremely rare,
+ * notably because some widespread implementations (e.g. Microsoft's
+ * CryptoAPI) don't support them. Moreover, large public exponents do not
+ * seem to imply any tangible security benefit, and they increase the
+ * cost of public key operations. The X.509 "minimal" engine will tolerate
+ * public exponents of arbitrary size as long as the modulus and the
+ * exponent can fit together in the dedicated buffer.
+ *
+ * EC public keys are shorter than RSA public keys; even with curve
+ * NIST P-521 (the largest curve we care to support), a public key is
+ * encoded over 133 bytes only.
+ */
+#define BR_X509_BUFSIZE_KEY   520
+#define BR_X509_BUFSIZE_SIG   512
+#endif
+
+/**
+ * \brief Type for receiving a name element.
+ *
+ * An array of such structures can be provided to the X.509 decoding
+ * engines. If the specified elements are found in the certificate
+ * subject DN or the SAN extension, then the name contents are copied
+ * as zero-terminated strings into the buffer.
+ *
+ * The decoder converts TeletexString and BMPString to UTF8String, and
+ * ensures that the resulting string is zero-terminated. If the string
+ * does not fit in the provided buffer, then the copy is aborted and an
+ * error is reported.
+ */
+typedef struct {
+	/**
+	 * \brief Element OID.
+	 *
+	 * For X.500 name elements (to be extracted from the subject DN),
+	 * this is the encoded OID for the requested name element; the
+	 * first byte shall contain the length of the DER-encoded OID
+	 * value, followed by the OID value (for instance, OID 2.5.4.3,
+	 * for id-at-commonName, will be `03 55 04 03`). This is
+	 * equivalent to full DER encoding with the length but without
+	 * the tag.
+	 *
+	 * For SAN name elements, the first byte (`oid[0]`) has value 0,
+	 * followed by another byte that matches the expected GeneralName
+	 * tag. Allowed second byte values are then:
+	 *
+	 *   - 1: `rfc822Name`
+	 *
+	 *   - 2: `dNSName`
+	 *
+	 *   - 6: `uniformResourceIdentifier`
+	 *
+	 *   - 0: `otherName`
+	 *
+	 * If first and second byte are 0, then this is a SAN element of
+	 * type `otherName`; the `oid[]` array should then contain, right
+	 * after the two bytes of value 0, an encoded OID (with the same
+	 * conventions as for X.500 name elements). If a match is found
+	 * for that OID, then the corresponding name element will be
+	 * extracted, as long as it is a supported string type.
+	 */
+	const unsigned char *oid;
+
+	/**
+	 * \brief Destination buffer.
+	 */
+	char *buf;
+
+	/**
+	 * \brief Length (in bytes) of the destination buffer.
+	 *
+	 * The buffer MUST NOT be smaller than 1 byte.
+	 */
+	size_t len;
+
+	/**
+	 * \brief Decoding status.
+	 *
+	 * Status is 0 if the name element was not found, 1 if it was
+	 * found and decoded, or -1 on error. Error conditions include
+	 * an unrecognised encoding, an invalid encoding, or a string
+	 * too large for the destination buffer.
+	 */
+	int status;
+
+} br_name_element;
+
+/**
+ * \brief Callback for validity date checks.
+ *
+ * The function receives as parameter an arbitrary user-provided context,
+ * and the notBefore and notAfter dates specified in an X.509 certificate,
+ * both expressed as a number of days and a number of seconds:
+ *
+ *   - Days are counted in a proleptic Gregorian calendar since
+ *     January 1st, 0 AD. Year "0 AD" is the one that preceded "1 AD";
+ *     it is also traditionally known as "1 BC".
+ *
+ *   - Seconds are counted since midnight, from 0 to 86400 (a count of
+ *     86400 is possible only if a leap second happened).
+ *
+ * Each date and time is understood in the UTC time zone. The "Unix
+ * Epoch" (January 1st, 1970, 00:00 UTC) corresponds to days=719528 and
+ * seconds=0; the "Windows Epoch" (January 1st, 1601, 00:00 UTC) is
+ * days=584754, seconds=0.
+ *
+ * This function must return -1 if the current date is strictly before
+ * the "notBefore" time, or +1 if the current date is strictly after the
+ * "notAfter" time. If neither condition holds, then the function returns
+ * 0, which means that the current date falls within the validity range of
+ * the certificate. If the function returns a value distinct from -1, 0
+ * and +1, then this is interpreted as an unavailability of the current
+ * time, which normally ends the validation process with a
+ * `BR_ERR_X509_TIME_UNKNOWN` error.
+ *
+ * During path validation, this callback will be invoked for each
+ * considered X.509 certificate. Validation fails if any of the calls
+ * returns a non-zero value.
+ *
+ * The context value is an arbitrary pointer set by the caller when
+ * configuring this callback.
+ *
+ * \param tctx                 context pointer.
+ * \param not_before_days      notBefore date (days since Jan 1st, 0 AD).
+ * \param not_before_seconds   notBefore time (seconds, at most 86400).
+ * \param not_after_days       notAfter date (days since Jan 1st, 0 AD).
+ * \param not_after_seconds    notAfter time (seconds, at most 86400).
+ * \return  -1, 0 or +1.
+ */
+typedef int (*br_x509_time_check)(void *tctx,
+	uint32_t not_before_days, uint32_t not_before_seconds,
+	uint32_t not_after_days, uint32_t not_after_seconds);
+
+/**
+ * \brief The "minimal" X.509 engine structure.
+ *
+ * The structure contents are opaque (they shall not be accessed directly),
+ * except for the first field (the vtable).
+ *
+ * The "minimal" engine performs a rudimentary but serviceable X.509 path
+ * validation.
+ */
+typedef struct {
+	const br_x509_class *vtable;
+
+#ifndef BR_DOXYGEN_IGNORE
+	/* Structure for returning the EE public key. */
+	br_x509_pkey pkey;
+
+	/* CPU for the T0 virtual machine. */
+	struct {
+		uint32_t *dp;
+		uint32_t *rp;
+		const unsigned char *ip;
+	} cpu;
+	uint32_t dp_stack[31];
+	uint32_t rp_stack[31];
+	int err;
+
+	/* Server name to match with the SAN / CN of the EE certificate. */
+	const char *server_name;
+
+	/* Validated key usages. */
+	unsigned char key_usages;
+
+	/* Explicitly set date and time. */
+	uint32_t days, seconds;
+
+	/* Current certificate length (in bytes). Set to 0 when the
+	   certificate has been fully processed. */
+	uint32_t cert_length;
+
+	/* Number of certificates processed so far in the current chain.
+	   It is incremented at the end of the processing of a certificate,
+	   so it is 0 for the EE. */
+	uint32_t num_certs;
+
+	/* Certificate data chunk. */
+	const unsigned char *hbuf;
+	size_t hlen;
+
+	/* The pad serves as destination for various operations. */
+	unsigned char pad[256];
+
+	/* Buffer for EE public key data. */
+	unsigned char ee_pkey_data[BR_X509_BUFSIZE_KEY];
+
+	/* Buffer for currently decoded public key. */
+	unsigned char pkey_data[BR_X509_BUFSIZE_KEY];
+
+	/* Signature type: signer key type, offset to the hash
+	   function OID (in the T0 data block) and hash function
+	   output length (TBS hash length). */
+	unsigned char cert_signer_key_type;
+	uint16_t cert_sig_hash_oid;
+	unsigned char cert_sig_hash_len;
+
+	/* Current/last certificate signature. */
+	unsigned char cert_sig[BR_X509_BUFSIZE_SIG];
+	uint16_t cert_sig_len;
+
+	/* Minimum RSA key length (difference in bytes from 128). */
+	int16_t min_rsa_size;
+
+	/* Configured trust anchors. */
+	const br_x509_trust_anchor *trust_anchors;
+	size_t trust_anchors_num;
+
+	/*
+	 * Multi-hasher for the TBS.
+	 */
+	unsigned char do_mhash;
+	br_multihash_context mhash;
+	unsigned char tbs_hash[64];
+
+	/*
+	 * Simple hasher for the subject/issuer DN.
+	 */
+	unsigned char do_dn_hash;
+	const br_hash_class *dn_hash_impl;
+	br_hash_compat_context dn_hash;
+	unsigned char current_dn_hash[64];
+	unsigned char next_dn_hash[64];
+	unsigned char saved_dn_hash[64];
+
+	/*
+	 * Name elements to gather.
+	 */
+	br_name_element *name_elts;
+	size_t num_name_elts;
+
+	/*
+	 * Callback function (and context) to check certificate validity dates.
+	 */
+	void *itime_ctx;
+	br_x509_time_check itime;
+
+	/*
+	 * Public key cryptography implementations (signature verification).
+	 */
+	br_rsa_pkcs1_vrfy irsa;
+	br_ecdsa_vrfy iecdsa;
+	const br_ec_impl *iec;
+#endif
+
+} br_x509_minimal_context;
+
+/**
+ * \brief Class instance for the "minimal" X.509 engine.
+ */
+extern const br_x509_class br_x509_minimal_vtable;
+
+/**
+ * \brief Initialise a "minimal" X.509 engine.
+ *
+ * The `dn_hash_impl` parameter shall be a hash function internally used
+ * to match X.500 names (subject/issuer DN, and anchor names). Any standard
+ * hash function may be used, but a collision-resistant hash function is
+ * advised.
+ *
+ * After initialization, some implementations for signature verification
+ * (hash functions and signature algorithms) MUST be added.
+ *
+ * \param ctx                 context to initialise.
+ * \param dn_hash_impl        hash function for DN comparisons.
+ * \param trust_anchors       trust anchors.
+ * \param trust_anchors_num   number of trust anchors.
+ */
+void br_x509_minimal_init(br_x509_minimal_context *ctx,
+	const br_hash_class *dn_hash_impl,
+	const br_x509_trust_anchor *trust_anchors, size_t trust_anchors_num);
+
+/**
+ * \brief Set a supported hash function in an X.509 "minimal" engine.
+ *
+ * Hash functions are used with signature verification algorithms.
+ * Once initialised (with `br_x509_minimal_init()`), the context must
+ * be configured with the hash functions it shall support for that
+ * purpose. The hash function identifier MUST be one of the standard
+ * hash function identifiers (1 to 6, for MD5, SHA-1, SHA-224, SHA-256,
+ * SHA-384 and SHA-512).
+ *
+ * If `impl` is `NULL`, this _removes_ support for the designated
+ * hash function.
+ *
+ * \param ctx    validation context.
+ * \param id     hash function identifier (from 1 to 6).
+ * \param impl   hash function implementation (or `NULL`).
+ */
+static inline void
+br_x509_minimal_set_hash(br_x509_minimal_context *ctx,
+	int id, const br_hash_class *impl)
+{
+	br_multihash_setimpl(&ctx->mhash, id, impl);
+}
+
+/**
+ * \brief Set a RSA signature verification implementation in the X.509
+ * "minimal" engine.
+ *
+ * Once initialised (with `br_x509_minimal_init()`), the context must
+ * be configured with the signature verification implementations that
+ * it is supposed to support. If `irsa` is `0`, then the RSA support
+ * is disabled.
+ *
+ * \param ctx    validation context.
+ * \param irsa   RSA signature verification implementation (or `0`).
+ */
+static inline void
+br_x509_minimal_set_rsa(br_x509_minimal_context *ctx,
+	br_rsa_pkcs1_vrfy irsa)
+{
+	ctx->irsa = irsa;
+}
+
+/**
+ * \brief Set a ECDSA signature verification implementation in the X.509
+ * "minimal" engine.
+ *
+ * Once initialised (with `br_x509_minimal_init()`), the context must
+ * be configured with the signature verification implementations that
+ * it is supposed to support.
+ *
+ * If `iecdsa` is `0`, then this call disables ECDSA support; in that
+ * case, `iec` may be `NULL`. Otherwise, `iecdsa` MUST point to a function
+ * that verifies ECDSA signatures with format "asn1", and it will use
+ * `iec` as underlying elliptic curve support.
+ *
+ * \param ctx      validation context.
+ * \param iec      elliptic curve implementation (or `NULL`).
+ * \param iecdsa   ECDSA implementation (or `0`).
+ */
+static inline void
+br_x509_minimal_set_ecdsa(br_x509_minimal_context *ctx,
+	const br_ec_impl *iec, br_ecdsa_vrfy iecdsa)
+{
+	ctx->iecdsa = iecdsa;
+	ctx->iec = iec;
+}
+
+/**
+ * \brief Initialise a "minimal" X.509 engine with default algorithms.
+ *
+ * This function performs the same job as `br_x509_minimal_init()`, but
+ * also sets implementations for RSA, ECDSA, and the standard hash
+ * functions.
+ *
+ * \param ctx                 context to initialise.
+ * \param trust_anchors       trust anchors.
+ * \param trust_anchors_num   number of trust anchors.
+ */
+void br_x509_minimal_init_full(br_x509_minimal_context *ctx,
+	const br_x509_trust_anchor *trust_anchors, size_t trust_anchors_num);
+
+/**
+ * \brief Set the validation time for the X.509 "minimal" engine.
+ *
+ * The validation time is set as two 32-bit integers, for days and
+ * seconds since a fixed epoch:
+ *
+ *   - Days are counted in a proleptic Gregorian calendar since
+ *     January 1st, 0 AD. Year "0 AD" is the one that preceded "1 AD";
+ *     it is also traditionally known as "1 BC".
+ *
+ *   - Seconds are counted since midnight, from 0 to 86400 (a count of
+ *     86400 is possible only if a leap second happened).
+ *
+ * The validation date and time is understood in the UTC time zone. The
+ * "Unix Epoch" (January 1st, 1970, 00:00 UTC) corresponds to days=719528
+ * and seconds=0; the "Windows Epoch" (January 1st, 1601, 00:00 UTC) is
+ * days=584754, seconds=0.
+ *
+ * If the validation date and time are not explicitly set, but BearSSL
+ * was compiled with support for the system clock on the underlying
+ * platform, then the current time will automatically be used. Otherwise,
+ * not setting the validation date and time implies a validation
+ * failure (except in case of direct trust of the EE key).
+ *
+ * \param ctx       validation context.
+ * \param days      days since January 1st, 0 AD (Gregorian calendar).
+ * \param seconds   seconds since midnight (0 to 86400).
+ */
+static inline void br_x509_minimal_set_time(br_x509_minimal_context *ctx,
+	uint32_t days, uint32_t seconds)
+{
+	/* Reset the time-check callback stored in ctx->itime. */
+	ctx->itime = 0;
+	ctx->seconds = seconds;
+	ctx->days = days;
+}
+
+/**
+ * \brief Set the validity range callback function for the X.509
+ * "minimal" engine.
+ *
+ * The provided function will be invoked to check whether the validation
+ * date is within the validity range for a given X.509 certificate; a call
+ * will be issued for each considered certificate. The provided context
+ * pointer (`itime_ctx`) will be passed as first parameter to the callback.
+ *
+ * \param ctx         validation context.
+ * \param itime_ctx   context for callback invocation.
+ * \param itime       callback function.
+ */
+static inline void br_x509_minimal_set_time_callback(
+	br_x509_minimal_context *ctx, void *itime_ctx,
+	br_x509_time_check itime)
+{
+	ctx->itime = itime;
+	ctx->itime_ctx = itime_ctx;
+}
+
+/**
+ * \brief Set the minimal acceptable length for RSA keys (X.509 "minimal"
+ * engine).
+ *
+ * The RSA key length is expressed in bytes. The default minimum key
+ * length is 128 bytes, corresponding to 1017 bits. RSA keys shorter
+ * than the configured length will be rejected, implying validation
+ * failure. This setting applies to keys extracted from certificates
+ * (both end-entity, and intermediate CA) but not to "CA" trust anchors.
+ *
+ * \param ctx           validation context.
+ * \param byte_length   minimum RSA key length, **in bytes** (not bits).
+ */
+static inline void br_x509_minimal_set_minrsa(
+	br_x509_minimal_context *ctx, int byte_length)
+{
+	ctx->min_rsa_size = (int16_t)(byte_length - 128); /* stored minus 128 */
+}
+
+/**
+ * \brief Set the name elements to gather.
+ *
+ * The provided array is linked in the context. The elements are
+ * gathered from the EE certificate. If the same element type is
+ * requested several times, then the relevant structures will be filled
+ * in the order the matching values are encountered in the certificate.
+ *
+ * \param ctx        validation context.
+ * \param elts       array of name element structures to fill.
+ * \param num_elts   number of name element structures to fill.
+ */
+static inline void br_x509_minimal_set_name_elements(
+	br_x509_minimal_context *ctx, br_name_element *elts, size_t num_elts)
+{
+	/* Only the pointer is kept; the array itself is not copied. */
+	ctx->num_name_elts = num_elts;
+	ctx->name_elts = elts;
+}
+
+/**
+ * \brief X.509 decoder context.
+ *
+ * This structure is _not_ for X.509 validation, but for extracting
+ * names and public keys from encoded certificates. Intended usage is
+ * to use (self-signed) certificates as trust anchors.
+ *
+ * Contents are opaque and shall not be accessed directly.
+ */
+typedef struct {
+
+#ifndef BR_DOXYGEN_IGNORE
+	/* Structure for returning the public key. */
+	br_x509_pkey pkey;
+
+	/* CPU for the T0 virtual machine. */
+	struct {
+		uint32_t *dp;
+		uint32_t *rp;
+		const unsigned char *ip;
+	} cpu;
+	uint32_t dp_stack[32];
+	uint32_t rp_stack[32];
+	int err;	/* non-zero once an error has been recorded */
+
+	/* The pad serves as destination for various operations. */
+	unsigned char pad[256];
+
+	/* Flag set when decoding succeeds; read by the inline accessors. */
+	unsigned char decoded;
+
+	/* Validity dates, each split into a day count and a seconds count. */
+	uint32_t notbefore_days, notbefore_seconds;
+	uint32_t notafter_days, notafter_seconds;
+
+	/* The "CA" flag. This is set to true if the certificate contains
+	   a Basic Constraints extension that asserts CA status. */
+	unsigned char isCA;
+
+	/* DN processing: the subject DN is extracted and pushed to the
+	   provided callback. */
+	unsigned char copy_dn;
+	void *append_dn_ctx;
+	void (*append_dn)(void *ctx, const void *buf, size_t len);
+
+	/* Certificate data chunk: pointer and length. */
+	const unsigned char *hbuf;
+	size_t hlen;
+
+	/* Buffer for decoded public key. */
+	unsigned char pkey_data[BR_X509_BUFSIZE_KEY];
+
+	/* Type of key and hash function used in the certificate signature. */
+	unsigned char signer_key_type;
+	unsigned char signer_hash_id;
+#endif
+
+} br_x509_decoder_context;
+
+/**
+ * \brief Initialise an X.509 decoder context for processing a new
+ * certificate.
+ *
+ * The `append_dn()` callback (with opaque context `append_dn_ctx`)
+ * will be invoked to receive, chunk by chunk, the certificate's
+ * subject DN. If `append_dn` is `0` then the subject DN will be
+ * ignored.
+ *
+ * \param ctx             X.509 decoder context to initialise.
+ * \param append_dn       DN receiver callback (or `0`).
+ * \param append_dn_ctx   context for the DN receiver callback.
+ */
+void br_x509_decoder_init(br_x509_decoder_context *ctx,
+	void (*append_dn)(void *ctx, const void *buf, size_t len),
+	void *append_dn_ctx);
+
+/**
+ * \brief Push some certificate bytes into a decoder context.
+ *
+ * If `len` is non-zero, then that many bytes are pushed, from address
+ * `data`, into the provided decoder context.
+ *
+ * \param ctx    X.509 decoder context.
+ * \param data   certificate data chunk.
+ * \param len    certificate data chunk length (in bytes).
+ */
+void br_x509_decoder_push(br_x509_decoder_context *ctx,
+	const void *data, size_t len);
+
+/**
+ * \brief Obtain the decoded public key.
+ *
+ * Returned value is a pointer to a structure internal to the decoder
+ * context; releasing or reusing the decoder context invalidates that
+ * structure.
+ *
+ * If decoding was not finished, or failed, then `NULL` is returned.
+ *
+ * \param ctx   X.509 decoder context.
+ * \return  the public key, or `NULL` on unfinished/error.
+ */
+static inline br_x509_pkey *
+br_x509_decoder_get_pkey(br_x509_decoder_context *ctx)
+{
+	/* The key is only handed out after a complete, error-free decode. */
+	if (!ctx->decoded || ctx->err != 0) {
+		return NULL;
+	}
+	return &ctx->pkey;
+}
+
+/**
+ * \brief Get decoder error status.
+ *
+ * If no error was reported yet but the certificate decoding is not
+ * finished, then the error code is `BR_ERR_X509_TRUNCATED`. If decoding
+ * was successful, then 0 is returned.
+ *
+ * \param ctx   X.509 decoder context.
+ * \return  0 on successful decoding, or a non-zero error code.
+ */
+static inline int
+br_x509_decoder_last_error(br_x509_decoder_context *ctx)
+{
+	int status = ctx->err;
+
+	/* A recorded error wins; otherwise flag an unfinished decode. */
+	if (status == 0 && !ctx->decoded) {
+		status = BR_ERR_X509_TRUNCATED;
+	}
+	return status;
+}
+
+/**
+ * \brief Get the "isCA" flag from an X.509 decoder context.
+ *
+ * This flag is set if the decoded certificate claims to be a CA through
+ * a Basic Constraints extension. This flag should not be read before
+ * decoding completed successfully.
+ *
+ * \param ctx   X.509 decoder context.
+ * \return  the "isCA" flag.
+ */
+static inline int br_x509_decoder_isCA(br_x509_decoder_context *ctx)
+{
+	/* The unsigned char flag is widened to int on return. */
+	return ctx->isCA;
+}
+
+/**
+ * \brief Get the issuing CA key type (type of algorithm used to sign the
+ * decoded certificate).
+ *
+ * This is `BR_KEYTYPE_RSA` or `BR_KEYTYPE_EC`. The value 0 is returned
+ * if the signature type was not recognised.
+ *
+ * \param ctx   X.509 decoder context.
+ * \return  the issuing CA key type.
+ */
+static inline int br_x509_decoder_get_signer_key_type(
+	br_x509_decoder_context *ctx)
+{
+	return ctx->signer_key_type; /* unsigned char widened to int */
+}
+
+/**
+ * \brief Get the identifier for the hash function used to sign the decoded
+ * certificate.
+ *
+ * This is 0 if the hash function was not recognised.
+ *
+ * \param ctx   X.509 decoder context.
+ * \return  the signature hash function identifier.
+ */
+static inline int br_x509_decoder_get_signer_hash_id(
+	br_x509_decoder_context *ctx)
+{
+	return ctx->signer_hash_id; /* unsigned char widened to int */
+}
+
+/**
+ * \brief Type for an X.509 certificate (DER-encoded).
+ */
+typedef struct {
+	/** \brief Pointer to the DER-encoded certificate bytes. */
+	unsigned char *data;
+	/** \brief Size of the DER-encoded certificate (in bytes). */
+	size_t data_len;
+} br_x509_certificate;
+
+/**
+ * \brief Private key decoder context.
+ *
+ * The private key decoder recognises RSA and EC private keys, either in
+ * their raw, DER-encoded format, or wrapped in an unencrypted PKCS#8
+ * archive (again DER-encoded).
+ *
+ * Structure contents are opaque and shall not be accessed directly.
+ */
+typedef struct {
+#ifndef BR_DOXYGEN_IGNORE
+	/* Structure for returning the private key (member per key_type). */
+	union {
+		br_rsa_private_key rsa;
+		br_ec_private_key ec;
+	} key;
+
+	/* CPU for the T0 virtual machine. */
+	struct {
+		uint32_t *dp;
+		uint32_t *rp;
+		const unsigned char *ip;
+	} cpu;
+	uint32_t dp_stack[32];
+	uint32_t rp_stack[32];
+	int err;	/* non-zero once an error has been recorded */
+
+	/* Private key data chunk: pointer and length. */
+	const unsigned char *hbuf;
+	size_t hlen;
+
+	/* The pad serves as destination for various operations. */
+	unsigned char pad[256];
+
+	/* Decoded key type; 0 until decoding is complete. */
+	unsigned char key_type;
+
+	/* Buffer for the private key elements. It shall be large enough
+	   to accommodate all elements for an RSA-4096 private key (roughly
+	   five 2048-bit integers, possibly a bit more). */
+	unsigned char key_data[3 * BR_X509_BUFSIZE_SIG];
+#endif
+} br_skey_decoder_context;
+
+/**
+ * \brief Initialise a private key decoder context.
+ *
+ * \param ctx   key decoder context to initialise.
+ */
+void br_skey_decoder_init(br_skey_decoder_context *ctx);
+
+/**
+ * \brief Push some data bytes into a private key decoder context.
+ *
+ * If `len` is non-zero, then that many data bytes, starting at address
+ * `data`, are pushed into the decoder.
+ *
+ * \param ctx    key decoder context.
+ * \param data   private key data chunk.
+ * \param len    private key data chunk length (in bytes).
+ */
+void br_skey_decoder_push(br_skey_decoder_context *ctx,
+	const void *data, size_t len);
+
+/**
+ * \brief Get the decoding status for a private key.
+ *
+ * Decoding status is 0 on success, or a non-zero error code. If the
+ * decoding is unfinished when this function is called, then the
+ * status code `BR_ERR_X509_TRUNCATED` is returned.
+ *
+ * \param ctx   key decoder context.
+ * \return  0 on successful decoding, or a non-zero error code.
+ */
+static inline int
+br_skey_decoder_last_error(const br_skey_decoder_context *ctx)
+{
+	int status = ctx->err;
+
+	/* No error yet, but no key type either: input was incomplete. */
+	if (status == 0 && ctx->key_type == 0) {
+		status = BR_ERR_X509_TRUNCATED;
+	}
+	return status;
+}
+
+/**
+ * \brief Get the decoded private key type.
+ *
+ * Private key type is `BR_KEYTYPE_RSA` or `BR_KEYTYPE_EC`. If decoding is
+ * not finished or failed, then 0 is returned.
+ *
+ * \param ctx   key decoder context.
+ * \return  decoded private key type, or 0.
+ */
+static inline int
+br_skey_decoder_key_type(const br_skey_decoder_context *ctx)
+{
+	/* A failed decode never exposes a key type. */
+	if (ctx->err != 0) {
+		return 0;
+	}
+	return ctx->key_type;
+}
+
+/**
+ * \brief Get the decoded RSA private key.
+ *
+ * This function returns `NULL` if the decoding failed, or is not
+ * finished, or the key is not RSA. The returned pointer references
+ * structures within the context that can become invalid if the context
+ * is reused or released.
+ *
+ * \param ctx   key decoder context.
+ * \return  decoded RSA private key, or `NULL`.
+ */
+static inline const br_rsa_private_key *
+br_skey_decoder_get_rsa(const br_skey_decoder_context *ctx)
+{
+	/* Reject failed decodes and anything that is not an RSA key. */
+	if (ctx->err != 0 || ctx->key_type != BR_KEYTYPE_RSA) {
+		return NULL;
+	}
+	return &ctx->key.rsa;
+}
+
+/**
+ * \brief Get the decoded EC private key.
+ *
+ * This function returns `NULL` if the decoding failed, or is not
+ * finished, or the key is not EC. The returned pointer references
+ * structures within the context that can become invalid if the context
+ * is reused or released.
+ *
+ * \param ctx   key decoder context.
+ * \return  decoded EC private key, or `NULL`.
+ */
+static inline const br_ec_private_key *
+br_skey_decoder_get_ec(const br_skey_decoder_context *ctx)
+{
+	/* Reject failed decodes and anything that is not an EC key. */
+	if (ctx->err != 0 || ctx->key_type != BR_KEYTYPE_EC) {
+		return NULL;
+	}
+	return &ctx->key.ec;
+}
+
+/**
+ * \brief Encode an RSA private key (raw DER format).
+ *
+ * This function encodes the provided key into the "raw" format specified
+ * in PKCS#1 (RFC 8017, Appendix C, type `RSAPrivateKey`), with DER
+ * encoding rules.
+ *
+ * The key elements are:
+ *
+ *  - `sk`: the private key (`p`, `q`, `dp`, `dq` and `iq`)
+ *
+ *  - `pk`: the public key (`n` and `e`)
+ *
+ *  - `d` (size: `dlen` bytes): the private exponent
+ *
+ * The public key elements, and the private exponent `d`, can be
+ * recomputed from the private key (see `br_rsa_compute_modulus()`,
+ * `br_rsa_compute_pubexp()` and `br_rsa_compute_privexp()`).
+ *
+ * If `dest` is not `NULL`, then the encoded key is written at that
+ * address, and the encoded length (in bytes) is returned. If `dest` is
+ * `NULL`, then nothing is written, but the encoded length is still
+ * computed and returned.
+ *
+ * \param dest   the destination buffer (or `NULL`).
+ * \param sk     the RSA private key.
+ * \param pk     the RSA public key.
+ * \param d      the RSA private exponent.
+ * \param dlen   the RSA private exponent length (in bytes).
+ * \return  the encoded key length (in bytes).
+ */
+size_t br_encode_rsa_raw_der(void *dest, const br_rsa_private_key *sk,
+	const br_rsa_public_key *pk, const void *d, size_t dlen);
+
+/**
+ * \brief Encode an RSA private key (PKCS#8 DER format).
+ *
+ * This function encodes the provided key into the PKCS#8 format
+ * (RFC 5958, type `OneAsymmetricKey`). It wraps around the "raw DER"
+ * format for the RSA key, as implemented by `br_encode_rsa_raw_der()`.
+ *
+ * The key elements are:
+ *
+ *  - `sk`: the private key (`p`, `q`, `dp`, `dq` and `iq`)
+ *
+ *  - `pk`: the public key (`n` and `e`)
+ *
+ *  - `d` (size: `dlen` bytes): the private exponent
+ *
+ * The public key elements, and the private exponent `d`, can be
+ * recomputed from the private key (see `br_rsa_compute_modulus()`,
+ * `br_rsa_compute_pubexp()` and `br_rsa_compute_privexp()`).
+ *
+ * If `dest` is not `NULL`, then the encoded key is written at that
+ * address, and the encoded length (in bytes) is returned. If `dest` is
+ * `NULL`, then nothing is written, but the encoded length is still
+ * computed and returned.
+ *
+ * \param dest   the destination buffer (or `NULL`).
+ * \param sk     the RSA private key.
+ * \param pk     the RSA public key.
+ * \param d      the RSA private exponent.
+ * \param dlen   the RSA private exponent length (in bytes).
+ * \return  the encoded key length (in bytes).
+ */
+size_t br_encode_rsa_pkcs8_der(void *dest, const br_rsa_private_key *sk,
+	const br_rsa_public_key *pk, const void *d, size_t dlen);
+
+/**
+ * \brief Encode an EC private key (raw DER format).
+ *
+ * This function encodes the provided key into the "raw" format specified
+ * in RFC 5915 (type `ECPrivateKey`), with DER encoding rules.
+ *
+ * The private key is provided in `sk`, the public key being `pk`. If
+ * `pk` is `NULL`, then the encoded key will not include the public key
+ * in its `publicKey` field (which is nominally optional).
+ *
+ * If `dest` is not `NULL`, then the encoded key is written at that
+ * address, and the encoded length (in bytes) is returned. If `dest` is
+ * `NULL`, then nothing is written, but the encoded length is still
+ * computed and returned.
+ *
+ * If the key cannot be encoded (e.g. because there is no known OBJECT
+ * IDENTIFIER for the used curve), then 0 is returned.
+ *
+ * \param dest   the destination buffer (or `NULL`).
+ * \param sk     the EC private key.
+ * \param pk     the EC public key (or `NULL`).
+ * \return  the encoded key length (in bytes), or 0.
+ */
+size_t br_encode_ec_raw_der(void *dest,
+	const br_ec_private_key *sk, const br_ec_public_key *pk);
+
+/**
+ * \brief Encode an EC private key (PKCS#8 DER format).
+ *
+ * This function encodes the provided key into the PKCS#8 format
+ * (RFC 5958, type `OneAsymmetricKey`). The curve is identified
+ * by an OID provided as parameters to the `privateKeyAlgorithm`
+ * field. The private key value (contents of the `privateKey` field)
+ * contains the DER encoding of the `ECPrivateKey` type defined in
+ * RFC 5915, without the `parameters` field (since they would be
+ * redundant with the information in `privateKeyAlgorithm`).
+ *
+ * The private key is provided in `sk`, the public key being `pk`. If
+ * `pk` is not `NULL`, then the encoded public key is included in the
+ * `publicKey` field of the private key value (but not in the `publicKey`
+ * field of the PKCS#8 `OneAsymmetricKey` wrapper).
+ *
+ * If `dest` is not `NULL`, then the encoded key is written at that
+ * address, and the encoded length (in bytes) is returned. If `dest` is
+ * `NULL`, then nothing is written, but the encoded length is still
+ * computed and returned.
+ *
+ * If the key cannot be encoded (e.g. because there is no known OBJECT
+ * IDENTIFIER for the used curve), then 0 is returned.
+ *
+ * \param dest   the destination buffer (or `NULL`).
+ * \param sk     the EC private key.
+ * \param pk     the EC public key (or `NULL`).
+ * \return  the encoded key length (in bytes), or 0.
+ */
+size_t br_encode_ec_pkcs8_der(void *dest,
+	const br_ec_private_key *sk, const br_ec_public_key *pk);
+
+/**
+ * \brief PEM banner for RSA private key (raw).
+ */
+#define BR_ENCODE_PEM_RSA_RAW      "RSA PRIVATE KEY"
+
+/**
+ * \brief PEM banner for EC private key (raw).
+ */
+#define BR_ENCODE_PEM_EC_RAW       "EC PRIVATE KEY"
+
+/**
+ * \brief PEM banner for an RSA or EC private key in PKCS#8 format.
+ */
+#define BR_ENCODE_PEM_PKCS8        "PRIVATE KEY"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/third_party/bearssl/src/aes_big_cbcdec.c b/third_party/bearssl/src/aes_big_cbcdec.c
new file mode 100644
index 0000000..d969a3b
--- /dev/null
+++ b/third_party/bearssl/src/aes_big_cbcdec.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h */
+void br_aes_big_cbcdec_init(br_aes_big_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	/* Run the inverse key schedule, then bind the CBC-decrypt vtable. */
+	ctx->num_rounds = br_aes_big_keysched_inv(ctx->skey, key, len);
+	ctx->vtable = &br_aes_big_cbcdec_vtable;
+}
+
+/* see bearssl_block.h */
+void
+br_aes_big_cbcdec_run(const br_aes_big_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *block = data;
+	unsigned char *chain = iv;
+
+	/* Each pass handles one 16-byte block, in place. */
+	for (; len > 0; len -= 16, block += 16) {
+		unsigned char saved[16];
+		int k;
+
+		/* Keep the ciphertext: it chains into the next block. */
+		memcpy(saved, block, 16);
+		br_aes_big_decrypt(ctx->num_rounds, ctx->skey, block);
+		for (k = 0; k < 16; k ++) {
+			block[k] ^= chain[k];
+		}
+		/* The saved input block replaces the chaining value. */
+		memcpy(chain, saved, 16);
+	}
+}
+
+/* see bearssl_block.h */
+const br_block_cbcdec_class br_aes_big_cbcdec_vtable = {
+	sizeof(br_aes_big_cbcdec_keys),	/* context structure size */
+	16,	/* presumably block size (bytes); see bearssl_block.h */
+	4,	/* presumably log2 of the block size; see bearssl_block.h */
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))
+		&br_aes_big_cbcdec_init,	/* cast to the class-level type */
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_aes_big_cbcdec_run	/* cast to the class-level type */
+};
diff --git a/third_party/bearssl/src/aes_big_cbcenc.c b/third_party/bearssl/src/aes_big_cbcenc.c
new file mode 100644
index 0000000..265e53b
--- /dev/null
+++ b/third_party/bearssl/src/aes_big_cbcenc.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h */
+void br_aes_big_cbcenc_init(br_aes_big_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	/* Run the key schedule, then bind the CBC-encrypt vtable. */
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_big_cbcenc_vtable;
+}
+
+/* see bearssl_block.h */
+void
+br_aes_big_cbcenc_run(const br_aes_big_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *block = data;
+	unsigned char *chain = iv;
+
+	/* Each pass handles one 16-byte block, in place. */
+	for (; len > 0; len -= 16, block += 16) {
+		int k;
+
+		/* Mix in the chaining value before encrypting. */
+		for (k = 0; k < 16; k ++) {
+			block[k] ^= chain[k];
+		}
+		br_aes_big_encrypt(ctx->num_rounds, ctx->skey, block);
+		/* The fresh output block chains into the next one. */
+		memcpy(chain, block, 16);
+	}
+}
+
+/* see bearssl_block.h */
+const br_block_cbcenc_class br_aes_big_cbcenc_vtable = {
+	sizeof(br_aes_big_cbcenc_keys),	/* context structure size */
+	16,	/* presumably block size (bytes); see bearssl_block.h */
+	4,	/* presumably log2 of the block size; see bearssl_block.h */
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
+		&br_aes_big_cbcenc_init,	/* cast to the class-level type */
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+		&br_aes_big_cbcenc_run	/* cast to the class-level type */
+};
diff --git a/third_party/bearssl/src/aes_big_ctr.c b/third_party/bearssl/src/aes_big_ctr.c
new file mode 100644
index 0000000..18fbb84
--- /dev/null
+++ b/third_party/bearssl/src/aes_big_ctr.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h */
+void br_aes_big_ctr_init(br_aes_big_ctr_keys *ctx,
+	const void *key, size_t len)
+{
+	/* Run the key schedule, then bind the CTR vtable. */
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_big_ctr_vtable;
+}
+
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	size_t u;
+
+	/* dst[u] ^= src[u] for every byte index below len. */
+	for (u = 0; u < len; u ++) {
+		out[u] ^= in[u];
+	}
+}
+
+/* see bearssl_block.h */
+uint32_t
+br_aes_big_ctr_run(const br_aes_big_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *out = data;
+
+	while (len > 0) {
+		unsigned char ks[16];
+		size_t chunk;
+
+		/* Counter block: 12 IV bytes, then cc via br_enc32be(). */
+		memcpy(ks, iv, 12);
+		br_enc32be(ks + 12, cc);
+		cc ++;
+		br_aes_big_encrypt(ctx->num_rounds, ctx->skey, ks);
+		/* The last block may be partial. */
+		chunk = len < 16 ? len : 16;
+		xorbuf(out, ks, chunk);
+		out += chunk;
+		len -= chunk;
+	}
+	return cc;
+}
+
+/* see bearssl_block.h */
+const br_block_ctr_class br_aes_big_ctr_vtable = {
+	sizeof(br_aes_big_ctr_keys),	/* context structure size */
+	16,	/* presumably block size (bytes); see bearssl_block.h */
+	4,	/* presumably log2 of the block size; see bearssl_block.h */
+	(void (*)(const br_block_ctr_class **, const void *, size_t))
+		&br_aes_big_ctr_init,	/* cast to the class-level type */
+	(uint32_t (*)(const br_block_ctr_class *const *,
+		const void *, uint32_t, void *, size_t))
+		&br_aes_big_ctr_run	/* cast to the class-level type */
+};
diff --git a/third_party/bearssl/src/aes_big_ctrcbc.c b/third_party/bearssl/src/aes_big_ctrcbc.c
new file mode 100644
index 0000000..d45ca76
--- /dev/null
+++ b/third_party/bearssl/src/aes_big_ctrcbc.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h */
+void br_aes_big_ctrcbc_init(br_aes_big_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	/* Run the key schedule, then bind the CTR+CBC-MAC vtable. */
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_big_ctrcbc_vtable;
+}
+
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	size_t u;
+
+	/* dst[u] ^= src[u] for every byte index below len. */
+	for (u = 0; u < len; u ++) {
+		out[u] ^= in[u];
+	}
+}
+
+/* see bearssl_block.h */
+void
+br_aes_big_ctrcbc_ctr(const br_aes_big_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char *buf, *bctr;
+	uint32_t cc0, cc1, cc2, cc3;	/* counter words; cc0 is incremented */
+
+	buf = data;
+	bctr = ctr;
+	cc3 = br_dec32be(bctr +  0);	/* last word to receive a carry */
+	cc2 = br_dec32be(bctr +  4);
+	cc1 = br_dec32be(bctr +  8);
+	cc0 = br_dec32be(bctr + 12);	/* word bumped on every block */
+	while (len > 0) {
+		unsigned char tmp[16];
+		uint32_t carry;
+
+		br_enc32be(tmp +  0, cc3);	/* rebuild the counter block */
+		br_enc32be(tmp +  4, cc2);
+		br_enc32be(tmp +  8, cc1);
+		br_enc32be(tmp + 12, cc0);
+		br_aes_big_encrypt(ctx->num_rounds, ctx->skey, tmp);
+		xorbuf(buf, tmp, 16);	/* always a full 16-byte block */
+		buf += 16;
+		len -= 16;
+		cc0 ++;
+		carry = (~(cc0 | -cc0)) >> 31;	/* 1 iff cc0 wrapped to 0 */
+		cc1 += carry;
+		carry &= (~(cc1 | -cc1)) >> 31;	/* ...and cc1 is 0 as well */
+		cc2 += carry;
+		carry &= (~(cc2 | -cc2)) >> 31;	/* ...and cc2 is 0 as well */
+		cc3 += carry;
+	}
+	br_enc32be(bctr +  0, cc3);	/* write the advanced counter back */
+	br_enc32be(bctr +  4, cc2);
+	br_enc32be(bctr +  8, cc1);
+	br_enc32be(bctr + 12, cc0);
+}
+
+/* see bearssl_block.h */
+void
+br_aes_big_ctrcbc_mac(const br_aes_big_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	const unsigned char *in = data;
+
+	/* Absorb the input 16 bytes at a time into the running MAC value;
+	   a length that is not a multiple of 16 is not handled here. */
+	for (; len > 0; len -= 16, in += 16) {
+		/* XOR the block in, then encrypt the accumulator in place. */
+		xorbuf(cbcmac, in, 16);
+		br_aes_big_encrypt(ctx->num_rounds, ctx->skey, cbcmac);
+	}
+}
+
+/* see bearssl_block.h */
+void br_aes_big_ctrcbc_encrypt(const br_aes_big_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	/* CTR pass first, so the MAC is computed over the processed data. */
+	br_aes_big_ctrcbc_ctr(ctx, ctr, data, len);
+	br_aes_big_ctrcbc_mac(ctx, cbcmac, data, len);
+}
+
+/* see bearssl_block.h */
+void br_aes_big_ctrcbc_decrypt(const br_aes_big_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	/* MAC pass first, while data still holds the unprocessed input. */
+	br_aes_big_ctrcbc_mac(ctx, cbcmac, data, len);
+	br_aes_big_ctrcbc_ctr(ctx, ctr, data, len);
+}
+
+/* see bearssl_block.h */
+const br_block_ctrcbc_class br_aes_big_ctrcbc_vtable = {
+	sizeof(br_aes_big_ctrcbc_keys),	/* context structure size */
+	16,	/* presumably block size (bytes); see bearssl_block.h */
+	4,	/* presumably log2 of the block size; see bearssl_block.h */
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+		&br_aes_big_ctrcbc_init,	/* cast to the class-level type */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_big_ctrcbc_encrypt,	/* CTR pass, then MAC pass */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_big_ctrcbc_decrypt,	/* MAC pass, then CTR pass */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_big_ctrcbc_ctr,	/* CTR pass only */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_big_ctrcbc_mac	/* MAC pass only */
+};
diff --git a/third_party/bearssl/src/aes_big_dec.c b/third_party/bearssl/src/aes_big_dec.c
new file mode 100644
index 0000000..a5d0e3c
--- /dev/null
+++ b/third_party/bearssl/src/aes_big_dec.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Inverse S-box (used by the final round of br_aes_big_decrypt()).
+ */
+static const unsigned char iS[] = {
+	0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E,
+	0x81, 0xF3, 0xD7, 0xFB, 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
+	0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, 0x54, 0x7B, 0x94, 0x32,
+	0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
+	0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49,
+	0x6D, 0x8B, 0xD1, 0x25, 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
+	0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, 0x6C, 0x70, 0x48, 0x50,
+	0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
+	0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05,
+	0xB8, 0xB3, 0x45, 0x06, 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
+	0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, 0x3A, 0x91, 0x11, 0x41,
+	0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
+	0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8,
+	0x1C, 0x75, 0xDF, 0x6E, 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
+	0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B, 0xFC, 0x56, 0x3E, 0x4B,
+	0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
+	0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59,
+	0x27, 0x80, 0xEC, 0x5F, 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
+	0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, 0xA0, 0xE0, 0x3B, 0x4D,
+	0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
+	0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63,
+	0x55, 0x21, 0x0C, 0x7D
+};
+/* iSsm0[x] packs 0E,09,0D,0B times iS[x] in GF(2^8), high byte first (e.g. x = 0). */
+static const uint32_t iSsm0[] = {
+	0x51F4A750, 0x7E416553, 0x1A17A4C3, 0x3A275E96, 0x3BAB6BCB, 0x1F9D45F1,
+	0xACFA58AB, 0x4BE30393, 0x2030FA55, 0xAD766DF6, 0x88CC7691, 0xF5024C25,
+	0x4FE5D7FC, 0xC52ACBD7, 0x26354480, 0xB562A38F, 0xDEB15A49, 0x25BA1B67,
+	0x45EA0E98, 0x5DFEC0E1, 0xC32F7502, 0x814CF012, 0x8D4697A3, 0x6BD3F9C6,
+	0x038F5FE7, 0x15929C95, 0xBF6D7AEB, 0x955259DA, 0xD4BE832D, 0x587421D3,
+	0x49E06929, 0x8EC9C844, 0x75C2896A, 0xF48E7978, 0x99583E6B, 0x27B971DD,
+	0xBEE14FB6, 0xF088AD17, 0xC920AC66, 0x7DCE3AB4, 0x63DF4A18, 0xE51A3182,
+	0x97513360, 0x62537F45, 0xB16477E0, 0xBB6BAE84, 0xFE81A01C, 0xF9082B94,
+	0x70486858, 0x8F45FD19, 0x94DE6C87, 0x527BF8B7, 0xAB73D323, 0x724B02E2,
+	0xE31F8F57, 0x6655AB2A, 0xB2EB2807, 0x2FB5C203, 0x86C57B9A, 0xD33708A5,
+	0x302887F2, 0x23BFA5B2, 0x02036ABA, 0xED16825C, 0x8ACF1C2B, 0xA779B492,
+	0xF307F2F0, 0x4E69E2A1, 0x65DAF4CD, 0x0605BED5, 0xD134621F, 0xC4A6FE8A,
+	0x342E539D, 0xA2F355A0, 0x058AE132, 0xA4F6EB75, 0x0B83EC39, 0x4060EFAA,
+	0x5E719F06, 0xBD6E1051, 0x3E218AF9, 0x96DD063D, 0xDD3E05AE, 0x4DE6BD46,
+	0x91548DB5, 0x71C45D05, 0x0406D46F, 0x605015FF, 0x1998FB24, 0xD6BDE997,
+	0x894043CC, 0x67D99E77, 0xB0E842BD, 0x07898B88, 0xE7195B38, 0x79C8EEDB,
+	0xA17C0A47, 0x7C420FE9, 0xF8841EC9, 0x00000000, 0x09808683, 0x322BED48,
+	0x1E1170AC, 0x6C5A724E, 0xFD0EFFFB, 0x0F853856, 0x3DAED51E, 0x362D3927,
+	0x0A0FD964, 0x685CA621, 0x9B5B54D1, 0x24362E3A, 0x0C0A67B1, 0x9357E70F,
+	0xB4EE96D2, 0x1B9B919E, 0x80C0C54F, 0x61DC20A2, 0x5A774B69, 0x1C121A16,
+	0xE293BA0A, 0xC0A02AE5, 0x3C22E043, 0x121B171D, 0x0E090D0B, 0xF28BC7AD,
+	0x2DB6A8B9, 0x141EA9C8, 0x57F11985, 0xAF75074C, 0xEE99DDBB, 0xA37F60FD,
+	0xF701269F, 0x5C72F5BC, 0x44663BC5, 0x5BFB7E34, 0x8B432976, 0xCB23C6DC,
+	0xB6EDFC68, 0xB8E4F163, 0xD731DCCA, 0x42638510, 0x13972240, 0x84C61120,
+	0x854A247D, 0xD2BB3DF8, 0xAEF93211, 0xC729A16D, 0x1D9E2F4B, 0xDCB230F3,
+	0x0D8652EC, 0x77C1E3D0, 0x2BB3166C, 0xA970B999, 0x119448FA, 0x47E96422,
+	0xA8FC8CC4, 0xA0F03F1A, 0x567D2CD8, 0x223390EF, 0x87494EC7, 0xD938D1C1,
+	0x8CCAA2FE, 0x98D40B36, 0xA6F581CF, 0xA57ADE28, 0xDAB78E26, 0x3FADBFA4,
+	0x2C3A9DE4, 0x5078920D, 0x6A5FCC9B, 0x547E4662, 0xF68D13C2, 0x90D8B8E8,
+	0x2E39F75E, 0x82C3AFF5, 0x9F5D80BE, 0x69D0937C, 0x6FD52DA9, 0xCF2512B3,
+	0xC8AC993B, 0x10187DA7, 0xE89C636E, 0xDB3BBB7B, 0xCD267809, 0x6E5918F4,
+	0xEC9AB701, 0x834F9AA8, 0xE6956E65, 0xAAFFE67E, 0x21BCCF08, 0xEF15E8E6,
+	0xBAE79BD9, 0x4A6F36CE, 0xEA9F09D4, 0x29B07CD6, 0x31A4B2AF, 0x2A3F2331,
+	0xC6A59430, 0x35A266C0, 0x744EBC37, 0xFC82CAA6, 0xE090D0B0, 0x33A7D815,
+	0xF104984A, 0x41ECDAF7, 0x7FCD500E, 0x1791F62F, 0x764DD68D, 0x43EFB04D,
+	0xCCAA4D54, 0xE49604DF, 0x9ED1B5E3, 0x4C6A881B, 0xC12C1FB8, 0x4665517F,
+	0x9D5EEA04, 0x018C355D, 0xFA877473, 0xFB0B412E, 0xB3671D5A, 0x92DBD252,
+	0xE9105633, 0x6DD64713, 0x9AD7618C, 0x37A10C7A, 0x59F8148E, 0xEB133C89,
+	0xCEA927EE, 0xB761C935, 0xE11CE5ED, 0x7A47B13C, 0x9CD2DF59, 0x55F2733F,
+	0x1814CE79, 0x73C737BF, 0x53F7CDEA, 0x5FFDAA5B, 0xDF3D6F14, 0x7844DB86,
+	0xCAAFF381, 0xB968C43E, 0x3824342C, 0xC2A3405F, 0x161DC372, 0xBCE2250C,
+	0x283C498B, 0xFF0D9541, 0x39A80171, 0x080CB3DE, 0xD8B4E49C, 0x6456C190,
+	0x7BCB8461, 0xD532B670, 0x486C5C74, 0xD0B85742
+};
+
+static unsigned
+mul2(unsigned x)
+{
+	unsigned d = x << 1;	/* doubled value; bit 8 is the overflow for a byte input */
+	return d ^ ((0u - (d >> 8)) & 0x11B);	/* branch-free reduction by 0x11B */
+}
+
+static unsigned
+mul9(unsigned x)
+{
+	return mul2(mul2(mul2(x))) ^ x;	/* 0x09*x = 8*x ^ x */
+}
+
+static unsigned
+mulb(unsigned x)
+{
+	unsigned dbl = mul2(x);		/* 2*x */
+	unsigned oct = mul2(mul2(dbl));	/* 8*x */
+
+	return oct ^ dbl ^ x;	/* 0x0B*x = 8*x ^ 2*x ^ x */
+}
+
+static unsigned
+muld(unsigned x)
+{
+	unsigned quad = mul2(mul2(x));	/* 4*x */
+	unsigned oct = mul2(quad);	/* 8*x */
+
+	return oct ^ quad ^ x;	/* 0x0D*x = 8*x ^ 4*x ^ x */
+}
+
+static unsigned
+mule(unsigned x)
+{
+	unsigned dbl = mul2(x);		/* 2*x */
+	unsigned quad = mul2(dbl);	/* 4*x */
+	unsigned oct = mul2(quad);	/* 8*x */
+
+	return oct ^ quad ^ dbl;	/* 0x0E*x = 8*x ^ 4*x ^ 2*x */
+}
+
+/* Key schedule for table-based AES decryption; see inner.h */
+unsigned
+br_aes_big_keysched_inv(uint32_t *skey, const void *key, size_t key_len)
+{
+	unsigned rounds, idx, end;
+
+	/*
+	 * Build the encryption schedule first, then apply InvMixColumns()
+	 * to every sub-key word of the inner rounds (indices 4 up to
+	 * 4*rounds - 1). Words of the first and last round keys are kept.
+	 */
+	rounds = br_aes_keysched(skey, key, key_len);
+	end = rounds << 2;
+	for (idx = 4; idx < end; idx ++) {
+		uint32_t w, r0, r1, r2, r3;
+		unsigned b0, b1, b2, b3;
+
+		/* Bytes of the word, most significant first. */
+		w = skey[idx];
+		b0 = w >> 24;
+		b1 = (w >> 16) & 0xFF;
+		b2 = (w >> 8) & 0xFF;
+		b3 = w & 0xFF;
+		/* One output byte per row of the 0E/0B/0D/09 matrix. */
+		r0 = mule(b0) ^ mulb(b1) ^ muld(b2) ^ mul9(b3);
+		r1 = mul9(b0) ^ mule(b1) ^ mulb(b2) ^ muld(b3);
+		r2 = muld(b0) ^ mul9(b1) ^ mule(b2) ^ mulb(b3);
+		r3 = mulb(b0) ^ muld(b1) ^ mul9(b2) ^ mule(b3);
+		skey[idx] = (r0 << 24) | (r1 << 16) | (r2 << 8) | r3;
+	}
+	return rounds;
+}
+
+static inline uint32_t
+rotr(uint32_t x, int n)
+{
+	return (x >> n) | (x << (32 - n));	/* n must be in 1..31 (no shift by 32) */
+}
+/* Four lookup tables derived from one: table k is iSsm0 rotated right by 8*k bits. */
+#define iSboxExt0(x)   (iSsm0[x])
+#define iSboxExt1(x)   (rotr(iSsm0[x], 8))
+#define iSboxExt2(x)   (rotr(iSsm0[x], 16))
+#define iSboxExt3(x)   (rotr(iSsm0[x], 24))
+
+/* Decrypt one 16-byte block in place; see bearssl.h */
+void
+br_aes_big_decrypt(unsigned num_rounds, const uint32_t *skey, void *data)
+{
+	unsigned char *blk;
+	const uint32_t *rk;
+	uint32_t s0, s1, s2, s3;
+	uint32_t t0, t1, t2, t3;
+	unsigned r;
+
+	/*
+	 * Load the block as big-endian words and add the last round
+	 * key. skey is read from word 4*num_rounds+3 down to word 0
+	 * (presumably as built by br_aes_big_keysched_inv()).
+	 */
+	blk = data;
+	rk = skey + (num_rounds << 2);
+	s0 = br_dec32be(blk) ^ rk[0];
+	s1 = br_dec32be(blk + 4) ^ rk[1];
+	s2 = br_dec32be(blk + 8) ^ rk[2];
+	s3 = br_dec32be(blk + 12) ^ rk[3];
+	for (r = num_rounds - 1; r > 0; r --) {
+		/* Each output word mixes one byte from every state word. */
+		t0 = iSboxExt0(s0 >> 24)
+			^ iSboxExt1((s3 >> 16) & 0xFF)
+			^ iSboxExt2((s2 >> 8) & 0xFF)
+			^ iSboxExt3(s1 & 0xFF);
+		t1 = iSboxExt0(s1 >> 24)
+			^ iSboxExt1((s0 >> 16) & 0xFF)
+			^ iSboxExt2((s3 >> 8) & 0xFF)
+			^ iSboxExt3(s2 & 0xFF);
+		t2 = iSboxExt0(s2 >> 24)
+			^ iSboxExt1((s1 >> 16) & 0xFF)
+			^ iSboxExt2((s0 >> 8) & 0xFF)
+			^ iSboxExt3(s3 & 0xFF);
+		t3 = iSboxExt0(s3 >> 24)
+			^ iSboxExt1((s2 >> 16) & 0xFF)
+			^ iSboxExt2((s1 >> 8) & 0xFF)
+			^ iSboxExt3(s0 & 0xFF);
+		rk = skey + (r << 2);
+		s0 = t0 ^ rk[0];
+		s1 = t1 ^ rk[1];
+		s2 = t2 ^ rk[2];
+		s3 = t3 ^ rk[3];
+	}
+
+	/* Final step: byte-wise inverse S-box, no column-mixing table. */
+	t0 = ((uint32_t)iS[s0 >> 24] << 24)
+		| ((uint32_t)iS[(s3 >> 16) & 0xFF] << 16)
+		| ((uint32_t)iS[(s2 >> 8) & 0xFF] << 8)
+		| (uint32_t)iS[s1 & 0xFF];
+	t1 = ((uint32_t)iS[s1 >> 24] << 24)
+		| ((uint32_t)iS[(s0 >> 16) & 0xFF] << 16)
+		| ((uint32_t)iS[(s3 >> 8) & 0xFF] << 8)
+		| (uint32_t)iS[s2 & 0xFF];
+	t2 = ((uint32_t)iS[s2 >> 24] << 24)
+		| ((uint32_t)iS[(s1 >> 16) & 0xFF] << 16)
+		| ((uint32_t)iS[(s0 >> 8) & 0xFF] << 8)
+		| (uint32_t)iS[s3 & 0xFF];
+	t3 = ((uint32_t)iS[s3 >> 24] << 24)
+		| ((uint32_t)iS[(s2 >> 16) & 0xFF] << 16)
+		| ((uint32_t)iS[(s1 >> 8) & 0xFF] << 8)
+		| (uint32_t)iS[s0 & 0xFF];
+	/* Add the first round key and store the words big-endian. */
+	br_enc32be(blk, t0 ^ skey[0]);
+	br_enc32be(blk + 4, t1 ^ skey[1]);
+	br_enc32be(blk + 8, t2 ^ skey[2]);
+	br_enc32be(blk + 12, t3 ^ skey[3]);
+}
diff --git a/third_party/bearssl/src/aes_big_enc.c b/third_party/bearssl/src/aes_big_enc.c
new file mode 100644
index 0000000..bbabb9a
--- /dev/null
+++ b/third_party/bearssl/src/aes_big_enc.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define S   br_aes_S	/* local shorthand for the shared S-box table */
+/* Ssm0[x] packs 02,01,01,03 times S[x] in GF(2^8), high byte first (e.g. x = 0). */
+static const uint32_t Ssm0[] = {
+	0xC66363A5, 0xF87C7C84, 0xEE777799, 0xF67B7B8D, 0xFFF2F20D, 0xD66B6BBD,
+	0xDE6F6FB1, 0x91C5C554, 0x60303050, 0x02010103, 0xCE6767A9, 0x562B2B7D,
+	0xE7FEFE19, 0xB5D7D762, 0x4DABABE6, 0xEC76769A, 0x8FCACA45, 0x1F82829D,
+	0x89C9C940, 0xFA7D7D87, 0xEFFAFA15, 0xB25959EB, 0x8E4747C9, 0xFBF0F00B,
+	0x41ADADEC, 0xB3D4D467, 0x5FA2A2FD, 0x45AFAFEA, 0x239C9CBF, 0x53A4A4F7,
+	0xE4727296, 0x9BC0C05B, 0x75B7B7C2, 0xE1FDFD1C, 0x3D9393AE, 0x4C26266A,
+	0x6C36365A, 0x7E3F3F41, 0xF5F7F702, 0x83CCCC4F, 0x6834345C, 0x51A5A5F4,
+	0xD1E5E534, 0xF9F1F108, 0xE2717193, 0xABD8D873, 0x62313153, 0x2A15153F,
+	0x0804040C, 0x95C7C752, 0x46232365, 0x9DC3C35E, 0x30181828, 0x379696A1,
+	0x0A05050F, 0x2F9A9AB5, 0x0E070709, 0x24121236, 0x1B80809B, 0xDFE2E23D,
+	0xCDEBEB26, 0x4E272769, 0x7FB2B2CD, 0xEA75759F, 0x1209091B, 0x1D83839E,
+	0x582C2C74, 0x341A1A2E, 0x361B1B2D, 0xDC6E6EB2, 0xB45A5AEE, 0x5BA0A0FB,
+	0xA45252F6, 0x763B3B4D, 0xB7D6D661, 0x7DB3B3CE, 0x5229297B, 0xDDE3E33E,
+	0x5E2F2F71, 0x13848497, 0xA65353F5, 0xB9D1D168, 0x00000000, 0xC1EDED2C,
+	0x40202060, 0xE3FCFC1F, 0x79B1B1C8, 0xB65B5BED, 0xD46A6ABE, 0x8DCBCB46,
+	0x67BEBED9, 0x7239394B, 0x944A4ADE, 0x984C4CD4, 0xB05858E8, 0x85CFCF4A,
+	0xBBD0D06B, 0xC5EFEF2A, 0x4FAAAAE5, 0xEDFBFB16, 0x864343C5, 0x9A4D4DD7,
+	0x66333355, 0x11858594, 0x8A4545CF, 0xE9F9F910, 0x04020206, 0xFE7F7F81,
+	0xA05050F0, 0x783C3C44, 0x259F9FBA, 0x4BA8A8E3, 0xA25151F3, 0x5DA3A3FE,
+	0x804040C0, 0x058F8F8A, 0x3F9292AD, 0x219D9DBC, 0x70383848, 0xF1F5F504,
+	0x63BCBCDF, 0x77B6B6C1, 0xAFDADA75, 0x42212163, 0x20101030, 0xE5FFFF1A,
+	0xFDF3F30E, 0xBFD2D26D, 0x81CDCD4C, 0x180C0C14, 0x26131335, 0xC3ECEC2F,
+	0xBE5F5FE1, 0x359797A2, 0x884444CC, 0x2E171739, 0x93C4C457, 0x55A7A7F2,
+	0xFC7E7E82, 0x7A3D3D47, 0xC86464AC, 0xBA5D5DE7, 0x3219192B, 0xE6737395,
+	0xC06060A0, 0x19818198, 0x9E4F4FD1, 0xA3DCDC7F, 0x44222266, 0x542A2A7E,
+	0x3B9090AB, 0x0B888883, 0x8C4646CA, 0xC7EEEE29, 0x6BB8B8D3, 0x2814143C,
+	0xA7DEDE79, 0xBC5E5EE2, 0x160B0B1D, 0xADDBDB76, 0xDBE0E03B, 0x64323256,
+	0x743A3A4E, 0x140A0A1E, 0x924949DB, 0x0C06060A, 0x4824246C, 0xB85C5CE4,
+	0x9FC2C25D, 0xBDD3D36E, 0x43ACACEF, 0xC46262A6, 0x399191A8, 0x319595A4,
+	0xD3E4E437, 0xF279798B, 0xD5E7E732, 0x8BC8C843, 0x6E373759, 0xDA6D6DB7,
+	0x018D8D8C, 0xB1D5D564, 0x9C4E4ED2, 0x49A9A9E0, 0xD86C6CB4, 0xAC5656FA,
+	0xF3F4F407, 0xCFEAEA25, 0xCA6565AF, 0xF47A7A8E, 0x47AEAEE9, 0x10080818,
+	0x6FBABAD5, 0xF0787888, 0x4A25256F, 0x5C2E2E72, 0x381C1C24, 0x57A6A6F1,
+	0x73B4B4C7, 0x97C6C651, 0xCBE8E823, 0xA1DDDD7C, 0xE874749C, 0x3E1F1F21,
+	0x964B4BDD, 0x61BDBDDC, 0x0D8B8B86, 0x0F8A8A85, 0xE0707090, 0x7C3E3E42,
+	0x71B5B5C4, 0xCC6666AA, 0x904848D8, 0x06030305, 0xF7F6F601, 0x1C0E0E12,
+	0xC26161A3, 0x6A35355F, 0xAE5757F9, 0x69B9B9D0, 0x17868691, 0x99C1C158,
+	0x3A1D1D27, 0x279E9EB9, 0xD9E1E138, 0xEBF8F813, 0x2B9898B3, 0x22111133,
+	0xD26969BB, 0xA9D9D970, 0x078E8E89, 0x339494A7, 0x2D9B9BB6, 0x3C1E1E22,
+	0x15878792, 0xC9E9E920, 0x87CECE49, 0xAA5555FF, 0x50282878, 0xA5DFDF7A,
+	0x038C8C8F, 0x59A1A1F8, 0x09898980, 0x1A0D0D17, 0x65BFBFDA, 0xD7E6E631,
+	0x844242C6, 0xD06868B8, 0x824141C3, 0x299999B0, 0x5A2D2D77, 0x1E0F0F11,
+	0x7BB0B0CB, 0xA85454FC, 0x6DBBBBD6, 0x2C16163A
+};
+
+static inline uint32_t
+rotr(uint32_t x, int n)
+{
+	return (x >> n) | (x << (32 - n));	/* n must be in 1..31 (no shift by 32) */
+}
+/* Four lookup tables derived from one: table k is Ssm0 rotated right by 8*k bits. */
+#define SboxExt0(x)   (Ssm0[x])
+#define SboxExt1(x)   (rotr(Ssm0[x], 8))
+#define SboxExt2(x)   (rotr(Ssm0[x], 16))
+#define SboxExt3(x)   (rotr(Ssm0[x], 24))
+
+
+/* Encrypt one 16-byte block in place; see bearssl.h */
+void
+br_aes_big_encrypt(unsigned num_rounds, const uint32_t *skey, void *data)
+{
+	unsigned char *blk;
+	const uint32_t *rk;
+	uint32_t s0, s1, s2, s3;
+	uint32_t t0, t1, t2, t3;
+	unsigned r;
+
+	/*
+	 * Load the block as big-endian words and add the first round
+	 * key. Over the whole function skey is read from word 0 up to
+	 * word 4*num_rounds+3.
+	 */
+	blk = data;
+	s0 = br_dec32be(blk) ^ skey[0];
+	s1 = br_dec32be(blk + 4) ^ skey[1];
+	s2 = br_dec32be(blk + 8) ^ skey[2];
+	s3 = br_dec32be(blk + 12) ^ skey[3];
+	for (r = 1; r < num_rounds; r ++) {
+		uint32_t n0, n1, n2, n3;
+
+		/* Each output word mixes one byte from every state word. */
+		n0 = SboxExt0(s0 >> 24)
+			^ SboxExt1((s1 >> 16) & 0xFF)
+			^ SboxExt2((s2 >> 8) & 0xFF)
+			^ SboxExt3(s3 & 0xFF);
+		n1 = SboxExt0(s1 >> 24)
+			^ SboxExt1((s2 >> 16) & 0xFF)
+			^ SboxExt2((s3 >> 8) & 0xFF)
+			^ SboxExt3(s0 & 0xFF);
+		n2 = SboxExt0(s2 >> 24)
+			^ SboxExt1((s3 >> 16) & 0xFF)
+			^ SboxExt2((s0 >> 8) & 0xFF)
+			^ SboxExt3(s1 & 0xFF);
+		n3 = SboxExt0(s3 >> 24)
+			^ SboxExt1((s0 >> 16) & 0xFF)
+			^ SboxExt2((s1 >> 8) & 0xFF)
+			^ SboxExt3(s2 & 0xFF);
+		rk = skey + (r << 2);
+		s0 = n0 ^ rk[0];
+		s1 = n1 ^ rk[1];
+		s2 = n2 ^ rk[2];
+		s3 = n3 ^ rk[3];
+	}
+
+	/* Final round: plain S-box bytes, no column-mixing table. */
+	t0 = ((uint32_t)S[s0 >> 24] << 24)
+		| ((uint32_t)S[(s1 >> 16) & 0xFF] << 16)
+		| ((uint32_t)S[(s2 >> 8) & 0xFF] << 8)
+		| (uint32_t)S[s3 & 0xFF];
+	t1 = ((uint32_t)S[s1 >> 24] << 24)
+		| ((uint32_t)S[(s2 >> 16) & 0xFF] << 16)
+		| ((uint32_t)S[(s3 >> 8) & 0xFF] << 8)
+		| (uint32_t)S[s0 & 0xFF];
+	t2 = ((uint32_t)S[s2 >> 24] << 24)
+		| ((uint32_t)S[(s3 >> 16) & 0xFF] << 16)
+		| ((uint32_t)S[(s0 >> 8) & 0xFF] << 8)
+		| (uint32_t)S[s1 & 0xFF];
+	t3 = ((uint32_t)S[s3 >> 24] << 24)
+		| ((uint32_t)S[(s0 >> 16) & 0xFF] << 16)
+		| ((uint32_t)S[(s1 >> 8) & 0xFF] << 8)
+		| (uint32_t)S[s2 & 0xFF];
+	/* Add the last round key and store the words big-endian. */
+	rk = skey + (num_rounds << 2);
+	br_enc32be(blk, t0 ^ rk[0]);
+	br_enc32be(blk + 4, t1 ^ rk[1]);
+	br_enc32be(blk + 8, t2 ^ rk[2]);
+	br_enc32be(blk + 12, t3 ^ rk[3]);
+}
diff --git a/third_party/bearssl/src/aes_common.c b/third_party/bearssl/src/aes_common.c
new file mode 100644
index 0000000..72c64fb
--- /dev/null
+++ b/third_party/bearssl/src/aes_common.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static const uint32_t Rcon[] = {	/* key schedule round constants, held in the top byte */
+	0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000,
+	0x40000000, 0x80000000, 0x1B000000, 0x36000000
+};
+
+#define S   br_aes_S	/* local shorthand for the S-box table defined below */
+
+/* AES forward S-box, indexed by byte value (shared across files); see inner.h */
+const unsigned char br_aes_S[] = {
+	0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B,
+	0xFE, 0xD7, 0xAB, 0x76, 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0,
+	0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, 0xB7, 0xFD, 0x93, 0x26,
+	0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
+	0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2,
+	0xEB, 0x27, 0xB2, 0x75, 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0,
+	0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, 0x53, 0xD1, 0x00, 0xED,
+	0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
+	0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F,
+	0x50, 0x3C, 0x9F, 0xA8, 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5,
+	0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, 0xCD, 0x0C, 0x13, 0xEC,
+	0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
+	0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14,
+	0xDE, 0x5E, 0x0B, 0xDB, 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C,
+	0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, 0xE7, 0xC8, 0x37, 0x6D,
+	0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
+	0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F,
+	0x4B, 0xBD, 0x8B, 0x8A, 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E,
+	0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, 0xE1, 0xF8, 0x98, 0x11,
+	0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
+	0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F,
+	0xB0, 0x54, 0xBB, 0x16
+};
+
+static uint32_t
+SubWord(uint32_t x)
+{
+	uint32_t hi = ((uint32_t)S[x >> 24] << 24) | ((uint32_t)S[(x >> 16) & 0xFF] << 16);
+	uint32_t lo = ((uint32_t)S[(x >> 8) & 0xFF] << 8) | (uint32_t)S[x & 0xFF];
+
+	return hi | lo;	/* S-box applied to each of the four bytes of x */
+}
+
+/* AES key expansion into 32-bit words read with br_dec32be(); see inner.h */
+unsigned
+br_aes_keysched(uint32_t *skey, const void *key, size_t key_len)
+{
+	const unsigned char *kb;
+	unsigned rounds;
+	int w, pos, rc, nk, total;
+
+	/* Only 16-, 24- and 32-byte keys are accepted; 0 is returned otherwise. */
+	if (key_len != 16 && key_len != 24 && key_len != 32) {
+		/* abort(); */
+		return 0;
+	}
+
+	/*
+	 * nk is the key size in words; the round count is nk + 6
+	 * (10, 12 or 14) and the schedule has 4 * (rounds + 1) words.
+	 */
+	nk = (int)(key_len >> 2);
+	rounds = (unsigned)nk + 6;
+	total = (int)((rounds + 1) << 2);
+	/* The first nk words are the key itself. */
+	kb = key;
+	for (w = 0; w < nk; w ++) {
+		skey[w] = br_dec32be(kb + (w << 2));
+	}
+	/* pos is the word position within the current group of nk words. */
+	for (w = nk, pos = 0, rc = 0; w < total; w ++) {
+		uint32_t t = skey[w - 1];
+
+		if (pos == 0) {
+			/* Rotate left by one byte, substitute, add Rcon. */
+			t = SubWord((t << 8) | (t >> 24)) ^ Rcon[rc];
+		} else if (nk > 6 && pos == 4) {
+			t = SubWord(t);	/* extra substitution, 32-byte keys only */
+		}
+		skey[w] = skey[w - nk] ^ t;
+		if (++ pos == nk) {
+			pos = 0;
+			rc ++;
+		}
+	}
+	return rounds;
+}
diff --git a/third_party/bearssl/src/aes_ct.c b/third_party/bearssl/src/aes_ct.c
new file mode 100644
index 0000000..66776d9
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct.c
@@ -0,0 +1,328 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Bitsliced AES S-box, applied in place to the eight words of q; see inner.h */
+void
+br_aes_ct_bitslice_Sbox(uint32_t *q)
+{
+	/*
+	 * This S-box implementation is a straightforward translation of
+	 * the circuit described by Boyar and Peralta in "A new
+	 * combinational logic minimization technique with applications
+	 * to cryptology" (https://eprint.iacr.org/2009/191.pdf).
+	 *
+	 * Note that variables x* (input) and s* (output) are numbered
+	 * in "reverse" order (x0 is the high bit, x7 is the low bit).
+	 */
+
+	uint32_t x0, x1, x2, x3, x4, x5, x6, x7;
+	uint32_t y1, y2, y3, y4, y5, y6, y7, y8, y9;
+	uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
+	uint32_t y20, y21;
+	uint32_t z0, z1, z2, z3, z4, z5, z6, z7, z8, z9;
+	uint32_t z10, z11, z12, z13, z14, z15, z16, z17;
+	uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
+	uint32_t t10, t11, t12, t13, t14, t15, t16, t17, t18, t19;
+	uint32_t t20, t21, t22, t23, t24, t25, t26, t27, t28, t29;
+	uint32_t t30, t31, t32, t33, t34, t35, t36, t37, t38, t39;
+	uint32_t t40, t41, t42, t43, t44, t45, t46, t47, t48, t49;
+	uint32_t t50, t51, t52, t53, t54, t55, t56, t57, t58, t59;
+	uint32_t t60, t61, t62, t63, t64, t65, t66, t67;
+	uint32_t s0, s1, s2, s3, s4, s5, s6, s7;
+	/* q[k] carries bit k of the S-box inputs: q[7] is the high bit, q[0] the low bit. */
+	x0 = q[7];
+	x1 = q[6];
+	x2 = q[5];
+	x3 = q[4];
+	x4 = q[3];
+	x5 = q[2];
+	x6 = q[1];
+	x7 = q[0];
+
+	/*
+	 * Top linear transformation.
+	 */
+	y14 = x3 ^ x5;
+	y13 = x0 ^ x6;
+	y9 = x0 ^ x3;
+	y8 = x0 ^ x5;
+	t0 = x1 ^ x2;
+	y1 = t0 ^ x7;
+	y4 = y1 ^ x3;
+	y12 = y13 ^ y14;
+	y2 = y1 ^ x0;
+	y5 = y1 ^ x6;
+	y3 = y5 ^ y8;
+	t1 = x4 ^ y12;
+	y15 = t1 ^ x5;
+	y20 = t1 ^ x1;
+	y6 = y15 ^ x7;
+	y10 = y15 ^ t0;
+	y11 = y20 ^ y9;
+	y7 = x7 ^ y11;
+	y17 = y10 ^ y11;
+	y19 = y10 ^ y8;
+	y16 = t0 ^ y11;
+	y21 = y13 ^ y16;
+	y18 = x0 ^ y16;
+
+	/*
+	 * Non-linear section.
+	 */
+	t2 = y12 & y15;
+	t3 = y3 & y6;
+	t4 = t3 ^ t2;
+	t5 = y4 & x7;
+	t6 = t5 ^ t2;
+	t7 = y13 & y16;
+	t8 = y5 & y1;
+	t9 = t8 ^ t7;
+	t10 = y2 & y7;
+	t11 = t10 ^ t7;
+	t12 = y9 & y11;
+	t13 = y14 & y17;
+	t14 = t13 ^ t12;
+	t15 = y8 & y10;
+	t16 = t15 ^ t12;
+	t17 = t4 ^ t14;
+	t18 = t6 ^ t16;
+	t19 = t9 ^ t14;
+	t20 = t11 ^ t16;
+	t21 = t17 ^ y20;
+	t22 = t18 ^ y19;
+	t23 = t19 ^ y21;
+	t24 = t20 ^ y18;
+
+	t25 = t21 ^ t22;
+	t26 = t21 & t23;
+	t27 = t24 ^ t26;
+	t28 = t25 & t27;
+	t29 = t28 ^ t22;
+	t30 = t23 ^ t24;
+	t31 = t22 ^ t26;
+	t32 = t31 & t30;
+	t33 = t32 ^ t24;
+	t34 = t23 ^ t33;
+	t35 = t27 ^ t33;
+	t36 = t24 & t35;
+	t37 = t36 ^ t34;
+	t38 = t27 ^ t36;
+	t39 = t29 & t38;
+	t40 = t25 ^ t39;
+
+	t41 = t40 ^ t37;
+	t42 = t29 ^ t33;
+	t43 = t29 ^ t40;
+	t44 = t33 ^ t37;
+	t45 = t42 ^ t41;
+	z0 = t44 & y15;
+	z1 = t37 & y6;
+	z2 = t33 & x7;
+	z3 = t43 & y16;
+	z4 = t40 & y1;
+	z5 = t29 & y7;
+	z6 = t42 & y11;
+	z7 = t45 & y17;
+	z8 = t41 & y10;
+	z9 = t44 & y12;
+	z10 = t37 & y3;
+	z11 = t33 & y4;
+	z12 = t43 & y13;
+	z13 = t40 & y5;
+	z14 = t29 & y2;
+	z15 = t42 & y9;
+	z16 = t45 & y14;
+	z17 = t41 & y8;
+
+	/*
+	 * Bottom linear transformation.
+	 */
+	t46 = z15 ^ z16;
+	t47 = z10 ^ z11;
+	t48 = z5 ^ z13;
+	t49 = z9 ^ z10;
+	t50 = z2 ^ z12;
+	t51 = z2 ^ z5;
+	t52 = z7 ^ z8;
+	t53 = z0 ^ z3;
+	t54 = z6 ^ z7;
+	t55 = z16 ^ z17;
+	t56 = z12 ^ t48;
+	t57 = t50 ^ t53;
+	t58 = z4 ^ t46;
+	t59 = z3 ^ t54;
+	t60 = t46 ^ t57;
+	t61 = z14 ^ t57;
+	t62 = t52 ^ t58;
+	t63 = t49 ^ t58;
+	t64 = z4 ^ t59;
+	t65 = t61 ^ t62;
+	t66 = z1 ^ t63;
+	s0 = t59 ^ t63;
+	s6 = t56 ^ ~t62;	/* a ^ ~b is XNOR(a, b); same form for s7, s1, s2 */
+	s7 = t48 ^ ~t60;
+	t67 = t64 ^ t65;
+	s3 = t53 ^ t66;
+	s4 = t51 ^ t66;
+	s5 = t47 ^ t65;
+	s1 = t64 ^ ~s3;
+	s2 = t55 ^ ~t67;
+	/* Store the outputs with the same bit numbering as the inputs. */
+	q[7] = s0;
+	q[6] = s1;
+	q[5] = s2;
+	q[4] = s3;
+	q[3] = s4;
+	q[2] = s5;
+	q[1] = s6;
+	q[0] = s7;
+}
+
+/* In-place bit permutation across the eight words of q (three swap stages); see inner.h */
+void
+br_aes_ct_ortho(uint32_t *q)
+{
+#define SWAPN(cl, ch, s, x, y)   do { \
+		uint32_t a, b; \
+		a = (x); \
+		b = (y); \
+		(x) = (a & (uint32_t)cl) | ((b & (uint32_t)cl) << (s)); \
+		(y) = ((a & (uint32_t)ch) >> (s)) | (b & (uint32_t)ch); \
+	} while (0)
+/* SWAPN: the ch-masked bits of x trade places with the cl-masked bits of y. */
+#define SWAP2(x, y)   SWAPN(0x55555555, 0xAAAAAAAA, 1, x, y)
+#define SWAP4(x, y)   SWAPN(0x33333333, 0xCCCCCCCC, 2, x, y)
+#define SWAP8(x, y)   SWAPN(0x0F0F0F0F, 0xF0F0F0F0, 4, x, y)
+	/* Stage 1: adjacent words, single bits. */
+	SWAP2(q[0], q[1]);
+	SWAP2(q[2], q[3]);
+	SWAP2(q[4], q[5]);
+	SWAP2(q[6], q[7]);
+	/* Stage 2: words two apart, groups of two bits. */
+	SWAP4(q[0], q[2]);
+	SWAP4(q[1], q[3]);
+	SWAP4(q[4], q[6]);
+	SWAP4(q[5], q[7]);
+	/* Stage 3: words four apart, groups of four bits. */
+	SWAP8(q[0], q[4]);
+	SWAP8(q[1], q[5]);
+	SWAP8(q[2], q[6]);
+	SWAP8(q[3], q[7]);
+}
+
+static const unsigned char Rcon[] = {	/* key schedule round constants, one byte each */
+	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36
+};
+
+static uint32_t
+sub_word(uint32_t x)
+{
+	/* The bitsliced S-box works on eight words; fill them all with x. */
+	uint32_t q[8] = { x, x, x, x, x, x, x, x };
+
+	/* Permute into bitsliced form, substitute, and permute back. */
+	br_aes_ct_ortho(q);
+	br_aes_ct_bitslice_Sbox(q);
+	br_aes_ct_ortho(q);
+
+	/* Word 0 carries the substituted value. */
+	return q[0];
+}
+
+/* Key schedule for the bitsliced ("ct") AES, compact output; see inner.h */
+unsigned
+br_aes_ct_keysched(uint32_t *comp_skey, const void *key, size_t key_len)
+{
+	const unsigned char *kb;
+	unsigned rounds;
+	int w, pos, rc, nk, total;
+	uint32_t cur;
+	uint32_t wide[120];
+
+	/* Only 16-, 24- and 32-byte keys are accepted; 0 is returned otherwise. */
+	if (key_len != 16 && key_len != 24 && key_len != 32) {
+		/* abort(); */
+		return 0;
+	}
+	nk = (int)(key_len >> 2);
+	rounds = (unsigned)nk + 6;
+	total = (int)((rounds + 1) << 2);
+	/*
+	 * Expand the key on words read with br_dec32le(); every word is
+	 * stored twice in wide[] ahead of the br_aes_ct_ortho() pass.
+	 */
+	kb = key;
+	cur = 0;
+	for (w = 0; w < nk; w ++) {
+		cur = br_dec32le(kb + (w << 2));
+		wide[(w << 1) + 0] = cur;
+		wide[(w << 1) + 1] = cur;
+	}
+	for (w = nk, pos = 0, rc = 0; w < total; w ++) {
+		if (pos == 0) {
+			/* Rotate right by one byte, substitute, add Rcon. */
+			cur = sub_word((cur << 24) | (cur >> 8)) ^ Rcon[rc];
+		} else if (nk > 6 && pos == 4) {
+			cur = sub_word(cur);
+		}
+		cur ^= wide[(w - nk) << 1];
+		wide[(w << 1) + 0] = cur;
+		wide[(w << 1) + 1] = cur;
+		if (++ pos == nk) {
+			pos = 0;
+			rc ++;
+		}
+	}
+	/* Permute each group of 4 key words (8 entries of wide[]). */
+	for (w = 0; w < total; w += 4) {
+		br_aes_ct_ortho(wide + (w << 1));
+	}
+	/* Compact: even bits from one entry, odd bits from its twin. */
+	for (w = 0; w < total; w ++) {
+		comp_skey[w] = (wide[w << 1] & 0x55555555)
+			| (wide[(w << 1) + 1] & 0xAAAAAAAA);
+	}
+	return rounds;
+}
+
+/* Expand a compact key schedule to two words per key word; see inner.h */
+void
+br_aes_ct_skey_expand(uint32_t *skey,
+	unsigned num_rounds, const uint32_t *comp_skey)
+{
+	unsigned idx, count;
+
+	/* 4 * (num_rounds + 1) compact words in, twice as many words out. */
+	count = (num_rounds + 1) << 2;
+	for (idx = 0; idx < count; idx ++) {
+		uint32_t even = comp_skey[idx] & 0x55555555;
+		uint32_t odd = comp_skey[idx] & 0xAAAAAAAA;
+
+		/* Copy every kept bit into its cleared neighbour. */
+		skey[(idx << 1) + 0] = even | (even << 1);
+		skey[(idx << 1) + 1] = odd | (odd >> 1);
+	}
+}
diff --git a/third_party/bearssl/src/aes_ct64.c b/third_party/bearssl/src/aes_ct64.c
new file mode 100644
index 0000000..1523811
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct64.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- applies the S-box circuit in place to the eight words q[0..7] */
+void
+br_aes_ct64_bitslice_Sbox(uint64_t *q)
+{
+	/*
+	 * This S-box implementation is a straightforward translation of
+	 * the circuit described by Boyar and Peralta in "A new
+	 * combinational logic minimization technique with applications
+	 * to cryptology" (https://eprint.iacr.org/2009/191.pdf).
+	 *
+	 * Note that variables x* (input) and s* (output) are numbered
+	 * in "reverse" order (x0 is the high bit, x7 is the low bit).
+	 */
+
+	uint64_t x0, x1, x2, x3, x4, x5, x6, x7;
+	uint64_t y1, y2, y3, y4, y5, y6, y7, y8, y9;
+	uint64_t y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
+	uint64_t y20, y21;
+	uint64_t z0, z1, z2, z3, z4, z5, z6, z7, z8, z9;
+	uint64_t z10, z11, z12, z13, z14, z15, z16, z17;
+	uint64_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
+	uint64_t t10, t11, t12, t13, t14, t15, t16, t17, t18, t19;
+	uint64_t t20, t21, t22, t23, t24, t25, t26, t27, t28, t29;
+	uint64_t t30, t31, t32, t33, t34, t35, t36, t37, t38, t39;
+	uint64_t t40, t41, t42, t43, t44, t45, t46, t47, t48, t49;
+	uint64_t t50, t51, t52, t53, t54, t55, t56, t57, t58, t59;
+	uint64_t t60, t61, t62, t63, t64, t65, t66, t67;
+	uint64_t s0, s1, s2, s3, s4, s5, s6, s7;
+
+	x0 = q[7];	/* reverse numbering: x0 (high bit) comes from q[7] */
+	x1 = q[6];
+	x2 = q[5];
+	x3 = q[4];
+	x4 = q[3];
+	x5 = q[2];
+	x6 = q[1];
+	x7 = q[0];	/* x7 (low bit) comes from q[0] */
+
+	/*
+	 * Top linear transformation.
+	 */
+	y14 = x3 ^ x5;
+	y13 = x0 ^ x6;
+	y9 = x0 ^ x3;
+	y8 = x0 ^ x5;
+	t0 = x1 ^ x2;
+	y1 = t0 ^ x7;
+	y4 = y1 ^ x3;
+	y12 = y13 ^ y14;
+	y2 = y1 ^ x0;
+	y5 = y1 ^ x6;
+	y3 = y5 ^ y8;
+	t1 = x4 ^ y12;
+	y15 = t1 ^ x5;
+	y20 = t1 ^ x1;
+	y6 = y15 ^ x7;
+	y10 = y15 ^ t0;
+	y11 = y20 ^ y9;
+	y7 = x7 ^ y11;
+	y17 = y10 ^ y11;
+	y19 = y10 ^ y8;
+	y16 = t0 ^ y11;
+	y21 = y13 ^ y16;
+	y18 = x0 ^ y16;
+
+	/*
+	 * Non-linear section.
+	 */
+	t2 = y12 & y15;
+	t3 = y3 & y6;
+	t4 = t3 ^ t2;
+	t5 = y4 & x7;
+	t6 = t5 ^ t2;
+	t7 = y13 & y16;
+	t8 = y5 & y1;
+	t9 = t8 ^ t7;
+	t10 = y2 & y7;
+	t11 = t10 ^ t7;
+	t12 = y9 & y11;
+	t13 = y14 & y17;
+	t14 = t13 ^ t12;
+	t15 = y8 & y10;
+	t16 = t15 ^ t12;
+	t17 = t4 ^ t14;
+	t18 = t6 ^ t16;
+	t19 = t9 ^ t14;
+	t20 = t11 ^ t16;
+	t21 = t17 ^ y20;
+	t22 = t18 ^ y19;
+	t23 = t19 ^ y21;
+	t24 = t20 ^ y18;
+
+	t25 = t21 ^ t22;
+	t26 = t21 & t23;
+	t27 = t24 ^ t26;
+	t28 = t25 & t27;
+	t29 = t28 ^ t22;
+	t30 = t23 ^ t24;
+	t31 = t22 ^ t26;
+	t32 = t31 & t30;
+	t33 = t32 ^ t24;
+	t34 = t23 ^ t33;
+	t35 = t27 ^ t33;
+	t36 = t24 & t35;
+	t37 = t36 ^ t34;
+	t38 = t27 ^ t36;
+	t39 = t29 & t38;
+	t40 = t25 ^ t39;
+
+	t41 = t40 ^ t37;
+	t42 = t29 ^ t33;
+	t43 = t29 ^ t40;
+	t44 = t33 ^ t37;
+	t45 = t42 ^ t41;
+	z0 = t44 & y15;
+	z1 = t37 & y6;
+	z2 = t33 & x7;
+	z3 = t43 & y16;
+	z4 = t40 & y1;
+	z5 = t29 & y7;
+	z6 = t42 & y11;
+	z7 = t45 & y17;
+	z8 = t41 & y10;
+	z9 = t44 & y12;
+	z10 = t37 & y3;
+	z11 = t33 & y4;
+	z12 = t43 & y13;
+	z13 = t40 & y5;
+	z14 = t29 & y2;
+	z15 = t42 & y9;
+	z16 = t45 & y14;
+	z17 = t41 & y8;
+
+	/*
+	 * Bottom linear transformation.
+	 */
+	t46 = z15 ^ z16;
+	t47 = z10 ^ z11;
+	t48 = z5 ^ z13;
+	t49 = z9 ^ z10;
+	t50 = z2 ^ z12;
+	t51 = z2 ^ z5;
+	t52 = z7 ^ z8;
+	t53 = z0 ^ z3;
+	t54 = z6 ^ z7;
+	t55 = z16 ^ z17;
+	t56 = z12 ^ t48;
+	t57 = t50 ^ t53;
+	t58 = z4 ^ t46;
+	t59 = z3 ^ t54;
+	t60 = t46 ^ t57;
+	t61 = z14 ^ t57;
+	t62 = t52 ^ t58;
+	t63 = t49 ^ t58;
+	t64 = z4 ^ t59;
+	t65 = t61 ^ t62;
+	t66 = z1 ^ t63;
+	s0 = t59 ^ t63;
+	s6 = t56 ^ ~t62;	/* a ^ ~b: XNOR of the two operands */
+	s7 = t48 ^ ~t60;
+	t67 = t64 ^ t65;
+	s3 = t53 ^ t66;
+	s4 = t51 ^ t66;
+	s5 = t47 ^ t65;
+	s1 = t64 ^ ~s3;
+	s2 = t55 ^ ~t67;
+
+	q[7] = s0;	/* outputs go back in the same reversed order */
+	q[6] = s1;
+	q[5] = s2;
+	q[4] = s3;
+	q[3] = s4;
+	q[2] = s5;
+	q[1] = s6;
+	q[0] = s7;
+}
+
+/* see inner.h -- in-place bit exchange between the eight words q[0..7], at word distances 1, 2 and 4 */
+void
+br_aes_ct64_ortho(uint64_t *q)
+{
+#define SWAPN(cl, ch, s, x, y)   do { \
+		uint64_t a, b; \
+		a = (x); \
+		b = (y); \
+		(x) = (a & (uint64_t)cl) | ((b & (uint64_t)cl) << (s)); \
+		(y) = ((a & (uint64_t)ch) >> (s)) | (b & (uint64_t)ch); \
+	} while (0)
+
+#define SWAP2(x, y)    SWAPN(0x5555555555555555, 0xAAAAAAAAAAAAAAAA,  1, x, y)
+#define SWAP4(x, y)    SWAPN(0x3333333333333333, 0xCCCCCCCCCCCCCCCC,  2, x, y)
+#define SWAP8(x, y)    SWAPN(0x0F0F0F0F0F0F0F0F, 0xF0F0F0F0F0F0F0F0,  4, x, y)
+
+	SWAP2(q[0], q[1]);	/* x keeps its cl bits and takes y's cl bits shifted up; */
+	SWAP2(q[2], q[3]);	/* y keeps its ch bits and takes x's ch bits shifted down */
+	SWAP2(q[4], q[5]);
+	SWAP2(q[6], q[7]);
+
+	SWAP4(q[0], q[2]);	/* same exchange with 2-bit groups, words two apart */
+	SWAP4(q[1], q[3]);
+	SWAP4(q[4], q[6]);
+	SWAP4(q[5], q[7]);
+
+	SWAP8(q[0], q[4]);	/* same exchange with 4-bit groups, words four apart */
+	SWAP8(q[1], q[5]);
+	SWAP8(q[2], q[6]);
+	SWAP8(q[3], q[7]);
+}
+
+/* see inner.h -- pack w[0..3] into two 64-bit words, byte-interleaved */
+void
+br_aes_ct64_interleave_in(uint64_t *q0, uint64_t *q1, const uint32_t *w)
+{
+	uint64_t sp[4];
+	int i;
+
+	for (i = 0; i < 4; i ++) {
+		uint64_t t;
+
+		/*
+		 * Spread the four bytes of w[i] so that each one occupies
+		 * the low half of a 16-bit lane: first move the upper 16
+		 * bits to bits 32..47, then the upper byte of each 16-bit
+		 * half up by 8.
+		 */
+		t = w[i];
+		t = (t | (t << 16)) & (uint64_t)0x0000FFFF0000FFFF;
+		t = (t | (t << 8)) & (uint64_t)0x00FF00FF00FF00FF;
+		sp[i] = t;
+	}
+
+	/*
+	 * Words 0/2 share q0 and words 1/3 share q1; the second word of
+	 * each pair fills the odd bytes.
+	 */
+	*q0 = sp[0] | (sp[2] << 8);
+	*q1 = sp[1] | (sp[3] << 8);
+}
+
+/* see inner.h */
+/* Inverse of br_aes_ct64_interleave_in(): unpack q0 and q1 into w[0..3]. */
+void
+br_aes_ct64_interleave_out(uint32_t *w, uint64_t q0, uint64_t q1)
+{
+	uint64_t lane[4];
+	int i;
+
+	/* Even bytes of q0/q1 carry words 0/1, odd bytes carry words 2/3. */
+	lane[0] = q0;
+	lane[1] = q1;
+	lane[2] = q0 >> 8;
+	lane[3] = q1 >> 8;
+
+	for (i = 0; i < 4; i ++) {
+		uint64_t t;
+
+		/* Squeeze the four spread-out bytes back into 32 bits. */
+		t = lane[i] & (uint64_t)0x00FF00FF00FF00FF;
+		t = (t | (t >> 8)) & (uint64_t)0x0000FFFF0000FFFF;
+		w[i] = (uint32_t)t | (uint32_t)(t >> 16);
+	}
+}
+
+static const unsigned char Rcon[] = {	/* XORed into the key schedule, one entry per nk-word group */
+	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36
+};
+
+/* Run x through the bitsliced S-box; only slot 0 of the state is used. */
+static uint32_t
+sub_word(uint32_t x)
+{
+	uint64_t st[8] = { 0 };
+
+	st[0] = x;
+	br_aes_ct64_ortho(st);
+	br_aes_ct64_bitslice_Sbox(st);
+	br_aes_ct64_ortho(st);
+	return (uint32_t)st[0];
+}
+
+/* see inner.h -- returns the round count (10/12/14), or 0 if key_len is not 16, 24 or 32 */
+unsigned
+br_aes_ct64_keysched(uint64_t *comp_skey, const void *key, size_t key_len)
+{
+	unsigned num_rounds;
+	int i, j, k, nk, nkf;
+	uint32_t tmp;
+	uint32_t skey[60];	/* up to 15 round keys of 4 words each */
+
+	switch (key_len) {
+	case 16:
+		num_rounds = 10;
+		break;
+	case 24:
+		num_rounds = 12;
+		break;
+	case 32:
+		num_rounds = 14;
+		break;
+	default:
+		/* abort(); */
+		return 0;
+	}
+	nk = (int)(key_len >> 2);	/* key size in 32-bit words */
+	nkf = (int)((num_rounds + 1) << 2);	/* subkey words: 4 per round key */
+	br_range_dec32le(skey, (key_len >> 2), key);	/* the key itself provides the first nk words */
+	tmp = skey[(key_len >> 2) - 1];
+	for (i = nk, j = 0, k = 0; i < nkf; i ++) {	/* j: index in the nk-word group; k: Rcon index */
+		if (j == 0) {
+			tmp = (tmp << 24) | (tmp >> 8);	/* rotate right by 8 bits */
+			tmp = sub_word(tmp) ^ Rcon[k];
+		} else if (nk > 6 && j == 4) {	/* extra substitution, 32-byte keys only */
+			tmp = sub_word(tmp);
+		}
+		tmp ^= skey[i - nk];	/* combine with the word nk positions back */
+		skey[i] = tmp;
+		if (++ j == nk) {
+			j = 0;
+			k ++;
+		}
+	}
+
+	for (i = 0, j = 0; i < nkf; i += 4, j += 2) {	/* one round key in, two compressed words out */
+		uint64_t q[8];
+
+		br_aes_ct64_interleave_in(&q[0], &q[4], skey + i);
+		q[1] = q[0];	/* the same round key is copied into every slot */
+		q[2] = q[0];
+		q[3] = q[0];
+		q[5] = q[4];
+		q[6] = q[4];
+		q[7] = q[4];
+		br_aes_ct64_ortho(q);
+		comp_skey[j + 0] =	/* keep one bit out of every four from each word */
+			  (q[0] & (uint64_t)0x1111111111111111)
+			| (q[1] & (uint64_t)0x2222222222222222)
+			| (q[2] & (uint64_t)0x4444444444444444)
+			| (q[3] & (uint64_t)0x8888888888888888);
+		comp_skey[j + 1] =
+			  (q[4] & (uint64_t)0x1111111111111111)
+			| (q[5] & (uint64_t)0x2222222222222222)
+			| (q[6] & (uint64_t)0x4444444444444444)
+			| (q[7] & (uint64_t)0x8888888888888888);
+	}
+	return num_rounds;
+}
+
+/* see inner.h -- expand each compressed 64-bit subkey word into four bitsliced words */
+void
+br_aes_ct64_skey_expand(uint64_t *skey,
+	unsigned num_rounds, const uint64_t *comp_skey)
+{
+	unsigned idx, count;
+
+	count = (num_rounds + 1) << 1;	/* 2 compressed words per round key */
+	for (idx = 0; idx < count; idx ++) {
+		uint64_t cw;
+		uint64_t *out;
+		int b;
+
+		cw = comp_skey[idx];
+		out = skey + (idx << 2);
+		for (b = 0; b < 4; b ++) {
+			uint64_t bit;
+
+			/* Bit b of every nibble, moved to the nibble's low bit. */
+			bit = (cw >> b) & (uint64_t)0x1111111111111111;
+			/* 16*bit - bit: spreads that bit over the whole nibble. */
+			out[b] = (bit << 4) - bit;
+		}
+	}
+}
diff --git a/third_party/bearssl/src/aes_ct64_cbcdec.c b/third_party/bearssl/src/aes_ct64_cbcdec.c
new file mode 100644
index 0000000..5a7360b
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct64_cbcdec.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h -- run the key schedule into ctx and attach the CBC-decryption vtable */
+void
+br_aes_ct64_cbcdec_init(br_aes_ct64_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_ct64_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_ct64_cbcdec_vtable;
+}
+
+/* see bearssl_block.h -- in-place CBC decryption of len bytes (whole 16-byte blocks); iv becomes the last ciphertext block */
+void
+br_aes_ct64_cbcdec_run(const br_aes_ct64_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf;
+	uint64_t sk_exp[120];
+	uint32_t ivw[4];
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	br_range_dec32le(ivw, 4, iv);
+	buf = data;
+	while (len > 0) {
+		uint64_t q[8];
+		uint32_t w1[16], w2[16];
+		int i, j;
+
+		/* j: 32-bit words in this chunk (up to four blocks at once). */
+		j = (len >= 64) ? 16 : (int)(len >> 2);
+		br_range_dec32le(w1, j, buf);
+		/*
+		 * All 16 words are read below: clear the unused tail of a
+		 * short final chunk rather than reading it uninitialized.
+		 */
+		memset(w1 + j, 0, (16 - j) * sizeof(uint32_t));
+		for (i = 0; i < 4; i ++) {
+			br_aes_ct64_interleave_in(
+				&q[i], &q[i + 4], w1 + (i << 2));
+		}
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_decrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		for (i = 0; i < 4; i ++) {
+			br_aes_ct64_interleave_out(
+				w2 + (i << 2), q[i], q[i + 4]);
+		}
+
+		/*
+		 * CBC chaining: the first block is XORed with the IV, each
+		 * following block with the previous ciphertext block; the
+		 * last ciphertext block becomes the IV for what follows.
+		 */
+		for (i = 0; i < 4; i ++) {
+			w2[i] ^= ivw[i];
+		}
+		for (i = 4; i < j; i ++) {
+			w2[i] ^= w1[i - 4];
+		}
+		memcpy(ivw, w1 + j - 4, sizeof ivw);
+		br_range_enc32le(buf, w2, j);
+		if (len < 64) {
+			break;
+		}
+		buf += 64;
+		len -= 64;
+	}
+	br_range_enc32le(iv, ivw, 4);
+}
+
+/* see bearssl_block.h -- method table that br_aes_ct64_cbcdec_init() stores in the context */
+const br_block_cbcdec_class br_aes_ct64_cbcdec_vtable = {
+	sizeof(br_aes_ct64_cbcdec_keys),	/* size of the matching context structure */
+	16,	/* NOTE(review): presumably the block size in bytes -- confirm in bearssl_block.h */
+	4,	/* NOTE(review): presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))
+		&br_aes_ct64_cbcdec_init,	/* cast: the function takes the concrete context type */
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_aes_ct64_cbcdec_run
+};
diff --git a/third_party/bearssl/src/aes_ct64_cbcenc.c b/third_party/bearssl/src/aes_ct64_cbcenc.c
new file mode 100644
index 0000000..6cb9dec
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct64_cbcenc.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h -- run the key schedule into ctx and attach the CBC-encryption vtable */
+void
+br_aes_ct64_cbcenc_init(br_aes_ct64_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_ct64_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_ct64_cbcenc_vtable;
+}
+
+/* see bearssl_block.h -- in-place CBC encryption of len bytes (whole 16-byte blocks); iv becomes the last ciphertext block */
+void
+br_aes_ct64_cbcenc_run(const br_aes_ct64_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf;
+	uint64_t sk_exp[120];
+	uint64_t q[8];
+	uint32_t ivw[4];
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	br_range_dec32le(ivw, 4, iv);
+	buf = data;
+	/* Only q[0] and q[4] get data; zero the rest so no slot is read uninitialized. */
+	memset(q, 0, sizeof q);
+	while (len > 0) {
+		uint32_t w[4];
+		int i;
+
+		/* XOR the plaintext block into the running IV, then encrypt. */
+		for (i = 0; i < 4; i ++) {
+			w[i] = ivw[i] ^ br_dec32le(buf + (i << 2));
+		}
+		br_aes_ct64_interleave_in(&q[0], &q[4], w);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_interleave_out(w, q[0], q[4]);
+		memcpy(ivw, w, sizeof w);	/* the ciphertext block is the next IV */
+		br_range_enc32le(buf, w, 4);
+		buf += 16;
+		len -= 16;
+	}
+	br_range_enc32le(iv, ivw, 4);
+}
+
+/* see bearssl_block.h -- method table that br_aes_ct64_cbcenc_init() stores in the context */
+const br_block_cbcenc_class br_aes_ct64_cbcenc_vtable = {
+	sizeof(br_aes_ct64_cbcenc_keys),	/* size of the matching context structure */
+	16,	/* NOTE(review): presumably the block size in bytes -- confirm in bearssl_block.h */
+	4,	/* NOTE(review): presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
+		&br_aes_ct64_cbcenc_init,	/* cast: the function takes the concrete context type */
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+		&br_aes_ct64_cbcenc_run
+};
diff --git a/third_party/bearssl/src/aes_ct64_ctr.c b/third_party/bearssl/src/aes_ct64_ctr.c
new file mode 100644
index 0000000..1275873
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct64_ctr.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h -- run the key schedule into ctx and attach the CTR vtable */
+void
+br_aes_ct64_ctr_init(br_aes_ct64_ctr_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_ct64_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_ct64_ctr_vtable;
+}
+
+/* XOR the len bytes at src into the len bytes at dst (dst[i] ^= src[i]). */
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	size_t u;
+
+	for (u = 0; u < len; u ++) {
+		out[u] ^= in[u];
+	}
+}
+
+/* see bearssl_block.h -- XOR the CTR keystream into data; iv: 12 bytes; cc: block counter; returns the updated counter */
+uint32_t
+br_aes_ct64_ctr_run(const br_aes_ct64_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *buf;
+	uint32_t ivw[16];	/* four counter-block templates; words 3, 7, 11, 15 are filled per chunk */
+	uint64_t sk_exp[120];
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	br_range_dec32le(ivw, 3, iv);
+	memcpy(ivw + 4, ivw, 3 * sizeof(uint32_t));
+	memcpy(ivw + 8, ivw, 3 * sizeof(uint32_t));
+	memcpy(ivw + 12, ivw, 3 * sizeof(uint32_t));
+	buf = data;
+	while (len > 0) {
+		uint64_t q[8];
+		uint32_t w[16];
+		unsigned char tmp[64];
+		int i;
+
+		/*
+		 * TODO: see if we can save on the first br_aes_ct64_ortho()
+		 * call, since iv0/iv1/iv2 are constant for the whole run.
+		 */
+		memcpy(w, ivw, sizeof ivw);
+		w[3] = br_swap32(cc);	/* counter goes in the last word of each block, byte-swapped */
+		w[7] = br_swap32(cc + 1);
+		w[11] = br_swap32(cc + 2);
+		w[15] = br_swap32(cc + 3);
+		for (i = 0; i < 4; i ++) {
+			br_aes_ct64_interleave_in(
+				&q[i], &q[i + 4], w + (i << 2));
+		}
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		for (i = 0; i < 4; i ++) {
+			br_aes_ct64_interleave_out(
+				w + (i << 2), q[i], q[i + 4]);
+		}
+		br_range_enc32le(tmp, w, 16);	/* tmp: 64 bytes of keystream */
+		if (len <= 64) {	/* final chunk: use only len keystream bytes */
+			xorbuf(buf, tmp, len);
+			cc += (uint32_t)len >> 4;	/* advances by whole 16-byte blocks only */
+			break;
+		}
+		xorbuf(buf, tmp, 64);
+		buf += 64;
+		len -= 64;
+		cc += 4;
+	}
+	return cc;
+}
+
+/* see bearssl_block.h -- method table that br_aes_ct64_ctr_init() stores in the context */
+const br_block_ctr_class br_aes_ct64_ctr_vtable = {
+	sizeof(br_aes_ct64_ctr_keys),	/* size of the matching context structure */
+	16,	/* NOTE(review): presumably the block size in bytes -- confirm in bearssl_block.h */
+	4,	/* NOTE(review): presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_ctr_class **, const void *, size_t))
+		&br_aes_ct64_ctr_init,	/* cast: the function takes the concrete context type */
+	(uint32_t (*)(const br_block_ctr_class *const *,
+		const void *, uint32_t, void *, size_t))
+		&br_aes_ct64_ctr_run
+};
diff --git a/third_party/bearssl/src/aes_ct64_ctrcbc.c b/third_party/bearssl/src/aes_ct64_ctrcbc.c
new file mode 100644
index 0000000..21bb8ef
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct64_ctrcbc.c
@@ -0,0 +1,433 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h -- run the key schedule into ctx and attach the CTR+CBC-MAC vtable */
+void
+br_aes_ct64_ctrcbc_init(br_aes_ct64_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_ct64_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_ct64_ctrcbc_vtable;
+}
+
+/* XOR the len bytes at src into the len bytes at dst (dst[i] ^= src[i]). */
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	size_t u;
+
+	for (u = 0; u < len; u ++) {
+		out[u] ^= in[u];
+	}
+}
+
+/* see bearssl_block.h -- XOR the CTR keystream into data; ctr is a 16-byte big-endian counter, updated in place */
+void
+br_aes_ct64_ctrcbc_ctr(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint64_t sk_exp[120];
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * We keep the counter as four 32-bit values, with big-endian
+	 * convention, because that's what is expected for purposes of
+	 * incrementing the counter value.
+	 */
+	ivbuf = ctr;
+	iv0 = br_dec32be(ivbuf +  0);
+	iv1 = br_dec32be(ivbuf +  4);
+	iv2 = br_dec32be(ivbuf +  8);
+	iv3 = br_dec32be(ivbuf + 12);
+
+	buf = data;
+	while (len > 0) {
+		uint64_t q[8];
+		uint32_t w[16];
+		unsigned char tmp[64];
+		int i, j;
+
+		/*
+		 * The bitslice implementation expects values in
+		 * little-endian convention, so we have to byteswap them.
+		 */
+		j = (len >= 64) ? 16 : (int)(len >> 2);	/* 32-bit words needed for this chunk */
+		for (i = 0; i < j; i += 4) {
+			uint32_t carry;
+
+			w[i + 0] = br_swap32(iv0);
+			w[i + 1] = br_swap32(iv1);
+			w[i + 2] = br_swap32(iv2);
+			w[i + 3] = br_swap32(iv3);
+			iv3 ++;
+			carry = ~(iv3 | -iv3) >> 31;	/* 1 if iv3 wrapped to 0, else 0 (no branch) */
+			iv2 += carry;
+			carry &= -(~(iv2 | -iv2) >> 31);	/* keep the carry only if iv2 wrapped too */
+			iv1 += carry;
+			carry &= -(~(iv1 | -iv1) >> 31);
+			iv0 += carry;
+		}
+		memset(w + i, 0, (16 - i) * sizeof(uint32_t));	/* zero the counter slots not used by this chunk */
+
+		for (i = 0; i < 4; i ++) {
+			br_aes_ct64_interleave_in(
+				&q[i], &q[i + 4], w + (i << 2));
+		}
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		for (i = 0; i < 4; i ++) {
+			br_aes_ct64_interleave_out(
+				w + (i << 2), q[i], q[i + 4]);
+		}
+
+		br_range_enc32le(tmp, w, 16);	/* tmp: 64 bytes of keystream */
+		if (len <= 64) {	/* final chunk: use only len keystream bytes */
+			xorbuf(buf, tmp, len);
+			break;
+		}
+		xorbuf(buf, tmp, 64);
+		buf += 64;
+		len -= 64;
+	}
+	br_enc32be(ivbuf +  0, iv0);	/* write the advanced counter back to the caller */
+	br_enc32be(ivbuf +  4, iv1);
+	br_enc32be(ivbuf +  8, iv2);
+	br_enc32be(ivbuf + 12, iv3);
+}
+
+/* see bearssl_block.h -- fold len bytes of data (whole 16-byte blocks) into the 16-byte CBC-MAC value */
+void
+br_aes_ct64_ctrcbc_mac(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	const unsigned char *src;
+	unsigned char *mac;
+	uint32_t cm[4];
+	uint64_t q[8];
+	uint64_t sk_exp[120];
+	int i;
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * The running CBC-MAC value is held as four little-endian words.
+	 */
+	mac = cbcmac;
+	for (i = 0; i < 4; i ++) {
+		cm[i] = br_dec32le(mac + (i << 2));
+	}
+
+	/*
+	 * Only slots 0 and 4 of the bitsliced state carry the block;
+	 * the other slots start out as zero.
+	 */
+	memset(q, 0, sizeof q);
+	for (src = data; len > 0; src += 16, len -= 16) {
+		/*
+		 * XOR the next data block into the MAC value, then
+		 * encrypt the result to get the new MAC value.
+		 */
+		for (i = 0; i < 4; i ++) {
+			cm[i] ^= br_dec32le(src + (i << 2));
+		}
+		br_aes_ct64_interleave_in(&q[0], &q[4], cm);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_interleave_out(cm, q[0], q[4]);
+	}
+
+	for (i = 0; i < 4; i ++) {
+		br_enc32le(mac + (i << 2), cm[i]);
+	}
+}
+
+/* see bearssl_block.h -- CTR-encrypt data in place and fold the ciphertext into cbcmac; ctr and cbcmac are updated */
+void
+br_aes_ct64_ctrcbc_encrypt(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	/*
+	 * When encrypting, the CBC-MAC processing must be lagging by
+	 * one block, since it operates on the encrypted values, so
+	 * it must wait for that encryption to complete.
+	 */
+
+	unsigned char *buf;
+	unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t cm0, cm1, cm2, cm3;
+	uint64_t sk_exp[120];
+	uint64_t q[8];
+	int first_iter;
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * We keep the counter as four 32-bit values, with big-endian
+	 * convention, because that's what is expected for purposes of
+	 * incrementing the counter value.
+	 */
+	ivbuf = ctr;
+	iv0 = br_dec32be(ivbuf +  0);
+	iv1 = br_dec32be(ivbuf +  4);
+	iv2 = br_dec32be(ivbuf +  8);
+	iv3 = br_dec32be(ivbuf + 12);
+
+	/*
+	 * The current CBC-MAC value is kept in little-endian convention.
+	 */
+	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
+	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
+	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
+	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+
+	buf = data;
+	first_iter = 1;
+	memset(q, 0, sizeof q);	/* only slots 0, 1, 4 and 5 are loaded below */
+	while (len > 0) {
+		uint32_t w[8], carry;
+
+		/*
+		 * The bitslice implementation expects values in
+		 * little-endian convention, so we have to byteswap them.
+		 */
+		w[0] = br_swap32(iv0);
+		w[1] = br_swap32(iv1);
+		w[2] = br_swap32(iv2);
+		w[3] = br_swap32(iv3);
+		iv3 ++;
+		carry = ~(iv3 | -iv3) >> 31;	/* 1 if iv3 wrapped to 0, else 0 (no branch) */
+		iv2 += carry;
+		carry &= -(~(iv2 | -iv2) >> 31);	/* keep the carry only if iv2 wrapped too */
+		iv1 += carry;
+		carry &= -(~(iv1 | -iv1) >> 31);
+		iv0 += carry;
+
+		/*
+		 * The block for CBC-MAC.
+		 */
+		w[4] = cm0;
+		w[5] = cm1;
+		w[6] = cm2;
+		w[7] = cm3;
+
+		br_aes_ct64_interleave_in(&q[0], &q[4], w);	/* counter block */
+		br_aes_ct64_interleave_in(&q[1], &q[5], w + 4);	/* CBC-MAC block */
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_interleave_out(w, q[0], q[4]);
+		br_aes_ct64_interleave_out(w + 4, q[1], q[5]);
+
+		/*
+		 * We do the XOR with the plaintext in 32-bit registers,
+		 * so that the values are available for CBC-MAC processing
+		 * as well.
+		 */
+		w[0] ^= br_dec32le(buf +  0);
+		w[1] ^= br_dec32le(buf +  4);
+		w[2] ^= br_dec32le(buf +  8);
+		w[3] ^= br_dec32le(buf + 12);
+		br_enc32le(buf +  0, w[0]);
+		br_enc32le(buf +  4, w[1]);
+		br_enc32le(buf +  8, w[2]);
+		br_enc32le(buf + 12, w[3]);
+
+		buf += 16;
+		len -= 16;
+
+		/*
+		 * We set the cm* values to the block to encrypt in the
+		 * next iteration.
+		 */
+		if (first_iter) {	/* w[4..7] is not used on the first pass: XOR into the incoming cm* */
+			first_iter = 0;
+			cm0 ^= w[0];
+			cm1 ^= w[1];
+			cm2 ^= w[2];
+			cm3 ^= w[3];
+		} else {	/* w[4..7]: encrypted MAC block from this pass; w[0..3]: new ciphertext */
+			cm0 = w[0] ^ w[4];
+			cm1 = w[1] ^ w[5];
+			cm2 = w[2] ^ w[6];
+			cm3 = w[3] ^ w[7];
+		}
+
+		/*
+		 * If this was the last iteration, then compute the
+		 * extra block encryption to complete CBC-MAC.
+		 */
+		if (len == 0) {
+			w[0] = cm0;
+			w[1] = cm1;
+			w[2] = cm2;
+			w[3] = cm3;
+			br_aes_ct64_interleave_in(&q[0], &q[4], w);
+			br_aes_ct64_ortho(q);
+			br_aes_ct64_bitslice_encrypt(
+				ctx->num_rounds, sk_exp, q);
+			br_aes_ct64_ortho(q);
+			br_aes_ct64_interleave_out(w, q[0], q[4]);
+			cm0 = w[0];
+			cm1 = w[1];
+			cm2 = w[2];
+			cm3 = w[3];
+			break;
+		}
+	}
+
+	br_enc32be(ivbuf +  0, iv0);	/* write the advanced counter back (big-endian) */
+	br_enc32be(ivbuf +  4, iv1);
+	br_enc32be(ivbuf +  8, iv2);
+	br_enc32be(ivbuf + 12, iv3);
+	br_enc32le((unsigned char *)cbcmac +  0, cm0);	/* write the new CBC-MAC value back (little-endian) */
+	br_enc32le((unsigned char *)cbcmac +  4, cm1);
+	br_enc32le((unsigned char *)cbcmac +  8, cm2);
+	br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* see bearssl_block.h -- fold the ciphertext in data into cbcmac and CTR-decrypt it in place; ctr and cbcmac are updated */
+void
+br_aes_ct64_ctrcbc_decrypt(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t cm0, cm1, cm2, cm3;
+	uint64_t sk_exp[120];
+	uint64_t q[8];
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * We keep the counter as four 32-bit values, with big-endian
+	 * convention, because that's what is expected for purposes of
+	 * incrementing the counter value.
+	 */
+	ivbuf = ctr;
+	iv0 = br_dec32be(ivbuf +  0);
+	iv1 = br_dec32be(ivbuf +  4);
+	iv2 = br_dec32be(ivbuf +  8);
+	iv3 = br_dec32be(ivbuf + 12);
+
+	/*
+	 * The current CBC-MAC value is kept in little-endian convention.
+	 */
+	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
+	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
+	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
+	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+
+	buf = data;
+	memset(q, 0, sizeof q);	/* only slots 0, 1, 4 and 5 are loaded below */
+	while (len > 0) {
+		uint32_t w[8], carry;
+		unsigned char tmp[16];
+
+		/*
+		 * The bitslice implementation expects values in
+		 * little-endian convention, so we have to byteswap them.
+		 */
+		w[0] = br_swap32(iv0);
+		w[1] = br_swap32(iv1);
+		w[2] = br_swap32(iv2);
+		w[3] = br_swap32(iv3);
+		iv3 ++;
+		carry = ~(iv3 | -iv3) >> 31;	/* 1 if iv3 wrapped to 0, else 0 (no branch) */
+		iv2 += carry;
+		carry &= -(~(iv2 | -iv2) >> 31);	/* keep the carry only if iv2 wrapped too */
+		iv1 += carry;
+		carry &= -(~(iv1 | -iv1) >> 31);
+		iv0 += carry;
+
+		/*
+		 * The block for CBC-MAC.
+		 */
+		w[4] = cm0 ^ br_dec32le(buf +  0);	/* MAC input is read from buf before it is overwritten below */
+		w[5] = cm1 ^ br_dec32le(buf +  4);
+		w[6] = cm2 ^ br_dec32le(buf +  8);
+		w[7] = cm3 ^ br_dec32le(buf + 12);
+
+		br_aes_ct64_interleave_in(&q[0], &q[4], w);	/* counter block */
+		br_aes_ct64_interleave_in(&q[1], &q[5], w + 4);	/* CBC-MAC block */
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_interleave_out(w, q[0], q[4]);
+		br_aes_ct64_interleave_out(w + 4, q[1], q[5]);
+
+		br_enc32le(tmp +  0, w[0]);	/* tmp: 16 bytes of keystream */
+		br_enc32le(tmp +  4, w[1]);
+		br_enc32le(tmp +  8, w[2]);
+		br_enc32le(tmp + 12, w[3]);
+		xorbuf(buf, tmp, 16);
+		cm0 = w[4];	/* encrypted MAC block is the new CBC-MAC value */
+		cm1 = w[5];
+		cm2 = w[6];
+		cm3 = w[7];
+		buf += 16;
+		len -= 16;
+	}
+
+	br_enc32be(ivbuf +  0, iv0);	/* write the advanced counter back (big-endian) */
+	br_enc32be(ivbuf +  4, iv1);
+	br_enc32be(ivbuf +  8, iv2);
+	br_enc32be(ivbuf + 12, iv3);
+	br_enc32le((unsigned char *)cbcmac +  0, cm0);	/* write the new CBC-MAC value back (little-endian) */
+	br_enc32le((unsigned char *)cbcmac +  4, cm1);
+	br_enc32le((unsigned char *)cbcmac +  8, cm2);
+	br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* see bearssl_block.h -- method table that br_aes_ct64_ctrcbc_init() stores in the context */
+const br_block_ctrcbc_class br_aes_ct64_ctrcbc_vtable = {
+	sizeof(br_aes_ct64_ctrcbc_keys),	/* size of the matching context structure */
+	16,	/* NOTE(review): presumably the block size in bytes -- confirm in bearssl_block.h */
+	4,	/* NOTE(review): presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+		&br_aes_ct64_ctrcbc_init,	/* cast: the function takes the concrete context type */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_ct64_ctrcbc_encrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_ct64_ctrcbc_decrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_ct64_ctrcbc_ctr,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_ct64_ctrcbc_mac
+};
diff --git a/third_party/bearssl/src/aes_ct64_dec.c b/third_party/bearssl/src/aes_ct64_dec.c
new file mode 100644
index 0000000..ab00e09
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct64_dec.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- inverse S-box, computed in place on q[0..7] via the forward S-box */
+void
+br_aes_ct64_bitslice_invSbox(uint64_t *q)
+{
+	/*
+	 * See br_aes_ct_bitslice_invSbox(). This is the natural extension
+	 * to 64-bit registers.
+	 */
+	uint64_t q0, q1, q2, q3, q4, q5, q6, q7;
+	/* Pre-transform: complement q[0], q[1], q[5], q[6], then XOR-mix. */
+	q0 = ~q[0];
+	q1 = ~q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = ~q[5];
+	q6 = ~q[6];
+	q7 = q[7];
+	q[7] = q1 ^ q4 ^ q6;
+	q[6] = q0 ^ q3 ^ q5;
+	q[5] = q7 ^ q2 ^ q4;
+	q[4] = q6 ^ q1 ^ q3;
+	q[3] = q5 ^ q0 ^ q2;
+	q[2] = q4 ^ q7 ^ q1;
+	q[1] = q3 ^ q6 ^ q0;
+	q[0] = q2 ^ q5 ^ q7;
+	/* Forward S-box applied to the pre-transformed state, in place. */
+	br_aes_ct64_bitslice_Sbox(q);
+	/* Post-transform: the exact same complement-and-mix step again. */
+	q0 = ~q[0];
+	q1 = ~q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = ~q[5];
+	q6 = ~q[6];
+	q7 = q[7];
+	q[7] = q1 ^ q4 ^ q6;
+	q[6] = q0 ^ q3 ^ q5;
+	q[5] = q7 ^ q2 ^ q4;
+	q[4] = q6 ^ q1 ^ q3;
+	q[3] = q5 ^ q0 ^ q2;
+	q[2] = q4 ^ q7 ^ q1;
+	q[1] = q3 ^ q6 ^ q0;
+	q[0] = q2 ^ q5 ^ q7;
+}
+
+/* XOR the eight round-key words sk[0..7] into the state q[0..7]. */
+static void
+add_round_key(uint64_t *q, const uint64_t *sk)
+{
+	const uint64_t *end = sk + 8;
+	while (sk != end) {
+		*q ++ ^= *sk ++;
+	}
+}
+
+static void
+inv_shift_rows(uint64_t *q)
+{
+	int i;
+	/* The same fixed bit permutation is applied to each of the 8 words. */
+	for (i = 0; i < 8; i ++) {
+		uint64_t x;
+		/* 16-bit groups 0..3 are rotated left by 0, 4, 8 and 12 bits. */
+		x = q[i];
+		q[i] = (x & (uint64_t)0x000000000000FFFF)
+			| ((x & (uint64_t)0x000000000FFF0000) << 4)
+			| ((x & (uint64_t)0x00000000F0000000) >> 12)
+			| ((x & (uint64_t)0x000000FF00000000) << 8)
+			| ((x & (uint64_t)0x0000FF0000000000) >> 8)
+			| ((x & (uint64_t)0x000F000000000000) << 12)
+			| ((x & (uint64_t)0xFFF0000000000000) >> 4);
+	}
+}
+
+/* Rotate a 64-bit word by 32 bits, i.e. exchange its two 32-bit halves. */
+static inline uint64_t rotr32(uint64_t x)
+{
+	return (x >> 32) | (x << 32);
+}
+
+static void
+inv_mix_columns(uint64_t *q)
+{
+	uint64_t q0, q1, q2, q3, q4, q5, q6, q7;
+	uint64_t r0, r1, r2, r3, r4, r5, r6, r7;
+	/* qN: input word N; rN: the same word rotated right by 16 bits. */
+	q0 = q[0];
+	q1 = q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = q[5];
+	q6 = q[6];
+	q7 = q[7];
+	r0 = (q0 >> 16) | (q0 << 48);
+	r1 = (q1 >> 16) | (q1 << 48);
+	r2 = (q2 >> 16) | (q2 << 48);
+	r3 = (q3 >> 16) | (q3 << 48);
+	r4 = (q4 >> 16) | (q4 << 48);
+	r5 = (q5 >> 16) | (q5 << 48);
+	r6 = (q6 >> 16) | (q6 << 48);
+	r7 = (q7 >> 16) | (q7 << 48);
+	/* Each output: XOR of qN/rN terms plus another such XOR rotated by 32. */
+	q[0] = q5 ^ q6 ^ q7 ^ r0 ^ r5 ^ r7 ^ rotr32(q0 ^ q5 ^ q6 ^ r0 ^ r5);
+	q[1] = q0 ^ q5 ^ r0 ^ r1 ^ r5 ^ r6 ^ r7 ^ rotr32(q1 ^ q5 ^ q7 ^ r1 ^ r5 ^ r6);
+	q[2] = q0 ^ q1 ^ q6 ^ r1 ^ r2 ^ r6 ^ r7 ^ rotr32(q0 ^ q2 ^ q6 ^ r2 ^ r6 ^ r7);
+	q[3] = q0 ^ q1 ^ q2 ^ q5 ^ q6 ^ r0 ^ r2 ^ r3 ^ r5 ^ rotr32(q0 ^ q1 ^ q3 ^ q5 ^ q6 ^ q7 ^ r0 ^ r3 ^ r5 ^ r7);
+	q[4] = q1 ^ q2 ^ q3 ^ q5 ^ r1 ^ r3 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr32(q1 ^ q2 ^ q4 ^ q5 ^ q7 ^ r1 ^ r4 ^ r5 ^ r6);
+	q[5] = q2 ^ q3 ^ q4 ^ q6 ^ r2 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr32(q2 ^ q3 ^ q5 ^ q6 ^ r2 ^ r5 ^ r6 ^ r7);
+	q[6] = q3 ^ q4 ^ q5 ^ q7 ^ r3 ^ r5 ^ r6 ^ r7 ^ rotr32(q3 ^ q4 ^ q6 ^ q7 ^ r3 ^ r6 ^ r7);
+	q[7] = q4 ^ q5 ^ q6 ^ r4 ^ r6 ^ r7 ^ rotr32(q4 ^ q5 ^ q7 ^ r4 ^ r7);
+}
+
+/* see inner.h -- decryption rounds applied in place to the bitsliced state q */
+void
+br_aes_ct64_bitslice_decrypt(unsigned num_rounds,
+	const uint64_t *skey, uint64_t *q)
+{
+	unsigned u;
+	/* Round keys (8 words each) are applied from index num_rounds down to 0. */
+	add_round_key(q, skey + (num_rounds << 3));
+	for (u = num_rounds - 1; u > 0; u --) {
+		inv_shift_rows(q);
+		br_aes_ct64_bitslice_invSbox(q);
+		add_round_key(q, skey + (u << 3));
+		inv_mix_columns(q);
+	}
+	inv_shift_rows(q);
+	br_aes_ct64_bitslice_invSbox(q);
+	add_round_key(q, skey);
+}
diff --git a/third_party/bearssl/src/aes_ct64_enc.c b/third_party/bearssl/src/aes_ct64_enc.c
new file mode 100644
index 0000000..78631ce
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct64_enc.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Mix one round key into the state: q[k] ^= sk[k] for k = 0..7.
+ */
+static inline void
+add_round_key(uint64_t *q, const uint64_t *sk)
+{
+	int k;
+
+	for (k = 0; k != 8; k ++) {
+		q[k] ^= sk[k];
+	}
+}
+
+static inline void
+shift_rows(uint64_t *q)
+{
+	int i;
+	/* The same fixed bit permutation is applied to each of the 8 words. */
+	for (i = 0; i < 8; i ++) {
+		uint64_t x;
+		/* 16-bit groups 0..3 are rotated right by 0, 4, 8 and 12 bits. */
+		x = q[i];
+		q[i] = (x & (uint64_t)0x000000000000FFFF)
+			| ((x & (uint64_t)0x00000000FFF00000) >> 4)
+			| ((x & (uint64_t)0x00000000000F0000) << 12)
+			| ((x & (uint64_t)0x0000FF0000000000) >> 8)
+			| ((x & (uint64_t)0x000000FF00000000) << 8)
+			| ((x & (uint64_t)0xF000000000000000) >> 12)
+			| ((x & (uint64_t)0x0FFF000000000000) << 4);
+	}
+}
+
+/* Swap the upper and lower 32-bit halves of x (64-bit rotation by 32). */
+static inline uint64_t rotr32(uint64_t x)
+{
+	return (x >> 32) | (x << 32);
+}
+
+static inline void
+mix_columns(uint64_t *q)
+{
+	uint64_t q0, q1, q2, q3, q4, q5, q6, q7;
+	uint64_t r0, r1, r2, r3, r4, r5, r6, r7;
+	/* qN: input word N; rN: the same word rotated right by 16 bits. */
+	q0 = q[0];
+	q1 = q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = q[5];
+	q6 = q[6];
+	q7 = q[7];
+	r0 = (q0 >> 16) | (q0 << 48);
+	r1 = (q1 >> 16) | (q1 << 48);
+	r2 = (q2 >> 16) | (q2 << 48);
+	r3 = (q3 >> 16) | (q3 << 48);
+	r4 = (q4 >> 16) | (q4 << 48);
+	r5 = (q5 >> 16) | (q5 << 48);
+	r6 = (q6 >> 16) | (q6 << 48);
+	r7 = (q7 >> 16) | (q7 << 48);
+	/* Each output: XOR of qN/rN terms plus (qN ^ rN) rotated by 32 bits. */
+	q[0] = q7 ^ r7 ^ r0 ^ rotr32(q0 ^ r0);
+	q[1] = q0 ^ r0 ^ q7 ^ r7 ^ r1 ^ rotr32(q1 ^ r1);
+	q[2] = q1 ^ r1 ^ r2 ^ rotr32(q2 ^ r2);
+	q[3] = q2 ^ r2 ^ q7 ^ r7 ^ r3 ^ rotr32(q3 ^ r3);
+	q[4] = q3 ^ r3 ^ q7 ^ r7 ^ r4 ^ rotr32(q4 ^ r4);
+	q[5] = q4 ^ r4 ^ r5 ^ rotr32(q5 ^ r5);
+	q[6] = q5 ^ r5 ^ r6 ^ rotr32(q6 ^ r6);
+	q[7] = q6 ^ r6 ^ r7 ^ rotr32(q7 ^ r7);
+}
+
+/* see inner.h -- encryption rounds applied in place to the bitsliced state q */
+void
+br_aes_ct64_bitslice_encrypt(unsigned num_rounds,
+	const uint64_t *skey, uint64_t *q)
+{
+	unsigned u;
+	/* Round key u is skey[8u .. 8u+7]; the last round skips mix_columns(). */
+	add_round_key(q, skey);
+	for (u = 1; u < num_rounds; u ++) {
+		br_aes_ct64_bitslice_Sbox(q);
+		shift_rows(q);
+		mix_columns(q);
+		add_round_key(q, skey + (u << 3));
+	}
+	br_aes_ct64_bitslice_Sbox(q);
+	shift_rows(q);
+	add_round_key(q, skey + (num_rounds << 3));
+}
diff --git a/third_party/bearssl/src/aes_ct_cbcdec.c b/third_party/bearssl/src/aes_ct_cbcdec.c
new file mode 100644
index 0000000..522645a
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct_cbcdec.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h -- prepare a CBC-decryption context from a raw key */
+void
+br_aes_ct_cbcdec_init(br_aes_ct_cbcdec_keys *ctx, const void *key, size_t len)
+{
+	/* The key schedule is given ctx->skey; its result is the round count. */
+	ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_ct_cbcdec_vtable;
+}
+
+/* see bearssl_block.h -- in-place CBC decryption; iv receives the last ciphertext block */
+void
+br_aes_ct_cbcdec_run(const br_aes_ct_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf, *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t sk_exp[120];
+	/* NOTE(review): len is presumably a multiple of 16 -- confirm in header. */
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	ivbuf = iv;
+	iv0 = br_dec32le(ivbuf);
+	iv1 = br_dec32le(ivbuf + 4);
+	iv2 = br_dec32le(ivbuf + 8);
+	iv3 = br_dec32le(ivbuf + 12);
+	buf = data;
+	while (len > 0) {
+		uint32_t q[8], sq[8];
+		/* q[]: even = 1st block, odd = 2nd (if len >= 32); sq[] saves ciphertext. */
+		q[0] = br_dec32le(buf);
+		q[2] = br_dec32le(buf + 4);
+		q[4] = br_dec32le(buf + 8);
+		q[6] = br_dec32le(buf + 12);
+		if (len >= 32) {
+			q[1] = br_dec32le(buf + 16);
+			q[3] = br_dec32le(buf + 20);
+			q[5] = br_dec32le(buf + 24);
+			q[7] = br_dec32le(buf + 28);
+		} else {
+			q[1] = 0;
+			q[3] = 0;
+			q[5] = 0;
+			q[7] = 0;
+		}
+		memcpy(sq, q, sizeof q);
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_decrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+		br_enc32le(buf, q[0] ^ iv0);
+		br_enc32le(buf + 4, q[2] ^ iv1);
+		br_enc32le(buf + 8, q[4] ^ iv2);
+		br_enc32le(buf + 12, q[6] ^ iv3);
+		if (len < 32) {
+			iv0 = sq[0];
+			iv1 = sq[2];
+			iv2 = sq[4];
+			iv3 = sq[6];
+			break;
+		}
+		br_enc32le(buf + 16, q[1] ^ sq[0]);
+		br_enc32le(buf + 20, q[3] ^ sq[2]);
+		br_enc32le(buf + 24, q[5] ^ sq[4]);
+		br_enc32le(buf + 28, q[7] ^ sq[6]);
+		iv0 = sq[1];
+		iv1 = sq[3];
+		iv2 = sq[5];
+		iv3 = sq[7];
+		buf += 32;
+		len -= 32;
+	}
+	br_enc32le(ivbuf, iv0);
+	br_enc32le(ivbuf + 4, iv1);
+	br_enc32le(ivbuf + 8, iv2);
+	br_enc32le(ivbuf + 12, iv3);
+}
+
+/* see bearssl_block.h -- context size, 16, 4, then the cast entry points */
+const br_block_cbcdec_class br_aes_ct_cbcdec_vtable = {
+	sizeof(br_aes_ct_cbcdec_keys),
+	16,
+	4,
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))
+		&br_aes_ct_cbcdec_init,
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_aes_ct_cbcdec_run
+};
diff --git a/third_party/bearssl/src/aes_ct_cbcenc.c b/third_party/bearssl/src/aes_ct_cbcenc.c
new file mode 100644
index 0000000..cb85977
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct_cbcenc.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h -- prepare a CBC-encryption context from a raw key */
+void
+br_aes_ct_cbcenc_init(br_aes_ct_cbcenc_keys *ctx, const void *key, size_t len)
+{
+	/* The key schedule is given ctx->skey; its result is the round count. */
+	ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_ct_cbcenc_vtable;
+}
+
+/* see bearssl_block.h -- in-place CBC encryption, one block per pass; iv is updated */
+void
+br_aes_ct_cbcenc_run(const br_aes_ct_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf, *ivbuf;
+	uint32_t q[8];
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t sk_exp[120];
+	/* Odd q[] words are only initialised here; the loop uses the even ones. */
+	q[1] = 0;
+	q[3] = 0;
+	q[5] = 0;
+	q[7] = 0;
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	ivbuf = iv;
+	iv0 = br_dec32le(ivbuf);
+	iv1 = br_dec32le(ivbuf + 4);
+	iv2 = br_dec32le(ivbuf + 8);
+	iv3 = br_dec32le(ivbuf + 12);
+	buf = data;
+	while (len > 0) {
+		q[0] = iv0 ^ br_dec32le(buf);
+		q[2] = iv1 ^ br_dec32le(buf + 4);
+		q[4] = iv2 ^ br_dec32le(buf + 8);
+		q[6] = iv3 ^ br_dec32le(buf + 12);
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+		iv0 = q[0];
+		iv1 = q[2];
+		iv2 = q[4];
+		iv3 = q[6];
+		br_enc32le(buf, iv0);
+		br_enc32le(buf + 4, iv1);
+		br_enc32le(buf + 8, iv2);
+		br_enc32le(buf + 12, iv3);
+		buf += 16;
+		len -= 16;
+	}
+	br_enc32le(ivbuf, iv0);
+	br_enc32le(ivbuf + 4, iv1);
+	br_enc32le(ivbuf + 8, iv2);
+	br_enc32le(ivbuf + 12, iv3);
+}
+
+/* see bearssl_block.h -- context size, 16, 4, then the cast entry points */
+const br_block_cbcenc_class br_aes_ct_cbcenc_vtable = {
+	sizeof(br_aes_ct_cbcenc_keys),
+	16,
+	4,
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
+		&br_aes_ct_cbcenc_init,
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+		&br_aes_ct_cbcenc_run
+};
diff --git a/third_party/bearssl/src/aes_ct_ctr.c b/third_party/bearssl/src/aes_ct_ctr.c
new file mode 100644
index 0000000..f407689
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct_ctr.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h -- prepare a CTR-mode context from a raw key */
+void
+br_aes_ct_ctr_init(br_aes_ct_ctr_keys *ctx, const void *key, size_t len)
+{
+	/* The key schedule is given ctx->skey; its result is the round count. */
+	ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_ct_ctr_vtable;
+}
+
+/* XOR len bytes read from src into the bytes at dst (dst[i] ^= src[i]). */
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	size_t i;
+
+	for (i = 0; i < len; i ++) {
+		out[i] ^= in[i];
+	}
+}
+
+/* see bearssl_block.h -- CTR mode applied in place; returns the counter after the last block used */
+uint32_t
+br_aes_ct_ctr_run(const br_aes_ct_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *buf;
+	const unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2;
+	uint32_t sk_exp[120];
+	/* 12 IV bytes are loaded once; cc fills the fourth word of each block. */
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	ivbuf = iv;
+	iv0 = br_dec32le(ivbuf);
+	iv1 = br_dec32le(ivbuf + 4);
+	iv2 = br_dec32le(ivbuf + 8);
+	buf = data;
+	while (len > 0) {
+		uint32_t q[8];
+		unsigned char tmp[32];
+
+		/*
+		 * TODO: see if we can save on the first br_aes_ct_ortho()
+		 * call, since iv0/iv1/iv2 are constant for the whole run.
+		 */
+		q[0] = q[1] = iv0;
+		q[2] = q[3] = iv1;
+		q[4] = q[5] = iv2;
+		q[6] = br_swap32(cc);
+		q[7] = br_swap32(cc + 1);
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+		br_enc32le(tmp, q[0]);
+		br_enc32le(tmp + 4, q[2]);
+		br_enc32le(tmp + 8, q[4]);
+		br_enc32le(tmp + 12, q[6]);
+		br_enc32le(tmp + 16, q[1]);
+		br_enc32le(tmp + 20, q[3]);
+		br_enc32le(tmp + 24, q[5]);
+		br_enc32le(tmp + 28, q[7]);
+		/* tmp: 16 bytes from the cc block, then 16 from the cc + 1 block. */
+		if (len <= 32) {
+			xorbuf(buf, tmp, len);
+			cc ++;
+			if (len > 16) {
+				cc ++;
+			}
+			break;
+		}
+		xorbuf(buf, tmp, 32);
+		buf += 32;
+		len -= 32;
+		cc += 2;
+	}
+	return cc;
+}
+
+/* see bearssl_block.h -- context size, 16, 4, then the cast entry points */
+const br_block_ctr_class br_aes_ct_ctr_vtable = {
+	sizeof(br_aes_ct_ctr_keys),
+	16,
+	4,
+	(void (*)(const br_block_ctr_class **, const void *, size_t))
+		&br_aes_ct_ctr_init,
+	(uint32_t (*)(const br_block_ctr_class *const *,
+		const void *, uint32_t, void *, size_t))
+		&br_aes_ct_ctr_run
+};
diff --git a/third_party/bearssl/src/aes_ct_ctrcbc.c b/third_party/bearssl/src/aes_ct_ctrcbc.c
new file mode 100644
index 0000000..8ae9fc7
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct_ctrcbc.c
@@ -0,0 +1,422 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h -- prepare a CTR + CBC-MAC context from a raw key */
+void
+br_aes_ct_ctrcbc_init(br_aes_ct_ctrcbc_keys *ctx, const void *key, size_t len)
+{
+	/* The key schedule is given ctx->skey; its result is the round count. */
+	ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_ct_ctrcbc_vtable;
+}
+
+/* Byte-wise XOR of src into dst over len bytes: dst[n] ^= src[n]. */
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	size_t n;
+
+	for (n = 0; n < len; n ++) {
+		out[n] ^= in[n];
+	}
+}
+
+/* see bearssl_block.h -- CTR mode applied in place; the 16-byte ctr is updated */
+void
+br_aes_ct_ctrcbc_ctr(const br_aes_ct_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t sk_exp[120];
+
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * We keep the counter as four 32-bit values, with big-endian
+	 * convention, because that's what is expected for purposes of
+	 * incrementing the counter value.
+	 */
+	ivbuf = ctr;
+	iv0 = br_dec32be(ivbuf +  0);
+	iv1 = br_dec32be(ivbuf +  4);
+	iv2 = br_dec32be(ivbuf +  8);
+	iv3 = br_dec32be(ivbuf + 12);
+
+	buf = data;
+	while (len > 0) {
+		uint32_t q[8], carry;
+		unsigned char tmp[32];
+		/* carry stays 1 only while each incremented word wrapped to 0. */
+		/*
+		 * The bitslice implementation expects values in
+		 * little-endian convention, so we have to byteswap them.
+		 */
+		q[0] = br_swap32(iv0);
+		q[2] = br_swap32(iv1);
+		q[4] = br_swap32(iv2);
+		q[6] = br_swap32(iv3);
+		iv3 ++;
+		carry = ~(iv3 | -iv3) >> 31;
+		iv2 += carry;
+		carry &= -(~(iv2 | -iv2) >> 31);
+		iv1 += carry;
+		carry &= -(~(iv1 | -iv1) >> 31);
+		iv0 += carry;
+		q[1] = br_swap32(iv0);
+		q[3] = br_swap32(iv1);
+		q[5] = br_swap32(iv2);
+		q[7] = br_swap32(iv3);
+		if (len > 16) {
+			iv3 ++;
+			carry = ~(iv3 | -iv3) >> 31;
+			iv2 += carry;
+			carry &= -(~(iv2 | -iv2) >> 31);
+			iv1 += carry;
+			carry &= -(~(iv1 | -iv1) >> 31);
+			iv0 += carry;
+		}
+		/* The second increment above happens only when len > 16. */
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+
+		br_enc32le(tmp, q[0]);
+		br_enc32le(tmp + 4, q[2]);
+		br_enc32le(tmp + 8, q[4]);
+		br_enc32le(tmp + 12, q[6]);
+		br_enc32le(tmp + 16, q[1]);
+		br_enc32le(tmp + 20, q[3]);
+		br_enc32le(tmp + 24, q[5]);
+		br_enc32le(tmp + 28, q[7]);
+		/* Last pass: XOR only the remaining len bytes, then leave the loop. */
+		if (len <= 32) {
+			xorbuf(buf, tmp, len);
+			break;
+		}
+		xorbuf(buf, tmp, 32);
+		buf += 32;
+		len -= 32;
+	}
+	br_enc32be(ivbuf +  0, iv0);
+	br_enc32be(ivbuf +  4, iv1);
+	br_enc32be(ivbuf +  8, iv2);
+	br_enc32be(ivbuf + 12, iv3);
+}
+
+/* see bearssl_block.h -- absorb data into the 16-byte CBC-MAC state at cbcmac */
+void
+br_aes_ct_ctrcbc_mac(const br_aes_ct_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	uint32_t cm0, cm1, cm2, cm3;
+	uint32_t q[8];
+	uint32_t sk_exp[120];
+
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	buf = data;
+	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
+	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
+	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
+	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+	q[1] = 0;
+	q[3] = 0;
+	q[5] = 0;
+	q[7] = 0;
+	/* Odd q[] words are only initialised above; the loop uses the even ones. */
+	while (len > 0) {
+		q[0] = cm0 ^ br_dec32le(buf +  0);
+		q[2] = cm1 ^ br_dec32le(buf +  4);
+		q[4] = cm2 ^ br_dec32le(buf +  8);
+		q[6] = cm3 ^ br_dec32le(buf + 12);
+
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+
+		cm0 = q[0];
+		cm1 = q[2];
+		cm2 = q[4];
+		cm3 = q[6];
+		buf += 16;
+		len -= 16;
+	}
+	/* Store the updated CBC-MAC state back into the caller's buffer. */
+	br_enc32le((unsigned char *)cbcmac +  0, cm0);
+	br_enc32le((unsigned char *)cbcmac +  4, cm1);
+	br_enc32le((unsigned char *)cbcmac +  8, cm2);
+	br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* see bearssl_block.h -- CTR-encrypt data in place and CBC-MAC the resulting ciphertext */
+void
+br_aes_ct_ctrcbc_encrypt(const br_aes_ct_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	/*
+	 * When encrypting, the CBC-MAC processing must be lagging by
+	 * one block, since it operates on the encrypted values, so
+	 * it must wait for that encryption to complete.
+	 */
+
+	unsigned char *buf;
+	unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t cm0, cm1, cm2, cm3;
+	uint32_t sk_exp[120];
+	int first_iter;
+
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * We keep the counter as four 32-bit values, with big-endian
+	 * convention, because that's what is expected for purposes of
+	 * incrementing the counter value.
+	 */
+	ivbuf = ctr;
+	iv0 = br_dec32be(ivbuf +  0);
+	iv1 = br_dec32be(ivbuf +  4);
+	iv2 = br_dec32be(ivbuf +  8);
+	iv3 = br_dec32be(ivbuf + 12);
+
+	/*
+	 * The current CBC-MAC value is kept in little-endian convention.
+	 */
+	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
+	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
+	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
+	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+
+	buf = data;
+	first_iter = 1;
+	while (len > 0) {
+		uint32_t q[8], carry;
+		/* carry stays 1 only while each incremented word wrapped to 0. */
+		/*
+		 * The bitslice implementation expects values in
+		 * little-endian convention, so we have to byteswap them.
+		 */
+		q[0] = br_swap32(iv0);
+		q[2] = br_swap32(iv1);
+		q[4] = br_swap32(iv2);
+		q[6] = br_swap32(iv3);
+		iv3 ++;
+		carry = ~(iv3 | -iv3) >> 31;
+		iv2 += carry;
+		carry &= -(~(iv2 | -iv2) >> 31);
+		iv1 += carry;
+		carry &= -(~(iv1 | -iv1) >> 31);
+		iv0 += carry;
+
+		/*
+		 * The odd values are used for CBC-MAC.
+		 */
+		q[1] = cm0;
+		q[3] = cm1;
+		q[5] = cm2;
+		q[7] = cm3;
+
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+
+		/*
+		 * We do the XOR with the plaintext in 32-bit registers,
+		 * so that the values are available for CBC-MAC processing
+		 * as well.
+		 */
+		q[0] ^= br_dec32le(buf +  0);
+		q[2] ^= br_dec32le(buf +  4);
+		q[4] ^= br_dec32le(buf +  8);
+		q[6] ^= br_dec32le(buf + 12);
+		br_enc32le(buf +  0, q[0]);
+		br_enc32le(buf +  4, q[2]);
+		br_enc32le(buf +  8, q[4]);
+		br_enc32le(buf + 12, q[6]);
+
+		buf += 16;
+		len -= 16;
+		/* First pass: the odd-lane output is unused; cm just absorbs q[even]. */
+		/*
+		 * We set the cm* values to the block to encrypt in the
+		 * next iteration.
+		 */
+		if (first_iter) {
+			first_iter = 0;
+			cm0 ^= q[0];
+			cm1 ^= q[2];
+			cm2 ^= q[4];
+			cm3 ^= q[6];
+		} else {
+			cm0 = q[0] ^ q[1];
+			cm1 = q[2] ^ q[3];
+			cm2 = q[4] ^ q[5];
+			cm3 = q[6] ^ q[7];
+		}
+
+		/*
+		 * If this was the last iteration, then compute the
+		 * extra block encryption to complete CBC-MAC.
+		 */
+		if (len == 0) {
+			q[0] = cm0;
+			q[2] = cm1;
+			q[4] = cm2;
+			q[6] = cm3;
+			br_aes_ct_ortho(q);
+			br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+			br_aes_ct_ortho(q);
+			cm0 = q[0];
+			cm1 = q[2];
+			cm2 = q[4];
+			cm3 = q[6];
+			break;
+		}
+	}
+
+	br_enc32be(ivbuf +  0, iv0);
+	br_enc32be(ivbuf +  4, iv1);
+	br_enc32be(ivbuf +  8, iv2);
+	br_enc32be(ivbuf + 12, iv3);
+	br_enc32le((unsigned char *)cbcmac +  0, cm0);
+	br_enc32le((unsigned char *)cbcmac +  4, cm1);
+	br_enc32le((unsigned char *)cbcmac +  8, cm2);
+	br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* see bearssl_block.h -- CBC-MAC the incoming data, then CTR-decrypt it in place */
+void
+br_aes_ct_ctrcbc_decrypt(const br_aes_ct_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t cm0, cm1, cm2, cm3;
+	uint32_t sk_exp[120];
+
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * We keep the counter as four 32-bit values, with big-endian
+	 * convention, because that's what is expected for purposes of
+	 * incrementing the counter value.
+	 */
+	ivbuf = ctr;
+	iv0 = br_dec32be(ivbuf +  0);
+	iv1 = br_dec32be(ivbuf +  4);
+	iv2 = br_dec32be(ivbuf +  8);
+	iv3 = br_dec32be(ivbuf + 12);
+
+	/*
+	 * The current CBC-MAC value is kept in little-endian convention.
+	 */
+	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
+	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
+	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
+	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+
+	buf = data;
+	while (len > 0) {
+		uint32_t q[8], carry;
+		unsigned char tmp[16];
+		/* carry stays 1 only while each incremented word wrapped to 0. */
+		/*
+		 * The bitslice implementation expects values in
+		 * little-endian convention, so we have to byteswap them.
+		 */
+		q[0] = br_swap32(iv0);
+		q[2] = br_swap32(iv1);
+		q[4] = br_swap32(iv2);
+		q[6] = br_swap32(iv3);
+		iv3 ++;
+		carry = ~(iv3 | -iv3) >> 31;
+		iv2 += carry;
+		carry &= -(~(iv2 | -iv2) >> 31);
+		iv1 += carry;
+		carry &= -(~(iv1 | -iv1) >> 31);
+		iv0 += carry;
+
+		/*
+		 * The odd values are used for CBC-MAC.
+		 */
+		q[1] = cm0 ^ br_dec32le(buf +  0);
+		q[3] = cm1 ^ br_dec32le(buf +  4);
+		q[5] = cm2 ^ br_dec32le(buf +  8);
+		q[7] = cm3 ^ br_dec32le(buf + 12);
+
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+		/* Even words are XORed into buf; odd words become the new cm*. */
+		br_enc32le(tmp +  0, q[0]);
+		br_enc32le(tmp +  4, q[2]);
+		br_enc32le(tmp +  8, q[4]);
+		br_enc32le(tmp + 12, q[6]);
+		xorbuf(buf, tmp, 16);
+		cm0 = q[1];
+		cm1 = q[3];
+		cm2 = q[5];
+		cm3 = q[7];
+		buf += 16;
+		len -= 16;
+	}
+
+	br_enc32be(ivbuf +  0, iv0);
+	br_enc32be(ivbuf +  4, iv1);
+	br_enc32be(ivbuf +  8, iv2);
+	br_enc32be(ivbuf + 12, iv3);
+	br_enc32le((unsigned char *)cbcmac +  0, cm0);
+	br_enc32le((unsigned char *)cbcmac +  4, cm1);
+	br_enc32le((unsigned char *)cbcmac +  8, cm2);
+	br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* see bearssl_block.h -- context size, 16, 4, then the cast entry points */
+const br_block_ctrcbc_class br_aes_ct_ctrcbc_vtable = {
+	sizeof(br_aes_ct_ctrcbc_keys),
+	16,
+	4,
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+		&br_aes_ct_ctrcbc_init,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_ct_ctrcbc_encrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_ct_ctrcbc_decrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_ct_ctrcbc_ctr,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_ct_ctrcbc_mac
+};
diff --git a/third_party/bearssl/src/aes_ct_dec.c b/third_party/bearssl/src/aes_ct_dec.c
new file mode 100644
index 0000000..7f32d2b
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct_dec.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Inverse AES S-box applied in place to the eight bitsliced words q[0..7]; see inner.h */
+void
+br_aes_ct_bitslice_invSbox(uint32_t *q)
+{
+	/*
+	 * AES S-box is:
+	 *   S(x) = A(I(x)) ^ 0x63
+	 * where I() is inversion in GF(256), and A() is a linear
+	 * transform (0 is formally defined to be its own inverse).
+	 * Since inversion is an involution, the inverse S-box can be
+	 * computed from the S-box as:
+	 *   iS(x) = B(S(B(x ^ 0x63)) ^ 0x63)
+	 * where B() is the inverse of A(). Indeed, for any y in GF(256):
+	 *   iS(S(y)) = B(A(I(B(A(I(y)) ^ 0x63 ^ 0x63))) ^ 0x63 ^ 0x63) = y
+	 *
+	 * Note: we reuse the implementation of the forward S-box,
+	 * instead of duplicating it here, so that total code size is
+	 * lower. By merging the B() transforms into the S-box circuit
+	 * we could make faster CBC decryption, but CBC decryption is
+	 * already considerably faster than CBC encryption because we
+	 * can process two blocks in parallel.
+	 */
+	uint32_t q0, q1, q2, q3, q4, q5, q6, q7;
+
+	q0 = ~q[0];	/* first pass: B(x ^ 0x63) */
+	q1 = ~q[1];	/* words 0, 1, 5 and 6 are inverted: 0x63 has bits 0, 1, 5, 6 set */
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = ~q[5];
+	q6 = ~q[6];
+	q7 = q[7];
+	q[7] = q1 ^ q4 ^ q6;	/* B(): each output word is the XOR of three input words */
+	q[6] = q0 ^ q3 ^ q5;
+	q[5] = q7 ^ q2 ^ q4;
+	q[4] = q6 ^ q1 ^ q3;
+	q[3] = q5 ^ q0 ^ q2;
+	q[2] = q4 ^ q7 ^ q1;
+	q[1] = q3 ^ q6 ^ q0;
+	q[0] = q2 ^ q5 ^ q7;
+
+	br_aes_ct_bitslice_Sbox(q);	/* forward S-box, defined elsewhere */
+
+	q0 = ~q[0];	/* second pass: B(S(...) ^ 0x63), same circuit as the first pass */
+	q1 = ~q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = ~q[5];
+	q6 = ~q[6];
+	q7 = q[7];
+	q[7] = q1 ^ q4 ^ q6;
+	q[6] = q0 ^ q3 ^ q5;
+	q[5] = q7 ^ q2 ^ q4;
+	q[4] = q6 ^ q1 ^ q3;
+	q[3] = q5 ^ q0 ^ q2;
+	q[2] = q4 ^ q7 ^ q1;
+	q[1] = q3 ^ q6 ^ q0;
+	q[0] = q2 ^ q5 ^ q7;
+}
+
+static void
+add_round_key(uint32_t *q, const uint32_t *sk)
+{
+	/* XOR the eight words of one round subkey (sk) into the state (q). */
+	const uint32_t *end = sk + 8;
+	while (sk != end) {
+		*q++ ^= *sk++;
+	}
+}
+
+static void
+inv_shift_rows(uint32_t *q)
+{
+	/* Byte 0 of each word is kept; bytes 1, 2, 3 rotate left by 2, 4, 6 bits. */
+	uint32_t *end = q + 8;
+
+	for (; q != end; q ++) {
+		uint32_t w = *q;
+		uint32_t b1 = ((w << 2) & 0x0000FC00) | ((w >> 6) & 0x00000300);
+		uint32_t b2 = ((w << 4) & 0x00F00000) | ((w >> 4) & 0x000F0000);
+		uint32_t b3 = ((w << 6) & 0xC0000000) | ((w >> 2) & 0x3F000000);
+
+		*q = (w & 0x000000FF) | b1 | b2 | b3;
+	}
+}
+
+static inline uint32_t
+rotr16(uint32_t x)
+{
+	return (x >> 16) | (x << 16);	/* swap the two 16-bit halves */
+}
+
+static void
+inv_mix_columns(uint32_t *q)
+{
+	uint32_t q0, q1, q2, q3, q4, q5, q6, q7;	/* copies of the eight input words */
+	uint32_t r0, r1, r2, r3, r4, r5, r6, r7;	/* each input word rotated right by 8 bits */
+	/* Inverse MixColumns step on the bitsliced state q[0..7], in place (used by br_aes_ct_bitslice_decrypt). */
+	q0 = q[0];
+	q1 = q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = q[5];
+	q6 = q[6];
+	q7 = q[7];
+	r0 = (q0 >> 8) | (q0 << 24);
+	r1 = (q1 >> 8) | (q1 << 24);
+	r2 = (q2 >> 8) | (q2 << 24);
+	r3 = (q3 >> 8) | (q3 << 24);
+	r4 = (q4 >> 8) | (q4 << 24);
+	r5 = (q5 >> 8) | (q5 << 24);
+	r6 = (q6 >> 8) | (q6 << 24);
+	r7 = (q7 >> 8) | (q7 << 24);
+	/* Each output is an XOR of selected q/r words with a 16-bit rotation (rotr16) of another such XOR. */
+	q[0] = q5 ^ q6 ^ q7 ^ r0 ^ r5 ^ r7 ^ rotr16(q0 ^ q5 ^ q6 ^ r0 ^ r5);
+	q[1] = q0 ^ q5 ^ r0 ^ r1 ^ r5 ^ r6 ^ r7 ^ rotr16(q1 ^ q5 ^ q7 ^ r1 ^ r5 ^ r6);
+	q[2] = q0 ^ q1 ^ q6 ^ r1 ^ r2 ^ r6 ^ r7 ^ rotr16(q0 ^ q2 ^ q6 ^ r2 ^ r6 ^ r7);
+	q[3] = q0 ^ q1 ^ q2 ^ q5 ^ q6 ^ r0 ^ r2 ^ r3 ^ r5 ^ rotr16(q0 ^ q1 ^ q3 ^ q5 ^ q6 ^ q7 ^ r0 ^ r3 ^ r5 ^ r7);
+	q[4] = q1 ^ q2 ^ q3 ^ q5 ^ r1 ^ r3 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr16(q1 ^ q2 ^ q4 ^ q5 ^ q7 ^ r1 ^ r4 ^ r5 ^ r6);
+	q[5] = q2 ^ q3 ^ q4 ^ q6 ^ r2 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr16(q2 ^ q3 ^ q5 ^ q6 ^ r2 ^ r5 ^ r6 ^ r7);
+	q[6] = q3 ^ q4 ^ q5 ^ q7 ^ r3 ^ r5 ^ r6 ^ r7 ^ rotr16(q3 ^ q4 ^ q6 ^ q7 ^ r3 ^ r6 ^ r7);
+	q[7] = q4 ^ q5 ^ q6 ^ r4 ^ r6 ^ r7 ^ rotr16(q4 ^ q5 ^ q7 ^ r4 ^ r7);
+}
+
+/* Bitsliced AES decryption of the state q, in place; see inner.h */
+void
+br_aes_ct_bitslice_decrypt(unsigned num_rounds,
+	const uint32_t *skey, uint32_t *q)
+{
+	unsigned r = num_rounds;	/* subkeys are consumed last to first */
+
+	add_round_key(q, skey + 8 * r);
+	while (-- r > 0) {
+		inv_shift_rows(q);
+		br_aes_ct_bitslice_invSbox(q);
+		add_round_key(q, skey + 8 * r);
+		inv_mix_columns(q);
+	}
+	inv_shift_rows(q);
+	br_aes_ct_bitslice_invSbox(q);
+	add_round_key(q, skey);
+}
diff --git a/third_party/bearssl/src/aes_ct_enc.c b/third_party/bearssl/src/aes_ct_enc.c
new file mode 100644
index 0000000..089bf35
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct_enc.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static inline void
+add_round_key(uint32_t *q, const uint32_t *sk)
+{
+	int i;
+
+	/*
+	 * XOR the eight subkey words sk[0..7] into the state q[0..7].
+	 */
+	for (i = 0; i < 8; i ++) {
+		q[i] ^= sk[i];
+	}
+}
+
+static inline void
+shift_rows(uint32_t *q)
+{
+	/* Byte 0 of each word is kept; bytes 1, 2, 3 rotate right by 2, 4, 6 bits. */
+	uint32_t *end = q + 8;
+
+	for (; q != end; q ++) {
+		uint32_t w = *q;
+		uint32_t b1 = ((w >> 2) & 0x00003F00) | ((w << 6) & 0x0000C000);
+		uint32_t b2 = ((w >> 4) & 0x000F0000) | ((w << 4) & 0x00F00000);
+		uint32_t b3 = ((w >> 6) & 0x03000000) | ((w << 2) & 0xFC000000);
+
+		*q = (w & 0x000000FF) | b1 | b2 | b3;
+	}
+}
+
+static inline uint32_t
+rotr16(uint32_t x)
+{
+	return (x >> 16) | (x << 16);	/* swap the two 16-bit halves */
+}
+
+static inline void
+mix_columns(uint32_t *q)
+{
+	/*
+	 * MixColumns step on the bitsliced state q[0..7], in place.
+	 *
+	 * For each word i, r[i] is q[i] rotated right by 8 bits and
+	 * t[i] = q[i] ^ r[i]. Output word i is then
+	 *   t[i - 1] ^ r[i] ^ rotr16(t[i])
+	 * with t[7] standing in for t[i - 1] when i is 0; output
+	 * words 1, 3 and 4 additionally receive t[7].
+	 */
+	uint32_t r[8], t[8];
+	int i;
+
+	/*
+	 * All inputs are captured before any output is written, since
+	 * the update is done in place.
+	 */
+	for (i = 0; i < 8; i ++) {
+		r[i] = (q[i] >> 8) | (q[i] << 24);
+		t[i] = q[i] ^ r[i];
+	}
+	q[0] = t[7] ^ r[0] ^ rotr16(t[0]);
+	q[1] = t[0] ^ t[7] ^ r[1] ^ rotr16(t[1]);
+	q[2] = t[1] ^ r[2] ^ rotr16(t[2]);
+	q[3] = t[2] ^ t[7] ^ r[3] ^ rotr16(t[3]);
+	q[4] = t[3] ^ t[7] ^ r[4] ^ rotr16(t[4]);
+	q[5] = t[4] ^ r[5] ^ rotr16(t[5]);
+	q[6] = t[5] ^ r[6] ^ rotr16(t[6]);
+	q[7] = t[6] ^ r[7] ^ rotr16(t[7]);
+}
+
+/* Bitsliced AES encryption of the state q, in place; see inner.h */
+void
+br_aes_ct_bitslice_encrypt(unsigned num_rounds,
+	const uint32_t *skey, uint32_t *q)
+{
+	unsigned r = 0;	/* index of the subkey most recently applied */
+
+	add_round_key(q, skey);
+	while (++ r < num_rounds) {
+		br_aes_ct_bitslice_Sbox(q);
+		shift_rows(q);
+		mix_columns(q);
+		add_round_key(q, skey + 8 * r);
+	}
+	br_aes_ct_bitslice_Sbox(q);
+	shift_rows(q);
+	add_round_key(q, skey + 8 * num_rounds);
+}
diff --git a/third_party/bearssl/src/aes_pwr8.c b/third_party/bearssl/src/aes_pwr8.c
new file mode 100644
index 0000000..b2c63c3
--- /dev/null
+++ b/third_party/bearssl/src/aes_pwr8.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS   1
+#include "inner.h"
+
+/*
+ * This code contains the AES key schedule implementation using the
+ * POWER8 opcodes.
+ */
+
+#if BR_POWER8
+
+static void
+key_schedule_128(unsigned char *sk, const unsigned char *key)
+{
+	long cc;
+
+	static const uint32_t fmod[] = { 0x11B, 0x11B, 0x11B, 0x11B };
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc = 0;
+
+	/*
+	 * Expands the 16-byte key into 11 consecutive 16-byte subkeys at sk.
+	 * VSX instructions load and store the key/subkeys, since they
+	 * support unaligned accesses; the rest of the computation is VMX
+	 * only. VMX register 0 is VSX register 32.
+	 */
+	asm volatile (
+
+		/*
+		 * v0 = all-zero word
+		 * v1 = constant -8 / +8, copied into four words
+		 * v2 = current subkey
+		 * v3 = Rcon (x4 words)
+		 * v6 = constant 8, copied into four words
+		 * v7 = constant 0x11B, copied into four words
+		 * v8 = constant for byteswapping words
+		 */
+		vspltisw(0, 0)
+#if BR_POWER8_LE
+		vspltisw(1, -8)
+#else
+		vspltisw(1, 8)
+#endif
+		lxvw4x(34, 0, %[key])
+		vspltisw(3, 1)
+		vspltisw(6, 8)
+		lxvw4x(39, 0, %[fmod])
+#if BR_POWER8_LE
+		lxvw4x(40, 0, %[idx2be])
+#endif
+
+		/*
+		 * First subkey is a copy of the key itself.
+		 */
+#if BR_POWER8_LE
+		vperm(4, 2, 2, 8)
+		stxvw4x(36, 0, %[sk])
+#else
+		stxvw4x(34, 0, %[sk])
+#endif
+
+		/*
+		 * Loop must run 10 times.
+		 */
+		li(%[cc], 10)
+		mtctr(%[cc])
+	label(loop)
+		/* Increment subkey address */
+		addi(%[sk], %[sk], 16)
+
+		/* Compute SubWord(RotWord(temp)) xor Rcon  (into v4, splat) */
+		vrlw(4, 2, 1)
+		vsbox(4, 4)
+#if BR_POWER8_LE
+		vxor(4, 4, 3)
+#else
+		vsldoi(5, 3, 0, 3)
+		vxor(4, 4, 5)
+#endif
+		vspltw(4, 4, 3)
+
+		/* XOR words for next subkey */
+		vsldoi(5, 0, 2, 12)
+		vxor(2, 2, 5)
+		vsldoi(5, 0, 2, 12)
+		vxor(2, 2, 5)
+		vsldoi(5, 0, 2, 12)
+		vxor(2, 2, 5)
+		vxor(2, 2, 4)
+
+		/* Store next subkey */
+#if BR_POWER8_LE
+		vperm(4, 2, 2, 8)
+		stxvw4x(36, 0, %[sk])
+#else
+		stxvw4x(34, 0, %[sk])
+#endif
+
+		/* Update Rcon */
+		vadduwm(3, 3, 3)
+		vsrw(4, 3, 6)
+		vsubuwm(4, 0, 4)
+		vand(4, 4, 7)
+		vxor(3, 3, 4)
+
+		bdnz(loop)
+		/* v8 is in the clobber list because the little-endian path loads idx2be into it. */
+: [sk] "+b" (sk), [cc] "+b" (cc)
+: [key] "b" (key), [fmod] "b" (fmod)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "ctr", "memory"
+	);
+}
+
+static void
+key_schedule_192(unsigned char *sk, const unsigned char *key)
+{
+	long cc;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc = 0;
+
+	/*
+	 * Expands the 24-byte key into 13 consecutive 16-byte subkeys at sk.
+	 * VSX instructions load and store the key/subkeys, since they
+	 * support unaligned accesses; the rest of the computation is VMX
+	 * only. VMX register 0 is VSX register 32.
+	 */
+	asm volatile (
+
+		/*
+		 * v0 = all-zero word
+		 * v1 = constant -8 / +8, copied into four words
+		 * v2, v3 = current subkey
+		 * v5 = Rcon (x4 words) (already shifted on big-endian)
+		 * v6 = constant 8, copied into four words (not read afterwards)
+		 * v8 = constant for byteswapping words
+		 *
+		 * Only the left two words of v3 are meaningful (v3l below).
+		 */
+		vspltisw(0, 0)
+#if BR_POWER8_LE
+		vspltisw(1, -8)
+#else
+		vspltisw(1, 8)
+#endif
+		li(%[cc], 8)
+		lxvw4x(34, 0, %[key])
+		lxvw4x(35, %[cc], %[key])
+		vsldoi(3, 3, 0, 8)
+		vspltisw(5, 1)
+#if !BR_POWER8_LE
+		vsldoi(5, 5, 0, 3)
+#endif
+		vspltisw(6, 8)
+#if BR_POWER8_LE
+		lxvw4x(40, 0, %[idx2be])
+#endif
+
+		/*
+		 * Loop must run 8 times. Each iteration produces 256
+		 * bits of subkeys, with a 64-bit overlap.
+		 */
+		li(%[cc], 8)
+		mtctr(%[cc])
+		li(%[cc], 16)
+	label(loop)
+
+		/*
+		 * Last 6 words in v2:v3l. Compute next 6 words into
+		 * v3r:v4.
+		 */
+		vrlw(10, 3, 1)
+		vsbox(10, 10)
+		vxor(10, 10, 5)
+		vspltw(10, 10, 1)
+		vsldoi(11, 0, 10, 8)
+
+		vsldoi(12, 0, 2, 12)
+		vxor(12, 2, 12)
+		vsldoi(13, 0, 12, 12)
+		vxor(12, 12, 13)
+		vsldoi(13, 0, 12, 12)
+		vxor(12, 12, 13)
+
+		vspltw(13, 12, 3)
+		vxor(13, 13, 3)
+		vsldoi(14, 0, 3, 12)
+		vxor(13, 13, 14)
+
+		vsldoi(4, 12, 13, 8)
+		vsldoi(14, 0, 3, 8)
+		vsldoi(3, 14, 12, 8)
+
+		vxor(3, 3, 11)
+		vxor(4, 4, 10)
+
+		/*
+		 * Update Rcon. Since for a 192-bit key, we use only 8
+		 * such constants, we will not hit the field modulus,
+		 * so a simple shift (addition) works well.
+		 */
+		vadduwm(5, 5, 5)
+
+		/*
+		 * Write out the two left 128-bit words. sk then advances
+		 */
+#if BR_POWER8_LE
+		vperm(10, 2, 2, 8)
+		vperm(11, 3, 3, 8)
+		stxvw4x(42, 0, %[sk])
+		stxvw4x(43, %[cc], %[sk])
+#else
+		stxvw4x(34, 0, %[sk])
+		stxvw4x(35, %[cc], %[sk])
+#endif
+		addi(%[sk], %[sk], 24)
+
+		/*
+		 * Shift words for next iteration.
+		 */
+		vsldoi(2, 3, 4, 8)
+		vsldoi(3, 4, 0, 8)
+
+		bdnz(loop)
+
+		/*
+		 * The loop wrote the first 50 subkey words, but we need
+		 * to produce 52, so we must do one last write.
+		 */
+#if BR_POWER8_LE
+		vperm(10, 2, 2, 8)
+		stxvw4x(42, 0, %[sk])
+#else
+		stxvw4x(34, 0, %[sk])
+#endif
+
+: [sk] "+b" (sk), [cc] "+b" (cc)
+: [key] "b" (key)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+  "v8", "v9", "v10", "v11", "v12", "v13", "v14", "ctr", "memory"
+	);
+}
+
+static void
+key_schedule_256(unsigned char *sk, const unsigned char *key)
+{
+	long cc;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc = 0;
+
+	/*
+	 * Expands the 32-byte key into 15 consecutive 16-byte subkeys at sk.
+	 * VSX instructions load and store the key/subkeys, since they
+	 * support unaligned accesses; the rest of the computation is VMX
+	 * only. VMX register 0 is VSX register 32.
+	 */
+	asm volatile (
+
+		/*
+		 * v0 = all-zero word
+		 * v1 = constant -8 / +8, copied into four words
+		 * v2, v3 = current subkey
+		 * v6 = Rcon (x4 words) (already shifted on big-endian)
+		 * v7 = constant 8, copied into four words (not read afterwards)
+		 * v8 = constant for byteswapping words
+		 *
+		 * All four words of both v2 and v3 are used.
+		 */
+		vspltisw(0, 0)
+#if BR_POWER8_LE
+		vspltisw(1, -8)
+#else
+		vspltisw(1, 8)
+#endif
+		li(%[cc], 16)
+		lxvw4x(34, 0, %[key])
+		lxvw4x(35, %[cc], %[key])
+		vspltisw(6, 1)
+#if !BR_POWER8_LE
+		vsldoi(6, 6, 0, 3)
+#endif
+		vspltisw(7, 8)
+#if BR_POWER8_LE
+		lxvw4x(40, 0, %[idx2be])
+#endif
+
+		/*
+		 * Loop must run 7 times. Each iteration produces two
+		 * subkeys.
+		 */
+		li(%[cc], 7)
+		mtctr(%[cc])
+		li(%[cc], 16)
+	label(loop)
+
+		/*
+		 * Current words are in v2:v3. Compute next word in v4.
+		 */
+		vrlw(10, 3, 1)
+		vsbox(10, 10)
+		vxor(10, 10, 6)
+		vspltw(10, 10, 3)
+
+		vsldoi(4, 0, 2, 12)
+		vxor(4, 2, 4)
+		vsldoi(5, 0, 4, 12)
+		vxor(4, 4, 5)
+		vsldoi(5, 0, 4, 12)
+		vxor(4, 4, 5)
+		vxor(4, 4, 10)
+
+		/*
+		 * Then other word in v5 (S-box only: no rotation, no Rcon).
+		 */
+		vsbox(10, 4)
+		vspltw(10, 10, 3)
+
+		vsldoi(5, 0, 3, 12)
+		vxor(5, 3, 5)
+		vsldoi(11, 0, 5, 12)
+		vxor(5, 5, 11)
+		vsldoi(11, 0, 5, 12)
+		vxor(5, 5, 11)
+		vxor(5, 5, 10)
+
+		/*
+		 * Update Rcon. Since for a 256-bit key, we use only 7
+		 * such constants, we will not hit the field modulus,
+		 * so a simple shift (addition) works well.
+		 */
+		vadduwm(6, 6, 6)
+
+		/*
+		 * Write out the two left 128-bit words
+		 */
+#if BR_POWER8_LE
+		vperm(10, 2, 2, 8)
+		vperm(11, 3, 3, 8)
+		stxvw4x(42, 0, %[sk])
+		stxvw4x(43, %[cc], %[sk])
+#else
+		stxvw4x(34, 0, %[sk])
+		stxvw4x(35, %[cc], %[sk])
+#endif
+		addi(%[sk], %[sk], 32)
+
+		/*
+		 * Replace v2:v3 with v4:v5.
+		 */
+		vxor(2, 0, 4)
+		vxor(3, 0, 5)
+
+		bdnz(loop)
+
+		/*
+		 * The loop wrote the first 14 subkeys, but we need 15,
+		 * so we must do an extra write.
+		 */
+#if BR_POWER8_LE
+		vperm(10, 2, 2, 8)
+		stxvw4x(42, 0, %[sk])
+#else
+		stxvw4x(34, 0, %[sk])
+#endif
+
+: [sk] "+b" (sk), [cc] "+b" (cc)
+: [key] "b" (key)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+  "v8", "v9", "v10", "v11", "v12", "v13", "v14", "ctr", "memory"
+	);
+}
+
+/* Always reports support (1): this definition is only compiled when BR_POWER8 is set; see inner.h */
+int
+br_aes_pwr8_supported(void)
+{
+	return 1;
+}
+
+/* Expand key (len bytes) into sk and return the AES round count; see inner.h */
+unsigned
+br_aes_pwr8_keysched(unsigned char *sk, const void *key, size_t len)
+{
+	/* 16-byte key: 10 rounds; 24-byte key: 12 rounds; otherwise 14. */
+	if (len == 16) {
+		key_schedule_128(sk, key);
+		return 10;
+	}
+	if (len == 24) {
+		key_schedule_192(sk, key);
+		return 12;
+	}
+	key_schedule_256(sk, key);
+	return 14;
+}
+
+#endif
diff --git a/third_party/bearssl/src/aes_pwr8_cbcdec.c b/third_party/bearssl/src/aes_pwr8_cbcdec.c
new file mode 100644
index 0000000..e535ba6
--- /dev/null
+++ b/third_party/bearssl/src/aes_pwr8_cbcdec.c
@@ -0,0 +1,670 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS   1
+#include "inner.h"
+
+#if BR_POWER8
+
+/* Set up ctx for AES/CBC decryption with the given key; see bearssl_block.h */
+void
+br_aes_pwr8_cbcdec_init(br_aes_pwr8_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
+	ctx->vtable = &br_aes_pwr8_cbcdec_vtable;
+}
+
+static void
+cbcdec_128(const unsigned char *sk,
+	const unsigned char *iv, unsigned char *buf, size_t num_blocks)
+{
+	long cc0, cc1, cc2, cc3;
+	/* AES-128 CBC decryption of buf in place, 4 blocks (64 bytes) per loop pass; this file's caller passes a nonzero multiple of 4. */
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc0 = 0;	/* cc0..cc3: byte offsets of the four blocks handled per pass */
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v10
+		 */
+		lxvw4x(32, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(33, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(34, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(35, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(36, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(37, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(38, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(39, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(40, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(41, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(42, %[cc0], %[sk])
+		li(%[cc0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * Load IV into v24.
+		 */
+		lxvw4x(56, 0, %[iv])
+#if BR_POWER8_LE
+		vperm(24, 24, 24, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Load next ciphertext words in v16..v19. Also save them
+		 * in v20..v23.
+		 */
+		lxvw4x(48, %[cc0], %[buf])
+		lxvw4x(49, %[cc1], %[buf])
+		lxvw4x(50, %[cc2], %[buf])
+		lxvw4x(51, %[cc3], %[buf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		vand(20, 16, 16)
+		vand(21, 17, 17)
+		vand(22, 18, 18)
+		vand(23, 19, 19)
+
+		/*
+		 * Decrypt the blocks.
+		 */
+		vxor(16, 16, 10)
+		vxor(17, 17, 10)
+		vxor(18, 18, 10)
+		vxor(19, 19, 10)
+		vncipher(16, 16, 9)
+		vncipher(17, 17, 9)
+		vncipher(18, 18, 9)
+		vncipher(19, 19, 9)
+		vncipher(16, 16, 8)
+		vncipher(17, 17, 8)
+		vncipher(18, 18, 8)
+		vncipher(19, 19, 8)
+		vncipher(16, 16, 7)
+		vncipher(17, 17, 7)
+		vncipher(18, 18, 7)
+		vncipher(19, 19, 7)
+		vncipher(16, 16, 6)
+		vncipher(17, 17, 6)
+		vncipher(18, 18, 6)
+		vncipher(19, 19, 6)
+		vncipher(16, 16, 5)
+		vncipher(17, 17, 5)
+		vncipher(18, 18, 5)
+		vncipher(19, 19, 5)
+		vncipher(16, 16, 4)
+		vncipher(17, 17, 4)
+		vncipher(18, 18, 4)
+		vncipher(19, 19, 4)
+		vncipher(16, 16, 3)
+		vncipher(17, 17, 3)
+		vncipher(18, 18, 3)
+		vncipher(19, 19, 3)
+		vncipher(16, 16, 2)
+		vncipher(17, 17, 2)
+		vncipher(18, 18, 2)
+		vncipher(19, 19, 2)
+		vncipher(16, 16, 1)
+		vncipher(17, 17, 1)
+		vncipher(18, 18, 1)
+		vncipher(19, 19, 1)
+		vncipherlast(16, 16, 0)
+		vncipherlast(17, 17, 0)
+		vncipherlast(18, 18, 0)
+		vncipherlast(19, 19, 0)
+
+		/*
+		 * XOR decrypted blocks with IV / previous block.
+		 */
+		vxor(16, 16, 24)
+		vxor(17, 17, 20)
+		vxor(18, 18, 21)
+		vxor(19, 19, 22)
+
+		/*
+		 * Store back result (with byteswap)
+		 */
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		stxvw4x(48, %[cc0], %[buf])
+		stxvw4x(49, %[cc1], %[buf])
+		stxvw4x(50, %[cc2], %[buf])
+		stxvw4x(51, %[cc3], %[buf])
+
+		/*
+		 * Fourth encrypted block is IV for next run.
+		 */
+		vand(24, 23, 23)
+
+		addi(%[buf], %[buf], 64)
+
+		bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+  [buf] "+b" (buf)
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"
+	);
+}
+
+static void
+cbcdec_192(const unsigned char *sk,
+	const unsigned char *iv, unsigned char *buf, size_t num_blocks)
+{
+	long cc0, cc1, cc2, cc3;
+	/* AES-192 CBC decryption of buf in place, 4 blocks (64 bytes) per loop pass; this file's caller passes a nonzero multiple of 4. */
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc0 = 0;	/* cc0..cc3: byte offsets of the four blocks handled per pass */
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v12
+		 */
+		lxvw4x(32, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(33, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(34, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(35, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(36, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(37, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(38, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(39, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(40, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(41, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(42, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(43, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(44, %[cc0], %[sk])
+		li(%[cc0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * Load IV into v24.
+		 */
+		lxvw4x(56, 0, %[iv])
+#if BR_POWER8_LE
+		vperm(24, 24, 24, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Load next ciphertext words in v16..v19. Also save them
+		 * in v20..v23.
+		 */
+		lxvw4x(48, %[cc0], %[buf])
+		lxvw4x(49, %[cc1], %[buf])
+		lxvw4x(50, %[cc2], %[buf])
+		lxvw4x(51, %[cc3], %[buf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		vand(20, 16, 16)
+		vand(21, 17, 17)
+		vand(22, 18, 18)
+		vand(23, 19, 19)
+
+		/*
+		 * Decrypt the blocks.
+		 */
+		vxor(16, 16, 12)
+		vxor(17, 17, 12)
+		vxor(18, 18, 12)
+		vxor(19, 19, 12)
+		vncipher(16, 16, 11)
+		vncipher(17, 17, 11)
+		vncipher(18, 18, 11)
+		vncipher(19, 19, 11)
+		vncipher(16, 16, 10)
+		vncipher(17, 17, 10)
+		vncipher(18, 18, 10)
+		vncipher(19, 19, 10)
+		vncipher(16, 16, 9)
+		vncipher(17, 17, 9)
+		vncipher(18, 18, 9)
+		vncipher(19, 19, 9)
+		vncipher(16, 16, 8)
+		vncipher(17, 17, 8)
+		vncipher(18, 18, 8)
+		vncipher(19, 19, 8)
+		vncipher(16, 16, 7)
+		vncipher(17, 17, 7)
+		vncipher(18, 18, 7)
+		vncipher(19, 19, 7)
+		vncipher(16, 16, 6)
+		vncipher(17, 17, 6)
+		vncipher(18, 18, 6)
+		vncipher(19, 19, 6)
+		vncipher(16, 16, 5)
+		vncipher(17, 17, 5)
+		vncipher(18, 18, 5)
+		vncipher(19, 19, 5)
+		vncipher(16, 16, 4)
+		vncipher(17, 17, 4)
+		vncipher(18, 18, 4)
+		vncipher(19, 19, 4)
+		vncipher(16, 16, 3)
+		vncipher(17, 17, 3)
+		vncipher(18, 18, 3)
+		vncipher(19, 19, 3)
+		vncipher(16, 16, 2)
+		vncipher(17, 17, 2)
+		vncipher(18, 18, 2)
+		vncipher(19, 19, 2)
+		vncipher(16, 16, 1)
+		vncipher(17, 17, 1)
+		vncipher(18, 18, 1)
+		vncipher(19, 19, 1)
+		vncipherlast(16, 16, 0)
+		vncipherlast(17, 17, 0)
+		vncipherlast(18, 18, 0)
+		vncipherlast(19, 19, 0)
+
+		/*
+		 * XOR decrypted blocks with IV / previous block.
+		 */
+		vxor(16, 16, 24)
+		vxor(17, 17, 20)
+		vxor(18, 18, 21)
+		vxor(19, 19, 22)
+
+		/*
+		 * Store back result (with byteswap)
+		 */
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		stxvw4x(48, %[cc0], %[buf])
+		stxvw4x(49, %[cc1], %[buf])
+		stxvw4x(50, %[cc2], %[buf])
+		stxvw4x(51, %[cc3], %[buf])
+
+		/*
+		 * Fourth encrypted block is IV for next run.
+		 */
+		vand(24, 23, 23)
+
+		addi(%[buf], %[buf], 64)
+
+		bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+  [buf] "+b" (buf)
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"
+	);
+}
+
+static void
+cbcdec_256(const unsigned char *sk,
+	const unsigned char *iv, unsigned char *buf, size_t num_blocks)
+{
+	long cc0, cc1, cc2, cc3;
+	/* AES-256 CBC decryption of buf in place, 4 blocks (64 bytes) per loop pass; this file's caller passes a nonzero multiple of 4. */
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc0 = 0;	/* cc0..cc3: byte offsets of the four blocks handled per pass */
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v14
+		 */
+		lxvw4x(32, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(33, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(34, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(35, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(36, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(37, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(38, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(39, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(40, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(41, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(42, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(43, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(44, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(45, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(46, %[cc0], %[sk])
+		li(%[cc0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * Load IV into v24.
+		 */
+		lxvw4x(56, 0, %[iv])
+#if BR_POWER8_LE
+		vperm(24, 24, 24, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Load next ciphertext words in v16..v19. Also save them
+		 * in v20..v23.
+		 */
+		lxvw4x(48, %[cc0], %[buf])
+		lxvw4x(49, %[cc1], %[buf])
+		lxvw4x(50, %[cc2], %[buf])
+		lxvw4x(51, %[cc3], %[buf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		vand(20, 16, 16)
+		vand(21, 17, 17)
+		vand(22, 18, 18)
+		vand(23, 19, 19)
+
+		/*
+		 * Decrypt the blocks.
+		 */
+		vxor(16, 16, 14)
+		vxor(17, 17, 14)
+		vxor(18, 18, 14)
+		vxor(19, 19, 14)
+		vncipher(16, 16, 13)
+		vncipher(17, 17, 13)
+		vncipher(18, 18, 13)
+		vncipher(19, 19, 13)
+		vncipher(16, 16, 12)
+		vncipher(17, 17, 12)
+		vncipher(18, 18, 12)
+		vncipher(19, 19, 12)
+		vncipher(16, 16, 11)
+		vncipher(17, 17, 11)
+		vncipher(18, 18, 11)
+		vncipher(19, 19, 11)
+		vncipher(16, 16, 10)
+		vncipher(17, 17, 10)
+		vncipher(18, 18, 10)
+		vncipher(19, 19, 10)
+		vncipher(16, 16, 9)
+		vncipher(17, 17, 9)
+		vncipher(18, 18, 9)
+		vncipher(19, 19, 9)
+		vncipher(16, 16, 8)
+		vncipher(17, 17, 8)
+		vncipher(18, 18, 8)
+		vncipher(19, 19, 8)
+		vncipher(16, 16, 7)
+		vncipher(17, 17, 7)
+		vncipher(18, 18, 7)
+		vncipher(19, 19, 7)
+		vncipher(16, 16, 6)
+		vncipher(17, 17, 6)
+		vncipher(18, 18, 6)
+		vncipher(19, 19, 6)
+		vncipher(16, 16, 5)
+		vncipher(17, 17, 5)
+		vncipher(18, 18, 5)
+		vncipher(19, 19, 5)
+		vncipher(16, 16, 4)
+		vncipher(17, 17, 4)
+		vncipher(18, 18, 4)
+		vncipher(19, 19, 4)
+		vncipher(16, 16, 3)
+		vncipher(17, 17, 3)
+		vncipher(18, 18, 3)
+		vncipher(19, 19, 3)
+		vncipher(16, 16, 2)
+		vncipher(17, 17, 2)
+		vncipher(18, 18, 2)
+		vncipher(19, 19, 2)
+		vncipher(16, 16, 1)
+		vncipher(17, 17, 1)
+		vncipher(18, 18, 1)
+		vncipher(19, 19, 1)
+		vncipherlast(16, 16, 0)
+		vncipherlast(17, 17, 0)
+		vncipherlast(18, 18, 0)
+		vncipherlast(19, 19, 0)
+
+		/*
+		 * XOR decrypted blocks with IV / previous block.
+		 */
+		vxor(16, 16, 24)
+		vxor(17, 17, 20)
+		vxor(18, 18, 21)
+		vxor(19, 19, 22)
+
+		/*
+		 * Store back result (with byteswap)
+		 */
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		stxvw4x(48, %[cc0], %[buf])
+		stxvw4x(49, %[cc1], %[buf])
+		stxvw4x(50, %[cc2], %[buf])
+		stxvw4x(51, %[cc3], %[buf])
+
+		/*
+		 * Fourth encrypted block is IV for next run.
+		 */
+		vand(24, 23, 23)
+
+		addi(%[buf], %[buf], 64)
+
+		bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+  [buf] "+b" (buf)
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"
+	);
+}
+
+/* CBC-decrypt len bytes at data in place and update iv; see bearssl_block.h */
+void
+br_aes_pwr8_cbcdec_run(const br_aes_pwr8_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	void (*dec)(const unsigned char *, const unsigned char *,
+		unsigned char *, size_t);
+	unsigned char nextiv[16];
+	unsigned char *buf;
+
+	if (len == 0) {
+		return;
+	}
+	/* 10 rounds: AES-128; 12 rounds: AES-192; anything else: AES-256. */
+	switch (ctx->num_rounds) {
+	case 10:
+		dec = &cbcdec_128;
+		break;
+	case 12:
+		dec = &cbcdec_192;
+		break;
+	default:
+		dec = &cbcdec_256;
+		break;
+	}
+
+	/*
+	 * Decryption is done in place, so the last ciphertext block (the
+	 * IV for a subsequent call) is saved before it gets overwritten.
+	 */
+	buf = data;
+	memcpy(nextiv, buf + len - 16, 16);
+	if (len >= 64) {
+		/* Bulk part: the largest multiple of four blocks. */
+		size_t num_blocks = (len >> 4) & ~(size_t)3;
+		unsigned char tmp[16];
+
+		memcpy(tmp, buf + (num_blocks << 4) - 16, 16);
+		dec(ctx->skey.skni, iv, buf, num_blocks);
+		buf += num_blocks << 4;
+		len &= 63;
+		memcpy(iv, tmp, 16);
+	}
+	if (len > 0) {
+		/* Tail: zero-padded to four blocks in a scratch buffer. */
+		unsigned char tmp[64];
+
+		memcpy(tmp, buf, len);
+		memset(tmp + len, 0, (sizeof tmp) - len);
+		dec(ctx->skey.skni, iv, tmp, 4);
+		memcpy(buf, tmp, len);
+	}
+	memcpy(iv, nextiv, 16);
+}
+
+/* Method table for the POWER8 AES/CBC decryption implementation; see bearssl_block.h */
+const br_block_cbcdec_class br_aes_pwr8_cbcdec_vtable = {
+	sizeof(br_aes_pwr8_cbcdec_keys),	/* size of the concrete key/context structure */
+	16,	/* presumably the block size in bytes -- confirm in bearssl_block.h */
+	4,	/* presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))
+		&br_aes_pwr8_cbcdec_init,
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_aes_pwr8_cbcdec_run
+};
+
+/* Vtable if br_aes_pwr8_supported() reports support, NULL otherwise; see bearssl_block.h */
+const br_block_cbcdec_class *
+br_aes_pwr8_cbcdec_get_vtable(void)
+{
+	return br_aes_pwr8_supported() == 0 ? NULL : &br_aes_pwr8_cbcdec_vtable;
+}
+
+#else
+
+/* Fallback compiled when BR_POWER8 is not set: no vtable is available; see bearssl_block.h */
+const br_block_cbcdec_class *
+br_aes_pwr8_cbcdec_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
diff --git a/third_party/bearssl/src/aes_pwr8_cbcenc.c b/third_party/bearssl/src/aes_pwr8_cbcenc.c
new file mode 100644
index 0000000..00f8eca
--- /dev/null
+++ b/third_party/bearssl/src/aes_pwr8_cbcenc.c
@@ -0,0 +1,417 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS   1
+#include "inner.h"
+
+#if BR_POWER8
+
+/* see bearssl_block.h: run the key schedule, then bind the vtable */
+void
+br_aes_pwr8_cbcenc_init(br_aes_pwr8_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
+	ctx->vtable = &br_aes_pwr8_cbcenc_vtable;
+}
+
+static void
+cbcenc_128(const unsigned char *sk,
+	const unsigned char *iv, unsigned char *buf, size_t len)
+{
+	long cc;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+	/* CBC-encrypt (len >> 4) blocks of buf in place; iv is only read. */
+	cc = 0;
+	asm volatile (
+
+		/*
+		 * Load the 11 subkeys into v0..v10 (VSX registers 32..42).
+		 */
+		lxvw4x(32, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(33, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(34, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(35, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(36, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(37, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(38, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(39, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(40, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(41, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(42, %[cc], %[sk])
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = vperm index that byteswaps each 32-bit word
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * Load IV into v16 (byteswapped below on little-endian).
+		 */
+		lxvw4x(48, 0, %[iv])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+#endif
+		/* Loop count; tested after each pass, so len must be >= 16. */
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Load next plaintext block and XOR with current IV.
+		 */
+		lxvw4x(49, 0, %[buf])
+#if BR_POWER8_LE
+		vperm(17, 17, 17, 15)
+#endif
+		vxor(16, 16, 17)
+
+		/*
+		 * Encrypt the block: key XOR, 9 vcipher rounds, vcipherlast.
+		 */
+		vxor(16, 16, 0)
+		vcipher(16, 16, 1)
+		vcipher(16, 16, 2)
+		vcipher(16, 16, 3)
+		vcipher(16, 16, 4)
+		vcipher(16, 16, 5)
+		vcipher(16, 16, 6)
+		vcipher(16, 16, 7)
+		vcipher(16, 16, 8)
+		vcipher(16, 16, 9)
+		vcipherlast(16, 16, 10)
+
+		/*
+		 * Store back result (with byteswap); v16 is kept as next IV.
+		 */
+#if BR_POWER8_LE
+		vperm(17, 16, 16, 15)
+		stxvw4x(49, 0, %[buf])
+#else
+		stxvw4x(48, 0, %[buf])
+#endif
+		addi(%[buf], %[buf], 16)
+
+		bdnz(loop)
+
+: [cc] "+b" (cc), [buf] "+b" (buf)
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "ctr", "memory"
+	);
+}
+
+static void
+cbcenc_192(const unsigned char *sk,
+	const unsigned char *iv, unsigned char *buf, size_t len)
+{
+	long cc;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+	/* CBC-encrypt (len >> 4) blocks of buf in place; iv is only read. */
+	cc = 0;
+	asm volatile (
+
+		/*
+		 * Load the 13 subkeys into v0..v12 (VSX registers 32..44).
+		 */
+		lxvw4x(32, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(33, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(34, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(35, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(36, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(37, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(38, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(39, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(40, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(41, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(42, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(43, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(44, %[cc], %[sk])
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = vperm index that byteswaps each 32-bit word
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * Load IV into v16 (byteswapped below on little-endian).
+		 */
+		lxvw4x(48, 0, %[iv])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+#endif
+		/* Loop count; tested after each pass, so len must be >= 16. */
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Load next plaintext block and XOR with current IV.
+		 */
+		lxvw4x(49, 0, %[buf])
+#if BR_POWER8_LE
+		vperm(17, 17, 17, 15)
+#endif
+		vxor(16, 16, 17)
+
+		/*
+		 * Encrypt the block: key XOR, 11 vcipher rounds, vcipherlast.
+		 */
+		vxor(16, 16, 0)
+		vcipher(16, 16, 1)
+		vcipher(16, 16, 2)
+		vcipher(16, 16, 3)
+		vcipher(16, 16, 4)
+		vcipher(16, 16, 5)
+		vcipher(16, 16, 6)
+		vcipher(16, 16, 7)
+		vcipher(16, 16, 8)
+		vcipher(16, 16, 9)
+		vcipher(16, 16, 10)
+		vcipher(16, 16, 11)
+		vcipherlast(16, 16, 12)
+
+		/*
+		 * Store back result (with byteswap); v16 is kept as next IV.
+		 */
+#if BR_POWER8_LE
+		vperm(17, 16, 16, 15)
+		stxvw4x(49, 0, %[buf])
+#else
+		stxvw4x(48, 0, %[buf])
+#endif
+		addi(%[buf], %[buf], 16)
+
+		bdnz(loop)
+
+: [cc] "+b" (cc), [buf] "+b" (buf)
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "ctr", "memory"
+	);
+}
+
+static void
+cbcenc_256(const unsigned char *sk,
+	const unsigned char *iv, unsigned char *buf, size_t len)
+{
+	long cc;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+	/* CBC-encrypt (len >> 4) blocks of buf in place; iv is only read. */
+	cc = 0;
+	asm volatile (
+
+		/*
+		 * Load the 15 subkeys into v0..v14 (VSX registers 32..46).
+		 */
+		lxvw4x(32, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(33, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(34, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(35, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(36, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(37, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(38, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(39, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(40, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(41, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(42, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(43, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(44, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(45, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(46, %[cc], %[sk])
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = vperm index that byteswaps each 32-bit word
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * Load IV into v16 (byteswapped below on little-endian).
+		 */
+		lxvw4x(48, 0, %[iv])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+#endif
+		/* Loop count; tested after each pass, so len must be >= 16. */
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Load next plaintext block and XOR with current IV.
+		 */
+		lxvw4x(49, 0, %[buf])
+#if BR_POWER8_LE
+		vperm(17, 17, 17, 15)
+#endif
+		vxor(16, 16, 17)
+
+		/*
+		 * Encrypt the block: key XOR, 13 vcipher rounds, vcipherlast.
+		 */
+		vxor(16, 16, 0)
+		vcipher(16, 16, 1)
+		vcipher(16, 16, 2)
+		vcipher(16, 16, 3)
+		vcipher(16, 16, 4)
+		vcipher(16, 16, 5)
+		vcipher(16, 16, 6)
+		vcipher(16, 16, 7)
+		vcipher(16, 16, 8)
+		vcipher(16, 16, 9)
+		vcipher(16, 16, 10)
+		vcipher(16, 16, 11)
+		vcipher(16, 16, 12)
+		vcipher(16, 16, 13)
+		vcipherlast(16, 16, 14)
+
+		/*
+		 * Store back result (with byteswap); v16 is kept as next IV.
+		 */
+#if BR_POWER8_LE
+		vperm(17, 16, 16, 15)
+		stxvw4x(49, 0, %[buf])
+#else
+		stxvw4x(48, 0, %[buf])
+#endif
+		addi(%[buf], %[buf], 16)
+
+		bdnz(loop)
+
+: [cc] "+b" (cc), [buf] "+b" (buf)
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "ctr", "memory"
+	);
+}
+
+/* see bearssl_block.h */
+void
+br_aes_pwr8_cbcenc_run(const br_aes_pwr8_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *out = data;
+
+	if (len == 0) {
+		return;
+	}
+	if (ctx->num_rounds == 10) {
+		cbcenc_128(ctx->skey.skni, iv, out, len);
+	} else if (ctx->num_rounds == 12) {
+		cbcenc_192(ctx->skey.skni, iv, out, len);
+	} else {
+		cbcenc_256(ctx->skey.skni, iv, out, len);
+	}
+	/* The last ciphertext block becomes the IV for the next call. */
+	memcpy(iv, out + (len - 16), 16);
+}
+
+/* see bearssl_block.h -- class descriptor for the POWER8 AES/CBC-encrypt code */
+const br_block_cbcenc_class br_aes_pwr8_cbcenc_vtable = {
+	sizeof(br_aes_pwr8_cbcenc_keys),
+	16,	/* NOTE(review): presumably the block size in bytes -- confirm */
+	4,	/* NOTE(review): presumably log2 of the block size -- confirm */
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
+		&br_aes_pwr8_cbcenc_init,
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+		&br_aes_pwr8_cbcenc_run
+};
+
+/* see bearssl_block.h: NULL unless br_aes_pwr8_supported() returns nonzero */
+const br_block_cbcenc_class *
+br_aes_pwr8_cbcenc_get_vtable(void)
+{
+	return !br_aes_pwr8_supported() ? NULL : &br_aes_pwr8_cbcenc_vtable;
+}
+
+#else
+
+/* see bearssl_block.h -- BR_POWER8 is off: no implementation, always NULL */
+const br_block_cbcenc_class *
+br_aes_pwr8_cbcenc_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
diff --git a/third_party/bearssl/src/aes_pwr8_ctr.c b/third_party/bearssl/src/aes_pwr8_ctr.c
new file mode 100644
index 0000000..f5d20c0
--- /dev/null
+++ b/third_party/bearssl/src/aes_pwr8_ctr.c
@@ -0,0 +1,717 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS   1
+#include "inner.h"
+
+#if BR_POWER8
+
+/* see bearssl_block.h: run the key schedule, then bind the vtable */
+void
+br_aes_pwr8_ctr_init(br_aes_pwr8_ctr_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
+	ctx->vtable = &br_aes_pwr8_ctr_vtable;
+}
+
+static void
+ctr_128(const unsigned char *sk, const unsigned char *ivbuf,
+	unsigned char *buf, size_t num_blocks)
+{
+	long cc0, cc1, cc2, cc3;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+	static const uint32_t ctrinc[] = {
+		0, 0, 0, 4
+	};
+	/* cc0..cc3: byte offsets of the four blocks handled per iteration. */
+	cc0 = 0;
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v10; cc0 is the offset, reset after.
+		 */
+		lxvw4x(32, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(33, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(34, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(35, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(36, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(37, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(38, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(39, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(40, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(41, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(42, %[cc0], %[sk])
+		li(%[cc0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = vperm index that byteswaps each 32-bit word
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * v28 = increment for IV counter (+4 on the last word).
+		 */
+		lxvw4x(60, 0, %[ctrinc])
+
+		/*
+		 * Load the four counter blocks from ivbuf into v16..v19
+		 */
+		lxvw4x(48, %[cc0], %[ivbuf])
+		lxvw4x(49, %[cc1], %[ivbuf])
+		lxvw4x(50, %[cc2], %[ivbuf])
+		lxvw4x(51, %[cc3], %[ivbuf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		/* Loop count (num_blocks >> 2); tested after each pass: >= 1. */
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Compute next IV into v24..v27 (word-wise add of v28).
+		 */
+		vadduwm(24, 16, 28)
+		vadduwm(25, 17, 28)
+		vadduwm(26, 18, 28)
+		vadduwm(27, 19, 28)
+
+		/*
+		 * Load next data blocks (v20..v23). We do this early on but
+		 * we won't need them until IV encryption is done.
+		 */
+		lxvw4x(52, %[cc0], %[buf])
+		lxvw4x(53, %[cc1], %[buf])
+		lxvw4x(54, %[cc2], %[buf])
+		lxvw4x(55, %[cc3], %[buf])
+
+		/*
+		 * Encrypt the current IV (four blocks interleaved).
+		 */
+		vxor(16, 16, 0)
+		vxor(17, 17, 0)
+		vxor(18, 18, 0)
+		vxor(19, 19, 0)
+		vcipher(16, 16, 1)
+		vcipher(17, 17, 1)
+		vcipher(18, 18, 1)
+		vcipher(19, 19, 1)
+		vcipher(16, 16, 2)
+		vcipher(17, 17, 2)
+		vcipher(18, 18, 2)
+		vcipher(19, 19, 2)
+		vcipher(16, 16, 3)
+		vcipher(17, 17, 3)
+		vcipher(18, 18, 3)
+		vcipher(19, 19, 3)
+		vcipher(16, 16, 4)
+		vcipher(17, 17, 4)
+		vcipher(18, 18, 4)
+		vcipher(19, 19, 4)
+		vcipher(16, 16, 5)
+		vcipher(17, 17, 5)
+		vcipher(18, 18, 5)
+		vcipher(19, 19, 5)
+		vcipher(16, 16, 6)
+		vcipher(17, 17, 6)
+		vcipher(18, 18, 6)
+		vcipher(19, 19, 6)
+		vcipher(16, 16, 7)
+		vcipher(17, 17, 7)
+		vcipher(18, 18, 7)
+		vcipher(19, 19, 7)
+		vcipher(16, 16, 8)
+		vcipher(17, 17, 8)
+		vcipher(18, 18, 8)
+		vcipher(19, 19, 8)
+		vcipher(16, 16, 9)
+		vcipher(17, 17, 9)
+		vcipher(18, 18, 9)
+		vcipher(19, 19, 9)
+		vcipherlast(16, 16, 10)
+		vcipherlast(17, 17, 10)
+		vcipherlast(18, 18, 10)
+		vcipherlast(19, 19, 10)
+		/* Undo the byteswap to match the unswapped data in v20..v23. */
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+
+		/*
+		 * XOR data blocks (v20..v23) with encrypted IV; store back.
+		 */
+		vxor(16, 20, 16)
+		vxor(17, 21, 17)
+		vxor(18, 22, 18)
+		vxor(19, 23, 19)
+		stxvw4x(48, %[cc0], %[buf])
+		stxvw4x(49, %[cc1], %[buf])
+		stxvw4x(50, %[cc2], %[buf])
+		stxvw4x(51, %[cc3], %[buf])
+
+		addi(%[buf], %[buf], 64)
+
+		/*
+		 * Update IV: vand(x, y, y) copies v24..v27 into v16..v19.
+		 */
+		vand(16, 24, 24)
+		vand(17, 25, 25)
+		vand(18, 26, 26)
+		vand(19, 27, 27)
+
+		bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+  [buf] "+b" (buf)
+: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
+  [ctrinc] "b" (ctrinc)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"
+	);
+}
+
+static void
+ctr_192(const unsigned char *sk, const unsigned char *ivbuf,
+	unsigned char *buf, size_t num_blocks)
+{
+	long cc0, cc1, cc2, cc3;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+	static const uint32_t ctrinc[] = {
+		0, 0, 0, 4
+	};
+	/* cc0..cc3: byte offsets of the four blocks handled per iteration. */
+	cc0 = 0;
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v12; cc0 is the offset, reset after.
+		 */
+		lxvw4x(32, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(33, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(34, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(35, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(36, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(37, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(38, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(39, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(40, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(41, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(42, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(43, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(44, %[cc0], %[sk])
+		li(%[cc0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = vperm index that byteswaps each 32-bit word
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * v28 = increment for IV counter (+4 on the last word).
+		 */
+		lxvw4x(60, 0, %[ctrinc])
+
+		/*
+		 * Load the four counter blocks from ivbuf into v16..v19
+		 */
+		lxvw4x(48, %[cc0], %[ivbuf])
+		lxvw4x(49, %[cc1], %[ivbuf])
+		lxvw4x(50, %[cc2], %[ivbuf])
+		lxvw4x(51, %[cc3], %[ivbuf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		/* Loop count (num_blocks >> 2); tested after each pass: >= 1. */
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Compute next IV into v24..v27 (word-wise add of v28).
+		 */
+		vadduwm(24, 16, 28)
+		vadduwm(25, 17, 28)
+		vadduwm(26, 18, 28)
+		vadduwm(27, 19, 28)
+
+		/*
+		 * Load next data blocks (v20..v23). We do this early on but
+		 * we won't need them until IV encryption is done.
+		 */
+		lxvw4x(52, %[cc0], %[buf])
+		lxvw4x(53, %[cc1], %[buf])
+		lxvw4x(54, %[cc2], %[buf])
+		lxvw4x(55, %[cc3], %[buf])
+
+		/*
+		 * Encrypt the current IV (four blocks interleaved).
+		 */
+		vxor(16, 16, 0)
+		vxor(17, 17, 0)
+		vxor(18, 18, 0)
+		vxor(19, 19, 0)
+		vcipher(16, 16, 1)
+		vcipher(17, 17, 1)
+		vcipher(18, 18, 1)
+		vcipher(19, 19, 1)
+		vcipher(16, 16, 2)
+		vcipher(17, 17, 2)
+		vcipher(18, 18, 2)
+		vcipher(19, 19, 2)
+		vcipher(16, 16, 3)
+		vcipher(17, 17, 3)
+		vcipher(18, 18, 3)
+		vcipher(19, 19, 3)
+		vcipher(16, 16, 4)
+		vcipher(17, 17, 4)
+		vcipher(18, 18, 4)
+		vcipher(19, 19, 4)
+		vcipher(16, 16, 5)
+		vcipher(17, 17, 5)
+		vcipher(18, 18, 5)
+		vcipher(19, 19, 5)
+		vcipher(16, 16, 6)
+		vcipher(17, 17, 6)
+		vcipher(18, 18, 6)
+		vcipher(19, 19, 6)
+		vcipher(16, 16, 7)
+		vcipher(17, 17, 7)
+		vcipher(18, 18, 7)
+		vcipher(19, 19, 7)
+		vcipher(16, 16, 8)
+		vcipher(17, 17, 8)
+		vcipher(18, 18, 8)
+		vcipher(19, 19, 8)
+		vcipher(16, 16, 9)
+		vcipher(17, 17, 9)
+		vcipher(18, 18, 9)
+		vcipher(19, 19, 9)
+		vcipher(16, 16, 10)
+		vcipher(17, 17, 10)
+		vcipher(18, 18, 10)
+		vcipher(19, 19, 10)
+		vcipher(16, 16, 11)
+		vcipher(17, 17, 11)
+		vcipher(18, 18, 11)
+		vcipher(19, 19, 11)
+		vcipherlast(16, 16, 12)
+		vcipherlast(17, 17, 12)
+		vcipherlast(18, 18, 12)
+		vcipherlast(19, 19, 12)
+		/* Undo the byteswap to match the unswapped data in v20..v23. */
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+
+		/*
+		 * XOR data blocks (v20..v23) with encrypted IV; store back.
+		 */
+		vxor(16, 20, 16)
+		vxor(17, 21, 17)
+		vxor(18, 22, 18)
+		vxor(19, 23, 19)
+		stxvw4x(48, %[cc0], %[buf])
+		stxvw4x(49, %[cc1], %[buf])
+		stxvw4x(50, %[cc2], %[buf])
+		stxvw4x(51, %[cc3], %[buf])
+
+		addi(%[buf], %[buf], 64)
+
+		/*
+		 * Update IV: vand(x, y, y) copies v24..v27 into v16..v19.
+		 */
+		vand(16, 24, 24)
+		vand(17, 25, 25)
+		vand(18, 26, 26)
+		vand(19, 27, 27)
+
+		bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+  [buf] "+b" (buf)
+: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
+  [ctrinc] "b" (ctrinc)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"
+	);
+}
+
+static void
+ctr_256(const unsigned char *sk, const unsigned char *ivbuf,
+	unsigned char *buf, size_t num_blocks)
+{
+	long cc0, cc1, cc2, cc3;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+	static const uint32_t ctrinc[] = {
+		0, 0, 0, 4
+	};
+	/* cc0..cc3: byte offsets of the four blocks handled per iteration. */
+	cc0 = 0;
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v14; cc0 is the offset, reset after.
+		 */
+		lxvw4x(32, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(33, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(34, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(35, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(36, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(37, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(38, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(39, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(40, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(41, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(42, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(43, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(44, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(45, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(46, %[cc0], %[sk])
+		li(%[cc0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = vperm index that byteswaps each 32-bit word
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * v28 = increment for IV counter (+4 on the last word).
+		 */
+		lxvw4x(60, 0, %[ctrinc])
+
+		/*
+		 * Load the four counter blocks from ivbuf into v16..v19
+		 */
+		lxvw4x(48, %[cc0], %[ivbuf])
+		lxvw4x(49, %[cc1], %[ivbuf])
+		lxvw4x(50, %[cc2], %[ivbuf])
+		lxvw4x(51, %[cc3], %[ivbuf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		/* Loop count (num_blocks >> 2); tested after each pass: >= 1. */
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Compute next IV into v24..v27 (word-wise add of v28).
+		 */
+		vadduwm(24, 16, 28)
+		vadduwm(25, 17, 28)
+		vadduwm(26, 18, 28)
+		vadduwm(27, 19, 28)
+
+		/*
+		 * Load next data blocks (v20..v23). We do this early on but
+		 * we won't need them until IV encryption is done.
+		 */
+		lxvw4x(52, %[cc0], %[buf])
+		lxvw4x(53, %[cc1], %[buf])
+		lxvw4x(54, %[cc2], %[buf])
+		lxvw4x(55, %[cc3], %[buf])
+
+		/*
+		 * Encrypt the current IV (four blocks interleaved).
+		 */
+		vxor(16, 16, 0)
+		vxor(17, 17, 0)
+		vxor(18, 18, 0)
+		vxor(19, 19, 0)
+		vcipher(16, 16, 1)
+		vcipher(17, 17, 1)
+		vcipher(18, 18, 1)
+		vcipher(19, 19, 1)
+		vcipher(16, 16, 2)
+		vcipher(17, 17, 2)
+		vcipher(18, 18, 2)
+		vcipher(19, 19, 2)
+		vcipher(16, 16, 3)
+		vcipher(17, 17, 3)
+		vcipher(18, 18, 3)
+		vcipher(19, 19, 3)
+		vcipher(16, 16, 4)
+		vcipher(17, 17, 4)
+		vcipher(18, 18, 4)
+		vcipher(19, 19, 4)
+		vcipher(16, 16, 5)
+		vcipher(17, 17, 5)
+		vcipher(18, 18, 5)
+		vcipher(19, 19, 5)
+		vcipher(16, 16, 6)
+		vcipher(17, 17, 6)
+		vcipher(18, 18, 6)
+		vcipher(19, 19, 6)
+		vcipher(16, 16, 7)
+		vcipher(17, 17, 7)
+		vcipher(18, 18, 7)
+		vcipher(19, 19, 7)
+		vcipher(16, 16, 8)
+		vcipher(17, 17, 8)
+		vcipher(18, 18, 8)
+		vcipher(19, 19, 8)
+		vcipher(16, 16, 9)
+		vcipher(17, 17, 9)
+		vcipher(18, 18, 9)
+		vcipher(19, 19, 9)
+		vcipher(16, 16, 10)
+		vcipher(17, 17, 10)
+		vcipher(18, 18, 10)
+		vcipher(19, 19, 10)
+		vcipher(16, 16, 11)
+		vcipher(17, 17, 11)
+		vcipher(18, 18, 11)
+		vcipher(19, 19, 11)
+		vcipher(16, 16, 12)
+		vcipher(17, 17, 12)
+		vcipher(18, 18, 12)
+		vcipher(19, 19, 12)
+		vcipher(16, 16, 13)
+		vcipher(17, 17, 13)
+		vcipher(18, 18, 13)
+		vcipher(19, 19, 13)
+		vcipherlast(16, 16, 14)
+		vcipherlast(17, 17, 14)
+		vcipherlast(18, 18, 14)
+		vcipherlast(19, 19, 14)
+		/* Undo the byteswap to match the unswapped data in v20..v23. */
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+
+		/*
+		 * XOR data blocks (v20..v23) with encrypted IV; store back.
+		 */
+		vxor(16, 20, 16)
+		vxor(17, 21, 17)
+		vxor(18, 22, 18)
+		vxor(19, 23, 19)
+		stxvw4x(48, %[cc0], %[buf])
+		stxvw4x(49, %[cc1], %[buf])
+		stxvw4x(50, %[cc2], %[buf])
+		stxvw4x(51, %[cc3], %[buf])
+
+		addi(%[buf], %[buf], 64)
+
+		/*
+		 * Update IV: vand(x, y, y) copies v24..v27 into v16..v19.
+		 */
+		vand(16, 24, 24)
+		vand(17, 25, 25)
+		vand(18, 26, 26)
+		vand(19, 27, 27)
+
+		bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+  [buf] "+b" (buf)
+: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
+  [ctrinc] "b" (ctrinc)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"
+	);
+}
+
+/*
+ * Store the counters cc .. cc+3 with br_enc32be() into the last four
+ * bytes of each 16-byte block of ivbuf, then run the CTR routine picked
+ * by ctx->num_rounds: 10 -> ctr_128(), 12 -> ctr_192(), else ctr_256().
+ */
+static void
+ctr_blocks(const br_aes_pwr8_ctr_keys *ctx, unsigned char *ivbuf,
+	uint32_t cc, unsigned char *blocks, size_t num_blocks)
+{
+	uint32_t i;
+
+	for (i = 0; i < 4; i ++) {
+		br_enc32be(ivbuf + (i << 4) + 12, cc + i);
+	}
+	switch (ctx->num_rounds) {
+	case 10:
+		ctr_128(ctx->skey.skni, ivbuf, blocks, num_blocks);
+		break;
+	case 12:
+		ctr_192(ctx->skey.skni, ivbuf, blocks, num_blocks);
+		break;
+	default:
+		ctr_256(ctx->skey.skni, ivbuf, blocks, num_blocks);
+		break;
+	}
+}
+
+/* see bearssl_block.h */
+uint32_t
+br_aes_pwr8_ctr_run(const br_aes_pwr8_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char ivbuf[64], tail[64];
+	unsigned char *cur;
+	size_t u, bulk_blocks;
+
+	cur = data;
+	/* Each of the four IV blocks starts with the same 12 IV bytes. */
+	for (u = 0; u < sizeof ivbuf; u += 16) {
+		memcpy(ivbuf + u, iv, 12);
+	}
+	if (len >= 64) {
+		/* Whole groups of four blocks are processed in place. */
+		bulk_blocks = (len >> 4) & ~(size_t)3;
+		ctr_blocks(ctx, ivbuf, cc, cur, bulk_blocks);
+		cc += bulk_blocks;
+		cur += len & ~(size_t)63;
+		len &= 63;
+	}
+	if (len > 0) {
+		/* Tail (fewer than 64 bytes): process a zero-padded copy of
+		   four blocks and copy back only the first len bytes. */
+		memcpy(tail, cur, len);
+		memset(tail + len, 0, (sizeof tail) - len);
+		ctr_blocks(ctx, ivbuf, cc, tail, 4);
+		memcpy(cur, tail, len);
+		/* Advance by the blocks used; a partial block counts as one. */
+		cc += (len + 15) >> 4;
+	}
+	return cc;
+}
+
+/* see bearssl_block.h -- class descriptor for the POWER8 AES/CTR code */
+const br_block_ctr_class br_aes_pwr8_ctr_vtable = {
+	sizeof(br_aes_pwr8_ctr_keys),
+	16,	/* NOTE(review): presumably the block size in bytes -- confirm */
+	4,	/* NOTE(review): presumably log2 of the block size -- confirm */
+	(void (*)(const br_block_ctr_class **, const void *, size_t))
+		&br_aes_pwr8_ctr_init,
+	(uint32_t (*)(const br_block_ctr_class *const *,
+		const void *, uint32_t, void *, size_t))
+		&br_aes_pwr8_ctr_run
+};
+
+/* see bearssl_block.h: NULL unless br_aes_pwr8_supported() returns nonzero */
+const br_block_ctr_class *
+br_aes_pwr8_ctr_get_vtable(void)
+{
+	return !br_aes_pwr8_supported() ? NULL : &br_aes_pwr8_ctr_vtable;
+}
+
+#else
+
+/* see bearssl_block.h -- BR_POWER8 is off: no implementation, always NULL */
+const br_block_ctr_class *
+br_aes_pwr8_ctr_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
diff --git a/third_party/bearssl/src/aes_pwr8_ctrcbc.c b/third_party/bearssl/src/aes_pwr8_ctrcbc.c
new file mode 100644
index 0000000..a67d30b
--- /dev/null
+++ b/third_party/bearssl/src/aes_pwr8_ctrcbc.c
@@ -0,0 +1,946 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS   1
+#include "inner.h"
+
+#if BR_POWER8
+
+/* see bearssl_block.h; yields NULL when br_aes_pwr8_supported() reports no support */
+const br_block_ctrcbc_class *
+br_aes_pwr8_ctrcbc_get_vtable(void)
+{
+	return br_aes_pwr8_supported() ? &br_aes_pwr8_ctrcbc_vtable : NULL;
+}
+
+/* see bearssl_block.h; records the vtable and the round count returned by the key schedule */
+void
+br_aes_pwr8_ctrcbc_init(br_aes_pwr8_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_pwr8_ctrcbc_vtable;
+	ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
+}
+
+/*
+ * Register conventions for CTR + CBC-MAC:
+ *
+ *   AES subkeys are in registers v0 to v10/v12/v14 (depending on key size)
+ *   Register v15 contains the byteswap index register (little-endian only)
+ *   Register v16 contains the CTR counter value
+ *   Register v17 contains the CBC-MAC current value
+ *   Registers v18 to v27 are scratch
+ *   Counter increment uses v28, v29 and v30
+ *
+ * For CTR alone:
+ *
+ *   AES subkeys are in registers v0 to v10/v12/v14 (depending on key size)
+ *   Register v15 contains the byteswap index register (little-endian only)
+ *   Registers v16 to v19 contain the CTR counter values (four blocks)
+ *   Registers v20 to v27 are scratch
+ *   Counter increment uses v28, v29 and v30
+ */
+
+#define LOAD_SUBKEYS_128 \
+		lxvw4x(32, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(33, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(34, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(35, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(36, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(37, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(38, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(39, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(40, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(41, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(42, %[cc], %[sk])
+
+#define LOAD_SUBKEYS_192 \
+		LOAD_SUBKEYS_128 \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(43, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(44, %[cc], %[sk])
+
+#define LOAD_SUBKEYS_256 \
+		LOAD_SUBKEYS_192 \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(45, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(46, %[cc], %[sk])
+
+#define BLOCK_ENCRYPT_128(x) \
+		vxor(x, x, 0) \
+		vcipher(x, x, 1) \
+		vcipher(x, x, 2) \
+		vcipher(x, x, 3) \
+		vcipher(x, x, 4) \
+		vcipher(x, x, 5) \
+		vcipher(x, x, 6) \
+		vcipher(x, x, 7) \
+		vcipher(x, x, 8) \
+		vcipher(x, x, 9) \
+		vcipherlast(x, x, 10)
+
+#define BLOCK_ENCRYPT_192(x) \
+		vxor(x, x, 0) \
+		vcipher(x, x, 1) \
+		vcipher(x, x, 2) \
+		vcipher(x, x, 3) \
+		vcipher(x, x, 4) \
+		vcipher(x, x, 5) \
+		vcipher(x, x, 6) \
+		vcipher(x, x, 7) \
+		vcipher(x, x, 8) \
+		vcipher(x, x, 9) \
+		vcipher(x, x, 10) \
+		vcipher(x, x, 11) \
+		vcipherlast(x, x, 12)
+
+#define BLOCK_ENCRYPT_256(x) \
+		vxor(x, x, 0) \
+		vcipher(x, x, 1) \
+		vcipher(x, x, 2) \
+		vcipher(x, x, 3) \
+		vcipher(x, x, 4) \
+		vcipher(x, x, 5) \
+		vcipher(x, x, 6) \
+		vcipher(x, x, 7) \
+		vcipher(x, x, 8) \
+		vcipher(x, x, 9) \
+		vcipher(x, x, 10) \
+		vcipher(x, x, 11) \
+		vcipher(x, x, 12) \
+		vcipher(x, x, 13) \
+		vcipherlast(x, x, 14)
+
+#define BLOCK_ENCRYPT_X2_128(x, y) \
+		vxor(x, x, 0) \
+		vxor(y, y, 0) \
+		vcipher(x, x, 1) \
+		vcipher(y, y, 1) \
+		vcipher(x, x, 2) \
+		vcipher(y, y, 2) \
+		vcipher(x, x, 3) \
+		vcipher(y, y, 3) \
+		vcipher(x, x, 4) \
+		vcipher(y, y, 4) \
+		vcipher(x, x, 5) \
+		vcipher(y, y, 5) \
+		vcipher(x, x, 6) \
+		vcipher(y, y, 6) \
+		vcipher(x, x, 7) \
+		vcipher(y, y, 7) \
+		vcipher(x, x, 8) \
+		vcipher(y, y, 8) \
+		vcipher(x, x, 9) \
+		vcipher(y, y, 9) \
+		vcipherlast(x, x, 10) \
+		vcipherlast(y, y, 10)
+
+#define BLOCK_ENCRYPT_X2_192(x, y) \
+		vxor(x, x, 0) \
+		vxor(y, y, 0) \
+		vcipher(x, x, 1) \
+		vcipher(y, y, 1) \
+		vcipher(x, x, 2) \
+		vcipher(y, y, 2) \
+		vcipher(x, x, 3) \
+		vcipher(y, y, 3) \
+		vcipher(x, x, 4) \
+		vcipher(y, y, 4) \
+		vcipher(x, x, 5) \
+		vcipher(y, y, 5) \
+		vcipher(x, x, 6) \
+		vcipher(y, y, 6) \
+		vcipher(x, x, 7) \
+		vcipher(y, y, 7) \
+		vcipher(x, x, 8) \
+		vcipher(y, y, 8) \
+		vcipher(x, x, 9) \
+		vcipher(y, y, 9) \
+		vcipher(x, x, 10) \
+		vcipher(y, y, 10) \
+		vcipher(x, x, 11) \
+		vcipher(y, y, 11) \
+		vcipherlast(x, x, 12) \
+		vcipherlast(y, y, 12)
+
+#define BLOCK_ENCRYPT_X2_256(x, y) \
+		vxor(x, x, 0) \
+		vxor(y, y, 0) \
+		vcipher(x, x, 1) \
+		vcipher(y, y, 1) \
+		vcipher(x, x, 2) \
+		vcipher(y, y, 2) \
+		vcipher(x, x, 3) \
+		vcipher(y, y, 3) \
+		vcipher(x, x, 4) \
+		vcipher(y, y, 4) \
+		vcipher(x, x, 5) \
+		vcipher(y, y, 5) \
+		vcipher(x, x, 6) \
+		vcipher(y, y, 6) \
+		vcipher(x, x, 7) \
+		vcipher(y, y, 7) \
+		vcipher(x, x, 8) \
+		vcipher(y, y, 8) \
+		vcipher(x, x, 9) \
+		vcipher(y, y, 9) \
+		vcipher(x, x, 10) \
+		vcipher(y, y, 10) \
+		vcipher(x, x, 11) \
+		vcipher(y, y, 11) \
+		vcipher(x, x, 12) \
+		vcipher(y, y, 12) \
+		vcipher(x, x, 13) \
+		vcipher(y, y, 13) \
+		vcipherlast(x, x, 14) \
+		vcipherlast(y, y, 14)
+
+#define BLOCK_ENCRYPT_X4_128(x0, x1, x2, x3) \
+		vxor(x0, x0, 0) \
+		vxor(x1, x1, 0) \
+		vxor(x2, x2, 0) \
+		vxor(x3, x3, 0) \
+		vcipher(x0, x0, 1) \
+		vcipher(x1, x1, 1) \
+		vcipher(x2, x2, 1) \
+		vcipher(x3, x3, 1) \
+		vcipher(x0, x0, 2) \
+		vcipher(x1, x1, 2) \
+		vcipher(x2, x2, 2) \
+		vcipher(x3, x3, 2) \
+		vcipher(x0, x0, 3) \
+		vcipher(x1, x1, 3) \
+		vcipher(x2, x2, 3) \
+		vcipher(x3, x3, 3) \
+		vcipher(x0, x0, 4) \
+		vcipher(x1, x1, 4) \
+		vcipher(x2, x2, 4) \
+		vcipher(x3, x3, 4) \
+		vcipher(x0, x0, 5) \
+		vcipher(x1, x1, 5) \
+		vcipher(x2, x2, 5) \
+		vcipher(x3, x3, 5) \
+		vcipher(x0, x0, 6) \
+		vcipher(x1, x1, 6) \
+		vcipher(x2, x2, 6) \
+		vcipher(x3, x3, 6) \
+		vcipher(x0, x0, 7) \
+		vcipher(x1, x1, 7) \
+		vcipher(x2, x2, 7) \
+		vcipher(x3, x3, 7) \
+		vcipher(x0, x0, 8) \
+		vcipher(x1, x1, 8) \
+		vcipher(x2, x2, 8) \
+		vcipher(x3, x3, 8) \
+		vcipher(x0, x0, 9) \
+		vcipher(x1, x1, 9) \
+		vcipher(x2, x2, 9) \
+		vcipher(x3, x3, 9) \
+		vcipherlast(x0, x0, 10) \
+		vcipherlast(x1, x1, 10) \
+		vcipherlast(x2, x2, 10) \
+		vcipherlast(x3, x3, 10)
+
+#define BLOCK_ENCRYPT_X4_192(x0, x1, x2, x3) \
+		vxor(x0, x0, 0) \
+		vxor(x1, x1, 0) \
+		vxor(x2, x2, 0) \
+		vxor(x3, x3, 0) \
+		vcipher(x0, x0, 1) \
+		vcipher(x1, x1, 1) \
+		vcipher(x2, x2, 1) \
+		vcipher(x3, x3, 1) \
+		vcipher(x0, x0, 2) \
+		vcipher(x1, x1, 2) \
+		vcipher(x2, x2, 2) \
+		vcipher(x3, x3, 2) \
+		vcipher(x0, x0, 3) \
+		vcipher(x1, x1, 3) \
+		vcipher(x2, x2, 3) \
+		vcipher(x3, x3, 3) \
+		vcipher(x0, x0, 4) \
+		vcipher(x1, x1, 4) \
+		vcipher(x2, x2, 4) \
+		vcipher(x3, x3, 4) \
+		vcipher(x0, x0, 5) \
+		vcipher(x1, x1, 5) \
+		vcipher(x2, x2, 5) \
+		vcipher(x3, x3, 5) \
+		vcipher(x0, x0, 6) \
+		vcipher(x1, x1, 6) \
+		vcipher(x2, x2, 6) \
+		vcipher(x3, x3, 6) \
+		vcipher(x0, x0, 7) \
+		vcipher(x1, x1, 7) \
+		vcipher(x2, x2, 7) \
+		vcipher(x3, x3, 7) \
+		vcipher(x0, x0, 8) \
+		vcipher(x1, x1, 8) \
+		vcipher(x2, x2, 8) \
+		vcipher(x3, x3, 8) \
+		vcipher(x0, x0, 9) \
+		vcipher(x1, x1, 9) \
+		vcipher(x2, x2, 9) \
+		vcipher(x3, x3, 9) \
+		vcipher(x0, x0, 10) \
+		vcipher(x1, x1, 10) \
+		vcipher(x2, x2, 10) \
+		vcipher(x3, x3, 10) \
+		vcipher(x0, x0, 11) \
+		vcipher(x1, x1, 11) \
+		vcipher(x2, x2, 11) \
+		vcipher(x3, x3, 11) \
+		vcipherlast(x0, x0, 12) \
+		vcipherlast(x1, x1, 12) \
+		vcipherlast(x2, x2, 12) \
+		vcipherlast(x3, x3, 12)
+
+#define BLOCK_ENCRYPT_X4_256(x0, x1, x2, x3) \
+		vxor(x0, x0, 0) \
+		vxor(x1, x1, 0) \
+		vxor(x2, x2, 0) \
+		vxor(x3, x3, 0) \
+		vcipher(x0, x0, 1) \
+		vcipher(x1, x1, 1) \
+		vcipher(x2, x2, 1) \
+		vcipher(x3, x3, 1) \
+		vcipher(x0, x0, 2) \
+		vcipher(x1, x1, 2) \
+		vcipher(x2, x2, 2) \
+		vcipher(x3, x3, 2) \
+		vcipher(x0, x0, 3) \
+		vcipher(x1, x1, 3) \
+		vcipher(x2, x2, 3) \
+		vcipher(x3, x3, 3) \
+		vcipher(x0, x0, 4) \
+		vcipher(x1, x1, 4) \
+		vcipher(x2, x2, 4) \
+		vcipher(x3, x3, 4) \
+		vcipher(x0, x0, 5) \
+		vcipher(x1, x1, 5) \
+		vcipher(x2, x2, 5) \
+		vcipher(x3, x3, 5) \
+		vcipher(x0, x0, 6) \
+		vcipher(x1, x1, 6) \
+		vcipher(x2, x2, 6) \
+		vcipher(x3, x3, 6) \
+		vcipher(x0, x0, 7) \
+		vcipher(x1, x1, 7) \
+		vcipher(x2, x2, 7) \
+		vcipher(x3, x3, 7) \
+		vcipher(x0, x0, 8) \
+		vcipher(x1, x1, 8) \
+		vcipher(x2, x2, 8) \
+		vcipher(x3, x3, 8) \
+		vcipher(x0, x0, 9) \
+		vcipher(x1, x1, 9) \
+		vcipher(x2, x2, 9) \
+		vcipher(x3, x3, 9) \
+		vcipher(x0, x0, 10) \
+		vcipher(x1, x1, 10) \
+		vcipher(x2, x2, 10) \
+		vcipher(x3, x3, 10) \
+		vcipher(x0, x0, 11) \
+		vcipher(x1, x1, 11) \
+		vcipher(x2, x2, 11) \
+		vcipher(x3, x3, 11) \
+		vcipher(x0, x0, 12) \
+		vcipher(x1, x1, 12) \
+		vcipher(x2, x2, 12) \
+		vcipher(x3, x3, 12) \
+		vcipher(x0, x0, 13) \
+		vcipher(x1, x1, 13) \
+		vcipher(x2, x2, 13) \
+		vcipher(x3, x3, 13) \
+		vcipherlast(x0, x0, 14) \
+		vcipherlast(x1, x1, 14) \
+		vcipherlast(x2, x2, 14) \
+		vcipherlast(x3, x3, 14)
+
+#if BR_POWER8_LE
+static const uint32_t idx2be[] = {
+	0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+};
+#define BYTESWAP_INIT     lxvw4x(47, 0, %[idx2be])
+#define BYTESWAP(x)       vperm(x, x, x, 15)
+#define BYTESWAPX(d, s)   vperm(d, s, s, 15)
+#define BYTESWAP_REG      , [idx2be] "b" (idx2be)
+#else
+#define BYTESWAP_INIT
+#define BYTESWAP(x)
+#define BYTESWAPX(d, s)   vand(d, s, s)
+#define BYTESWAP_REG
+#endif
+
+static const uint32_t ctrinc[] = {
+	0, 0, 0, 1
+};
+static const uint32_t ctrinc_x4[] = {
+	0, 0, 0, 4
+};
+#define INCR_128_INIT      lxvw4x(60, 0, %[ctrinc])
+#define INCR_128_X4_INIT   lxvw4x(60, 0, %[ctrinc_x4])
+#define INCR_128(d, s) \
+		vaddcuw(29, s, 28) \
+		vadduwm(d, s, 28) \
+		vsldoi(30, 29, 29, 4) \
+		vaddcuw(29, d, 30) \
+		vadduwm(d, d, 30) \
+		vsldoi(30, 29, 29, 4) \
+		vaddcuw(29, d, 30) \
+		vadduwm(d, d, 30) \
+		vsldoi(30, 29, 29, 4) \
+		vadduwm(d, d, 30)
+
+#define MKCTR(size) \
+static void \
+ctr_ ## size(const unsigned char *sk, \
+	unsigned char *ctrbuf, unsigned char *buf, size_t num_blocks_x4) \
+{ \
+	long cc, cc0, cc1, cc2, cc3; \
+ \
+	cc = 0; \
+	cc0 = 0; \
+	cc1 = 16; \
+	cc2 = 32; \
+	cc3 = 48; \
+	asm volatile ( \
+ \
+		/* \
+		 * Load subkeys into v0..v10 (up to v12 / v14 for 192 / 256-bit keys) \
+		 */ \
+		LOAD_SUBKEYS_ ## size \
+		li(%[cc], 0) \
+ \
+		BYTESWAP_INIT \
+		INCR_128_X4_INIT \
+ \
+		/* \
+		 * Load the four current CTR counters from ctrbuf into v16 to v19. \
+		 */ \
+		lxvw4x(48, %[cc0], %[ctrbuf]) \
+		lxvw4x(49, %[cc1], %[ctrbuf]) \
+		lxvw4x(50, %[cc2], %[ctrbuf]) \
+		lxvw4x(51, %[cc3], %[ctrbuf]) \
+		BYTESWAP(16) \
+		BYTESWAP(17) \
+		BYTESWAP(18) \
+		BYTESWAP(19) \
+ \
+		mtctr(%[num_blocks_x4]) \
+ \
+	label(loop) \
+		/* \
+		 * Compute next counter values (each current one plus 4) into v20..v23. \
+		 */ \
+		INCR_128(20, 16) \
+		INCR_128(21, 17) \
+		INCR_128(22, 18) \
+		INCR_128(23, 19) \
+ \
+		/* \
+		 * Encrypt counter values and XOR into next data blocks (64 bytes). \
+		 */ \
+		lxvw4x(56, %[cc0], %[buf]) \
+		lxvw4x(57, %[cc1], %[buf]) \
+		lxvw4x(58, %[cc2], %[buf]) \
+		lxvw4x(59, %[cc3], %[buf]) \
+		BYTESWAP(24) \
+		BYTESWAP(25) \
+		BYTESWAP(26) \
+		BYTESWAP(27) \
+		BLOCK_ENCRYPT_X4_ ## size(16, 17, 18, 19) \
+		vxor(16, 16, 24) \
+		vxor(17, 17, 25) \
+		vxor(18, 18, 26) \
+		vxor(19, 19, 27) \
+		BYTESWAP(16) \
+		BYTESWAP(17) \
+		BYTESWAP(18) \
+		BYTESWAP(19) \
+		stxvw4x(48, %[cc0], %[buf]) \
+		stxvw4x(49, %[cc1], %[buf]) \
+		stxvw4x(50, %[cc2], %[buf]) \
+		stxvw4x(51, %[cc3], %[buf]) \
+ \
+		/* \
+		 * Update counters (copy v20..v23 into v16..v19) and data pointer. \
+		 */ \
+		vand(16, 20, 20) \
+		vand(17, 21, 21) \
+		vand(18, 22, 22) \
+		vand(19, 23, 23) \
+		addi(%[buf], %[buf], 64) \
+ \
+		bdnz(loop) \
+ \
+		/* \
+		 * Write back new counter values into ctrbuf. \
+		 */ \
+		BYTESWAP(16) \
+		BYTESWAP(17) \
+		BYTESWAP(18) \
+		BYTESWAP(19) \
+		stxvw4x(48, %[cc0], %[ctrbuf]) \
+		stxvw4x(49, %[cc1], %[ctrbuf]) \
+		stxvw4x(50, %[cc2], %[ctrbuf]) \
+		stxvw4x(51, %[cc3], %[ctrbuf]) \
+ \
+: [cc] "+b" (cc), [buf] "+b" (buf), \
+	[cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3) \
+: [sk] "b" (sk), [ctrbuf] "b" (ctrbuf), \
+	[num_blocks_x4] "b" (num_blocks_x4), [ctrinc_x4] "b" (ctrinc_x4) \
+	BYTESWAP_REG \
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
+  "v30", "ctr", "memory" \
+	); \
+}
+
+MKCTR(128)
+MKCTR(192)
+MKCTR(256)
+
+#define MKCBCMAC(size) \
+static void \
+cbcmac_ ## size(const unsigned char *sk, \
+	unsigned char *cbcmac, const unsigned char *buf, size_t num_blocks) \
+{ \
+	long cc; \
+ \
+	cc = 0; \
+	asm volatile ( \
+ \
+		/* \
+		 * Load subkeys into v0..v10 (up to v12 / v14 for 192 / 256-bit keys) \
+		 */ \
+		LOAD_SUBKEYS_ ## size \
+		li(%[cc], 0) \
+ \
+		BYTESWAP_INIT \
+ \
+		/* \
+		 * Load current CBC-MAC value into v16. \
+		 */ \
+		lxvw4x(48, %[cc], %[cbcmac]) \
+		BYTESWAP(16) \
+ \
+		mtctr(%[num_blocks]) \
+ \
+	label(loop) \
+		/* \
+		 * Load next block, XOR into current CBC-MAC value, \
+		 * and then encrypt it. \
+		 */ \
+		lxvw4x(49, %[cc], %[buf]) \
+		BYTESWAP(17) \
+		vxor(16, 16, 17) \
+		BLOCK_ENCRYPT_ ## size(16) \
+		addi(%[buf], %[buf], 16) \
+ \
+		bdnz(loop) \
+ \
+		/* \
+		 * Write back new CBC-MAC value. \
+		 */ \
+		BYTESWAP(16) \
+		stxvw4x(48, %[cc], %[cbcmac]) \
+ \
+: [cc] "+b" (cc), [buf] "+b" (buf) \
+: [sk] "b" (sk), [cbcmac] "b" (cbcmac), [num_blocks] "b" (num_blocks) \
+	BYTESWAP_REG \
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
+  "v30", "ctr", "memory" \
+	); \
+}
+
+MKCBCMAC(128)
+MKCBCMAC(192)
+MKCBCMAC(256)
+
+#define MKENCRYPT(size) \
+static void \
+ctrcbc_ ## size ## _encrypt(const unsigned char *sk, \
+	unsigned char *ctr, unsigned char *cbcmac, unsigned char *buf, \
+	size_t num_blocks) \
+{ \
+	long cc; \
+ \
+	cc = 0; \
+	asm volatile ( \
+ \
+		/* \
+		 * Load subkeys into v0..v10 (up to v12 / v14 for 192 / 256-bit keys) \
+		 */ \
+		LOAD_SUBKEYS_ ## size \
+		li(%[cc], 0) \
+ \
+		BYTESWAP_INIT \
+		INCR_128_INIT \
+ \
+		/* \
+		 * Load current CTR counter into v16, and current \
+		 * CBC-MAC IV into v17. \
+		 */ \
+		lxvw4x(48, %[cc], %[ctr]) \
+		lxvw4x(49, %[cc], %[cbcmac]) \
+		BYTESWAP(16) \
+		BYTESWAP(17) \
+ \
+		/* \
+		 * At each iteration, we do two parallel encryptions: \
+		 *  - new counter value for encryption of the next block; \
+		 *  - CBC-MAC over the previous encrypted block. \
+		 * Thus, each plaintext block implies two AES instances, \
+		 * over two successive iterations. This requires a single \
+		 * counter encryption before the loop, and a single \
+		 * CBC-MAC encryption after the loop. \
+		 */ \
+ \
+		/* \
+		 * Encrypt first block (into v20). This happens before \
+		 * num_blocks is examined, so num_blocks must be at least 1. \
+		 */ \
+		lxvw4x(52, %[cc], %[buf]) \
+		BYTESWAP(20) \
+		INCR_128(22, 16) \
+		BLOCK_ENCRYPT_ ## size(16) \
+		vxor(20, 20, 16) \
+		BYTESWAPX(21, 20) \
+		stxvw4x(53, %[cc], %[buf]) \
+		vand(16, 22, 22) \
+		addi(%[buf], %[buf], 16) \
+ \
+		/* \
+		 * Load loop counter; skip the loop if there is only \
+		 * one block in total (already handled by the boundary \
+		 * conditions). \
+		 */ \
+		mtctr(%[num_blocks]) \
+		bdz(fastexit) \
+ \
+	label(loop) \
+		/* \
+		 * Upon loop entry: \
+		 *    v16   counter value for next block \
+		 *    v17   current CBC-MAC value \
+		 *    v20   encrypted previous block \
+		 */ \
+		vxor(17, 17, 20) \
+		INCR_128(22, 16) \
+		lxvw4x(52, %[cc], %[buf]) \
+		BYTESWAP(20) \
+		BLOCK_ENCRYPT_X2_ ## size(16, 17) \
+		vxor(20, 20, 16) \
+		BYTESWAPX(21, 20) \
+		stxvw4x(53, %[cc], %[buf]) \
+		addi(%[buf], %[buf], 16) \
+		vand(16, 22, 22) \
+ \
+		bdnz(loop) \
+ \
+	label(fastexit) \
+		vxor(17, 17, 20) \
+		BLOCK_ENCRYPT_ ## size(17) \
+		BYTESWAP(16) \
+		BYTESWAP(17) \
+		stxvw4x(48, %[cc], %[ctr]) \
+		stxvw4x(49, %[cc], %[cbcmac]) \
+ \
+: [cc] "+b" (cc), [buf] "+b" (buf) \
+: [sk] "b" (sk), [ctr] "b" (ctr), [cbcmac] "b" (cbcmac), \
+	[num_blocks] "b" (num_blocks), [ctrinc] "b" (ctrinc) \
+	BYTESWAP_REG \
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
+  "v30", "ctr", "memory" \
+	); \
+}
+
+MKENCRYPT(128)
+MKENCRYPT(192)
+MKENCRYPT(256)
+
+#define MKDECRYPT(size) \
+static void \
+ctrcbc_ ## size ## _decrypt(const unsigned char *sk, \
+	unsigned char *ctr, unsigned char *cbcmac, unsigned char *buf, \
+	size_t num_blocks) \
+{ \
+	long cc; \
+ \
+	cc = 0; \
+	asm volatile ( \
+ \
+		/* \
+		 * Load subkeys into v0..v10 (up to v12 / v14 for 192 / 256-bit keys) \
+		 */ \
+		LOAD_SUBKEYS_ ## size \
+		li(%[cc], 0) \
+ \
+		BYTESWAP_INIT \
+		INCR_128_INIT \
+ \
+		/* \
+		 * Load current CTR counter into v16, and current \
+		 * CBC-MAC IV into v17. \
+		 */ \
+		lxvw4x(48, %[cc], %[ctr]) \
+		lxvw4x(49, %[cc], %[cbcmac]) \
+		BYTESWAP(16) \
+		BYTESWAP(17) \
+ \
+		/* \
+		 * At each iteration, we do two parallel encryptions: \
+		 *  - new counter value for decryption of the next block; \
+		 *  - CBC-MAC over the next encrypted block. \
+		 * Each iteration performs the two AES instances related \
+		 * to the current block; there is thus no need for some \
+		 * extra pre-loop and post-loop work as in encryption. \
+		 */ \
+ \
+		mtctr(%[num_blocks]) \
+ \
+	label(loop) \
+		/* \
+		 * Upon loop entry: \
+		 *    v16   counter value for next block \
+		 *    v17   current CBC-MAC value \
+		 */ \
+		lxvw4x(52, %[cc], %[buf]) \
+		BYTESWAP(20) \
+		vxor(17, 17, 20) \
+		INCR_128(22, 16) \
+		BLOCK_ENCRYPT_X2_ ## size(16, 17) \
+		vxor(20, 20, 16) \
+		BYTESWAPX(21, 20) \
+		stxvw4x(53, %[cc], %[buf]) \
+		addi(%[buf], %[buf], 16) \
+		vand(16, 22, 22) \
+ \
+		bdnz(loop) \
+ \
+		/* \
+		 * Store back counter and CBC-MAC value. \
+		 */ \
+		BYTESWAP(16) \
+		BYTESWAP(17) \
+		stxvw4x(48, %[cc], %[ctr]) \
+		stxvw4x(49, %[cc], %[cbcmac]) \
+ \
+: [cc] "+b" (cc), [buf] "+b" (buf) \
+: [sk] "b" (sk), [ctr] "b" (ctr), [cbcmac] "b" (cbcmac), \
+	[num_blocks] "b" (num_blocks), [ctrinc] "b" (ctrinc) \
+	BYTESWAP_REG \
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
+  "v30", "ctr", "memory" \
+	); \
+}
+
+MKDECRYPT(128)
+MKDECRYPT(192)
+MKDECRYPT(256)
+
+/* see bearssl_block.h; input shorter than one 16-byte block is ignored */
+void
+br_aes_pwr8_ctrcbc_encrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	if (len < 16) {
+		return;	/* asm always handles one block first; count len >> 4 must be >= 1 */
+	}
+	switch (ctx->num_rounds) {
+	case 10:
+		ctrcbc_128_encrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
+		break;
+	case 12:
+		ctrcbc_192_encrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
+		break;
+	default:
+		ctrcbc_256_encrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
+		break;
+	}
+}
+
+/* see bearssl_block.h; input shorter than one 16-byte block is ignored */
+void
+br_aes_pwr8_ctrcbc_decrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	if (len < 16) {
+		return;	/* asm loop body runs before its count test; len >> 4 must be >= 1 */
+	}
+	switch (ctx->num_rounds) {
+	case 10:
+		ctrcbc_128_decrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
+		break;
+	case 12:
+		ctrcbc_192_decrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
+		break;
+	default:
+		ctrcbc_256_decrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
+		break;
+	}
+}
+
+/* Write to dst the 128-bit big-endian value read from src, plus one (mod 2^128). */
+static inline void
+incr_ctr(void *dst, const void *src)
+{
+	const unsigned char *in = src;
+	unsigned char *out = dst;
+	uint64_t low = br_dec64be(in + 8) + 1;
+	/* Branch-free carry: 1 exactly when the low half wrapped around to 0. */
+	uint64_t carry = ((low | -low) >> 63) ^ (uint64_t)1;
+	br_enc64be(out, br_dec64be(in) + carry);
+	br_enc64be(out + 8, low);
+}
+
+/*
+ * Run the four-blocks-at-a-time CTR routine matching the key schedule
+ * held in ctx over num_blocks_x4 groups of 64 bytes.
+ */
+static void
+ctr_run_x4(const br_aes_pwr8_ctrcbc_keys *ctx,
+	unsigned char *ctrbuf, unsigned char *buf, size_t num_blocks_x4)
+{
+	switch (ctx->num_rounds) {
+	case 10:
+		ctr_128(ctx->skey.skni, ctrbuf, buf, num_blocks_x4);
+		break;
+	case 12:
+		ctr_192(ctx->skey.skni, ctrbuf, buf, num_blocks_x4);
+		break;
+	default:
+		ctr_256(ctx->skey.skni, ctrbuf, buf, num_blocks_x4);
+		break;
+	}
+}
+
+/*
+ * see bearssl_block.h
+ * For len a multiple of 16, ctr ends up advanced by one per block.
+ */
+void
+br_aes_pwr8_ctrcbc_ctr(const br_aes_pwr8_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char ctrbuf[64], tmp[64];
+	unsigned char *buf;
+	size_t full, rem;
+
+	/* ctrbuf holds four consecutive counter values, starting at ctr. */
+	buf = data;
+	memcpy(ctrbuf, ctr, 16);
+	incr_ctr(ctrbuf + 16, ctrbuf);
+	incr_ctr(ctrbuf + 32, ctrbuf + 16);
+	incr_ctr(ctrbuf + 48, ctrbuf + 32);
+	full = len >> 6;
+	rem = len & 63;
+	if (full > 0) {
+		ctr_run_x4(ctx, ctrbuf, buf, full);
+		buf += len & ~(size_t)63;
+	}
+	if (rem == 0) {
+		memcpy(ctr, ctrbuf, 16);
+		return;
+	}
+	/* Save the follow-up counter before the tail run rewrites ctrbuf. */
+	if (rem >= 16) {
+		memcpy(ctr, ctrbuf + (rem & ~(size_t)15), 16);
+	}
+	memcpy(tmp, buf, rem);
+	memset(tmp + rem, 0, (sizeof tmp) - rem);
+	ctr_run_x4(ctx, ctrbuf, tmp, 1);
+	memcpy(buf, tmp, rem);
+}
+
+/* see bearssl_block.h; input shorter than one 16-byte block is ignored (asm loop needs count >= 1) */
+void
+br_aes_pwr8_ctrcbc_mac(const br_aes_pwr8_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	if (len >= 16) {
+		switch (ctx->num_rounds) {
+		case 10:
+			cbcmac_128(ctx->skey.skni, cbcmac, data, len >> 4);
+			break;
+		case 12:
+			cbcmac_192(ctx->skey.skni, cbcmac, data, len >> 4);
+			break;
+		default:
+			cbcmac_256(ctx->skey.skni, cbcmac, data, len >> 4);
+			break;
+		}
+	}
+}
+
+/* see bearssl_block.h; entries after the three leading constants are init, encrypt, decrypt, ctr, mac */
+const br_block_ctrcbc_class br_aes_pwr8_ctrcbc_vtable = {
+	sizeof(br_aes_pwr8_ctrcbc_keys),
+	16,
+	4,
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+		&br_aes_pwr8_ctrcbc_init,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_pwr8_ctrcbc_encrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_pwr8_ctrcbc_decrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_pwr8_ctrcbc_ctr,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_pwr8_ctrcbc_mac
+};
+
+#else
+
+/* see bearssl_block.h; variant compiled when BR_POWER8 is not set: always NULL */
+const br_block_ctrcbc_class *
+br_aes_pwr8_ctrcbc_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
diff --git a/third_party/bearssl/src/aes_small_cbcdec.c b/third_party/bearssl/src/aes_small_cbcdec.c
new file mode 100644
index 0000000..8567244
--- /dev/null
+++ b/third_party/bearssl/src/aes_small_cbcdec.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h; records the vtable and the round count returned by br_aes_keysched() */
+void
+br_aes_small_cbcdec_init(br_aes_small_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_small_cbcdec_vtable;
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+}
+
+/* see bearssl_block.h; decrypts in place, updates iv; a trailing partial block (len % 16) is ignored */
+void
+br_aes_small_cbcdec_run(const br_aes_small_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf, *ivbuf;
+
+	ivbuf = iv;
+	buf = data;
+	while (len >= 16) {	/* "len > 0" would wrap len on a short tail */
+		unsigned char tmp[16];
+		int i;
+
+		memcpy(tmp, buf, 16);	/* keep ciphertext: it is the next IV */
+		br_aes_small_decrypt(ctx->num_rounds, ctx->skey, buf);
+		for (i = 0; i < 16; i ++) {
+			buf[i] ^= ivbuf[i];
+		}
+		memcpy(ivbuf, tmp, 16);
+		buf += 16;
+		len -= 16;
+	}
+}
+
+/* see bearssl_block.h; the casts adapt the typed init/run functions to the generic vtable signatures */
+const br_block_cbcdec_class br_aes_small_cbcdec_vtable = {
+	sizeof(br_aes_small_cbcdec_keys),
+	16,
+	4,
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))
+		&br_aes_small_cbcdec_init,
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_aes_small_cbcdec_run
+};
diff --git a/third_party/bearssl/src/aes_small_cbcenc.c b/third_party/bearssl/src/aes_small_cbcenc.c
new file mode 100644
index 0000000..0dc2910
--- /dev/null
+++ b/third_party/bearssl/src/aes_small_cbcenc.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h; records the vtable and the round count returned by br_aes_keysched() */
+void
+br_aes_small_cbcenc_init(br_aes_small_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_small_cbcenc_vtable;
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+}
+
+/* see bearssl_block.h; encrypts in place, updates iv; a trailing partial block (len % 16) is ignored */
+void
+br_aes_small_cbcenc_run(const br_aes_small_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf, *ivbuf;
+
+	ivbuf = iv;
+	buf = data;
+	while (len >= 16) {	/* "len > 0" would wrap len on a short tail */
+		int i;
+
+		for (i = 0; i < 16; i ++) {
+			buf[i] ^= ivbuf[i];
+		}
+		br_aes_small_encrypt(ctx->num_rounds, ctx->skey, buf);
+		memcpy(ivbuf, buf, 16);	/* ciphertext chains into the next block */
+		buf += 16;
+		len -= 16;
+	}
+}
+
+/* see bearssl_block.h; the casts adapt the typed init/run functions to the generic vtable signatures */
+const br_block_cbcenc_class br_aes_small_cbcenc_vtable = {
+	sizeof(br_aes_small_cbcenc_keys),
+	16,
+	4,
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
+		&br_aes_small_cbcenc_init,
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+		&br_aes_small_cbcenc_run
+};
diff --git a/third_party/bearssl/src/aes_small_ctr.c b/third_party/bearssl/src/aes_small_ctr.c
new file mode 100644
index 0000000..d5d371c
--- /dev/null
+++ b/third_party/bearssl/src/aes_small_ctr.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h; records the vtable and the round count returned by br_aes_keysched() */
+void
+br_aes_small_ctr_init(br_aes_small_ctr_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_small_ctr_vtable;
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+}
+
+/* XOR the first len bytes of src into dst, in place. */
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	size_t u;
+
+	for (u = 0; u < len; u ++) {
+		out[u] ^= in[u];
+	}
+}
+
+/*
+ * see bearssl_block.h
+ * Returns the counter value following the last one consumed.
+ */
+uint32_t
+br_aes_small_ctr_run(const br_aes_small_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *out = data;
+	unsigned char ks[16];
+	size_t clen;
+
+	/*
+	 * One keystream block (12-byte IV followed by the 32-bit
+	 * big-endian counter) per chunk; the last chunk may be short.
+	 */
+	for (; len > 0; len -= clen, out += clen, cc ++) {
+		clen = len < 16 ? len : 16;
+		memcpy(ks, iv, 12);
+		br_enc32be(ks + 12, cc);
+		br_aes_small_encrypt(ctx->num_rounds, ctx->skey, ks);
+		xorbuf(out, ks, clen);
+	}
+	return cc;
+}
+
+/* see bearssl_block.h; the casts adapt the typed init/run functions to the generic vtable signatures */
+const br_block_ctr_class br_aes_small_ctr_vtable = {
+	sizeof(br_aes_small_ctr_keys),
+	16,
+	4,
+	(void (*)(const br_block_ctr_class **, const void *, size_t))
+		&br_aes_small_ctr_init,
+	(uint32_t (*)(const br_block_ctr_class *const *,
+		const void *, uint32_t, void *, size_t))
+		&br_aes_small_ctr_run
+};
diff --git a/third_party/bearssl/src/aes_small_ctrcbc.c b/third_party/bearssl/src/aes_small_ctrcbc.c
new file mode 100644
index 0000000..2d6ba32
--- /dev/null
+++ b/third_party/bearssl/src/aes_small_ctrcbc.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h -- set up an AES CTR + CBC-MAC context from a raw key of len bytes */
+void
+br_aes_small_ctrcbc_init(br_aes_small_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_small_ctrcbc_vtable; /* bind the context to this implementation's vtable */
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len); /* presumably fills ctx->skey; the return value is kept as the round count */
+}
+
+static void /* XOR the first len bytes of src into dst: dst[i] ^= src[i] */
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *d;
+	const unsigned char *s;
+
+	d = dst;
+	s = src;
+	while (len -- > 0) { /* one byte per iteration; len == 0 does nothing */
+		*d ++ ^= *s ++;
+	}
+}
+
+/* see bearssl_block.h -- CTR pass with a full 16-byte counter: XORs keystream into data in place and writes the advanced counter back to ctr */
+void
+br_aes_small_ctrcbc_ctr(const br_aes_small_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char *buf, *bctr;
+	uint32_t cc0, cc1, cc2, cc3;
+
+	buf = data;
+	bctr = ctr;
+	cc3 = br_dec32be(bctr +  0); /* the 16-byte counter is held as four 32-bit words; cc3 comes from the first four bytes */
+	cc2 = br_dec32be(bctr +  4);
+	cc1 = br_dec32be(bctr +  8);
+	cc0 = br_dec32be(bctr + 12); /* cc0 comes from the last four bytes and is the word that gets incremented */
+	while (len > 0) { /* each pass handles a full 16-byte block and subtracts 16, so len must be a multiple of 16 */
+		unsigned char tmp[16];
+		uint32_t carry;
+
+		br_enc32be(tmp +  0, cc3); /* rebuild the current counter block in tmp */
+		br_enc32be(tmp +  4, cc2);
+		br_enc32be(tmp +  8, cc1);
+		br_enc32be(tmp + 12, cc0);
+		br_aes_small_encrypt(ctx->num_rounds, ctx->skey, tmp); /* tmp becomes 16 keystream bytes */
+		xorbuf(buf, tmp, 16);
+		buf += 16;
+		len -= 16;
+		cc0 ++; /* increment the low word, then ripple the carry upward without branching */
+		carry = (~(cc0 | -cc0)) >> 31; /* 1 if cc0 wrapped to 0, else 0: (x | -x) has its top bit set exactly when x != 0 */
+		cc1 += carry;
+		carry &= (~(cc1 | -cc1)) >> 31; /* the carry continues only if cc1 also wrapped to 0 */
+		cc2 += carry;
+		carry &= (~(cc2 | -cc2)) >> 31;
+		cc3 += carry;
+	}
+	br_enc32be(bctr +  0, cc3); /* store the updated counter back into the caller's buffer */
+	br_enc32be(bctr +  4, cc2);
+	br_enc32be(bctr +  8, cc1);
+	br_enc32be(bctr + 12, cc0);
+}
+
+/* see bearssl_block.h -- CBC-MAC update: folds data into the 16-byte cbcmac state, which is updated in place; data is not modified */
+void
+br_aes_small_ctrcbc_mac(const br_aes_small_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	const unsigned char *buf;
+
+	buf = data;
+	while (len > 0) { /* each pass handles a full 16-byte block and subtracts 16, so len must be a multiple of 16 */
+		xorbuf(cbcmac, buf, 16); /* state ^= next data block */
+		br_aes_small_encrypt(ctx->num_rounds, ctx->skey, cbcmac); /* state = AES(state), in place */
+		buf += 16;
+		len -= 16;
+	}
+}
+
+/* see bearssl_block.h -- combined encryption: CTR pass over data, then CBC-MAC over the result */
+void
+br_aes_small_ctrcbc_encrypt(const br_aes_small_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	br_aes_small_ctrcbc_ctr(ctx, ctr, data, len); /* transform data in place first ... */
+	br_aes_small_ctrcbc_mac(ctx, cbcmac, data, len); /* ... so the MAC is computed over the CTR output */
+}
+
+/* see bearssl_block.h -- combined decryption: CBC-MAC over the incoming data, then CTR pass over it */
+void
+br_aes_small_ctrcbc_decrypt(const br_aes_small_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	br_aes_small_ctrcbc_mac(ctx, cbcmac, data, len); /* MAC the data as received, before it is transformed ... */
+	br_aes_small_ctrcbc_ctr(ctx, ctr, data, len); /* ... then apply the CTR keystream in place */
+}
+
+/* see bearssl_block.h -- vtable exposing this implementation as a br_block_ctrcbc_class */
+const br_block_ctrcbc_class br_aes_small_ctrcbc_vtable = {
+	sizeof(br_aes_small_ctrcbc_keys), /* size of the context structure */
+	16, /* presumably the block size in bytes -- confirm against br_block_ctrcbc_class */
+	4, /* presumably log2 of the block size -- confirm against br_block_ctrcbc_class */
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t)) /* each entry is cast to the generic, vtable-pointer-based signature */
+		&br_aes_small_ctrcbc_init,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_small_ctrcbc_encrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_small_ctrcbc_decrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_small_ctrcbc_ctr,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_small_ctrcbc_mac
+};
diff --git a/third_party/bearssl/src/aes_small_dec.c b/third_party/bearssl/src/aes_small_dec.c
new file mode 100644
index 0000000..59dca8e
--- /dev/null
+++ b/third_party/bearssl/src/aes_small_dec.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Inverse S-box: 256-entry byte table, indexed by a state byte in inv_sub_bytes().
+ */
+static const unsigned char iS[] = {
+	0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E,
+	0x81, 0xF3, 0xD7, 0xFB, 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
+	0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, 0x54, 0x7B, 0x94, 0x32,
+	0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
+	0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49,
+	0x6D, 0x8B, 0xD1, 0x25, 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
+	0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, 0x6C, 0x70, 0x48, 0x50,
+	0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
+	0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05,
+	0xB8, 0xB3, 0x45, 0x06, 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
+	0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, 0x3A, 0x91, 0x11, 0x41,
+	0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
+	0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8,
+	0x1C, 0x75, 0xDF, 0x6E, 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
+	0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B, 0xFC, 0x56, 0x3E, 0x4B,
+	0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
+	0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59,
+	0x27, 0x80, 0xEC, 0x5F, 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
+	0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, 0xA0, 0xE0, 0x3B, 0x4D,
+	0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
+	0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63,
+	0x55, 0x21, 0x0C, 0x7D
+};
+
+static void /* XOR one round key (four 32-bit words at skeys) into the 16 state bytes */
+add_round_key(unsigned *state, const uint32_t *skeys)
+{
+	int i;
+
+	for (i = 0; i < 16; i += 4) { /* one subkey word per group of four state bytes */
+		uint32_t k;
+
+		k = *skeys ++;
+		state[i + 0] ^= (unsigned)(k >> 24); /* most significant byte of the word goes to the lowest index */
+		state[i + 1] ^= (unsigned)(k >> 16) & 0xFF;
+		state[i + 2] ^= (unsigned)(k >> 8) & 0xFF;
+		state[i + 3] ^= (unsigned)k & 0xFF;
+	}
+}
+
+static void /* replace each of the 16 state bytes by its inverse S-box image */
+inv_sub_bytes(unsigned *state)
+{
+	int i;
+
+	for (i = 0; i < 16; i ++) {
+		state[i] = iS[state[i]]; /* table lookup indexed by the state byte itself */
+	}
+}
+
+static void /* undo shift_rows: rotate the entries at indices r, r+4, r+8, r+12 for r = 1, 2, 3 (r = 0 is untouched) */
+inv_shift_rows(unsigned *state)
+{
+	unsigned tmp;
+
+	tmp = state[13]; /* indices 1, 5, 9, 13: each value moves up by one position (1->5->9->13->1) */
+	state[13] = state[9];
+	state[9] = state[5];
+	state[5] = state[1];
+	state[1] = tmp;
+
+	tmp = state[2]; /* indices 2, 6, 10, 14: rotation by two positions, done as two swaps */
+	state[2] = state[10];
+	state[10] = tmp;
+	tmp = state[6];
+	state[6] = state[14];
+	state[14] = tmp;
+
+	tmp = state[3]; /* indices 3, 7, 11, 15: each value moves down by one position (15->11->7->3->15) */
+	state[3] = state[7];
+	state[7] = state[11];
+	state[11] = state[15];
+	state[15] = tmp;
+}
+
+static inline unsigned /* reduce a polynomial wider than 8 bits modulo x^8+x^4+x^3+x+1; result is in 0..255 */
+gf256red(unsigned x)
+{
+	unsigned y;
+
+	y = x >> 8; /* the bits above bit 7; callers here pass at most 11-bit values, so y <= 7 */
+	return (x ^ y ^ (y << 1) ^ (y << 3) ^ (y << 4)) & 0xFF; /* fold y back in as y * 0x1B (x^8 == x^4+x^3+x+1), then drop the high bits */
+}
+
+static void /* inverse MixColumns: each group of four state bytes is multiplied by the matrix built from 0x0E, 0x0B, 0x0D, 0x09 */
+inv_mix_columns(unsigned *state)
+{
+	int i;
+
+	for (i = 0; i < 16; i += 4) { /* one group of four consecutive state bytes per iteration */
+		unsigned s0, s1, s2, s3;
+		unsigned t0, t1, t2, t3;
+
+		s0 = state[i + 0];
+		s1 = state[i + 1];
+		s2 = state[i + 2];
+		s3 = state[i + 3];
+		t0 = (s0 << 1) ^ (s0 << 2) ^ (s0 << 3) /* 0x0E * s0, as unreduced shifts and XORs */
+			^ s1 ^ (s1 << 1) ^ (s1 << 3) /* 0x0B * s1 */
+			^ s2 ^ (s2 << 2) ^ (s2 << 3) /* 0x0D * s2 */
+			^ s3 ^ (s3 << 3); /* 0x09 * s3 */
+		t1 = s0 ^ (s0 << 3) /* t1..t3 use the same four multipliers, rotated by one position per row */
+			^ (s1 << 1) ^ (s1 << 2) ^ (s1 << 3)
+			^ s2 ^ (s2 << 1) ^ (s2 << 3)
+			^ s3 ^ (s3 << 2) ^ (s3 << 3);
+		t2 = s0 ^ (s0 << 2) ^ (s0 << 3)
+			^ s1 ^ (s1 << 3)
+			^ (s2 << 1) ^ (s2 << 2) ^ (s2 << 3)
+			^ s3 ^ (s3 << 1) ^ (s3 << 3);
+		t3 = s0 ^ (s0 << 1) ^ (s0 << 3)
+			^ s1 ^ (s1 << 2) ^ (s1 << 3)
+			^ s2 ^ (s2 << 3)
+			^ (s3 << 1) ^ (s3 << 2) ^ (s3 << 3);
+		state[i + 0] = gf256red(t0); /* the sums are up to 11 bits wide; reduce each back to a byte */
+		state[i + 1] = gf256red(t1);
+		state[i + 2] = gf256red(t2);
+		state[i + 3] = gf256red(t3);
+	}
+}
+
+/* see inner.h -- decrypt one 16-byte block at data in place, using num_rounds rounds and the subkeys in skey (four words per round key) */
+void
+br_aes_small_decrypt(unsigned num_rounds, const uint32_t *skey, void *data)
+{
+	unsigned char *buf;
+	unsigned state[16];
+	unsigned u;
+
+	buf = data;
+	for (u = 0; u < 16; u ++) { /* widen the block into one unsigned per byte */
+		state[u] = buf[u];
+	}
+	add_round_key(state, skey + (num_rounds << 2)); /* start with the last round key (word offset 4 * num_rounds) */
+	for (u = num_rounds - 1; u > 0; u --) { /* walk the round keys backwards, from num_rounds - 1 down to 1 */
+		inv_shift_rows(state);
+		inv_sub_bytes(state);
+		add_round_key(state, skey + (u << 2));
+		inv_mix_columns(state);
+	}
+	inv_shift_rows(state); /* final round: same steps but without inv_mix_columns, using round key 0 */
+	inv_sub_bytes(state);
+	add_round_key(state, skey);
+	for (u = 0; u < 16; u ++) { /* write the result back over the input block */
+		buf[u] = state[u];
+	}
+}
diff --git a/third_party/bearssl/src/aes_small_enc.c b/third_party/bearssl/src/aes_small_enc.c
new file mode 100644
index 0000000..29f48a8
--- /dev/null
+++ b/third_party/bearssl/src/aes_small_enc.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define S   br_aes_S /* short alias for the br_aes_S table (defined elsewhere; used as the lookup table in sub_bytes()) */
+
+static void /* XOR one round key (four 32-bit words at skeys) into the 16 state bytes */
+add_round_key(unsigned *state, const uint32_t *skeys)
+{
+	int i;
+
+	for (i = 0; i < 16; i += 4) { /* one subkey word per group of four state bytes */
+		uint32_t k;
+
+		k = *skeys ++;
+		state[i + 0] ^= (unsigned)(k >> 24); /* most significant byte of the word goes to the lowest index */
+		state[i + 1] ^= (unsigned)(k >> 16) & 0xFF;
+		state[i + 2] ^= (unsigned)(k >> 8) & 0xFF;
+		state[i + 3] ^= (unsigned)k & 0xFF;
+	}
+}
+
+static void /* replace each of the 16 state bytes by its image in the S table */
+sub_bytes(unsigned *state)
+{
+	int i;
+
+	for (i = 0; i < 16; i ++) {
+		state[i] = S[state[i]]; /* table lookup indexed by the state byte itself */
+	}
+}
+
+static void /* ShiftRows: rotate the entries at indices r, r+4, r+8, r+12 for r = 1, 2, 3 (r = 0 is untouched) */
+shift_rows(unsigned *state)
+{
+	unsigned tmp;
+
+	tmp = state[1]; /* indices 1, 5, 9, 13: each value moves down by one position (13->9->5->1->13) */
+	state[1] = state[5];
+	state[5] = state[9];
+	state[9] = state[13];
+	state[13] = tmp;
+
+	tmp = state[2]; /* indices 2, 6, 10, 14: rotation by two positions, done as two swaps */
+	state[2] = state[10];
+	state[10] = tmp;
+	tmp = state[6];
+	state[6] = state[14];
+	state[14] = tmp;
+
+	tmp = state[15]; /* indices 3, 7, 11, 15: each value moves up by one position (3->7->11->15->3) */
+	state[15] = state[11];
+	state[11] = state[7];
+	state[7] = state[3];
+	state[3] = tmp;
+}
+
+static void /* MixColumns: each group of four state bytes is multiplied by the matrix built from 2, 3, 1, 1 */
+mix_columns(unsigned *state)
+{
+	int i;
+
+	for (i = 0; i < 16; i += 4) { /* one group of four consecutive state bytes per iteration */
+		unsigned s0, s1, s2, s3;
+		unsigned t0, t1, t2, t3;
+
+		s0 = state[i + 0];
+		s1 = state[i + 1];
+		s2 = state[i + 2];
+		s3 = state[i + 3];
+		t0 = (s0 << 1) ^ s1 ^ (s1 << 1) ^ s2 ^ s3; /* 2*s0 ^ 3*s1 ^ s2 ^ s3, unreduced (up to 9 bits) */
+		t1 = s0 ^ (s1 << 1) ^ s2 ^ (s2 << 1) ^ s3; /* s0 ^ 2*s1 ^ 3*s2 ^ s3 */
+		t2 = s0 ^ s1 ^ (s2 << 1) ^ s3 ^ (s3 << 1); /* s0 ^ s1 ^ 2*s2 ^ 3*s3 */
+		t3 = s0 ^ (s0 << 1) ^ s1 ^ s2 ^ (s3 << 1); /* 3*s0 ^ s1 ^ s2 ^ 2*s3 */
+		state[i + 0] = t0 ^ ((unsigned)(-(int)(t0 >> 8)) & 0x11B); /* branch-free reduction: if bit 8 is set the mask is all-ones and 0x11B is XORed in, clearing bit 8 */
+		state[i + 1] = t1 ^ ((unsigned)(-(int)(t1 >> 8)) & 0x11B);
+		state[i + 2] = t2 ^ ((unsigned)(-(int)(t2 >> 8)) & 0x11B);
+		state[i + 3] = t3 ^ ((unsigned)(-(int)(t3 >> 8)) & 0x11B);
+	}
+}
+
+/* see inner.h -- encrypt one 16-byte block at data in place, using num_rounds rounds and the subkeys in skey (four words per round key) */
+void
+br_aes_small_encrypt(unsigned num_rounds, const uint32_t *skey, void *data)
+{
+	unsigned char *buf;
+	unsigned state[16];
+	unsigned u;
+
+	buf = data;
+	for (u = 0; u < 16; u ++) { /* widen the block into one unsigned per byte */
+		state[u] = buf[u];
+	}
+	add_round_key(state, skey); /* start with round key 0 */
+	for (u = 1; u < num_rounds; u ++) { /* rounds 1 .. num_rounds - 1, each using round key u */
+		sub_bytes(state);
+		shift_rows(state);
+		mix_columns(state);
+		add_round_key(state, skey + (u << 2));
+	}
+	sub_bytes(state); /* final round: same steps but without mix_columns, using the last round key */
+	shift_rows(state);
+	add_round_key(state, skey + (num_rounds << 2));
+	for (u = 0; u < 16; u ++) { /* write the result back over the input block */
+		buf[u] = state[u];
+	}
+}
diff --git a/third_party/bearssl/src/aes_x86ni.c b/third_party/bearssl/src/aes_x86ni.c
new file mode 100644
index 0000000..d5408f1
--- /dev/null
+++ b/third_party/bearssl/src/aes_x86ni.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+/*
+ * This code contains the AES key schedule implementation using the
+ * AES-NI opcodes.
+ */
+
+#if BR_AES_X86NI
+
+/* see inner.h -- runtime check for the CPU features needed by the AES-NI code; returns the result of br_cpuid() */
+int
+br_aes_x86ni_supported(void)
+{
+	/*
+	 * Bit mask for features in ECX:
+	 *   19   SSE4.1 (used for _mm_insert_epi32(), for AES-CTR)
+	 *   25   AES-NI
+	 */
+	return br_cpuid(0, 0, 0x02080000, 0); /* 0x02080000 == (1 << 25) | (1 << 19); presumably nonzero only when every requested bit is present -- confirm in br_cpuid() */
+}
+
+BR_TARGETS_X86_UP
+
+BR_TARGET("sse2,aes") /* AES-128 key expansion step: derive the next round key from k and the keygenassist output k2 */
+static inline __m128i
+expand_step128(__m128i k, __m128i k2)
+{
+	k = _mm_xor_si128(k, _mm_slli_si128(k, 4)); /* XOR k with itself shifted left by 4 bytes ... */
+	k = _mm_xor_si128(k, _mm_slli_si128(k, 4)); /* ... three times, so each 32-bit word ends up XORed with all lower words */
+	k = _mm_xor_si128(k, _mm_slli_si128(k, 4));
+	k2 = _mm_shuffle_epi32(k2, 0xFF); /* broadcast the top 32-bit word of k2 to all four lanes */
+	return _mm_xor_si128(k, k2);
+}
+
+BR_TARGET("sse2,aes") /* AES-192 key expansion step: advances the schedule state held in *t1 and *t3, using the keygenassist output in *t2 */
+static inline void
+expand_step192(__m128i *t1, __m128i *t2, __m128i *t3)
+{
+	__m128i t4;
+
+	*t2 = _mm_shuffle_epi32(*t2, 0x55); /* broadcast 32-bit word 1 of the keygenassist output */
+	t4 = _mm_slli_si128(*t1, 0x4); /* XOR *t1 with itself shifted by 4, 8 and 12 bytes ... */
+	*t1 = _mm_xor_si128(*t1, t4);
+	t4 = _mm_slli_si128(t4, 0x4);
+	*t1 = _mm_xor_si128(*t1, t4);
+	t4 = _mm_slli_si128(t4, 0x4);
+	*t1 = _mm_xor_si128(*t1, t4);
+	*t1 = _mm_xor_si128(*t1, *t2); /* ... then mix in the broadcast word */
+	*t2 = _mm_shuffle_epi32(*t1, 0xFF); /* broadcast the top word of the new *t1; *t2 is overwritten on exit */
+	t4 = _mm_slli_si128(*t3, 0x4); /* *t3 gets a single shift-by-4-bytes-and-XOR step ... */
+	*t3 = _mm_xor_si128(*t3, t4);
+	*t3 = _mm_xor_si128(*t3, *t2); /* ... plus the broadcast top word of *t1 */
+}
+
+BR_TARGET("sse2,aes") /* AES-256 key expansion, first half-step: updates *t1 from the keygenassist output in *t2 */
+static inline void
+expand_step256_1(__m128i *t1, __m128i *t2)
+{
+	__m128i t4;
+
+	*t2 = _mm_shuffle_epi32(*t2, 0xFF); /* broadcast the top 32-bit word of the keygenassist output */
+	t4 = _mm_slli_si128(*t1, 0x4); /* XOR *t1 with itself shifted by 4, 8 and 12 bytes ... */
+	*t1 = _mm_xor_si128(*t1, t4);
+	t4 = _mm_slli_si128(t4, 0x4);
+	*t1 = _mm_xor_si128(*t1, t4);
+	t4 = _mm_slli_si128(t4, 0x4);
+	*t1 = _mm_xor_si128(*t1, t4);
+	*t1 = _mm_xor_si128(*t1, *t2); /* ... then mix in the broadcast word */
+}
+
+BR_TARGET("sse2,aes") /* AES-256 key expansion, second half-step: updates *t3 from the freshly computed *t1 (which is only read) */
+static inline void
+expand_step256_2(__m128i *t1, __m128i *t3)
+{
+	__m128i t2, t4;
+
+	t4 = _mm_aeskeygenassist_si128(*t1, 0x0); /* keygenassist with a zero round constant */
+	t2 = _mm_shuffle_epi32(t4, 0xAA); /* broadcast 32-bit word 2 of that result */
+	t4 = _mm_slli_si128(*t3, 0x4); /* XOR *t3 with itself shifted by 4, 8 and 12 bytes ... */
+	*t3 = _mm_xor_si128(*t3, t4);
+	t4 = _mm_slli_si128(t4, 0x4);
+	*t3 = _mm_xor_si128(*t3, t4);
+	t4 = _mm_slli_si128(t4, 0x4);
+	*t3 = _mm_xor_si128(*t3, t4);
+	*t3 = _mm_xor_si128(*t3, t2); /* ... then mix in the broadcast word */
+}
+
+/*
+ * Perform key schedule for AES, encryption direction. Subkeys are written
+ * in sk[] (num_rounds + 1 entries), and the number of rounds (10, 12 or 14)
+ * is returned. Key length MUST be 16, 24 or 32 bytes; otherwise 0 is returned.
+ */
+BR_TARGET("sse2,aes")
+static unsigned
+x86ni_keysched(__m128i *sk, const void *key, size_t len)
+{
+	const unsigned char *kb;
+
+#define KEXP128(k, i, rcon)   do { \
+		k = expand_step128(k, _mm_aeskeygenassist_si128(k, rcon)); \
+		sk[i] = k; \
+	} while (0)
+
+#define KEXP192(i, rcon1, rcon2)   do { \
+		sk[(i) + 0] = t1; \
+		sk[(i) + 1] = t3; \
+		t2 = _mm_aeskeygenassist_si128(t3, rcon1); \
+		expand_step192(&t1, &t2, &t3); \
+		sk[(i) + 1] = _mm_castpd_si128(_mm_shuffle_pd( \
+			_mm_castsi128_pd(sk[(i) + 1]), \
+			_mm_castsi128_pd(t1), 0)); \
+		sk[(i) + 2] = _mm_castpd_si128(_mm_shuffle_pd( \
+			_mm_castsi128_pd(t1), \
+			_mm_castsi128_pd(t3), 1)); \
+		t2 = _mm_aeskeygenassist_si128(t3, rcon2); \
+		expand_step192(&t1, &t2, &t3); \
+	} while (0)
+
+#define KEXP256(i, rcon)   do { \
+		sk[(i) + 0] = t3; \
+		t2 = _mm_aeskeygenassist_si128(t3, rcon); \
+		expand_step256_1(&t1, &t2); \
+		sk[(i) + 1] = t1; \
+		expand_step256_2(&t1, &t3); \
+	} while (0)
+
+	kb = key;
+	switch (len) {
+		__m128i t1, t2, t3; /* scratch registers shared by the case bodies and the KEXP* macros above */
+
+	case 16: /* AES-128: sk[0] is the key itself, then ten expansion steps fill sk[1..10] */
+		t1 = _mm_loadu_si128((const void *)kb);
+		sk[0] = t1;
+		KEXP128(t1,  1, 0x01);
+		KEXP128(t1,  2, 0x02);
+		KEXP128(t1,  3, 0x04);
+		KEXP128(t1,  4, 0x08);
+		KEXP128(t1,  5, 0x10);
+		KEXP128(t1,  6, 0x20);
+		KEXP128(t1,  7, 0x40);
+		KEXP128(t1,  8, 0x80);
+		KEXP128(t1,  9, 0x1B);
+		KEXP128(t1, 10, 0x36);
+		return 10;
+
+	case 24: /* AES-192: each KEXP192 writes three subkeys (sk[i..i+2]); sk[12] is stored last */
+		t1 = _mm_loadu_si128((const void *)kb);
+		t3 = _mm_loadu_si128((const void *)(kb + 8)); /* load key bytes 8..23 (overlaps t1) ... */
+		t3 = _mm_shuffle_epi32(t3, 0x4E); /* ... and swap the two 64-bit halves so bytes 16..23 sit in the low half */
+		KEXP192(0, 0x01, 0x02);
+		KEXP192(3, 0x04, 0x08);
+		KEXP192(6, 0x10, 0x20);
+		KEXP192(9, 0x40, 0x80);
+		sk[12] = t1;
+		return 12;
+
+	case 32: /* AES-256: sk[0] is the first key half; each KEXP256 writes sk[i] and sk[i + 1] */
+		t1 = _mm_loadu_si128((const void *)kb);
+		t3 = _mm_loadu_si128((const void *)(kb + 16));
+		sk[0] = t1;
+		KEXP256( 1, 0x01);
+		KEXP256( 3, 0x02);
+		KEXP256( 5, 0x04);
+		KEXP256( 7, 0x08);
+		KEXP256( 9, 0x10);
+		KEXP256(11, 0x20);
+		sk[13] = t3; /* last pair written by hand: no second half-step is needed after sk[14] */
+		t2 = _mm_aeskeygenassist_si128(t3, 0x40);
+		expand_step256_1(&t1, &t2);
+		sk[14] = t1;
+		return 14;
+
+	default: /* unsupported key length: sk[] is left untouched */
+		return 0;
+	}
+
+#undef KEXP128
+#undef KEXP192
+#undef KEXP256
+}
+
+/* see inner.h -- encryption key schedule: writes (num_rounds + 1) * 16 bytes of subkeys to skni and returns num_rounds */
+BR_TARGET("sse2,aes")
+unsigned
+br_aes_x86ni_keysched_enc(unsigned char *skni, const void *key, size_t len)
+{
+	__m128i sk[15]; /* room for the largest schedule (14 rounds + 1) */
+	unsigned num_rounds;
+
+	num_rounds = x86ni_keysched(sk, key, len); /* returns 0 without writing sk[] when len is not 16, 24 or 32 */
+	memcpy(skni, sk, (num_rounds + 1) << 4); /* byte copy, so skni needs no particular alignment */
+	return num_rounds;
+}
+
+/* see inner.h -- decryption key schedule: subkeys are written to skni in reverse order, the middle ones passed through _mm_aesimc_si128(); returns num_rounds */
+BR_TARGET("sse2,aes")
+unsigned
+br_aes_x86ni_keysched_dec(unsigned char *skni, const void *key, size_t len)
+{
+	__m128i sk[15]; /* room for the largest schedule (14 rounds + 1) */
+	unsigned u, num_rounds;
+
+	num_rounds = x86ni_keysched(sk, key, len); /* returns 0 without writing sk[] when len is not 16, 24 or 32 */
+	_mm_storeu_si128((void *)skni, sk[num_rounds]); /* the last encryption subkey becomes the first decryption subkey, unchanged */
+	for (u = 1; u < num_rounds; u ++) { /* middle subkeys: reversed and transformed for use with _mm_aesdec_si128() */
+		_mm_storeu_si128((void *)(skni + (u << 4)),
+			_mm_aesimc_si128(sk[num_rounds - u]));
+	}
+	_mm_storeu_si128((void *)(skni + (num_rounds << 4)), sk[0]); /* the first encryption subkey goes last, unchanged */
+	return num_rounds;
+}
+
+BR_TARGETS_X86_DOWN
+
+#endif
diff --git a/third_party/bearssl/src/aes_x86ni_cbcdec.c b/third_party/bearssl/src/aes_x86ni_cbcdec.c
new file mode 100644
index 0000000..862b1b5
--- /dev/null
+++ b/third_party/bearssl/src/aes_x86ni_cbcdec.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+#if BR_AES_X86NI
+
+/* see bearssl_block.h -- returns the AES-NI CBC-decryption vtable, or NULL when br_aes_x86ni_supported() reports no support */
+const br_block_cbcdec_class *
+br_aes_x86ni_cbcdec_get_vtable(void)
+{
+	return br_aes_x86ni_supported() ? &br_aes_x86ni_cbcdec_vtable : NULL;
+}
+
+/* see bearssl_block.h -- set up an AES-NI CBC-decryption context from a raw key of len bytes */
+void
+br_aes_x86ni_cbcdec_init(br_aes_x86ni_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_x86ni_cbcdec_vtable; /* bind the context to this implementation's vtable */
+	ctx->num_rounds = br_aes_x86ni_keysched_dec(ctx->skey.skni, key, len); /* decryption-direction subkeys go to ctx->skey.skni */
+}
+
+BR_TARGETS_X86_UP
+
+/* see bearssl_block.h -- AES-NI CBC decryption of data in place; iv is read, then overwritten with the last ciphertext block */
+BR_TARGET("sse2,aes")
+void
+br_aes_x86ni_cbcdec_run(const br_aes_x86ni_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned num_rounds;
+	__m128i sk[15], ivx;
+	unsigned u;
+
+	buf = data;
+	ivx = _mm_loadu_si128(iv); /* current chaining value */
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) { /* copy the num_rounds + 1 subkeys into local __m128i values */
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+	while (len > 0) { /* up to four 16-byte blocks per iteration; blocks are always read and written whole, so len is assumed to be a multiple of 16 */
+		__m128i x0, x1, x2, x3, e0, e1, e2, e3;
+
+		x0 = _mm_loadu_si128((void *)(buf +  0));
+		if (len >= 64) { /* four real blocks available */
+			x1 = _mm_loadu_si128((void *)(buf + 16));
+			x2 = _mm_loadu_si128((void *)(buf + 32));
+			x3 = _mm_loadu_si128((void *)(buf + 48));
+		} else { /* tail: unused lanes are filled with copies so the 4-wide code below still runs; x3 always ends up as the last real block */
+			x0 = _mm_loadu_si128((void *)(buf +  0)); /* repeats the load of x0 done just above */
+			if (len >= 32) {
+				x1 = _mm_loadu_si128((void *)(buf + 16));
+				if (len >= 48) {
+					x2 = _mm_loadu_si128(
+						(void *)(buf + 32));
+					x3 = x2; /* three real blocks: lane 3 duplicates the last one */
+				} else {
+					x2 = x0; /* two real blocks: lanes 2 and 3 duplicate them */
+					x3 = x1;
+				}
+			} else {
+				x1 = x0; /* one real block: every lane duplicates it */
+				x2 = x0;
+				x3 = x0;
+			}
+		}
+		e0 = x0; /* keep the ciphertext blocks: they are the chaining values XORed in after decryption */
+		e1 = x1;
+		e2 = x2;
+		e3 = x3;
+		x0 = _mm_xor_si128(x0, sk[0]); /* XOR with the first subkey, then nine _mm_aesdec_si128() rounds common to all key sizes */
+		x1 = _mm_xor_si128(x1, sk[0]);
+		x2 = _mm_xor_si128(x2, sk[0]);
+		x3 = _mm_xor_si128(x3, sk[0]);
+		x0 = _mm_aesdec_si128(x0, sk[1]);
+		x1 = _mm_aesdec_si128(x1, sk[1]);
+		x2 = _mm_aesdec_si128(x2, sk[1]);
+		x3 = _mm_aesdec_si128(x3, sk[1]);
+		x0 = _mm_aesdec_si128(x0, sk[2]);
+		x1 = _mm_aesdec_si128(x1, sk[2]);
+		x2 = _mm_aesdec_si128(x2, sk[2]);
+		x3 = _mm_aesdec_si128(x3, sk[2]);
+		x0 = _mm_aesdec_si128(x0, sk[3]);
+		x1 = _mm_aesdec_si128(x1, sk[3]);
+		x2 = _mm_aesdec_si128(x2, sk[3]);
+		x3 = _mm_aesdec_si128(x3, sk[3]);
+		x0 = _mm_aesdec_si128(x0, sk[4]);
+		x1 = _mm_aesdec_si128(x1, sk[4]);
+		x2 = _mm_aesdec_si128(x2, sk[4]);
+		x3 = _mm_aesdec_si128(x3, sk[4]);
+		x0 = _mm_aesdec_si128(x0, sk[5]);
+		x1 = _mm_aesdec_si128(x1, sk[5]);
+		x2 = _mm_aesdec_si128(x2, sk[5]);
+		x3 = _mm_aesdec_si128(x3, sk[5]);
+		x0 = _mm_aesdec_si128(x0, sk[6]);
+		x1 = _mm_aesdec_si128(x1, sk[6]);
+		x2 = _mm_aesdec_si128(x2, sk[6]);
+		x3 = _mm_aesdec_si128(x3, sk[6]);
+		x0 = _mm_aesdec_si128(x0, sk[7]);
+		x1 = _mm_aesdec_si128(x1, sk[7]);
+		x2 = _mm_aesdec_si128(x2, sk[7]);
+		x3 = _mm_aesdec_si128(x3, sk[7]);
+		x0 = _mm_aesdec_si128(x0, sk[8]);
+		x1 = _mm_aesdec_si128(x1, sk[8]);
+		x2 = _mm_aesdec_si128(x2, sk[8]);
+		x3 = _mm_aesdec_si128(x3, sk[8]);
+		x0 = _mm_aesdec_si128(x0, sk[9]);
+		x1 = _mm_aesdec_si128(x1, sk[9]);
+		x2 = _mm_aesdec_si128(x2, sk[9]);
+		x3 = _mm_aesdec_si128(x3, sk[9]);
+		if (num_rounds == 10) { /* remaining rounds depend on num_rounds: 10, 12, and anything else is handled as 14 */
+			x0 = _mm_aesdeclast_si128(x0, sk[10]);
+			x1 = _mm_aesdeclast_si128(x1, sk[10]);
+			x2 = _mm_aesdeclast_si128(x2, sk[10]);
+			x3 = _mm_aesdeclast_si128(x3, sk[10]);
+		} else if (num_rounds == 12) {
+			x0 = _mm_aesdec_si128(x0, sk[10]);
+			x1 = _mm_aesdec_si128(x1, sk[10]);
+			x2 = _mm_aesdec_si128(x2, sk[10]);
+			x3 = _mm_aesdec_si128(x3, sk[10]);
+			x0 = _mm_aesdec_si128(x0, sk[11]);
+			x1 = _mm_aesdec_si128(x1, sk[11]);
+			x2 = _mm_aesdec_si128(x2, sk[11]);
+			x3 = _mm_aesdec_si128(x3, sk[11]);
+			x0 = _mm_aesdeclast_si128(x0, sk[12]);
+			x1 = _mm_aesdeclast_si128(x1, sk[12]);
+			x2 = _mm_aesdeclast_si128(x2, sk[12]);
+			x3 = _mm_aesdeclast_si128(x3, sk[12]);
+		} else {
+			x0 = _mm_aesdec_si128(x0, sk[10]);
+			x1 = _mm_aesdec_si128(x1, sk[10]);
+			x2 = _mm_aesdec_si128(x2, sk[10]);
+			x3 = _mm_aesdec_si128(x3, sk[10]);
+			x0 = _mm_aesdec_si128(x0, sk[11]);
+			x1 = _mm_aesdec_si128(x1, sk[11]);
+			x2 = _mm_aesdec_si128(x2, sk[11]);
+			x3 = _mm_aesdec_si128(x3, sk[11]);
+			x0 = _mm_aesdec_si128(x0, sk[12]);
+			x1 = _mm_aesdec_si128(x1, sk[12]);
+			x2 = _mm_aesdec_si128(x2, sk[12]);
+			x3 = _mm_aesdec_si128(x3, sk[12]);
+			x0 = _mm_aesdec_si128(x0, sk[13]);
+			x1 = _mm_aesdec_si128(x1, sk[13]);
+			x2 = _mm_aesdec_si128(x2, sk[13]);
+			x3 = _mm_aesdec_si128(x3, sk[13]);
+			x0 = _mm_aesdeclast_si128(x0, sk[14]);
+			x1 = _mm_aesdeclast_si128(x1, sk[14]);
+			x2 = _mm_aesdeclast_si128(x2, sk[14]);
+			x3 = _mm_aesdeclast_si128(x3, sk[14]);
+		}
+		x0 = _mm_xor_si128(x0, ivx); /* CBC: XOR each decrypted block with the ciphertext block that preceded it (the IV for the first) */
+		x1 = _mm_xor_si128(x1, e0);
+		x2 = _mm_xor_si128(x2, e1);
+		x3 = _mm_xor_si128(x3, e2);
+		ivx = e3; /* e3 holds the last real ciphertext block of this batch, including in the tail cases */
+		_mm_storeu_si128((void *)(buf +  0), x0);
+		if (len >= 64) {
+			_mm_storeu_si128((void *)(buf + 16), x1);
+			_mm_storeu_si128((void *)(buf + 32), x2);
+			_mm_storeu_si128((void *)(buf + 48), x3);
+			buf += 64;
+			len -= 64;
+		} else { /* tail: store only the lanes that hold real blocks, then stop */
+			if (len >= 32) {
+				_mm_storeu_si128((void *)(buf + 16), x1);
+				if (len >= 48) {
+					_mm_storeu_si128(
+						(void *)(buf + 32), x2);
+				}
+			}
+			break;
+		}
+	}
+	_mm_storeu_si128(iv, ivx); /* write the updated chaining value back into the caller's iv buffer */
+}
+
+BR_TARGETS_X86_DOWN
+
+/* see bearssl_block.h -- vtable exposing this implementation as a br_block_cbcdec_class */
+const br_block_cbcdec_class br_aes_x86ni_cbcdec_vtable = {
+	sizeof(br_aes_x86ni_cbcdec_keys), /* size of the context structure */
+	16, /* presumably the block size in bytes -- confirm against br_block_cbcdec_class */
+	4, /* presumably log2 of the block size -- confirm against br_block_cbcdec_class */
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t)) /* cast to the generic, vtable-pointer-based signature */
+		&br_aes_x86ni_cbcdec_init,
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_aes_x86ni_cbcdec_run
+};
+
+#else
+
+/* see bearssl_block.h -- fallback when BR_AES_X86NI is not enabled at compile time: no vtable is available */
+const br_block_cbcdec_class *
+br_aes_x86ni_cbcdec_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
diff --git a/third_party/bearssl/src/aes_x86ni_cbcenc.c b/third_party/bearssl/src/aes_x86ni_cbcenc.c
new file mode 100644
index 0000000..85feecd
--- /dev/null
+++ b/third_party/bearssl/src/aes_x86ni_cbcenc.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+#if BR_AES_X86NI
+
+/* see bearssl_block.h -- returns the AES-NI CBC-encryption vtable, or NULL when br_aes_x86ni_supported() reports no support */
+const br_block_cbcenc_class *
+br_aes_x86ni_cbcenc_get_vtable(void)
+{
+	return br_aes_x86ni_supported() ? &br_aes_x86ni_cbcenc_vtable : NULL;
+}
+
+/* see bearssl_block.h -- set up an AES-NI CBC-encryption context from a raw key of len bytes */
+void
+br_aes_x86ni_cbcenc_init(br_aes_x86ni_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_x86ni_cbcenc_vtable; /* bind the context to this implementation's vtable */
+	ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len); /* encryption-direction subkeys go to ctx->skey.skni */
+}
+
+BR_TARGETS_X86_UP
+
+/* see bearssl_block.h -- AES-NI CBC encryption of data in place; iv is read, then overwritten with the last ciphertext block */
+BR_TARGET("sse2,aes")
+void
+br_aes_x86ni_cbcenc_run(const br_aes_x86ni_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned num_rounds;
+	__m128i sk[15], ivx;
+	unsigned u;
+
+	buf = data;
+	ivx = _mm_loadu_si128(iv); /* current chaining value */
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) { /* copy the num_rounds + 1 subkeys into local __m128i values */
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+	while (len > 0) { /* one block per iteration (each block depends on the previous ciphertext); 16 is subtracted each time, so len must be a multiple of 16 */
+		__m128i x;
+
+		x = _mm_xor_si128(_mm_loadu_si128((void *)buf), ivx); /* CBC: XOR the plaintext block with the chaining value */
+		x = _mm_xor_si128(x, sk[0]); /* XOR with the first subkey, then nine _mm_aesenc_si128() rounds common to all key sizes */
+		x = _mm_aesenc_si128(x, sk[1]);
+		x = _mm_aesenc_si128(x, sk[2]);
+		x = _mm_aesenc_si128(x, sk[3]);
+		x = _mm_aesenc_si128(x, sk[4]);
+		x = _mm_aesenc_si128(x, sk[5]);
+		x = _mm_aesenc_si128(x, sk[6]);
+		x = _mm_aesenc_si128(x, sk[7]);
+		x = _mm_aesenc_si128(x, sk[8]);
+		x = _mm_aesenc_si128(x, sk[9]);
+		if (num_rounds == 10) { /* remaining rounds depend on num_rounds: 10, 12, and anything else is handled as 14 */
+			x = _mm_aesenclast_si128(x, sk[10]);
+		} else if (num_rounds == 12) {
+			x = _mm_aesenc_si128(x, sk[10]);
+			x = _mm_aesenc_si128(x, sk[11]);
+			x = _mm_aesenclast_si128(x, sk[12]);
+		} else {
+			x = _mm_aesenc_si128(x, sk[10]);
+			x = _mm_aesenc_si128(x, sk[11]);
+			x = _mm_aesenc_si128(x, sk[12]);
+			x = _mm_aesenc_si128(x, sk[13]);
+			x = _mm_aesenclast_si128(x, sk[14]);
+		}
+		ivx = x; /* this ciphertext block is the chaining value for the next one */
+		_mm_storeu_si128((void *)buf, x);
+		buf += 16;
+		len -= 16;
+	}
+	_mm_storeu_si128(iv, ivx); /* write the updated chaining value back into the caller's iv buffer */
+}
+
+BR_TARGETS_X86_DOWN
+
+/* see bearssl_block.h -- vtable exposing this implementation as a br_block_cbcenc_class */
+const br_block_cbcenc_class br_aes_x86ni_cbcenc_vtable = {
+	sizeof(br_aes_x86ni_cbcenc_keys), /* size of the context structure */
+	16, /* presumably the block size in bytes -- confirm against br_block_cbcenc_class */
+	4, /* presumably log2 of the block size -- confirm against br_block_cbcenc_class */
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t)) /* cast to the generic, vtable-pointer-based signature */
+		&br_aes_x86ni_cbcenc_init,
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+		&br_aes_x86ni_cbcenc_run
+};
+
+#else
+
+/* see bearssl_block.h -- fallback when BR_AES_X86NI is not enabled at compile time: no vtable is available */
+const br_block_cbcenc_class *
+br_aes_x86ni_cbcenc_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
diff --git a/third_party/bearssl/src/aes_x86ni_ctr.c b/third_party/bearssl/src/aes_x86ni_ctr.c
new file mode 100644
index 0000000..1cddd60
--- /dev/null
+++ b/third_party/bearssl/src/aes_x86ni_ctr.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+#if BR_AES_X86NI
+
+/* see bearssl_block.h -- NULL unless br_aes_x86ni_supported() says yes */
+const br_block_ctr_class *
+br_aes_x86ni_ctr_get_vtable(void)
+{
+	return !br_aes_x86ni_supported() ? NULL : &br_aes_x86ni_ctr_vtable;
+}
+
+/* see bearssl_block.h -- runs the key schedule helper, binds the vtable */
+void
+br_aes_x86ni_ctr_init(br_aes_x86ni_ctr_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len);
+	ctx->vtable = &br_aes_x86ni_ctr_vtable;
+}
+
+BR_TARGETS_X86_UP
+
+/* see bearssl_block.h -- XORs the AES-CTR keystream into data[], in place */
+BR_TARGET("sse2,sse4.1,aes")
+uint32_t
+br_aes_x86ni_ctr_run(const br_aes_x86ni_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned char ivbuf[16];
+	unsigned num_rounds;
+	__m128i sk[15];
+	__m128i ivx;
+	unsigned u;
+
+	buf = data;
+	memcpy(ivbuf, iv, 12);	/* lane 3 of ivx is replaced by the counter */
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) {
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+	ivx = _mm_loadu_si128((void *)ivbuf);
+	while (len > 0) {
+		__m128i x0, x1, x2, x3;
+
+		x0 = _mm_insert_epi32(ivx, br_bswap32(cc + 0), 3);
+		x1 = _mm_insert_epi32(ivx, br_bswap32(cc + 1), 3);
+		x2 = _mm_insert_epi32(ivx, br_bswap32(cc + 2), 3);
+		x3 = _mm_insert_epi32(ivx, br_bswap32(cc + 3), 3);
+		x0 = _mm_xor_si128(x0, sk[0]);
+		x1 = _mm_xor_si128(x1, sk[0]);
+		x2 = _mm_xor_si128(x2, sk[0]);
+		x3 = _mm_xor_si128(x3, sk[0]);
+		x0 = _mm_aesenc_si128(x0, sk[1]);
+		x1 = _mm_aesenc_si128(x1, sk[1]);
+		x2 = _mm_aesenc_si128(x2, sk[1]);
+		x3 = _mm_aesenc_si128(x3, sk[1]);
+		x0 = _mm_aesenc_si128(x0, sk[2]);
+		x1 = _mm_aesenc_si128(x1, sk[2]);
+		x2 = _mm_aesenc_si128(x2, sk[2]);
+		x3 = _mm_aesenc_si128(x3, sk[2]);
+		x0 = _mm_aesenc_si128(x0, sk[3]);
+		x1 = _mm_aesenc_si128(x1, sk[3]);
+		x2 = _mm_aesenc_si128(x2, sk[3]);
+		x3 = _mm_aesenc_si128(x3, sk[3]);
+		x0 = _mm_aesenc_si128(x0, sk[4]);
+		x1 = _mm_aesenc_si128(x1, sk[4]);
+		x2 = _mm_aesenc_si128(x2, sk[4]);
+		x3 = _mm_aesenc_si128(x3, sk[4]);
+		x0 = _mm_aesenc_si128(x0, sk[5]);
+		x1 = _mm_aesenc_si128(x1, sk[5]);
+		x2 = _mm_aesenc_si128(x2, sk[5]);
+		x3 = _mm_aesenc_si128(x3, sk[5]);
+		x0 = _mm_aesenc_si128(x0, sk[6]);
+		x1 = _mm_aesenc_si128(x1, sk[6]);
+		x2 = _mm_aesenc_si128(x2, sk[6]);
+		x3 = _mm_aesenc_si128(x3, sk[6]);
+		x0 = _mm_aesenc_si128(x0, sk[7]);
+		x1 = _mm_aesenc_si128(x1, sk[7]);
+		x2 = _mm_aesenc_si128(x2, sk[7]);
+		x3 = _mm_aesenc_si128(x3, sk[7]);
+		x0 = _mm_aesenc_si128(x0, sk[8]);
+		x1 = _mm_aesenc_si128(x1, sk[8]);
+		x2 = _mm_aesenc_si128(x2, sk[8]);
+		x3 = _mm_aesenc_si128(x3, sk[8]);
+		x0 = _mm_aesenc_si128(x0, sk[9]);
+		x1 = _mm_aesenc_si128(x1, sk[9]);
+		x2 = _mm_aesenc_si128(x2, sk[9]);
+		x3 = _mm_aesenc_si128(x3, sk[9]);
+		if (num_rounds == 10) {
+			x0 = _mm_aesenclast_si128(x0, sk[10]);
+			x1 = _mm_aesenclast_si128(x1, sk[10]);
+			x2 = _mm_aesenclast_si128(x2, sk[10]);
+			x3 = _mm_aesenclast_si128(x3, sk[10]);
+		} else if (num_rounds == 12) {
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x2 = _mm_aesenc_si128(x2, sk[10]);
+			x3 = _mm_aesenc_si128(x3, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x2 = _mm_aesenc_si128(x2, sk[11]);
+			x3 = _mm_aesenc_si128(x3, sk[11]);
+			x0 = _mm_aesenclast_si128(x0, sk[12]);
+			x1 = _mm_aesenclast_si128(x1, sk[12]);
+			x2 = _mm_aesenclast_si128(x2, sk[12]);
+			x3 = _mm_aesenclast_si128(x3, sk[12]);
+		} else {
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x2 = _mm_aesenc_si128(x2, sk[10]);
+			x3 = _mm_aesenc_si128(x3, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x2 = _mm_aesenc_si128(x2, sk[11]);
+			x3 = _mm_aesenc_si128(x3, sk[11]);
+			x0 = _mm_aesenc_si128(x0, sk[12]);
+			x1 = _mm_aesenc_si128(x1, sk[12]);
+			x2 = _mm_aesenc_si128(x2, sk[12]);
+			x3 = _mm_aesenc_si128(x3, sk[12]);
+			x0 = _mm_aesenc_si128(x0, sk[13]);
+			x1 = _mm_aesenc_si128(x1, sk[13]);
+			x2 = _mm_aesenc_si128(x2, sk[13]);
+			x3 = _mm_aesenc_si128(x3, sk[13]);
+			x0 = _mm_aesenclast_si128(x0, sk[14]);
+			x1 = _mm_aesenclast_si128(x1, sk[14]);
+			x2 = _mm_aesenclast_si128(x2, sk[14]);
+			x3 = _mm_aesenclast_si128(x3, sk[14]);
+		}
+		if (len >= 64) {
+			x0 = _mm_xor_si128(x0,
+				_mm_loadu_si128((void *)(buf +  0)));
+			x1 = _mm_xor_si128(x1,
+				_mm_loadu_si128((void *)(buf + 16)));
+			x2 = _mm_xor_si128(x2,
+				_mm_loadu_si128((void *)(buf + 32)));
+			x3 = _mm_xor_si128(x3,
+				_mm_loadu_si128((void *)(buf + 48)));
+			_mm_storeu_si128((void *)(buf +  0), x0);
+			_mm_storeu_si128((void *)(buf + 16), x1);
+			_mm_storeu_si128((void *)(buf + 32), x2);
+			_mm_storeu_si128((void *)(buf + 48), x3);
+			buf += 64;
+			len -= 64;
+			cc += 4;
+		} else {
+			unsigned char tmp[64];	/* keystream for the short tail */
+
+			_mm_storeu_si128((void *)(tmp +  0), x0);
+			_mm_storeu_si128((void *)(tmp + 16), x1);
+			_mm_storeu_si128((void *)(tmp + 32), x2);
+			_mm_storeu_si128((void *)(tmp + 48), x3);
+			for (u = 0; u < len; u ++) {
+				buf[u] ^= tmp[u];
+			}
+			cc += (uint32_t)len >> 4;	/* partial block not counted */
+			break;
+		}
+	}
+	return cc;
+}
+
+BR_TARGETS_X86_DOWN
+
+/* see bearssl_block.h -- dispatch table for the AES-NI CTR code */
+const br_block_ctr_class br_aes_x86ni_ctr_vtable = {
+	sizeof(br_aes_x86ni_ctr_keys),	/* size of the key context */
+	16,	/* presumably the block size in bytes -- confirm in header */
+	4,	/* presumably log2 of the block size -- confirm in header */
+	(void (*)(const br_block_ctr_class **, const void *, size_t))
+		&br_aes_x86ni_ctr_init,	/* cast to the generic signature */
+	(uint32_t (*)(const br_block_ctr_class *const *,
+		const void *, uint32_t, void *, size_t))
+		&br_aes_x86ni_ctr_run	/* CTR processing, in place */
+};
+
+#else
+
+/* see bearssl_block.h -- BR_AES_X86NI is off: no AES-NI CTR vtable exists */
+const br_block_ctr_class *
+br_aes_x86ni_ctr_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
diff --git a/third_party/bearssl/src/aes_x86ni_ctrcbc.c b/third_party/bearssl/src/aes_x86ni_ctrcbc.c
new file mode 100644
index 0000000..f57fead
--- /dev/null
+++ b/third_party/bearssl/src/aes_x86ni_ctrcbc.c
@@ -0,0 +1,596 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+#if BR_AES_X86NI
+
+/* see bearssl_block.h -- NULL unless br_aes_x86ni_supported() says yes */
+const br_block_ctrcbc_class *
+br_aes_x86ni_ctrcbc_get_vtable(void)
+{
+	return !br_aes_x86ni_supported() ? NULL : &br_aes_x86ni_ctrcbc_vtable;
+}
+
+/* see bearssl_block.h -- runs the key schedule helper, binds the vtable */
+void
+br_aes_x86ni_ctrcbc_init(br_aes_x86ni_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len);
+	ctx->vtable = &br_aes_x86ni_ctrcbc_vtable;
+}
+
+BR_TARGETS_X86_UP
+
+/* see bearssl_block.h -- CTR with a 128-bit big-endian counter kept in ctr[] */
+BR_TARGET("sse2,sse4.1,aes")
+void
+br_aes_x86ni_ctrcbc_ctr(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned num_rounds;
+	__m128i sk[15];
+	__m128i ivx0, ivx1, ivx2, ivx3;
+	__m128i erev, zero, one, four, notthree;
+	unsigned u;
+
+	buf = data;
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) {
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+
+	/*
+	 * Constants; erev is the shuffle mask that reverses all 16 bytes.
+	 */
+	erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
+		8, 9, 10, 11, 12, 13, 14, 15);
+	zero = _mm_setzero_si128();
+	one = _mm_set_epi64x(0, 1);
+	four = _mm_set_epi64x(0, 4);
+	notthree = _mm_sub_epi64(zero, four);	/* low lane ~3, high lane 0 */
+
+	/*
+	 * Decode the counter in big-endian and pre-increment the other
+	 * three counters.
+	 */
+	ivx0 = _mm_shuffle_epi8(_mm_loadu_si128((void *)ctr), erev);
+	ivx1 = _mm_add_epi64(ivx0, one);
+	ivx1 = _mm_sub_epi64(ivx1,
+		_mm_slli_si128(_mm_cmpeq_epi64(ivx1, zero), 8));
+	ivx2 = _mm_add_epi64(ivx1, one);
+	ivx2 = _mm_sub_epi64(ivx2,
+		_mm_slli_si128(_mm_cmpeq_epi64(ivx2, zero), 8));
+	ivx3 = _mm_add_epi64(ivx2, one);
+	ivx3 = _mm_sub_epi64(ivx3,
+		_mm_slli_si128(_mm_cmpeq_epi64(ivx3, zero), 8));
+	while (len > 0) {
+		__m128i x0, x1, x2, x3;
+
+		/*
+		 * Load counter values; we need to byteswap them because
+		 * the specification says that they use big-endian.
+		 */
+		x0 = _mm_shuffle_epi8(ivx0, erev);
+		x1 = _mm_shuffle_epi8(ivx1, erev);
+		x2 = _mm_shuffle_epi8(ivx2, erev);
+		x3 = _mm_shuffle_epi8(ivx3, erev);
+
+		x0 = _mm_xor_si128(x0, sk[0]);
+		x1 = _mm_xor_si128(x1, sk[0]);
+		x2 = _mm_xor_si128(x2, sk[0]);
+		x3 = _mm_xor_si128(x3, sk[0]);
+		x0 = _mm_aesenc_si128(x0, sk[1]);
+		x1 = _mm_aesenc_si128(x1, sk[1]);
+		x2 = _mm_aesenc_si128(x2, sk[1]);
+		x3 = _mm_aesenc_si128(x3, sk[1]);
+		x0 = _mm_aesenc_si128(x0, sk[2]);
+		x1 = _mm_aesenc_si128(x1, sk[2]);
+		x2 = _mm_aesenc_si128(x2, sk[2]);
+		x3 = _mm_aesenc_si128(x3, sk[2]);
+		x0 = _mm_aesenc_si128(x0, sk[3]);
+		x1 = _mm_aesenc_si128(x1, sk[3]);
+		x2 = _mm_aesenc_si128(x2, sk[3]);
+		x3 = _mm_aesenc_si128(x3, sk[3]);
+		x0 = _mm_aesenc_si128(x0, sk[4]);
+		x1 = _mm_aesenc_si128(x1, sk[4]);
+		x2 = _mm_aesenc_si128(x2, sk[4]);
+		x3 = _mm_aesenc_si128(x3, sk[4]);
+		x0 = _mm_aesenc_si128(x0, sk[5]);
+		x1 = _mm_aesenc_si128(x1, sk[5]);
+		x2 = _mm_aesenc_si128(x2, sk[5]);
+		x3 = _mm_aesenc_si128(x3, sk[5]);
+		x0 = _mm_aesenc_si128(x0, sk[6]);
+		x1 = _mm_aesenc_si128(x1, sk[6]);
+		x2 = _mm_aesenc_si128(x2, sk[6]);
+		x3 = _mm_aesenc_si128(x3, sk[6]);
+		x0 = _mm_aesenc_si128(x0, sk[7]);
+		x1 = _mm_aesenc_si128(x1, sk[7]);
+		x2 = _mm_aesenc_si128(x2, sk[7]);
+		x3 = _mm_aesenc_si128(x3, sk[7]);
+		x0 = _mm_aesenc_si128(x0, sk[8]);
+		x1 = _mm_aesenc_si128(x1, sk[8]);
+		x2 = _mm_aesenc_si128(x2, sk[8]);
+		x3 = _mm_aesenc_si128(x3, sk[8]);
+		x0 = _mm_aesenc_si128(x0, sk[9]);
+		x1 = _mm_aesenc_si128(x1, sk[9]);
+		x2 = _mm_aesenc_si128(x2, sk[9]);
+		x3 = _mm_aesenc_si128(x3, sk[9]);
+		if (num_rounds == 10) {
+			x0 = _mm_aesenclast_si128(x0, sk[10]);
+			x1 = _mm_aesenclast_si128(x1, sk[10]);
+			x2 = _mm_aesenclast_si128(x2, sk[10]);
+			x3 = _mm_aesenclast_si128(x3, sk[10]);
+		} else if (num_rounds == 12) {
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x2 = _mm_aesenc_si128(x2, sk[10]);
+			x3 = _mm_aesenc_si128(x3, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x2 = _mm_aesenc_si128(x2, sk[11]);
+			x3 = _mm_aesenc_si128(x3, sk[11]);
+			x0 = _mm_aesenclast_si128(x0, sk[12]);
+			x1 = _mm_aesenclast_si128(x1, sk[12]);
+			x2 = _mm_aesenclast_si128(x2, sk[12]);
+			x3 = _mm_aesenclast_si128(x3, sk[12]);
+		} else {
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x2 = _mm_aesenc_si128(x2, sk[10]);
+			x3 = _mm_aesenc_si128(x3, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x2 = _mm_aesenc_si128(x2, sk[11]);
+			x3 = _mm_aesenc_si128(x3, sk[11]);
+			x0 = _mm_aesenc_si128(x0, sk[12]);
+			x1 = _mm_aesenc_si128(x1, sk[12]);
+			x2 = _mm_aesenc_si128(x2, sk[12]);
+			x3 = _mm_aesenc_si128(x3, sk[12]);
+			x0 = _mm_aesenc_si128(x0, sk[13]);
+			x1 = _mm_aesenc_si128(x1, sk[13]);
+			x2 = _mm_aesenc_si128(x2, sk[13]);
+			x3 = _mm_aesenc_si128(x3, sk[13]);
+			x0 = _mm_aesenclast_si128(x0, sk[14]);
+			x1 = _mm_aesenclast_si128(x1, sk[14]);
+			x2 = _mm_aesenclast_si128(x2, sk[14]);
+			x3 = _mm_aesenclast_si128(x3, sk[14]);
+		}
+		if (len >= 64) {
+			x0 = _mm_xor_si128(x0,
+				_mm_loadu_si128((void *)(buf +  0)));
+			x1 = _mm_xor_si128(x1,
+				_mm_loadu_si128((void *)(buf + 16)));
+			x2 = _mm_xor_si128(x2,
+				_mm_loadu_si128((void *)(buf + 32)));
+			x3 = _mm_xor_si128(x3,
+				_mm_loadu_si128((void *)(buf + 48)));
+			_mm_storeu_si128((void *)(buf +  0), x0);
+			_mm_storeu_si128((void *)(buf + 16), x1);
+			_mm_storeu_si128((void *)(buf + 32), x2);
+			_mm_storeu_si128((void *)(buf + 48), x3);
+			buf += 64;
+			len -= 64;
+		} else {
+			unsigned char tmp[64];	/* keystream for the short tail */
+
+			_mm_storeu_si128((void *)(tmp +  0), x0);
+			_mm_storeu_si128((void *)(tmp + 16), x1);
+			_mm_storeu_si128((void *)(tmp + 32), x2);
+			_mm_storeu_si128((void *)(tmp + 48), x3);
+			for (u = 0; u < len; u ++) {
+				buf[u] ^= tmp[u];
+			}
+			switch (len) {	/* other lengths leave ivx0 as is */
+			case 16:
+				ivx0 = ivx1;
+				break;
+			case 32:
+				ivx0 = ivx2;
+				break;
+			case 48:
+				ivx0 = ivx3;
+				break;
+			}
+			break;
+		}
+
+		/*
+		 * Add 4 to each counter value. For carry propagation
+		 * into the upper 64-bit words, we would need to compare
+		 * the results with 4, but SSE2+ has only _signed_
+		 * comparisons. Instead, we mask out the low two bits,
+		 * and check whether the remaining bits are zero.
+		 */
+		ivx0 = _mm_add_epi64(ivx0, four);
+		ivx1 = _mm_add_epi64(ivx1, four);
+		ivx2 = _mm_add_epi64(ivx2, four);
+		ivx3 = _mm_add_epi64(ivx3, four);
+		ivx0 = _mm_sub_epi64(ivx0,
+			_mm_slli_si128(_mm_cmpeq_epi64(
+				_mm_and_si128(ivx0, notthree), zero), 8));
+		ivx1 = _mm_sub_epi64(ivx1,
+			_mm_slli_si128(_mm_cmpeq_epi64(
+				_mm_and_si128(ivx1, notthree), zero), 8));
+		ivx2 = _mm_sub_epi64(ivx2,
+			_mm_slli_si128(_mm_cmpeq_epi64(
+				_mm_and_si128(ivx2, notthree), zero), 8));
+		ivx3 = _mm_sub_epi64(ivx3,
+			_mm_slli_si128(_mm_cmpeq_epi64(
+				_mm_and_si128(ivx3, notthree), zero), 8));
+	}
+
+	/*
+	 * Write back new counter value. The loop took care to put the
+	 * right counter value in ivx0.
+	 */
+	_mm_storeu_si128((void *)ctr, _mm_shuffle_epi8(ivx0, erev));
+}
+
+/* see bearssl_block.h -- CBC-MAC update; len must be a multiple of 16 */
+BR_TARGET("sse2,sse4.1,aes")
+void
+br_aes_x86ni_ctrcbc_mac(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	unsigned num_rounds;
+	__m128i sk[15], ivx;
+	unsigned u;
+
+	buf = data;
+	ivx = _mm_loadu_si128(cbcmac);	/* running MAC value */
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) {
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+	while (len > 0) {
+		__m128i x;
+
+		x = _mm_xor_si128(_mm_loadu_si128((void *)buf), ivx);
+		x = _mm_xor_si128(x, sk[0]);
+		x = _mm_aesenc_si128(x, sk[1]);
+		x = _mm_aesenc_si128(x, sk[2]);
+		x = _mm_aesenc_si128(x, sk[3]);
+		x = _mm_aesenc_si128(x, sk[4]);
+		x = _mm_aesenc_si128(x, sk[5]);
+		x = _mm_aesenc_si128(x, sk[6]);
+		x = _mm_aesenc_si128(x, sk[7]);
+		x = _mm_aesenc_si128(x, sk[8]);
+		x = _mm_aesenc_si128(x, sk[9]);
+		if (num_rounds == 10) {
+			x = _mm_aesenclast_si128(x, sk[10]);
+		} else if (num_rounds == 12) {
+			x = _mm_aesenc_si128(x, sk[10]);
+			x = _mm_aesenc_si128(x, sk[11]);
+			x = _mm_aesenclast_si128(x, sk[12]);
+		} else {
+			x = _mm_aesenc_si128(x, sk[10]);
+			x = _mm_aesenc_si128(x, sk[11]);
+			x = _mm_aesenc_si128(x, sk[12]);
+			x = _mm_aesenc_si128(x, sk[13]);
+			x = _mm_aesenclast_si128(x, sk[14]);
+		}
+		ivx = x;	/* only the chaining value is kept; data[] is untouched */
+		buf += 16;
+		len -= 16;
+	}
+	_mm_storeu_si128(cbcmac, ivx);
+}
+
+/* see bearssl_block.h -- CTR encryption plus CBC-MAC over the ciphertext */
+BR_TARGET("sse2,sse4.1,aes")
+void
+br_aes_x86ni_ctrcbc_encrypt(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned num_rounds;
+	__m128i sk[15];
+	__m128i ivx, cmx;
+	__m128i erev, zero, one;
+	unsigned u;
+	int first_iter;
+
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) {
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+
+	/*
+	 * Constants; erev is the shuffle mask that reverses all 16 bytes.
+	 */
+	erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
+		8, 9, 10, 11, 12, 13, 14, 15);
+	zero = _mm_setzero_si128();
+	one = _mm_set_epi64x(0, 1);
+
+	/*
+	 * Decode the counter in big-endian.
+	 */
+	ivx = _mm_shuffle_epi8(_mm_loadu_si128(ctr), erev);
+	cmx = _mm_loadu_si128(cbcmac);
+
+	buf = data;
+	first_iter = 1;
+	while (len > 0) {
+		__m128i dx, x0, x1;
+
+		/*
+		 * Load initial values:
+		 *   dx   plaintext block of data
+		 *   x0   counter (for CTR encryption)
+		 *   x1   pending CBC-MAC value, encrypted alongside x0
+		 */
+		dx = _mm_loadu_si128((void *)buf);
+		x0 = _mm_shuffle_epi8(ivx, erev);
+		x1 = cmx;
+
+		x0 = _mm_xor_si128(x0, sk[0]);
+		x1 = _mm_xor_si128(x1, sk[0]);
+		x0 = _mm_aesenc_si128(x0, sk[1]);
+		x1 = _mm_aesenc_si128(x1, sk[1]);
+		x0 = _mm_aesenc_si128(x0, sk[2]);
+		x1 = _mm_aesenc_si128(x1, sk[2]);
+		x0 = _mm_aesenc_si128(x0, sk[3]);
+		x1 = _mm_aesenc_si128(x1, sk[3]);
+		x0 = _mm_aesenc_si128(x0, sk[4]);
+		x1 = _mm_aesenc_si128(x1, sk[4]);
+		x0 = _mm_aesenc_si128(x0, sk[5]);
+		x1 = _mm_aesenc_si128(x1, sk[5]);
+		x0 = _mm_aesenc_si128(x0, sk[6]);
+		x1 = _mm_aesenc_si128(x1, sk[6]);
+		x0 = _mm_aesenc_si128(x0, sk[7]);
+		x1 = _mm_aesenc_si128(x1, sk[7]);
+		x0 = _mm_aesenc_si128(x0, sk[8]);
+		x1 = _mm_aesenc_si128(x1, sk[8]);
+		x0 = _mm_aesenc_si128(x0, sk[9]);
+		x1 = _mm_aesenc_si128(x1, sk[9]);
+		if (num_rounds == 10) {
+			x0 = _mm_aesenclast_si128(x0, sk[10]);
+			x1 = _mm_aesenclast_si128(x1, sk[10]);
+		} else if (num_rounds == 12) {
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x0 = _mm_aesenclast_si128(x0, sk[12]);
+			x1 = _mm_aesenclast_si128(x1, sk[12]);
+		} else {
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x0 = _mm_aesenc_si128(x0, sk[12]);
+			x1 = _mm_aesenc_si128(x1, sk[12]);
+			x0 = _mm_aesenc_si128(x0, sk[13]);
+			x1 = _mm_aesenc_si128(x1, sk[13]);
+			x0 = _mm_aesenclast_si128(x0, sk[14]);
+			x1 = _mm_aesenclast_si128(x1, sk[14]);
+		}
+
+		x0 = _mm_xor_si128(x0, dx);	/* x0 is now the ciphertext block */
+		if (first_iter) {	/* x1 is not used on the first pass */
+			cmx = _mm_xor_si128(cmx, x0);
+			first_iter = 0;
+		} else {
+			cmx = _mm_xor_si128(x1, x0);
+		}
+		_mm_storeu_si128((void *)buf, x0);
+
+		buf += 16;
+		len -= 16;
+
+		/*
+		 * Increment the counter value.
+		 */
+		ivx = _mm_add_epi64(ivx, one);
+		ivx = _mm_sub_epi64(ivx,
+			_mm_slli_si128(_mm_cmpeq_epi64(ivx, zero), 8));
+
+		/*
+		 * If this was the last iteration, then compute the
+		 * extra block encryption to complete CBC-MAC.
+		 */
+		if (len == 0) {
+			cmx = _mm_xor_si128(cmx, sk[0]);
+			cmx = _mm_aesenc_si128(cmx, sk[1]);
+			cmx = _mm_aesenc_si128(cmx, sk[2]);
+			cmx = _mm_aesenc_si128(cmx, sk[3]);
+			cmx = _mm_aesenc_si128(cmx, sk[4]);
+			cmx = _mm_aesenc_si128(cmx, sk[5]);
+			cmx = _mm_aesenc_si128(cmx, sk[6]);
+			cmx = _mm_aesenc_si128(cmx, sk[7]);
+			cmx = _mm_aesenc_si128(cmx, sk[8]);
+			cmx = _mm_aesenc_si128(cmx, sk[9]);
+			if (num_rounds == 10) {
+				cmx = _mm_aesenclast_si128(cmx, sk[10]);
+			} else if (num_rounds == 12) {
+				cmx = _mm_aesenc_si128(cmx, sk[10]);
+				cmx = _mm_aesenc_si128(cmx, sk[11]);
+				cmx = _mm_aesenclast_si128(cmx, sk[12]);
+			} else {
+				cmx = _mm_aesenc_si128(cmx, sk[10]);
+				cmx = _mm_aesenc_si128(cmx, sk[11]);
+				cmx = _mm_aesenc_si128(cmx, sk[12]);
+				cmx = _mm_aesenc_si128(cmx, sk[13]);
+				cmx = _mm_aesenclast_si128(cmx, sk[14]);
+			}
+			break;
+		}
+	}
+
+	/*
+	 * Write back new counter value and CBC-MAC value.
+	 */
+	_mm_storeu_si128(ctr, _mm_shuffle_epi8(ivx, erev));
+	_mm_storeu_si128(cbcmac, cmx);
+}
+
+/* see bearssl_block.h -- CBC-MAC over the input data, plus CTR decryption */
+BR_TARGET("sse2,sse4.1,aes")
+void
+br_aes_x86ni_ctrcbc_decrypt(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned num_rounds;
+	__m128i sk[15];
+	__m128i ivx, cmx;
+	__m128i erev, zero, one;
+	unsigned u;
+
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) {
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+
+	/*
+	 * Constants; erev is the shuffle mask that reverses all 16 bytes.
+	 */
+	erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
+		8, 9, 10, 11, 12, 13, 14, 15);
+	zero = _mm_setzero_si128();
+	one = _mm_set_epi64x(0, 1);
+
+	/*
+	 * Decode the counter in big-endian.
+	 */
+	ivx = _mm_shuffle_epi8(_mm_loadu_si128(ctr), erev);
+	cmx = _mm_loadu_si128(cbcmac);
+
+	buf = data;
+	while (len > 0) {
+		__m128i dx, x0, x1;
+
+		/*
+		 * Load initial values:
+		 *   dx   encrypted block of data
+		 *   x0   counter (for CTR encryption)
+		 *   x1   input for CBC-MAC
+		 */
+		dx = _mm_loadu_si128((void *)buf);
+		x0 = _mm_shuffle_epi8(ivx, erev);
+		x1 = _mm_xor_si128(cmx, dx);
+
+		x0 = _mm_xor_si128(x0, sk[0]);
+		x1 = _mm_xor_si128(x1, sk[0]);
+		x0 = _mm_aesenc_si128(x0, sk[1]);
+		x1 = _mm_aesenc_si128(x1, sk[1]);
+		x0 = _mm_aesenc_si128(x0, sk[2]);
+		x1 = _mm_aesenc_si128(x1, sk[2]);
+		x0 = _mm_aesenc_si128(x0, sk[3]);
+		x1 = _mm_aesenc_si128(x1, sk[3]);
+		x0 = _mm_aesenc_si128(x0, sk[4]);
+		x1 = _mm_aesenc_si128(x1, sk[4]);
+		x0 = _mm_aesenc_si128(x0, sk[5]);
+		x1 = _mm_aesenc_si128(x1, sk[5]);
+		x0 = _mm_aesenc_si128(x0, sk[6]);
+		x1 = _mm_aesenc_si128(x1, sk[6]);
+		x0 = _mm_aesenc_si128(x0, sk[7]);
+		x1 = _mm_aesenc_si128(x1, sk[7]);
+		x0 = _mm_aesenc_si128(x0, sk[8]);
+		x1 = _mm_aesenc_si128(x1, sk[8]);
+		x0 = _mm_aesenc_si128(x0, sk[9]);
+		x1 = _mm_aesenc_si128(x1, sk[9]);
+		if (num_rounds == 10) {
+			x0 = _mm_aesenclast_si128(x0, sk[10]);
+			x1 = _mm_aesenclast_si128(x1, sk[10]);
+		} else if (num_rounds == 12) {
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x0 = _mm_aesenclast_si128(x0, sk[12]);
+			x1 = _mm_aesenclast_si128(x1, sk[12]);
+		} else {
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x0 = _mm_aesenc_si128(x0, sk[12]);
+			x1 = _mm_aesenc_si128(x1, sk[12]);
+			x0 = _mm_aesenc_si128(x0, sk[13]);
+			x1 = _mm_aesenc_si128(x1, sk[13]);
+			x0 = _mm_aesenclast_si128(x0, sk[14]);
+			x1 = _mm_aesenclast_si128(x1, sk[14]);
+		}
+		x0 = _mm_xor_si128(x0, dx);	/* x0 is now the decrypted block */
+		cmx = x1;	/* MAC chaining value, computed from the input block */
+		_mm_storeu_si128((void *)buf, x0);
+
+		buf += 16;
+		len -= 16;
+
+		/*
+		 * Increment the counter value.
+		 */
+		ivx = _mm_add_epi64(ivx, one);
+		ivx = _mm_sub_epi64(ivx,
+			_mm_slli_si128(_mm_cmpeq_epi64(ivx, zero), 8));
+	}
+
+	/*
+	 * Write back new counter value and CBC-MAC value.
+	 */
+	_mm_storeu_si128(ctr, _mm_shuffle_epi8(ivx, erev));
+	_mm_storeu_si128(cbcmac, cmx);
+}
+
+BR_TARGETS_X86_DOWN
+
+/* see bearssl_block.h -- dispatch table for the AES-NI CTR + CBC-MAC code */
+const br_block_ctrcbc_class br_aes_x86ni_ctrcbc_vtable = {
+	sizeof(br_aes_x86ni_ctrcbc_keys),	/* size of the key context */
+	16,	/* presumably the block size in bytes -- confirm in header */
+	4,	/* presumably log2 of the block size -- confirm in header */
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+		&br_aes_x86ni_ctrcbc_init,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_x86ni_ctrcbc_encrypt,	/* CTR encrypt + MAC */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_x86ni_ctrcbc_decrypt,	/* MAC + CTR decrypt */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_x86ni_ctrcbc_ctr,	/* CTR only */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_x86ni_ctrcbc_mac	/* CBC-MAC only */
+};
+
+#else
+
+/* see bearssl_block.h -- BR_AES_X86NI is off: no AES-NI CTR+CBC vtable */
+const br_block_ctrcbc_class *
+br_aes_x86ni_ctrcbc_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
diff --git a/third_party/bearssl/src/aesctr_drbg.c b/third_party/bearssl/src/aesctr_drbg.c
new file mode 100644
index 0000000..8dbd501
--- /dev/null
+++ b/third_party/bearssl/src/aesctr_drbg.c
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rand.h -- all-zero 16-byte start key, then seed is mixed in */
+void
+br_aesctr_drbg_init(br_aesctr_drbg_context *ctx,
+	const br_block_ctr_class *aesctr,
+	const void *seed, size_t len)
+{
+	unsigned char zkey[16] = { 0 };
+
+	ctx->vtable = &br_aesctr_drbg_vtable;
+	aesctr->init(&ctx->sk.vtable, zkey, sizeof zkey);
+	ctx->cc = 0;
+	/* The update replaces the placeholder key with a seed-derived one. */
+	br_aesctr_drbg_update(ctx, seed, len);
+}
+
+/* see bearssl_rand.h -- fills out[] with len pseudorandom bytes */
+void
+br_aesctr_drbg_generate(br_aesctr_drbg_context *ctx, void *out, size_t len)
+{
+	unsigned char *buf;
+	unsigned char iv[12];
+
+	buf = out;
+	memset(iv, 0, sizeof iv);
+	while (len > 0) {
+		size_t clen;
+
+		/*
+		 * We generate data by blocks of at most 65280 bytes. This
+		 * allows for unambiguously testing the counter overflow
+		 * condition; also, it should work on 16-bit architectures
+		 * (where 'size_t' is 16 bits only).
+		 */
+		clen = len;
+		if (clen > 65280) {
+			clen = 65280;
+		}
+
+		/*
+		 * Keep the counter within the configured limit.
+		 */
+		if ((uint32_t)(ctx->cc + ((clen + 15) >> 4)) > 32768) {
+			clen = (32768 - ctx->cc) << 4;
+			if (clen > len) {
+				clen = len;
+			}
+		}
+
+		/*
+		 * Run CTR. The counter is advanced here, rounding up: run()
+		 * may not count a trailing partial block (keystream reuse).
+		 */
+		memset(buf, 0, clen);
+		ctx->sk.vtable->run(&ctx->sk.vtable, iv, ctx->cc, buf, clen);
+		ctx->cc += (uint32_t)((clen + 15) >> 4);
+		buf += clen;
+		len -= clen;
+
+		/*
+		 * Every 32768 blocks, we force a state update.
+		 */
+		if (ctx->cc >= 32768) {
+			br_aesctr_drbg_update(ctx, NULL, 0);
+		}
+	}
+}
+
+/* see bearssl_rand.h -- rekeys from the current state and an optional seed */
+void
+br_aesctr_drbg_update(br_aesctr_drbg_context *ctx, const void *seed, size_t len)
+{
+	/*
+	 * We use a Hirose construction on AES-256 to make a hash function.
+	 * Function definition:
+	 *  - running state consists in two 16-byte blocks G and H
+	 *  - initial values of G and H are conventional
+	 *  - there is a fixed block-sized constant C
+	 *  - for next data block m:
+	 *      set AES key to H||m
+	 *      G' = E(G) xor G
+	 *      H' = E(G xor C) xor G xor C
+	 *      G <- G', H <- H'
+	 *  - once all blocks have been processed, output is H||G
+	 *
+	 * Constants:
+	 *   G_init = B6 B6 ... B6
+	 *   H_init = 5A 5A ... 5A
+	 *   C      = 01 00 ... 00
+	 *
+	 * With this hash function h(), we compute the new state as
+	 * follows:
+	 *  - produce a state-dependent value s as encryption of an
+	 *    all-one block with AES and the current key
+	 *  - compute the new key as the first 128 bits of h(s||seed)
+	 *
+	 * Original Hirose article:
+	 *    https://www.iacr.org/archive/fse2006/40470213/40470213.pdf
+	 */
+
+	unsigned char s[16], iv[12];
+	unsigned char G[16], H[16];
+	int first;
+
+	/*
+	 * Use an all-one IV to get a fresh output block that depends on the
+	 * current seed.
+	 */
+	memset(iv, 0xFF, sizeof iv);
+	memset(s, 0, 16);
+	ctx->sk.vtable->run(&ctx->sk.vtable, iv, 0xFFFFFFFF, s, 16);
+
+	/*
+	 * Set G[] and H[] to conventional start values.
+	 */
+	memset(G, 0xB6, sizeof G);
+	memset(H, 0x5A, sizeof H);
+
+	/*
+	 * Process the concatenation of the current state and the seed
+	 * with the custom hash function.
+	 */
+	first = 1;
+	for (;;) {
+		unsigned char tmp[32];
+		unsigned char newG[16];
+
+		/*
+		 * Assemble new key H||m into tmp[].
+		 */
+		memcpy(tmp, H, 16);
+		if (first) {
+			memcpy(tmp + 16, s, 16);	/* first block m is s */
+			first = 0;
+		} else {
+			size_t clen;
+
+			if (len == 0) {
+				break;
+			}
+			clen = len < 16 ? len : 16;
+			memcpy(tmp + 16, seed, clen);
+			memset(tmp + 16 + clen, 0, 16 - clen);	/* zero padding */
+			seed = (const unsigned char *)seed + clen;
+			len -= clen;
+		}
+		ctx->sk.vtable->init(&ctx->sk.vtable, tmp, 32);
+
+		/*
+		 * Compute new G and H values.
+		 */
+		memcpy(iv, G, 12);
+		memcpy(newG, G, 16);
+		ctx->sk.vtable->run(&ctx->sk.vtable, iv,
+			br_dec32be(G + 12), newG, 16);
+		iv[0] ^= 0x01;	/* apply constant C to the block input */
+		memcpy(H, G, 16);
+		H[0] ^= 0x01;	/* H starts as G xor C before the keystream XOR */
+		ctx->sk.vtable->run(&ctx->sk.vtable, iv,
+			br_dec32be(G + 12), H, 16);
+		memcpy(G, newG, 16);
+	}
+
+	/*
+	 * Output hash value is H||G. We truncate it to its first 128 bits,
+	 * i.e. H; that's our new AES key.
+	 */
+	ctx->sk.vtable->init(&ctx->sk.vtable, H, 16);
+	ctx->cc = 0;
+}
+
+/* see bearssl_rand.h -- PRNG dispatch table for the AES-CTR based DRBG */
+const br_prng_class br_aesctr_drbg_vtable = {
+	sizeof(br_aesctr_drbg_context),	/* size of the DRBG context */
+	(void (*)(const br_prng_class **, const void *, const void *, size_t))
+		&br_aesctr_drbg_init,	/* cast to the generic signature */
+	(void (*)(const br_prng_class **, void *, size_t))
+		&br_aesctr_drbg_generate,
+	(void (*)(const br_prng_class **, const void *, size_t))
+		&br_aesctr_drbg_update
+};
diff --git a/third_party/bearssl/src/asn1enc.c b/third_party/bearssl/src/asn1enc.c
new file mode 100644
index 0000000..7a74963
--- /dev/null
+++ b/third_party/bearssl/src/asn1enc.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: skip leading zeros and size the ASN.1 INTEGER contents */
+br_asn1_uint
+br_asn1_uint_prepare(const void *xdata, size_t xlen)
+{
+	const unsigned char *p;
+	br_asn1_uint res;
+
+	for (p = xdata; xlen != 0 && p[0] == 0x00; p ++) {
+		xlen --;
+	}
+	res.data = p;
+	res.len = xlen;
+
+	/*
+	 * One extra byte is accounted for when nothing is left (zero) or
+	 * when the first remaining byte has its top bit set.
+	 */
+	res.asn1len = (xlen == 0 || p[0] > 0x7F) ? xlen + 1 : xlen;
+	return res;
+}
+
+/* see inner.h: ASN.1 length; a NULL dest only computes the encoded size */
+size_t
+br_asn1_encode_length(void *dest, size_t len)
+{
+	unsigned char *out;
+	size_t rem;
+	int nbytes, shift;
+
+	out = dest;
+	if (len < 0x80) {
+		if (out != NULL) {
+			out[0] = len;
+		}
+		return 1;
+	}
+	/* Long form: 0x80 + byte count, then the length in big-endian. */
+	for (nbytes = 0, rem = len; rem != 0; rem >>= 8) {
+		nbytes ++;
+	}
+	if (out != NULL) {
+		*out ++ = 0x80 + nbytes;
+		for (shift = (nbytes - 1) << 3; shift >= 0; shift -= 8) {
+			*out ++ = len >> shift;
+		}
+	}
+	return nbytes + 1;
+}
+
+/* see inner.h: write an ASN.1 INTEGER (tag 0x02); NULL dest only sizes it */
+size_t
+br_asn1_encode_uint(void *dest, br_asn1_uint pp)
+{
+	unsigned char *out, *body;
+	size_t hlen;
+
+	out = dest;
+	hlen = 1 + br_asn1_encode_length(
+		out != NULL ? out + 1 : NULL, pp.asn1len);
+	if (out != NULL) {
+		out[0] = 0x02;
+		body = out + hlen;
+		body[0] = 0x00;
+		memcpy(body + pp.asn1len - pp.len, pp.data, pp.len);
+	}
+	return hlen + pp.asn1len;
+}
diff --git a/third_party/bearssl/src/ccm.c b/third_party/bearssl/src/ccm.c
new file mode 100644
index 0000000..68cc913
--- /dev/null
+++ b/third_party/bearssl/src/ccm.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Implementation Notes
+ * ====================
+ *
+ * The combined CTR + CBC-MAC functions can only handle full blocks,
+ * so some buffering is necessary.
+ *
+ *  - 'ptr' contains a value from 0 to 15, which is the number of bytes
+ *    accumulated in buf[] that still needs to be processed with the
+ *    current CBC-MAC computation.
+ *
+ *  - When processing the message itself, CTR encryption/decryption is
+ *    also done at the same time. The first 'ptr' bytes of buf[] then
+ *    contain the plaintext bytes, while the last '16 - ptr' bytes of
+ *    buf[] are the remnants of the stream block, to be used against
+ *    the next input bytes, when available. When 'ptr' is 0, the
+ *    contents of buf[] are to be ignored.
+ *
+ *  - The current counter and running CBC-MAC values are kept in 'ctr'
+ *    and 'cbcmac', respectively.
+ */
+
+/* see bearssl_block.h: record the CTR + CBC-MAC implementation to use */
+void
+br_ccm_init(br_ccm_context *ctx, const br_block_ctrcbc_class **bctx)
+{
+	ctx->bctx = bctx;  /* only the pointer is stored; nothing else is set */
+}
+
+/* see bearssl_block.h; returns 1 on success, 0 if a length is rejected */
+int
+br_ccm_reset(br_ccm_context *ctx, const void *nonce, size_t nonce_len,
+	uint64_t aad_len, uint64_t data_len, size_t tag_len)
+{
+	unsigned char tmp[16];
+	unsigned u, q;
+
+	if (nonce_len < 7 || nonce_len > 13) {
+		return 0;
+	}
+	if (tag_len < 4 || tag_len > 16 || (tag_len & 1) != 0) {
+		return 0;
+	}
+	q = 15 - (unsigned)nonce_len;  /* bytes used to encode data_len */
+	ctx->tag_len = tag_len;
+
+	/*
+	 * Block B0, to start CBC-MAC: flags, nonce, big-endian data length.
+	 */
+	tmp[0] = (aad_len > 0 ? 0x40 : 0x00)  /* set when there is AAD */
+		| (((unsigned)tag_len - 2) << 2)  /* tag length field */
+		| (q - 1);  /* size of the length field, minus one */
+	memcpy(tmp + 1, nonce, nonce_len);
+	for (u = 0; u < q; u ++) {
+		tmp[15 - u] = (unsigned char)data_len;
+		data_len >>= 8;
+	}
+	if (data_len != 0) {
+		/*
+		 * If the data length was not entirely consumed in the
+		 * loop above, then it exceeds the maximum limit of
+		 * q bytes (when encoded).
+		 */
+		return 0;
+	}
+
+	/*
+	 * Start CBC-MAC.
+	 */
+	memset(ctx->cbcmac, 0, sizeof ctx->cbcmac);
+	(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, tmp, sizeof tmp);
+
+	/*
+	 * Assemble AAD length header (10, 6 or 2 bytes, left in buf[]).
+	 */
+	if ((aad_len >> 32) != 0) {
+		ctx->buf[0] = 0xFF;
+		ctx->buf[1] = 0xFF;
+		br_enc64be(ctx->buf + 2, aad_len);
+		ctx->ptr = 10;
+	} else if (aad_len >= 0xFF00) {
+		ctx->buf[0] = 0xFF;
+		ctx->buf[1] = 0xFE;
+		br_enc32be(ctx->buf + 2, (uint32_t)aad_len);
+		ctx->ptr = 6;
+	} else if (aad_len > 0) {
+		br_enc16be(ctx->buf, (unsigned)aad_len);
+		ctx->ptr = 2;
+	} else {
+		ctx->ptr = 0;  /* no AAD: no header, nothing buffered */
+	}
+
+	/*
+	 * Make initial counter value and compute tag mask.
+	 */
+	ctx->ctr[0] = q - 1;
+	memcpy(ctx->ctr + 1, nonce, nonce_len);
+	memset(ctx->ctr + 1 + nonce_len, 0, q);  /* counter field starts at 0 */
+	memset(ctx->tagmask, 0, sizeof ctx->tagmask);
+	(*ctx->bctx)->ctr(ctx->bctx, ctx->ctr,
+		ctx->tagmask, sizeof ctx->tagmask);
+
+	return 1;
+}
+
+/* see bearssl_block.h */
+void
+br_ccm_aad_inject(br_ccm_context *ctx, const void *data, size_t len)
+{
+	const unsigned char *src;
+	size_t held, room, tail;
+
+	src = data;
+	held = ctx->ptr;
+
+	/*
+	 * Bytes are already buffered: top up the pending block first.
+	 */
+	if (held != 0) {
+		room = (sizeof ctx->buf) - held;
+		if (len < room) {
+			/*
+			 * Still not a full block; keep accumulating.
+			 */
+			memcpy(ctx->buf + held, src, len);
+			ctx->ptr = held + len;
+			return;
+		}
+		memcpy(ctx->buf + held, src, room);
+		(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,
+			ctx->buf, sizeof ctx->buf);
+		src += room;
+		len -= room;
+	}
+
+	/*
+	 * Run CBC-MAC over the whole 16-byte blocks, straight from the
+	 * caller's buffer.
+	 */
+	tail = len & 15;
+	(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, src, len - tail);
+
+	/*
+	 * Stash the trailing partial block (possibly empty) for later.
+	 */
+	memcpy(ctx->buf, src + (len - tail), tail);
+	ctx->ptr = tail;
+}
+
+/* see bearssl_block.h */
+void
+br_ccm_flip(br_ccm_context *ctx)
+{
+	size_t pending;
+
+	/*
+	 * Nothing buffered means the AAD ended on a block boundary (or
+	 * there was no AAD). The CTR counter needs no work either way:
+	 * br_ccm_reset() already prepared it.
+	 */
+	pending = ctx->ptr;
+	if (pending == 0) {
+		return;
+	}
+
+	/* Zero-pad the last AAD block and feed it to CBC-MAC. */
+	memset(ctx->buf + pending, 0x00, (sizeof ctx->buf) - pending);
+	(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, ctx->buf, sizeof ctx->buf);
+	ctx->ptr = 0;
+}
+
+/* see bearssl_block.h: CTR-process data[] in place, CBC-MAC the plaintext */
+void
+br_ccm_run(br_ccm_context *ctx, int encrypt, void *data, size_t len)
+{
+	unsigned char *dbuf;
+	size_t ptr;
+
+	dbuf = data;
+
+	/*
+	 * Complete a partial block, if any: ctx->buf[] contains
+	 * ctx->ptr plaintext bytes (already reported), and the other
+	 * bytes are CTR stream output.
+	 */
+	ptr = ctx->ptr;
+	if (ptr != 0) {
+		size_t clen;
+		size_t u;
+
+		clen = (sizeof ctx->buf) - ptr;
+		if (clen > len) {
+			clen = len;
+		}
+		if (encrypt) {
+			for (u = 0; u < clen; u ++) {
+				unsigned w, x;
+
+				w = ctx->buf[ptr + u];  /* CTR stream byte */
+				x = dbuf[u];
+				ctx->buf[ptr + u] = x;  /* keep plaintext for CBC-MAC */
+				dbuf[u] = w ^ x;  /* ciphertext out */
+			}
+		} else {
+			for (u = 0; u < clen; u ++) {
+				unsigned w;
+
+				w = ctx->buf[ptr + u] ^ dbuf[u];  /* recovered plaintext */
+				dbuf[u] = w;
+				ctx->buf[ptr + u] = w;
+			}
+		}
+		dbuf += clen;
+		len -= clen;
+		ptr += clen;
+		if (ptr < sizeof ctx->buf) {  /* block still incomplete */
+			ctx->ptr = ptr;
+			return;
+		}
+		(*ctx->bctx)->mac(ctx->bctx,
+			ctx->cbcmac, ctx->buf, sizeof ctx->buf);
+	}
+
+	/*
+	 * Process all complete blocks. Note that the ctrcbc API is for
+	 * encrypt-then-MAC (CBC-MAC is computed over the encrypted
+	 * blocks) while CCM uses MAC-and-encrypt (CBC-MAC is computed
+	 * over the plaintext blocks). Therefore, we need to use the
+	 * _decryption_ function for encryption, and the encryption
+	 * function for decryption (this works because CTR encryption
+	 * and decryption are identical, so the choice really is about
+	 * computing the CBC-MAC before or after XORing with the CTR
+	 * stream).
+	 */
+	ptr = len & 15;  /* trailing bytes that do not fill a block */
+	len -= ptr;
+	if (encrypt) {
+		(*ctx->bctx)->decrypt(ctx->bctx, ctx->ctr, ctx->cbcmac,
+			dbuf, len);
+	} else {
+		(*ctx->bctx)->encrypt(ctx->bctx, ctx->ctr, ctx->cbcmac,
+			dbuf, len);
+	}
+	dbuf += len;
+
+	/*
+	 * If there is some remaining data, then we need to compute an
+	 * extra block of CTR stream.
+	 */
+	if (ptr != 0) {
+		size_t u;
+
+		memset(ctx->buf, 0, sizeof ctx->buf);
+		(*ctx->bctx)->ctr(ctx->bctx, ctx->ctr,
+			ctx->buf, sizeof ctx->buf);
+		if (encrypt) {
+			for (u = 0; u < ptr; u ++) {
+				unsigned w, x;
+
+				w = ctx->buf[u];
+				x = dbuf[u];
+				ctx->buf[u] = x;
+				dbuf[u] = w ^ x;
+			}
+		} else {
+			for (u = 0; u < ptr; u ++) {
+				unsigned w;
+
+				w = ctx->buf[u] ^ dbuf[u];
+				dbuf[u] = w;
+				ctx->buf[u] = w;
+			}
+		}
+	}
+	ctx->ptr = ptr;  /* plaintext bytes now held in buf[] (0 if none) */
+}
+
+/* see bearssl_block.h */
+size_t
+br_ccm_get_tag(br_ccm_context *ctx, void *tag)
+{
+	size_t held, tlen, k;
+
+	/*
+	 * A trailing partial block is zero-padded and absorbed so that
+	 * the CBC-MAC covers all of the data.
+	 */
+	held = ctx->ptr;
+	if (held != 0) {
+		memset(ctx->buf + held, 0x00, (sizeof ctx->buf) - held);
+		(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, ctx->buf, sizeof ctx->buf);
+	}
+
+	/*
+	 * Mask the CBC-MAC value in place (ctx->cbcmac is modified), then
+	 * hand out the first tag_len bytes.
+	 */
+	tlen = ctx->tag_len;
+	for (k = 0; k != tlen; k ++) {
+		ctx->cbcmac[k] ^= ctx->tagmask[k];
+	}
+	memcpy(tag, ctx->cbcmac, tlen);
+	return tlen;
+}
+
+/* see bearssl_block.h: every tag byte is compared, with no early exit */
+uint32_t
+br_ccm_check_tag(br_ccm_context *ctx, const void *tag)
+{
+	unsigned char computed[16];
+	const unsigned char *expected = tag;
+	size_t n;
+	uint32_t diff;
+
+	n = br_ccm_get_tag(ctx, computed);
+	for (diff = 0; n -- > 0; ) {
+		diff |= computed[n] ^ expected[n];
+	}
+	return EQ0(diff);
+}
diff --git a/third_party/bearssl/src/ccopy.c b/third_party/bearssl/src/ccopy.c
new file mode 100644
index 0000000..2beace7
--- /dev/null
+++ b/third_party/bearssl/src/ccopy.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: each dst byte becomes MUX(ctl, src byte, dst byte) */
+void
+br_ccopy(uint32_t ctl, void *dst, const void *src, size_t len)
+{
+	unsigned char *out;
+	const unsigned char *in;
+	size_t k;
+
+	out = dst;
+	in = src;
+	for (k = 0; k < len; k ++) {
+		uint32_t from_src, from_dst;
+
+		from_src = in[k];
+		from_dst = out[k];
+		out[k] = MUX(ctl, from_src, from_dst);
+	}
+}
diff --git a/third_party/bearssl/src/chacha20_ct.c b/third_party/bearssl/src/chacha20_ct.c
new file mode 100644
index 0000000..9961eb1
--- /dev/null
+++ b/third_party/bearssl/src/chacha20_ct.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h: XOR the stream into data[]; returns the next counter */
+uint32_t
+br_chacha20_ct_run(const void *key,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *buf;
+	uint32_t kw[8], ivw[3];
+	size_t u;
+
+	static const uint32_t CW[] = {  /* "expand 32-byte k" as LE words */
+		0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
+	};
+
+	buf = data;
+	for (u = 0; u < 8; u ++) {
+		kw[u] = br_dec32le((const unsigned char *)key + (u << 2));
+	}
+	for (u = 0; u < 3; u ++) {
+		ivw[u] = br_dec32le((const unsigned char *)iv + (u << 2));
+	}
+	while (len > 0) {  /* one 64-byte stream block per iteration */
+		uint32_t state[16];
+		int i;
+		size_t clen;
+		unsigned char tmp[64];
+
+		memcpy(&state[0], CW, sizeof CW);
+		memcpy(&state[4], kw, sizeof kw);
+		state[12] = cc;  /* word 12 holds the block counter */
+		memcpy(&state[13], ivw, sizeof ivw);
+		for (i = 0; i < 10; i ++) {  /* 4 column + 4 diagonal QROUNDs each */
+
+#define QROUND(a, b, c, d)   do { \
+		state[a] += state[b]; \
+		state[d] ^= state[a]; \
+		state[d] = (state[d] << 16) | (state[d] >> 16); \
+		state[c] += state[d]; \
+		state[b] ^= state[c]; \
+		state[b] = (state[b] << 12) | (state[b] >> 20); \
+		state[a] += state[b]; \
+		state[d] ^= state[a]; \
+		state[d] = (state[d] <<  8) | (state[d] >> 24); \
+		state[c] += state[d]; \
+		state[b] ^= state[c]; \
+		state[b] = (state[b] <<  7) | (state[b] >> 25); \
+	} while (0)
+
+			QROUND( 0,  4,  8, 12);
+			QROUND( 1,  5,  9, 13);
+			QROUND( 2,  6, 10, 14);
+			QROUND( 3,  7, 11, 15);
+			QROUND( 0,  5, 10, 15);
+			QROUND( 1,  6, 11, 12);
+			QROUND( 2,  7,  8, 13);
+			QROUND( 3,  4,  9, 14);
+
+#undef QROUND
+
+		}
+		for (u = 0; u < 4; u ++) {  /* add the initial state back, word by word */
+			br_enc32le(&tmp[u << 2], state[u] + CW[u]);
+		}
+		for (u = 4; u < 12; u ++) {
+			br_enc32le(&tmp[u << 2], state[u] + kw[u - 4]);
+		}
+		br_enc32le(&tmp[48], state[12] + cc);
+		for (u = 13; u < 16; u ++) {
+			br_enc32le(&tmp[u << 2], state[u] + ivw[u - 13]);
+		}
+
+		clen = len < 64 ? len : 64;  /* the last block may be partial */
+		for (u = 0; u < clen; u ++) {
+			buf[u] ^= tmp[u];
+		}
+		buf += clen;
+		len -= clen;
+		cc ++;  /* also counts a final partial block */
+	}
+	return cc;
+}
diff --git a/third_party/bearssl/src/chacha20_sse2.c b/third_party/bearssl/src/chacha20_sse2.c
new file mode 100644
index 0000000..92b4a4a
--- /dev/null
+++ b/third_party/bearssl/src/chacha20_sse2.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+#if BR_SSE2
+
+/*
+ * This file contains a ChaCha20 implementation that leverages SSE2
+ * opcodes for better performance.
+ */
+
+/* see bearssl_block.h */
+br_chacha20_run
+br_chacha20_sse2_get(void)
+{
+#if BR_amd64
+
+	/*
+	 * 64-bit x86: SSE2 is part of the ABI, so the opcodes are
+	 * expected to be available without any runtime probe.
+	 */
+	return &br_chacha20_sse2_run;
+
+#else
+
+	/*
+	 * 32-bit x86: probe with CPUID; SSE2 is reported in bit 26 of
+	 * EDX (mask 0x04000000). Without it, no implementation is
+	 * returned.
+	 */
+	br_chacha20_run impl;
+
+	impl = br_cpuid(0, 0, 0, 0x04000000) ? &br_chacha20_sse2_run : 0;
+	return impl;
+#endif
+}
+
+BR_TARGETS_X86_UP
+
+/* see bearssl_block.h: XOR the stream into data[]; returns the updated counter */
+BR_TARGET("sse2")
+uint32_t
+br_chacha20_sse2_run(const void *key,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *buf;
+	uint32_t ivtmp[4];
+	__m128i kw0, kw1;
+	__m128i iw, cw;
+	__m128i one;
+
+	static const uint32_t CW[] = {
+		0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
+	};
+
+	buf = data;
+	kw0 = _mm_loadu_si128(key);
+	kw1 = _mm_loadu_si128((const void *)((const unsigned char *)key + 16));
+	ivtmp[0] = cc;  /* lowest lane of iw: the block counter */
+	memcpy(ivtmp + 1, iv, 12);  /* remaining three lanes: the 12-byte IV */
+	iw = _mm_loadu_si128((const void *)ivtmp);
+	cw = _mm_loadu_si128((const void *)CW);
+	one = _mm_set_epi32(0, 0, 0, 1);  /* increments the lowest lane only */
+
+	while (len > 0) {
+		/*
+		 * sj contains state words 4*j to 4*j+3.
+		 */
+		__m128i s0, s1, s2, s3;
+		int i;
+
+		s0 = cw;
+		s1 = kw0;
+		s2 = kw1;
+		s3 = iw;
+		for (i = 0; i < 10; i ++) {  /* each pass: one even + one odd round */
+			/*
+			 * Even round is straightforward application on
+			 * the state words.
+			 */
+			s0 = _mm_add_epi32(s0, s1);
+			s3 = _mm_xor_si128(s3, s0);
+			s3 = _mm_or_si128(
+				_mm_slli_epi32(s3, 16),
+				_mm_srli_epi32(s3, 16));
+
+			s2 = _mm_add_epi32(s2, s3);
+			s1 = _mm_xor_si128(s1, s2);
+			s1 = _mm_or_si128(
+				_mm_slli_epi32(s1, 12),
+				_mm_srli_epi32(s1, 20));
+
+			s0 = _mm_add_epi32(s0, s1);
+			s3 = _mm_xor_si128(s3, s0);
+			s3 = _mm_or_si128(
+				_mm_slli_epi32(s3, 8),
+				_mm_srli_epi32(s3, 24));
+
+			s2 = _mm_add_epi32(s2, s3);
+			s1 = _mm_xor_si128(s1, s2);
+			s1 = _mm_or_si128(
+				_mm_slli_epi32(s1, 7),
+				_mm_srli_epi32(s1, 25));
+
+			/*
+			 * For the odd round, we must rotate some state
+			 * words so that the computations apply on the
+			 * right combinations of words.
+			 */
+			s1 = _mm_shuffle_epi32(s1, 0x39);
+			s2 = _mm_shuffle_epi32(s2, 0x4E);
+			s3 = _mm_shuffle_epi32(s3, 0x93);
+
+			s0 = _mm_add_epi32(s0, s1);
+			s3 = _mm_xor_si128(s3, s0);
+			s3 = _mm_or_si128(
+				_mm_slli_epi32(s3, 16),
+				_mm_srli_epi32(s3, 16));
+
+			s2 = _mm_add_epi32(s2, s3);
+			s1 = _mm_xor_si128(s1, s2);
+			s1 = _mm_or_si128(
+				_mm_slli_epi32(s1, 12),
+				_mm_srli_epi32(s1, 20));
+
+			s0 = _mm_add_epi32(s0, s1);
+			s3 = _mm_xor_si128(s3, s0);
+			s3 = _mm_or_si128(
+				_mm_slli_epi32(s3, 8),
+				_mm_srli_epi32(s3, 24));
+
+			s2 = _mm_add_epi32(s2, s3);
+			s1 = _mm_xor_si128(s1, s2);
+			s1 = _mm_or_si128(
+				_mm_slli_epi32(s1, 7),
+				_mm_srli_epi32(s1, 25));
+
+			/*
+			 * After the odd round, we rotate back the values
+			 * to undo the rotate at the start of the odd round.
+			 */
+			s1 = _mm_shuffle_epi32(s1, 0x93);
+			s2 = _mm_shuffle_epi32(s2, 0x4E);
+			s3 = _mm_shuffle_epi32(s3, 0x39);
+		}
+
+		/*
+		 * Addition with the initial state.
+		 */
+		s0 = _mm_add_epi32(s0, cw);
+		s1 = _mm_add_epi32(s1, kw0);
+		s2 = _mm_add_epi32(s2, kw1);
+		s3 = _mm_add_epi32(s3, iw);
+
+		/*
+		 * Increment block counter.
+		 */
+		iw = _mm_add_epi32(iw, one);  /* done before the partial-block exit below */
+
+		/*
+		 * XOR final state with the data.
+		 */
+		if (len < 64) {  /* partial block: go through a temporary buffer */
+			unsigned char tmp[64];
+			size_t u;
+
+			_mm_storeu_si128((void *)(tmp +  0), s0);
+			_mm_storeu_si128((void *)(tmp + 16), s1);
+			_mm_storeu_si128((void *)(tmp + 32), s2);
+			_mm_storeu_si128((void *)(tmp + 48), s3);
+			for (u = 0; u < len; u ++) {
+				buf[u] ^= tmp[u];
+			}
+			break;  /* a partial block is necessarily the last one */
+		} else {
+			__m128i b0, b1, b2, b3;
+
+			b0 = _mm_loadu_si128((const void *)(buf +  0));
+			b1 = _mm_loadu_si128((const void *)(buf + 16));
+			b2 = _mm_loadu_si128((const void *)(buf + 32));
+			b3 = _mm_loadu_si128((const void *)(buf + 48));
+			b0 = _mm_xor_si128(b0, s0);
+			b1 = _mm_xor_si128(b1, s1);
+			b2 = _mm_xor_si128(b2, s2);
+			b3 = _mm_xor_si128(b3, s3);
+			_mm_storeu_si128((void *)(buf +  0), b0);
+			_mm_storeu_si128((void *)(buf + 16), b1);
+			_mm_storeu_si128((void *)(buf + 32), b2);
+			_mm_storeu_si128((void *)(buf + 48), b3);
+			buf += 64;
+			len -= 64;
+		}
+	}
+
+	/*
+	 * _mm_extract_epi32() requires SSE4.1. We prefer to stick to
+	 * raw SSE2, thus we use _mm_extract_epi16().
+	 */
+	return (uint32_t)_mm_extract_epi16(iw, 0)  /* low half of the counter lane */
+		| ((uint32_t)_mm_extract_epi16(iw, 1) << 16);  /* high half */
+}
+
+BR_TARGETS_X86_DOWN
+
+#else
+
+/* see bearssl_block.h: built without BR_SSE2, so no SSE2 code is offered */
+br_chacha20_run
+br_chacha20_sse2_get(void)
+{
+	return 0;  /* null function pointer: SSE2 implementation unavailable */
+}
+
+#endif
diff --git a/third_party/bearssl/src/config.h b/third_party/bearssl/src/config.h
new file mode 100644
index 0000000..edf5d25
--- /dev/null
+++ b/third_party/bearssl/src/config.h
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef CONFIG_H__
+#define CONFIG_H__
+
+/*
+ * This file contains compile-time flags that can override the
+ * autodetection performed in relevant files. Each flag is a macro; it
+ * deactivates the feature if defined to 0, activates it if defined to a
+ * non-zero integer (normally 1). If the macro is not defined, then
+ * autodetection applies.
+ */
+ 
+/* NXDK: its x86 intrinsics seem to be incomplete compared to what aes_x86ni expects, so the intrinsic-based code paths below are forced off. */
+#ifdef NXDK
+#define BR_AES_X86NI 0
+#define BR_ENABLE_INTRINSICS 0 /* NOTE(review): chacha20_sse2.c defines this to 1 before including inner.h; if config.h is read after that, this is a macro redefinition -- confirm */
+#define BR_SSE2 0
+#define BR_RDRAND 0
+#undef _WIN32 /* NOTE(review): presumably keeps Win32-specific code paths out of the NXDK build -- confirm */
+#endif
+
+/*
+ * When BR_64 is enabled, 64-bit integer types are assumed to be
+ * efficient (i.e. the architecture has 64-bit registers and can
+ * do 64-bit operations as fast as 32-bit operations).
+ *
+#define BR_64   1
+ */
+
+/*
+ * When BR_LOMUL is enabled, then multiplications of 32-bit values whose
+ * results are truncated to the low 32 bits are assumed to be
+ * substantially more efficient than 32-bit multiplications that yield
+ * 64-bit results. This is typically the case on low-end ARM Cortex M
+ * systems (M0, M0+, M1, and arguably M3 and M4 as well).
+ *
+#define BR_LOMUL   1
+ */
+
+/*
+ * When BR_SLOW_MUL is enabled, multiplications are assumed to be
+ * substantially slow with regard to other integer operations, thus
+ * making it worth performing more operations for a given task if it
+ * allows using fewer multiplications.
+ *
+#define BR_SLOW_MUL   1
+ */
+
+/*
+ * When BR_SLOW_MUL15 is enabled, short multiplications (on 15-bit words)
+ * are assumed to be substantially slow with regard to other integer
+ * operations, thus making it worth performing more integer operations if
+ * it allows using fewer multiplications.
+ *
+#define BR_SLOW_MUL15   1
+ */
+
+/*
+ * When BR_CT_MUL31 is enabled, multiplications of 31-bit values (used
+ * in the "i31" big integer implementation) use an alternate implementation
+ * which is slower and larger than the normal multiplication, but should
+ * ensure constant-time multiplications even on architectures where the
+ * multiplication opcode takes a variable number of cycles to complete.
+ *
+#define BR_CT_MUL31   1
+ */
+
+/*
+ * When BR_CT_MUL15 is enabled, multiplications of 15-bit values (held
+ * in 32-bit words) use an alternate implementation which is slower and
+ * larger than the normal multiplication, but should ensure
+ * constant-time multiplications on most/all architectures where the
+ * basic multiplication is not constant-time.
+#define BR_CT_MUL15   1
+ */
+
+/*
+ * When BR_NO_ARITH_SHIFT is enabled, arithmetic right shifts (with sign
+ * extension) are performed with a sequence of operations which is bigger
+ * and slower than a simple right shift on a signed value. This avoids
+ * relying on an implementation-defined behaviour. However, most if not
+ * all C compilers use sign extension for right shifts on signed values,
+ * so this alternate macro is disabled by default.
+#define BR_NO_ARITH_SHIFT   1
+ */
+
+/*
+ * When BR_RDRAND is enabled, the SSL engine will use the RDRAND opcode
+ * to automatically obtain quality randomness for seeding its internal
+ * PRNG. Since that opcode is present only in recent x86 CPU, its
+ * support is dynamically tested; if the current CPU does not support
+ * it, then another random source will be used, such as /dev/urandom or
+ * CryptGenRandom().
+ *
+#define BR_RDRAND   1
+ */
+
+/*
+ * When BR_USE_GETENTROPY is enabled, the SSL engine will use the
+ * getentropy() function to obtain quality randomness for seeding its
+ * internal PRNG. On Linux and FreeBSD, getentropy() is implemented by
+ * the standard library with the system call getrandom(); on OpenBSD,
+ * getentropy() is the system call, and there is no getrandom() wrapper,
+ * hence the use of the getentropy() function for maximum portability.
+ *
+ * If the getentropy() call fails, and BR_USE_URANDOM is not explicitly
+ * disabled, then /dev/urandom will be used as a fallback mechanism. On
+ * FreeBSD and OpenBSD, this does not change much, since /dev/urandom
+ * will block if not enough entropy has been obtained since last boot.
+ * On Linux, /dev/urandom might not block, which can be troublesome in
+ * early boot stages, which is why getentropy() is preferred.
+ *
+#define BR_USE_GETENTROPY   1
+ */
+
+/*
+ * When BR_USE_URANDOM is enabled, the SSL engine will use /dev/urandom
+ * to automatically obtain quality randomness for seeding its internal
+ * PRNG.
+ *
+#define BR_USE_URANDOM   1
+ */
+
+/*
+ * When BR_USE_WIN32_RAND is enabled, the SSL engine will use the Win32
+ * (CryptoAPI) functions (CryptAcquireContext(), CryptGenRandom()...) to
+ * automatically obtain quality randomness for seeding its internal PRNG.
+ *
+ * Note: if both BR_USE_URANDOM and BR_USE_WIN32_RAND are defined, the
+ * former takes precedence.
+ *
+#define BR_USE_WIN32_RAND   1
+ */
+
+/*
+ * When BR_ARMEL_CORTEXM_GCC is enabled, some operations are replaced with
+ * inline assembly which is shorter and/or faster. This should be used
+ * only when all of the following are true:
+ *   - target architecture is ARM in Thumb mode
+ *   - target endianness is little-endian
+ *   - compiler is GCC (or GCC-compatible for inline assembly syntax)
+ *
+ * This is meant for the low-end cores (Cortex M0, M0+, M1, M3).
+ * Note: if BR_LOMUL is not explicitly enabled or disabled, then
+ * enabling BR_ARMEL_CORTEXM_GCC also enables BR_LOMUL.
+ *
+#define BR_ARMEL_CORTEXM_GCC   1
+ */
+
+/*
+ * When BR_AES_X86NI is enabled, the AES implementation using the x86 "NI"
+ * instructions (dedicated AES opcodes) will be compiled. If this is not
+ * enabled explicitly, then that AES implementation will be compiled only
+ * if a compatible compiler is detected. If set explicitly to 0, the
+ * implementation will not be compiled at all.
+ *
+#define BR_AES_X86NI   1
+ */
+
+/*
+ * When BR_SSE2 is enabled, SSE2 intrinsics will be used for some
+ * algorithm implementations that use them (e.g. chacha20_sse2). If this
+ * is not enabled explicitly, then support for SSE2 intrinsics will be
+ * automatically detected. If set explicitly to 0, then SSE2 code will
+ * not be compiled at all.
+ *
+#define BR_SSE2   1
+ */
+
+/*
+ * When BR_POWER8 is enabled, the AES implementation using the POWER ISA
+ * 2.07 opcodes (available on POWER8 processors and later) is compiled.
+ * If this is not enabled explicitly, then that implementation will be
+ * compiled only if a compatible compiler is detected, _and_ the target
+ * architecture is POWER8 or later.
+ *
+#define BR_POWER8   1
+ */
+
+/*
+ * When BR_INT128 is enabled, then code using the 'unsigned __int64'
+ * and 'unsigned __int128' types will be used to leverage 64x64->128
+ * unsigned multiplications. This should work with GCC and compatible
+ * compilers on 64-bit architectures.
+ *
+#define BR_INT128   1
+ */
+
+/*
+ * When BR_UMUL128 is enabled, then code using the '_umul128()' and
+ * '_addcarry_u64()' intrinsics will be used to implement 64x64->128
+ * unsigned multiplications. This should work on Visual C on x64 systems.
+ *
+#define BR_UMUL128   1
+ */
+
+/*
+ * When BR_LE_UNALIGNED is enabled, then the current architecture is
+ * assumed to use little-endian encoding for integers, and to tolerate
+ * unaligned accesses with no or minimal time penalty.
+ *
+#define BR_LE_UNALIGNED   1
+ */
+
+/*
+ * When BR_BE_UNALIGNED is enabled, then the current architecture is
+ * assumed to use big-endian encoding for integers, and to tolerate
+ * unaligned accesses with no or minimal time penalty.
+ *
+#define BR_BE_UNALIGNED   1
+ */
+
+#endif
diff --git a/third_party/bearssl/src/dec16be.c b/third_party/bearssl/src/dec16be.c
new file mode 100644
index 0000000..4f3f7f4
--- /dev/null
+++ b/third_party/bearssl/src/dec16be.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Fill v[] with 'num' words, each decoded by br_dec16be(); see inner.h */
+void
+br_range_dec16be(uint16_t *v, size_t num, const void *src)
+{
+	const unsigned char *in = src;
+	size_t u;
+
+	/* Word number u is read from the two bytes at offset 2*u. */
+	for (u = 0; u < num; u ++) {
+		v[u] = br_dec16be(in + (u << 1));
+	}
+}
diff --git a/third_party/bearssl/src/dec16le.c b/third_party/bearssl/src/dec16le.c
new file mode 100644
index 0000000..84d8536
--- /dev/null
+++ b/third_party/bearssl/src/dec16le.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Fill v[] with 'num' words, each decoded by br_dec16le(); see inner.h */
+void
+br_range_dec16le(uint16_t *v, size_t num, const void *src)
+{
+	const unsigned char *in = src;
+	size_t u;
+
+	/* Word number u is read from the two bytes at offset 2*u. */
+	for (u = 0; u < num; u ++) {
+		v[u] = br_dec16le(in + (u << 1));
+	}
+}
diff --git a/third_party/bearssl/src/dec32be.c b/third_party/bearssl/src/dec32be.c
new file mode 100644
index 0000000..5a8fc59
--- /dev/null
+++ b/third_party/bearssl/src/dec32be.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Fill v[] with 'num' words, each decoded by br_dec32be(); see inner.h */
+void
+br_range_dec32be(uint32_t *v, size_t num, const void *src)
+{
+	const unsigned char *in = src;
+	size_t u;
+
+	/* Word number u is read from the four bytes at offset 4*u. */
+	for (u = 0; u < num; u ++) {
+		v[u] = br_dec32be(in + (u << 2));
+	}
+}
diff --git a/third_party/bearssl/src/dec32le.c b/third_party/bearssl/src/dec32le.c
new file mode 100644
index 0000000..ed36e71
--- /dev/null
+++ b/third_party/bearssl/src/dec32le.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Fill v[] with 'num' words, each decoded by br_dec32le(); see inner.h */
+void
+br_range_dec32le(uint32_t *v, size_t num, const void *src)
+{
+	const unsigned char *in = src;
+	size_t u;
+
+	/* Word number u is read from the four bytes at offset 4*u. */
+	for (u = 0; u < num; u ++) {
+		v[u] = br_dec32le(in + (u << 2));
+	}
+}
diff --git a/third_party/bearssl/src/dec64be.c b/third_party/bearssl/src/dec64be.c
new file mode 100644
index 0000000..0c40a76
--- /dev/null
+++ b/third_party/bearssl/src/dec64be.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Fill v[] with 'num' words, each decoded by br_dec64be(); see inner.h */
+void
+br_range_dec64be(uint64_t *v, size_t num, const void *src)
+{
+	const unsigned char *in = src;
+	size_t u;
+
+	/* Word number u is read from the eight bytes at offset 8*u. */
+	for (u = 0; u < num; u ++) {
+		v[u] = br_dec64be(in + (u << 3));
+	}
+}
diff --git a/third_party/bearssl/src/dec64le.c b/third_party/bearssl/src/dec64le.c
new file mode 100644
index 0000000..cbd02c2
--- /dev/null
+++ b/third_party/bearssl/src/dec64le.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Fill v[] with 'num' words, each decoded by br_dec64le(); see inner.h */
+void
+br_range_dec64le(uint64_t *v, size_t num, const void *src)
+{
+	const unsigned char *in = src;
+	size_t u;
+
+	/* Word number u is read from the eight bytes at offset 8*u. */
+	for (u = 0; u < num; u ++) {
+		v[u] = br_dec64le(in + (u << 3));
+	}
+}
diff --git a/third_party/bearssl/src/des_ct.c b/third_party/bearssl/src/des_ct.c
new file mode 100644
index 0000000..581c0ab
--- /dev/null
+++ b/third_party/bearssl/src/des_ct.c
@@ -0,0 +1,411 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * During key schedule, we need to apply bit extraction PC-2 then permute
+ * things into our bitslice representation. PC-2 extracts 48 bits out
+ * of two 28-bit words (kl and kr), and we store these bits into two
+ * 32-bit words sk0 and sk1. Each table lists the most significant bit
+ * first; an entry of 28 selects a bit that is always zero in kl or kr.
+ *  -- bit 31-x of sk0 comes from bit QL0[x] of kl
+ *  -- bit 15-x of sk0 comes from bit QR0[x] of kr
+ *  -- bit 31-x of sk1 comes from bit QL1[x] of kl
+ *  -- bit 15-x of sk1 comes from bit QR1[x] of kr
+ */
+
+static const unsigned char QL0[] = {
+	17,  4, 27, 23, 13, 22,  7, 18,
+	16, 24,  2, 20,  1,  8, 15, 26
+};
+
+static const unsigned char QR0[] = {
+	25, 19,  9,  1,  5, 11, 23,  8,
+	17,  0, 22,  3,  6, 20, 27, 24
+};
+
+static const unsigned char QL1[] = {
+	28, 28, 14, 11, 28, 28, 25,  0,
+	28, 28,  5,  9, 28, 28, 12, 21
+};
+
+static const unsigned char QR1[] = {
+	28, 28, 15,  4, 28, 28, 26, 16,
+	28, 28, 12,  7, 28, 28, 10, 14
+};
+
+/*
+ * 32-bit left rotation of x by n bits; the C compiler is supposed to turn
+ * this idiom into the native rotation opcode (if any). Needs 1 <= n <= 31.
+ */
+static inline uint32_t
+rotl(uint32_t x, int n)
+{
+	return (x << n) | (x >> (32 - n)); /* n == 0 would shift right by 32 */
+}
+
+/*
+ * Key schedule for 8 key bytes: fills skey[0..31], two words per round.
+ */
+static void
+keysched_unit(uint32_t *skey, const void *key)
+{
+	int i;
+
+	br_des_keysched_unit(skey, key); /* skey[2i], skey[2i+1] = (kl, kr) */
+
+	/*
+	 * Apply PC-2 + bitslicing, in place, on the (kl, kr) pair of each round.
+	 */
+	for (i = 0; i < 16; i ++) {
+		uint32_t kl, kr, sk0, sk1;
+		int j;
+
+		kl = skey[(i << 1) + 0];
+		kr = skey[(i << 1) + 1];
+		sk0 = 0;
+		sk1 = 0;
+		for (j = 0; j < 16; j ++) {
+			sk0 <<= 1; /* earlier table entries end in higher bits */
+			sk1 <<= 1;
+			sk0 |= ((kl >> QL0[j]) & (uint32_t)1) << 16;
+			sk0 |= (kr >> QR0[j]) & (uint32_t)1;
+			sk1 |= ((kl >> QL1[j]) & (uint32_t)1) << 16;
+			sk1 |= (kr >> QR1[j]) & (uint32_t)1;
+		}
+
+		skey[(i << 1) + 0] = sk0;
+		skey[(i << 1) + 1] = sk1;
+	}
+
+#if 0
+		/*
+		 * Speed-optimized version for PC-2 + bitslicing.
+		 * (Unused. Kept for reference only; stands for the 'j' loop.)
+		 */
+		sk0 = kl & (uint32_t)0x00100000;
+		sk0 |= (kl & (uint32_t)0x08008000) << 2;
+		sk0 |= (kl & (uint32_t)0x00400000) << 4;
+		sk0 |= (kl & (uint32_t)0x00800000) << 5;
+		sk0 |= (kl & (uint32_t)0x00040000) << 6;
+		sk0 |= (kl & (uint32_t)0x00010000) << 7;
+		sk0 |= (kl & (uint32_t)0x00000100) << 10;
+		sk0 |= (kl & (uint32_t)0x00022000) << 14;
+		sk0 |= (kl & (uint32_t)0x00000082) << 18;
+		sk0 |= (kl & (uint32_t)0x00000004) << 19;
+		sk0 |= (kl & (uint32_t)0x04000000) >> 10;
+		sk0 |= (kl & (uint32_t)0x00000010) << 26;
+		sk0 |= (kl & (uint32_t)0x01000000) >> 2;
+
+		sk0 |= kr & (uint32_t)0x00000100;
+		sk0 |= (kr & (uint32_t)0x00000008) << 1;
+		sk0 |= (kr & (uint32_t)0x00000200) << 4;
+		sk0 |= rotl(kr & (uint32_t)0x08000021, 6);
+		sk0 |= (kr & (uint32_t)0x01000000) >> 24;
+		sk0 |= (kr & (uint32_t)0x00000002) << 11;
+		sk0 |= (kr & (uint32_t)0x00100000) >> 18;
+		sk0 |= (kr & (uint32_t)0x00400000) >> 17;
+		sk0 |= (kr & (uint32_t)0x00800000) >> 14;
+		sk0 |= (kr & (uint32_t)0x02020000) >> 10;
+		sk0 |= (kr & (uint32_t)0x00080000) >> 5;
+		sk0 |= (kr & (uint32_t)0x00000040) >> 3;
+		sk0 |= (kr & (uint32_t)0x00000800) >> 1;
+
+		sk1 = kl & (uint32_t)0x02000000;
+		sk1 |= (kl & (uint32_t)0x00001000) << 5;
+		sk1 |= (kl & (uint32_t)0x00000200) << 11;
+		sk1 |= (kl & (uint32_t)0x00004000) << 15;
+		sk1 |= (kl & (uint32_t)0x00000020) << 16;
+		sk1 |= (kl & (uint32_t)0x00000800) << 17;
+		sk1 |= (kl & (uint32_t)0x00000001) << 24;
+		sk1 |= (kl & (uint32_t)0x00200000) >> 5;
+
+		sk1 |= (kr & (uint32_t)0x00000010) << 8;
+		sk1 |= (kr & (uint32_t)0x04000000) >> 17;
+		sk1 |= (kr & (uint32_t)0x00004000) >> 14;
+		sk1 |= (kr & (uint32_t)0x00000400) >> 9;
+		sk1 |= (kr & (uint32_t)0x00010000) >> 8;
+		sk1 |= (kr & (uint32_t)0x00001000) >> 7;
+		sk1 |= (kr & (uint32_t)0x00000080) >> 3;
+		sk1 |= (kr & (uint32_t)0x00008000) >> 2;
+#endif
+}
+
+/* DES / 3DES key schedule; returns the number of DES passes (see inner.h) */
+unsigned
+br_des_ct_keysched(uint32_t *skey, const void *key, size_t key_len)
+{
+	const unsigned char *kb = key;
+
+	/* The first 8 key bytes always produce the first 32 subkey words. */
+	keysched_unit(skey, kb);
+	if (key_len == 8) {
+		return 1;
+	}
+	/* The second pass uses its subkeys in reversed order. */
+	keysched_unit(skey + 32, kb + 8);
+	br_des_rev_skey(skey + 32);
+	if (key_len == 16) {
+		/* Two-key variant: the third pass reuses the first subkeys. */
+		memcpy(skey + 64, skey, 32 * sizeof *skey);
+	} else {
+		keysched_unit(skey + 64, kb + 16);
+	}
+	return 3;
+}
+
+/*
+ * DES confusion function: expansion E (32 to 48 bits) of r0, XOR with the
+ * six expanded subkey words sk[0..5], S-boxes, and permutation P.
+ */
+static inline uint32_t
+Fconf(uint32_t r0, const uint32_t *sk)
+{
+	/*
+	 * Each 6->4 S-box is virtually turned into four 6->1 boxes; we
+	 * thus end up with 32 boxes that we call "T-boxes" here. We will
+	 * evaluate them with bitslice code.
+	 *
+	 * Each T-box is a circuit of multiplexers (sort of) and thus
+	 * takes 70 inputs: the 6 actual T-box inputs, and 64 constants
+	 * that describe the T-box output for all combinations of the
+	 * 6 inputs. With this model, all T-boxes are identical (with
+	 * distinct inputs) and thus can be executed in parallel with
+	 * bitslice code.
+	 *
+	 * T-boxes are numbered from 0 to 31, in least-to-most
+	 * significant order. Thus, S-box S1 corresponds to T-boxes 31,
+	 * 30, 29 and 28, in that order. T-box 'n' is computed with the
+	 * bits at rank 'n' in the 32-bit words.
+	 *
+	 * Words x0 to x5 contain the T-box inputs 0 to 5.
+	 */
+	uint32_t x0, x1, x2, x3, x4, x5, z0;
+	uint32_t y0, y1, y2, y3, y4, y5, y6, y7, y8, y9;
+	uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
+	uint32_t y20, y21, y22, y23, y24, y25, y26, y27, y28, y29;
+	uint32_t y30;
+
+	/*
+	 * Spread input bits over the 6 input words x*.
+	 */
+	x1 = r0 & (uint32_t)0x11111111; /* bit 0 of each nibble of r0 */
+	x2 = (r0 >> 1) & (uint32_t)0x11111111; /* bit 1 of each nibble */
+	x3 = (r0 >> 2) & (uint32_t)0x11111111; /* bit 2 of each nibble */
+	x4 = (r0 >> 3) & (uint32_t)0x11111111; /* bit 3 of each nibble */
+	x1 = (x1 << 4) - x1; /* x * 15: fill each nibble with its low bit */
+	x2 = (x2 << 4) - x2;
+	x3 = (x3 << 4) - x3;
+	x4 = (x4 << 4) - x4;
+	x0 = (x4 << 4) | (x4 >> 28); /* x4 rotated left by 4 bits */
+	x5 = (x1 >> 4) | (x1 << 28); /* x1 rotated right by 4 bits */
+
+	/*
+	 * XOR with the subkey for this round.
+	 */
+	x0 ^= sk[0];
+	x1 ^= sk[1];
+	x2 ^= sk[2];
+	x3 ^= sk[3];
+	x4 ^= sk[4];
+	x5 ^= sk[5];
+
+	/*
+	 * The T-boxes are done in parallel, since they all use a
+	 * "tree of multiplexer". We use "fake multiplexers":
+	 *
+	 *   y = a ^ (x & b)
+	 *
+	 * computes y as either 'a' (if x == 0) or 'a ^ b' (if x == 1).
+	 */
+	y0 = (uint32_t)0xEFA72C4D ^ (x0 & (uint32_t)0xEC7AC69C);
+	y1 = (uint32_t)0xAEAAEDFF ^ (x0 & (uint32_t)0x500FB821);
+	y2 = (uint32_t)0x37396665 ^ (x0 & (uint32_t)0x40EFA809);
+	y3 = (uint32_t)0x68D7B833 ^ (x0 & (uint32_t)0xA5EC0B28);
+	y4 = (uint32_t)0xC9C755BB ^ (x0 & (uint32_t)0x252CF820);
+	y5 = (uint32_t)0x73FC3606 ^ (x0 & (uint32_t)0x40205801);
+	y6 = (uint32_t)0xA2A0A918 ^ (x0 & (uint32_t)0xE220F929);
+	y7 = (uint32_t)0x8222BD90 ^ (x0 & (uint32_t)0x44A3F9E1);
+	y8 = (uint32_t)0xD6B6AC77 ^ (x0 & (uint32_t)0x794F104A);
+	y9 = (uint32_t)0x3069300C ^ (x0 & (uint32_t)0x026F320B);
+	y10 = (uint32_t)0x6CE0D5CC ^ (x0 & (uint32_t)0x7640B01A);
+	y11 = (uint32_t)0x59A9A22D ^ (x0 & (uint32_t)0x238F1572);
+	y12 = (uint32_t)0xAC6D0BD4 ^ (x0 & (uint32_t)0x7A63C083);
+	y13 = (uint32_t)0x21C83200 ^ (x0 & (uint32_t)0x11CCA000);
+	y14 = (uint32_t)0xA0E62188 ^ (x0 & (uint32_t)0x202F69AA);
+	/* y15 = (uint32_t)0x00000000 ^ (x0 & (uint32_t)0x00000000); */
+	y16 = (uint32_t)0xAF7D655A ^ (x0 & (uint32_t)0x51B33BE9);
+	y17 = (uint32_t)0xF0168AA3 ^ (x0 & (uint32_t)0x3B0FE8AE);
+	y18 = (uint32_t)0x90AA30C6 ^ (x0 & (uint32_t)0x90BF8816);
+	y19 = (uint32_t)0x5AB2750A ^ (x0 & (uint32_t)0x09E34F9B);
+	y20 = (uint32_t)0x5391BE65 ^ (x0 & (uint32_t)0x0103BE88);
+	y21 = (uint32_t)0x93372BAF ^ (x0 & (uint32_t)0x49AC8E25);
+	y22 = (uint32_t)0xF288210C ^ (x0 & (uint32_t)0x922C313D);
+	y23 = (uint32_t)0x920AF5C0 ^ (x0 & (uint32_t)0x70EF31B0);
+	y24 = (uint32_t)0x63D312C0 ^ (x0 & (uint32_t)0x6A707100);
+	y25 = (uint32_t)0x537B3006 ^ (x0 & (uint32_t)0xB97C9011);
+	y26 = (uint32_t)0xA2EFB0A5 ^ (x0 & (uint32_t)0xA320C959);
+	y27 = (uint32_t)0xBC8F96A5 ^ (x0 & (uint32_t)0x6EA0AB4A);
+	y28 = (uint32_t)0xFAD176A5 ^ (x0 & (uint32_t)0x6953DDF8);
+	y29 = (uint32_t)0x665A14A3 ^ (x0 & (uint32_t)0xF74F3E2B);
+	y30 = (uint32_t)0xF2EFF0CC ^ (x0 & (uint32_t)0xF0306CAD);
+	/* y31 = (uint32_t)0x00000000 ^ (x0 & (uint32_t)0x00000000); */
+
+	y0 = y0 ^ (x1 & y1); /* next layer: 32 values folded to 16 with x1 */
+	y1 = y2 ^ (x1 & y3);
+	y2 = y4 ^ (x1 & y5);
+	y3 = y6 ^ (x1 & y7);
+	y4 = y8 ^ (x1 & y9);
+	y5 = y10 ^ (x1 & y11);
+	y6 = y12 ^ (x1 & y13);
+	y7 = y14; /* was: y14 ^ (x1 & y15) */
+	y8 = y16 ^ (x1 & y17);
+	y9 = y18 ^ (x1 & y19);
+	y10 = y20 ^ (x1 & y21);
+	y11 = y22 ^ (x1 & y23);
+	y12 = y24 ^ (x1 & y25);
+	y13 = y26 ^ (x1 & y27);
+	y14 = y28 ^ (x1 & y29);
+	y15 = y30; /* was: y30 ^ (x1 & y31) */
+
+	y0 = y0 ^ (x2 & y1); /* 16 values folded to 8 with x2 */
+	y1 = y2 ^ (x2 & y3);
+	y2 = y4 ^ (x2 & y5);
+	y3 = y6 ^ (x2 & y7);
+	y4 = y8 ^ (x2 & y9);
+	y5 = y10 ^ (x2 & y11);
+	y6 = y12 ^ (x2 & y13);
+	y7 = y14 ^ (x2 & y15);
+
+	y0 = y0 ^ (x3 & y1); /* 8 values folded to 4 with x3 */
+	y1 = y2 ^ (x3 & y3);
+	y2 = y4 ^ (x3 & y5);
+	y3 = y6 ^ (x3 & y7);
+
+	y0 = y0 ^ (x4 & y1); /* 4 values folded to 2 with x4 */
+	y1 = y2 ^ (x4 & y3);
+
+	y0 = y0 ^ (x5 & y1); /* final value: one output bit per T-box */
+
+	/*
+	 * The P permutation:
+	 * -- Each bit move is converted into a mask + left rotation.
+	 * -- Rotations that use the same movement are coalesced together.
+	 * -- Left and right shifts are used as alternatives to a rotation
+	 * where appropriate (this will help architectures that do not have
+	 * a rotation opcode).
+	 */
+	z0 = (y0 & (uint32_t)0x00000004) << 3;
+	z0 |= (y0 & (uint32_t)0x00004000) << 4;
+	z0 |= rotl(y0 & 0x12020120, 5);
+	z0 |= (y0 & (uint32_t)0x00100000) << 6;
+	z0 |= (y0 & (uint32_t)0x00008000) << 9;
+	z0 |= (y0 & (uint32_t)0x04000000) >> 22;
+	z0 |= (y0 & (uint32_t)0x00000001) << 11;
+	z0 |= rotl(y0 & 0x20000200, 12);
+	z0 |= (y0 & (uint32_t)0x00200000) >> 19;
+	z0 |= (y0 & (uint32_t)0x00000040) << 14;
+	z0 |= (y0 & (uint32_t)0x00010000) << 15;
+	z0 |= (y0 & (uint32_t)0x00000002) << 16;
+	z0 |= rotl(y0 & 0x40801800, 17);
+	z0 |= (y0 & (uint32_t)0x00080000) >> 13;
+	z0 |= (y0 & (uint32_t)0x00000010) << 21;
+	z0 |= (y0 & (uint32_t)0x01000000) >> 10;
+	z0 |= rotl(y0 & 0x88000008, 24);
+	z0 |= (y0 & (uint32_t)0x00000480) >> 7;
+	z0 |= (y0 & (uint32_t)0x00442000) >> 6;
+	return z0;
+}
+
+/*
+ * Run the 16 rounds of one DES pass over the block halves *pl and *pr;
+ * the swap of the last round is cancelled when storing the result.
+ */
+static void
+process_block_unit(uint32_t *pl, uint32_t *pr, const uint32_t *sk_exp)
+{
+	uint32_t left, right;
+	int round;
+
+	left = *pl;
+	right = *pr;
+	for (round = 0; round < 16; round ++) {
+		uint32_t next;
+
+		/* Each round consumes six expanded subkey words. */
+		next = left ^ Fconf(right, sk_exp + 6 * round);
+		left = right;
+		right = next;
+	}
+	*pl = right;
+	*pr = left;
+}
+
+/* Apply 'num_rounds' DES passes, in place, to one 8-byte block; inner.h */
+void
+br_des_ct_process_block(unsigned num_rounds,
+	const uint32_t *sk_exp, void *block)
+{
+	unsigned char *bytes = block;
+	uint32_t left, right;
+	unsigned pass;
+
+	left = br_dec32be(bytes);
+	right = br_dec32be(bytes + 4);
+	br_des_do_IP(&left, &right);
+	for (pass = 0; pass < num_rounds; pass ++) {
+		/* One pass uses 16 rounds * 6 = 96 expanded subkey words. */
+		process_block_unit(&left, &right, sk_exp + 96 * pass);
+	}
+	br_des_do_invIP(&left, &right);
+	br_enc32be(bytes, left);
+	br_enc32be(bytes + 4, right);
+}
+
+/* Expand each round's two subkey words into six mask words; see inner.h */
+void
+br_des_ct_skey_expand(uint32_t *sk_exp,
+	unsigned num_rounds, const uint32_t *skey)
+{
+	unsigned left;
+
+	for (left = num_rounds << 4; left > 0; left --) {
+		uint32_t t[6];
+		int k;
+
+		t[0] = skey[0] & 0x11111111;
+		t[1] = (skey[0] >> 1) & 0x11111111;
+		t[2] = (skey[0] >> 2) & 0x11111111;
+		t[3] = (skey[0] >> 3) & 0x11111111;
+		t[4] = skey[1] & 0x11111111;
+		t[5] = (skey[1] >> 1) & 0x11111111;
+		for (k = 0; k < 6; k ++) {
+			/* (x << 4) - x fills each nibble with its low bit. */
+			sk_exp[k] = (t[k] << 4) - t[k];
+		}
+		skey += 2;
+		sk_exp += 6;
+	}
+}
diff --git a/third_party/bearssl/src/des_ct_cbcdec.c b/third_party/bearssl/src/des_ct_cbcdec.c
new file mode 100644
index 0000000..d208a3d
--- /dev/null
+++ b/third_party/bearssl/src/des_ct_cbcdec.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Set up a DES/3DES CBC decryption context; see bearssl_block.h */
+void
+br_des_ct_cbcdec_init(br_des_ct_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	uint32_t *sk = ctx->skey;
+	int lo, hi;
+
+	ctx->vtable = &br_des_ct_cbcdec_vtable;
+	ctx->num_rounds = br_des_ct_keysched(sk, key, len);
+	if (len == 8) {
+		br_des_rev_skey(sk);
+		return;
+	}
+	/* Reverse the order of all 48 two-word subkey entries (3 * 16). */
+	for (lo = 0, hi = 94; lo < hi; lo += 2, hi -= 2) {
+		uint32_t t0 = sk[lo], t1 = sk[lo + 1];
+
+		sk[lo] = sk[hi];
+		sk[lo + 1] = sk[hi + 1];
+		sk[hi] = t0;
+		sk[hi + 1] = t1;
+	}
+}
+
+/* CBC-decrypt 'len' bytes of 'data' in place; see bearssl_block.h */
+void
+br_des_ct_cbcdec_run(const br_des_ct_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	uint32_t sk_exp[288];
+	unsigned char *chain = iv;
+	unsigned char *blk;
+
+	br_des_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	/* One 8-byte block per turn; 'len' must be a multiple of 8, or */
+	/* the unsigned 'len -= 8' would wrap on a shorter tail. */
+	for (blk = data; len > 0; blk += 8, len -= 8) {
+		unsigned char saved[8];
+		int k;
+
+		/* Keep the input block: it is XORed into the next output. */
+		memcpy(saved, blk, sizeof saved);
+		br_des_ct_process_block(ctx->num_rounds, sk_exp, blk);
+		for (k = 0; k < 8; k ++) {
+			blk[k] ^= chain[k];
+		}
+		memcpy(chain, saved, sizeof saved);
+	}
+}
+
+/* Method table for the "des_ct" CBC decryption code; see bearssl_block.h */
+const br_block_cbcdec_class br_des_ct_cbcdec_vtable = {
+	sizeof(br_des_ct_cbcdec_keys),	/* size of the context structure */
+	8,	/* presumably the block size in bytes -- confirm in bearssl_block.h */
+	3,	/* presumably log2(block size) -- confirm in bearssl_block.h */
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))
+		&br_des_ct_cbcdec_init,	/* cast to the class-generic prototype */
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_des_ct_cbcdec_run	/* cast to the class-generic prototype */
+};
diff --git a/third_party/bearssl/src/des_ct_cbcenc.c b/third_party/bearssl/src/des_ct_cbcenc.c
new file mode 100644
index 0000000..4b3610e
--- /dev/null
+++ b/third_party/bearssl/src/des_ct_cbcenc.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Set up a DES/3DES CBC encryption context; see bearssl_block.h */
+void
+br_des_ct_cbcenc_init(br_des_ct_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_des_ct_cbcenc_vtable;
+	ctx->num_rounds = br_des_ct_keysched(ctx->skey, key, len); /* 1 or 3 */
+}
+
+/* CBC-encrypt 'len' bytes of 'data' in place; see bearssl_block.h */
+void
+br_des_ct_cbcenc_run(const br_des_ct_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	uint32_t sk_exp[288];
+	unsigned char *chain = iv;
+	unsigned char *blk;
+
+	br_des_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	/* One 8-byte block per turn; 'len' must be a multiple of 8, or */
+	/* the unsigned 'len -= 8' would wrap on a shorter tail. */
+	for (blk = data; len > 0; blk += 8, len -= 8) {
+		int k;
+
+		for (k = 0; k < 8; k ++) {
+			blk[k] ^= chain[k];
+		}
+		br_des_ct_process_block(ctx->num_rounds, sk_exp, blk);
+		/* The fresh output block is XORed into the next input. */
+		memcpy(chain, blk, 8);
+	}
+}
+
+/* Method table for the "des_ct" CBC encryption code; see bearssl_block.h */
+const br_block_cbcenc_class br_des_ct_cbcenc_vtable = {
+	sizeof(br_des_ct_cbcenc_keys),	/* size of the context structure */
+	8,	/* presumably the block size in bytes -- confirm in bearssl_block.h */
+	3,	/* presumably log2(block size) -- confirm in bearssl_block.h */
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
+		&br_des_ct_cbcenc_init,	/* cast to the class-generic prototype */
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+		&br_des_ct_cbcenc_run	/* cast to the class-generic prototype */
+};
diff --git a/third_party/bearssl/src/des_support.c b/third_party/bearssl/src/des_support.c
new file mode 100644
index 0000000..37f6db3
--- /dev/null
+++ b/third_party/bearssl/src/des_support.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* DES initial permutation IP on the (*xl, *xr) word pair; see inner.h */
+void
+br_des_do_IP(uint32_t *xl, uint32_t *xr)
+{
+	/*
+	 * Permutation algorithm is initially from Richard Outerbridge;
+	 * implementation here is adapted from Crypto++ "des.cpp" file
+	 * (which is in public domain). Each 3-line step swaps two bit groups.
+	 */
+	uint32_t l, r, t;
+
+	l = *xl;
+	r = *xr;
+	t = ((l >>  4) ^ r) & (uint32_t)0x0F0F0F0F; /* r&0F0F0F0F <-> l&F0F0F0F0 */
+	r ^= t;
+	l ^= t <<  4;
+	t = ((l >> 16) ^ r) & (uint32_t)0x0000FFFF; /* r&0000FFFF <-> l&FFFF0000 */
+	r ^= t;
+	l ^= t << 16;
+	t = ((r >>  2) ^ l) & (uint32_t)0x33333333; /* l&33333333 <-> r&CCCCCCCC */
+	l ^= t;
+	r ^= t <<  2;
+	t = ((r >>  8) ^ l) & (uint32_t)0x00FF00FF; /* l&00FF00FF <-> r&FF00FF00 */
+	l ^= t;
+	r ^= t <<  8;
+	t = ((l >>  1) ^ r) & (uint32_t)0x55555555; /* r&55555555 <-> l&AAAAAAAA */
+	r ^= t;
+	l ^= t <<  1;
+	*xl = l;
+	*xr = r;
+}
+
+/* Inverse of br_des_do_IP() on the (*xl, *xr) word pair; see inner.h */
+void
+br_des_do_invIP(uint32_t *xl, uint32_t *xr)
+{
+	/*
+	 * Steps of br_des_do_IP() in reverse order; each swap is self-inverse.
+	 */
+	uint32_t l, r, t;
+
+	l = *xl;
+	r = *xr;
+	t = ((l >>  1) ^ r) & 0x55555555; /* r&55555555 <-> l&AAAAAAAA */
+	r ^= t;
+	l ^= t <<  1;
+	t = ((r >>  8) ^ l) & 0x00FF00FF; /* l&00FF00FF <-> r&FF00FF00 */
+	l ^= t;
+	r ^= t <<  8;
+	t = ((r >>  2) ^ l) & 0x33333333; /* l&33333333 <-> r&CCCCCCCC */
+	l ^= t;
+	r ^= t <<  2;
+	t = ((l >> 16) ^ r) & 0x0000FFFF; /* r&0000FFFF <-> l&FFFF0000 */
+	r ^= t;
+	l ^= t << 16;
+	t = ((l >>  4) ^ r) & 0x0F0F0F0F; /* r&0F0F0F0F <-> l&F0F0F0F0 */
+	r ^= t;
+	l ^= t <<  4;
+	*xl = l;
+	*xr = r;
+}
+
+/* Per-round rotated key halves: skey[2i] = kl, skey[2i+1] = kr; inner.h */
+void
+br_des_keysched_unit(uint32_t *skey, const void *key)
+{
+	uint32_t xl, xr, kl, kr;
+	int i;
+
+	xl = br_dec32be(key); /* first four key bytes */
+	xr = br_dec32be((const unsigned char *)key + 4); /* last four key bytes */
+
+	/*
+	 * Permutation PC-1 is quite similar to the IP permutation.
+	 * Definition of IP (in FIPS 46-3 notations) is:
+	 *   58 50 42 34 26 18 10 2
+	 *   60 52 44 36 28 20 12 4
+	 *   62 54 46 38 30 22 14 6
+	 *   64 56 48 40 32 24 16 8
+	 *   57 49 41 33 25 17  9 1
+	 *   59 51 43 35 27 19 11 3
+	 *   61 53 45 37 29 21 13 5
+	 *   63 55 47 39 31 23 15 7
+	 *
+	 * Definition of PC-1 is:
+	 *   57 49 41 33 25 17  9 1
+	 *   58 50 42 34 26 18 10 2
+	 *   59 51 43 35 27 19 11 3
+	 *   60 52 44 36
+	 *   63 55 47 39 31 23 15 7
+	 *   62 54 46 38 30 22 14 6
+	 *   61 53 45 37 29 21 13 5
+	 *   28 20 12  4
+	 */
+	br_des_do_IP(&xl, &xr);
+	kl = ((xr & (uint32_t)0xFF000000) >> 4) /* assemble the 28-bit kl */
+		| ((xl & (uint32_t)0xFF000000) >> 12)
+		| ((xr & (uint32_t)0x00FF0000) >> 12)
+		| ((xl & (uint32_t)0x00FF0000) >> 20);
+	kr = ((xr & (uint32_t)0x000000FF) << 20) /* assemble the 28-bit kr */
+		| ((xl & (uint32_t)0x0000FF00) << 4)
+		| ((xr & (uint32_t)0x0000FF00) >> 4)
+		| ((xl & (uint32_t)0x000F0000) >> 16);
+
+	/*
+	 * For each round, rotate the two 28-bit words kl and kr.
+	 * The extraction of the 48-bit subkey (PC-2) is not done yet.
+	 */
+	for (i = 0; i < 16; i ++) {
+		if ((1 << i) & 0x8103) { /* rounds 0, 1, 8 and 15: one bit */
+			kl = (kl << 1) | (kl >> 27);
+			kr = (kr << 1) | (kr >> 27);
+		} else { /* all other rounds: two bits */
+			kl = (kl << 2) | (kl >> 26);
+			kr = (kr << 2) | (kr >> 26);
+		}
+		kl &= (uint32_t)0x0FFFFFFF; /* keep the rotation within 28 bits */
+		kr &= (uint32_t)0x0FFFFFFF;
+		skey[(i << 1) + 0] = kl;
+		skey[(i << 1) + 1] = kr;
+	}
+}
+
+/* Reverse in place the 16 two-word entries of skey[]; see inner.h */
+void
+br_des_rev_skey(uint32_t *skey)
+{
+	uint32_t *head, *tail;
+
+	/* Swap entries pairwise, walking inwards from both ends. */
+	for (head = skey, tail = skey + 30; head < tail; head += 2, tail -= 2) {
+		uint32_t w0 = head[0];
+		uint32_t w1 = head[1];
+
+		head[0] = tail[0];
+		head[1] = tail[1];
+		tail[0] = w0;
+		tail[1] = w1;
+	}
+}
diff --git a/third_party/bearssl/src/des_tab.c b/third_party/bearssl/src/des_tab.c
new file mode 100644
index 0000000..3f8e4f9
--- /dev/null
+++ b/third_party/bearssl/src/des_tab.c
@@ -0,0 +1,310 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * PC2left[x] tells where bit x goes when applying PC-2. 'x' is a bit
+ * position in the left rotated key word. Both positions are in normal
+ * order (rightmost bit is 0). Target 24 is a discard slot: Fconf() only reads bits 0..23.
+ */
+static const unsigned char PC2left[] = {
+	16,  3,  7, 24, 20, 11, 24,
+	13,  2, 10, 24, 22,  5, 15,
+	23,  1,  9, 21, 12, 24,  6,
+	 4, 14, 18,  8, 17,  0, 19
+};
+
+/*
+ * Similar to PC2left[x], for the right rotated key word (target 24 again marks a bit that Fconf() never reads).
+ */
+static const unsigned char PC2right[] = {
+	 8, 18, 24,  6, 22, 15,  3,
+	10, 12, 19,  5, 14, 11, 24,
+	 4, 23, 16,  9, 24, 20,  2,
+	24,  7, 13,  0, 21, 17,  1
+};
+
+/*
+ * S-boxes merged with the P permutation: each entry is the 4-bit S-box output already placed at its final bit positions.
+ */
+static const uint32_t S1[] = {
+	0x00808200, 0x00000000, 0x00008000, 0x00808202,
+	0x00808002, 0x00008202, 0x00000002, 0x00008000,
+	0x00000200, 0x00808200, 0x00808202, 0x00000200,
+	0x00800202, 0x00808002, 0x00800000, 0x00000002,
+	0x00000202, 0x00800200, 0x00800200, 0x00008200,
+	0x00008200, 0x00808000, 0x00808000, 0x00800202,
+	0x00008002, 0x00800002, 0x00800002, 0x00008002,
+	0x00000000, 0x00000202, 0x00008202, 0x00800000,
+	0x00008000, 0x00808202, 0x00000002, 0x00808000,
+	0x00808200, 0x00800000, 0x00800000, 0x00000200,
+	0x00808002, 0x00008000, 0x00008200, 0x00800002,
+	0x00000200, 0x00000002, 0x00800202, 0x00008202,
+	0x00808202, 0x00008002, 0x00808000, 0x00800202,
+	0x00800002, 0x00000202, 0x00008202, 0x00808200,
+	0x00000202, 0x00800200, 0x00800200, 0x00000000,
+	0x00008002, 0x00008200, 0x00000000, 0x00808002
+};
+
+static const uint32_t S2[] = {
+	0x40084010, 0x40004000, 0x00004000, 0x00084010,
+	0x00080000, 0x00000010, 0x40080010, 0x40004010,
+	0x40000010, 0x40084010, 0x40084000, 0x40000000,
+	0x40004000, 0x00080000, 0x00000010, 0x40080010,
+	0x00084000, 0x00080010, 0x40004010, 0x00000000,
+	0x40000000, 0x00004000, 0x00084010, 0x40080000,
+	0x00080010, 0x40000010, 0x00000000, 0x00084000,
+	0x00004010, 0x40084000, 0x40080000, 0x00004010,
+	0x00000000, 0x00084010, 0x40080010, 0x00080000,
+	0x40004010, 0x40080000, 0x40084000, 0x00004000,
+	0x40080000, 0x40004000, 0x00000010, 0x40084010,
+	0x00084010, 0x00000010, 0x00004000, 0x40000000,
+	0x00004010, 0x40084000, 0x00080000, 0x40000010,
+	0x00080010, 0x40004010, 0x40000010, 0x00080010,
+	0x00084000, 0x00000000, 0x40004000, 0x00004010,
+	0x40000000, 0x40080010, 0x40084010, 0x00084000
+};
+
+static const uint32_t S3[] = {
+	0x00000104, 0x04010100, 0x00000000, 0x04010004,
+	0x04000100, 0x00000000, 0x00010104, 0x04000100,
+	0x00010004, 0x04000004, 0x04000004, 0x00010000,
+	0x04010104, 0x00010004, 0x04010000, 0x00000104,
+	0x04000000, 0x00000004, 0x04010100, 0x00000100,
+	0x00010100, 0x04010000, 0x04010004, 0x00010104,
+	0x04000104, 0x00010100, 0x00010000, 0x04000104,
+	0x00000004, 0x04010104, 0x00000100, 0x04000000,
+	0x04010100, 0x04000000, 0x00010004, 0x00000104,
+	0x00010000, 0x04010100, 0x04000100, 0x00000000,
+	0x00000100, 0x00010004, 0x04010104, 0x04000100,
+	0x04000004, 0x00000100, 0x00000000, 0x04010004,
+	0x04000104, 0x00010000, 0x04000000, 0x04010104,
+	0x00000004, 0x00010104, 0x00010100, 0x04000004,
+	0x04010000, 0x04000104, 0x00000104, 0x04010000,
+	0x00010104, 0x00000004, 0x04010004, 0x00010100
+};
+
+static const uint32_t S4[] = {
+	0x80401000, 0x80001040, 0x80001040, 0x00000040,
+	0x00401040, 0x80400040, 0x80400000, 0x80001000,
+	0x00000000, 0x00401000, 0x00401000, 0x80401040,
+	0x80000040, 0x00000000, 0x00400040, 0x80400000,
+	0x80000000, 0x00001000, 0x00400000, 0x80401000,
+	0x00000040, 0x00400000, 0x80001000, 0x00001040,
+	0x80400040, 0x80000000, 0x00001040, 0x00400040,
+	0x00001000, 0x00401040, 0x80401040, 0x80000040,
+	0x00400040, 0x80400000, 0x00401000, 0x80401040,
+	0x80000040, 0x00000000, 0x00000000, 0x00401000,
+	0x00001040, 0x00400040, 0x80400040, 0x80000000,
+	0x80401000, 0x80001040, 0x80001040, 0x00000040,
+	0x80401040, 0x80000040, 0x80000000, 0x00001000,
+	0x80400000, 0x80001000, 0x00401040, 0x80400040,
+	0x80001000, 0x00001040, 0x00400000, 0x80401000,
+	0x00000040, 0x00400000, 0x00001000, 0x00401040
+};
+
+static const uint32_t S5[] = {
+	0x00000080, 0x01040080, 0x01040000, 0x21000080,
+	0x00040000, 0x00000080, 0x20000000, 0x01040000,
+	0x20040080, 0x00040000, 0x01000080, 0x20040080,
+	0x21000080, 0x21040000, 0x00040080, 0x20000000,
+	0x01000000, 0x20040000, 0x20040000, 0x00000000,
+	0x20000080, 0x21040080, 0x21040080, 0x01000080,
+	0x21040000, 0x20000080, 0x00000000, 0x21000000,
+	0x01040080, 0x01000000, 0x21000000, 0x00040080,
+	0x00040000, 0x21000080, 0x00000080, 0x01000000,
+	0x20000000, 0x01040000, 0x21000080, 0x20040080,
+	0x01000080, 0x20000000, 0x21040000, 0x01040080,
+	0x20040080, 0x00000080, 0x01000000, 0x21040000,
+	0x21040080, 0x00040080, 0x21000000, 0x21040080,
+	0x01040000, 0x00000000, 0x20040000, 0x21000000,
+	0x00040080, 0x01000080, 0x20000080, 0x00040000,
+	0x00000000, 0x20040000, 0x01040080, 0x20000080
+};
+
+static const uint32_t S6[] = {
+	0x10000008, 0x10200000, 0x00002000, 0x10202008,
+	0x10200000, 0x00000008, 0x10202008, 0x00200000,
+	0x10002000, 0x00202008, 0x00200000, 0x10000008,
+	0x00200008, 0x10002000, 0x10000000, 0x00002008,
+	0x00000000, 0x00200008, 0x10002008, 0x00002000,
+	0x00202000, 0x10002008, 0x00000008, 0x10200008,
+	0x10200008, 0x00000000, 0x00202008, 0x10202000,
+	0x00002008, 0x00202000, 0x10202000, 0x10000000,
+	0x10002000, 0x00000008, 0x10200008, 0x00202000,
+	0x10202008, 0x00200000, 0x00002008, 0x10000008,
+	0x00200000, 0x10002000, 0x10000000, 0x00002008,
+	0x10000008, 0x10202008, 0x00202000, 0x10200000,
+	0x00202008, 0x10202000, 0x00000000, 0x10200008,
+	0x00000008, 0x00002000, 0x10200000, 0x00202008,
+	0x00002000, 0x00200008, 0x10002008, 0x00000000,
+	0x10202000, 0x10000000, 0x00200008, 0x10002008
+};
+
+static const uint32_t S7[] = {
+	0x00100000, 0x02100001, 0x02000401, 0x00000000,
+	0x00000400, 0x02000401, 0x00100401, 0x02100400,
+	0x02100401, 0x00100000, 0x00000000, 0x02000001,
+	0x00000001, 0x02000000, 0x02100001, 0x00000401,
+	0x02000400, 0x00100401, 0x00100001, 0x02000400,
+	0x02000001, 0x02100000, 0x02100400, 0x00100001,
+	0x02100000, 0x00000400, 0x00000401, 0x02100401,
+	0x00100400, 0x00000001, 0x02000000, 0x00100400,
+	0x02000000, 0x00100400, 0x00100000, 0x02000401,
+	0x02000401, 0x02100001, 0x02100001, 0x00000001,
+	0x00100001, 0x02000000, 0x02000400, 0x00100000,
+	0x02100400, 0x00000401, 0x00100401, 0x02100400,
+	0x00000401, 0x02000001, 0x02100401, 0x02100000,
+	0x00100400, 0x00000000, 0x00000001, 0x02100401,
+	0x00000000, 0x00100401, 0x02100000, 0x00000400,
+	0x02000001, 0x02000400, 0x00000400, 0x00100001
+};
+
+static const uint32_t S8[] = {
+	0x08000820, 0x00000800, 0x00020000, 0x08020820,
+	0x08000000, 0x08000820, 0x00000020, 0x08000000,
+	0x00020020, 0x08020000, 0x08020820, 0x00020800,
+	0x08020800, 0x00020820, 0x00000800, 0x00000020,
+	0x08020000, 0x08000020, 0x08000800, 0x00000820,
+	0x00020800, 0x00020020, 0x08020020, 0x08020800,
+	0x00000820, 0x00000000, 0x00000000, 0x08020020,
+	0x08000020, 0x08000800, 0x00020820, 0x00020000,
+	0x00020820, 0x00020000, 0x08020800, 0x00000800,
+	0x00000020, 0x08020020, 0x00000800, 0x00020820,
+	0x08000800, 0x00000020, 0x08000020, 0x08020000,
+	0x08020020, 0x08000000, 0x00020000, 0x08000820,
+	0x00000000, 0x08020820, 0x00020020, 0x08000020,
+	0x08020000, 0x08000800, 0x08000820, 0x00000000,
+	0x08020820, 0x00020800, 0x00020800, 0x00000820,
+	0x00000820, 0x00020020, 0x08000000, 0x08020800
+};
+
+static inline uint32_t
+Fconf(uint32_t r0, uint32_t skl, uint32_t skr)
+{
+	uint32_t r1;
+	/* Round function: combines the 32-bit half r0 with the subkey word pair (skl, skr). */
+	r1 = (r0 << 16) | (r0 >> 16);	/* r0 rotated by 16: gives S1 and S8 their 6-bit windows that wrap around the ends of r0 */
+	return	/* OR of eight lookups, each indexed by 6 bits of r0 XOR 6 bits of the subkey (skl for S1..S4, skr for S5..S8) */
+		  S1[((r1 >> 11) ^ (skl >> 18)) & 0x3F]
+		| S2[((r0 >> 23) ^ (skl >> 12)) & 0x3F]
+		| S3[((r0 >> 19) ^ (skl >>  6)) & 0x3F]
+		| S4[((r0 >> 15) ^ (skl      )) & 0x3F]
+		| S5[((r0 >> 11) ^ (skr >> 18)) & 0x3F]
+		| S6[((r0 >>  7) ^ (skr >> 12)) & 0x3F]
+		| S7[((r0 >>  3) ^ (skr >>  6)) & 0x3F]
+		| S8[((r1 >> 15) ^ (skr      )) & 0x3F];
+}
+
+static void
+process_block_unit(uint32_t *pl, uint32_t *pr, const uint32_t *skey)
+{
+	int i;
+	uint32_t l, r;
+	/* Run 16 rounds over (*pl, *pr), consuming skey[0..31] two words per round. */
+	l = *pl;
+	r = *pr;
+	for (i = 0; i < 16; i ++) {
+		uint32_t t;
+
+		t = l ^ Fconf(r, skey[(i << 1) + 0], skey[(i << 1) + 1]);
+		l = r;
+		r = t;
+	}
+	*pl = r;	/* the two halves are written back swapped */
+	*pr = l;
+}
+
+/* see inner.h -- transforms the 8-byte block in place, using num_rounds consecutive 32-word subkey sets */
+void
+br_des_tab_process_block(unsigned num_rounds, const uint32_t *skey, void *block)
+{
+	unsigned char *buf;
+	uint32_t l, r;
+
+	buf = block;
+	l = br_dec32be(buf);	/* the block is read as two 32-bit words via br_dec32be() */
+	r = br_dec32be(buf + 4);
+	br_des_do_IP(&l, &r);
+	while (num_rounds -- > 0) {	/* one 16-round pass per subkey set (br_des_tab_keysched() returns 1 or 3) */
+		process_block_unit(&l, &r, skey);
+		skey += 32;
+	}
+	br_des_do_invIP(&l, &r);
+	br_enc32be(buf, l);
+	br_enc32be(buf + 4, r);
+}
+
+static void
+keysched_unit(uint32_t *skey, const void *key)
+{
+	int i;
+
+	br_des_keysched_unit(skey, key);	/* fills skey[0..31] with the rotated key words; PC-2 is applied below */
+
+	/*
+	 * Apply PC-2 to get the 48-bit subkeys (24 bits kept in each word).
+	 */
+	for (i = 0; i < 16; i ++) {
+		uint32_t xl, xr, ul, ur;
+		int j;
+
+		xl = skey[(i << 1) + 0];
+		xr = skey[(i << 1) + 1];
+		ul = 0;
+		ur = 0;
+		for (j = 0; j < 28; j ++) {	/* bit j of xl / xr is moved to position PC2left[j] / PC2right[j] */
+			ul |= (xl & 1) << PC2left[j];
+			ur |= (xr & 1) << PC2right[j];
+			xl >>= 1;
+			xr >>= 1;
+		}
+		skey[(i << 1) + 0] = ul;
+		skey[(i << 1) + 1] = ur;
+	}
+}
+
+/* see inner.h -- expands key into skey; returns the number of 32-word subkey sets to run (1 or 3) */
+unsigned
+br_des_tab_keysched(uint32_t *skey, const void *key, size_t key_len)
+{
+	switch (key_len) {
+	case 8:	/* single schedule in skey[0..31] */
+		keysched_unit(skey, key);
+		return 1;
+	case 16:	/* two 8-byte keys: the third schedule is a copy of the first */
+		keysched_unit(skey, key);
+		keysched_unit(skey + 32, (const unsigned char *)key + 8);
+		br_des_rev_skey(skey + 32);	/* middle schedule is stored in reversed round order */
+		memcpy(skey + 64, skey, 32 * sizeof *skey);
+		return 3;
+	default:	/* no validation: any other key_len is handled as 24 bytes (key[0..23] is read) */
+		keysched_unit(skey, key);
+		keysched_unit(skey + 32, (const unsigned char *)key + 8);
+		br_des_rev_skey(skey + 32);	/* middle schedule is stored in reversed round order */
+		keysched_unit(skey + 64, (const unsigned char *)key + 16);
+		return 3;
+	}
+}
diff --git a/third_party/bearssl/src/des_tab_cbcdec.c b/third_party/bearssl/src/des_tab_cbcdec.c
new file mode 100644
index 0000000..e7eabe9
--- /dev/null
+++ b/third_party/bearssl/src/des_tab_cbcdec.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h -- builds the subkeys, then reverses their order for use by br_des_tab_cbcdec_run() */
+void
+br_des_tab_cbcdec_init(br_des_tab_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_des_tab_cbcdec_vtable;
+	ctx->num_rounds = br_des_tab_keysched(ctx->skey, key, len);
+	if (len == 8) {
+		br_des_rev_skey(ctx->skey);	/* one schedule: reverse its 16 word pairs */
+	} else {
+		int i;
+
+		for (i = 0; i < 48; i += 2) {	/* three schedules: reverse all 48 word pairs of skey[0..95] as one sequence */
+			uint32_t t;
+
+			t = ctx->skey[i];
+			ctx->skey[i] = ctx->skey[94 - i];
+			ctx->skey[94 - i] = t;
+			t = ctx->skey[i + 1];
+			ctx->skey[i + 1] = ctx->skey[95 - i];
+			ctx->skey[95 - i] = t;
+		}
+	}
+}
+
+/* see bearssl_block.h -- CBC-decrypts data in place; len must be a multiple of 8 (len -= 8 on a size_t would wrap otherwise) */
+void
+br_des_tab_cbcdec_run(const br_des_tab_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf, *ivbuf;
+
+	ivbuf = iv;
+	buf = data;
+	while (len > 0) {
+		unsigned char tmp[8];
+		int i;
+
+		memcpy(tmp, buf, 8);	/* keep the input block: it becomes the chaining value for the next block */
+		br_des_tab_process_block(ctx->num_rounds, ctx->skey, buf);
+		for (i = 0; i < 8; i ++) {
+			buf[i] ^= ivbuf[i];
+		}
+		memcpy(ivbuf, tmp, 8);	/* iv is updated in place, so the caller's buffer ends up holding the last input block */
+		buf += 8;
+		len -= 8;
+	}
+}
+
+/* see bearssl_block.h -- method table installed in ctx->vtable by br_des_tab_cbcdec_init() */
+const br_block_cbcdec_class br_des_tab_cbcdec_vtable = {
+	sizeof(br_des_tab_cbcdec_keys),
+	8,	/* NOTE(review): presumably the block size in bytes -- confirm against br_block_cbcdec_class */
+	3,	/* NOTE(review): presumably log2 of the block size -- confirm against br_block_cbcdec_class */
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))	/* cast: init takes the concrete keys type, not the class pointer */
+		&br_des_tab_cbcdec_init,
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_des_tab_cbcdec_run
+};
diff --git a/third_party/bearssl/src/des_tab_cbcenc.c b/third_party/bearssl/src/des_tab_cbcenc.c
new file mode 100644
index 0000000..3a45ba3
--- /dev/null
+++ b/third_party/bearssl/src/des_tab_cbcenc.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h -- installs the vtable and builds the subkeys; unlike the cbcdec variant, their order is left as produced */
+void
+br_des_tab_cbcenc_init(br_des_tab_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_des_tab_cbcenc_vtable;
+	ctx->num_rounds = br_des_tab_keysched(ctx->skey, key, len);	/* 1 or 3 subkey sets, depending on len */
+}
+
+/* see bearssl_block.h -- CBC-encrypts data in place; len must be a multiple of 8 (len -= 8 on a size_t would wrap otherwise) */
+void
+br_des_tab_cbcenc_run(const br_des_tab_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf, *ivbuf;
+
+	ivbuf = iv;
+	buf = data;
+	while (len > 0) {
+		int i;
+
+		for (i = 0; i < 8; i ++) {	/* XOR the chaining value into the block before processing it */
+			buf[i] ^= ivbuf[i];
+		}
+		br_des_tab_process_block(ctx->num_rounds, ctx->skey, buf);
+		memcpy(ivbuf, buf, 8);	/* the output block is the next chaining value; the caller's iv buffer is updated in place */
+		buf += 8;
+		len -= 8;
+	}
+}
+
+/* see bearssl_block.h -- method table installed in ctx->vtable by br_des_tab_cbcenc_init() */
+const br_block_cbcenc_class br_des_tab_cbcenc_vtable = {
+	sizeof(br_des_tab_cbcenc_keys),
+	8,	/* NOTE(review): presumably the block size in bytes -- confirm against br_block_cbcenc_class */
+	3,	/* NOTE(review): presumably log2 of the block size -- confirm against br_block_cbcenc_class */
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))	/* cast: init takes the concrete keys type, not the class pointer */
+		&br_des_tab_cbcenc_init,
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+		&br_des_tab_cbcenc_run
+};
diff --git a/third_party/bearssl/src/dig_oid.c b/third_party/bearssl/src/dig_oid.c
new file mode 100644
index 0000000..cd9692c
--- /dev/null
+++ b/third_party/bearssl/src/dig_oid.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * This file contains the encoded OIDs for the standard hash functions.
+ * Such OIDs appear in, for instance, the PKCS#1 v1.5 padding for RSA
+ * signatures.
+ */
+
+static const unsigned char md5_OID[] = {	/* OID value bytes only: no tag or length prefix */
+	0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x02, 0x05	/* 1.2.840.113549.2.5 */
+};
+
+static const unsigned char sha1_OID[] = {
+	0x2B, 0x0E, 0x03, 0x02, 0x1A	/* 1.3.14.3.2.26 */
+};
+
+static const unsigned char sha224_OID[] = {
+	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04	/* 2.16.840.1.101.3.4.2.4 */
+};
+
+static const unsigned char sha256_OID[] = {
+	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01	/* 2.16.840.1.101.3.4.2.1 */
+};
+
+static const unsigned char sha384_OID[] = {
+	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02	/* 2.16.840.1.101.3.4.2.2 */
+};
+
+static const unsigned char sha512_OID[] = {
+	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03	/* 2.16.840.1.101.3.4.2.3 */
+};
+
+/* see inner.h -- returns the encoded OID bytes for digest_id and stores their count in *len; NULL with *len = 0 if there is no entry */
+const unsigned char *
+br_digest_OID(int digest_id, size_t *len)
+{
+	switch (digest_id) {
+	case br_md5_ID:
+		*len = sizeof md5_OID;
+		return md5_OID;
+	case br_sha1_ID:
+		*len = sizeof sha1_OID;
+		return sha1_OID;
+	case br_sha224_ID:
+		*len = sizeof sha224_OID;
+		return sha224_OID;
+	case br_sha256_ID:
+		*len = sizeof sha256_OID;
+		return sha256_OID;
+	case br_sha384_ID:
+		*len = sizeof sha384_OID;
+		return sha384_OID;
+	case br_sha512_ID:
+		*len = sizeof sha512_OID;
+		return sha512_OID;
+	default:	/* also reached for br_md5sha1_ID, which has no case in this switch */
+		*len = 0;
+		return NULL;
+	}
+}
diff --git a/third_party/bearssl/src/dig_size.c b/third_party/bearssl/src/dig_size.c
new file mode 100644
index 0000000..4625d2c
--- /dev/null
+++ b/third_party/bearssl/src/dig_size.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- maps digest_id to the matching br_*_SIZE constant; returns 0 for an unrecognised ID */
+size_t
+br_digest_size_by_ID(int digest_id)
+{
+	switch (digest_id) {
+	case br_md5sha1_ID:	/* combined ID: sum of the MD5 and SHA-1 sizes */
+		return br_md5_SIZE + br_sha1_SIZE;
+	case br_md5_ID:
+		return br_md5_SIZE;
+	case br_sha1_ID:
+		return br_sha1_SIZE;
+	case br_sha224_ID:
+		return br_sha224_SIZE;
+	case br_sha256_ID:
+		return br_sha256_SIZE;
+	case br_sha384_ID:
+		return br_sha384_SIZE;
+	case br_sha512_ID:
+		return br_sha512_SIZE;
+	default:	/* unknown ID: reported as size 0 rather than aborting */
+		/* abort(); */
+		return 0;
+	}
+}
diff --git a/third_party/bearssl/src/eax.c b/third_party/bearssl/src/eax.c
new file mode 100644
index 0000000..bcc704a
--- /dev/null
+++ b/third_party/bearssl/src/eax.c
@@ -0,0 +1,525 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Implementation Notes
+ * ====================
+ *
+ * The combined CTR + CBC-MAC functions can only handle full blocks,
+ * so some buffering is necessary. Moreover, EAX has a special padding
+ * rule for CBC-MAC, which implies that we cannot compute the MAC over
+ * the last received full block until we know whether we are at the
+ * end of the data or not.
+ *
+ *  - 'ptr' contains a value from 1 to 16, which is the number of bytes
+ *    accumulated in buf[] that still needs to be processed with the
+ *    current OMAC computation. Beware that this can go to 16: a
+ *    complete block cannot be processed until it is known whether it
+ *    is the last block or not. However, it can never be 0, because
+ *    OMAC^t works on an input that is at least one-block long.
+ *
+ *  - When processing the message itself, CTR encryption/decryption is
+ *    also done at the same time. The first 'ptr' bytes of buf[] then
+ *    contains the encrypted bytes, while the last '16 - ptr' bytes of
+ *    buf[] are the remnants of the stream block, to be used against
+ *    the next input bytes, when available.
+ *
+ *  - The current counter and running CBC-MAC values are kept in 'ctr'
+ *    and 'cbcmac', respectively.
+ *
+ *  - The derived keys for padding are kept in L2 and L4 (double and
+ *    quadruple of Enc_K(0^n), in GF(2^128), respectively).
+ */
+
+/*
+ * Start an OMAC computation; the first block is the big-endian
+ * representation of the provided value ('val' must fit in one byte).
+ * We make it a delayed block because it may also be the last one.
+ */
+static void
+omac_start(br_eax_context *ctx, unsigned val)
+{
+	memset(ctx->cbcmac, 0, sizeof ctx->cbcmac);	/* running MAC restarts from all-zero */
+	memset(ctx->buf, 0, sizeof ctx->buf);
+	ctx->buf[15] = val;	/* block is fifteen zero bytes followed by val */
+	ctx->ptr = 16;	/* buf[] holds one full block that has not been MACed yet */
+}
+
+/*
+ * Double a value in finite field GF(2^128), defined with modulus
+ * X^128+X^7+X^2+X+1. src and dst are 16 bytes each, byte 0 being the most significant.
+ */
+static void
+double_gf128(unsigned char *dst, const unsigned char *src)
+{
+	unsigned cc;
+	int i;
+
+	cc = 0x87 & -((unsigned)src[0] >> 7);	/* 0x87 if the top bit of src[0] is set, else 0 (selected with a mask, not a branch) */
+	for (i = 15; i >= 0; i --) {	/* shift left by one bit, walking from byte 15 up to byte 0 */
+		unsigned z;
+
+		z = (src[i] << 1) ^ cc;	/* byte 15 absorbs the 0x87 term; the other bytes absorb the carried-out bit */
+		cc = z >> 8;
+		dst[i] = (unsigned char)z;
+	}
+}
+
+/*
+ * Apply padding to the last block, currently in ctx->buf (with
+ * ctx->ptr bytes), and finalize OMAC computation.
+ */
+static void
+do_pad(br_eax_context *ctx)
+{
+	unsigned char *pad;
+	size_t ptr, u;
+
+	ptr = ctx->ptr;
+	if (ptr == 16) {
+		pad = ctx->L2;	/* full last block: no padding bytes are added, mask is L2 */
+	} else {
+		ctx->buf[ptr ++] = 0x80;	/* partial block: append 0x80, then zeros up to 16 bytes; mask is L4 */
+		memset(ctx->buf + ptr, 0x00, 16 - ptr);
+		pad = ctx->L4;
+	}
+	for (u = 0; u < sizeof ctx->buf; u ++) {	/* buf[] is modified in place */
+		ctx->buf[u] ^= pad[u];
+	}
+	(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, ctx->buf, sizeof ctx->buf);	/* callers read the result from ctx->cbcmac */
+}
+
+/*
+ * Apply CBC-MAC on the provided data, with buffering management.
+ *
+ * Upon entry, two situations are acceptable:
+ *
+ *   ctx->ptr == 0: there is no data to process in ctx->buf
+ *   ctx->ptr == 16: there is a full block of unprocessed data in ctx->buf
+ *
+ * Upon exit, ctx->ptr may be zero only if it was already zero on entry,
+ * and len == 0. In all other situations, ctx->ptr will be non-zero on
+ * exit (and may have value 16).
+ */
+static void
+do_cbcmac_chunk(br_eax_context *ctx, const void *data, size_t len)
+{
+	size_t ptr;
+
+	if (len == 0) {
+		return;
+	}
+	ptr = len & (size_t)15;	/* ptr becomes the number of trailing input bytes (1..16) held back in buf[] */
+	if (ptr == 0) {
+		len -= 16;
+		ptr = 16;
+	} else {
+		len -= ptr;
+	}
+	if (ctx->ptr == 16) {	/* more data follows, so the buffered block is not the last one: MAC it now */
+		(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,
+			ctx->buf, sizeof ctx->buf);
+	}
+	(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, data, len);	/* len is now a multiple of 16 (possibly 0) */
+	memcpy(ctx->buf, (const unsigned char *)data + len, ptr);
+	ctx->ptr = ptr;
+}
+
+/* see bearssl_aead.h -- binds ctx to the block cipher bctx and derives the padding masks L2 and L4 */
+void
+br_eax_init(br_eax_context *ctx, const br_block_ctrcbc_class **bctx)
+{
+	unsigned char tmp[16], iv[16];
+
+	ctx->vtable = &br_eax_vtable;
+	ctx->bctx = bctx;	/* only the pointer is stored: bctx must stay valid while ctx is in use */
+
+	/*
+	 * Encrypt a whole-zero block to compute L2 and L4.
+	 */
+	memset(tmp, 0, sizeof tmp);
+	memset(iv, 0, sizeof iv);
+	(*bctx)->ctr(bctx, iv, tmp, sizeof tmp);	/* NOTE(review): CTR over zero data with a zero IV presumably leaves Enc_K(0^128) in tmp -- confirm */
+	double_gf128(ctx->L2, tmp);
+	double_gf128(ctx->L4, ctx->L2);
+}
+
+/* see bearssl_aead.h -- fills st->st[0..2] with the MAC state reached after the initial block of each of the three OMACs */
+void
+br_eax_capture(const br_eax_context *ctx, br_eax_state *st)
+{
+	/*
+	 * We capture the three OMAC* states _after_ processing the
+	 * initial block (assuming that nonce, message and AAD are
+	 * all non-empty).
+	 */
+	int i;
+
+	memset(st->st, 0, sizeof st->st);	/* each state starts from all-zero */
+	for (i = 0; i < 3; i ++) {
+		unsigned char tmp[16];
+
+		memset(tmp, 0, sizeof tmp);
+		tmp[15] = (unsigned char)i;	/* same initial block that omac_start(ctx, i) would buffer */
+		(*ctx->bctx)->mac(ctx->bctx, st->st[i], tmp, sizeof tmp);
+	}
+}
+
+/* see bearssl_aead.h -- starts a new run: MACs the nonce (OMAC^0) and leaves the context ready for AAD injection */
+void
+br_eax_reset(br_eax_context *ctx, const void *nonce, size_t len)
+{
+	/*
+	 * Process nonce with OMAC^0.
+	 */
+	omac_start(ctx, 0);
+	do_cbcmac_chunk(ctx, nonce, len);	/* returns at once when len == 0; do_pad() then finalises the initial block alone */
+	do_pad(ctx);
+	memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac);
+
+	/*
+	 * Start OMAC^1 for the AAD ("header" in the EAX specification).
+	 */
+	omac_start(ctx, 1);
+
+	/*
+	 * We use ctx->head[0] as temporary flag to mark that we are
+	 * using a "normal" reset().
+	 */
+	ctx->head[0] = 0;	/* read back by br_eax_flip() */
+}
+
+/* see bearssl_aead.h -- like br_eax_reset(), but resumes the three OMACs from the states stored in st */
+void
+br_eax_reset_pre_aad(br_eax_context *ctx, const br_eax_state *st,
+	const void *nonce, size_t len)
+{
+	if (len == 0) {	/* empty nonce: the initial block is also the last one, so the stored state cannot be used */
+		omac_start(ctx, 0);
+	} else {
+		memcpy(ctx->cbcmac, st->st[0], sizeof ctx->cbcmac);
+		ctx->ptr = 0;	/* nothing buffered: the initial block is already part of st->st[0] */
+		do_cbcmac_chunk(ctx, nonce, len);
+	}
+	do_pad(ctx);
+	memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac);
+
+	memcpy(ctx->cbcmac, st->st[1], sizeof ctx->cbcmac);	/* resume OMAC^1 (AAD) after its initial block */
+	ctx->ptr = 0;
+
+	memcpy(ctx->ctr, st->st[2], sizeof ctx->ctr);
+
+	/*
+	 * We use ctx->head[0] as a flag to indicate that we use
+	 * a recorded state, with ctx->ctr containing the preprocessed
+	 * first block for OMAC^2.
+	 */
+	ctx->head[0] = 1;
+}
+
+/* see bearssl_aead.h -- MACs the nonce, then loads head and the data MAC state from st, leaving ctx ready for br_eax_run() */
+void
+br_eax_reset_post_aad(br_eax_context *ctx, const br_eax_state *st,
+	const void *nonce, size_t len)
+{
+	if (len == 0) {	/* empty nonce: the initial block is also the last one, so the stored state cannot be used */
+		omac_start(ctx, 0);
+	} else {
+		memcpy(ctx->cbcmac, st->st[0], sizeof ctx->cbcmac);
+		ctx->ptr = 0;
+		do_cbcmac_chunk(ctx, nonce, len);
+	}
+	do_pad(ctx);
+	memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac);
+	memcpy(ctx->ctr, ctx->nonce, sizeof ctx->nonce);	/* same counter initialisation that br_eax_flip() performs */
+
+	memcpy(ctx->head, st->st[1], sizeof ctx->head);	/* NOTE(review): st->st[1] is used as-is as the AAD MAC, so it presumably must hold a finished value -- confirm against bearssl_aead.h */
+
+	memcpy(ctx->cbcmac, st->st[2], sizeof ctx->cbcmac);
+	ctx->ptr = 0;
+}
+
+/* see bearssl_aead.h -- feeds len bytes of AAD into the running OMAC, buffering the tail in ctx->buf */
+void
+br_eax_aad_inject(br_eax_context *ctx, const void *data, size_t len)
+{
+	size_t ptr;
+
+	ptr = ctx->ptr;
+
+	/*
+	 * If there is a partial block, first complete it.
+	 */
+	if (ptr < 16) {	/* also taken when ptr == 0 (empty buffer) */
+		size_t clen;
+
+		clen = 16 - ptr;
+		if (len <= clen) {	/* everything fits in buf[]: keep it buffered, since it may be the last block */
+			memcpy(ctx->buf + ptr, data, len);
+			ctx->ptr = ptr + len;
+			return;
+		}
+		memcpy(ctx->buf + ptr, data, clen);
+		data = (const unsigned char *)data + clen;
+		len -= clen;
+	}
+
+	/*
+	 * We now have a full block in buf[], and this is not the last
+	 * block.
+	 */
+	do_cbcmac_chunk(ctx, data, len);	/* ctx->ptr is still the entry value here; do_cbcmac_chunk() sets the new one */
+}
+
+/* see bearssl_aead.h -- ends the AAD phase: finalises its OMAC into ctx->head and sets up OMAC^2 and the counter for br_eax_run() */
+void
+br_eax_flip(br_eax_context *ctx)
+{
+	int from_capture;
+
+	/*
+	 * ctx->head[0] may be non-zero if the context was reset with
+	 * a pre-AAD captured state. In that case, ctx->ctr[] contains
+	 * the state for OMAC^2 _after_ processing the first block.
+	 */
+	from_capture = ctx->head[0];	/* must be read before head[] is overwritten below */
+
+	/*
+	 * Complete the OMAC computation on the AAD.
+	 */
+	do_pad(ctx);
+	memcpy(ctx->head, ctx->cbcmac, sizeof ctx->cbcmac);
+
+	/*
+	 * Start OMAC^2 for the encrypted data.
+	 * If the context was initialized from a captured state, then
+	 * the OMAC^2 value is in the ctr[] array.
+	 */
+	if (from_capture) {
+		memcpy(ctx->cbcmac, ctx->ctr, sizeof ctx->cbcmac);
+		ctx->ptr = 0;	/* no pending block: br_eax_run() handles ptr == 0 specially */
+	} else {
+		omac_start(ctx, 2);
+	}
+
+	/*
+	 * Initial counter value for CTR is the processed nonce.
+	 */
+	memcpy(ctx->ctr, ctx->nonce, sizeof ctx->nonce);
+}
+
+/* see bearssl_aead.h -- encrypts (encrypt != 0) or decrypts data in place and MACs the ciphertext; the last 1..16 bytes stay buffered */
+void
+br_eax_run(br_eax_context *ctx, int encrypt, void *data, size_t len)
+{
+	unsigned char *dbuf;
+	size_t ptr;
+
+	/*
+	 * Ensure that there is actual data to process.
+	 */
+	if (len == 0) {
+		return;
+	}
+
+	dbuf = data;
+	ptr = ctx->ptr;
+
+	/*
+	 * We may have ptr == 0 here if we initialized from a captured
+	 * state. In that case, there is no partially consumed block
+	 * or unprocessed data.
+	 */
+	if (ptr != 0 && ptr != 16) {
+		/*
+		 * We have a partially consumed block.
+		 */
+		size_t u, clen;
+
+		clen = 16 - ptr;	/* stream bytes still unused in buf[ptr..15] */
+		if (len <= clen) {
+			clen = len;
+		}
+		if (encrypt) {
+			for (u = 0; u < clen; u ++) {	/* stream XOR plaintext: buf[] now holds the ciphertext */
+				ctx->buf[ptr + u] ^= dbuf[u];
+			}
+			memcpy(dbuf, ctx->buf + ptr, clen);
+		} else {
+			for (u = 0; u < clen; u ++) {
+				unsigned dx, sx;
+
+				sx = ctx->buf[ptr + u];
+				dx = dbuf[u];
+				ctx->buf[ptr + u] = dx;	/* buf[] keeps the ciphertext byte, which is what gets MACed */
+				dbuf[u] = sx ^ dx;
+			}
+		}
+
+		if (len <= clen) {	/* input exhausted: the block (possibly full now) stays buffered */
+			ctx->ptr = ptr + clen;
+			return;
+		}
+		dbuf += clen;
+		len -= clen;
+	}
+
+	/*
+	 * We now have a complete encrypted block in buf[] that must still
+	 * be processed with OMAC, and this is not the final buf.
+	 * Exception: when ptr == 0, no block has been produced yet.
+	 */
+	if (ptr != 0) {	/* ptr still holds the value read on entry */
+		(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,
+			ctx->buf, sizeof ctx->buf);
+	}
+
+	/*
+	 * Do CTR encryption or decryption and CBC-MAC for all full blocks
+	 * except the last.
+	 */
+	ptr = len & (size_t)15;	/* ptr becomes the number of trailing bytes (1..16) handled separately below */
+	if (ptr == 0) {
+		len -= 16;
+		ptr = 16;
+	} else {
+		len -= ptr;
+	}
+	if (encrypt) {
+		(*ctx->bctx)->encrypt(ctx->bctx, ctx->ctr, ctx->cbcmac,
+			dbuf, len);
+	} else {
+		(*ctx->bctx)->decrypt(ctx->bctx, ctx->ctr, ctx->cbcmac,
+			dbuf, len);
+	}
+	dbuf += len;
+
+	/*
+	 * Compute next block of CTR stream, and use it to finish
+	 * encrypting or decrypting the data.
+	 */
+	memset(ctx->buf, 0, sizeof ctx->buf);
+	(*ctx->bctx)->ctr(ctx->bctx, ctx->ctr, ctx->buf, sizeof ctx->buf);	/* buf[] was zeroed, so it receives the raw stream block */
+	if (encrypt) {
+		size_t u;
+
+		for (u = 0; u < ptr; u ++) {
+			ctx->buf[u] ^= dbuf[u];
+		}
+		memcpy(dbuf, ctx->buf, ptr);
+	} else {
+		size_t u;
+
+		for (u = 0; u < ptr; u ++) {
+			unsigned dx, sx;
+
+			sx = ctx->buf[u];
+			dx = dbuf[u];
+			ctx->buf[u] = dx;
+			dbuf[u] = sx ^ dx;
+		}
+	}
+	ctx->ptr = ptr;	/* buf[0..ptr-1] = ciphertext awaiting MAC, buf[ptr..15] = unused stream bytes */
+}
+
+/*
+ * Complete tag computation. The final tag is written in ctx->cbcmac.
+ * do_pad() alters ctx->buf and ctx->cbcmac, so a second call gives a different value.
+ */
+static void
+do_final(br_eax_context *ctx)
+{
+	size_t u;
+
+	do_pad(ctx);
+
+	/*
+	 * Authentication tag is the XOR of the three OMAC outputs for
+	 * the nonce, AAD and encrypted data.
+	 */
+	for (u = 0; u < 16; u ++) {
+		ctx->cbcmac[u] ^= ctx->nonce[u] ^ ctx->head[u];
+	}
+}
+
+/* see bearssl_aead.h -- finalises the computation and copies the full tag (sizeof ctx->cbcmac bytes) to tag */
+void
+br_eax_get_tag(br_eax_context *ctx, void *tag)
+{
+	do_final(ctx);	/* modifies ctx: not repeatable for the same message */
+	memcpy(tag, ctx->cbcmac, sizeof ctx->cbcmac);
+}
+
+/* see bearssl_aead.h -- finalises the computation and copies the first len bytes of the tag; len is not checked against the tag size */
+void
+br_eax_get_tag_trunc(br_eax_context *ctx, void *tag, size_t len)
+{
+	do_final(ctx);	/* modifies ctx: not repeatable for the same message */
+	memcpy(tag, ctx->cbcmac, len);
+}
+
+/* see bearssl_aead.h -- finalises the computation and compares the first len tag bytes with tag; len must not exceed 16 (size of tmp) */
+uint32_t
+br_eax_check_tag_trunc(br_eax_context *ctx, const void *tag, size_t len)
+{
+	unsigned char tmp[16];
+	size_t u;
+	int x;
+
+	br_eax_get_tag(ctx, tmp);
+	x = 0;
+	for (u = 0; u < len; u ++) {	/* differences are OR-accumulated over all len bytes, with no early exit */
+		x |= tmp[u] ^ ((const unsigned char *)tag)[u];
+	}
+	return EQ0(x);	/* NOTE(review): EQ0() presumably yields 1 when x == 0 (tags match) and 0 otherwise -- confirm in inner.h */
+}
+
+/* see bearssl_aead.h -- full-length (16-byte) variant of br_eax_check_tag_trunc() */
+uint32_t
+br_eax_check_tag(br_eax_context *ctx, const void *tag)
+{
+	return br_eax_check_tag_trunc(ctx, tag, 16);
+}
+
+/* see bearssl_aead.h -- method table installed in ctx->vtable by br_eax_init() */
+const br_aead_class br_eax_vtable = {
+	16,	/* NOTE(review): presumably the tag size in bytes -- confirm against br_aead_class */
+	(void (*)(const br_aead_class **, const void *, size_t))	/* casts: the br_eax_* functions take br_eax_context *, not the class pointer */
+		&br_eax_reset,
+	(void (*)(const br_aead_class **, const void *, size_t))
+		&br_eax_aad_inject,
+	(void (*)(const br_aead_class **))
+		&br_eax_flip,
+	(void (*)(const br_aead_class **, int, void *, size_t))
+		&br_eax_run,
+	(void (*)(const br_aead_class **, void *))
+		&br_eax_get_tag,
+	(uint32_t (*)(const br_aead_class **, const void *))
+		&br_eax_check_tag,
+	(void (*)(const br_aead_class **, void *, size_t))
+		&br_eax_get_tag_trunc,
+	(uint32_t (*)(const br_aead_class **, const void *, size_t))
+		&br_eax_check_tag_trunc
+};
diff --git a/third_party/bearssl/src/ec_all_m15.c b/third_party/bearssl/src/ec_all_m15.c
new file mode 100644
index 0000000..bb550e1
--- /dev/null
+++ b/third_party/bearssl/src/ec_all_m15.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static const unsigned char *	/* generator(): forwarded to a per-curve backend */
+api_generator(int curve, size_t *len)
+{
+	switch (curve) {	/* curve and len are passed through unchanged */
+	case BR_EC_secp256r1:
+		return br_ec_p256_m15.generator(curve, len);
+	case BR_EC_curve25519:
+		return br_ec_c25519_m15.generator(curve, len);
+	default:	/* every other curve ID goes to br_ec_prime_i15 */
+		return br_ec_prime_i15.generator(curve, len);
+	}
+}
+
+static const unsigned char *	/* order(): forwarded to a per-curve backend */
+api_order(int curve, size_t *len)
+{
+	switch (curve) {	/* curve and len are passed through unchanged */
+	case BR_EC_secp256r1:
+		return br_ec_p256_m15.order(curve, len);
+	case BR_EC_curve25519:
+		return br_ec_c25519_m15.order(curve, len);
+	default:	/* every other curve ID goes to br_ec_prime_i15 */
+		return br_ec_prime_i15.order(curve, len);
+	}
+}
+
+static size_t	/* xoff(): forwarded to a per-curve backend */
+api_xoff(int curve, size_t *len)
+{
+	switch (curve) {	/* curve and len are passed through unchanged */
+	case BR_EC_secp256r1:
+		return br_ec_p256_m15.xoff(curve, len);
+	case BR_EC_curve25519:
+		return br_ec_c25519_m15.xoff(curve, len);
+	default:	/* every other curve ID goes to br_ec_prime_i15 */
+		return br_ec_prime_i15.xoff(curve, len);
+	}
+}
+
+static uint32_t	/* mul(): forwarded to a per-curve backend; its status is returned as is */
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *kb, size_t kblen, int curve)
+{
+	switch (curve) {	/* all arguments are passed through unchanged */
+	case BR_EC_secp256r1:
+		return br_ec_p256_m15.mul(G, Glen, kb, kblen, curve);
+	case BR_EC_curve25519:
+		return br_ec_c25519_m15.mul(G, Glen, kb, kblen, curve);
+	default:	/* every other curve ID goes to br_ec_prime_i15 */
+		return br_ec_prime_i15.mul(G, Glen, kb, kblen, curve);
+	}
+}
+
+static size_t	/* mulgen(): forwarded to a per-curve backend; its result is returned as is */
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	switch (curve) {	/* all arguments are passed through unchanged */
+	case BR_EC_secp256r1:
+		return br_ec_p256_m15.mulgen(R, x, xlen, curve);
+	case BR_EC_curve25519:
+		return br_ec_c25519_m15.mulgen(R, x, xlen, curve);
+	default:	/* every other curve ID goes to br_ec_prime_i15 */
+		return br_ec_prime_i15.mulgen(R, x, xlen, curve);
+	}
+}
+
+static uint32_t	/* muladd(): forwarded to a per-curve backend; its status is returned as is */
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	switch (curve) {	/* all arguments are passed through unchanged */
+	case BR_EC_secp256r1:
+		return br_ec_p256_m15.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+	case BR_EC_curve25519:
+		return br_ec_c25519_m15.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+	default:	/* every other curve ID goes to br_ec_prime_i15 */
+		return br_ec_prime_i15.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+	}
+}
+
+/* see bearssl_ec.h -- implementation table filled with this file's dispatchers */
+const br_ec_impl br_ec_all_m15 = {
+	(uint32_t)0x23800000,	/* presumably a bit mask of supported curve IDs -- confirm */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
diff --git a/third_party/bearssl/src/ec_all_m31.c b/third_party/bearssl/src/ec_all_m31.c
new file mode 100644
index 0000000..8fd8c3c
--- /dev/null
+++ b/third_party/bearssl/src/ec_all_m31.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static const unsigned char *	/* generator(): forwarded to a per-curve backend */
+api_generator(int curve, size_t *len)
+{
+	switch (curve) {	/* curve and len are passed through unchanged */
+	case BR_EC_secp256r1:
+#if BR_INT128 || BR_UMUL128	/* m64 backend when either macro is non-zero */
+		return br_ec_p256_m64.generator(curve, len);
+#else	/* otherwise the m31 backend */
+		return br_ec_p256_m31.generator(curve, len);
+#endif
+	case BR_EC_curve25519:
+#if BR_INT128 || BR_UMUL128	/* same selection rule as above */
+		return br_ec_c25519_m64.generator(curve, len);
+#else
+		return br_ec_c25519_m31.generator(curve, len);
+#endif
+	default:	/* every other curve ID goes to br_ec_prime_i31 */
+		return br_ec_prime_i31.generator(curve, len);
+	}
+}
+
+static const unsigned char *	/* order(): forwarded to a per-curve backend */
+api_order(int curve, size_t *len)
+{
+	switch (curve) {	/* curve and len are passed through unchanged */
+	case BR_EC_secp256r1:
+#if BR_INT128 || BR_UMUL128	/* m64 backend when either macro is non-zero */
+		return br_ec_p256_m64.order(curve, len);
+#else	/* otherwise the m31 backend */
+		return br_ec_p256_m31.order(curve, len);
+#endif
+	case BR_EC_curve25519:
+#if BR_INT128 || BR_UMUL128	/* same selection rule as above */
+		return br_ec_c25519_m64.order(curve, len);
+#else
+		return br_ec_c25519_m31.order(curve, len);
+#endif
+	default:	/* every other curve ID goes to br_ec_prime_i31 */
+		return br_ec_prime_i31.order(curve, len);
+	}
+}
+
+static size_t	/* xoff(): forwarded to a per-curve backend */
+api_xoff(int curve, size_t *len)
+{
+	switch (curve) {	/* curve and len are passed through unchanged */
+	case BR_EC_secp256r1:
+#if BR_INT128 || BR_UMUL128	/* m64 backend when either macro is non-zero */
+		return br_ec_p256_m64.xoff(curve, len);
+#else	/* otherwise the m31 backend */
+		return br_ec_p256_m31.xoff(curve, len);
+#endif
+	case BR_EC_curve25519:
+#if BR_INT128 || BR_UMUL128	/* same selection rule as above */
+		return br_ec_c25519_m64.xoff(curve, len);
+#else
+		return br_ec_c25519_m31.xoff(curve, len);
+#endif
+	default:	/* every other curve ID goes to br_ec_prime_i31 */
+		return br_ec_prime_i31.xoff(curve, len);
+	}
+}
+
+static uint32_t	/* mul(): forwarded to a per-curve backend; its status is returned as is */
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *kb, size_t kblen, int curve)
+{
+	switch (curve) {	/* all arguments are passed through unchanged */
+	case BR_EC_secp256r1:
+#if BR_INT128 || BR_UMUL128	/* m64 backend when either macro is non-zero */
+		return br_ec_p256_m64.mul(G, Glen, kb, kblen, curve);
+#else	/* otherwise the m31 backend */
+		return br_ec_p256_m31.mul(G, Glen, kb, kblen, curve);
+#endif
+	case BR_EC_curve25519:
+#if BR_INT128 || BR_UMUL128	/* same selection rule as above */
+		return br_ec_c25519_m64.mul(G, Glen, kb, kblen, curve);
+#else
+		return br_ec_c25519_m31.mul(G, Glen, kb, kblen, curve);
+#endif
+	default:	/* every other curve ID goes to br_ec_prime_i31 */
+		return br_ec_prime_i31.mul(G, Glen, kb, kblen, curve);
+	}
+}
+
+static size_t	/* mulgen(): forwarded to a per-curve backend; its result is returned as is */
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	switch (curve) {	/* all arguments are passed through unchanged */
+	case BR_EC_secp256r1:
+#if BR_INT128 || BR_UMUL128	/* m64 backend when either macro is non-zero */
+		return br_ec_p256_m64.mulgen(R, x, xlen, curve);
+#else	/* otherwise the m31 backend */
+		return br_ec_p256_m31.mulgen(R, x, xlen, curve);
+#endif
+	case BR_EC_curve25519:
+#if BR_INT128 || BR_UMUL128	/* same selection rule as above */
+		return br_ec_c25519_m64.mulgen(R, x, xlen, curve);
+#else
+		return br_ec_c25519_m31.mulgen(R, x, xlen, curve);
+#endif
+	default:	/* every other curve ID goes to br_ec_prime_i31 */
+		return br_ec_prime_i31.mulgen(R, x, xlen, curve);
+	}
+}
+
+static uint32_t	/* muladd(): forwarded to a per-curve backend; its status is returned as is */
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	switch (curve) {	/* all arguments are passed through unchanged */
+	case BR_EC_secp256r1:
+#if BR_INT128 || BR_UMUL128	/* m64 backend when either macro is non-zero */
+		return br_ec_p256_m64.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+#else	/* otherwise the m31 backend */
+		return br_ec_p256_m31.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+#endif
+	case BR_EC_curve25519:
+#if BR_INT128 || BR_UMUL128	/* same selection rule as above */
+		return br_ec_c25519_m64.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+#else
+		return br_ec_c25519_m31.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+#endif
+	default:	/* every other curve ID goes to br_ec_prime_i31 */
+		return br_ec_prime_i31.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+	}
+}
+
+/* see bearssl_ec.h -- implementation table filled with this file's dispatchers */
+const br_ec_impl br_ec_all_m31 = {
+	(uint32_t)0x23800000,	/* presumably a bit mask of supported curve IDs -- confirm */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
diff --git a/third_party/bearssl/src/ec_c25519_i15.c b/third_party/bearssl/src/ec_c25519_i15.c
new file mode 100644
index 0000000..8fadcf4
--- /dev/null
+++ b/third_party/bearssl/src/ec_c25519_i15.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Parameters for the field:
+ *   - field modulus p = 2^255-19
+ *   - R^2 mod p (R = 2^(15k) for the smallest k such that R >= p)
+ */
+
+static const uint16_t C255_P[] = {	/* p = 2^255-19 */
+	0x0110,	/* header word; api_mul() also stores this value in a[0] */
+	0x7FED, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,	/* low word first: 0x7FED = 2^15-19 */
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF	/* 17 value words of 15 bits each = 255 bits */
+};
+
+#define P0I   0x4A1B	/* -1/p mod 2^15 (0x7FED * 0x4A1B = -1 mod 2^15); last argument of br_i15_montymul() */
+
+static const uint16_t C255_R2[] = {	/* R^2 mod p, used to enter Montgomery representation */
+	0x0110,	/* same header word as C255_P */
+	0x0169, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/* 0x169 = 361 = 19^2, since R = 2^255 = 19 mod p */
+	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000
+};
+
+/* obsolete
+#include <stdio.h>
+#include <stdlib.h>
+static void
+print_int_mont(const char *name, const uint16_t *x)
+{
+	uint16_t y[18];
+	unsigned char tmp[32];
+	size_t u;
+
+	printf("%s = ", name);
+	memcpy(y, x, sizeof y);
+	br_i15_from_monty(y, C255_P, P0I);
+	br_i15_encode(tmp, sizeof tmp, y);
+	for (u = 0; u < sizeof tmp; u ++) {
+		printf("%02X", tmp[u]);
+	}
+	printf("\n");
+}
+*/
+
+static const uint16_t C255_A24[] = {	/* ladder constant 121665, in Montgomery representation */
+	0x0110,	/* same header word as C255_P */
+	0x45D3, 0x0046, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/* 0x45D3 + (0x46 << 15) = 121665 * 19 */
+	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000
+};
+
+static const unsigned char GEN[] = {	/* 32 bytes returned by api_generator() */
+	0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* value 9; api_mul() reads points as little-endian */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+static const unsigned char ORDER[] = {	/* 32 bytes returned by api_order() */
+	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	/* 7F FF..FF: NOTE(review) looks like a scalar bound, not the exact subgroup order -- confirm */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+
+static const unsigned char *	/* returns GEN[] and stores its length (32) in *len */
+api_generator(int curve, size_t *len)
+{
+	(void)curve;	/* curve ID is not used by this implementation */
+	*len = 32;
+	return GEN;
+}
+
+static const unsigned char *	/* returns ORDER[] and stores its length (32) in *len */
+api_order(int curve, size_t *len)
+{
+	(void)curve;	/* curve ID is not used by this implementation */
+	*len = 32;
+	return ORDER;
+}
+
+static size_t	/* presumably offset/length of the X coordinate in an encoded point -- confirm */
+api_xoff(int curve, size_t *len)
+{
+	(void)curve;	/* curve ID is not used by this implementation */
+	*len = 32;	/* reported length: the whole 32-byte encoding */
+	return 0;	/* reported offset */
+}
+
+static void	/* swaps a[] and b[] (18 words each) when ctl is 1; no branch on ctl */
+cswap(uint16_t *a, uint16_t *b, uint32_t ctl)
+{
+	int i;
+
+	ctl = -ctl;	/* 0 -> 0x00000000, 1 -> 0xFFFFFFFF */
+	for (i = 0; i < 18; i ++) {
+		uint32_t aw, bw, tw;
+
+		aw = a[i];
+		bw = b[i];
+		tw = ctl & (aw ^ bw);	/* zero when ctl is 0 */
+		a[i] = aw ^ tw;	/* both words are always rewritten, swapped or not */
+		b[i] = bw ^ tw;
+	}
+}
+
+static void	/* field addition d = a + b; d may alias a or b (work is done in t[]) */
+c255_add(uint16_t *d, const uint16_t *a, const uint16_t *b)
+{
+	uint32_t ctl;
+	uint16_t t[18];
+
+	memcpy(t, a, sizeof t);
+	ctl = br_i15_add(t, b, 1);	/* presumably returns the carry -- confirm in inner.h */
+	ctl |= NOT(br_i15_sub(t, C255_P, 0));	/* control 0: presumably only tests t >= p, no write -- confirm */
+	br_i15_sub(t, C255_P, ctl);	/* conditional subtraction of p */
+	memcpy(d, t, sizeof t);
+}
+
+static void	/* field subtraction d = a - b; d may alias a or b (work is done in t[]) */
+c255_sub(uint16_t *d, const uint16_t *a, const uint16_t *b)
+{
+	uint16_t t[18];
+
+	memcpy(t, a, sizeof t);
+	br_i15_add(t, C255_P, br_i15_sub(t, b, 1));	/* add p back when the subtraction reports a borrow (presumed return value) */
+	memcpy(d, t, sizeof t);
+}
+
+static void	/* Montgomery multiplication modulo C255_P; d may alias a or b */
+c255_mul(uint16_t *d, const uint16_t *a, const uint16_t *b)
+{
+	uint16_t t[18];
+
+	br_i15_montymul(t, a, b, C255_P, P0I);	/* result goes to t[] first, then is copied out */
+	memcpy(d, t, sizeof t);
+}
+
+static void	/* reverses the order of the 32 bytes of G[], in place */
+byteswap(unsigned char *G)
+{
+	int i;
+
+	for (i = 0; i < 16; i ++) {	/* 16 exchanges cover all 32 bytes */
+		unsigned char t;
+
+		t = G[i];
+		G[i] = G[31 - i];
+		G[31 - i] = t;
+	}
+}
+
+static uint32_t	/* returns 1 on success, 0 when Glen != 32 or kblen > 32 */
+api_mul(unsigned char *G, size_t Glen,	/* G: 32-byte point, overwritten with the result */
+	const unsigned char *kb, size_t kblen, int curve)	/* kb: big-endian multiplier; curve is ignored */
+{
+#define ILEN   (18 * sizeof(uint16_t))	/* size in bytes of one 18-word value */
+
+	/*
+	 * The a[] and b[] arrays have an extra (19th) word: decoding below
+	 * uses a 2^255 modulus in b[] instead of br_i15_decode_reduce().
+	 */
+	uint16_t x1[18], x2[18], x3[18], z2[18], z3[18];
+	uint16_t a[19], aa[18], b[19], bb[18];
+	uint16_t c[18], d[18], e[18], da[18], cb[18];
+	unsigned char k[32];
+	uint32_t swap;
+	int i;
+
+	(void)curve;
+
+	/*
+	 * Points are encoded over exactly 32 bytes. Multipliers must fit
+	 * in 32 bytes as well.
+	 * RFC 7748 mandates that the high bit of the last point byte must
+	 * be ignored/cleared.
+	 */
+	if (Glen != 32 || kblen > 32) {
+		return 0;	/* G[] has not been touched at this point */
+	}
+	G[31] &= 0x7F;
+
+	/*
+	 * Byteswap the point encoding, because it uses little-endian, and
+	 * the generic decoding routine uses big-endian.
+	 */
+	byteswap(G);
+
+	/*
+	 * Decode the point ('u' coordinate). This should be reduced
+	 * modulo p, but we prefer to avoid the dependency on
+	 * br_i15_decode_reduce(). Instead, we use br_i15_decode_mod()
+	 * with a synthetic modulus of value 2^255 (this must work
+	 * since G was truncated to 255 bits), then use a conditional
+	 * subtraction. We use br_i15_decode_mod() and not
+	 * br_i15_decode(), because the ec_prime_i15 implementation uses
+	 * the former but not the latter.
+	 *    br_i15_decode_reduce(a, G, 32, C255_P);
+	 */
+	br_i15_zero(b, 0x111);
+	b[18] = 1;	/* 18th value word, bit 0: b = 2^(17*15) = 2^255 */
+	br_i15_decode_mod(a, G, 32, b);
+	a[0] = 0x110;	/* switch a[] to the header word of C255_P */
+	br_i15_sub(a, C255_P, NOT(br_i15_sub(a, C255_P, 0)));	/* subtract p if a >= p */
+
+	/*
+	 * Initialise variables x1, x2, z2, x3 and z3. We set all of them
+	 * into Montgomery representation.
+	 */
+	br_i15_montymul(x1, a, C255_R2, C255_P, P0I);	/* x1 = u in Montgomery representation */
+	memcpy(x3, x1, ILEN);
+	br_i15_zero(z2, C255_P[0]);	/* z2 = 0 */
+	memcpy(x2, z2, ILEN);
+	x2[1] = 19;	/* x2 = 1 in Montgomery representation (R mod p = 19) */
+	memcpy(z3, x2, ILEN);	/* z3 = 1 as well */
+
+	memset(k, 0, (sizeof k) - kblen);	/* left-pad the multiplier to 32 bytes */
+	memcpy(k + (sizeof k) - kblen, kb, kblen);
+	k[31] &= 0xF8;	/* clear the three least significant bits */
+	k[0] &= 0x7F;	/* clear bit 255 */
+	k[0] |= 0x40;	/* set bit 254 */
+
+	/* obsolete
+	print_int_mont("x1", x1);
+	*/
+
+	swap = 0;
+	for (i = 254; i >= 0; i --) {	/* multiplier bits, most significant first */
+		uint32_t kt;
+
+		kt = (k[31 - (i >> 3)] >> (i & 7)) & 1;	/* bit i of the big-endian k[] */
+		swap ^= kt;	/* swap only when this bit differs from the previous one */
+		cswap(x2, x3, swap);
+		cswap(z2, z3, swap);
+		swap = kt;
+
+		/* obsolete
+		print_int_mont("x2", x2);
+		print_int_mont("z2", z2);
+		print_int_mont("x3", x3);
+		print_int_mont("z3", z3);
+		*/
+
+		c255_add(a, x2, z2);	/* a = x2 + z2 */
+		c255_mul(aa, a, a);	/* aa = a^2 */
+		c255_sub(b, x2, z2);	/* b = x2 - z2 */
+		c255_mul(bb, b, b);	/* bb = b^2 */
+		c255_sub(e, aa, bb);	/* e = aa - bb */
+		c255_add(c, x3, z3);	/* c = x3 + z3 */
+		c255_sub(d, x3, z3);	/* d = x3 - z3 */
+		c255_mul(da, d, a);
+		c255_mul(cb, c, b);
+
+		/* obsolete
+		print_int_mont("a ", a);
+		print_int_mont("aa", aa);
+		print_int_mont("b ", b);
+		print_int_mont("bb", bb);
+		print_int_mont("e ", e);
+		print_int_mont("c ", c);
+		print_int_mont("d ", d);
+		print_int_mont("da", da);
+		print_int_mont("cb", cb);
+		*/
+
+		c255_add(x3, da, cb);
+		c255_mul(x3, x3, x3);	/* x3 = (da + cb)^2 */
+		c255_sub(z3, da, cb);
+		c255_mul(z3, z3, z3);
+		c255_mul(z3, z3, x1);	/* z3 = x1 * (da - cb)^2 */
+		c255_mul(x2, aa, bb);	/* x2 = aa * bb */
+		c255_mul(z2, C255_A24, e);
+		c255_add(z2, z2, aa);
+		c255_mul(z2, e, z2);	/* z2 = e * (aa + a24 * e) */
+
+		/* obsolete
+		print_int_mont("x2", x2);
+		print_int_mont("z2", z2);
+		print_int_mont("x3", x3);
+		print_int_mont("z3", z3);
+		*/
+	}
+	cswap(x2, x3, swap);	/* apply the swap left pending by the last iteration */
+	cswap(z2, z3, swap);
+
+	/*
+	 * Invert z2 by raising it to the power p-2 = 2^255-21. This is a
+	 * simple square-and-multiply; most multiplications are shared since
+	 * the exponent contains almost only ones (a = z2^(2^16-1) is reused).
+	 */
+	memcpy(a, z2, ILEN);
+	for (i = 0; i < 15; i ++) {
+		c255_mul(a, a, a);
+		c255_mul(a, a, z2);
+	}	/* now a = z2^(2^16-1) */
+	memcpy(b, a, ILEN);
+	for (i = 0; i < 14; i ++) {
+		int j;
+
+		for (j = 0; j < 16; j ++) {
+			c255_mul(b, b, b);
+		}
+		c255_mul(b, b, a);
+	}	/* now b = z2^(2^240-1) */
+	for (i = 14; i >= 0; i --) {	/* remaining 15 exponent bits: 0x7FEB */
+		c255_mul(b, b, b);
+		if ((0xFFEB >> i) & 1) {	/* constant bit pattern, independent of any secret */
+			c255_mul(b, z2, b);
+		}
+	}
+	c255_mul(b, x2, b);	/* b = x2 / z2 */
+
+	/*
+	 * To avoid a dependency on br_i15_from_monty(), we use a
+	 * Montgomery multiplication with 1.
+	 *    memcpy(x2, b, ILEN);
+	 *    br_i15_from_monty(x2, C255_P, P0I);
+	 */
+	br_i15_zero(a, C255_P[0]);
+	a[1] = 1;	/* a = 1 (plain integer, not Montgomery form) */
+	br_i15_montymul(x2, a, b, C255_P, P0I);
+
+	br_i15_encode(G, 32, x2);
+	byteswap(G);	/* back to the little-endian point encoding */
+	return 1;
+
+#undef ILEN
+}
+
+static size_t	/* writes x times the generator into R[]; returns the point length (32) */
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	const unsigned char *G;
+	size_t Glen;
+
+	G = api_generator(curve, &Glen);
+	memcpy(R, G, Glen);	/* api_mul() works in place, so start from a copy of GEN[] */
+	api_mul(R, Glen, x, xlen, curve);	/* NOTE(review): status ignored; if xlen > 32, R keeps the generator */
+	return Glen;
+}
+
+static uint32_t	/* unimplemented: always returns 0 and touches none of its arguments */
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	/*
+	 * We don't implement this method, since it is used for ECDSA
+	 * only, and there is no ECDSA over Curve25519 (which instead
+	 * uses EdDSA).
+	 */
+	(void)A;	/* the casts below only mark the parameters as unused */
+	(void)B;
+	(void)len;
+	(void)x;
+	(void)xlen;
+	(void)y;
+	(void)ylen;
+	(void)curve;
+	return 0;
+}
+
+/* see bearssl_ec.h -- implementation table filled with this file's functions */
+const br_ec_impl br_ec_c25519_i15 = {
+	(uint32_t)0x20000000,	/* presumably a bit mask of supported curve IDs (one bit set) -- confirm */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
diff --git a/third_party/bearssl/src/ec_c25519_i31.c b/third_party/bearssl/src/ec_c25519_i31.c
new file mode 100644
index 0000000..f8ffc2c
--- /dev/null
+++ b/third_party/bearssl/src/ec_c25519_i31.c
@@ -0,0 +1,390 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Parameters for the field:
+ *   - field modulus p = 2^255-19
+ *   - R^2 mod p (R = 2^(31k) for the smallest k such that R >= p)
+ */
+
+static const uint32_t C255_P[] = {	/* p = 2^255-19 */
+	0x00000107,	/* header word; api_mul() also stores this value in a[0] */
+	0x7FFFFFED, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,	/* low word first: 0x7FFFFFED = 2^31-19 */
+	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0000007F	/* 8 words of 31 bits plus 7 bits = 255 bits */
+};
+
+#define P0I   0x286BCA1B	/* -1/p mod 2^31 (19 * 0x286BCA1B = 1 mod 2^31); last argument of br_i31_montymul() */
+
+static const uint32_t C255_R2[] = {	/* R^2 mod p, used to enter Montgomery representation */
+	0x00000107,	/* same header word as C255_P */
+	0x00000000, 0x02D20000, 0x00000000, 0x00000000, 0x00000000,	/* 361 * 2^48, since R = 2^279 = 19 * 2^24 mod p */
+	0x00000000, 0x00000000, 0x00000000, 0x00000000
+};
+
+static const uint32_t C255_A24[] = {	/* ladder constant 121665, in Montgomery representation */
+	0x00000107,	/* same header word as C255_P */
+	0x53000000, 0x0000468B, 0x00000000, 0x00000000, 0x00000000,	/* 121665 * 19 * 2^24, split into 31-bit words */
+	0x00000000, 0x00000000, 0x00000000, 0x00000000
+};
+
+/* obsolete
+#include <stdio.h>
+#include <stdlib.h>
+static void
+print_int_mont(const char *name, const uint32_t *x)
+{
+	uint32_t y[10];
+	unsigned char tmp[32];
+	size_t u;
+
+	printf("%s = ", name);
+	memcpy(y, x, sizeof y);
+	br_i31_from_monty(y, C255_P, P0I);
+	br_i31_encode(tmp, sizeof tmp, y);
+	for (u = 0; u < sizeof tmp; u ++) {
+		printf("%02X", tmp[u]);
+	}
+	printf("\n");
+}
+*/
+
+static const unsigned char GEN[] = {	/* 32 bytes returned by api_generator() */
+	0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* value 9; api_mul() reads points as little-endian */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+static const unsigned char ORDER[] = {	/* 32 bytes returned by api_order() */
+	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	/* 7F FF..FF: NOTE(review) looks like a scalar bound, not the exact subgroup order -- confirm */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+
+static const unsigned char *	/* returns GEN[] and stores its length (32) in *len */
+api_generator(int curve, size_t *len)
+{
+	(void)curve;	/* curve ID is not used by this implementation */
+	*len = 32;
+	return GEN;
+}
+
+static const unsigned char *	/* returns ORDER[] and stores its length (32) in *len */
+api_order(int curve, size_t *len)
+{
+	(void)curve;	/* curve ID is not used by this implementation */
+	*len = 32;
+	return ORDER;
+}
+
+static size_t	/* presumably offset/length of the X coordinate in an encoded point -- confirm */
+api_xoff(int curve, size_t *len)
+{
+	(void)curve;	/* curve ID is not used by this implementation */
+	*len = 32;	/* reported length: the whole 32-byte encoding */
+	return 0;	/* reported offset */
+}
+
+static void	/* swaps a[] and b[] (10 words each) when ctl is 1; no branch on ctl */
+cswap(uint32_t *a, uint32_t *b, uint32_t ctl)
+{
+	int i;
+
+	ctl = -ctl;	/* 0 -> 0x00000000, 1 -> 0xFFFFFFFF */
+	for (i = 0; i < 10; i ++) {
+		uint32_t aw, bw, tw;
+
+		aw = a[i];
+		bw = b[i];
+		tw = ctl & (aw ^ bw);	/* zero when ctl is 0 */
+		a[i] = aw ^ tw;	/* both words are always rewritten, swapped or not */
+		b[i] = bw ^ tw;
+	}
+}
+
+static void	/* field addition d = a + b; d may alias a or b (work is done in t[]) */
+c255_add(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t ctl;
+	uint32_t t[10];
+
+	memcpy(t, a, sizeof t);
+	ctl = br_i31_add(t, b, 1);	/* presumably returns the carry -- confirm in inner.h */
+	ctl |= NOT(br_i31_sub(t, C255_P, 0));	/* control 0: presumably only tests t >= p, no write -- confirm */
+	br_i31_sub(t, C255_P, ctl);	/* conditional subtraction of p */
+	memcpy(d, t, sizeof t);
+}
+
+static void	/* field subtraction d = a - b; d may alias a or b (work is done in t[]) */
+c255_sub(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t t[10];
+
+	memcpy(t, a, sizeof t);
+	br_i31_add(t, C255_P, br_i31_sub(t, b, 1));	/* add p back when the subtraction reports a borrow (presumed return value) */
+	memcpy(d, t, sizeof t);
+}
+
+static void	/* Montgomery multiplication modulo C255_P; d may alias a or b */
+c255_mul(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t t[10];
+
+	br_i31_montymul(t, a, b, C255_P, P0I);	/* result goes to t[] first, then is copied out */
+	memcpy(d, t, sizeof t);
+}
+
+static void	/* reverses the order of the 32 bytes of G[], in place */
+byteswap(unsigned char *G)
+{
+	int i;
+
+	for (i = 0; i < 16; i ++) {	/* 16 exchanges cover all 32 bytes */
+		unsigned char t;
+
+		t = G[i];
+		G[i] = G[31 - i];
+		G[31 - i] = t;
+	}
+}
+
+static uint32_t	/* returns 1 on success, 0 when Glen != 32 or kblen > 32 */
+api_mul(unsigned char *G, size_t Glen,	/* G: 32-byte point, overwritten with the result */
+	const unsigned char *kb, size_t kblen, int curve)	/* kb: big-endian multiplier; curve is ignored */
+{
+	uint32_t x1[10], x2[10], x3[10], z2[10], z3[10];
+	uint32_t a[10], aa[10], b[10], bb[10];
+	uint32_t c[10], d[10], e[10], da[10], cb[10];
+	unsigned char k[32];
+	uint32_t swap;
+	int i;
+
+	(void)curve;
+
+	/*
+	 * Points are encoded over exactly 32 bytes. Multipliers must fit
+	 * in 32 bytes as well.
+	 * RFC 7748 mandates that the high bit of the last point byte must
+	 * be ignored/cleared.
+	 */
+	if (Glen != 32 || kblen > 32) {
+		return 0;	/* G[] has not been touched at this point */
+	}
+	G[31] &= 0x7F;
+
+	/*
+	 * Byteswap the point encoding, because it uses little-endian, and
+	 * the generic decoding routine uses big-endian.
+	 */
+	byteswap(G);
+
+	/*
+	 * Decode the point ('u' coordinate). This should be reduced
+	 * modulo p, but we prefer to avoid the dependency on
+	 * br_i31_decode_reduce(). Instead, we use br_i31_decode_mod()
+	 * with a synthetic modulus of value 2^255 (this must work
+	 * since G was truncated to 255 bits), then use a conditional
+	 * subtraction. We use br_i31_decode_mod() and not
+	 * br_i31_decode(), because the ec_prime_i31 implementation uses
+	 * the former but not the latter.
+	 *    br_i31_decode_reduce(a, G, 32, C255_P);
+	 */
+	br_i31_zero(b, 0x108);
+	b[9] = 0x0080;	/* 9th value word, bit 7: b = 2^(8*31+7) = 2^255 */
+	br_i31_decode_mod(a, G, 32, b);
+	a[0] = 0x107;	/* switch a[] to the header word of C255_P */
+	br_i31_sub(a, C255_P, NOT(br_i31_sub(a, C255_P, 0)));	/* subtract p if a >= p */
+
+	/*
+	 * Initialise variables x1, x2, z2, x3 and z3. We set all of them
+	 * into Montgomery representation.
+	 */
+	br_i31_montymul(x1, a, C255_R2, C255_P, P0I);	/* x1 = u in Montgomery representation */
+	memcpy(x3, x1, sizeof x1);
+	br_i31_zero(z2, C255_P[0]);	/* z2 = 0 */
+	memcpy(x2, z2, sizeof z2);
+	x2[1] = 0x13000000;	/* x2 = 1 in Montgomery representation (R mod p = 19 * 2^24) */
+	memcpy(z3, x2, sizeof x2);	/* z3 = 1 as well */
+
+	/*
+	 * kb[] is in big-endian notation, but possibly shorter than k[].
+	 */
+	memset(k, 0, (sizeof k) - kblen);	/* left-pad the multiplier to 32 bytes */
+	memcpy(k + (sizeof k) - kblen, kb, kblen);
+	k[31] &= 0xF8;	/* clear the three least significant bits */
+	k[0] &= 0x7F;	/* clear bit 255 */
+	k[0] |= 0x40;	/* set bit 254 */
+
+	/* obsolete
+	print_int_mont("x1", x1);
+	*/
+
+	swap = 0;
+	for (i = 254; i >= 0; i --) {	/* multiplier bits, most significant first */
+		uint32_t kt;
+
+		kt = (k[31 - (i >> 3)] >> (i & 7)) & 1;	/* bit i of the big-endian k[] */
+		swap ^= kt;	/* swap only when this bit differs from the previous one */
+		cswap(x2, x3, swap);
+		cswap(z2, z3, swap);
+		swap = kt;
+
+		/* obsolete
+		print_int_mont("x2", x2);
+		print_int_mont("z2", z2);
+		print_int_mont("x3", x3);
+		print_int_mont("z3", z3);
+		*/
+
+		c255_add(a, x2, z2);	/* a = x2 + z2 */
+		c255_mul(aa, a, a);	/* aa = a^2 */
+		c255_sub(b, x2, z2);	/* b = x2 - z2 */
+		c255_mul(bb, b, b);	/* bb = b^2 */
+		c255_sub(e, aa, bb);	/* e = aa - bb */
+		c255_add(c, x3, z3);	/* c = x3 + z3 */
+		c255_sub(d, x3, z3);	/* d = x3 - z3 */
+		c255_mul(da, d, a);
+		c255_mul(cb, c, b);
+
+		/* obsolete
+		print_int_mont("a ", a);
+		print_int_mont("aa", aa);
+		print_int_mont("b ", b);
+		print_int_mont("bb", bb);
+		print_int_mont("e ", e);
+		print_int_mont("c ", c);
+		print_int_mont("d ", d);
+		print_int_mont("da", da);
+		print_int_mont("cb", cb);
+		*/
+
+		c255_add(x3, da, cb);
+		c255_mul(x3, x3, x3);	/* x3 = (da + cb)^2 */
+		c255_sub(z3, da, cb);
+		c255_mul(z3, z3, z3);
+		c255_mul(z3, z3, x1);	/* z3 = x1 * (da - cb)^2 */
+		c255_mul(x2, aa, bb);	/* x2 = aa * bb */
+		c255_mul(z2, C255_A24, e);
+		c255_add(z2, z2, aa);
+		c255_mul(z2, e, z2);	/* z2 = e * (aa + a24 * e) */
+
+		/* obsolete
+		print_int_mont("x2", x2);
+		print_int_mont("z2", z2);
+		print_int_mont("x3", x3);
+		print_int_mont("z3", z3);
+		*/
+	}
+	cswap(x2, x3, swap);	/* apply the swap left pending by the last iteration */
+	cswap(z2, z3, swap);
+
+	/*
+	 * Invert z2 by raising it to the power p-2 = 2^255-21. This is a
+	 * simple square-and-multiply; most multiplications are shared since
+	 * the exponent contains almost only ones (a = z2^(2^16-1) is reused).
+	 */
+	memcpy(a, z2, sizeof z2);
+	for (i = 0; i < 15; i ++) {
+		c255_mul(a, a, a);
+		c255_mul(a, a, z2);
+	}	/* now a = z2^(2^16-1) */
+	memcpy(b, a, sizeof a);
+	for (i = 0; i < 14; i ++) {
+		int j;
+
+		for (j = 0; j < 16; j ++) {
+			c255_mul(b, b, b);
+		}
+		c255_mul(b, b, a);
+	}	/* now b = z2^(2^240-1) */
+	for (i = 14; i >= 0; i --) {	/* remaining 15 exponent bits: 0x7FEB */
+		c255_mul(b, b, b);
+		if ((0xFFEB >> i) & 1) {	/* constant bit pattern, independent of any secret */
+			c255_mul(b, z2, b);
+		}
+	}
+	c255_mul(b, x2, b);	/* b = x2 / z2 */
+
+	/*
+	 * To avoid a dependency on br_i31_from_monty(), we use
+	 * a Montgomery multiplication with 1.
+	 *    memcpy(x2, b, sizeof b);
+	 *    br_i31_from_monty(x2, C255_P, P0I);
+	 */
+	br_i31_zero(a, C255_P[0]);
+	a[1] = 1;	/* a = 1 (plain integer, not Montgomery form) */
+	br_i31_montymul(x2, a, b, C255_P, P0I);
+
+	br_i31_encode(G, 32, x2);
+	byteswap(G);	/* back to the little-endian point encoding */
+	return 1;
+}
+
+static size_t	/* writes x times the generator into R[]; returns the point length (32) */
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	const unsigned char *G;
+	size_t Glen;
+
+	G = api_generator(curve, &Glen);
+	memcpy(R, G, Glen);	/* api_mul() works in place, so start from a copy of GEN[] */
+	api_mul(R, Glen, x, xlen, curve);	/* NOTE(review): status ignored; if xlen > 32, R keeps the generator */
+	return Glen;
+}
+
+static uint32_t	/* unimplemented: always returns 0 and touches none of its arguments */
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	/*
+	 * We don't implement this method, since it is used for ECDSA
+	 * only, and there is no ECDSA over Curve25519 (which instead
+	 * uses EdDSA).
+	 */
+	(void)A;	/* the casts below only mark the parameters as unused */
+	(void)B;
+	(void)len;
+	(void)x;
+	(void)xlen;
+	(void)y;
+	(void)ylen;
+	(void)curve;
+	return 0;
+}
+
+/* see bearssl_ec.h -- implementation table filled with this file's functions */
+const br_ec_impl br_ec_c25519_i31 = {
+	(uint32_t)0x20000000,	/* presumably a bit mask of supported curve IDs (one bit set) -- confirm */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
diff --git a/third_party/bearssl/src/ec_c25519_m15.c b/third_party/bearssl/src/ec_c25519_m15.c
new file mode 100644
index 0000000..deff55b
--- /dev/null
+++ b/third_party/bearssl/src/ec_c25519_m15.c
@@ -0,0 +1,1478 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* obsolete
+#include <stdio.h>
+#include <stdlib.h>
+static void
+print_int(const char *name, const uint32_t *x)
+{
+	size_t u;
+	unsigned char tmp[36];
+
+	printf("%s = ", name);
+	for (u = 0; u < 20; u ++) {
+		if (x[u] > 0x1FFF) {
+			printf("INVALID:");
+			for (u = 0; u < 20; u ++) {
+				printf(" %04X", x[u]);
+			}
+			printf("\n");
+			return;
+		}
+	}
+	memset(tmp, 0, sizeof tmp);
+	for (u = 0; u < 20; u ++) {
+		uint32_t w;
+		int j, k;
+
+		w = x[u];
+		j = 13 * (int)u;
+		k = j & 7;
+		if (k != 0) {
+			w <<= k;
+			j -= k;
+		}
+		k = j >> 3;
+		tmp[35 - k] |= (unsigned char)w;
+		tmp[34 - k] |= (unsigned char)(w >> 8);
+		tmp[33 - k] |= (unsigned char)(w >> 16);
+		tmp[32 - k] |= (unsigned char)(w >> 24);
+	}
+	for (u = 4; u < 36; u ++) {
+		printf("%02X", tmp[u]);
+	}
+	printf("\n");
+}
+*/
+
+/*
+ * ARSH(x, n) is an arithmetic (sign-extending) right shift of a 32-bit
+ * value. If BR_NO_ARITH_SHIFT is undefined, or defined to 0, then we
+ * _assume_ that right-shifting a negative signed integer copies the
+ * sign bit. This is "implementation-defined behaviour": not undefined,
+ * but it may differ between compilers. GCC documents an arithmetic
+ * shift; we expect other compilers to follow, since the underlying CPUs
+ * offer such an opcode. That variant takes the address of x (lvalue
+ * needed); the emulated variant requires 1 <= n <= 31 (uses 32 - n).
+ */
+#if BR_NO_ARITH_SHIFT
+#define ARSH(x, n)   (((uint32_t)(x) >> (n)) \
+                    | ((-((uint32_t)(x) >> 31)) << (32 - (n))))
+#else
+#define ARSH(x, n)   ((*(int32_t *)&(x)) >> (n))
+#endif
+
+/*
+ * Decode len bytes of unsigned little-endian data from src into 13-bit
+ * words stored in dst (least significant word first). Only complete
+ * words are stored; the leftover high bits (fewer than 13) are returned.
+ */
+static uint32_t
+le8_to_le13(uint32_t *dst, const unsigned char *src, size_t len)
+{
+	uint32_t pending = 0;
+	int nbits = 0;
+	size_t u;
+
+	for (u = 0; u < len; u ++) {
+		pending |= (uint32_t)src[u] << nbits;
+		nbits += 8;
+		if (nbits < 13) {
+			continue;
+		}
+		*dst ++ = pending & 0x1FFF;
+		pending >>= 13;
+		nbits -= 13;
+	}
+	return pending;
+}
+
+/*
+ * Encode an integer held as little-endian 13-bit words (src) into
+ * exactly len bytes of unsigned little-endian data at dst. Every
+ * destination byte is written.
+ */
+static void
+le13_to_le8(unsigned char *dst, size_t len, const uint32_t *src)
+{
+	uint32_t pending = 0;
+	int nbits = 0;
+	size_t u;
+
+	for (u = 0; u < len; u ++) {
+		if (nbits < 8) {
+			/* Not enough buffered bits for a byte: pull a word. */
+			pending |= (*src ++) << nbits;
+			nbits += 13;
+		}
+		dst[u] = (unsigned char)pending;
+		pending >>= 8;
+		nbits -= 8;
+	}
+}
+
+/*
+ * Propagate carries through len words so that each output word holds
+ * exactly 13 bits; the final carry is returned. d and w may be the
+ * same array, but must not overlap in any other way.
+ */
+static inline uint32_t
+norm13(uint32_t *d, const uint32_t *w, size_t len)
+{
+	uint32_t carry = 0;
+	size_t k;
+
+	for (k = 0; k < len; k ++) {
+		/* Signed so that a negative sum propagates a negative carry. */
+		int32_t sum;
+
+		sum = w[k] + carry;
+		d[k] = sum & 0x1FFF;
+		carry = ARSH(sum, 13);
+	}
+	return carry;
+}
+
+/*
+ * mul20() multiplies two 260-bit integers together. Each word must fit
+ * on 13 bits; source operands use 20 words, destination operand
+ * receives 40 words. All overlaps allowed.
+ *
+ * square20() computes the square of a 260-bit integer. Each word must
+ * fit on 13 bits; source operand uses 20 words, destination operand
+ * receives 40 words. All overlaps allowed.
+ */
+
+#if BR_SLOW_MUL15
+
+static void
+mul20(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	/*
+	 * Two-level Karatsuba: turns a 20x20 multiplication into
+	 * nine 5x5 multiplications. We use 13-bit words but do not
+	 * propagate carries immediately, so words may expand:
+	 *
+	 *  - First Karatsuba decomposition turns the 20x20 mul on
+	 *    13-bit words into three 10x10 muls, two on 13-bit words
+	 *    and one on 14-bit words.
+	 *
+	 *  - Second Karatsuba decomposition further splits these into:
+	 *
+	 *     * four 5x5 muls on 13-bit words
+	 *     * four 5x5 muls on 14-bit words
+	 *     * one 5x5 mul on 15-bit words
+	 *
+	 * Highest word value is 8191, 16382 or 32764, for 13-bit, 14-bit
+	 * or 15-bit words, respectively.
+	 */
+	uint32_t u[45], v[45], w[90];   /* 9 operand slots (5 words), 9 product slots (10 words) */
+	uint32_t cc;
+	int i;
+
+#define ZADD(dw, d_off, s1w, s1_off, s2w, s2_off)   do { \
+		(dw)[5 * (d_off) + 0] = (s1w)[5 * (s1_off) + 0] \
+			+ (s2w)[5 * (s2_off) + 0]; \
+		(dw)[5 * (d_off) + 1] = (s1w)[5 * (s1_off) + 1] \
+			+ (s2w)[5 * (s2_off) + 1]; \
+		(dw)[5 * (d_off) + 2] = (s1w)[5 * (s1_off) + 2] \
+			+ (s2w)[5 * (s2_off) + 2]; \
+		(dw)[5 * (d_off) + 3] = (s1w)[5 * (s1_off) + 3] \
+			+ (s2w)[5 * (s2_off) + 3]; \
+		(dw)[5 * (d_off) + 4] = (s1w)[5 * (s1_off) + 4] \
+			+ (s2w)[5 * (s2_off) + 4]; \
+	} while (0)
+
+#define ZADDT(dw, d_off, sw, s_off)   do { \
+		(dw)[5 * (d_off) + 0] += (sw)[5 * (s_off) + 0]; \
+		(dw)[5 * (d_off) + 1] += (sw)[5 * (s_off) + 1]; \
+		(dw)[5 * (d_off) + 2] += (sw)[5 * (s_off) + 2]; \
+		(dw)[5 * (d_off) + 3] += (sw)[5 * (s_off) + 3]; \
+		(dw)[5 * (d_off) + 4] += (sw)[5 * (s_off) + 4]; \
+	} while (0)
+
+#define ZSUB2F(dw, d_off, s1w, s1_off, s2w, s2_off)   do { \
+		(dw)[5 * (d_off) + 0] -= (s1w)[5 * (s1_off) + 0] \
+			+ (s2w)[5 * (s2_off) + 0]; \
+		(dw)[5 * (d_off) + 1] -= (s1w)[5 * (s1_off) + 1] \
+			+ (s2w)[5 * (s2_off) + 1]; \
+		(dw)[5 * (d_off) + 2] -= (s1w)[5 * (s1_off) + 2] \
+			+ (s2w)[5 * (s2_off) + 2]; \
+		(dw)[5 * (d_off) + 3] -= (s1w)[5 * (s1_off) + 3] \
+			+ (s2w)[5 * (s2_off) + 3]; \
+		(dw)[5 * (d_off) + 4] -= (s1w)[5 * (s1_off) + 4] \
+			+ (s2w)[5 * (s2_off) + 4]; \
+	} while (0)
+
+#define CPR1(w, cprcc)   do { \
+		uint32_t cprz = (w) + cprcc; \
+		(w) = cprz & 0x1FFF; \
+		cprcc = cprz >> 13; \
+	} while (0)
+
+#define CPR(dw, d_off)   do { \
+		uint32_t cprcc; \
+		cprcc = 0; \
+		CPR1((dw)[(d_off) + 0], cprcc); \
+		CPR1((dw)[(d_off) + 1], cprcc); \
+		CPR1((dw)[(d_off) + 2], cprcc); \
+		CPR1((dw)[(d_off) + 3], cprcc); \
+		CPR1((dw)[(d_off) + 4], cprcc); \
+		CPR1((dw)[(d_off) + 5], cprcc); \
+		CPR1((dw)[(d_off) + 6], cprcc); \
+		CPR1((dw)[(d_off) + 7], cprcc); \
+		CPR1((dw)[(d_off) + 8], cprcc); \
+		(dw)[(d_off) + 9] = cprcc; \
+	} while (0)
+
+	memcpy(u, a, 20 * sizeof *a);
+	ZADD(u, 4, a, 0, a, 1);
+	ZADD(u, 5, a, 2, a, 3);
+	ZADD(u, 6, a, 0, a, 2);
+	ZADD(u, 7, a, 1, a, 3);
+	ZADD(u, 8, u, 6, u, 7);
+
+	memcpy(v, b, 20 * sizeof *b);
+	ZADD(v, 4, b, 0, b, 1);
+	ZADD(v, 5, b, 2, b, 3);
+	ZADD(v, 6, b, 0, b, 2);
+	ZADD(v, 7, b, 1, b, 3);
+	ZADD(v, 8, v, 6, v, 7);
+
+	/*
+	 * Do the first eight 5x5 muls. Source words are at most 16382
+	 * each, so we can add product results together "as is" in 32-bit
+	 * words.
+	 */
+	for (i = 0; i < 40; i += 5) {
+		w[(i << 1) + 0] = MUL15(u[i + 0], v[i + 0]);
+		w[(i << 1) + 1] = MUL15(u[i + 0], v[i + 1])
+			+ MUL15(u[i + 1], v[i + 0]);
+		w[(i << 1) + 2] = MUL15(u[i + 0], v[i + 2])
+			+ MUL15(u[i + 1], v[i + 1])
+			+ MUL15(u[i + 2], v[i + 0]);
+		w[(i << 1) + 3] = MUL15(u[i + 0], v[i + 3])
+			+ MUL15(u[i + 1], v[i + 2])
+			+ MUL15(u[i + 2], v[i + 1])
+			+ MUL15(u[i + 3], v[i + 0]);
+		w[(i << 1) + 4] = MUL15(u[i + 0], v[i + 4])
+			+ MUL15(u[i + 1], v[i + 3])
+			+ MUL15(u[i + 2], v[i + 2])
+			+ MUL15(u[i + 3], v[i + 1])
+			+ MUL15(u[i + 4], v[i + 0]);
+		w[(i << 1) + 5] = MUL15(u[i + 1], v[i + 4])
+			+ MUL15(u[i + 2], v[i + 3])
+			+ MUL15(u[i + 3], v[i + 2])
+			+ MUL15(u[i + 4], v[i + 1]);
+		w[(i << 1) + 6] = MUL15(u[i + 2], v[i + 4])
+			+ MUL15(u[i + 3], v[i + 3])
+			+ MUL15(u[i + 4], v[i + 2]);
+		w[(i << 1) + 7] = MUL15(u[i + 3], v[i + 4])
+			+ MUL15(u[i + 4], v[i + 3]);
+		w[(i << 1) + 8] = MUL15(u[i + 4], v[i + 4]);
+		w[(i << 1) + 9] = 0;
+	}
+
+	/*
+	 * For the 9th multiplication, source words are up to 32764,
+	 * so we must do some carry propagation. If we add up to
+	 * 4 products and the carry is no more than 524224, then the
+	 * result fits in 32 bits, and the next carry will be no more
+	 * than 524224 (because 4*(32764^2)+524224 < 8192*524225).
+	 *
+	 * We thus just skip one of the products in the middle word,
+	 * then do a carry propagation (this reduces words to 13 bits
+	 * each, except possibly the last, which may use up to 17 bits
+	 * or so), then add the missing product.
+	 */
+	w[80 + 0] = MUL15(u[40 + 0], v[40 + 0]);
+	w[80 + 1] = MUL15(u[40 + 0], v[40 + 1])
+		+ MUL15(u[40 + 1], v[40 + 0]);
+	w[80 + 2] = MUL15(u[40 + 0], v[40 + 2])
+		+ MUL15(u[40 + 1], v[40 + 1])
+		+ MUL15(u[40 + 2], v[40 + 0]);
+	w[80 + 3] = MUL15(u[40 + 0], v[40 + 3])
+		+ MUL15(u[40 + 1], v[40 + 2])
+		+ MUL15(u[40 + 2], v[40 + 1])
+		+ MUL15(u[40 + 3], v[40 + 0]);
+	w[80 + 4] = MUL15(u[40 + 0], v[40 + 4])
+		+ MUL15(u[40 + 1], v[40 + 3])
+		+ MUL15(u[40 + 2], v[40 + 2])
+		+ MUL15(u[40 + 3], v[40 + 1]);
+		/* + MUL15(u[40 + 4], v[40 + 0]) */
+	w[80 + 5] = MUL15(u[40 + 1], v[40 + 4])
+		+ MUL15(u[40 + 2], v[40 + 3])
+		+ MUL15(u[40 + 3], v[40 + 2])
+		+ MUL15(u[40 + 4], v[40 + 1]);
+	w[80 + 6] = MUL15(u[40 + 2], v[40 + 4])
+		+ MUL15(u[40 + 3], v[40 + 3])
+		+ MUL15(u[40 + 4], v[40 + 2]);
+	w[80 + 7] = MUL15(u[40 + 3], v[40 + 4])
+		+ MUL15(u[40 + 4], v[40 + 3]);
+	w[80 + 8] = MUL15(u[40 + 4], v[40 + 4]);
+
+	CPR(w, 80);
+
+	w[80 + 4] += MUL15(u[40 + 4], v[40 + 0]);
+
+	/*
+	 * The products on 14-bit words in slots 6 and 7 yield values
+	 * up to 5*(16382^2) each, and we need to subtract two such
+	 * values from the higher word. We need the subtraction to fit
+	 * in a _signed_ 32-bit integer, i.e. 31 bits + a sign bit.
+	 * However, 10*(16382^2) does not fit. So we must perform a
+	 * bit of reduction here.
+	 */
+	CPR(w, 60);
+	CPR(w, 70);
+
+	/*
+	 * Recompose results.
+	 */
+
+	/* 0..1*0..1 into 0..3 */
+	ZSUB2F(w, 8, w, 0, w, 2);
+	ZSUB2F(w, 9, w, 1, w, 3);
+	ZADDT(w, 1, w, 8);
+	ZADDT(w, 2, w, 9);
+
+	/* 2..3*2..3 into 4..7 */
+	ZSUB2F(w, 10, w, 4, w, 6);
+	ZSUB2F(w, 11, w, 5, w, 7);
+	ZADDT(w, 5, w, 10);
+	ZADDT(w, 6, w, 11);
+
+	/* (0..1+2..3)*(0..1+2..3) into 12..15 */
+	ZSUB2F(w, 16, w, 12, w, 14);
+	ZSUB2F(w, 17, w, 13, w, 15);
+	ZADDT(w, 13, w, 16);
+	ZADDT(w, 14, w, 17);
+
+	/* first-level recomposition */
+	ZSUB2F(w, 12, w, 0, w, 4);
+	ZSUB2F(w, 13, w, 1, w, 5);
+	ZSUB2F(w, 14, w, 2, w, 6);
+	ZSUB2F(w, 15, w, 3, w, 7);
+	ZADDT(w, 2, w, 12);
+	ZADDT(w, 3, w, 13);
+	ZADDT(w, 4, w, 14);
+	ZADDT(w, 5, w, 15);
+
+	/*
+	 * Perform carry propagation to bring all words down to 13 bits.
+	 */
+	cc = norm13(d, w, 40);
+	d[39] += (cc << 13);   /* fold the carry out of word 39 back into it */
+
+#undef ZADD
+#undef ZADDT
+#undef ZSUB2F
+#undef CPR1
+#undef CPR
+}
+
+static inline void
+square20(uint32_t *d, const uint32_t *a)
+{
+	mul20(d, a, a);   /* BR_SLOW_MUL15 build: squaring reuses the general product */
+}
+
+#else
+
+static void
+mul20(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t t[39];   /* t[k] = sum of a[i]*b[j] over i+j == k, carries not yet propagated */
+
+	t[ 0] = MUL15(a[ 0], b[ 0]);
+	t[ 1] = MUL15(a[ 0], b[ 1])
+		+ MUL15(a[ 1], b[ 0]);
+	t[ 2] = MUL15(a[ 0], b[ 2])
+		+ MUL15(a[ 1], b[ 1])
+		+ MUL15(a[ 2], b[ 0]);
+	t[ 3] = MUL15(a[ 0], b[ 3])
+		+ MUL15(a[ 1], b[ 2])
+		+ MUL15(a[ 2], b[ 1])
+		+ MUL15(a[ 3], b[ 0]);
+	t[ 4] = MUL15(a[ 0], b[ 4])
+		+ MUL15(a[ 1], b[ 3])
+		+ MUL15(a[ 2], b[ 2])
+		+ MUL15(a[ 3], b[ 1])
+		+ MUL15(a[ 4], b[ 0]);
+	t[ 5] = MUL15(a[ 0], b[ 5])
+		+ MUL15(a[ 1], b[ 4])
+		+ MUL15(a[ 2], b[ 3])
+		+ MUL15(a[ 3], b[ 2])
+		+ MUL15(a[ 4], b[ 1])
+		+ MUL15(a[ 5], b[ 0]);
+	t[ 6] = MUL15(a[ 0], b[ 6])
+		+ MUL15(a[ 1], b[ 5])
+		+ MUL15(a[ 2], b[ 4])
+		+ MUL15(a[ 3], b[ 3])
+		+ MUL15(a[ 4], b[ 2])
+		+ MUL15(a[ 5], b[ 1])
+		+ MUL15(a[ 6], b[ 0]);
+	t[ 7] = MUL15(a[ 0], b[ 7])
+		+ MUL15(a[ 1], b[ 6])
+		+ MUL15(a[ 2], b[ 5])
+		+ MUL15(a[ 3], b[ 4])
+		+ MUL15(a[ 4], b[ 3])
+		+ MUL15(a[ 5], b[ 2])
+		+ MUL15(a[ 6], b[ 1])
+		+ MUL15(a[ 7], b[ 0]);
+	t[ 8] = MUL15(a[ 0], b[ 8])
+		+ MUL15(a[ 1], b[ 7])
+		+ MUL15(a[ 2], b[ 6])
+		+ MUL15(a[ 3], b[ 5])
+		+ MUL15(a[ 4], b[ 4])
+		+ MUL15(a[ 5], b[ 3])
+		+ MUL15(a[ 6], b[ 2])
+		+ MUL15(a[ 7], b[ 1])
+		+ MUL15(a[ 8], b[ 0]);
+	t[ 9] = MUL15(a[ 0], b[ 9])
+		+ MUL15(a[ 1], b[ 8])
+		+ MUL15(a[ 2], b[ 7])
+		+ MUL15(a[ 3], b[ 6])
+		+ MUL15(a[ 4], b[ 5])
+		+ MUL15(a[ 5], b[ 4])
+		+ MUL15(a[ 6], b[ 3])
+		+ MUL15(a[ 7], b[ 2])
+		+ MUL15(a[ 8], b[ 1])
+		+ MUL15(a[ 9], b[ 0]);
+	t[10] = MUL15(a[ 0], b[10])
+		+ MUL15(a[ 1], b[ 9])
+		+ MUL15(a[ 2], b[ 8])
+		+ MUL15(a[ 3], b[ 7])
+		+ MUL15(a[ 4], b[ 6])
+		+ MUL15(a[ 5], b[ 5])
+		+ MUL15(a[ 6], b[ 4])
+		+ MUL15(a[ 7], b[ 3])
+		+ MUL15(a[ 8], b[ 2])
+		+ MUL15(a[ 9], b[ 1])
+		+ MUL15(a[10], b[ 0]);
+	t[11] = MUL15(a[ 0], b[11])
+		+ MUL15(a[ 1], b[10])
+		+ MUL15(a[ 2], b[ 9])
+		+ MUL15(a[ 3], b[ 8])
+		+ MUL15(a[ 4], b[ 7])
+		+ MUL15(a[ 5], b[ 6])
+		+ MUL15(a[ 6], b[ 5])
+		+ MUL15(a[ 7], b[ 4])
+		+ MUL15(a[ 8], b[ 3])
+		+ MUL15(a[ 9], b[ 2])
+		+ MUL15(a[10], b[ 1])
+		+ MUL15(a[11], b[ 0]);
+	t[12] = MUL15(a[ 0], b[12])
+		+ MUL15(a[ 1], b[11])
+		+ MUL15(a[ 2], b[10])
+		+ MUL15(a[ 3], b[ 9])
+		+ MUL15(a[ 4], b[ 8])
+		+ MUL15(a[ 5], b[ 7])
+		+ MUL15(a[ 6], b[ 6])
+		+ MUL15(a[ 7], b[ 5])
+		+ MUL15(a[ 8], b[ 4])
+		+ MUL15(a[ 9], b[ 3])
+		+ MUL15(a[10], b[ 2])
+		+ MUL15(a[11], b[ 1])
+		+ MUL15(a[12], b[ 0]);
+	t[13] = MUL15(a[ 0], b[13])
+		+ MUL15(a[ 1], b[12])
+		+ MUL15(a[ 2], b[11])
+		+ MUL15(a[ 3], b[10])
+		+ MUL15(a[ 4], b[ 9])
+		+ MUL15(a[ 5], b[ 8])
+		+ MUL15(a[ 6], b[ 7])
+		+ MUL15(a[ 7], b[ 6])
+		+ MUL15(a[ 8], b[ 5])
+		+ MUL15(a[ 9], b[ 4])
+		+ MUL15(a[10], b[ 3])
+		+ MUL15(a[11], b[ 2])
+		+ MUL15(a[12], b[ 1])
+		+ MUL15(a[13], b[ 0]);
+	t[14] = MUL15(a[ 0], b[14])
+		+ MUL15(a[ 1], b[13])
+		+ MUL15(a[ 2], b[12])
+		+ MUL15(a[ 3], b[11])
+		+ MUL15(a[ 4], b[10])
+		+ MUL15(a[ 5], b[ 9])
+		+ MUL15(a[ 6], b[ 8])
+		+ MUL15(a[ 7], b[ 7])
+		+ MUL15(a[ 8], b[ 6])
+		+ MUL15(a[ 9], b[ 5])
+		+ MUL15(a[10], b[ 4])
+		+ MUL15(a[11], b[ 3])
+		+ MUL15(a[12], b[ 2])
+		+ MUL15(a[13], b[ 1])
+		+ MUL15(a[14], b[ 0]);
+	t[15] = MUL15(a[ 0], b[15])
+		+ MUL15(a[ 1], b[14])
+		+ MUL15(a[ 2], b[13])
+		+ MUL15(a[ 3], b[12])
+		+ MUL15(a[ 4], b[11])
+		+ MUL15(a[ 5], b[10])
+		+ MUL15(a[ 6], b[ 9])
+		+ MUL15(a[ 7], b[ 8])
+		+ MUL15(a[ 8], b[ 7])
+		+ MUL15(a[ 9], b[ 6])
+		+ MUL15(a[10], b[ 5])
+		+ MUL15(a[11], b[ 4])
+		+ MUL15(a[12], b[ 3])
+		+ MUL15(a[13], b[ 2])
+		+ MUL15(a[14], b[ 1])
+		+ MUL15(a[15], b[ 0]);
+	t[16] = MUL15(a[ 0], b[16])
+		+ MUL15(a[ 1], b[15])
+		+ MUL15(a[ 2], b[14])
+		+ MUL15(a[ 3], b[13])
+		+ MUL15(a[ 4], b[12])
+		+ MUL15(a[ 5], b[11])
+		+ MUL15(a[ 6], b[10])
+		+ MUL15(a[ 7], b[ 9])
+		+ MUL15(a[ 8], b[ 8])
+		+ MUL15(a[ 9], b[ 7])
+		+ MUL15(a[10], b[ 6])
+		+ MUL15(a[11], b[ 5])
+		+ MUL15(a[12], b[ 4])
+		+ MUL15(a[13], b[ 3])
+		+ MUL15(a[14], b[ 2])
+		+ MUL15(a[15], b[ 1])
+		+ MUL15(a[16], b[ 0]);
+	t[17] = MUL15(a[ 0], b[17])
+		+ MUL15(a[ 1], b[16])
+		+ MUL15(a[ 2], b[15])
+		+ MUL15(a[ 3], b[14])
+		+ MUL15(a[ 4], b[13])
+		+ MUL15(a[ 5], b[12])
+		+ MUL15(a[ 6], b[11])
+		+ MUL15(a[ 7], b[10])
+		+ MUL15(a[ 8], b[ 9])
+		+ MUL15(a[ 9], b[ 8])
+		+ MUL15(a[10], b[ 7])
+		+ MUL15(a[11], b[ 6])
+		+ MUL15(a[12], b[ 5])
+		+ MUL15(a[13], b[ 4])
+		+ MUL15(a[14], b[ 3])
+		+ MUL15(a[15], b[ 2])
+		+ MUL15(a[16], b[ 1])
+		+ MUL15(a[17], b[ 0]);
+	t[18] = MUL15(a[ 0], b[18])
+		+ MUL15(a[ 1], b[17])
+		+ MUL15(a[ 2], b[16])
+		+ MUL15(a[ 3], b[15])
+		+ MUL15(a[ 4], b[14])
+		+ MUL15(a[ 5], b[13])
+		+ MUL15(a[ 6], b[12])
+		+ MUL15(a[ 7], b[11])
+		+ MUL15(a[ 8], b[10])
+		+ MUL15(a[ 9], b[ 9])
+		+ MUL15(a[10], b[ 8])
+		+ MUL15(a[11], b[ 7])
+		+ MUL15(a[12], b[ 6])
+		+ MUL15(a[13], b[ 5])
+		+ MUL15(a[14], b[ 4])
+		+ MUL15(a[15], b[ 3])
+		+ MUL15(a[16], b[ 2])
+		+ MUL15(a[17], b[ 1])
+		+ MUL15(a[18], b[ 0]);
+	t[19] = MUL15(a[ 0], b[19])
+		+ MUL15(a[ 1], b[18])
+		+ MUL15(a[ 2], b[17])
+		+ MUL15(a[ 3], b[16])
+		+ MUL15(a[ 4], b[15])
+		+ MUL15(a[ 5], b[14])
+		+ MUL15(a[ 6], b[13])
+		+ MUL15(a[ 7], b[12])
+		+ MUL15(a[ 8], b[11])
+		+ MUL15(a[ 9], b[10])
+		+ MUL15(a[10], b[ 9])
+		+ MUL15(a[11], b[ 8])
+		+ MUL15(a[12], b[ 7])
+		+ MUL15(a[13], b[ 6])
+		+ MUL15(a[14], b[ 5])
+		+ MUL15(a[15], b[ 4])
+		+ MUL15(a[16], b[ 3])
+		+ MUL15(a[17], b[ 2])
+		+ MUL15(a[18], b[ 1])
+		+ MUL15(a[19], b[ 0]);
+	t[20] = MUL15(a[ 1], b[19])
+		+ MUL15(a[ 2], b[18])
+		+ MUL15(a[ 3], b[17])
+		+ MUL15(a[ 4], b[16])
+		+ MUL15(a[ 5], b[15])
+		+ MUL15(a[ 6], b[14])
+		+ MUL15(a[ 7], b[13])
+		+ MUL15(a[ 8], b[12])
+		+ MUL15(a[ 9], b[11])
+		+ MUL15(a[10], b[10])
+		+ MUL15(a[11], b[ 9])
+		+ MUL15(a[12], b[ 8])
+		+ MUL15(a[13], b[ 7])
+		+ MUL15(a[14], b[ 6])
+		+ MUL15(a[15], b[ 5])
+		+ MUL15(a[16], b[ 4])
+		+ MUL15(a[17], b[ 3])
+		+ MUL15(a[18], b[ 2])
+		+ MUL15(a[19], b[ 1]);
+	t[21] = MUL15(a[ 2], b[19])
+		+ MUL15(a[ 3], b[18])
+		+ MUL15(a[ 4], b[17])
+		+ MUL15(a[ 5], b[16])
+		+ MUL15(a[ 6], b[15])
+		+ MUL15(a[ 7], b[14])
+		+ MUL15(a[ 8], b[13])
+		+ MUL15(a[ 9], b[12])
+		+ MUL15(a[10], b[11])
+		+ MUL15(a[11], b[10])
+		+ MUL15(a[12], b[ 9])
+		+ MUL15(a[13], b[ 8])
+		+ MUL15(a[14], b[ 7])
+		+ MUL15(a[15], b[ 6])
+		+ MUL15(a[16], b[ 5])
+		+ MUL15(a[17], b[ 4])
+		+ MUL15(a[18], b[ 3])
+		+ MUL15(a[19], b[ 2]);
+	t[22] = MUL15(a[ 3], b[19])
+		+ MUL15(a[ 4], b[18])
+		+ MUL15(a[ 5], b[17])
+		+ MUL15(a[ 6], b[16])
+		+ MUL15(a[ 7], b[15])
+		+ MUL15(a[ 8], b[14])
+		+ MUL15(a[ 9], b[13])
+		+ MUL15(a[10], b[12])
+		+ MUL15(a[11], b[11])
+		+ MUL15(a[12], b[10])
+		+ MUL15(a[13], b[ 9])
+		+ MUL15(a[14], b[ 8])
+		+ MUL15(a[15], b[ 7])
+		+ MUL15(a[16], b[ 6])
+		+ MUL15(a[17], b[ 5])
+		+ MUL15(a[18], b[ 4])
+		+ MUL15(a[19], b[ 3]);
+	t[23] = MUL15(a[ 4], b[19])
+		+ MUL15(a[ 5], b[18])
+		+ MUL15(a[ 6], b[17])
+		+ MUL15(a[ 7], b[16])
+		+ MUL15(a[ 8], b[15])
+		+ MUL15(a[ 9], b[14])
+		+ MUL15(a[10], b[13])
+		+ MUL15(a[11], b[12])
+		+ MUL15(a[12], b[11])
+		+ MUL15(a[13], b[10])
+		+ MUL15(a[14], b[ 9])
+		+ MUL15(a[15], b[ 8])
+		+ MUL15(a[16], b[ 7])
+		+ MUL15(a[17], b[ 6])
+		+ MUL15(a[18], b[ 5])
+		+ MUL15(a[19], b[ 4]);
+	t[24] = MUL15(a[ 5], b[19])
+		+ MUL15(a[ 6], b[18])
+		+ MUL15(a[ 7], b[17])
+		+ MUL15(a[ 8], b[16])
+		+ MUL15(a[ 9], b[15])
+		+ MUL15(a[10], b[14])
+		+ MUL15(a[11], b[13])
+		+ MUL15(a[12], b[12])
+		+ MUL15(a[13], b[11])
+		+ MUL15(a[14], b[10])
+		+ MUL15(a[15], b[ 9])
+		+ MUL15(a[16], b[ 8])
+		+ MUL15(a[17], b[ 7])
+		+ MUL15(a[18], b[ 6])
+		+ MUL15(a[19], b[ 5]);
+	t[25] = MUL15(a[ 6], b[19])
+		+ MUL15(a[ 7], b[18])
+		+ MUL15(a[ 8], b[17])
+		+ MUL15(a[ 9], b[16])
+		+ MUL15(a[10], b[15])
+		+ MUL15(a[11], b[14])
+		+ MUL15(a[12], b[13])
+		+ MUL15(a[13], b[12])
+		+ MUL15(a[14], b[11])
+		+ MUL15(a[15], b[10])
+		+ MUL15(a[16], b[ 9])
+		+ MUL15(a[17], b[ 8])
+		+ MUL15(a[18], b[ 7])
+		+ MUL15(a[19], b[ 6]);
+	t[26] = MUL15(a[ 7], b[19])
+		+ MUL15(a[ 8], b[18])
+		+ MUL15(a[ 9], b[17])
+		+ MUL15(a[10], b[16])
+		+ MUL15(a[11], b[15])
+		+ MUL15(a[12], b[14])
+		+ MUL15(a[13], b[13])
+		+ MUL15(a[14], b[12])
+		+ MUL15(a[15], b[11])
+		+ MUL15(a[16], b[10])
+		+ MUL15(a[17], b[ 9])
+		+ MUL15(a[18], b[ 8])
+		+ MUL15(a[19], b[ 7]);
+	t[27] = MUL15(a[ 8], b[19])
+		+ MUL15(a[ 9], b[18])
+		+ MUL15(a[10], b[17])
+		+ MUL15(a[11], b[16])
+		+ MUL15(a[12], b[15])
+		+ MUL15(a[13], b[14])
+		+ MUL15(a[14], b[13])
+		+ MUL15(a[15], b[12])
+		+ MUL15(a[16], b[11])
+		+ MUL15(a[17], b[10])
+		+ MUL15(a[18], b[ 9])
+		+ MUL15(a[19], b[ 8]);
+	t[28] = MUL15(a[ 9], b[19])
+		+ MUL15(a[10], b[18])
+		+ MUL15(a[11], b[17])
+		+ MUL15(a[12], b[16])
+		+ MUL15(a[13], b[15])
+		+ MUL15(a[14], b[14])
+		+ MUL15(a[15], b[13])
+		+ MUL15(a[16], b[12])
+		+ MUL15(a[17], b[11])
+		+ MUL15(a[18], b[10])
+		+ MUL15(a[19], b[ 9]);
+	t[29] = MUL15(a[10], b[19])
+		+ MUL15(a[11], b[18])
+		+ MUL15(a[12], b[17])
+		+ MUL15(a[13], b[16])
+		+ MUL15(a[14], b[15])
+		+ MUL15(a[15], b[14])
+		+ MUL15(a[16], b[13])
+		+ MUL15(a[17], b[12])
+		+ MUL15(a[18], b[11])
+		+ MUL15(a[19], b[10]);
+	t[30] = MUL15(a[11], b[19])
+		+ MUL15(a[12], b[18])
+		+ MUL15(a[13], b[17])
+		+ MUL15(a[14], b[16])
+		+ MUL15(a[15], b[15])
+		+ MUL15(a[16], b[14])
+		+ MUL15(a[17], b[13])
+		+ MUL15(a[18], b[12])
+		+ MUL15(a[19], b[11]);
+	t[31] = MUL15(a[12], b[19])
+		+ MUL15(a[13], b[18])
+		+ MUL15(a[14], b[17])
+		+ MUL15(a[15], b[16])
+		+ MUL15(a[16], b[15])
+		+ MUL15(a[17], b[14])
+		+ MUL15(a[18], b[13])
+		+ MUL15(a[19], b[12]);
+	t[32] = MUL15(a[13], b[19])
+		+ MUL15(a[14], b[18])
+		+ MUL15(a[15], b[17])
+		+ MUL15(a[16], b[16])
+		+ MUL15(a[17], b[15])
+		+ MUL15(a[18], b[14])
+		+ MUL15(a[19], b[13]);
+	t[33] = MUL15(a[14], b[19])
+		+ MUL15(a[15], b[18])
+		+ MUL15(a[16], b[17])
+		+ MUL15(a[17], b[16])
+		+ MUL15(a[18], b[15])
+		+ MUL15(a[19], b[14]);
+	t[34] = MUL15(a[15], b[19])
+		+ MUL15(a[16], b[18])
+		+ MUL15(a[17], b[17])
+		+ MUL15(a[18], b[16])
+		+ MUL15(a[19], b[15]);
+	t[35] = MUL15(a[16], b[19])
+		+ MUL15(a[17], b[18])
+		+ MUL15(a[18], b[17])
+		+ MUL15(a[19], b[16]);
+	t[36] = MUL15(a[17], b[19])
+		+ MUL15(a[18], b[18])
+		+ MUL15(a[19], b[17]);
+	t[37] = MUL15(a[18], b[19])
+		+ MUL15(a[19], b[18]);
+	t[38] = MUL15(a[19], b[19]);
+
+	d[39] = norm13(d, t, 39);   /* d[0..38] get 13-bit words; the last carry becomes d[39] */
+}
+
+static void
+square20(uint32_t *d, const uint32_t *a)
+{
+	uint32_t t[39];   /* t[k] = sum of a[i]*a[j] over i+j == k; cross terms computed once, then doubled */
+
+	t[ 0] = MUL15(a[ 0], a[ 0]);
+	t[ 1] = ((MUL15(a[ 0], a[ 1])) << 1);
+	t[ 2] = MUL15(a[ 1], a[ 1])
+		+ ((MUL15(a[ 0], a[ 2])) << 1);
+	t[ 3] = ((MUL15(a[ 0], a[ 3])
+		+ MUL15(a[ 1], a[ 2])) << 1);
+	t[ 4] = MUL15(a[ 2], a[ 2])
+		+ ((MUL15(a[ 0], a[ 4])
+		+ MUL15(a[ 1], a[ 3])) << 1);
+	t[ 5] = ((MUL15(a[ 0], a[ 5])
+		+ MUL15(a[ 1], a[ 4])
+		+ MUL15(a[ 2], a[ 3])) << 1);
+	t[ 6] = MUL15(a[ 3], a[ 3])
+		+ ((MUL15(a[ 0], a[ 6])
+		+ MUL15(a[ 1], a[ 5])
+		+ MUL15(a[ 2], a[ 4])) << 1);
+	t[ 7] = ((MUL15(a[ 0], a[ 7])
+		+ MUL15(a[ 1], a[ 6])
+		+ MUL15(a[ 2], a[ 5])
+		+ MUL15(a[ 3], a[ 4])) << 1);
+	t[ 8] = MUL15(a[ 4], a[ 4])
+		+ ((MUL15(a[ 0], a[ 8])
+		+ MUL15(a[ 1], a[ 7])
+		+ MUL15(a[ 2], a[ 6])
+		+ MUL15(a[ 3], a[ 5])) << 1);
+	t[ 9] = ((MUL15(a[ 0], a[ 9])
+		+ MUL15(a[ 1], a[ 8])
+		+ MUL15(a[ 2], a[ 7])
+		+ MUL15(a[ 3], a[ 6])
+		+ MUL15(a[ 4], a[ 5])) << 1);
+	t[10] = MUL15(a[ 5], a[ 5])
+		+ ((MUL15(a[ 0], a[10])
+		+ MUL15(a[ 1], a[ 9])
+		+ MUL15(a[ 2], a[ 8])
+		+ MUL15(a[ 3], a[ 7])
+		+ MUL15(a[ 4], a[ 6])) << 1);
+	t[11] = ((MUL15(a[ 0], a[11])
+		+ MUL15(a[ 1], a[10])
+		+ MUL15(a[ 2], a[ 9])
+		+ MUL15(a[ 3], a[ 8])
+		+ MUL15(a[ 4], a[ 7])
+		+ MUL15(a[ 5], a[ 6])) << 1);
+	t[12] = MUL15(a[ 6], a[ 6])
+		+ ((MUL15(a[ 0], a[12])
+		+ MUL15(a[ 1], a[11])
+		+ MUL15(a[ 2], a[10])
+		+ MUL15(a[ 3], a[ 9])
+		+ MUL15(a[ 4], a[ 8])
+		+ MUL15(a[ 5], a[ 7])) << 1);
+	t[13] = ((MUL15(a[ 0], a[13])
+		+ MUL15(a[ 1], a[12])
+		+ MUL15(a[ 2], a[11])
+		+ MUL15(a[ 3], a[10])
+		+ MUL15(a[ 4], a[ 9])
+		+ MUL15(a[ 5], a[ 8])
+		+ MUL15(a[ 6], a[ 7])) << 1);
+	t[14] = MUL15(a[ 7], a[ 7])
+		+ ((MUL15(a[ 0], a[14])
+		+ MUL15(a[ 1], a[13])
+		+ MUL15(a[ 2], a[12])
+		+ MUL15(a[ 3], a[11])
+		+ MUL15(a[ 4], a[10])
+		+ MUL15(a[ 5], a[ 9])
+		+ MUL15(a[ 6], a[ 8])) << 1);
+	t[15] = ((MUL15(a[ 0], a[15])
+		+ MUL15(a[ 1], a[14])
+		+ MUL15(a[ 2], a[13])
+		+ MUL15(a[ 3], a[12])
+		+ MUL15(a[ 4], a[11])
+		+ MUL15(a[ 5], a[10])
+		+ MUL15(a[ 6], a[ 9])
+		+ MUL15(a[ 7], a[ 8])) << 1);
+	t[16] = MUL15(a[ 8], a[ 8])
+		+ ((MUL15(a[ 0], a[16])
+		+ MUL15(a[ 1], a[15])
+		+ MUL15(a[ 2], a[14])
+		+ MUL15(a[ 3], a[13])
+		+ MUL15(a[ 4], a[12])
+		+ MUL15(a[ 5], a[11])
+		+ MUL15(a[ 6], a[10])
+		+ MUL15(a[ 7], a[ 9])) << 1);
+	t[17] = ((MUL15(a[ 0], a[17])
+		+ MUL15(a[ 1], a[16])
+		+ MUL15(a[ 2], a[15])
+		+ MUL15(a[ 3], a[14])
+		+ MUL15(a[ 4], a[13])
+		+ MUL15(a[ 5], a[12])
+		+ MUL15(a[ 6], a[11])
+		+ MUL15(a[ 7], a[10])
+		+ MUL15(a[ 8], a[ 9])) << 1);
+	t[18] = MUL15(a[ 9], a[ 9])
+		+ ((MUL15(a[ 0], a[18])
+		+ MUL15(a[ 1], a[17])
+		+ MUL15(a[ 2], a[16])
+		+ MUL15(a[ 3], a[15])
+		+ MUL15(a[ 4], a[14])
+		+ MUL15(a[ 5], a[13])
+		+ MUL15(a[ 6], a[12])
+		+ MUL15(a[ 7], a[11])
+		+ MUL15(a[ 8], a[10])) << 1);
+	t[19] = ((MUL15(a[ 0], a[19])
+		+ MUL15(a[ 1], a[18])
+		+ MUL15(a[ 2], a[17])
+		+ MUL15(a[ 3], a[16])
+		+ MUL15(a[ 4], a[15])
+		+ MUL15(a[ 5], a[14])
+		+ MUL15(a[ 6], a[13])
+		+ MUL15(a[ 7], a[12])
+		+ MUL15(a[ 8], a[11])
+		+ MUL15(a[ 9], a[10])) << 1);
+	t[20] = MUL15(a[10], a[10])
+		+ ((MUL15(a[ 1], a[19])
+		+ MUL15(a[ 2], a[18])
+		+ MUL15(a[ 3], a[17])
+		+ MUL15(a[ 4], a[16])
+		+ MUL15(a[ 5], a[15])
+		+ MUL15(a[ 6], a[14])
+		+ MUL15(a[ 7], a[13])
+		+ MUL15(a[ 8], a[12])
+		+ MUL15(a[ 9], a[11])) << 1);
+	t[21] = ((MUL15(a[ 2], a[19])
+		+ MUL15(a[ 3], a[18])
+		+ MUL15(a[ 4], a[17])
+		+ MUL15(a[ 5], a[16])
+		+ MUL15(a[ 6], a[15])
+		+ MUL15(a[ 7], a[14])
+		+ MUL15(a[ 8], a[13])
+		+ MUL15(a[ 9], a[12])
+		+ MUL15(a[10], a[11])) << 1);
+	t[22] = MUL15(a[11], a[11])
+		+ ((MUL15(a[ 3], a[19])
+		+ MUL15(a[ 4], a[18])
+		+ MUL15(a[ 5], a[17])
+		+ MUL15(a[ 6], a[16])
+		+ MUL15(a[ 7], a[15])
+		+ MUL15(a[ 8], a[14])
+		+ MUL15(a[ 9], a[13])
+		+ MUL15(a[10], a[12])) << 1);
+	t[23] = ((MUL15(a[ 4], a[19])
+		+ MUL15(a[ 5], a[18])
+		+ MUL15(a[ 6], a[17])
+		+ MUL15(a[ 7], a[16])
+		+ MUL15(a[ 8], a[15])
+		+ MUL15(a[ 9], a[14])
+		+ MUL15(a[10], a[13])
+		+ MUL15(a[11], a[12])) << 1);
+	t[24] = MUL15(a[12], a[12])
+		+ ((MUL15(a[ 5], a[19])
+		+ MUL15(a[ 6], a[18])
+		+ MUL15(a[ 7], a[17])
+		+ MUL15(a[ 8], a[16])
+		+ MUL15(a[ 9], a[15])
+		+ MUL15(a[10], a[14])
+		+ MUL15(a[11], a[13])) << 1);
+	t[25] = ((MUL15(a[ 6], a[19])
+		+ MUL15(a[ 7], a[18])
+		+ MUL15(a[ 8], a[17])
+		+ MUL15(a[ 9], a[16])
+		+ MUL15(a[10], a[15])
+		+ MUL15(a[11], a[14])
+		+ MUL15(a[12], a[13])) << 1);
+	t[26] = MUL15(a[13], a[13])
+		+ ((MUL15(a[ 7], a[19])
+		+ MUL15(a[ 8], a[18])
+		+ MUL15(a[ 9], a[17])
+		+ MUL15(a[10], a[16])
+		+ MUL15(a[11], a[15])
+		+ MUL15(a[12], a[14])) << 1);
+	t[27] = ((MUL15(a[ 8], a[19])
+		+ MUL15(a[ 9], a[18])
+		+ MUL15(a[10], a[17])
+		+ MUL15(a[11], a[16])
+		+ MUL15(a[12], a[15])
+		+ MUL15(a[13], a[14])) << 1);
+	t[28] = MUL15(a[14], a[14])
+		+ ((MUL15(a[ 9], a[19])
+		+ MUL15(a[10], a[18])
+		+ MUL15(a[11], a[17])
+		+ MUL15(a[12], a[16])
+		+ MUL15(a[13], a[15])) << 1);
+	t[29] = ((MUL15(a[10], a[19])
+		+ MUL15(a[11], a[18])
+		+ MUL15(a[12], a[17])
+		+ MUL15(a[13], a[16])
+		+ MUL15(a[14], a[15])) << 1);
+	t[30] = MUL15(a[15], a[15])
+		+ ((MUL15(a[11], a[19])
+		+ MUL15(a[12], a[18])
+		+ MUL15(a[13], a[17])
+		+ MUL15(a[14], a[16])) << 1);
+	t[31] = ((MUL15(a[12], a[19])
+		+ MUL15(a[13], a[18])
+		+ MUL15(a[14], a[17])
+		+ MUL15(a[15], a[16])) << 1);
+	t[32] = MUL15(a[16], a[16])
+		+ ((MUL15(a[13], a[19])
+		+ MUL15(a[14], a[18])
+		+ MUL15(a[15], a[17])) << 1);
+	t[33] = ((MUL15(a[14], a[19])
+		+ MUL15(a[15], a[18])
+		+ MUL15(a[16], a[17])) << 1);
+	t[34] = MUL15(a[17], a[17])
+		+ ((MUL15(a[15], a[19])
+		+ MUL15(a[16], a[18])) << 1);
+	t[35] = ((MUL15(a[16], a[19])
+		+ MUL15(a[17], a[18])) << 1);
+	t[36] = MUL15(a[18], a[18])
+		+ ((MUL15(a[17], a[19])) << 1);
+	t[37] = ((MUL15(a[18], a[19])) << 1);
+	t[38] = MUL15(a[19], a[19]);
+
+	d[39] = norm13(d, t, 39);   /* d[0..38] get 13-bit words; the last carry becomes d[39] */
+}
+
+#endif
+
+/*
+ * Final reduction in F255, the field of integers modulo 2^255-19 used
+ * by Curve25519. The input must be lower than twice the modulus. When
+ * it is greater than or equal to the modulus, the modulus is
+ * subtracted and 1 is returned; otherwise the value is left unchanged
+ * and 0 is returned.
+ */
+static uint32_t
+reduce_final_f255(uint32_t *d)
+{
+	uint32_t t[20];
+	uint32_t carry, over;
+	int k;
+
+	/* t <- d + 19; bit 255 of t is set exactly when d >= 2^255-19. */
+	carry = 19;
+	for (k = 0; k < 20; k ++) {
+		uint32_t sum;
+
+		sum = d[k] + carry;
+		t[k] = sum & 0x1FFF;
+		carry = sum >> 13;
+	}
+	over = t[19] >> 8;
+	t[19] &= 0xFF;
+	CCOPY(over, d, t, sizeof t);
+	return over;
+}
+
+static void
+f255_mulgen(uint32_t *d, const uint32_t *a, const uint32_t *b, int square)
+{
+	uint32_t t[40], cc, w;
+
+	/*
+	 * Raw product a*b (a*a when 'square' is nonzero; b is then unused).
+	 * Words fit on 13 bits each; the upper word (t[39]) fits on 5 bits,
+	 * since the product of two 256-bit integers fits on 512 bits.
+	 */
+	if (square) {
+		square20(t, a);
+	} else {
+		mul20(t, a, b);
+	}
+
+	/*
+	 * Modular reduction: each high word is added where necessary.
+	 * Since the modulus is 2^255-19 and word 20 corresponds to
+	 * offset 20*13 = 260, word 20+k must be added to word k with
+	 * a factor of 19*2^5 = 608. The extra bits in word 19 are also
+	 * added that way.
+	 */
+	cc = MUL15(t[19] >> 8, 19);   /* bits 255..259 of the low half, times 19 */
+	t[19] &= 0xFF;                /* word 19 keeps bits 247..254 only */
+
+#define MM1(x)   do { \
+		w = t[x] + cc + MUL15(t[(x) + 20], 608); \
+		t[x] = w & 0x1FFF; \
+		cc = w >> 13; \
+	} while (0)
+
+	MM1( 0);
+	MM1( 1);
+	MM1( 2);
+	MM1( 3);
+	MM1( 4);
+	MM1( 5);
+	MM1( 6);
+	MM1( 7);
+	MM1( 8);
+	MM1( 9);
+	MM1(10);
+	MM1(11);
+	MM1(12);
+	MM1(13);
+	MM1(14);
+	MM1(15);
+	MM1(16);
+	MM1(17);
+	MM1(18);
+	MM1(19);
+
+#undef MM1
+
+	cc = MUL15(w >> 8, 19);   /* w is still the untruncated word 19 from MM1(19) */
+	t[19] &= 0xFF;            /* second folding pass: excess above bit 255 goes into cc */
+
+#define MM2(x)   do { \
+		w = t[x] + cc; \
+		d[x] = w & 0x1FFF; \
+		cc = w >> 13; \
+	} while (0)
+
+	MM2( 0);
+	MM2( 1);
+	MM2( 2);
+	MM2( 3);
+	MM2( 4);
+	MM2( 5);
+	MM2( 6);
+	MM2( 7);
+	MM2( 8);
+	MM2( 9);
+	MM2(10);
+	MM2(11);
+	MM2(12);
+	MM2(13);
+	MM2(14);
+	MM2(15);
+	MM2(16);
+	MM2(17);
+	MM2(18);
+	MM2(19);
+
+#undef MM2
+}
+
+/*
+ * Perform a multiplication of two integers modulo 2^255-19.
+ * Operands are arrays of 20 words, each containing 13 bits of data, in
+ * little-endian order. Input values may be up to 2^256-1; on output, the
+ * value fits on 256 bits and is lower than twice the modulus.
+ *
+ * f255_mul() is the general multiplication, f255_square() is specialised
+ * for squarings.
+ */
+#define f255_mul(d, a, b)   f255_mulgen(d, a, b, 0)   /* d <- a*b */
+#define f255_square(d, a)   f255_mulgen(d, a, a, 1)   /* d <- a*a */
+
+/*
+ * Compute d = a + b in F255. The result is only partially reduced: it
+ * is brought down to less than twice the modulus.
+ */
+static void
+f255_add(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t carry, sum;
+	int k;
+
+	carry = 0;
+	for (k = 0; k < 20; k ++) {
+		sum = a[k] + b[k] + carry;
+		carry = sum >> 13;
+		d[k] = sum & 0x1FFF;
+	}
+	carry = MUL15(sum >> 8, 19);   /* excess above bit 255, times 19 */
+	d[19] &= 0xFF;
+	for (k = 0; k < 20; k ++) {
+		sum = d[k] + carry;
+		carry = sum >> 13;
+		d[k] = sum & 0x1FFF;
+	}
+}
+
+/*
+ * Subtract one value from another in F255 (d = a - b). Partial
+ * reduction is performed (down to less than twice the modulus).
+ */
+static void
+f255_sub(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	/*
+	 * We actually compute a - b + 2*p (2*p = 2^256 - 38), so that
+	 * the final value is necessarily positive.
+	 */
+	int i;
+	uint32_t cc, w;
+
+	cc = (uint32_t)-38;   /* the "- 38" part of 2*p, injected as initial carry */
+	for (i = 0; i < 20; i ++) {
+		w = a[i] - b[i] + cc;
+		d[i] = w & 0x1FFF;
+		cc = ARSH(w, 13);   /* signed carry: may be negative (borrow) */
+	}
+	cc = MUL15((w + 0x200) >> 8, 19);   /* 0x200 = 2^256 in units of word 19 (bit 247) */
+	d[19] &= 0xFF;
+	for (i = 0; i < 20; i ++) {
+		w = d[i] + cc;
+		d[i] = w & 0x1FFF;
+		cc = w >> 13;
+	}
+}
+
+/*
+ * Compute d = 121665 * a in F255 (121665 is the 'A24' constant). The
+ * result is partially reduced, down to less than twice the modulus.
+ */
+static void
+f255_mul_a24(uint32_t *d, const uint32_t *a)
+{
+	uint32_t carry, prod;
+	int k;
+
+	carry = 0;
+	for (k = 0; k < 20; k ++) {
+		prod = MUL15(a[k], 121665) + carry;
+		carry = prod >> 13;
+		d[k] = prod & 0x1FFF;
+	}
+	carry = MUL15(prod >> 8, 19);   /* excess above bit 255, times 19 */
+	d[19] &= 0xFF;
+	for (k = 0; k < 20; k ++) {
+		prod = d[k] + carry;
+		carry = prod >> 13;
+		d[k] = prod & 0x1FFF;
+	}
+}
+
+static const unsigned char GEN[] = {  /* 0x09 followed by 31 zero bytes */
+	0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};  /* returned by api_generator(); api_mul() decodes points little-endian: 9 */
+
+static const unsigned char ORDER[] = {  /* returned by api_order() */
+	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};  /* NOTE(review): big-endian 2^255-1, not a group order -- confirm intent */
+
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	(void)curve;		/* single curve: selector ignored */
+	*len = sizeof GEN;	/* 32 bytes */
+	return GEN;
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	(void)curve;		/* single curve: selector ignored */
+	*len = sizeof ORDER;	/* 32 bytes */
+	return ORDER;
+}
+
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	*len = (size_t)32;	/* reported length */
+	(void)curve;		/* single curve: selector ignored */
+	return (size_t)0;	/* reported offset */
+}
+
+static void
+cswap(uint32_t *a, uint32_t *b, uint32_t ctl)
+{
+	/* Swap the 20-word arrays when ctl == 1; no-op when ctl == 0. */
+	uint32_t mask;
+	int n;
+
+	mask = (uint32_t)0 - ctl;	/* 0 or 0xFFFFFFFF: no branch on ctl */
+	for (n = 0; n < 20; n ++) {
+		uint32_t delta;
+
+		delta = (a[n] ^ b[n]) & mask;
+		a[n] ^= delta;
+		b[n] ^= delta;
+	}
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *kb, size_t kblen, int curve)
+{
+	uint32_t x1[20], x2[20], x3[20], z2[20], z3[20];
+	uint32_t a[20], aa[20], b[20], bb[20];
+	uint32_t c[20], d[20], e[20], da[20], cb[20];
+	unsigned char k[32];
+	uint32_t swap;
+	int i;
+
+	(void)curve;
+
+	/*
+	 * G[] (exactly 32 bytes) is multiplied by kb[] (at most 32 bytes)
+	 * and overwritten with the result; 1 is returned, or 0 on bad length.
+	 * RFC 7748 mandates that the high bit of the last point byte must
+	 * be ignored/cleared.
+	 */
+	if (Glen != 32 || kblen > 32) {
+		return 0;
+	}
+	G[31] &= 0x7F;
+
+	/*
+	 * Initialise the ladder state: x1 = x3 = decoded point, x2 = 1,
+	 * z2 = 0, z3 = 1 (plain limb values, no representation change).
+	 */
+	x1[19] = le8_to_le13(x1, G, 32);
+	memcpy(x3, x1, sizeof x1);
+	memset(z2, 0, sizeof z2);
+	memset(x2, 0, sizeof x2);
+	x2[0] = 1;
+	memset(z3, 0, sizeof z3);
+	z3[0] = 1;
+
+	memset(k, 0, (sizeof k) - kblen);  /* left-pad: k[0] is the top byte */
+	memcpy(k + (sizeof k) - kblen, kb, kblen);
+	k[31] &= 0xF8;  /* clear scalar bits 0..2 */
+	k[0] &= 0x7F;  /* clear scalar bit 255 */
+	k[0] |= 0x40;  /* set scalar bit 254 */
+
+	/* obsolete
+	print_int("x1", x1);
+	*/
+
+	swap = 0;
+	for (i = 254; i >= 0; i --) {  /* one ladder step per scalar bit */
+		uint32_t kt;
+
+		kt = (k[31 - (i >> 3)] >> (i & 7)) & 1;
+		swap ^= kt;
+		cswap(x2, x3, swap);
+		cswap(z2, z3, swap);
+		swap = kt;
+
+		/* obsolete
+		print_int("x2", x2);
+		print_int("z2", z2);
+		print_int("x3", x3);
+		print_int("z3", z3);
+		*/
+
+		f255_add(a, x2, z2);
+		f255_square(aa, a);
+		f255_sub(b, x2, z2);
+		f255_square(bb, b);
+		f255_sub(e, aa, bb);
+		f255_add(c, x3, z3);
+		f255_sub(d, x3, z3);
+		f255_mul(da, d, a);
+		f255_mul(cb, c, b);
+
+		/* obsolete
+		print_int("a ", a);
+		print_int("aa", aa);
+		print_int("b ", b);
+		print_int("bb", bb);
+		print_int("e ", e);
+		print_int("c ", c);
+		print_int("d ", d);
+		print_int("da", da);
+		print_int("cb", cb);
+		*/
+
+		f255_add(x3, da, cb);
+		f255_square(x3, x3);
+		f255_sub(z3, da, cb);
+		f255_square(z3, z3);
+		f255_mul(z3, z3, x1);
+		f255_mul(x2, aa, bb);
+		f255_mul_a24(z2, e);
+		f255_add(z2, z2, aa);
+		f255_mul(z2, e, z2);
+
+		/* obsolete
+		print_int("x2", x2);
+		print_int("z2", z2);
+		print_int("x3", x3);
+		print_int("z3", z3);
+		*/
+	}
+	cswap(x2, x3, swap);
+	cswap(z2, z3, swap);
+
+	/*
+	 * Inverse z2 with a modular exponentiation (exponent 2^255-21,
+	 * i.e. p-2). This is a simple square-and-multiply algorithm; most
+	 * non-squarings are shared since the exponent is almost all ones.
+	 */
+	memcpy(a, z2, sizeof z2);
+	for (i = 0; i < 15; i ++) {
+		f255_square(a, a);
+		f255_mul(a, a, z2);
+	}  /* a = z2^(2^16-1) */
+	memcpy(b, a, sizeof a);
+	for (i = 0; i < 14; i ++) {
+		int j;
+
+		for (j = 0; j < 16; j ++) {
+			f255_square(b, b);
+		}
+		f255_mul(b, b, a);
+	}  /* b = z2^(2^240-1) */
+	for (i = 14; i >= 0; i --) {  /* low 15 exponent bits: 0x7FEB */
+		f255_square(b, b);
+		if ((0xFFEB >> i) & 1) {
+			f255_mul(b, z2, b);
+		}
+	}
+	f255_mul(x2, x2, b);  /* x2 / z2 */
+	reduce_final_f255(x2);
+	le13_to_le8(G, 32, x2);
+	return 1;
+}
+
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	/* R = x * generator; the result of api_mul() is not examined. */
+	size_t gen_len;
+	const unsigned char *gen = api_generator(curve, &gen_len);
+
+	memcpy(R, gen, gen_len);
+	(void)api_mul(R, gen_len, x, xlen, curve);
+	return gen_len;
+}
+
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	/*
+	 * Deliberately unimplemented: this operation only serves ECDSA,
+	 * and Curve25519 signatures use EdDSA rather than ECDSA. Every
+	 * argument is ignored and the call always returns 0.
+	 */
+	(void)curve;
+	(void)ylen;
+	(void)y;
+	(void)xlen;
+	(void)x;
+	(void)len;
+	(void)B;
+	(void)A;
+	return 0;
+}
+
+/* Function table for the 13-bit-limb Curve25519 code; see bearssl_ec.h */
+const br_ec_impl br_ec_c25519_m15 = {
+	(uint32_t)0x20000000,  /* NOTE(review): only bit 29 set; presumably the supported-curve mask -- confirm */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
diff --git a/third_party/bearssl/src/ec_c25519_m31.c b/third_party/bearssl/src/ec_c25519_m31.c
new file mode 100644
index 0000000..1dd6d51
--- /dev/null
+++ b/third_party/bearssl/src/ec_c25519_m31.c
@@ -0,0 +1,800 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* obsolete
+#include <stdio.h>
+#include <stdlib.h>
+static void
+print_int(const char *name, const uint32_t *x)
+{
+	size_t u;
+	unsigned char tmp[40];
+
+	printf("%s = ", name);
+	for (u = 0; u < 9; u ++) {
+		if (x[u] > 0x3FFFFFFF) {
+			printf("INVALID:");
+			for (u = 0; u < 9; u ++) {
+				printf(" %08X", x[u]);
+			}
+			printf("\n");
+			return;
+		}
+	}
+	memset(tmp, 0, sizeof tmp);
+	for (u = 0; u < 9; u ++) {
+		uint64_t w;
+		int j, k;
+
+		w = x[u];
+		j = 30 * (int)u;
+		k = j & 7;
+		if (k != 0) {
+			w <<= k;
+			j -= k;
+		}
+		k = j >> 3;
+		for (j = 0; j < 8; j ++) {
+			tmp[39 - k - j] |= (unsigned char)w;
+			w >>= 8;
+		}
+	}
+	for (u = 8; u < 40; u ++) {
+		printf("%02X", tmp[u]);
+	}
+	printf("\n");
+}
+*/
+
+/*
+ * ARSH(x, n): arithmetic (sign-extending) right shift of a 32-bit value.
+ * If BR_NO_ARITH_SHIFT is undefined, or defined to 0, then we _assume_
+ * that right-shifting a signed negative integer copies the sign bit.
+ * This is "implementation-defined behaviour": it is not undefined, but
+ * it may differ between compilers (GCC documents an arithmetic shift).
+ * In that default variant, x is reinterpreted through an int32_t pointer,
+ * so it must be an lvalue. Otherwise the sign bits are rebuilt with
+ * unsigned operations only (n must then be between 1 and 31).
+ */
+#if BR_NO_ARITH_SHIFT
+#define ARSH(x, n)   (((uint32_t)(x) >> (n)) \
+                    | ((-((uint32_t)(x) >> 31)) << (32 - (n))))
+#else
+#define ARSH(x, n)   ((*(int32_t *)&(x)) >> (n))
+#endif
+
+/*
+ * Convert an integer from unsigned little-endian encoding to a sequence of
+ * 30-bit words in little-endian order. Only complete words are written to
+ * dst[]; the final "partial" word is returned instead.
+ */
+static uint32_t
+le8_to_le30(uint32_t *dst, const unsigned char *src, size_t len)
+{
+	uint32_t acc;  /* bits read but not yet emitted */
+	int acc_len;  /* number of valid bits in acc */
+
+	acc = 0;
+	acc_len = 0;
+	while (len -- > 0) {
+		uint32_t b;
+
+		b = *src ++;
+		if (acc_len < 22) {  /* acc_len + 8 < 30: word not complete yet */
+			acc |= b << acc_len;
+			acc_len += 8;
+		} else {
+			*dst ++ = (acc | (b << acc_len)) & 0x3FFFFFFF;
+			acc = b >> (30 - acc_len);  /* leftover high bits of b */
+			acc_len -= 22;
+		}
+	}
+	return acc;
+}
+
+/*
+ * Convert an integer (30-bit words, little-endian) to unsigned
+ * little-endian encoding. Exactly len bytes are written to dst[]; a new
+ * source word is read each time fewer than 8 bits are pending.
+ */
+static void
+le30_to_le8(unsigned char *dst, size_t len, const uint32_t *src)
+{
+	uint32_t acc;  /* pending bits, not yet written out */
+	int acc_len;  /* number of valid bits in acc */
+
+	acc = 0;
+	acc_len = 0;
+	while (len -- > 0) {
+		if (acc_len < 8) {
+			uint32_t w;
+
+			w = *src ++;
+			*dst ++ = (unsigned char)(acc | (w << acc_len));
+			acc = w >> (8 - acc_len);
+			acc_len += 22;  /* 30 bits fetched, 8 bits emitted */
+		} else {
+			*dst ++ = (unsigned char)acc;
+			acc >>= 8;
+			acc_len -= 8;
+		}
+	}
+}
+
+/*
+ * Multiply two integers: d = a * b. Source integers are arrays of nine
+ * 30-bit words, for values up to 2^270-1. The result is written over
+ * 18 words of 30 bits each (d[0] to d[17]).
+ */
+static void
+mul9(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	/*
+	 * Maximum intermediate result is no more than
+	 * 10376293531797946367, which fits in 64 bits. Reason:
+	 *
+	 *   10376293531797946367 = 9 * (2^30-1)^2 + 9663676406
+	 *   10376293531797946367 < 9663676407 * 2^30
+	 *
+	 * Thus, adding together 9 products of 30-bit integers, with
+	 * a carry of at most 9663676406, yields an integer that fits
+	 * on 64 bits and generates a carry of at most 9663676406.
+	 */
+	uint64_t t[17];  /* t[k] = sum of a[i]*b[j] over i + j == k */
+	uint64_t cc;
+	int i;
+
+	t[ 0] = MUL31(a[0], b[0]);
+	t[ 1] = MUL31(a[0], b[1])
+		+ MUL31(a[1], b[0]);
+	t[ 2] = MUL31(a[0], b[2])
+		+ MUL31(a[1], b[1])
+		+ MUL31(a[2], b[0]);
+	t[ 3] = MUL31(a[0], b[3])
+		+ MUL31(a[1], b[2])
+		+ MUL31(a[2], b[1])
+		+ MUL31(a[3], b[0]);
+	t[ 4] = MUL31(a[0], b[4])
+		+ MUL31(a[1], b[3])
+		+ MUL31(a[2], b[2])
+		+ MUL31(a[3], b[1])
+		+ MUL31(a[4], b[0]);
+	t[ 5] = MUL31(a[0], b[5])
+		+ MUL31(a[1], b[4])
+		+ MUL31(a[2], b[3])
+		+ MUL31(a[3], b[2])
+		+ MUL31(a[4], b[1])
+		+ MUL31(a[5], b[0]);
+	t[ 6] = MUL31(a[0], b[6])
+		+ MUL31(a[1], b[5])
+		+ MUL31(a[2], b[4])
+		+ MUL31(a[3], b[3])
+		+ MUL31(a[4], b[2])
+		+ MUL31(a[5], b[1])
+		+ MUL31(a[6], b[0]);
+	t[ 7] = MUL31(a[0], b[7])
+		+ MUL31(a[1], b[6])
+		+ MUL31(a[2], b[5])
+		+ MUL31(a[3], b[4])
+		+ MUL31(a[4], b[3])
+		+ MUL31(a[5], b[2])
+		+ MUL31(a[6], b[1])
+		+ MUL31(a[7], b[0]);
+	t[ 8] = MUL31(a[0], b[8])
+		+ MUL31(a[1], b[7])
+		+ MUL31(a[2], b[6])
+		+ MUL31(a[3], b[5])
+		+ MUL31(a[4], b[4])
+		+ MUL31(a[5], b[3])
+		+ MUL31(a[6], b[2])
+		+ MUL31(a[7], b[1])
+		+ MUL31(a[8], b[0]);
+	t[ 9] = MUL31(a[1], b[8])
+		+ MUL31(a[2], b[7])
+		+ MUL31(a[3], b[6])
+		+ MUL31(a[4], b[5])
+		+ MUL31(a[5], b[4])
+		+ MUL31(a[6], b[3])
+		+ MUL31(a[7], b[2])
+		+ MUL31(a[8], b[1]);
+	t[10] = MUL31(a[2], b[8])
+		+ MUL31(a[3], b[7])
+		+ MUL31(a[4], b[6])
+		+ MUL31(a[5], b[5])
+		+ MUL31(a[6], b[4])
+		+ MUL31(a[7], b[3])
+		+ MUL31(a[8], b[2]);
+	t[11] = MUL31(a[3], b[8])
+		+ MUL31(a[4], b[7])
+		+ MUL31(a[5], b[6])
+		+ MUL31(a[6], b[5])
+		+ MUL31(a[7], b[4])
+		+ MUL31(a[8], b[3]);
+	t[12] = MUL31(a[4], b[8])
+		+ MUL31(a[5], b[7])
+		+ MUL31(a[6], b[6])
+		+ MUL31(a[7], b[5])
+		+ MUL31(a[8], b[4]);
+	t[13] = MUL31(a[5], b[8])
+		+ MUL31(a[6], b[7])
+		+ MUL31(a[7], b[6])
+		+ MUL31(a[8], b[5]);
+	t[14] = MUL31(a[6], b[8])
+		+ MUL31(a[7], b[7])
+		+ MUL31(a[8], b[6]);
+	t[15] = MUL31(a[7], b[8])
+		+ MUL31(a[8], b[7]);
+	t[16] = MUL31(a[8], b[8]);
+
+	/*
+	 * Propagate carries: each d[i] keeps 30 bits, the rest moves up.
+	 */
+	cc = 0;
+	for (i = 0; i < 17; i ++) {
+		uint64_t w;
+
+		w = t[i] + cc;
+		d[i] = (uint32_t)w & 0x3FFFFFFF;
+		cc = w >> 30;
+	}
+	d[17] = (uint32_t)cc;  /* top word receives the last carry */
+}
+
+/*
+ * Square a 270-bit integer (nine 30-bit words) into 18 words of 30 bits.
+ * Each cross product a[i]*a[j] (i != j) is computed once, then doubled.
+ */
+static void
+square9(uint32_t *d, const uint32_t *a)
+{
+	uint64_t t[17];  /* t[k] = sum of a[i]*a[j] over i + j == k */
+	uint64_t cc;
+	int i;
+
+	t[ 0] = MUL31(a[0], a[0]);
+	t[ 1] = ((MUL31(a[0], a[1])) << 1);
+	t[ 2] = MUL31(a[1], a[1])
+		+ ((MUL31(a[0], a[2])) << 1);
+	t[ 3] = ((MUL31(a[0], a[3])
+		+ MUL31(a[1], a[2])) << 1);
+	t[ 4] = MUL31(a[2], a[2])
+		+ ((MUL31(a[0], a[4])
+		+ MUL31(a[1], a[3])) << 1);
+	t[ 5] = ((MUL31(a[0], a[5])
+		+ MUL31(a[1], a[4])
+		+ MUL31(a[2], a[3])) << 1);
+	t[ 6] = MUL31(a[3], a[3])
+		+ ((MUL31(a[0], a[6])
+		+ MUL31(a[1], a[5])
+		+ MUL31(a[2], a[4])) << 1);
+	t[ 7] = ((MUL31(a[0], a[7])
+		+ MUL31(a[1], a[6])
+		+ MUL31(a[2], a[5])
+		+ MUL31(a[3], a[4])) << 1);
+	t[ 8] = MUL31(a[4], a[4])
+		+ ((MUL31(a[0], a[8])
+		+ MUL31(a[1], a[7])
+		+ MUL31(a[2], a[6])
+		+ MUL31(a[3], a[5])) << 1);
+	t[ 9] = ((MUL31(a[1], a[8])
+		+ MUL31(a[2], a[7])
+		+ MUL31(a[3], a[6])
+		+ MUL31(a[4], a[5])) << 1);
+	t[10] = MUL31(a[5], a[5])
+		+ ((MUL31(a[2], a[8])
+		+ MUL31(a[3], a[7])
+		+ MUL31(a[4], a[6])) << 1);
+	t[11] = ((MUL31(a[3], a[8])
+		+ MUL31(a[4], a[7])
+		+ MUL31(a[5], a[6])) << 1);
+	t[12] = MUL31(a[6], a[6])
+		+ ((MUL31(a[4], a[8])
+		+ MUL31(a[5], a[7])) << 1);
+	t[13] = ((MUL31(a[5], a[8])
+		+ MUL31(a[6], a[7])) << 1);
+	t[14] = MUL31(a[7], a[7])
+		+ ((MUL31(a[6], a[8])) << 1);
+	t[15] = ((MUL31(a[7], a[8])) << 1);
+	t[16] = MUL31(a[8], a[8]);
+
+	/*
+	 * Propagate carries: each d[i] keeps 30 bits, the rest moves up.
+	 */
+	cc = 0;
+	for (i = 0; i < 17; i ++) {
+		uint64_t w;
+
+		w = t[i] + cc;
+		d[i] = (uint32_t)w & 0x3FFFFFFF;
+		cc = w >> 30;
+	}
+	d[17] = (uint32_t)cc;  /* top word receives the last carry */
+}
+
+/*
+ * Perform a "final reduction" in field F255 (field for Curve25519).
+ * The source value must be less than twice the modulus. If the value
+ * is not lower than the modulus, then the modulus is subtracted and
+ * this function returns 1; otherwise, it leaves it untouched and it
+ * returns 0.
+ */
+static uint32_t
+reduce_final_f255(uint32_t *d)
+{
+	uint32_t t[9];
+	uint32_t cc;
+	int i;
+
+	memcpy(t, d, sizeof t);
+	cc = 19;  /* compute t = d + 19 */
+	for (i = 0; i < 9; i ++) {
+		uint32_t w;
+
+		w = t[i] + cc;
+		cc = w >> 30;
+		t[i] = w & 0x3FFFFFFF;
+	}
+	cc = t[8] >> 15;  /* bit 255 of d + 19: set when d >= 2^255-19 */
+	t[8] &= 0x7FFF;  /* drop 2^255: t is then d - (2^255-19) */
+	CCOPY(cc, d, t, sizeof t);  /* presumably copies t over d only when cc is 1 -- confirm in inner.h */
+	return cc;
+}
+
+/*
+ * Multiply two integers modulo 2^255-19: d = a * b. Operands are arrays
+ * of 9 words of 30 bits each, in little-endian order. Inputs may be up to
+ * 2^256-1; the output fits on 256 bits and is lower than twice the
+ * modulus. d may be the same array as a or b (a local buffer is used).
+ */
+static void
+f255_mul(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t t[18], cc;
+	int i;
+
+	/*
+	 * Compute raw multiplication. All result words fit in 30 bits
+	 * each; upper word (t[17]) must fit on 2 bits, since the product
+	 * of two 256-bit integers must fit on 512 bits.
+	 */
+	mul9(t, a, b);
+
+	/*
+	 * Modular reduction: each high word is added where necessary.
+	 * Since the modulus is 2^255-19 and word 9 corresponds to
+	 * offset 9*30 = 270, word 9+k must be added to word k with
+	 * a factor of 19*2^15 = 622592. The extra bits in word 8 are also
+	 * added that way.
+	 *
+	 * Keeping the carry on 32 bits helps with 32-bit architectures,
+	 * and does not noticeably impact performance on 64-bit systems.
+	 */
+	cc = MUL15(t[8] >> 15, 19);  /* at most 19*(2^15-1) = 622573 */
+	t[8] &= 0x7FFF;
+	for (i = 0; i < 9; i ++) {
+		uint64_t w;
+
+		w = (uint64_t)t[i] + (uint64_t)cc + MUL31(t[i + 9], 622592);
+		t[i] = (uint32_t)w & 0x3FFFFFFF;
+		cc = (uint32_t)(w >> 30);  /* at most 622592 */
+	}
+
+	/*
+	 * Original product was up to (2^256-1)^2, i.e. a 512-bit integer.
+	 * This was split into two parts (upper of 257 bits, lower of 255
+	 * bits), and the upper was added to the lower with a factor 19,
+	 * which means that the intermediate value is less than 77*2^255
+	 * (19*2^257 + 2^255). Therefore, the extra bits "t[8] >> 15" are
+	 * less than 77, and the initial carry cc is at most 76*19 = 1444.
+	 */
+	cc = MUL15(t[8] >> 15, 19);
+	t[8] &= 0x7FFF;
+	for (i = 0; i < 9; i ++) {
+		uint32_t z;
+
+		z = t[i] + cc;
+		d[i] = z & 0x3FFFFFFF;
+		cc = z >> 30;
+	}
+
+	/*
+	 * Final result is at most 2^255 + 1443. In particular, the last
+	 * carry is necessarily 0, since t[8] was truncated to 15 bits.
+	 */
+}
+
+/*
+ * Square an integer modulo 2^255-19: d = a * a. Operands are arrays of
+ * 9 words of 30 bits each, in little-endian order. The input may be up to
+ * 2^256-1; the output fits on 256 bits and is lower than twice the
+ * modulus. d may be the same array as a (a local buffer is used).
+ */
+static void
+f255_square(uint32_t *d, const uint32_t *a)
+{
+	uint32_t t[18], cc;
+	int i;
+
+	/*
+	 * Compute raw squaring. All result words fit in 30 bits
+	 * each; upper word (t[17]) must fit on 2 bits, since the square
+	 * of a 256-bit integer must fit on 512 bits.
+	 */
+	square9(t, a);
+
+	/*
+	 * Modular reduction: each high word is added where necessary.
+	 * See f255_mul() for details on the reduction and carry limits.
+	 */
+	cc = MUL15(t[8] >> 15, 19);
+	t[8] &= 0x7FFF;
+	for (i = 0; i < 9; i ++) {
+		uint64_t w;
+
+		w = (uint64_t)t[i] + (uint64_t)cc + MUL31(t[i + 9], 622592);
+		t[i] = (uint32_t)w & 0x3FFFFFFF;
+		cc = (uint32_t)(w >> 30);
+	}
+	cc = MUL15(t[8] >> 15, 19);  /* second pass: fold the bits that crept above 2^255 */
+	t[8] &= 0x7FFF;
+	for (i = 0; i < 9; i ++) {
+		uint32_t z;
+
+		z = t[i] + cc;
+		d[i] = z & 0x3FFFFFFF;
+		cc = z >> 30;
+	}
+}
+
+/*
+ * Add two values in F255: d = a + b, over 9 words of 30 bits. Partial
+ * reduction is performed (down to less than twice the modulus).
+ */
+static void
+f255_add(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	/*
+	 * Since operand words fit on 30 bits, we can use 32-bit
+	 * variables throughout.
+	 */
+	int i;
+	uint32_t cc, w;
+
+	cc = 0;
+	for (i = 0; i < 9; i ++) {
+		w = a[i] + b[i] + cc;
+		d[i] = w & 0x3FFFFFFF;
+		cc = w >> 30;
+	}
+	cc = MUL15(w >> 15, 19);  /* bits 255+ (word 8 starts at bit 240), times 19 */
+	d[8] &= 0x7FFF;  /* keep bits 240..254 only */
+	for (i = 0; i < 9; i ++) {
+		w = d[i] + cc;
+		d[i] = w & 0x3FFFFFFF;
+		cc = w >> 30;
+	}
+}
+
+/*
+ * Subtract one value from another in F255: d = a - b. Partial reduction
+ * is performed (down to less than twice the modulus).
+ */
+static void
+f255_sub(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	/*
+	 * We actually compute a - b + 2*p, so that the final value is
+	 * necessarily positive.
+	 */
+	int i;
+	uint32_t cc, w;
+
+	cc = (uint32_t)-38;  /* low part of 2*p = 2^256 - 38 */
+	for (i = 0; i < 9; i ++) {
+		w = a[i] - b[i] + cc;
+		d[i] = w & 0x3FFFFFFF;
+		cc = ARSH(w, 30);  /* signed carry: negative when a borrow occurs */
+	}
+	cc = MUL15((w + 0x10000) >> 15, 19);  /* 0x10000 in word 8 is the 2^256 of 2*p */
+	d[8] &= 0x7FFF;
+	for (i = 0; i < 9; i ++) {
+		w = d[i] + cc;
+		d[i] = w & 0x3FFFFFFF;
+		cc = w >> 30;
+	}
+}
+
+/*
+ * Multiply an integer by the 'A24' constant (121665): d = 121665 * a.
+ * Partial reduction is performed (down to less than twice the modulus).
+ */
+static void
+f255_mul_a24(uint32_t *d, const uint32_t *a)
+{
+	int i;
+	uint64_t w;
+	uint32_t cc;
+
+	/*
+	 * a[] is over 256 bits, thus a[8] has length at most 16 bits.
+	 * We single out the processing of the last word: intermediate
+	 * value w is up to 121665*2^16, yielding a carry for the next
+	 * loop of at most 19*(121665*2^16/2^15) = 4623289.
+	 */
+	cc = 0;
+	for (i = 0; i < 8; i ++) {
+		w = MUL31(a[i], 121665) + (uint64_t)cc;
+		d[i] = (uint32_t)w & 0x3FFFFFFF;
+		cc = (uint32_t)(w >> 30);
+	}
+	w = MUL31(a[8], 121665) + (uint64_t)cc;
+	d[8] = (uint32_t)w & 0x7FFF;  /* top word keeps bits 240..254 */
+	cc = MUL15((uint32_t)(w >> 15), 19);  /* bits 255 and above, times 19 */
+
+	for (i = 0; i < 9; i ++) {
+		uint32_t z;
+
+		z = d[i] + cc;
+		d[i] = z & 0x3FFFFFFF;
+		cc = z >> 30;
+	}
+}
+
+static const unsigned char GEN[] = {  /* 0x09 followed by 31 zero bytes */
+	0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};  /* returned by api_generator(); api_mul() decodes points little-endian: 9 */
+
+static const unsigned char ORDER[] = {  /* returned by api_order() */
+	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};  /* NOTE(review): big-endian 2^255-1, not a group order -- confirm intent */
+
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	(void)curve;		/* single curve: selector ignored */
+	*len = sizeof GEN;	/* 32 bytes */
+	return GEN;
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	(void)curve;		/* single curve: selector ignored */
+	*len = sizeof ORDER;	/* 32 bytes */
+	return ORDER;
+}
+
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	*len = (size_t)32;	/* reported length */
+	(void)curve;		/* single curve: selector ignored */
+	return (size_t)0;	/* reported offset */
+}
+
+static void
+cswap(uint32_t *a, uint32_t *b, uint32_t ctl)
+{
+	/* Swap the 9-word arrays when ctl == 1; no-op when ctl == 0. */
+	uint32_t mask;
+	int n;
+
+	mask = (uint32_t)0 - ctl;	/* 0 or 0xFFFFFFFF: no branch on ctl */
+	for (n = 0; n < 9; n ++) {
+		uint32_t delta;
+
+		delta = (a[n] ^ b[n]) & mask;
+		a[n] ^= delta;
+		b[n] ^= delta;
+	}
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *kb, size_t kblen, int curve)
+{
+	uint32_t x1[9], x2[9], x3[9], z2[9], z3[9];
+	uint32_t a[9], aa[9], b[9], bb[9];
+	uint32_t c[9], d[9], e[9], da[9], cb[9];
+	unsigned char k[32];
+	uint32_t swap;
+	int i;
+
+	(void)curve;
+
+	/*
+	 * G[] (exactly 32 bytes) is multiplied by kb[] (at most 32 bytes)
+	 * and overwritten with the result; 1 is returned, or 0 on bad length.
+	 * RFC 7748 mandates that the high bit of the last point byte must
+	 * be ignored/cleared.
+	 */
+	if (Glen != 32 || kblen > 32) {
+		return 0;
+	}
+	G[31] &= 0x7F;
+
+	/*
+	 * Initialise the ladder state: x1 = x3 = decoded point, x2 = 1,
+	 * z2 = 0, z3 = 1 (plain limb values, no representation change).
+	 */
+	x1[8] = le8_to_le30(x1, G, 32);
+	memcpy(x3, x1, sizeof x1);
+	memset(z2, 0, sizeof z2);
+	memset(x2, 0, sizeof x2);
+	x2[0] = 1;
+	memset(z3, 0, sizeof z3);
+	z3[0] = 1;
+
+	memset(k, 0, (sizeof k) - kblen);  /* left-pad: k[0] is the top byte */
+	memcpy(k + (sizeof k) - kblen, kb, kblen);
+	k[31] &= 0xF8;  /* clear scalar bits 0..2 */
+	k[0] &= 0x7F;  /* clear scalar bit 255 */
+	k[0] |= 0x40;  /* set scalar bit 254 */
+
+	/* obsolete
+	print_int("x1", x1);
+	*/
+
+	swap = 0;
+	for (i = 254; i >= 0; i --) {  /* one ladder step per scalar bit */
+		uint32_t kt;
+
+		kt = (k[31 - (i >> 3)] >> (i & 7)) & 1;
+		swap ^= kt;
+		cswap(x2, x3, swap);
+		cswap(z2, z3, swap);
+		swap = kt;
+
+		/* obsolete
+		print_int("x2", x2);
+		print_int("z2", z2);
+		print_int("x3", x3);
+		print_int("z3", z3);
+		*/
+
+		f255_add(a, x2, z2);
+		f255_square(aa, a);
+		f255_sub(b, x2, z2);
+		f255_square(bb, b);
+		f255_sub(e, aa, bb);
+		f255_add(c, x3, z3);
+		f255_sub(d, x3, z3);
+		f255_mul(da, d, a);
+		f255_mul(cb, c, b);
+
+		/* obsolete
+		print_int("a ", a);
+		print_int("aa", aa);
+		print_int("b ", b);
+		print_int("bb", bb);
+		print_int("e ", e);
+		print_int("c ", c);
+		print_int("d ", d);
+		print_int("da", da);
+		print_int("cb", cb);
+		*/
+
+		f255_add(x3, da, cb);
+		f255_square(x3, x3);
+		f255_sub(z3, da, cb);
+		f255_square(z3, z3);
+		f255_mul(z3, z3, x1);
+		f255_mul(x2, aa, bb);
+		f255_mul_a24(z2, e);
+		f255_add(z2, z2, aa);
+		f255_mul(z2, e, z2);
+
+		/* obsolete
+		print_int("x2", x2);
+		print_int("z2", z2);
+		print_int("x3", x3);
+		print_int("z3", z3);
+		*/
+	}
+	cswap(x2, x3, swap);
+	cswap(z2, z3, swap);
+
+	/*
+	 * Inverse z2 with a modular exponentiation (exponent 2^255-21,
+	 * i.e. p-2). This is a simple square-and-multiply algorithm; most
+	 * non-squarings are shared since the exponent is almost all ones.
+	 */
+	memcpy(a, z2, sizeof z2);
+	for (i = 0; i < 15; i ++) {
+		f255_square(a, a);
+		f255_mul(a, a, z2);
+	}  /* a = z2^(2^16-1) */
+	memcpy(b, a, sizeof a);
+	for (i = 0; i < 14; i ++) {
+		int j;
+
+		for (j = 0; j < 16; j ++) {
+			f255_square(b, b);
+		}
+		f255_mul(b, b, a);
+	}  /* b = z2^(2^240-1) */
+	for (i = 14; i >= 0; i --) {  /* low 15 exponent bits: 0x7FEB */
+		f255_square(b, b);
+		if ((0xFFEB >> i) & 1) {
+			f255_mul(b, z2, b);
+		}
+	}
+	f255_mul(x2, x2, b);  /* x2 / z2 */
+	reduce_final_f255(x2);
+	le30_to_le8(G, 32, x2);
+	return 1;
+}
+
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	/* R = x * generator; the result of api_mul() is not examined. */
+	size_t gen_len;
+	const unsigned char *gen = api_generator(curve, &gen_len);
+
+	memcpy(R, gen, gen_len);
+	(void)api_mul(R, gen_len, x, xlen, curve);
+	return gen_len;
+}
+
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	/*
+	 * Deliberately unimplemented: this operation only serves ECDSA,
+	 * and Curve25519 signatures use EdDSA rather than ECDSA. Every
+	 * argument is ignored and the call always returns 0.
+	 */
+	(void)curve;
+	(void)ylen;
+	(void)y;
+	(void)xlen;
+	(void)x;
+	(void)len;
+	(void)B;
+	(void)A;
+	return 0;
+}
+
+/* Function table for the 30-bit-limb Curve25519 code; see bearssl_ec.h */
+const br_ec_impl br_ec_c25519_m31 = {
+	(uint32_t)0x20000000,  /* NOTE(review): only bit 29 set; presumably the supported-curve mask -- confirm */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
diff --git a/third_party/bearssl/src/ec_c25519_m62.c b/third_party/bearssl/src/ec_c25519_m62.c
new file mode 100644
index 0000000..6b058eb
--- /dev/null
+++ b/third_party/bearssl/src/ec_c25519_m62.c
@@ -0,0 +1,605 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+#if BR_UMUL128
+#include <intrin.h>
+#endif
+
+static const unsigned char GEN[] = {  /* 0x09 followed by 31 zero bytes */
+	0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};  /* returned by api_generator() as the 32-byte generator encoding */
+
+static const unsigned char ORDER[] = {  /* returned by api_order() */
+	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};  /* NOTE(review): big-endian 2^255-1, not a group order -- confirm intent */
+
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	(void)curve;		/* single curve: selector ignored */
+	*len = sizeof GEN;	/* 32 bytes */
+	return GEN;
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	(void)curve;		/* single curve: selector ignored */
+	*len = sizeof ORDER;	/* 32 bytes */
+	return ORDER;
+}
+
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	*len = (size_t)32;	/* reported length */
+	(void)curve;		/* single curve: selector ignored */
+	return (size_t)0;	/* reported offset */
+}
+
+/*
+ * A field element is encoded as five 64-bit integers, in basis 2^51.
+ * Limbs may be occasionally larger than 2^51, to save on carry
+ * propagation costs. MASK51 (2^51-1) keeps the low 51 bits of a limb.
+ */
+
+#define MASK51   (((uint64_t)1 << 51) - (uint64_t)1)
+
+/*
+ * Exchange the five limbs of a[] and b[] if ctl is 1; no-op if ctl is 0.
+ */
+static inline void
+f255_cswap(uint64_t *a, uint64_t *b, uint32_t ctl)
+{
+	uint64_t mask, delta;
+	int i;
+
+	mask = (uint64_t)0 - (uint64_t)ctl;	/* all-zeros or all-ones */
+	for (i = 0; i < 5; i ++) {
+		delta = (a[i] ^ b[i]) & mask;
+		a[i] ^= delta; b[i] ^= delta;
+	}
+}
+
+/*
+ * Limb-wise addition (d[i] = a[i] + b[i]); carries are not propagated.
+ */
+static inline void
+f255_add(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+	int i;
+
+	for (i = 0; i < 5; i ++) {
+		d[i] = a[i] + b[i];
+	}
+}
+
+/*
+ * Subtraction: d = a - b in the field.
+ * On input, limbs must fit on 60 bits each. On output, result is
+ * partially reduced, with max value 2^255+19456; moreover, all
+ * limbs will fit on 51 bits, except the low limb, which may have
+ * value up to 2^51+19455.
+ */
+static inline void
+f255_sub(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+	uint64_t cc, w;
+
+	/*
+	 * We compute d = (2^255-19)*1024 + a - b. Since the limbs
+	 * fit on 60 bits, the maximum value of operands are slightly
+	 * more than 2^264, but much less than 2^265-19456. This
+	 * ensures that the result is positive.
+	 */
+
+	/*
+	 * Initial carry is 19456, since we add 2^265-19456. Each
+	 * individual subtraction may yield a carry up to 513.
+	 */
+	w = a[0] - b[0] - 19456;
+	d[0] = w & MASK51;
+	cc = -(w >> 51) & 0x3FF;  /* borrow to take from the next limb */
+	w = a[1] - b[1] - cc;
+	d[1] = w & MASK51;
+	cc = -(w >> 51) & 0x3FF;
+	w = a[2] - b[2] - cc;
+	d[2] = w & MASK51;
+	cc = -(w >> 51) & 0x3FF;
+	w = a[3] - b[3] - cc;
+	d[3] = w & MASK51;
+	cc = -(w >> 51) & 0x3FF;
+	d[4] = ((uint64_t)1 << 61) + a[4] - b[4] - cc;  /* 2^61 in limb 4 is 2^265 */
+
+	/*
+	 * Partial reduction. The intermediate result may be up to
+	 * slightly above 2^265, but less than 2^265+2^255. When we
+	 * truncate to 255 bits, the upper bits will be at most 1024.
+	 */
+	d[0] += 19 * (d[4] >> 51);
+	d[4] &= MASK51;
+}
+
+/*
+ * UMUL51(hi, lo, x, y) computes, from the full 128-bit product x * y:
+ *
+ *   hi = floor((x * y) / (2^51))
+ *   lo = x * y mod 2^51
+ *
+ * Note that lo < 2^51, but "hi" may be larger, if the input operands are
+ * larger. The BR_UMUL128 variant rebuilds hi from both _umul128() halves.
+ */
+#if BR_INT128
+
+#define UMUL51(hi, lo, x, y)   do { \
+		unsigned __int128 umul_tmp; \
+		umul_tmp = (unsigned __int128)(x) * (unsigned __int128)(y); \
+		(hi) = (uint64_t)(umul_tmp >> 51); \
+		(lo) = (uint64_t)umul_tmp & MASK51; \
+	} while (0)
+
+#elif BR_UMUL128
+
+#define UMUL51(hi, lo, x, y)   do { \
+		uint64_t umul_hi, umul_lo; \
+		umul_lo = _umul128((x), (y), &umul_hi); \
+		(hi) = (umul_hi << 13) | (umul_lo >> 51); \
+		(lo) = umul_lo & MASK51; \
+	} while (0)
+
+#endif
+
+/*
+ * Multiplication: d = a * b in the field. a[] and b[] are only read;
+ * d may alias either of them. On input, limbs must fit on 54 bits each.
+ * On output, limb 0 is at most 2^51 + 155647, and other limbs fit
+ * on 51 bits each.
+ */
+static inline void
+f255_mul(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+	uint64_t t[10], hi, lo, w, cc;
+
+	/*
+	 * Perform cross products, accumulating values without carry
+	 * propagation.
+	 *
+	 * Since input limbs fit on 54 bits each, each individual
+	 * UMUL51 will produce a "hi" of less than 2^57. The maximum
+	 * sum will be at most 5*(2^57-1) + 4*(2^51-1) (for t[5]),
+	 * i.e. less than 324*2^51.
+	 */
+
+	UMUL51(t[1], t[0], a[0], b[0]);
+
+	UMUL51(t[2], lo, a[1], b[0]); t[1] += lo;
+	UMUL51(hi, lo, a[0], b[1]); t[1] += lo; t[2] += hi;
+
+	UMUL51(t[3], lo, a[2], b[0]); t[2] += lo;
+	UMUL51(hi, lo, a[1], b[1]); t[2] += lo; t[3] += hi;
+	UMUL51(hi, lo, a[0], b[2]); t[2] += lo; t[3] += hi;
+
+	UMUL51(t[4], lo, a[3], b[0]); t[3] += lo;
+	UMUL51(hi, lo, a[2], b[1]); t[3] += lo; t[4] += hi;
+	UMUL51(hi, lo, a[1], b[2]); t[3] += lo; t[4] += hi;
+	UMUL51(hi, lo, a[0], b[3]); t[3] += lo; t[4] += hi;
+
+	UMUL51(t[5], lo, a[4], b[0]); t[4] += lo;
+	UMUL51(hi, lo, a[3], b[1]); t[4] += lo; t[5] += hi;
+	UMUL51(hi, lo, a[2], b[2]); t[4] += lo; t[5] += hi;
+	UMUL51(hi, lo, a[1], b[3]); t[4] += lo; t[5] += hi;
+	UMUL51(hi, lo, a[0], b[4]); t[4] += lo; t[5] += hi;
+
+	UMUL51(t[6], lo, a[4], b[1]); t[5] += lo;
+	UMUL51(hi, lo, a[3], b[2]); t[5] += lo; t[6] += hi;
+	UMUL51(hi, lo, a[2], b[3]); t[5] += lo; t[6] += hi;
+	UMUL51(hi, lo, a[1], b[4]); t[5] += lo; t[6] += hi;
+
+	UMUL51(t[7], lo, a[4], b[2]); t[6] += lo;
+	UMUL51(hi, lo, a[3], b[3]); t[6] += lo; t[7] += hi;
+	UMUL51(hi, lo, a[2], b[4]); t[6] += lo; t[7] += hi;
+
+	UMUL51(t[8], lo, a[4], b[3]); t[7] += lo;
+	UMUL51(hi, lo, a[3], b[4]); t[7] += lo; t[8] += hi;
+
+	UMUL51(t[9], lo, a[4], b[4]); t[8] += lo;
+
+	/*
+	 * The upper words t[5]..t[9] are folded back into the lower
+	 * words, using the rule that 2^255 = 19 in the field.
+	 *
+	 * Since each t[i] is less than 324*2^51, the additions below
+	 * will yield less than 6480*2^51 in each limb; this fits in
+	 * 64 bits (6480*2^51 < 8192*2^51 = 2^64), hence there is
+	 * no overflow.
+	 */
+	t[0] += 19 * t[5];
+	t[1] += 19 * t[6];
+	t[2] += 19 * t[7];
+	t[3] += 19 * t[8];
+	t[4] += 19 * t[9];
+
+	/*
+	 * Propagate carries; d[] is first written here, after all reads.
+	 */
+	w = t[0];
+	d[0] = w & MASK51;
+	cc = w >> 51;
+	w = t[1] + cc;
+	d[1] = w & MASK51;
+	cc = w >> 51;
+	w = t[2] + cc;
+	d[2] = w & MASK51;
+	cc = w >> 51;
+	w = t[3] + cc;
+	d[3] = w & MASK51;
+	cc = w >> 51;
+	w = t[4] + cc;
+	d[4] = w & MASK51;
+	cc = w >> 51;
+
+	/*
+	 * Since the limbs were 64-bit values, the top carry is at
+	 * most 8192 (in practice, that cannot be reached). We simply
+	 * performed a partial reduction.
+	 */
+	d[0] += 19 * cc;
+}
+
+/*
+ * Multiplication by A24 = 121665: d <- 121665*a, partially reduced.
+ * Input must have limbs of 60 bits at most (15*a[i] then fits on 64 bits).
+ */
+static inline void
+f255_mul_a24(uint64_t *d, const uint64_t *a)
+{
+	uint64_t t[5], cc, w;
+
+	/*
+	 * 121665 = 15 * 8111. We first multiply by 15, with carry
+	 * propagation and partial reduction.
+	 */
+	w = a[0] * 15;
+	t[0] = w & MASK51;
+	cc = w >> 51;
+	w = a[1] * 15 + cc;
+	t[1] = w & MASK51;
+	cc = w >> 51;
+	w = a[2] * 15 + cc;
+	t[2] = w & MASK51;
+	cc = w >> 51;
+	w = a[3] * 15 + cc;
+	t[3] = w & MASK51;
+	cc = w >> 51;
+	w = a[4] * 15 + cc;
+	t[4] = w & MASK51;
+	t[0] += 19 * (w >> 51);
+
+	/*
+	 * Then multiplication by 8111. At that point, we know that
+	 * t[0] is less than 2^51 + 19*8192, and other limbs are less
+	 * than 2^51; thus, there will be no overflow.
+	 */
+	w = t[0] * 8111;
+	d[0] = w & MASK51;
+	cc = w >> 51;
+	w = t[1] * 8111 + cc;
+	d[1] = w & MASK51;
+	cc = w >> 51;
+	w = t[2] * 8111 + cc;
+	d[2] = w & MASK51;
+	cc = w >> 51;
+	w = t[3] * 8111 + cc;
+	d[3] = w & MASK51;
+	cc = w >> 51;
+	w = t[4] * 8111 + cc;
+	d[4] = w & MASK51;
+	d[0] += 19 * (w >> 51);
+}
+
+/*
+ * Finalize reduction.
+ * On input, limbs must fit on 51 bits, except possibly the low limb,
+ * which may be slightly above 2^51. On output, all limbs fit on 51 bits.
+ */
+static inline void
+f255_final_reduce(uint64_t *a)
+{
+	uint64_t t[5], cc, w;
+
+	/*
+	 * Normalize a[] first: the excess of the low limb is propagated
+	 * upwards and the top carry is folded back (2^255 = 19), so that
+	 * a[] has 51-bit limbs even when the final selection keeps it
+	 * (the encoder in api_mul() assembles limbs with OR, not addition).
+	 */
+	w = a[0]; a[0] = w & MASK51; cc = w >> 51;
+	w = a[1] + cc; a[1] = w & MASK51; cc = w >> 51;
+	w = a[2] + cc; a[2] = w & MASK51; cc = w >> 51;
+	w = a[3] + cc; a[3] = w & MASK51; cc = w >> 51;
+	w = a[4] + cc; a[4] = w & MASK51; cc = w >> 51;
+	a[0] += 19 * cc;
+
+	/*
+	 * We add 19. If the result (in t[]) is below 2^255, then a[]
+	 * is already less than 2^255-19, thus already reduced.
+	 * Otherwise, we subtract 2^255 from t[], in which case we have
+	 * t = a - (2^255-19), and that's our result. Bit 255 of t ends
+	 * up in cc (0 or 1); it is then turned into a selection mask.
+	 */
+	w = a[0] + 19; t[0] = w & MASK51; cc = w >> 51;
+	w = a[1] + cc; t[1] = w & MASK51; cc = w >> 51;
+	w = a[2] + cc; t[2] = w & MASK51; cc = w >> 51;
+	w = a[3] + cc; t[3] = w & MASK51; cc = w >> 51;
+	w = a[4] + cc; t[4] = w & MASK51; cc = w >> 51;
+
+	cc = -cc;
+	a[0] ^= cc & (a[0] ^ t[0]);
+	a[1] ^= cc & (a[1] ^ t[1]);
+	a[2] ^= cc & (a[2] ^ t[2]);
+	a[3] ^= cc & (a[3] ^ t[3]);
+	a[4] ^= cc & (a[4] ^ t[4]);
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *kb, size_t kblen, int curve)
+{
+	unsigned char k[32];
+	uint64_t x1[5], x2[5], z2[5], x3[5], z3[5];
+	uint32_t swap;
+	int i;
+
+	(void)curve;
+
+	/*
+	 * Points are encoded over exactly 32 bytes; multipliers must fit
+	 * in 32 bytes as well. Otherwise, 0 is returned and G[] is untouched.
+	 */
+	if (Glen != 32 || kblen > 32) {
+		return 0;
+	}
+
+	/*
+	 * RFC 7748 mandates that the high bit of the last point byte must
+	 * be ignored/cleared; the "& MASK51" in the initialization for
+	 * x1[4] clears that bit.
+	 */
+	x1[0] = br_dec64le(&G[0]) & MASK51;
+	x1[1] = (br_dec64le(&G[6]) >> 3) & MASK51;
+	x1[2] = (br_dec64le(&G[12]) >> 6) & MASK51;
+	x1[3] = (br_dec64le(&G[19]) >> 1) & MASK51;
+	x1[4] = (br_dec64le(&G[24]) >> 12) & MASK51;
+
+	/*
+	 * We can use memset() to clear values, because exact-width types
+	 * like uint64_t are guaranteed to have no padding bits or
+	 * trap representations.
+	 */
+	memset(x2, 0, sizeof x2);
+	x2[0] = 1;
+	memset(z2, 0, sizeof z2);
+	memcpy(x3, x1, sizeof x1);
+	memcpy(z3, x2, sizeof x2);
+
+	/*
+	 * The multiplier is provided in big-endian notation, and possibly
+	 * shorter than 32 bytes; it is left-padded with zeros, then clamped.
+	 */
+	memset(k, 0, (sizeof k) - kblen);
+	memcpy(k + (sizeof k) - kblen, kb, kblen);
+	k[31] &= 0xF8; /* clear bits 0 to 2 (k[31] is the low byte) */
+	k[0] &= 0x7F; /* clear bit 255 */
+	k[0] |= 0x40; /* set bit 254 */
+
+	swap = 0;
+
+	for (i = 254; i >= 0; i --) {
+		uint64_t a[5], aa[5], b[5], bb[5], e[5];
+		uint64_t c[5], d[5], da[5], cb[5];
+		uint32_t kt;
+
+		kt = (k[31 - (i >> 3)] >> (i & 7)) & 1;
+		swap ^= kt;
+		f255_cswap(x2, x3, swap);
+		f255_cswap(z2, z3, swap);
+		swap = kt;
+
+		/*
+		 * At that point, limbs of x_2 and z_2 are assumed to fit
+		 * on at most 52 bits each.
+		 *
+		 * Each f255_add() adds one bit to the maximum range of
+		 * the values, but f255_sub() and f255_mul() bring back
+		 * the limbs into 52 bits. All f255_add() outputs are
+		 * used only as inputs for f255_mul(), which ensures
+		 * that limbs remain in the proper range.
+		 */
+
+		/* A = x_2 + z_2   -- limbs fit on 53 bits each */
+		f255_add(a, x2, z2);
+
+		/* AA = A^2 */
+		f255_mul(aa, a, a);
+
+		/* B = x_2 - z_2 */
+		f255_sub(b, x2, z2);
+
+		/* BB = B^2 */
+		f255_mul(bb, b, b);
+
+		/* E = AA - BB */
+		f255_sub(e, aa, bb);
+
+		/* C = x_3 + z_3   -- limbs fit on 53 bits each */
+		f255_add(c, x3, z3);
+
+		/* D = x_3 - z_3 */
+		f255_sub(d, x3, z3);
+
+		/* DA = D * A */
+		f255_mul(da, d, a);
+
+		/* CB = C * B */
+		f255_mul(cb, c, b);
+
+		/* x_3 = (DA + CB)^2 */
+		f255_add(x3, da, cb);
+		f255_mul(x3, x3, x3);
+
+		/* z_3 = x_1 * (DA - CB)^2 */
+		f255_sub(z3, da, cb);
+		f255_mul(z3, z3, z3);
+		f255_mul(z3, x1, z3);
+
+		/* x_2 = AA * BB */
+		f255_mul(x2, aa, bb);
+
+		/* z_2 = E * (AA + a24 * E) */
+		f255_mul_a24(z2, e);
+		f255_add(z2, aa, z2);
+		f255_mul(z2, e, z2);
+	}
+
+	f255_cswap(x2, x3, swap);
+	f255_cswap(z2, z3, swap);
+
+	/*
+	 * Compute 1/z2 = z2^(p-2). Since p = 2^255-19, most non-squaring
+	 * multiplications can be shared. x1 and x3, now useless, are temporaries.
+	 */
+	memcpy(x1, z2, sizeof z2);
+	for (i = 0; i < 15; i ++) {
+		f255_mul(x1, x1, x1);
+		f255_mul(x1, x1, z2);
+	} /* x1 = z2^(2^16-1) */
+	memcpy(x3, x1, sizeof x1);
+	for (i = 0; i < 14; i ++) {
+		int j;
+
+		for (j = 0; j < 16; j ++) {
+			f255_mul(x3, x3, x3);
+		}
+		f255_mul(x3, x3, x1);
+	} /* x3 = z2^(2^240-1) */
+	for (i = 14; i >= 0; i --) {
+		f255_mul(x3, x3, x3);
+		if ((0xFFEB >> i) & 1) {
+			f255_mul(x3, z2, x3);
+		}
+	} /* x3 = z2^(2^255-21) = z2^(p-2) */
+
+	/*
+	 * Compute x2/z2. We have 1/z2 in x3.
+	 */
+	f255_mul(x2, x2, x3);
+	f255_final_reduce(x2);
+
+	/*
+	 * Encode the final x2 value in little-endian. We first assemble the
+	 * limbs into 64-bit values (the OR is exact only for 51-bit limbs).
+	 */
+	x2[0] |= x2[1] << 51;
+	x2[1] = (x2[1] >> 13) | (x2[2] << 38);
+	x2[2] = (x2[2] >> 26) | (x2[3] << 25);
+	x2[3] = (x2[3] >> 39) | (x2[4] << 12);
+	br_enc64le(G, x2[0]);
+	br_enc64le(G + 8, x2[1]);
+	br_enc64le(G + 16, x2[2]);
+	br_enc64le(G + 24, x2[3]);
+	return 1;
+}
+
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	size_t gen_len;
+	const unsigned char *gen = api_generator(curve, &gen_len);
+
+	/* R <- encoded generator, then multiplied in place by x. */
+	memcpy(R, gen, gen_len);
+	(void)api_mul(R, gen_len, x, xlen, curve);
+	return gen_len;
+}
+
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	/*
+	 * This method is not implemented, since it is used for ECDSA
+	 * only, and there is no ECDSA over Curve25519 (which instead
+	 * uses EdDSA). All parameters are ignored; 0 is always returned.
+	 */
+	(void)A;
+	(void)B;
+	(void)len;
+	(void)x;
+	(void)xlen;
+	(void)y;
+	(void)ylen;
+	(void)curve;
+	return 0;
+}
+
+/* see bearssl_ec.h -- method table built from the api_*() functions above */
+const br_ec_impl br_ec_c25519_m62 = {
+	(uint32_t)0x20000000, /* only bit 29 set; presumably the curve mask */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
+
+/* see bearssl_ec.h -- returns the address of the br_ec_c25519_m62 table */
+const br_ec_impl *
+br_ec_c25519_m62_get(void)
+{
+	return &br_ec_c25519_m62;
+}
+
+#else
+
+/* see bearssl_ec.h -- variant of the #else branch: always returns 0 */
+const br_ec_impl *
+br_ec_c25519_m62_get(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/ec_c25519_m64.c b/third_party/bearssl/src/ec_c25519_m64.c
new file mode 100644
index 0000000..df48834
--- /dev/null
+++ b/third_party/bearssl/src/ec_c25519_m64.c
@@ -0,0 +1,831 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+#if BR_UMUL128
+#include <intrin.h>
+#endif
+
+static const unsigned char GEN[] = { /* 32 bytes; see api_generator() */
+	0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+static const unsigned char ORDER[] = { /* 32 bytes; see api_order() */
+	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	*len = sizeof GEN; /* 32 bytes */
+	(void)curve; /* the curve identifier is not used */
+	return &GEN[0];
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	*len = sizeof ORDER; /* 32 bytes */
+	(void)curve; /* the curve identifier is not used */
+	return &ORDER[0];
+}
+
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	(void)curve; /* the curve identifier is not used */
+	*len = 32;
+	return 0; /* presumably offset 0, length 32 -- see bearssl_ec.h */
+}
+
+/*
+ * A field element is encoded as four 64-bit integers, in basis 2^64.
+ * Operations return partially reduced values, which may range up to
+ * 2^255+37.
+ */
+
+#define MASK63   (((uint64_t)1 << 63) - (uint64_t)1) /* 2^63 - 1 */
+
+/*
+ * Swap two field elements if ctl is 1; leave them unchanged if ctl is 0.
+ */
+static inline void
+f255_cswap(uint64_t *a, uint64_t *b, uint32_t ctl)
+{
+	uint64_t m, w;
+
+	m = -(uint64_t)ctl; /* all-ones if ctl is 1, zero if ctl is 0 */
+	w = m & (a[0] ^ b[0]); a[0] ^= w; b[0] ^= w;
+	w = m & (a[1] ^ b[1]); a[1] ^= w; b[1] ^= w;
+	w = m & (a[2] ^ b[2]); a[2] ^= w; b[2] ^= w;
+	w = m & (a[3] ^ b[3]); a[3] ^= w; b[3] ^= w;
+}
+
+/*
+ * Addition in the field: d <- a + b (partially reduced: d <= 2^255+37).
+ */
+static inline void
+f255_add(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+#if BR_INT128
+
+	uint64_t t0, t1, t2, t3, cc;
+	unsigned __int128 z;
+
+	z = (unsigned __int128)a[0] + (unsigned __int128)b[0];
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)a[1] + (unsigned __int128)b[1] + (z >> 64);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)a[2] + (unsigned __int128)b[2] + (z >> 64);
+	t2 = (uint64_t)z;
+	z = (unsigned __int128)a[3] + (unsigned __int128)b[3] + (z >> 64);
+	t3 = (uint64_t)z & MASK63;
+	cc = (uint64_t)(z >> 63); /* bits 255 and above of the sum */
+
+	/*
+	 * Since operands are at most 2^255+37, the sum is at most
+	 * 2^256+74; thus, the carry cc is equal to 0, 1 or 2.
+	 *
+	 * We use: 2^255 = 19 mod p.
+	 * Since we add 0, 19 or 38 to a value that fits on 255 bits,
+	 * the result is at most 2^255+37.
+	 */
+	z = (unsigned __int128)t0 + (unsigned __int128)(19 * cc);
+	d[0] = (uint64_t)z;
+	z = (unsigned __int128)t1 + (z >> 64);
+	d[1] = (uint64_t)z;
+	z = (unsigned __int128)t2 + (z >> 64);
+	d[2] = (uint64_t)z;
+	d[3] = t3 + (uint64_t)(z >> 64);
+
+#elif BR_UMUL128
+
+	uint64_t t0, t1, t2, t3, cc;
+	unsigned char k;
+
+	k = _addcarry_u64(0, a[0], b[0], &t0);
+	k = _addcarry_u64(k, a[1], b[1], &t1);
+	k = _addcarry_u64(k, a[2], b[2], &t2);
+	k = _addcarry_u64(k, a[3], b[3], &t3);
+	cc = (k << 1) + (t3 >> 63); /* bits 255 and above of the sum */
+	t3 &= MASK63;
+
+	/*
+	 * Since operands are at most 2^255+37, the sum is at most
+	 * 2^256+74; thus, the carry cc is equal to 0, 1 or 2.
+	 *
+	 * We use: 2^255 = 19 mod p.
+	 * Since we add 0, 19 or 38 to a value that fits on 255 bits,
+	 * the result is at most 2^255+37.
+	 */
+	k = _addcarry_u64(0, t0, 19 * cc, &d[0]);
+	k = _addcarry_u64(k, t1, 0, &d[1]);
+	k = _addcarry_u64(k, t2, 0, &d[2]);
+	(void)_addcarry_u64(k, t3, 0, &d[3]);
+
+#endif
+}
+
+/*
+ * Subtraction: d <- a - b (partially reduced: d <= 2^255+37).
+ */
+static inline void
+f255_sub(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+#if BR_INT128
+
+	/*
+	 * We compute t = 2^256 - 38 + a - b, which is necessarily
+	 * positive but lower than 2^256 + 2^255, since a <= 2^255 + 37
+	 * and b <= 2^255 + 37. We then subtract 0, p or 2*p, depending
+	 * on the two upper bits of t (bits 255 and 256).
+	 */
+
+	uint64_t t0, t1, t2, t3, t4, cc;
+	unsigned __int128 z;
+
+	z = (unsigned __int128)a[0] - (unsigned __int128)b[0] - 38;
+	t0 = (uint64_t)z;
+	cc = -(uint64_t)(z >> 64); /* borrow: 0 or 1 */
+	z = (unsigned __int128)a[1] - (unsigned __int128)b[1]
+		- (unsigned __int128)cc;
+	t1 = (uint64_t)z;
+	cc = -(uint64_t)(z >> 64);
+	z = (unsigned __int128)a[2] - (unsigned __int128)b[2]
+		- (unsigned __int128)cc;
+	t2 = (uint64_t)z;
+	cc = -(uint64_t)(z >> 64);
+	z = (unsigned __int128)a[3] - (unsigned __int128)b[3]
+		- (unsigned __int128)cc;
+	t3 = (uint64_t)z;
+	t4 = 1 + (uint64_t)(z >> 64); /* bit 256 of t: 1 minus final borrow */
+
+	/*
+	 * We have a 257-bit result. The two top bits can be 00, 01 or 10,
+	 * but not 11 (value t <= 2^256 - 38 + 2^255 + 37 = 2^256 + 2^255 - 1).
+	 * Therefore, we can truncate to 255 bits, and add 0, 19 or 38.
+	 * This guarantees that the result is at most 2^255+37.
+	 */
+	cc = (38 & -t4) + (19 & -(t3 >> 63));
+	t3 &= MASK63;
+	z = (unsigned __int128)t0 + (unsigned __int128)cc;
+	d[0] = (uint64_t)z;
+	z = (unsigned __int128)t1 + (z >> 64);
+	d[1] = (uint64_t)z;
+	z = (unsigned __int128)t2 + (z >> 64);
+	d[2] = (uint64_t)z;
+	d[3] = t3 + (uint64_t)(z >> 64);
+
+#elif BR_UMUL128
+
+	/*
+	 * We compute t = 2^256 - 38 + a - b, which is necessarily
+	 * positive but lower than 2^256 + 2^255, since a <= 2^255 + 37
+	 * and b <= 2^255 + 37. We then subtract 0, p or 2*p, depending
+	 * on the two upper bits of t (bits 255 and 256).
+	 */
+
+	uint64_t t0, t1, t2, t3, t4;
+	unsigned char k;
+
+	k = _subborrow_u64(0, a[0], b[0], &t0);
+	k = _subborrow_u64(k, a[1], b[1], &t1);
+	k = _subborrow_u64(k, a[2], b[2], &t2);
+	k = _subborrow_u64(k, a[3], b[3], &t3);
+	(void)_subborrow_u64(k, 1, 0, &t4);
+
+	k = _subborrow_u64(0, t0, 38, &t0);
+	k = _subborrow_u64(k, t1, 0, &t1);
+	k = _subborrow_u64(k, t2, 0, &t2);
+	k = _subborrow_u64(k, t3, 0, &t3);
+	(void)_subborrow_u64(k, t4, 0, &t4);
+
+	/*
+	 * We have a 257-bit result. The two top bits can be 00, 01 or 10,
+	 * but not 11 (value t <= 2^256 - 38 + 2^255 + 37 = 2^256 + 2^255 - 1).
+	 * Therefore, we can truncate to 255 bits, and add 0, 19 or 38.
+	 * This guarantees that the result is at most 2^255+37.
+	 */
+	t4 = (38 & -t4) + (19 & -(t3 >> 63));
+	t3 &= MASK63;
+	k = _addcarry_u64(0, t0, t4, &d[0]);
+	k = _addcarry_u64(k, t1, 0, &d[1]);
+	k = _addcarry_u64(k, t2, 0, &d[2]);
+	(void)_addcarry_u64(k, t3, 0, &d[3]);
+
+#endif
+}
+
+/*
+ * Multiplication: d <- a * b (partially reduced: d <= 2^255+37).
+ */
+static inline void
+f255_mul(uint64_t *d, uint64_t *a, uint64_t *b)
+{
+#if BR_INT128
+
+	unsigned __int128 z;
+	uint64_t t0, t1, t2, t3, t4, t5, t6, t7, th;
+
+	/*
+	 * Compute the product a*b over plain integers.
+	 */
+	z = (unsigned __int128)a[0] * (unsigned __int128)b[0];
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)a[0] * (unsigned __int128)b[1] + (z >> 64);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)a[0] * (unsigned __int128)b[2] + (z >> 64);
+	t2 = (uint64_t)z;
+	z = (unsigned __int128)a[0] * (unsigned __int128)b[3] + (z >> 64);
+	t3 = (uint64_t)z;
+	t4 = (uint64_t)(z >> 64);
+
+	z = (unsigned __int128)a[1] * (unsigned __int128)b[0]
+		+ (unsigned __int128)t1;
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)a[1] * (unsigned __int128)b[1]
+		+ (unsigned __int128)t2 + (z >> 64);
+	t2 = (uint64_t)z;
+	z = (unsigned __int128)a[1] * (unsigned __int128)b[2]
+		+ (unsigned __int128)t3 + (z >> 64);
+	t3 = (uint64_t)z;
+	z = (unsigned __int128)a[1] * (unsigned __int128)b[3]
+		+ (unsigned __int128)t4 + (z >> 64);
+	t4 = (uint64_t)z;
+	t5 = (uint64_t)(z >> 64);
+
+	z = (unsigned __int128)a[2] * (unsigned __int128)b[0]
+		+ (unsigned __int128)t2;
+	t2 = (uint64_t)z;
+	z = (unsigned __int128)a[2] * (unsigned __int128)b[1]
+		+ (unsigned __int128)t3 + (z >> 64);
+	t3 = (uint64_t)z;
+	z = (unsigned __int128)a[2] * (unsigned __int128)b[2]
+		+ (unsigned __int128)t4 + (z >> 64);
+	t4 = (uint64_t)z;
+	z = (unsigned __int128)a[2] * (unsigned __int128)b[3]
+		+ (unsigned __int128)t5 + (z >> 64);
+	t5 = (uint64_t)z;
+	t6 = (uint64_t)(z >> 64);
+
+	z = (unsigned __int128)a[3] * (unsigned __int128)b[0]
+		+ (unsigned __int128)t3;
+	t3 = (uint64_t)z;
+	z = (unsigned __int128)a[3] * (unsigned __int128)b[1]
+		+ (unsigned __int128)t4 + (z >> 64);
+	t4 = (uint64_t)z;
+	z = (unsigned __int128)a[3] * (unsigned __int128)b[2]
+		+ (unsigned __int128)t5 + (z >> 64);
+	t5 = (uint64_t)z;
+	z = (unsigned __int128)a[3] * (unsigned __int128)b[3]
+		+ (unsigned __int128)t6 + (z >> 64);
+	t6 = (uint64_t)z;
+	t7 = (uint64_t)(z >> 64);
+
+	/*
+	 * Modulo p, we have:
+	 *
+	 *   2^255 = 19
+	 *   2^510 = 19*19 = 361
+	 *
+	 * We split the intermediate t into three parts, in basis
+	 * 2^255. The low one will be in t0..t3; the middle one in t4..t7.
+	 * The upper one can only be a single bit (th), since the
+	 * multiplication operands are at most 2^255+37 each.
+	 */
+	th = t7 >> 62;
+	t7 = ((t7 << 1) | (t6 >> 63)) & MASK63;
+	t6 = (t6 << 1) | (t5 >> 63);
+	t5 = (t5 << 1) | (t4 >> 63);
+	t4 = (t4 << 1) | (t3 >> 63);
+	t3 &= MASK63;
+
+	/*
+	 * Multiply the middle part (t4..t7) by 19. We truncate it to
+	 * 255 bits; the extra bits will go along with th.
+	 */
+	z = (unsigned __int128)t4 * 19;
+	t4 = (uint64_t)z;
+	z = (unsigned __int128)t5 * 19 + (z >> 64);
+	t5 = (uint64_t)z;
+	z = (unsigned __int128)t6 * 19 + (z >> 64);
+	t6 = (uint64_t)z;
+	z = (unsigned __int128)t7 * 19 + (z >> 64);
+	t7 = (uint64_t)z & MASK63;
+
+	th = (361 & -th) + (19 * (uint64_t)(z >> 63));
+
+	/*
+	 * Add elements together.
+	 * At this point:
+	 *   t0..t3 fits on 255 bits.
+	 *   t4..t7 fits on 255 bits.
+	 *   th <= 361 + 342 = 703.
+	 */
+	z = (unsigned __int128)t0 + (unsigned __int128)t4
+		+ (unsigned __int128)th;
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)t1 + (unsigned __int128)t5 + (z >> 64);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)t2 + (unsigned __int128)t6 + (z >> 64);
+	t2 = (uint64_t)z;
+	z = (unsigned __int128)t3 + (unsigned __int128)t7 + (z >> 64);
+	t3 = (uint64_t)z & MASK63;
+	th = (uint64_t)(z >> 63);
+
+	/*
+	 * Since the sum is at most 2^256 + 703, the two upper bits, in th,
+	 * can only have value 0, 1 or 2. We just add th*19, which
+	 * guarantees a result of at most 2^255+37.
+	 */
+	z = (unsigned __int128)t0 + (19 * th);
+	d[0] = (uint64_t)z;
+	z = (unsigned __int128)t1 + (z >> 64);
+	d[1] = (uint64_t)z;
+	z = (unsigned __int128)t2 + (z >> 64);
+	d[2] = (uint64_t)z;
+	d[3] = t3 + (uint64_t)(z >> 64);
+
+#elif BR_UMUL128
+
+	uint64_t t0, t1, t2, t3, t4, t5, t6, t7, th;
+	uint64_t h0, h1, h2, h3;
+	unsigned char k;
+
+	/*
+	 * Compute the product a*b over plain integers.
+	 */
+	t0 = _umul128(a[0], b[0], &h0);
+	t1 = _umul128(a[0], b[1], &h1);
+	k = _addcarry_u64(0, t1, h0, &t1);
+	t2 = _umul128(a[0], b[2], &h2);
+	k = _addcarry_u64(k, t2, h1, &t2);
+	t3 = _umul128(a[0], b[3], &h3);
+	k = _addcarry_u64(k, t3, h2, &t3);
+	(void)_addcarry_u64(k, h3, 0, &t4);
+
+	k = _addcarry_u64(0, _umul128(a[1], b[0], &h0), t1, &t1);
+	k = _addcarry_u64(k, _umul128(a[1], b[1], &h1), t2, &t2);
+	k = _addcarry_u64(k, _umul128(a[1], b[2], &h2), t3, &t3);
+	k = _addcarry_u64(k, _umul128(a[1], b[3], &h3), t4, &t4);
+	t5 = k;
+	k = _addcarry_u64(0, t2, h0, &t2);
+	k = _addcarry_u64(k, t3, h1, &t3);
+	k = _addcarry_u64(k, t4, h2, &t4);
+	(void)_addcarry_u64(k, t5, h3, &t5);
+
+	k = _addcarry_u64(0, _umul128(a[2], b[0], &h0), t2, &t2);
+	k = _addcarry_u64(k, _umul128(a[2], b[1], &h1), t3, &t3);
+	k = _addcarry_u64(k, _umul128(a[2], b[2], &h2), t4, &t4);
+	k = _addcarry_u64(k, _umul128(a[2], b[3], &h3), t5, &t5);
+	t6 = k;
+	k = _addcarry_u64(0, t3, h0, &t3);
+	k = _addcarry_u64(k, t4, h1, &t4);
+	k = _addcarry_u64(k, t5, h2, &t5);
+	(void)_addcarry_u64(k, t6, h3, &t6);
+
+	k = _addcarry_u64(0, _umul128(a[3], b[0], &h0), t3, &t3);
+	k = _addcarry_u64(k, _umul128(a[3], b[1], &h1), t4, &t4);
+	k = _addcarry_u64(k, _umul128(a[3], b[2], &h2), t5, &t5);
+	k = _addcarry_u64(k, _umul128(a[3], b[3], &h3), t6, &t6);
+	t7 = k;
+	k = _addcarry_u64(0, t4, h0, &t4);
+	k = _addcarry_u64(k, t5, h1, &t5);
+	k = _addcarry_u64(k, t6, h2, &t6);
+	(void)_addcarry_u64(k, t7, h3, &t7);
+
+	/*
+	 * Modulo p, we have:
+	 *
+	 *   2^255 = 19
+	 *   2^510 = 19*19 = 361
+	 *
+	 * We split the intermediate t into three parts, in basis
+	 * 2^255. The low one will be in t0..t3; the middle one in t4..t7.
+	 * The upper one can only be a single bit (th), since the
+	 * multiplication operands are at most 2^255+37 each.
+	 */
+	th = t7 >> 62;
+	t7 = ((t7 << 1) | (t6 >> 63)) & MASK63;
+	t6 = (t6 << 1) | (t5 >> 63);
+	t5 = (t5 << 1) | (t4 >> 63);
+	t4 = (t4 << 1) | (t3 >> 63);
+	t3 &= MASK63;
+
+	/*
+	 * Multiply the middle part (t4..t7) by 19. We truncate it to
+	 * 255 bits; the extra bits will go along with th.
+	 */
+	t4 = _umul128(t4, 19, &h0);
+	t5 = _umul128(t5, 19, &h1);
+	t6 = _umul128(t6, 19, &h2);
+	t7 = _umul128(t7, 19, &h3);
+	k = _addcarry_u64(0, t5, h0, &t5);
+	k = _addcarry_u64(k, t6, h1, &t6);
+	k = _addcarry_u64(k, t7, h2, &t7);
+	(void)_addcarry_u64(k, h3, 0, &h3);
+	th = (361 & -th) + (19 * ((h3 << 1) + (t7 >> 63)));
+	t7 &= MASK63;
+
+	/*
+	 * Add elements together.
+	 * At this point:
+	 *   t0..t3 fits on 255 bits.
+	 *   t4..t7 fits on 255 bits.
+	 *   th <= 361 + 342 = 703.
+	 */
+	k = _addcarry_u64(0, t0, t4, &t0);
+	k = _addcarry_u64(k, t1, t5, &t1);
+	k = _addcarry_u64(k, t2, t6, &t2);
+	k = _addcarry_u64(k, t3, t7, &t3);
+	t4 = k;
+	k = _addcarry_u64(0, t0, th, &t0);
+	k = _addcarry_u64(k, t1, 0, &t1);
+	k = _addcarry_u64(k, t2, 0, &t2);
+	k = _addcarry_u64(k, t3, 0, &t3);
+	(void)_addcarry_u64(k, t4, 0, &t4);
+
+	th = (t4 << 1) + (t3 >> 63);
+	t3 &= MASK63;
+
+	/*
+	 * Since the sum is at most 2^256 + 703, the two upper bits, in th,
+	 * can only have value 0, 1 or 2. We just add th*19, which
+	 * guarantees a result of at most 2^255+37.
+	 */
+	k = _addcarry_u64(0, t0, 19 * th, &d[0]);
+	k = _addcarry_u64(k, t1, 0, &d[1]);
+	k = _addcarry_u64(k, t2, 0, &d[2]);
+	(void)_addcarry_u64(k, t3, 0, &d[3]);
+
+#endif
+}
+
+/*
+ * Multiplication by A24 = 121665: d <- 121665*a (partially reduced).
+ */
+static inline void
+f255_mul_a24(uint64_t *d, const uint64_t *a)
+{
+#if BR_INT128
+
+	uint64_t t0, t1, t2, t3;
+	unsigned __int128 z;
+
+	z = (unsigned __int128)a[0] * 121665;
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)a[1] * 121665 + (z >> 64);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)a[2] * 121665 + (z >> 64);
+	t2 = (uint64_t)z;
+	z = (unsigned __int128)a[3] * 121665 + (z >> 64);
+	t3 = (uint64_t)z & MASK63;
+
+	z = (unsigned __int128)t0 + (19 * (uint64_t)(z >> 63)); /* fold bits 255+ */
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)t1 + (z >> 64);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)t2 + (z >> 64);
+	t2 = (uint64_t)z;
+	t3 = t3 + (uint64_t)(z >> 64);
+
+	z = (unsigned __int128)t0 + (19 & -(t3 >> 63)); /* fold bit 255 again */
+	d[0] = (uint64_t)z;
+	z = (unsigned __int128)t1 + (z >> 64);
+	d[1] = (uint64_t)z;
+	z = (unsigned __int128)t2 + (z >> 64);
+	d[2] = (uint64_t)z;
+	d[3] = (t3 & MASK63) + (uint64_t)(z >> 64);
+
+#elif BR_UMUL128
+
+	uint64_t t0, t1, t2, t3, t4, h0, h1, h2, h3;
+	unsigned char k;
+
+	t0 = _umul128(a[0], 121665, &h0);
+	t1 = _umul128(a[1], 121665, &h1);
+	k = _addcarry_u64(0, t1, h0, &t1);
+	t2 = _umul128(a[2], 121665, &h2);
+	k = _addcarry_u64(k, t2, h1, &t2);
+	t3 = _umul128(a[3], 121665, &h3);
+	k = _addcarry_u64(k, t3, h2, &t3);
+	(void)_addcarry_u64(k, h3, 0, &t4);
+
+	t4 = (t4 << 1) + (t3 >> 63); /* bits 255 and above of the product */
+	t3 &= MASK63;
+	k = _addcarry_u64(0, t0, 19 * t4, &t0);
+	k = _addcarry_u64(k, t1, 0, &t1);
+	k = _addcarry_u64(k, t2, 0, &t2);
+	(void)_addcarry_u64(k, t3, 0, &t3);
+
+	t4 = 19 & -(t3 >> 63); /* fold bit 255 again */
+	t3 &= MASK63;
+	k = _addcarry_u64(0, t0, t4, &d[0]);
+	k = _addcarry_u64(k, t1, 0, &d[1]);
+	k = _addcarry_u64(k, t2, 0, &d[2]);
+	(void)_addcarry_u64(k, t3, 0, &d[3]);
+
+#endif
+}
+
+/*
+ * Finalize reduction: a[] (at most 2^255+37) is reduced modulo p, in place.
+ */
+static inline void
+f255_final_reduce(uint64_t *a)
+{
+#if BR_INT128
+
+	uint64_t t0, t1, t2, t3, m;
+	unsigned __int128 z;
+
+	/*
+	 * We add 19. If the result (in t) is below 2^255, then a[]
+	 * is already less than 2^255-19, thus already reduced.
+	 * Otherwise, we subtract 2^255 from t[], in which case we
+	 * have t = a - (2^255-19), and that's our result.
+	 */
+	z = (unsigned __int128)a[0] + 19;
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)a[1] + (z >> 64);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)a[2] + (z >> 64);
+	t2 = (uint64_t)z;
+	t3 = a[3] + (uint64_t)(z >> 64);
+
+	m = -(t3 >> 63); /* all-ones if bit 255 of t is set, else zero */
+	t3 &= MASK63;
+	a[0] ^= m & (a[0] ^ t0);
+	a[1] ^= m & (a[1] ^ t1);
+	a[2] ^= m & (a[2] ^ t2);
+	a[3] ^= m & (a[3] ^ t3);
+
+#elif BR_UMUL128
+
+	uint64_t t0, t1, t2, t3, m;
+	unsigned char k;
+
+	/*
+	 * We add 19. If the result (in t) is below 2^255, then a[]
+	 * is already less than 2^255-19, thus already reduced.
+	 * Otherwise, we subtract 2^255 from t[], in which case we
+	 * have t = a - (2^255-19), and that's our result.
+	 */
+	k = _addcarry_u64(0, a[0], 19, &t0);
+	k = _addcarry_u64(k, a[1], 0, &t1);
+	k = _addcarry_u64(k, a[2], 0, &t2);
+	(void)_addcarry_u64(k, a[3], 0, &t3);
+
+	m = -(t3 >> 63); /* all-ones if bit 255 of t is set, else zero */
+	t3 &= MASK63;
+	a[0] ^= m & (a[0] ^ t0);
+	a[1] ^= m & (a[1] ^ t1);
+	a[2] ^= m & (a[2] ^ t2);
+	a[3] ^= m & (a[3] ^ t3);
+
+#endif
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *kb, size_t kblen, int curve)
+{
+	unsigned char k[32];
+	uint64_t x1[4], x2[4], z2[4], x3[4], z3[4];
+	uint32_t swap;
+	int i;
+
+	(void)curve;
+
+	/*
+	 * Points are encoded over exactly 32 bytes; multipliers must fit
+	 * in 32 bytes as well. Otherwise, 0 is returned and G[] is untouched.
+	 */
+	if (Glen != 32 || kblen > 32) {
+		return 0;
+	}
+
+	/*
+	 * RFC 7748 mandates that the high bit of the last point byte must
+	 * be ignored/cleared (done here by the "& MASK63" on x1[3]).
+	 */
+	x1[0] = br_dec64le(&G[ 0]);
+	x1[1] = br_dec64le(&G[ 8]);
+	x1[2] = br_dec64le(&G[16]);
+	x1[3] = br_dec64le(&G[24]) & MASK63;
+
+	/*
+	 * We can use memset() to clear values, because exact-width types
+	 * like uint64_t are guaranteed to have no padding bits or
+	 * trap representations.
+	 */
+	memset(x2, 0, sizeof x2);
+	x2[0] = 1;
+	memset(z2, 0, sizeof z2);
+	memcpy(x3, x1, sizeof x1);
+	memcpy(z3, x2, sizeof x2);
+
+	/*
+	 * The multiplier is provided in big-endian notation, and possibly
+	 * shorter than 32 bytes; it is left-padded with zeros, then clamped.
+	 */
+	memset(k, 0, (sizeof k) - kblen);
+	memcpy(k + (sizeof k) - kblen, kb, kblen);
+	k[31] &= 0xF8; /* clear bits 0 to 2 (k[31] is the low byte) */
+	k[0] &= 0x7F; /* clear bit 255 */
+	k[0] |= 0x40; /* set bit 254 */
+
+	swap = 0;
+
+	for (i = 254; i >= 0; i --) {
+		uint64_t a[4], aa[4], b[4], bb[4], e[4];
+		uint64_t c[4], d[4], da[4], cb[4];
+		uint32_t kt;
+
+		kt = (k[31 - (i >> 3)] >> (i & 7)) & 1;
+		swap ^= kt;
+		f255_cswap(x2, x3, swap);
+		f255_cswap(z2, z3, swap);
+		swap = kt;
+
+		/* A = x_2 + z_2 */
+		f255_add(a, x2, z2);
+
+		/* AA = A^2 */
+		f255_mul(aa, a, a);
+
+		/* B = x_2 - z_2 */
+		f255_sub(b, x2, z2);
+
+		/* BB = B^2 */
+		f255_mul(bb, b, b);
+
+		/* E = AA - BB */
+		f255_sub(e, aa, bb);
+
+		/* C = x_3 + z_3 */
+		f255_add(c, x3, z3);
+
+		/* D = x_3 - z_3 */
+		f255_sub(d, x3, z3);
+
+		/* DA = D * A */
+		f255_mul(da, d, a);
+
+		/* CB = C * B */
+		f255_mul(cb, c, b);
+
+		/* x_3 = (DA + CB)^2 */
+		f255_add(x3, da, cb);
+		f255_mul(x3, x3, x3);
+
+		/* z_3 = x_1 * (DA - CB)^2 */
+		f255_sub(z3, da, cb);
+		f255_mul(z3, z3, z3);
+		f255_mul(z3, x1, z3);
+
+		/* x_2 = AA * BB */
+		f255_mul(x2, aa, bb);
+
+		/* z_2 = E * (AA + a24 * E) */
+		f255_mul_a24(z2, e);
+		f255_add(z2, aa, z2);
+		f255_mul(z2, e, z2);
+	}
+
+	f255_cswap(x2, x3, swap);
+	f255_cswap(z2, z3, swap);
+
+	/*
+	 * Compute 1/z2 = z2^(p-2). Since p = 2^255-19, most non-squaring
+	 * multiplications can be shared. x1 and x3, now useless, are temporaries.
+	 */
+	memcpy(x1, z2, sizeof z2);
+	for (i = 0; i < 15; i ++) {
+		f255_mul(x1, x1, x1);
+		f255_mul(x1, x1, z2);
+	} /* x1 = z2^(2^16-1) */
+	memcpy(x3, x1, sizeof x1);
+	for (i = 0; i < 14; i ++) {
+		int j;
+
+		for (j = 0; j < 16; j ++) {
+			f255_mul(x3, x3, x3);
+		}
+		f255_mul(x3, x3, x1);
+	} /* x3 = z2^(2^240-1) */
+	for (i = 14; i >= 0; i --) {
+		f255_mul(x3, x3, x3);
+		if ((0xFFEB >> i) & 1) {
+			f255_mul(x3, z2, x3);
+		}
+	} /* x3 = z2^(2^255-21) = z2^(p-2) */
+
+	/*
+	 * Compute x2/z2. We have 1/z2 in x3.
+	 */
+	f255_mul(x2, x2, x3);
+	f255_final_reduce(x2);
+
+	/*
+	 * Encode the final x2 value in little-endian (one limb per 8 bytes).
+	 */
+	br_enc64le(G,      x2[0]);
+	br_enc64le(G +  8, x2[1]);
+	br_enc64le(G + 16, x2[2]);
+	br_enc64le(G + 24, x2[3]);
+	return 1;
+}
+
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	size_t gen_len;
+	const unsigned char *gen = api_generator(curve, &gen_len);
+
+	/* R <- encoded generator, then multiplied in place by x. */
+	memcpy(R, gen, gen_len);
+	(void)api_mul(R, gen_len, x, xlen, curve);
+	return gen_len;
+}
+
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	/*
+	 * This method is not implemented, since it is used for ECDSA
+	 * only, and there is no ECDSA over Curve25519 (which instead
+	 * uses EdDSA). All parameters are ignored; 0 is always returned.
+	 */
+	(void)A;
+	(void)B;
+	(void)len;
+	(void)x;
+	(void)xlen;
+	(void)y;
+	(void)ylen;
+	(void)curve;
+	return 0;
+}
+
+/* see bearssl_ec.h -- method table built from the api_*() functions above */
+const br_ec_impl br_ec_c25519_m64 = {
+	(uint32_t)0x20000000, /* only bit 29 set; presumably the curve mask */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
+
+/* see bearssl_ec.h -- built when BR_INT128 or BR_UMUL128 is set */
+const br_ec_impl *
+br_ec_c25519_m64_get(void)
+{
+	return &br_ec_c25519_m64;
+}
+
+#else
+
+/* see bearssl_ec.h -- neither BR_INT128 nor BR_UMUL128: always returns 0 */
+const br_ec_impl *
+br_ec_c25519_m64_get(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/ec_curve25519.c b/third_party/bearssl/src/ec_curve25519.c
new file mode 100644
index 0000000..a47d215
--- /dev/null
+++ b/third_party/bearssl/src/ec_curve25519.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static const unsigned char GEN[] = { /* 32 bytes; used by br_curve25519 */
+	0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+static const unsigned char ORDER[] = { /* 32 bytes; used by br_curve25519 */
+	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+
+/* see inner.h -- curve identifier, then ORDER[] and GEN[] with their sizes */
+const br_ec_curve_def br_curve25519 = {
+	BR_EC_curve25519,
+	ORDER, sizeof ORDER,
+	GEN, sizeof GEN
+};
diff --git a/third_party/bearssl/src/ec_default.c b/third_party/bearssl/src/ec_default.c
new file mode 100644
index 0000000..7bb6e0c
--- /dev/null
+++ b/third_party/bearssl/src/ec_default.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ec.h; the returned implementation is chosen at compile time */
+const br_ec_impl *
+br_ec_get_default(void)
+{
+#if BR_LOMUL	/* BR_LOMUL nonzero: return the m15 implementation */
+	return &br_ec_all_m15;
+#else	/* otherwise: return the m31 implementation */
+	return &br_ec_all_m31;
+#endif
+}
diff --git a/third_party/bearssl/src/ec_keygen.c b/third_party/bearssl/src/ec_keygen.c
new file mode 100644
index 0000000..02a3096
--- /dev/null
+++ b/third_party/bearssl/src/ec_keygen.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ec.h: draw a random private key for 'curve' into kbuf */
+size_t
+br_ec_keygen(const br_prng_class **rng_ctx,
+	const br_ec_impl *impl, br_ec_private_key *sk,
+	void *kbuf, int curve)
+{
+	const unsigned char *order;
+	unsigned char *buf;
+	size_t len;
+	unsigned mask;
+
+	if (curve < 0 || curve >= 32	/* curve indexes a bit of supported_curves */
+		|| ((impl->supported_curves >> curve) & 1) == 0)
+	{
+		return 0;	/* curve not supported by this implementation */
+	}
+	order = impl->order(curve, &len);
+	while (len > 0 && *order == 0) {	/* skip leading zero bytes of the order */
+		order ++;
+		len --;
+	}
+	if (kbuf == NULL || len == 0) {	/* no output buffer: only report the length */
+		return len;
+	}
+	mask = order[0];	/* smear the top set bit of order[0] into all lower bits */
+	mask |= (mask >> 1);
+	mask |= (mask >> 2);
+	mask |= (mask >> 4);
+
+	/*
+	 * We generate sequences of random bits of the right size, until
+	 * the value is strictly lower than the curve order (we also
+	 * check for all-zero values, which are invalid).
+	 */
+	buf = kbuf;
+	for (;;) {
+		size_t u;
+		unsigned cc, zz;
+
+		(*rng_ctx)->generate(rng_ctx, buf, len);
+		buf[0] &= mask;	/* trim the candidate to the bit length of the order */
+		cc = 0;
+		u = len;
+		zz = 0;
+		while (u -- > 0) {	/* from the last byte to the first */
+			cc = ((unsigned)(buf[u] - order[u] - cc) >> 8) & 1;	/* borrow of buf - order */
+			zz |= buf[u];	/* stays 0 only if every byte is 0 */
+		}
+		if (cc != 0 && zz != 0) {	/* final borrow: buf < order; and buf != 0 */
+			break;
+		}
+	}
+
+	if (sk != NULL) {	/* sk is optional; when given it points into kbuf */
+		sk->curve = curve;
+		sk->x = buf;
+		sk->xlen = len;
+	}
+	return len;	/* key length in bytes (order length without leading zeros) */
+}
diff --git a/third_party/bearssl/src/ec_p256_m15.c b/third_party/bearssl/src/ec_p256_m15.c
new file mode 100644
index 0000000..05800d8
--- /dev/null
+++ b/third_party/bearssl/src/ec_p256_m15.c
@@ -0,0 +1,2124 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * ARSH(x, n): arithmetic (sign-extending) right shift of the 32-bit
+ * value x by n bits. If BR_NO_ARITH_SHIFT is undefined, or defined to
+ * 0, then we _assume_ that right-shifting a negative signed integer
+ * copies the sign bit. This is "implementation-defined behaviour": not
+ * undefined, but it may differ between compilers, and each compiler is
+ * supposed to document it (GCC documents an arithmetic shift); x must
+ * then be an lvalue. Otherwise the shift is emulated with unsigned
+ * operations only; that variant evaluates x twice and needs n > 0.
+ */
+#if BR_NO_ARITH_SHIFT
+#define ARSH(x, n)   (((uint32_t)(x) >> (n)) \
+                    | ((-((uint32_t)(x) >> 31)) << (32 - (n))))
+#else
+#define ARSH(x, n)   ((*(int32_t *)&(x)) >> (n))
+#endif
+
+/*
+ * Convert an integer from unsigned big-endian encoding (src, len bytes)
+ * into 13-bit words written to dst, least significant word first. The
+ * leftover high bits (fewer than 13, not stored in dst) are returned.
+ */
+static uint32_t
+be8_to_le13(uint32_t *dst, const unsigned char *src, size_t len)
+{
+	uint32_t acc;
+	int acc_len;
+
+	acc = 0;	/* bit accumulator */
+	acc_len = 0;	/* number of valid bits currently in acc */
+	while (len -- > 0) {	/* walk src from its last (least significant) byte */
+		acc |= (uint32_t)src[len] << acc_len;
+		acc_len += 8;
+		if (acc_len >= 13) {	/* a full word is available: emit it */
+			*dst ++ = acc & 0x1FFF;
+			acc >>= 13;
+			acc_len -= 13;
+		}
+	}
+	return acc;
+}
+
+/*
+ * Encode an integer (13-bit words, little-endian) as exactly len bytes
+ * of unsigned big-endian: dst is filled from its last byte backwards,
+ * and src words are consumed only as more bits are needed.
+ */
+static void
+le13_to_be8(unsigned char *dst, size_t len, const uint32_t *src)
+{
+	uint32_t acc;
+	int acc_len;
+
+	acc = 0;	/* bit accumulator */
+	acc_len = 0;	/* number of valid bits currently in acc */
+	while (len -- > 0) {
+		if (acc_len < 8) {	/* not enough bits for a byte: pull the next word */
+			acc |= (*src ++) << acc_len;
+			acc_len += 13;
+		}
+		dst[len] = (unsigned char)acc;	/* low 8 bits of the accumulator */
+		acc >>= 8;
+		acc_len -= 8;
+	}
+}
+
+/*
+ * Normalise len words to a strict 13 bits each, propagating carries
+ * upwards; the carry out of the last word is returned. The source (w)
+ * and destination (d) may be identical, but shall not overlap partially.
+ */
+static inline uint32_t
+norm13(uint32_t *d, const uint32_t *w, size_t len)
+{
+	size_t u;
+	uint32_t cc;
+
+	cc = 0;
+	for (u = 0; u < len; u ++) {
+		int32_t z;
+
+		z = w[u] + cc;	/* held as signed so ARSH below extends the sign */
+		d[u] = z & 0x1FFF;
+		cc = ARSH(z, 13);	/* carry for the next word, sign bit preserved */
+	}
+	return cc;
+}
+
+/*
+ * mul20() multiplies two 260-bit integers together. Each word must fit
+ * on 13 bits; source operands use 20 words, destination operand
+ * receives 40 words. All overlaps allowed.
+ *
+ * square20() computes the square of a 260-bit integer. Each word must
+ * fit on 13 bits; source operand uses 20 words, destination operand
+ * receives 40 words. All overlaps allowed.
+ */
+
+#if BR_SLOW_MUL15
+
+static void
+mul20(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	/*
+	 * Two-level Karatsuba: turns a 20x20 multiplication into
+	 * nine 5x5 multiplications. We use 13-bit words but do not
+	 * propagate carries immediately, so words may expand:
+	 *
+	 *  - First Karatsuba decomposition turns the 20x20 mul on
+	 *    13-bit words into three 10x10 muls, two on 13-bit words
+	 *    and one on 14-bit words.
+	 *
+	 *  - Second Karatsuba decomposition further splits these into:
+	 *
+	 *     * four 5x5 muls on 13-bit words
+	 *     * four 5x5 muls on 14-bit words
+	 *     * one 5x5 mul on 15-bit words
+	 *
+	 * Highest word value is 8191, 16382 or 32764, for 13-bit, 14-bit
+	 * or 15-bit words, respectively.
+	 */
+	uint32_t u[45], v[45], w[90];	/* 9 operand slots of 5 words; 9 product slots of 10 words */
+	uint32_t cc;
+	int i;
+
+#define ZADD(dw, d_off, s1w, s1_off, s2w, s2_off)   do { \
+		(dw)[5 * (d_off) + 0] = (s1w)[5 * (s1_off) + 0] \
+			+ (s2w)[5 * (s2_off) + 0]; \
+		(dw)[5 * (d_off) + 1] = (s1w)[5 * (s1_off) + 1] \
+			+ (s2w)[5 * (s2_off) + 1]; \
+		(dw)[5 * (d_off) + 2] = (s1w)[5 * (s1_off) + 2] \
+			+ (s2w)[5 * (s2_off) + 2]; \
+		(dw)[5 * (d_off) + 3] = (s1w)[5 * (s1_off) + 3] \
+			+ (s2w)[5 * (s2_off) + 3]; \
+		(dw)[5 * (d_off) + 4] = (s1w)[5 * (s1_off) + 4] \
+			+ (s2w)[5 * (s2_off) + 4]; \
+	} while (0)	/* ZADD: 5-word slot d_off of dw = slot s1_off of s1w + slot s2_off of s2w */
+
+#define ZADDT(dw, d_off, sw, s_off)   do { \
+		(dw)[5 * (d_off) + 0] += (sw)[5 * (s_off) + 0]; \
+		(dw)[5 * (d_off) + 1] += (sw)[5 * (s_off) + 1]; \
+		(dw)[5 * (d_off) + 2] += (sw)[5 * (s_off) + 2]; \
+		(dw)[5 * (d_off) + 3] += (sw)[5 * (s_off) + 3]; \
+		(dw)[5 * (d_off) + 4] += (sw)[5 * (s_off) + 4]; \
+	} while (0)	/* ZADDT: 5-word slot d_off of dw += slot s_off of sw */
+
+#define ZSUB2F(dw, d_off, s1w, s1_off, s2w, s2_off)   do { \
+		(dw)[5 * (d_off) + 0] -= (s1w)[5 * (s1_off) + 0] \
+			+ (s2w)[5 * (s2_off) + 0]; \
+		(dw)[5 * (d_off) + 1] -= (s1w)[5 * (s1_off) + 1] \
+			+ (s2w)[5 * (s2_off) + 1]; \
+		(dw)[5 * (d_off) + 2] -= (s1w)[5 * (s1_off) + 2] \
+			+ (s2w)[5 * (s2_off) + 2]; \
+		(dw)[5 * (d_off) + 3] -= (s1w)[5 * (s1_off) + 3] \
+			+ (s2w)[5 * (s2_off) + 3]; \
+		(dw)[5 * (d_off) + 4] -= (s1w)[5 * (s1_off) + 4] \
+			+ (s2w)[5 * (s2_off) + 4]; \
+	} while (0)	/* ZSUB2F: 5-word slot d_off of dw -= (slot s1_off of s1w + slot s2_off of s2w) */
+
+#define CPR1(w, cprcc)   do { \
+		uint32_t cprz = (w) + cprcc; \
+		(w) = cprz & 0x1FFF; \
+		cprcc = cprz >> 13; \
+	} while (0)	/* CPR1: add carry into w, keep 13 bits, update the carry */
+
+#define CPR(dw, d_off)   do { \
+		uint32_t cprcc; \
+		cprcc = 0; \
+		CPR1((dw)[(d_off) + 0], cprcc); \
+		CPR1((dw)[(d_off) + 1], cprcc); \
+		CPR1((dw)[(d_off) + 2], cprcc); \
+		CPR1((dw)[(d_off) + 3], cprcc); \
+		CPR1((dw)[(d_off) + 4], cprcc); \
+		CPR1((dw)[(d_off) + 5], cprcc); \
+		CPR1((dw)[(d_off) + 6], cprcc); \
+		CPR1((dw)[(d_off) + 7], cprcc); \
+		CPR1((dw)[(d_off) + 8], cprcc); \
+		(dw)[(d_off) + 9] = cprcc; \
+	} while (0)	/* CPR: carry-propagate words d_off..d_off+8; word d_off+9 receives the last carry */
+
+	memcpy(u, a, 20 * sizeof *a);	/* slots 0..3 of u: the four 5-word pieces of a */
+	ZADD(u, 4, a, 0, a, 1);
+	ZADD(u, 5, a, 2, a, 3);
+	ZADD(u, 6, a, 0, a, 2);
+	ZADD(u, 7, a, 1, a, 3);
+	ZADD(u, 8, u, 6, u, 7);	/* slot 8: sum of all four pieces */
+
+	memcpy(v, b, 20 * sizeof *b);	/* same layout for b */
+	ZADD(v, 4, b, 0, b, 1);
+	ZADD(v, 5, b, 2, b, 3);
+	ZADD(v, 6, b, 0, b, 2);
+	ZADD(v, 7, b, 1, b, 3);
+	ZADD(v, 8, v, 6, v, 7);
+
+	/*
+	 * Do the first eight 5x5 muls (operand slots 0 to 7). Source
+	 * words are at most 16382 each, so we can add product results
+	 * together "as is" in 32-bit words.
+	 */
+	for (i = 0; i < 40; i += 5) {
+		w[(i << 1) + 0] = MUL15(u[i + 0], v[i + 0]);
+		w[(i << 1) + 1] = MUL15(u[i + 0], v[i + 1])
+			+ MUL15(u[i + 1], v[i + 0]);
+		w[(i << 1) + 2] = MUL15(u[i + 0], v[i + 2])
+			+ MUL15(u[i + 1], v[i + 1])
+			+ MUL15(u[i + 2], v[i + 0]);
+		w[(i << 1) + 3] = MUL15(u[i + 0], v[i + 3])
+			+ MUL15(u[i + 1], v[i + 2])
+			+ MUL15(u[i + 2], v[i + 1])
+			+ MUL15(u[i + 3], v[i + 0]);
+		w[(i << 1) + 4] = MUL15(u[i + 0], v[i + 4])
+			+ MUL15(u[i + 1], v[i + 3])
+			+ MUL15(u[i + 2], v[i + 2])
+			+ MUL15(u[i + 3], v[i + 1])
+			+ MUL15(u[i + 4], v[i + 0]);
+		w[(i << 1) + 5] = MUL15(u[i + 1], v[i + 4])
+			+ MUL15(u[i + 2], v[i + 3])
+			+ MUL15(u[i + 3], v[i + 2])
+			+ MUL15(u[i + 4], v[i + 1]);
+		w[(i << 1) + 6] = MUL15(u[i + 2], v[i + 4])
+			+ MUL15(u[i + 3], v[i + 3])
+			+ MUL15(u[i + 4], v[i + 2]);
+		w[(i << 1) + 7] = MUL15(u[i + 3], v[i + 4])
+			+ MUL15(u[i + 4], v[i + 3]);
+		w[(i << 1) + 8] = MUL15(u[i + 4], v[i + 4]);
+		w[(i << 1) + 9] = 0;
+	}
+
+	/*
+	 * For the 9th multiplication, source words are up to 32764,
+	 * so we must do some carry propagation. If we add up to
+	 * 4 products and the carry is no more than 524224, then the
+	 * result fits in 32 bits, and the next carry will be no more
+	 * than 524224 (because 4*(32764^2)+524224 < 8192*524225).
+	 *
+	 * We thus just skip one of the products in the middle word,
+	 * then do a carry propagation (this reduces words to 13 bits
+	 * each, except possibly the last, which may use up to 17 bits
+	 * or so), then add the missing product.
+	 */
+	w[80 + 0] = MUL15(u[40 + 0], v[40 + 0]);
+	w[80 + 1] = MUL15(u[40 + 0], v[40 + 1])
+		+ MUL15(u[40 + 1], v[40 + 0]);
+	w[80 + 2] = MUL15(u[40 + 0], v[40 + 2])
+		+ MUL15(u[40 + 1], v[40 + 1])
+		+ MUL15(u[40 + 2], v[40 + 0]);
+	w[80 + 3] = MUL15(u[40 + 0], v[40 + 3])
+		+ MUL15(u[40 + 1], v[40 + 2])
+		+ MUL15(u[40 + 2], v[40 + 1])
+		+ MUL15(u[40 + 3], v[40 + 0]);
+	w[80 + 4] = MUL15(u[40 + 0], v[40 + 4])
+		+ MUL15(u[40 + 1], v[40 + 3])
+		+ MUL15(u[40 + 2], v[40 + 2])
+		+ MUL15(u[40 + 3], v[40 + 1]);
+		/* + MUL15(u[40 + 4], v[40 + 0]): deferred until after CPR(w, 80) */
+	w[80 + 5] = MUL15(u[40 + 1], v[40 + 4])
+		+ MUL15(u[40 + 2], v[40 + 3])
+		+ MUL15(u[40 + 3], v[40 + 2])
+		+ MUL15(u[40 + 4], v[40 + 1]);
+	w[80 + 6] = MUL15(u[40 + 2], v[40 + 4])
+		+ MUL15(u[40 + 3], v[40 + 3])
+		+ MUL15(u[40 + 4], v[40 + 2]);
+	w[80 + 7] = MUL15(u[40 + 3], v[40 + 4])
+		+ MUL15(u[40 + 4], v[40 + 3]);
+	w[80 + 8] = MUL15(u[40 + 4], v[40 + 4]);
+
+	CPR(w, 80);
+
+	w[80 + 4] += MUL15(u[40 + 4], v[40 + 0]);	/* the product skipped above */
+
+	/*
+	 * The products on 14-bit words in slots 6 and 7 yield values
+	 * up to 5*(16382^2) each, and we need to subtract two such
+	 * values from the higher word. We need the subtraction to fit
+	 * in a _signed_ 32-bit integer, i.e. 31 bits + a sign bit.
+	 * However, 10*(16382^2) does not fit. So we must perform a
+	 * bit of reduction here.
+	 */
+	CPR(w, 60);
+	CPR(w, 70);
+
+	/*
+	 * Recompose results.
+	 */
+
+	/* 0..1*0..1 into 0..3 */
+	ZSUB2F(w, 8, w, 0, w, 2);
+	ZSUB2F(w, 9, w, 1, w, 3);
+	ZADDT(w, 1, w, 8);
+	ZADDT(w, 2, w, 9);
+
+	/* 2..3*2..3 into 4..7 */
+	ZSUB2F(w, 10, w, 4, w, 6);
+	ZSUB2F(w, 11, w, 5, w, 7);
+	ZADDT(w, 5, w, 10);
+	ZADDT(w, 6, w, 11);
+
+	/* (0..1+2..3)*(0..1+2..3) into 12..15 */
+	ZSUB2F(w, 16, w, 12, w, 14);
+	ZSUB2F(w, 17, w, 13, w, 15);
+	ZADDT(w, 13, w, 16);
+	ZADDT(w, 14, w, 17);
+
+	/* first-level recomposition */
+	ZSUB2F(w, 12, w, 0, w, 4);
+	ZSUB2F(w, 13, w, 1, w, 5);
+	ZSUB2F(w, 14, w, 2, w, 6);
+	ZSUB2F(w, 15, w, 3, w, 7);
+	ZADDT(w, 2, w, 12);
+	ZADDT(w, 3, w, 13);
+	ZADDT(w, 4, w, 14);
+	ZADDT(w, 5, w, 15);
+
+	/*
+	 * Perform carry propagation to bring all words down to 13 bits.
+	 */
+	cc = norm13(d, w, 40);
+	d[39] += (cc << 13);	/* put the carry out of word 39 back on top of it */
+
+#undef ZADD
+#undef ZADDT
+#undef ZSUB2F
+#undef CPR1
+#undef CPR
+}
+
+static inline void
+square20(uint32_t *d, const uint32_t *a)
+{
+	mul20(d, a, a);	/* BR_SLOW_MUL15 build: squaring goes through the generic mul20 */
+}
+
+#else
+
+static void
+mul20(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t t[39];	/* t[k] = sum of a[i]*b[j] over i + j == k, carries not yet propagated */
+
+	t[ 0] = MUL15(a[ 0], b[ 0]);
+	t[ 1] = MUL15(a[ 0], b[ 1])
+		+ MUL15(a[ 1], b[ 0]);
+	t[ 2] = MUL15(a[ 0], b[ 2])
+		+ MUL15(a[ 1], b[ 1])
+		+ MUL15(a[ 2], b[ 0]);
+	t[ 3] = MUL15(a[ 0], b[ 3])
+		+ MUL15(a[ 1], b[ 2])
+		+ MUL15(a[ 2], b[ 1])
+		+ MUL15(a[ 3], b[ 0]);
+	t[ 4] = MUL15(a[ 0], b[ 4])
+		+ MUL15(a[ 1], b[ 3])
+		+ MUL15(a[ 2], b[ 2])
+		+ MUL15(a[ 3], b[ 1])
+		+ MUL15(a[ 4], b[ 0]);
+	t[ 5] = MUL15(a[ 0], b[ 5])
+		+ MUL15(a[ 1], b[ 4])
+		+ MUL15(a[ 2], b[ 3])
+		+ MUL15(a[ 3], b[ 2])
+		+ MUL15(a[ 4], b[ 1])
+		+ MUL15(a[ 5], b[ 0]);
+	t[ 6] = MUL15(a[ 0], b[ 6])
+		+ MUL15(a[ 1], b[ 5])
+		+ MUL15(a[ 2], b[ 4])
+		+ MUL15(a[ 3], b[ 3])
+		+ MUL15(a[ 4], b[ 2])
+		+ MUL15(a[ 5], b[ 1])
+		+ MUL15(a[ 6], b[ 0]);
+	t[ 7] = MUL15(a[ 0], b[ 7])
+		+ MUL15(a[ 1], b[ 6])
+		+ MUL15(a[ 2], b[ 5])
+		+ MUL15(a[ 3], b[ 4])
+		+ MUL15(a[ 4], b[ 3])
+		+ MUL15(a[ 5], b[ 2])
+		+ MUL15(a[ 6], b[ 1])
+		+ MUL15(a[ 7], b[ 0]);
+	t[ 8] = MUL15(a[ 0], b[ 8])
+		+ MUL15(a[ 1], b[ 7])
+		+ MUL15(a[ 2], b[ 6])
+		+ MUL15(a[ 3], b[ 5])
+		+ MUL15(a[ 4], b[ 4])
+		+ MUL15(a[ 5], b[ 3])
+		+ MUL15(a[ 6], b[ 2])
+		+ MUL15(a[ 7], b[ 1])
+		+ MUL15(a[ 8], b[ 0]);
+	t[ 9] = MUL15(a[ 0], b[ 9])
+		+ MUL15(a[ 1], b[ 8])
+		+ MUL15(a[ 2], b[ 7])
+		+ MUL15(a[ 3], b[ 6])
+		+ MUL15(a[ 4], b[ 5])
+		+ MUL15(a[ 5], b[ 4])
+		+ MUL15(a[ 6], b[ 3])
+		+ MUL15(a[ 7], b[ 2])
+		+ MUL15(a[ 8], b[ 1])
+		+ MUL15(a[ 9], b[ 0]);
+	t[10] = MUL15(a[ 0], b[10])
+		+ MUL15(a[ 1], b[ 9])
+		+ MUL15(a[ 2], b[ 8])
+		+ MUL15(a[ 3], b[ 7])
+		+ MUL15(a[ 4], b[ 6])
+		+ MUL15(a[ 5], b[ 5])
+		+ MUL15(a[ 6], b[ 4])
+		+ MUL15(a[ 7], b[ 3])
+		+ MUL15(a[ 8], b[ 2])
+		+ MUL15(a[ 9], b[ 1])
+		+ MUL15(a[10], b[ 0]);
+	t[11] = MUL15(a[ 0], b[11])
+		+ MUL15(a[ 1], b[10])
+		+ MUL15(a[ 2], b[ 9])
+		+ MUL15(a[ 3], b[ 8])
+		+ MUL15(a[ 4], b[ 7])
+		+ MUL15(a[ 5], b[ 6])
+		+ MUL15(a[ 6], b[ 5])
+		+ MUL15(a[ 7], b[ 4])
+		+ MUL15(a[ 8], b[ 3])
+		+ MUL15(a[ 9], b[ 2])
+		+ MUL15(a[10], b[ 1])
+		+ MUL15(a[11], b[ 0]);
+	t[12] = MUL15(a[ 0], b[12])
+		+ MUL15(a[ 1], b[11])
+		+ MUL15(a[ 2], b[10])
+		+ MUL15(a[ 3], b[ 9])
+		+ MUL15(a[ 4], b[ 8])
+		+ MUL15(a[ 5], b[ 7])
+		+ MUL15(a[ 6], b[ 6])
+		+ MUL15(a[ 7], b[ 5])
+		+ MUL15(a[ 8], b[ 4])
+		+ MUL15(a[ 9], b[ 3])
+		+ MUL15(a[10], b[ 2])
+		+ MUL15(a[11], b[ 1])
+		+ MUL15(a[12], b[ 0]);
+	t[13] = MUL15(a[ 0], b[13])
+		+ MUL15(a[ 1], b[12])
+		+ MUL15(a[ 2], b[11])
+		+ MUL15(a[ 3], b[10])
+		+ MUL15(a[ 4], b[ 9])
+		+ MUL15(a[ 5], b[ 8])
+		+ MUL15(a[ 6], b[ 7])
+		+ MUL15(a[ 7], b[ 6])
+		+ MUL15(a[ 8], b[ 5])
+		+ MUL15(a[ 9], b[ 4])
+		+ MUL15(a[10], b[ 3])
+		+ MUL15(a[11], b[ 2])
+		+ MUL15(a[12], b[ 1])
+		+ MUL15(a[13], b[ 0]);
+	t[14] = MUL15(a[ 0], b[14])
+		+ MUL15(a[ 1], b[13])
+		+ MUL15(a[ 2], b[12])
+		+ MUL15(a[ 3], b[11])
+		+ MUL15(a[ 4], b[10])
+		+ MUL15(a[ 5], b[ 9])
+		+ MUL15(a[ 6], b[ 8])
+		+ MUL15(a[ 7], b[ 7])
+		+ MUL15(a[ 8], b[ 6])
+		+ MUL15(a[ 9], b[ 5])
+		+ MUL15(a[10], b[ 4])
+		+ MUL15(a[11], b[ 3])
+		+ MUL15(a[12], b[ 2])
+		+ MUL15(a[13], b[ 1])
+		+ MUL15(a[14], b[ 0]);
+	t[15] = MUL15(a[ 0], b[15])
+		+ MUL15(a[ 1], b[14])
+		+ MUL15(a[ 2], b[13])
+		+ MUL15(a[ 3], b[12])
+		+ MUL15(a[ 4], b[11])
+		+ MUL15(a[ 5], b[10])
+		+ MUL15(a[ 6], b[ 9])
+		+ MUL15(a[ 7], b[ 8])
+		+ MUL15(a[ 8], b[ 7])
+		+ MUL15(a[ 9], b[ 6])
+		+ MUL15(a[10], b[ 5])
+		+ MUL15(a[11], b[ 4])
+		+ MUL15(a[12], b[ 3])
+		+ MUL15(a[13], b[ 2])
+		+ MUL15(a[14], b[ 1])
+		+ MUL15(a[15], b[ 0]);
+	t[16] = MUL15(a[ 0], b[16])
+		+ MUL15(a[ 1], b[15])
+		+ MUL15(a[ 2], b[14])
+		+ MUL15(a[ 3], b[13])
+		+ MUL15(a[ 4], b[12])
+		+ MUL15(a[ 5], b[11])
+		+ MUL15(a[ 6], b[10])
+		+ MUL15(a[ 7], b[ 9])
+		+ MUL15(a[ 8], b[ 8])
+		+ MUL15(a[ 9], b[ 7])
+		+ MUL15(a[10], b[ 6])
+		+ MUL15(a[11], b[ 5])
+		+ MUL15(a[12], b[ 4])
+		+ MUL15(a[13], b[ 3])
+		+ MUL15(a[14], b[ 2])
+		+ MUL15(a[15], b[ 1])
+		+ MUL15(a[16], b[ 0]);
+	t[17] = MUL15(a[ 0], b[17])
+		+ MUL15(a[ 1], b[16])
+		+ MUL15(a[ 2], b[15])
+		+ MUL15(a[ 3], b[14])
+		+ MUL15(a[ 4], b[13])
+		+ MUL15(a[ 5], b[12])
+		+ MUL15(a[ 6], b[11])
+		+ MUL15(a[ 7], b[10])
+		+ MUL15(a[ 8], b[ 9])
+		+ MUL15(a[ 9], b[ 8])
+		+ MUL15(a[10], b[ 7])
+		+ MUL15(a[11], b[ 6])
+		+ MUL15(a[12], b[ 5])
+		+ MUL15(a[13], b[ 4])
+		+ MUL15(a[14], b[ 3])
+		+ MUL15(a[15], b[ 2])
+		+ MUL15(a[16], b[ 1])
+		+ MUL15(a[17], b[ 0]);
+	t[18] = MUL15(a[ 0], b[18])
+		+ MUL15(a[ 1], b[17])
+		+ MUL15(a[ 2], b[16])
+		+ MUL15(a[ 3], b[15])
+		+ MUL15(a[ 4], b[14])
+		+ MUL15(a[ 5], b[13])
+		+ MUL15(a[ 6], b[12])
+		+ MUL15(a[ 7], b[11])
+		+ MUL15(a[ 8], b[10])
+		+ MUL15(a[ 9], b[ 9])
+		+ MUL15(a[10], b[ 8])
+		+ MUL15(a[11], b[ 7])
+		+ MUL15(a[12], b[ 6])
+		+ MUL15(a[13], b[ 5])
+		+ MUL15(a[14], b[ 4])
+		+ MUL15(a[15], b[ 3])
+		+ MUL15(a[16], b[ 2])
+		+ MUL15(a[17], b[ 1])
+		+ MUL15(a[18], b[ 0]);
+	t[19] = MUL15(a[ 0], b[19])
+		+ MUL15(a[ 1], b[18])
+		+ MUL15(a[ 2], b[17])
+		+ MUL15(a[ 3], b[16])
+		+ MUL15(a[ 4], b[15])
+		+ MUL15(a[ 5], b[14])
+		+ MUL15(a[ 6], b[13])
+		+ MUL15(a[ 7], b[12])
+		+ MUL15(a[ 8], b[11])
+		+ MUL15(a[ 9], b[10])
+		+ MUL15(a[10], b[ 9])
+		+ MUL15(a[11], b[ 8])
+		+ MUL15(a[12], b[ 7])
+		+ MUL15(a[13], b[ 6])
+		+ MUL15(a[14], b[ 5])
+		+ MUL15(a[15], b[ 4])
+		+ MUL15(a[16], b[ 3])
+		+ MUL15(a[17], b[ 2])
+		+ MUL15(a[18], b[ 1])
+		+ MUL15(a[19], b[ 0]);
+	t[20] = MUL15(a[ 1], b[19])
+		+ MUL15(a[ 2], b[18])
+		+ MUL15(a[ 3], b[17])
+		+ MUL15(a[ 4], b[16])
+		+ MUL15(a[ 5], b[15])
+		+ MUL15(a[ 6], b[14])
+		+ MUL15(a[ 7], b[13])
+		+ MUL15(a[ 8], b[12])
+		+ MUL15(a[ 9], b[11])
+		+ MUL15(a[10], b[10])
+		+ MUL15(a[11], b[ 9])
+		+ MUL15(a[12], b[ 8])
+		+ MUL15(a[13], b[ 7])
+		+ MUL15(a[14], b[ 6])
+		+ MUL15(a[15], b[ 5])
+		+ MUL15(a[16], b[ 4])
+		+ MUL15(a[17], b[ 3])
+		+ MUL15(a[18], b[ 2])
+		+ MUL15(a[19], b[ 1]);
+	t[21] = MUL15(a[ 2], b[19])
+		+ MUL15(a[ 3], b[18])
+		+ MUL15(a[ 4], b[17])
+		+ MUL15(a[ 5], b[16])
+		+ MUL15(a[ 6], b[15])
+		+ MUL15(a[ 7], b[14])
+		+ MUL15(a[ 8], b[13])
+		+ MUL15(a[ 9], b[12])
+		+ MUL15(a[10], b[11])
+		+ MUL15(a[11], b[10])
+		+ MUL15(a[12], b[ 9])
+		+ MUL15(a[13], b[ 8])
+		+ MUL15(a[14], b[ 7])
+		+ MUL15(a[15], b[ 6])
+		+ MUL15(a[16], b[ 5])
+		+ MUL15(a[17], b[ 4])
+		+ MUL15(a[18], b[ 3])
+		+ MUL15(a[19], b[ 2]);
+	t[22] = MUL15(a[ 3], b[19])
+		+ MUL15(a[ 4], b[18])
+		+ MUL15(a[ 5], b[17])
+		+ MUL15(a[ 6], b[16])
+		+ MUL15(a[ 7], b[15])
+		+ MUL15(a[ 8], b[14])
+		+ MUL15(a[ 9], b[13])
+		+ MUL15(a[10], b[12])
+		+ MUL15(a[11], b[11])
+		+ MUL15(a[12], b[10])
+		+ MUL15(a[13], b[ 9])
+		+ MUL15(a[14], b[ 8])
+		+ MUL15(a[15], b[ 7])
+		+ MUL15(a[16], b[ 6])
+		+ MUL15(a[17], b[ 5])
+		+ MUL15(a[18], b[ 4])
+		+ MUL15(a[19], b[ 3]);
+	t[23] = MUL15(a[ 4], b[19])
+		+ MUL15(a[ 5], b[18])
+		+ MUL15(a[ 6], b[17])
+		+ MUL15(a[ 7], b[16])
+		+ MUL15(a[ 8], b[15])
+		+ MUL15(a[ 9], b[14])
+		+ MUL15(a[10], b[13])
+		+ MUL15(a[11], b[12])
+		+ MUL15(a[12], b[11])
+		+ MUL15(a[13], b[10])
+		+ MUL15(a[14], b[ 9])
+		+ MUL15(a[15], b[ 8])
+		+ MUL15(a[16], b[ 7])
+		+ MUL15(a[17], b[ 6])
+		+ MUL15(a[18], b[ 5])
+		+ MUL15(a[19], b[ 4]);
+	t[24] = MUL15(a[ 5], b[19])
+		+ MUL15(a[ 6], b[18])
+		+ MUL15(a[ 7], b[17])
+		+ MUL15(a[ 8], b[16])
+		+ MUL15(a[ 9], b[15])
+		+ MUL15(a[10], b[14])
+		+ MUL15(a[11], b[13])
+		+ MUL15(a[12], b[12])
+		+ MUL15(a[13], b[11])
+		+ MUL15(a[14], b[10])
+		+ MUL15(a[15], b[ 9])
+		+ MUL15(a[16], b[ 8])
+		+ MUL15(a[17], b[ 7])
+		+ MUL15(a[18], b[ 6])
+		+ MUL15(a[19], b[ 5]);
+	t[25] = MUL15(a[ 6], b[19])
+		+ MUL15(a[ 7], b[18])
+		+ MUL15(a[ 8], b[17])
+		+ MUL15(a[ 9], b[16])
+		+ MUL15(a[10], b[15])
+		+ MUL15(a[11], b[14])
+		+ MUL15(a[12], b[13])
+		+ MUL15(a[13], b[12])
+		+ MUL15(a[14], b[11])
+		+ MUL15(a[15], b[10])
+		+ MUL15(a[16], b[ 9])
+		+ MUL15(a[17], b[ 8])
+		+ MUL15(a[18], b[ 7])
+		+ MUL15(a[19], b[ 6]);
+	t[26] = MUL15(a[ 7], b[19])
+		+ MUL15(a[ 8], b[18])
+		+ MUL15(a[ 9], b[17])
+		+ MUL15(a[10], b[16])
+		+ MUL15(a[11], b[15])
+		+ MUL15(a[12], b[14])
+		+ MUL15(a[13], b[13])
+		+ MUL15(a[14], b[12])
+		+ MUL15(a[15], b[11])
+		+ MUL15(a[16], b[10])
+		+ MUL15(a[17], b[ 9])
+		+ MUL15(a[18], b[ 8])
+		+ MUL15(a[19], b[ 7]);
+	t[27] = MUL15(a[ 8], b[19])
+		+ MUL15(a[ 9], b[18])
+		+ MUL15(a[10], b[17])
+		+ MUL15(a[11], b[16])
+		+ MUL15(a[12], b[15])
+		+ MUL15(a[13], b[14])
+		+ MUL15(a[14], b[13])
+		+ MUL15(a[15], b[12])
+		+ MUL15(a[16], b[11])
+		+ MUL15(a[17], b[10])
+		+ MUL15(a[18], b[ 9])
+		+ MUL15(a[19], b[ 8]);
+	t[28] = MUL15(a[ 9], b[19])
+		+ MUL15(a[10], b[18])
+		+ MUL15(a[11], b[17])
+		+ MUL15(a[12], b[16])
+		+ MUL15(a[13], b[15])
+		+ MUL15(a[14], b[14])
+		+ MUL15(a[15], b[13])
+		+ MUL15(a[16], b[12])
+		+ MUL15(a[17], b[11])
+		+ MUL15(a[18], b[10])
+		+ MUL15(a[19], b[ 9]);
+	t[29] = MUL15(a[10], b[19])
+		+ MUL15(a[11], b[18])
+		+ MUL15(a[12], b[17])
+		+ MUL15(a[13], b[16])
+		+ MUL15(a[14], b[15])
+		+ MUL15(a[15], b[14])
+		+ MUL15(a[16], b[13])
+		+ MUL15(a[17], b[12])
+		+ MUL15(a[18], b[11])
+		+ MUL15(a[19], b[10]);
+	t[30] = MUL15(a[11], b[19])
+		+ MUL15(a[12], b[18])
+		+ MUL15(a[13], b[17])
+		+ MUL15(a[14], b[16])
+		+ MUL15(a[15], b[15])
+		+ MUL15(a[16], b[14])
+		+ MUL15(a[17], b[13])
+		+ MUL15(a[18], b[12])
+		+ MUL15(a[19], b[11]);
+	t[31] = MUL15(a[12], b[19])
+		+ MUL15(a[13], b[18])
+		+ MUL15(a[14], b[17])
+		+ MUL15(a[15], b[16])
+		+ MUL15(a[16], b[15])
+		+ MUL15(a[17], b[14])
+		+ MUL15(a[18], b[13])
+		+ MUL15(a[19], b[12]);
+	t[32] = MUL15(a[13], b[19])
+		+ MUL15(a[14], b[18])
+		+ MUL15(a[15], b[17])
+		+ MUL15(a[16], b[16])
+		+ MUL15(a[17], b[15])
+		+ MUL15(a[18], b[14])
+		+ MUL15(a[19], b[13]);
+	t[33] = MUL15(a[14], b[19])
+		+ MUL15(a[15], b[18])
+		+ MUL15(a[16], b[17])
+		+ MUL15(a[17], b[16])
+		+ MUL15(a[18], b[15])
+		+ MUL15(a[19], b[14]);
+	t[34] = MUL15(a[15], b[19])
+		+ MUL15(a[16], b[18])
+		+ MUL15(a[17], b[17])
+		+ MUL15(a[18], b[16])
+		+ MUL15(a[19], b[15]);
+	t[35] = MUL15(a[16], b[19])
+		+ MUL15(a[17], b[18])
+		+ MUL15(a[18], b[17])
+		+ MUL15(a[19], b[16]);
+	t[36] = MUL15(a[17], b[19])
+		+ MUL15(a[18], b[18])
+		+ MUL15(a[19], b[17]);
+	t[37] = MUL15(a[18], b[19])
+		+ MUL15(a[19], b[18]);
+	t[38] = MUL15(a[19], b[19]);
+	d[39] = norm13(d, t, 39);	/* d[0..38]: 13-bit words; carry out of t[38] becomes d[39] */
+}
+
+static void
+square20(uint32_t *d, const uint32_t *a)
+{
+	uint32_t t[39];	/* t[k]: a[k/2]^2 (even k only) plus twice the sum of a[i]*a[j], i < j, i + j == k */
+
+	t[ 0] = MUL15(a[ 0], a[ 0]);
+	t[ 1] = ((MUL15(a[ 0], a[ 1])) << 1);
+	t[ 2] = MUL15(a[ 1], a[ 1])
+		+ ((MUL15(a[ 0], a[ 2])) << 1);
+	t[ 3] = ((MUL15(a[ 0], a[ 3])
+		+ MUL15(a[ 1], a[ 2])) << 1);
+	t[ 4] = MUL15(a[ 2], a[ 2])
+		+ ((MUL15(a[ 0], a[ 4])
+		+ MUL15(a[ 1], a[ 3])) << 1);
+	t[ 5] = ((MUL15(a[ 0], a[ 5])
+		+ MUL15(a[ 1], a[ 4])
+		+ MUL15(a[ 2], a[ 3])) << 1);
+	t[ 6] = MUL15(a[ 3], a[ 3])
+		+ ((MUL15(a[ 0], a[ 6])
+		+ MUL15(a[ 1], a[ 5])
+		+ MUL15(a[ 2], a[ 4])) << 1);
+	t[ 7] = ((MUL15(a[ 0], a[ 7])
+		+ MUL15(a[ 1], a[ 6])
+		+ MUL15(a[ 2], a[ 5])
+		+ MUL15(a[ 3], a[ 4])) << 1);
+	t[ 8] = MUL15(a[ 4], a[ 4])
+		+ ((MUL15(a[ 0], a[ 8])
+		+ MUL15(a[ 1], a[ 7])
+		+ MUL15(a[ 2], a[ 6])
+		+ MUL15(a[ 3], a[ 5])) << 1);
+	t[ 9] = ((MUL15(a[ 0], a[ 9])
+		+ MUL15(a[ 1], a[ 8])
+		+ MUL15(a[ 2], a[ 7])
+		+ MUL15(a[ 3], a[ 6])
+		+ MUL15(a[ 4], a[ 5])) << 1);
+	t[10] = MUL15(a[ 5], a[ 5])
+		+ ((MUL15(a[ 0], a[10])
+		+ MUL15(a[ 1], a[ 9])
+		+ MUL15(a[ 2], a[ 8])
+		+ MUL15(a[ 3], a[ 7])
+		+ MUL15(a[ 4], a[ 6])) << 1);
+	t[11] = ((MUL15(a[ 0], a[11])
+		+ MUL15(a[ 1], a[10])
+		+ MUL15(a[ 2], a[ 9])
+		+ MUL15(a[ 3], a[ 8])
+		+ MUL15(a[ 4], a[ 7])
+		+ MUL15(a[ 5], a[ 6])) << 1);
+	t[12] = MUL15(a[ 6], a[ 6])
+		+ ((MUL15(a[ 0], a[12])
+		+ MUL15(a[ 1], a[11])
+		+ MUL15(a[ 2], a[10])
+		+ MUL15(a[ 3], a[ 9])
+		+ MUL15(a[ 4], a[ 8])
+		+ MUL15(a[ 5], a[ 7])) << 1);
+	t[13] = ((MUL15(a[ 0], a[13])
+		+ MUL15(a[ 1], a[12])
+		+ MUL15(a[ 2], a[11])
+		+ MUL15(a[ 3], a[10])
+		+ MUL15(a[ 4], a[ 9])
+		+ MUL15(a[ 5], a[ 8])
+		+ MUL15(a[ 6], a[ 7])) << 1);
+	t[14] = MUL15(a[ 7], a[ 7])
+		+ ((MUL15(a[ 0], a[14])
+		+ MUL15(a[ 1], a[13])
+		+ MUL15(a[ 2], a[12])
+		+ MUL15(a[ 3], a[11])
+		+ MUL15(a[ 4], a[10])
+		+ MUL15(a[ 5], a[ 9])
+		+ MUL15(a[ 6], a[ 8])) << 1);
+	t[15] = ((MUL15(a[ 0], a[15])
+		+ MUL15(a[ 1], a[14])
+		+ MUL15(a[ 2], a[13])
+		+ MUL15(a[ 3], a[12])
+		+ MUL15(a[ 4], a[11])
+		+ MUL15(a[ 5], a[10])
+		+ MUL15(a[ 6], a[ 9])
+		+ MUL15(a[ 7], a[ 8])) << 1);
+	t[16] = MUL15(a[ 8], a[ 8])
+		+ ((MUL15(a[ 0], a[16])
+		+ MUL15(a[ 1], a[15])
+		+ MUL15(a[ 2], a[14])
+		+ MUL15(a[ 3], a[13])
+		+ MUL15(a[ 4], a[12])
+		+ MUL15(a[ 5], a[11])
+		+ MUL15(a[ 6], a[10])
+		+ MUL15(a[ 7], a[ 9])) << 1);
+	t[17] = ((MUL15(a[ 0], a[17])
+		+ MUL15(a[ 1], a[16])
+		+ MUL15(a[ 2], a[15])
+		+ MUL15(a[ 3], a[14])
+		+ MUL15(a[ 4], a[13])
+		+ MUL15(a[ 5], a[12])
+		+ MUL15(a[ 6], a[11])
+		+ MUL15(a[ 7], a[10])
+		+ MUL15(a[ 8], a[ 9])) << 1);
+	t[18] = MUL15(a[ 9], a[ 9])
+		+ ((MUL15(a[ 0], a[18])
+		+ MUL15(a[ 1], a[17])
+		+ MUL15(a[ 2], a[16])
+		+ MUL15(a[ 3], a[15])
+		+ MUL15(a[ 4], a[14])
+		+ MUL15(a[ 5], a[13])
+		+ MUL15(a[ 6], a[12])
+		+ MUL15(a[ 7], a[11])
+		+ MUL15(a[ 8], a[10])) << 1);
+	t[19] = ((MUL15(a[ 0], a[19])
+		+ MUL15(a[ 1], a[18])
+		+ MUL15(a[ 2], a[17])
+		+ MUL15(a[ 3], a[16])
+		+ MUL15(a[ 4], a[15])
+		+ MUL15(a[ 5], a[14])
+		+ MUL15(a[ 6], a[13])
+		+ MUL15(a[ 7], a[12])
+		+ MUL15(a[ 8], a[11])
+		+ MUL15(a[ 9], a[10])) << 1);
+	t[20] = MUL15(a[10], a[10])
+		+ ((MUL15(a[ 1], a[19])
+		+ MUL15(a[ 2], a[18])
+		+ MUL15(a[ 3], a[17])
+		+ MUL15(a[ 4], a[16])
+		+ MUL15(a[ 5], a[15])
+		+ MUL15(a[ 6], a[14])
+		+ MUL15(a[ 7], a[13])
+		+ MUL15(a[ 8], a[12])
+		+ MUL15(a[ 9], a[11])) << 1);
+	t[21] = ((MUL15(a[ 2], a[19])
+		+ MUL15(a[ 3], a[18])
+		+ MUL15(a[ 4], a[17])
+		+ MUL15(a[ 5], a[16])
+		+ MUL15(a[ 6], a[15])
+		+ MUL15(a[ 7], a[14])
+		+ MUL15(a[ 8], a[13])
+		+ MUL15(a[ 9], a[12])
+		+ MUL15(a[10], a[11])) << 1);
+	t[22] = MUL15(a[11], a[11])
+		+ ((MUL15(a[ 3], a[19])
+		+ MUL15(a[ 4], a[18])
+		+ MUL15(a[ 5], a[17])
+		+ MUL15(a[ 6], a[16])
+		+ MUL15(a[ 7], a[15])
+		+ MUL15(a[ 8], a[14])
+		+ MUL15(a[ 9], a[13])
+		+ MUL15(a[10], a[12])) << 1);
+	t[23] = ((MUL15(a[ 4], a[19])
+		+ MUL15(a[ 5], a[18])
+		+ MUL15(a[ 6], a[17])
+		+ MUL15(a[ 7], a[16])
+		+ MUL15(a[ 8], a[15])
+		+ MUL15(a[ 9], a[14])
+		+ MUL15(a[10], a[13])
+		+ MUL15(a[11], a[12])) << 1);
+	t[24] = MUL15(a[12], a[12])
+		+ ((MUL15(a[ 5], a[19])
+		+ MUL15(a[ 6], a[18])
+		+ MUL15(a[ 7], a[17])
+		+ MUL15(a[ 8], a[16])
+		+ MUL15(a[ 9], a[15])
+		+ MUL15(a[10], a[14])
+		+ MUL15(a[11], a[13])) << 1);
+	t[25] = ((MUL15(a[ 6], a[19])
+		+ MUL15(a[ 7], a[18])
+		+ MUL15(a[ 8], a[17])
+		+ MUL15(a[ 9], a[16])
+		+ MUL15(a[10], a[15])
+		+ MUL15(a[11], a[14])
+		+ MUL15(a[12], a[13])) << 1);
+	t[26] = MUL15(a[13], a[13])
+		+ ((MUL15(a[ 7], a[19])
+		+ MUL15(a[ 8], a[18])
+		+ MUL15(a[ 9], a[17])
+		+ MUL15(a[10], a[16])
+		+ MUL15(a[11], a[15])
+		+ MUL15(a[12], a[14])) << 1);
+	t[27] = ((MUL15(a[ 8], a[19])
+		+ MUL15(a[ 9], a[18])
+		+ MUL15(a[10], a[17])
+		+ MUL15(a[11], a[16])
+		+ MUL15(a[12], a[15])
+		+ MUL15(a[13], a[14])) << 1);
+	t[28] = MUL15(a[14], a[14])
+		+ ((MUL15(a[ 9], a[19])
+		+ MUL15(a[10], a[18])
+		+ MUL15(a[11], a[17])
+		+ MUL15(a[12], a[16])
+		+ MUL15(a[13], a[15])) << 1);
+	t[29] = ((MUL15(a[10], a[19])
+		+ MUL15(a[11], a[18])
+		+ MUL15(a[12], a[17])
+		+ MUL15(a[13], a[16])
+		+ MUL15(a[14], a[15])) << 1);
+	t[30] = MUL15(a[15], a[15])
+		+ ((MUL15(a[11], a[19])
+		+ MUL15(a[12], a[18])
+		+ MUL15(a[13], a[17])
+		+ MUL15(a[14], a[16])) << 1);
+	t[31] = ((MUL15(a[12], a[19])
+		+ MUL15(a[13], a[18])
+		+ MUL15(a[14], a[17])
+		+ MUL15(a[15], a[16])) << 1);
+	t[32] = MUL15(a[16], a[16])
+		+ ((MUL15(a[13], a[19])
+		+ MUL15(a[14], a[18])
+		+ MUL15(a[15], a[17])) << 1);
+	t[33] = ((MUL15(a[14], a[19])
+		+ MUL15(a[15], a[18])
+		+ MUL15(a[16], a[17])) << 1);
+	t[34] = MUL15(a[17], a[17])
+		+ ((MUL15(a[15], a[19])
+		+ MUL15(a[16], a[18])) << 1);
+	t[35] = ((MUL15(a[16], a[19])
+		+ MUL15(a[17], a[18])) << 1);
+	t[36] = MUL15(a[18], a[18])
+		+ ((MUL15(a[17], a[19])) << 1);
+	t[37] = ((MUL15(a[18], a[19])) << 1);
+	t[38] = MUL15(a[19], a[19]);
+	d[39] = norm13(d, t, 39);	/* d[0..38]: 13-bit words; carry out of t[38] becomes d[39] */
+}
+
+#endif
+
+/*
+ * Modulus for field F256 (P-256 coordinates): 13-bit words, little-endian.
+ */
+static const uint32_t F256[] = {
+	0x1FFF, 0x1FFF, 0x1FFF, 0x1FFF, 0x1FFF, 0x1FFF, 0x1FFF, 0x001F,	/* bits 0..95 set: 2^96 - 1 */
+	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0400, 0x0000,	/* word 14, bit 10: 2^192 */
+	0x0000, 0x1FF8, 0x1FFF, 0x01FF	/* bits 224..255 set: 2^256 - 2^224 */
+};
+
+/*
+ * The 'b' curve equation coefficient for P-256 (20 words, 13 bits each).
+ */
+static const uint32_t P256_B[] = {
+	0x004B, 0x1E93, 0x0F89, 0x1C78, 0x03BC, 0x187B, 0x114E, 0x1619,
+	0x1D06, 0x0328, 0x01AF, 0x0D31, 0x1557, 0x15DE, 0x1ECF, 0x127C,
+	0x0A3A, 0x0EC5, 0x118D, 0x00B5
+};	/* presumably least significant word first, like F256; TODO confirm */
+
+/*
+ * Perform a "short reduction" in field F256 (field for curve P-256).
+ * The source value should be less than 262 bits; on output, it will
+ * be at most 257 bits, and less than twice the modulus.
+ */
+static void
+reduce_f256(uint32_t *d)
+{
+	uint32_t x;
+	/* Fold bits 256+ back in: 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p. */
+	x = d[19] >> 9;	/* bits 256 and above (word 19 starts at bit 247) */
+	d[19] &= 0x01FF;
+	d[17] += x << 3;	/* + x*2^224 (224 = 13*17 + 3) */
+	d[14] -= x << 10;	/* - x*2^192 (192 = 13*14 + 10) */
+	d[7] -= x << 5;	/* - x*2^96 (96 = 13*7 + 5) */
+	d[0] += x;	/* + x */
+	norm13(d, d, 20);
+}
+
+/*
+ * Perform a "final reduction" in field F256 (field for curve P-256).
+ * The source value must be less than twice the modulus. If the value
+ * is not lower than the modulus, then the modulus is subtracted and
+ * this function returns 1; otherwise, it leaves it untouched and it
+ * returns 0.
+ */
+static uint32_t
+reduce_final_f256(uint32_t *d)
+{
+	uint32_t t[20];
+	uint32_t cc;
+	int i;
+
+	memcpy(t, d, sizeof t);	/* the subtraction is tried on a copy */
+	cc = 0;	/* running borrow */
+	for (i = 0; i < 20; i ++) {
+		uint32_t w;
+
+		w = t[i] - F256[i] - cc;
+		cc = w >> 31;	/* 1 if this word borrowed */
+		t[i] = w & 0x1FFF;
+	}
+	cc ^= 1;	/* 1 if there was no final borrow, i.e. d >= p */
+	CCOPY(cc, d, t, sizeof t);	/* d takes the value d - p iff cc == 1 */
+	return cc;
+}
+
+/*
+ * Perform a multiplication of two integers modulo
+ * 2^256-2^224+2^192+2^96-1 (for NIST curve P-256). Operands are arrays
+ * of 20 words, each containing 13 bits of data, in little-endian order.
+ * On input, upper word may be up to 13 bits (hence value up to 2^260-1);
+ * on output, value fits on 257 bits and is lower than twice the modulus.
+ */
+static void
+mul_f256(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t t[40], cc;
+	int i;
+
+	/*
+	 * Compute raw multiplication. All result words fit in 13 bits
+	 * each.
+	 */
+	mul20(t, a, b);
+
+	/*
+	 * Modular reduction: each high word is added/subtracted where
+	 * necessary.
+	 *
+	 * The modulus is:
+	 *    p = 2^256 - 2^224 + 2^192 + 2^96 - 1
+	 * Therefore:
+	 *    2^256 = 2^224 - 2^192 - 2^96 + 1 mod p
+	 *
+	 * For a word x at bit offset n (n >= 256), we have:
+	 *    x*2^n = x*2^(n-32) - x*2^(n-64)
+	 *            - x*2^(n - 160) + x*2^(n-256) mod p
+	 *
+	 * Thus, we can nullify the high word if we reinject it at the
+	 * proper places (each term straddles two 13-bit words).
+	 */
+	for (i = 39; i >= 20; i --) {
+		uint32_t x;
+
+		x = t[i];
+		t[i - 2] += ARSH(x, 6);	/* + x*2^(n-32), upper part */
+		t[i - 3] += (x << 7) & 0x1FFF;	/* + x*2^(n-32), lower part */
+		t[i - 4] -= ARSH(x, 12);	/* - x*2^(n-64), upper part */
+		t[i - 5] -= (x << 1) & 0x1FFF;	/* - x*2^(n-64), lower part */
+		t[i - 12] -= ARSH(x, 4);	/* - x*2^(n-160), upper part */
+		t[i - 13] -= (x << 9) & 0x1FFF;	/* - x*2^(n-160), lower part */
+		t[i - 19] += ARSH(x, 9);	/* + x*2^(n-256), upper part */
+		t[i - 20] += (x << 4) & 0x1FFF;	/* + x*2^(n-256), lower part */
+	}
+
+	/*
+	 * Propagate carries. This is a signed propagation, and the
+	 * result may be negative. The loop above may enlarge values,
+	 * but not too much: worst case is the chain involving t[i - 3],
+	 * in which a value may be added to itself up to 7 times. Since
+	 * starting values are 13-bit each, all words fit on 20 bits
+	 * (21 to account for the sign bit).
+	 */
+	cc = norm13(t, t, 20);
+
+	/*
+	 * Perform modular reduction again for the bits beyond 256 (the carry
+	 * and the bits 256..259). Since the largest shift below is by 10
+	 * bits, and the values fit on 21 bits, values fit in 32-bit words,
+	 * thereby allowing injecting full word values.
+	 */
+	cc = (cc << 4) | (t[19] >> 9);
+	t[19] &= 0x01FF;
+	t[17] += cc << 3;	/* + cc*2^224 */
+	t[14] -= cc << 10;	/* - cc*2^192 */
+	t[7] -= cc << 5;	/* - cc*2^96 */
+	t[0] += cc;	/* + cc */
+
+	/*
+	 * If the carry is negative, then after carry propagation, we may
+	 * end up with a value which is negative, and we don't want that.
+	 * Thus, in that case, we add the modulus. Note that the subtraction
+	 * result, when the carry is negative, is always smaller than the
+	 * modulus, so the extra addition will not make the value exceed
+	 * twice the modulus.
+	 */
+	cc >>= 31;	/* 1 if the carry was negative, 0 otherwise */
+	t[0] -= cc;	/* this line and the next four: t += cc*p */
+	t[7] += cc << 5;
+	t[14] += cc << 10;
+	t[17] -= cc << 3;
+	t[19] += cc << 9;
+
+	norm13(d, t, 20);
+}
+
+/*
+ * Square an integer modulo 2^256-2^224+2^192+2^96-1 (for NIST curve
+ * P-256). Operand is an array of 20 words, each containing 13 bits of
+ * data, in little-endian order. On input, upper word may be up to 13
+ * bits (hence value up to 2^260-1); on output, value fits on 257 bits
+ * and is lower than twice the modulus.
+ */
+static void
+square_f256(uint32_t *d, const uint32_t *a)
+{
+	uint32_t t[40], cc;
+	int i;
+
+	/*
+	 * Compute raw square. All result words fit in 13 bits each.
+	 */
+	square20(t, a);
+
+	/*
+	 * Modular reduction: each high word is added/subtracted where
+	 * necessary.
+	 *
+	 * The modulus is:
+	 *    p = 2^256 - 2^224 + 2^192 + 2^96 - 1
+	 * Therefore:
+	 *    2^256 = 2^224 - 2^192 - 2^96 + 1 mod p
+	 *
+	 * For a word x at bit offset n (n >= 256), we have:
+	 *    x*2^n = x*2^(n-32) - x*2^(n-64)
+	 *            - x*2^(n - 160) + x*2^(n-256) mod p
+	 *
+	 * Thus, we can nullify the high word if we reinject it at the
+	 * proper places (each term straddles two 13-bit words).
+	 */
+	for (i = 39; i >= 20; i --) {
+		uint32_t x;
+
+		x = t[i];
+		t[i - 2] += ARSH(x, 6);	/* + x*2^(n-32), upper part */
+		t[i - 3] += (x << 7) & 0x1FFF;	/* + x*2^(n-32), lower part */
+		t[i - 4] -= ARSH(x, 12);	/* - x*2^(n-64), upper part */
+		t[i - 5] -= (x << 1) & 0x1FFF;	/* - x*2^(n-64), lower part */
+		t[i - 12] -= ARSH(x, 4);	/* - x*2^(n-160), upper part */
+		t[i - 13] -= (x << 9) & 0x1FFF;	/* - x*2^(n-160), lower part */
+		t[i - 19] += ARSH(x, 9);	/* + x*2^(n-256), upper part */
+		t[i - 20] += (x << 4) & 0x1FFF;	/* + x*2^(n-256), lower part */
+	}
+
+	/*
+	 * Propagate carries. This is a signed propagation, and the
+	 * result may be negative. The loop above may enlarge values,
+	 * but not too much: worst case is the chain involving t[i - 3],
+	 * in which a value may be added to itself up to 7 times. Since
+	 * starting values are 13-bit each, all words fit on 20 bits
+	 * (21 to account for the sign bit).
+	 */
+	cc = norm13(t, t, 20);
+
+	/*
+	 * Perform modular reduction again for the bits beyond 256 (the carry
+	 * and the bits 256..259). Since the largest shift below is by 10
+	 * bits, and the values fit on 21 bits, values fit in 32-bit words,
+	 * thereby allowing injecting full word values.
+	 */
+	cc = (cc << 4) | (t[19] >> 9);
+	t[19] &= 0x01FF;
+	t[17] += cc << 3;	/* + cc*2^224 */
+	t[14] -= cc << 10;	/* - cc*2^192 */
+	t[7] -= cc << 5;	/* - cc*2^96 */
+	t[0] += cc;	/* + cc */
+
+	/*
+	 * If the carry is negative, then after carry propagation, we may
+	 * end up with a value which is negative, and we don't want that.
+	 * Thus, in that case, we add the modulus. Note that the subtraction
+	 * result, when the carry is negative, is always smaller than the
+	 * modulus, so the extra addition will not make the value exceed
+	 * twice the modulus.
+	 */
+	cc >>= 31;	/* 1 if the carry was negative, 0 otherwise */
+	t[0] -= cc;	/* this line and the next four: t += cc*p */
+	t[7] += cc << 5;
+	t[14] += cc << 10;
+	t[17] -= cc << 3;
+	t[19] += cc << 9;
+
+	norm13(d, t, 20);
+}
+
+/*
+ * Jacobian coordinates for a point in P-256: affine coordinates (X,Y)
+ * are such that:
+ *   X = x / z^2
+ *   Y = y / z^3
+ * For the point at infinity, z = 0.
+ * Each point thus admits many possible representations.
+ *
+ * Coordinates are represented in arrays of 32-bit integers, each holding
+ * 13 bits of data. Values may also be slightly greater than the modulus,
+ * but they will always be lower than twice the modulus.
+ */
+typedef struct {
+	uint32_t x[20];	/* x = X * z^2 mod p */
+	uint32_t y[20];	/* y = Y * z^3 mod p */
+	uint32_t z[20];	/* zero (mod p) for the point at infinity */
+} p256_jacobian;
+
+/*
+ * Convert a point to affine coordinates:
+ *  - If the point is the point at infinity, then all three coordinates
+ *    are set to 0.
+ *  - Otherwise, the 'z' coordinate is set to 1, and the 'x' and 'y'
+ *    coordinates are the 'X' and 'Y' affine coordinates.
+ * The coordinates are guaranteed to be lower than the modulus.
+ */
+static void
+p256_to_affine(p256_jacobian *P)
+{
+	uint32_t t1[20], t2[20];
+	int i;
+
+	/*
+	 * Invert z with a modular exponentiation: the modulus is
+	 * p = 2^256 - 2^224 + 2^192 + 2^96 - 1, and the exponent is
+	 * p-2. Exponent bit pattern (from high to low) is:
+	 *  - 32 bits of value 1
+	 *  - 31 bits of value 0
+	 *  - 1 bit of value 1
+	 *  - 96 bits of value 0
+	 *  - 94 bits of value 1
+	 *  - 1 bit of value 0
+	 *  - 1 bit of value 1
+	 * Thus, we precompute z^(2^31-1) to speed things up.
+	 *
+	 * If z = 0 (point at infinity) then the modular exponentiation
+	 * will yield 0, which leads to the expected result (all three
+	 * coordinates set to 0).
+	 */
+
+	/*
+	 * A simple square-and-multiply for z^(2^31-1). We could save about
+	 * two dozen multiplications here with an addition chain, but
+	 * this would require a bit more code, and extra stack buffers.
+	 */
+	memcpy(t1, P->z, sizeof P->z);
+	for (i = 0; i < 30; i ++) {	/* each pass: exponent e -> 2*e + 1 */
+		square_f256(t1, t1);
+		mul_f256(t1, t1, P->z);
+	}
+
+	/*
+	 * Square-and-multiply. Apart from the squarings, we have a few
+	 * multiplications to set bits to 1; we multiply by the original z
+	 * for setting 1 bit, and by t1 for setting 31 bits.
+	 */
+	memcpy(t2, P->z, sizeof P->z);	/* top exponent bit is already set */
+	for (i = 1; i < 256; i ++) {
+		square_f256(t2, t2);
+		switch (i) {
+		case 31:
+		case 190:
+		case 221:
+		case 252:
+			mul_f256(t2, t2, t1);	/* set the 31 low bits */
+			break;
+		case 63:
+		case 253:
+		case 255:
+			mul_f256(t2, t2, P->z);	/* set the lowest bit */
+			break;
+		}
+	}
+
+	/*
+	 * Now that we have 1/z, multiply x by 1/z^2 and y by 1/z^3.
+	 */
+	mul_f256(t1, t2, t2);	/* t1 = 1/z^2 */
+	mul_f256(P->x, t1, P->x);
+	mul_f256(t1, t1, t2);	/* t1 = 1/z^3 */
+	mul_f256(P->y, t1, P->y);
+	reduce_final_f256(P->x);
+	reduce_final_f256(P->y);
+
+	/*
+	 * Multiply z by 1/z. If z = 0, then this will yield 0, otherwise
+	 * this will set z to 1.
+	 */
+	mul_f256(P->z, P->z, t2);
+	reduce_final_f256(P->z);
+}
+
+/*
+ * Double a point in P-256. This function works for all valid points,
+ * including the point at infinity.
+ */
+static void
+p256_double(p256_jacobian *Q)
+{
+	/*
+	 * Doubling formulas are:
+	 *
+	 *   s = 4*x*y^2
+	 *   m = 3*(x + z^2)*(x - z^2)
+	 *   x' = m^2 - 2*s
+	 *   y' = m*(s - x') - 8*y^4
+	 *   z' = 2*y*z
+	 *
+	 * These formulas work for all points, including points of order 2
+	 * and points at infinity:
+	 *   - If y = 0 then z' = 0. But there is no such point in P-256
+	 *     anyway.
+	 *   - If z = 0 then z' = 0.
+	 */
+	uint32_t t1[20], t2[20], t3[20], t4[20];
+	int i;
+
+	/*
+	 * Compute z^2 in t1.
+	 */
+	square_f256(t1, Q->z);
+
+	/*
+	 * Compute x-z^2 in t2 and x+z^2 in t1.
+	 */
+	for (i = 0; i < 20; i ++) {
+		t2[i] = (F256[i] << 1) + Q->x[i] - t1[i];	/* +2*p: no negative */
+		t1[i] += Q->x[i];
+	}
+	norm13(t1, t1, 20);
+	norm13(t2, t2, 20);
+
+	/*
+	 * Compute 3*(x+z^2)*(x-z^2) in t1.
+	 */
+	mul_f256(t3, t1, t2);
+	for (i = 0; i < 20; i ++) {
+		t1[i] = MUL15(3, t3[i]);
+	}
+	norm13(t1, t1, 20);
+
+	/*
+	 * Compute 4*x*y^2 (in t2) and 2*y^2 (in t3).
+	 */
+	square_f256(t3, Q->y);
+	for (i = 0; i < 20; i ++) {
+		t3[i] <<= 1;
+	}
+	norm13(t3, t3, 20);
+	mul_f256(t2, Q->x, t3);	/* t2 = 2*x*y^2, doubled just below */
+	for (i = 0; i < 20; i ++) {
+		t2[i] <<= 1;
+	}
+	norm13(t2, t2, 20);
+	reduce_f256(t2);
+
+	/*
+	 * Compute x' = m^2 - 2*s.
+	 */
+	square_f256(Q->x, t1);
+	for (i = 0; i < 20; i ++) {
+		Q->x[i] += (F256[i] << 2) - (t2[i] << 1);	/* +4*p covers 2*s */
+	}
+	norm13(Q->x, Q->x, 20);
+	reduce_f256(Q->x);
+
+	/*
+	 * Compute z' = 2*y*z.
+	 */
+	mul_f256(t4, Q->y, Q->z);
+	for (i = 0; i < 20; i ++) {
+		Q->z[i] = t4[i] << 1;
+	}
+	norm13(Q->z, Q->z, 20);
+	reduce_f256(Q->z);
+
+	/*
+	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
+	 * 2*y^2 in t3.
+	 */
+	for (i = 0; i < 20; i ++) {
+		t2[i] += (F256[i] << 1) - Q->x[i];	/* s - x' + 2*p */
+	}
+	norm13(t2, t2, 20);
+	mul_f256(Q->y, t1, t2);
+	square_f256(t4, t3);	/* t4 = (2*y^2)^2 = 4*y^4 */
+	for (i = 0; i < 20; i ++) {
+		Q->y[i] += (F256[i] << 2) - (t4[i] << 1);	/* -8*y^4 + 4*p */
+	}
+	norm13(Q->y, Q->y, 20);
+	reduce_f256(Q->y);
+}
+
+/*
+ * Add point P2 to point P1.
+ *
+ * This function computes the wrong result in the following cases:
+ *
+ *   - If P1 == 0 but P2 != 0
+ *   - If P1 != 0 but P2 == 0
+ *   - If P1 == P2
+ *
+ * In all three cases, P1 is set to the point at infinity.
+ *
+ * Returned value is 0 if one of the following occurs:
+ *
+ *   - P1 and P2 have the same Y coordinate
+ *   - P1 == 0 and P2 == 0
+ *   - The Y coordinate of one of the points is 0 and the other point is
+ *     the point at infinity.
+ *
+ * The third case cannot actually happen with valid points, since a point
+ * with Y == 0 is a point of order 2, and there is no point of order 2 on
+ * curve P-256.
+ *
+ * Therefore, assuming that P1 != 0 and P2 != 0 on input, then the caller
+ * can apply the following:
+ *
+ *   - If the result is not the point at infinity, then it is correct.
+ *   - Otherwise, if the returned value is 1, then this is a case of
+ *     P1+P2 == 0, so the result is indeed the point at infinity.
+ *   - Otherwise, P1 == P2, so a "double" operation should have been
+ *     performed.
+ */
+static uint32_t
+p256_add(p256_jacobian *P1, const p256_jacobian *P2)
+{
+	/*
+	 * Addition formulas are:
+	 *
+	 *   u1 = x1 * z2^2
+	 *   u2 = x2 * z1^2
+	 *   s1 = y1 * z2^3
+	 *   s2 = y2 * z1^3
+	 *   h = u2 - u1
+	 *   r = s2 - s1
+	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
+	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+	 *   z3 = h * z1 * z2
+	 */
+	uint32_t t1[20], t2[20], t3[20], t4[20], t5[20], t6[20], t7[20];
+	uint32_t ret;
+	int i;
+
+	/*
+	 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
+	 */
+	square_f256(t3, P2->z);
+	mul_f256(t1, P1->x, t3);
+	mul_f256(t4, P2->z, t3);
+	mul_f256(t3, P1->y, t4);
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	square_f256(t4, P1->z);
+	mul_f256(t2, P2->x, t4);
+	mul_f256(t5, P1->z, t4);
+	mul_f256(t4, P2->y, t5);
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 * We need to test whether r is zero, so we will do some extra
+	 * reduce.
+	 */
+	for (i = 0; i < 20; i ++) {
+		t2[i] += (F256[i] << 1) - t1[i];	/* +2*p: no negative */
+		t4[i] += (F256[i] << 1) - t3[i];
+	}
+	norm13(t2, t2, 20);
+	norm13(t4, t4, 20);
+	reduce_f256(t4);
+	reduce_final_f256(t4);	/* r fully reduced, so r == 0 is all-zero */
+	ret = 0;
+	for (i = 0; i < 20; i ++) {
+		ret |= t4[i];
+	}
+	ret = (ret | -ret) >> 31;	/* 1 if r != 0, 0 if r == 0 */
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5);
+	 */
+	square_f256(t7, t2);
+	mul_f256(t6, t1, t7);
+	mul_f256(t5, t7, t2);
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 */
+	square_f256(P1->x, t4);
+	for (i = 0; i < 20; i ++) {
+		P1->x[i] += (F256[i] << 3) - t5[i] - (t6[i] << 1);	/* +8*p */
+	}
+	norm13(P1->x, P1->x, 20);
+	reduce_f256(P1->x);
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	for (i = 0; i < 20; i ++) {
+		t6[i] += (F256[i] << 1) - P1->x[i];	/* u1*h^2 - x3 + 2*p */
+	}
+	norm13(t6, t6, 20);
+	mul_f256(P1->y, t4, t6);
+	mul_f256(t1, t5, t3);	/* t1 = s1*h^3 (u1 is no longer needed) */
+	for (i = 0; i < 20; i ++) {
+		P1->y[i] += (F256[i] << 1) - t1[i];
+	}
+	norm13(P1->y, P1->y, 20);
+	reduce_f256(P1->y);
+
+	/*
+	 * Compute z3 = h*z1*z2.
+	 */
+	mul_f256(t1, P1->z, P2->z);
+	mul_f256(P1->z, t1, t2);
+
+	return ret;
+}
+
+/*
+ * Add point P2 to point P1. This is a specialised function for the
+ * case when P2 is a non-zero point in affine coordinate.
+ *
+ * This function computes the wrong result in the following cases:
+ *
+ *   - If P1 == 0
+ *   - If P1 == P2
+ *
+ * In both cases, P1 is set to the point at infinity.
+ *
+ * Returned value is 0 if one of the following occurs:
+ *
+ *   - P1 and P2 have the same Y coordinate
+ *   - The Y coordinate of P2 is 0 and P1 is the point at infinity.
+ *
+ * The second case cannot actually happen with valid points, since a point
+ * with Y == 0 is a point of order 2, and there is no point of order 2 on
+ * curve P-256.
+ *
+ * Therefore, assuming that P1 != 0 on input, then the caller
+ * can apply the following:
+ *
+ *   - If the result is not the point at infinity, then it is correct.
+ *   - Otherwise, if the returned value is 1, then this is a case of
+ *     P1+P2 == 0, so the result is indeed the point at infinity.
+ *   - Otherwise, P1 == P2, so a "double" operation should have been
+ *     performed.
+ */
+static uint32_t
+p256_add_mixed(p256_jacobian *P1, const p256_jacobian *P2)
+{
+	/*
+	 * Addition formulas are:
+	 *
+	 *   u1 = x1
+	 *   u2 = x2 * z1^2
+	 *   s1 = y1
+	 *   s2 = y2 * z1^3
+	 *   h = u2 - u1
+	 *   r = s2 - s1
+	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
+	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+	 *   z3 = h * z1
+	 */
+	uint32_t t1[20], t2[20], t3[20], t4[20], t5[20], t6[20], t7[20];
+	uint32_t ret;
+	int i;
+
+	/*
+	 * Compute u1 = x1 (in t1) and s1 = y1 (in t3).
+	 */
+	memcpy(t1, P1->x, sizeof t1);
+	memcpy(t3, P1->y, sizeof t3);
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	square_f256(t4, P1->z);
+	mul_f256(t2, P2->x, t4);
+	mul_f256(t5, P1->z, t4);
+	mul_f256(t4, P2->y, t5);
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 * We need to test whether r is zero, so we will do some extra
+	 * reduce.
+	 */
+	for (i = 0; i < 20; i ++) {
+		t2[i] += (F256[i] << 1) - t1[i];	/* +2*p: no negative */
+		t4[i] += (F256[i] << 1) - t3[i];
+	}
+	norm13(t2, t2, 20);
+	norm13(t4, t4, 20);
+	reduce_f256(t4);
+	reduce_final_f256(t4);	/* r fully reduced, so r == 0 is all-zero */
+	ret = 0;
+	for (i = 0; i < 20; i ++) {
+		ret |= t4[i];
+	}
+	ret = (ret | -ret) >> 31;	/* 1 if r != 0, 0 if r == 0 */
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5);
+	 */
+	square_f256(t7, t2);
+	mul_f256(t6, t1, t7);
+	mul_f256(t5, t7, t2);
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 */
+	square_f256(P1->x, t4);
+	for (i = 0; i < 20; i ++) {
+		P1->x[i] += (F256[i] << 3) - t5[i] - (t6[i] << 1);	/* +8*p */
+	}
+	norm13(P1->x, P1->x, 20);
+	reduce_f256(P1->x);
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	for (i = 0; i < 20; i ++) {
+		t6[i] += (F256[i] << 1) - P1->x[i];	/* u1*h^2 - x3 + 2*p */
+	}
+	norm13(t6, t6, 20);
+	mul_f256(P1->y, t4, t6);
+	mul_f256(t1, t5, t3);	/* t1 = s1*h^3 (u1 is no longer needed) */
+	for (i = 0; i < 20; i ++) {
+		P1->y[i] += (F256[i] << 1) - t1[i];
+	}
+	norm13(P1->y, P1->y, 20);
+	reduce_f256(P1->y);
+
+	/*
+	 * Compute z3 = h*z1 (P2 is affine; its z is not read here).
+	 */
+	mul_f256(P1->z, P1->z, t2);
+
+	return ret;
+}
+
+/*
+ * Decode a P-256 point. This function does not support the point at
+ * infinity. Returned value is 0 if the point is invalid, 1 otherwise.
+ */
+static uint32_t
+p256_decode(p256_jacobian *P, const void *src, size_t len)
+{
+	const unsigned char *buf;
+	uint32_t tx[20], ty[20], t1[20], t2[20];
+	uint32_t bad;
+	int i;
+
+	if (len != 65) {	/* 1 format byte + two 32-byte coordinates */
+		return 0;
+	}
+	buf = src;
+
+	/*
+	 * First byte must be 0x04 (uncompressed format). We could support
+	 * "hybrid format" (first byte is 0x06 or 0x07, and encodes the
+	 * least significant bit of the Y coordinate), but it is explicitly
+	 * forbidden by RFC 5480 (section 2.2).
+	 */
+	bad = NEQ(buf[0], 0x04);
+
+	/*
+	 * Decode the coordinates, and check that they are both lower
+	 * than the modulus.
+	 */
+	tx[19] = be8_to_le13(tx, buf + 1, 32);
+	ty[19] = be8_to_le13(ty, buf + 33, 32);
+	bad |= reduce_final_f256(tx);	/* returns 1 if tx was >= p */
+	bad |= reduce_final_f256(ty);	/* returns 1 if ty was >= p */
+
+	/*
+	 * Check curve equation: x^3 - 3*x + b - y^2 must be 0 modulo p.
+	 */
+	square_f256(t1, tx);
+	mul_f256(t1, tx, t1);	/* t1 = x^3 */
+	square_f256(t2, ty);	/* t2 = y^2 */
+	for (i = 0; i < 20; i ++) {
+		t1[i] += (F256[i] << 3) - MUL15(3, tx[i]) + P256_B[i] - t2[i];
+	}
+	norm13(t1, t1, 20);
+	reduce_f256(t1);
+	reduce_final_f256(t1);
+	for (i = 0; i < 20; i ++) {
+		bad |= t1[i];	/* any non-zero word: not on the curve */
+	}
+
+	/*
+	 * Copy coordinates to the point structure (even if invalid).
+	 */
+	memcpy(P->x, tx, sizeof tx);
+	memcpy(P->y, ty, sizeof ty);
+	memset(P->z, 0, sizeof P->z);
+	P->z[0] = 1;	/* affine point: z = 1 */
+	return EQ(bad, 0);
+}
+
+/*
+ * Encode a point into a buffer. This function assumes that the point is
+ * valid, in affine coordinates, and not the point at infinity.
+ */
+static void
+p256_encode(void *dst, const p256_jacobian *P)
+{
+	unsigned char *buf;
+
+	buf = dst;	/* 65 bytes are written: 1 + 32 + 32 */
+	buf[0] = 0x04;	/* uncompressed format marker */
+	le13_to_be8(buf + 1, 32, P->x);	/* X in bytes 1..32 */
+	le13_to_be8(buf + 33, 32, P->y);	/* Y in bytes 33..64 */
+}
+
+/*
+ * Multiply a curve point by an integer. The integer is assumed to be
+ * lower than the curve order, and the base point must not be the point
+ * at infinity.
+ */
+static void
+p256_mul(p256_jacobian *P, const unsigned char *x, size_t xlen)
+{
+	/*
+	 * qz is a flag that is initially 1, and remains equal to 1
+	 * as long as the point is the point at infinity.
+	 *
+	 * We use a 2-bit window to handle multiplier bits by pairs.
+	 * The precomputed window really is the points P2 and P3.
+	 */
+	uint32_t qz;
+	p256_jacobian P2, P3, Q, T, U;
+
+	/*
+	 * Compute window values.
+	 */
+	P2 = *P;
+	p256_double(&P2);	/* P2 = 2*P */
+	P3 = *P;
+	p256_add(&P3, &P2);	/* P3 = P + 2*P */
+
+	/*
+	 * We start with Q = 0. We process multiplier bits 2 by 2.
+	 */
+	memset(&Q, 0, sizeof Q);
+	qz = 1;
+	while (xlen -- > 0) {	/* bytes of x are consumed in order */
+		int k;
+
+		for (k = 6; k >= 0; k -= 2) {	/* top bit pair first */
+			uint32_t bits;
+			uint32_t bnz;
+
+			p256_double(&Q);
+			p256_double(&Q);	/* Q = 4*Q */
+			T = *P;	/* default choice, for bits == 1 */
+			U = Q;
+			bits = (*x >> k) & (uint32_t)3;
+			bnz = NEQ(bits, 0);
+			CCOPY(EQ(bits, 2), &T, &P2, sizeof T);
+			CCOPY(EQ(bits, 3), &T, &P3, sizeof T);
+			p256_add(&U, &T);	/* computed for every bit pair */
+			CCOPY(bnz & qz, &Q, &T, sizeof Q);	/* Q was 0: take T */
+			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);	/* else take Q + T */
+			qz &= ~bnz;	/* cleared at the first non-zero pair */
+		}
+		x ++;
+	}
+	*P = Q;
+}
+
+/*
+ * Precomputed window: Gwin[k-1] is k*G, where G is the curve generator
+ * and k is an integer from 1 to 15 (inclusive). Each of the X and Y
+ * coordinates is 20 little-endian words of 13 bits, packed 2-by-2 into
+ * ten 32-bit words (first word of a pair in the low 16 bits); the ten
+ * X words come first, followed by the ten Y words.
+ */
+static const uint32_t Gwin[15][20] = {
+
+	{ 0x04C60296, 0x02721176, 0x19D00F4A, 0x102517AC,
+	  0x13B8037D, 0x0748103C, 0x1E730E56, 0x08481FE2,
+	  0x0F97012C, 0x00D605F4, 0x1DFA11F5, 0x0C801A0D,
+	  0x0F670CBB, 0x0AED0CC5, 0x115E0E33, 0x181F0785,
+	  0x13F514A7, 0x0FF30E3B, 0x17171E1A, 0x009F18D0 },
+
+	{ 0x1B341978, 0x16911F11, 0x0D9A1A60, 0x1C4E1FC8,
+	  0x1E040969, 0x096A06B0, 0x091C0030, 0x09EF1A29,
+	  0x18C40D03, 0x00F91C9E, 0x13C313D1, 0x096F0748,
+	  0x011419E0, 0x1CC713A6, 0x1DD31DAD, 0x1EE80C36,
+	  0x1ECD0C69, 0x1A0800A4, 0x08861B8E, 0x000E1DD5 },
+
+	{ 0x173F1D6C, 0x02CC06F1, 0x14C21FB4, 0x043D1EB6,
+	  0x0F3606B7, 0x1A971C59, 0x1BF71951, 0x01481323,
+	  0x068D0633, 0x00BD12F9, 0x13EA1032, 0x136209E8,
+	  0x1C1E19A7, 0x06C7013E, 0x06C10AB0, 0x14C908BB,
+	  0x05830CE1, 0x1FEF18DD, 0x00620998, 0x010E0D19 },
+
+	{ 0x18180852, 0x0604111A, 0x0B771509, 0x1B6F0156,
+	  0x00181FE2, 0x1DCC0AF4, 0x16EF0659, 0x11F70E80,
+	  0x11A912D0, 0x01C414D2, 0x027618C6, 0x05840FC6,
+	  0x100215C4, 0x187E0C3B, 0x12771C96, 0x150C0B5D,
+	  0x0FF705FD, 0x07981C67, 0x1AD20C63, 0x01C11C55 },
+
+	{ 0x1E8113ED, 0x0A940370, 0x12920215, 0x1FA31D6F,
+	  0x1F7C0C82, 0x10CD03F7, 0x02640560, 0x081A0B5E,
+	  0x1BD21151, 0x00A21642, 0x0D0B0DA4, 0x0176113F,
+	  0x04440D1D, 0x001A1360, 0x1068012F, 0x1F141E49,
+	  0x10DF136B, 0x0E4F162B, 0x0D44104A, 0x01C1105F },
+
+	{ 0x011411A9, 0x01551A4F, 0x0ADA0C6B, 0x01BD0EC8,
+	  0x18120C74, 0x112F1778, 0x099202CB, 0x0C05124B,
+	  0x195316A4, 0x01600685, 0x1E3B1FE2, 0x189014E3,
+	  0x0B5E1FD7, 0x0E0311F8, 0x08E000F7, 0x174E00DE,
+	  0x160702DF, 0x1B5A15BF, 0x03A11237, 0x01D01704 },
+
+	{ 0x0C3D12A3, 0x0C501C0C, 0x17AD1300, 0x1715003F,
+	  0x03F719F8, 0x18031ED8, 0x1D980667, 0x0F681896,
+	  0x1B7D00BF, 0x011C14CE, 0x0FA000B4, 0x1C3501B0,
+	  0x0D901C55, 0x06790C10, 0x029E0736, 0x0DEB0400,
+	  0x034F183A, 0x030619B4, 0x0DEF0033, 0x00E71AC7 },
+
+	{ 0x1B7D1393, 0x1B3B1076, 0x0BED1B4D, 0x13011F3A,
+	  0x0E0E1238, 0x156A132B, 0x013A02D3, 0x160A0D01,
+	  0x1CED1EE9, 0x00C5165D, 0x184C157E, 0x08141A83,
+	  0x153C0DA5, 0x1ED70F9D, 0x05170D51, 0x02CF13B8,
+	  0x18AE1771, 0x1B04113F, 0x05EC11E9, 0x015A16B3 },
+
+	{ 0x04A41EE0, 0x1D1412E4, 0x1C591D79, 0x118511B7,
+	  0x14F00ACB, 0x1AE31E1C, 0x049C0D51, 0x016E061E,
+	  0x1DB71EDF, 0x01D41A35, 0x0E8208FA, 0x14441293,
+	  0x011F1E85, 0x1D54137A, 0x026B114F, 0x151D0832,
+	  0x00A50964, 0x1F9C1E1C, 0x064B12C9, 0x005409D1 },
+
+	{ 0x062B123F, 0x0C0D0501, 0x183704C3, 0x08E31120,
+	  0x0A2E0A6C, 0x14440FED, 0x090A0D1E, 0x13271964,
+	  0x0B590A3A, 0x019D1D9B, 0x05780773, 0x09770A91,
+	  0x0F770CA3, 0x053F19D4, 0x02C80DED, 0x1A761304,
+	  0x091E0DD9, 0x15D201B8, 0x151109AA, 0x010F0198 },
+
+	{ 0x05E101D1, 0x072314DD, 0x045F1433, 0x1A041541,
+	  0x10B3142E, 0x01840736, 0x1C1B19DB, 0x098B0418,
+	  0x1DBC083B, 0x007D1444, 0x01511740, 0x11DD1F3A,
+	  0x04ED0E2F, 0x1B4B1A62, 0x10480D04, 0x09E911A2,
+	  0x04211AFA, 0x19140893, 0x04D60CC4, 0x01210648 },
+
+	{ 0x112703C4, 0x018B1BA1, 0x164C1D50, 0x05160BE0,
+	  0x0BCC1830, 0x01CB1554, 0x13291732, 0x1B2B1918,
+	  0x0DED0817, 0x00E80775, 0x0A2401D3, 0x0BFE08B3,
+	  0x0E531199, 0x058616E9, 0x04770B91, 0x110F0C55,
+	  0x19C11554, 0x0BFB1159, 0x03541C38, 0x000E1C2D },
+
+	{ 0x10390C01, 0x02BB0751, 0x0AC5098E, 0x096C17AB,
+	  0x03C90E28, 0x10BD18BF, 0x002E1F2D, 0x092B0986,
+	  0x1BD700AC, 0x002E1F20, 0x1E3D1FD8, 0x077718BB,
+	  0x06F919C4, 0x187407ED, 0x11370E14, 0x081E139C,
+	  0x00481ADB, 0x14AB0289, 0x066A0EBE, 0x00C70ED6 },
+
+	{ 0x0694120B, 0x124E1CC9, 0x0E2F0570, 0x17CF081A,
+	  0x078906AC, 0x066D17CF, 0x1B3207F4, 0x0C5705E9,
+	  0x10001C38, 0x00A919DE, 0x06851375, 0x0F900BD8,
+	  0x080401BA, 0x0EEE0D42, 0x1B8B11EA, 0x0B4519F0,
+	  0x090F18C0, 0x062E1508, 0x0DD909F4, 0x01EB067C },
+
+	{ 0x0CDC1D5F, 0x0D1818F9, 0x07781636, 0x125B18E8,
+	  0x0D7003AF, 0x13110099, 0x1D9B1899, 0x175C1EB7,
+	  0x0E34171A, 0x01E01153, 0x081A0F36, 0x0B391783,
+	  0x1D1F147E, 0x19CE16D7, 0x11511B21, 0x1F2C10F9,
+	  0x12CA0E51, 0x05A31D39, 0x171A192E, 0x016B0E4F }
+};
+
+/*
+ * Lookup one of the Gwin[] values, by index. This is constant-time.
+ */
+static void
+lookup_Gwin(p256_jacobian *T, uint32_t idx)
+{
+	uint32_t xy[20];
+	uint32_t k;
+	size_t u;
+
+	memset(xy, 0, sizeof xy);	/* stays zero if idx matches no entry */
+	for (k = 0; k < 15; k ++) {	/* every entry is read, whatever idx */
+		uint32_t m;
+
+		m = -EQ(idx, k + 1);	/* all-ones iff idx == k + 1, else 0 */
+		for (u = 0; u < 20; u ++) {
+			xy[u] |= m & Gwin[k][u];
+		}
+	}
+	for (u = 0; u < 10; u ++) {	/* split each 32-bit word in two */
+		T->x[(u << 1) + 0] = xy[u] & 0xFFFF;
+		T->x[(u << 1) + 1] = xy[u] >> 16;
+		T->y[(u << 1) + 0] = xy[u + 10] & 0xFFFF;
+		T->y[(u << 1) + 1] = xy[u + 10] >> 16;
+	}
+	memset(T->z, 0, sizeof T->z);
+	T->z[0] = 1;	/* affine point: z = 1 */
+}
+
+/*
+ * Multiply the generator by an integer. The integer is assumed non-zero
+ * and lower than the curve order.
+ */
+static void
+p256_mulgen(p256_jacobian *P, const unsigned char *x, size_t xlen)
+{
+	/*
+	 * qz is a flag that is initially 1, and remains equal to 1
+	 * as long as the point is the point at infinity.
+	 *
+	 * We use a 4-bit window to handle multiplier bits by groups
+	 * of 4. The precomputed window is constant static data, with
+	 * points in affine coordinates; we use a constant-time lookup.
+	 */
+	p256_jacobian Q;
+	uint32_t qz;
+
+	memset(&Q, 0, sizeof Q);
+	qz = 1;
+	while (xlen -- > 0) {	/* bytes of x are consumed in order */
+		int k;
+		unsigned bx;
+
+		bx = *x ++;
+		for (k = 0; k < 2; k ++) {	/* two 4-bit groups per byte */
+			uint32_t bits;
+			uint32_t bnz;
+			p256_jacobian T, U;
+
+			p256_double(&Q);
+			p256_double(&Q);
+			p256_double(&Q);
+			p256_double(&Q);	/* Q = 16*Q */
+			bits = (bx >> 4) & 0x0F;	/* upper 4 bits first */
+			bnz = NEQ(bits, 0);
+			lookup_Gwin(&T, bits);
+			U = Q;
+			p256_add_mixed(&U, &T);	/* computed for every group */
+			CCOPY(bnz & qz, &Q, &T, sizeof Q);	/* Q was 0: take T */
+			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);	/* else take Q + T */
+			qz &= ~bnz;	/* cleared at the first non-zero group */
+			bx <<= 4;	/* bring the lower 4 bits up */
+		}
+	}
+	*P = Q;
+}
+
+static const unsigned char P256_G[] = {	/* generator: 0x04, X, Y */
+	0x04, 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8,
+	0xBC, 0xE6, 0xE5, 0x63, 0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D,
+	0x81, 0x2D, 0xEB, 0x33, 0xA0, 0xF4, 0xA1, 0x39, 0x45, 0xD8,
+	0x98, 0xC2, 0x96, 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F,
+	0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C, 0x0F, 0x9E, 0x16, 0x2B,
+	0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, 0xCB, 0xB6, 0x40,
+	0x68, 0x37, 0xBF, 0x51, 0xF5
+};
+
+static const unsigned char P256_N[] = {	/* curve order, big-endian */
+	0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xBC, 0xE6, 0xFA, 0xAD,
+	0xA7, 0x17, 0x9E, 0x84, 0xF3, 0xB9, 0xCA, 0xC2, 0xFC, 0x63,
+	0x25, 0x51
+};
+
+static const unsigned char *
+api_generator(int curve, size_t *len)	/* returns P256_G, size in *len */
+{
+	(void)curve;	/* ignored: the same data is returned for any value */
+	*len = sizeof P256_G;
+	return P256_G;
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)	/* returns P256_N, size in *len */
+{
+	(void)curve;	/* ignored: the same data is returned for any value */
+	*len = sizeof P256_N;
+	return P256_N;
+}
+
+static size_t
+api_xoff(int curve, size_t *len)	/* where X sits in an encoded point */
+{
+	(void)curve;	/* ignored: the same values are returned for any curve */
+	*len = 32;	/* X is 32 bytes long... */
+	return 1;	/* ...and starts right after the 0x04 format byte */
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	uint32_t r;
+	p256_jacobian P;
+
+	(void)curve;	/* ignored: this file only handles P-256 */
+	if (Glen != 65) {
+		return 0;
+	}
+	r = p256_decode(&P, G, Glen);	/* r = 0 if G is not a valid point */
+	p256_mul(&P, x, xlen);	/* runs even when r == 0: no branch on r */
+	p256_to_affine(&P);
+	p256_encode(G, &P);	/* the result overwrites G */
+	return r;
+}
+
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	p256_jacobian P;
+
+	(void)curve;	/* ignored: this file only handles P-256 */
+	p256_mulgen(&P, x, xlen);	/* P = x*G */
+	p256_to_affine(&P);
+	p256_encode(R, &P);	/* 65 bytes are written to R */
+	return 65;
+}
+
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	p256_jacobian P, Q;
+	uint32_t r, t, z;
+	int i;
+
+	(void)curve;	/* ignored: this file only handles P-256 */
+	if (len != 65) {
+		return 0;
+	}
+	r = p256_decode(&P, A, len);
+	p256_mul(&P, x, xlen);	/* P = x*A */
+	if (B == NULL) {	/* no second point given: use the generator */
+		p256_mulgen(&Q, y, ylen);
+	} else {
+		r &= p256_decode(&Q, B, len);
+		p256_mul(&Q, y, ylen);	/* Q = y*B */
+	}
+
+	/*
+	 * The final addition may fail in case both points are equal.
+	 */
+	t = p256_add(&P, &Q);
+	reduce_final_f256(P.z);	/* so that z == 0 mod p reads as all-zero */
+	z = 0;
+	for (i = 0; i < 20; i ++) {
+		z |= P.z[i];
+	}
+	z = EQ(z, 0);	/* 1 if the sum came out as the point at infinity */
+	p256_double(&Q);	/* always computed; used only in the P == Q case */
+
+	/*
+	 * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
+	 * have the following:
+	 *
+	 *   z = 0, t = 0   return P (normal addition)
+	 *   z = 0, t = 1   return P (normal addition)
+	 *   z = 1, t = 0   return Q (a 'double' case)
+	 *   z = 1, t = 1   report an error (P+Q = 0)
+	 */
+	CCOPY(z & ~t, &P, &Q, sizeof Q);
+	p256_to_affine(&P);
+	p256_encode(A, &P);	/* the result overwrites A */
+	r &= ~(z & t);
+	return r;
+}
+
+/* see bearssl_ec.h */
+const br_ec_impl br_ec_p256_m15 = {
+	(uint32_t)0x00800000,	/* bit 23; presumably a curve mask (confirm) */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
diff --git a/third_party/bearssl/src/ec_p256_m31.c b/third_party/bearssl/src/ec_p256_m31.c
new file mode 100644
index 0000000..b185937
--- /dev/null
+++ b/third_party/bearssl/src/ec_p256_m31.c
@@ -0,0 +1,1469 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * ARSH(x, n) / ARSHW(x, n): right-shift a 32-bit / 64-bit value by n
+ * bits while replicating its top bit. If BR_NO_ARITH_SHIFT is undefined,
+ * or defined to 0, then we _assume_ that right-shifting a signed negative
+ * integer copies the sign bit (arithmetic right-shift); x is then read
+ * through a signed pointer, so it must be an lvalue. This is
+ * "implementation-defined behaviour", not undefined; GCC explicitly
+ * defines it as an arithmetic shift. Otherwise the sign bits are rebuilt
+ * with unsigned operations only, which needs n >= 1 and n < 32 (or 64).
+ */
+#if BR_NO_ARITH_SHIFT
+#define ARSH(x, n)    (((uint32_t)(x) >> (n)) \
+                      | ((-((uint32_t)(x) >> 31)) << (32 - (n))))
+#define ARSHW(x, n)   (((uint64_t)(x) >> (n)) \
+                      | ((-((uint64_t)(x) >> 63)) << (64 - (n))))
+#else
+#define ARSH(x, n)    ((*(int32_t *)&(x)) >> (n))
+#define ARSHW(x, n)   ((*(int64_t *)&(x)) >> (n))
+#endif
+
+/*
+ * Convert an integer from unsigned big-endian encoding (len bytes at src)
+ * to a sequence of 30-bit words in little-endian order, stored in dst.
+ * Only complete words are stored; the final "partial" word is returned.
+ */
+static uint32_t
+be8_to_le30(uint32_t *dst, const unsigned char *src, size_t len)
+{
+	uint32_t acc;
+	int acc_len;
+
+	acc = 0; /* bits gathered so far for the word being built */
+	acc_len = 0; /* number of valid bits in acc */
+	while (len -- > 0) { /* from the last (least significant) byte to the first */
+		uint32_t b;
+
+		b = src[len];
+		if (acc_len < 22) { /* word is still incomplete after this byte */
+			acc |= b << acc_len;
+			acc_len += 8;
+		} else { /* byte completes the word: store it, keep the excess bits */
+			*dst ++ = (acc | (b << acc_len)) & 0x3FFFFFFF;
+			acc = b >> (30 - acc_len);
+			acc_len -= 22;
+		}
+	}
+	return acc;
+}
+
+/*
+ * Convert an integer (30-bit words, little-endian) to unsigned
+ * big-endian encoding. The total encoding length is provided; all
+ * len destination bytes are filled, from dst[len - 1] down to dst[0].
+ */
+static void
+le30_to_be8(unsigned char *dst, size_t len, const uint32_t *src)
+{
+	uint32_t acc;
+	int acc_len;
+
+	acc = 0; /* bits read from src but not yet written out */
+	acc_len = 0; /* number of valid bits in acc */
+	while (len -- > 0) {
+		if (acc_len < 8) { /* not enough buffered bits: read the next word */
+			uint32_t w;
+
+			w = *src ++;
+			dst[len] = (unsigned char)(acc | (w << acc_len));
+			acc = w >> (8 - acc_len);
+			acc_len += 22; /* 30 new bits, 8 bits consumed */
+		} else { /* a whole byte is already buffered */
+			dst[len] = (unsigned char)acc;
+			acc >>= 8;
+			acc_len -= 8;
+		}
+	}
+}
+
+/*
+ * Multiply two integers. Sources a and b are arrays of nine 30-bit words
+ * (little-endian), for values up to 2^270-1; the result is written to d
+ * as 18 words of 30 bits each. d is written only after a and b were read.
+ */
+static void
+mul9(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	/*
+	 * Maximum intermediate result is no more than
+	 * 10376293531797946367, which fits in 64 bits. Reason:
+	 *
+	 *   10376293531797946367 = 9 * (2^30-1)^2 + 9663676406
+	 *   10376293531797946367 < 9663676407 * 2^30
+	 *
+	 * Thus, adding together 9 products of 30-bit integers, with
+	 * a carry of at most 9663676406, yields an integer that fits
+	 * on 64 bits and generates a carry of at most 9663676406.
+	 */
+	uint64_t t[17]; /* t[k] = sum of all a[i]*b[j] with i + j = k */
+	uint64_t cc;
+	int i;
+
+	t[ 0] = MUL31(a[0], b[0]);
+	t[ 1] = MUL31(a[0], b[1])
+		+ MUL31(a[1], b[0]);
+	t[ 2] = MUL31(a[0], b[2])
+		+ MUL31(a[1], b[1])
+		+ MUL31(a[2], b[0]);
+	t[ 3] = MUL31(a[0], b[3])
+		+ MUL31(a[1], b[2])
+		+ MUL31(a[2], b[1])
+		+ MUL31(a[3], b[0]);
+	t[ 4] = MUL31(a[0], b[4])
+		+ MUL31(a[1], b[3])
+		+ MUL31(a[2], b[2])
+		+ MUL31(a[3], b[1])
+		+ MUL31(a[4], b[0]);
+	t[ 5] = MUL31(a[0], b[5])
+		+ MUL31(a[1], b[4])
+		+ MUL31(a[2], b[3])
+		+ MUL31(a[3], b[2])
+		+ MUL31(a[4], b[1])
+		+ MUL31(a[5], b[0]);
+	t[ 6] = MUL31(a[0], b[6])
+		+ MUL31(a[1], b[5])
+		+ MUL31(a[2], b[4])
+		+ MUL31(a[3], b[3])
+		+ MUL31(a[4], b[2])
+		+ MUL31(a[5], b[1])
+		+ MUL31(a[6], b[0]);
+	t[ 7] = MUL31(a[0], b[7])
+		+ MUL31(a[1], b[6])
+		+ MUL31(a[2], b[5])
+		+ MUL31(a[3], b[4])
+		+ MUL31(a[4], b[3])
+		+ MUL31(a[5], b[2])
+		+ MUL31(a[6], b[1])
+		+ MUL31(a[7], b[0]);
+	t[ 8] = MUL31(a[0], b[8])
+		+ MUL31(a[1], b[7])
+		+ MUL31(a[2], b[6])
+		+ MUL31(a[3], b[5])
+		+ MUL31(a[4], b[4])
+		+ MUL31(a[5], b[3])
+		+ MUL31(a[6], b[2])
+		+ MUL31(a[7], b[1])
+		+ MUL31(a[8], b[0]);
+	t[ 9] = MUL31(a[1], b[8])
+		+ MUL31(a[2], b[7])
+		+ MUL31(a[3], b[6])
+		+ MUL31(a[4], b[5])
+		+ MUL31(a[5], b[4])
+		+ MUL31(a[6], b[3])
+		+ MUL31(a[7], b[2])
+		+ MUL31(a[8], b[1]);
+	t[10] = MUL31(a[2], b[8])
+		+ MUL31(a[3], b[7])
+		+ MUL31(a[4], b[6])
+		+ MUL31(a[5], b[5])
+		+ MUL31(a[6], b[4])
+		+ MUL31(a[7], b[3])
+		+ MUL31(a[8], b[2]);
+	t[11] = MUL31(a[3], b[8])
+		+ MUL31(a[4], b[7])
+		+ MUL31(a[5], b[6])
+		+ MUL31(a[6], b[5])
+		+ MUL31(a[7], b[4])
+		+ MUL31(a[8], b[3]);
+	t[12] = MUL31(a[4], b[8])
+		+ MUL31(a[5], b[7])
+		+ MUL31(a[6], b[6])
+		+ MUL31(a[7], b[5])
+		+ MUL31(a[8], b[4]);
+	t[13] = MUL31(a[5], b[8])
+		+ MUL31(a[6], b[7])
+		+ MUL31(a[7], b[6])
+		+ MUL31(a[8], b[5]);
+	t[14] = MUL31(a[6], b[8])
+		+ MUL31(a[7], b[7])
+		+ MUL31(a[8], b[6]);
+	t[15] = MUL31(a[7], b[8])
+		+ MUL31(a[8], b[7]);
+	t[16] = MUL31(a[8], b[8]);
+
+	/*
+	 * Propagate carries.
+	 */
+	cc = 0;
+	for (i = 0; i < 17; i ++) {
+		uint64_t w;
+
+		w = t[i] + cc;
+		d[i] = (uint32_t)w & 0x3FFFFFFF; /* keep the low 30 bits */
+		cc = w >> 30;
+	}
+	d[17] = (uint32_t)cc; /* the last carry becomes the top word */
+}
+
+/*
+ * Square a 270-bit integer, represented as an array of nine 30-bit words.
+ * Result uses 18 words of 30 bits each; cross products are doubled by shift.
+ */
+static void
+square9(uint32_t *d, const uint32_t *a)
+{
+	uint64_t t[17]; /* t[k] = sum of all a[i]*a[j] with i + j = k */
+	uint64_t cc;
+	int i;
+
+	t[ 0] = MUL31(a[0], a[0]);
+	t[ 1] = ((MUL31(a[0], a[1])) << 1);
+	t[ 2] = MUL31(a[1], a[1])
+		+ ((MUL31(a[0], a[2])) << 1);
+	t[ 3] = ((MUL31(a[0], a[3])
+		+ MUL31(a[1], a[2])) << 1);
+	t[ 4] = MUL31(a[2], a[2])
+		+ ((MUL31(a[0], a[4])
+		+ MUL31(a[1], a[3])) << 1);
+	t[ 5] = ((MUL31(a[0], a[5])
+		+ MUL31(a[1], a[4])
+		+ MUL31(a[2], a[3])) << 1);
+	t[ 6] = MUL31(a[3], a[3])
+		+ ((MUL31(a[0], a[6])
+		+ MUL31(a[1], a[5])
+		+ MUL31(a[2], a[4])) << 1);
+	t[ 7] = ((MUL31(a[0], a[7])
+		+ MUL31(a[1], a[6])
+		+ MUL31(a[2], a[5])
+		+ MUL31(a[3], a[4])) << 1);
+	t[ 8] = MUL31(a[4], a[4])
+		+ ((MUL31(a[0], a[8])
+		+ MUL31(a[1], a[7])
+		+ MUL31(a[2], a[6])
+		+ MUL31(a[3], a[5])) << 1);
+	t[ 9] = ((MUL31(a[1], a[8])
+		+ MUL31(a[2], a[7])
+		+ MUL31(a[3], a[6])
+		+ MUL31(a[4], a[5])) << 1);
+	t[10] = MUL31(a[5], a[5])
+		+ ((MUL31(a[2], a[8])
+		+ MUL31(a[3], a[7])
+		+ MUL31(a[4], a[6])) << 1);
+	t[11] = ((MUL31(a[3], a[8])
+		+ MUL31(a[4], a[7])
+		+ MUL31(a[5], a[6])) << 1);
+	t[12] = MUL31(a[6], a[6])
+		+ ((MUL31(a[4], a[8])
+		+ MUL31(a[5], a[7])) << 1);
+	t[13] = ((MUL31(a[5], a[8])
+		+ MUL31(a[6], a[7])) << 1);
+	t[14] = MUL31(a[7], a[7])
+		+ ((MUL31(a[6], a[8])) << 1);
+	t[15] = ((MUL31(a[7], a[8])) << 1);
+	t[16] = MUL31(a[8], a[8]);
+
+	/*
+	 * Propagate carries.
+	 */
+	cc = 0;
+	for (i = 0; i < 17; i ++) {
+		uint64_t w;
+
+		w = t[i] + cc;
+		d[i] = (uint32_t)w & 0x3FFFFFFF; /* keep the low 30 bits */
+		cc = w >> 30;
+	}
+	d[17] = (uint32_t)cc; /* the last carry becomes the top word */
+}
+
+/*
+ * Base field modulus for P-256, as nine 30-bit words (little-endian order).
+ */
+static const uint32_t F256[] = {
+
+	0x3FFFFFFF, 0x3FFFFFFF, 0x3FFFFFFF, 0x0000003F, 0x00000000,
+	0x00000000, 0x00001000, 0x3FFFC000, 0x0000FFFF
+};
+
+/*
+ * The 'b' curve equation coefficient for P-256, as nine 30-bit words.
+ */
+static const uint32_t P256_B[] = {
+
+	0x27D2604B, 0x2F38F0F8, 0x053B0F63, 0x0741AC33, 0x1886BC65,
+	0x2EF555DA, 0x293E7B3E, 0x0D762A8E, 0x00005AC6
+};
+
+/*
+ * Addition in the field (d = a + b). Source operands shall fit on 257
+ * bits; output will be lower than twice the modulus.
+ */
+static void
+add_f256(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t w, cc;
+	int i;
+
+	cc = 0;
+	for (i = 0; i < 9; i ++) { /* plain word-by-word sum with carry */
+		w = a[i] + b[i] + cc;
+		d[i] = w & 0x3FFFFFFF;
+		cc = w >> 30;
+	}
+	w >>= 16; /* w (the sum for word 8) now holds the part beyond 2^256 */
+	d[8] &= 0xFFFF;
+	d[3] -= w << 6; /* fold it back: 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p */
+	d[6] -= w << 12;
+	d[7] += w << 14;
+	cc = w;
+	for (i = 0; i < 9; i ++) { /* second pass: carries may now be negative */
+		w = d[i] + cc;
+		d[i] = w & 0x3FFFFFFF;
+		cc = ARSH(w, 30);
+	}
+}
+
+/*
+ * Subtraction in the field (d = a - b). Source operands shall be smaller
+ * than twice the modulus; the result will fulfil the same property.
+ */
+static void
+sub_f256(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t w, cc;
+	int i;
+
+	/*
+	 * We really compute a - b + 2*p to make sure that the result is
+	 * positive.
+	 */
+	w = a[0] - b[0] - 0x00002; /* the constants below are the words of 2*p */
+	d[0] = w & 0x3FFFFFFF;
+	w = a[1] - b[1] + ARSH(w, 30);
+	d[1] = w & 0x3FFFFFFF;
+	w = a[2] - b[2] + ARSH(w, 30);
+	d[2] = w & 0x3FFFFFFF;
+	w = a[3] - b[3] + ARSH(w, 30) + 0x00080;
+	d[3] = w & 0x3FFFFFFF;
+	w = a[4] - b[4] + ARSH(w, 30);
+	d[4] = w & 0x3FFFFFFF;
+	w = a[5] - b[5] + ARSH(w, 30);
+	d[5] = w & 0x3FFFFFFF;
+	w = a[6] - b[6] + ARSH(w, 30) + 0x02000;
+	d[6] = w & 0x3FFFFFFF;
+	w = a[7] - b[7] + ARSH(w, 30) - 0x08000;
+	d[7] = w & 0x3FFFFFFF;
+	w = a[8] - b[8] + ARSH(w, 30) + 0x20000;
+	d[8] = w & 0xFFFF;
+	w >>= 16; /* w now holds the part beyond 2^256 */
+	d[8] &= 0xFFFF; /* d[8] was already masked above; this has no effect */
+	d[3] -= w << 6; /* fold it back: 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p */
+	d[6] -= w << 12;
+	d[7] += w << 14;
+	cc = w;
+	for (i = 0; i < 9; i ++) { /* final pass with signed carries */
+		w = d[i] + cc;
+		d[i] = w & 0x3FFFFFFF;
+		cc = ARSH(w, 30);
+	}
+}
+
+/*
+ * Compute a multiplication in F256 (d = a * b). Source operands shall be
+ * less than twice the modulus.
+ */
+static void
+mul_f256(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t t[18];
+	uint64_t s[18];
+	uint64_t cc, x;
+	uint32_t z, c;
+	int i;
+
+	mul9(t, a, b); /* full 18-word product; d is written only afterwards */
+
+	/*
+	 * Modular reduction: each high word is added/subtracted where
+	 * necessary.
+	 *
+	 * The modulus is:
+	 *    p = 2^256 - 2^224 + 2^192 + 2^96 - 1
+	 * Therefore:
+	 *    2^256 = 2^224 - 2^192 - 2^96 + 1 mod p
+	 *
+	 * For a word x at bit offset n (n >= 256), we have:
+	 *    x*2^n = x*2^(n-32) - x*2^(n-64)
+	 *            - x*2^(n - 160) + x*2^(n-256) mod p
+	 *
+	 * Thus, we can nullify the high word if we reinject it at some
+	 * proper emplacements.
+	 *
+	 * We use 64-bit intermediate words to allow for carries to
+	 * accumulate easily, before performing the final propagation.
+	 */
+	for (i = 0; i < 18; i ++) {
+		s[i] = t[i];
+	}
+
+	for (i = 17; i >= 9; i --) { /* word i sits at bit offset n = 30*i */
+		uint64_t y;
+
+		y = s[i];
+		s[i - 1] += ARSHW(y, 2); /* + y*2^(n-32), split over two words */
+		s[i - 2] += (y << 28) & 0x3FFFFFFF;
+		s[i - 2] -= ARSHW(y, 4); /* - y*2^(n-64) */
+		s[i - 3] -= (y << 26) & 0x3FFFFFFF;
+		s[i - 5] -= ARSHW(y, 10); /* - y*2^(n-160) */
+		s[i - 6] -= (y << 20) & 0x3FFFFFFF;
+		s[i - 8] += ARSHW(y, 16); /* + y*2^(n-256) */
+		s[i - 9] += (y << 14) & 0x3FFFFFFF;
+	}
+
+	/*
+	 * Carry propagation must be signed. Moreover, we may have overdone
+	 * it a bit, and obtain a negative result.
+	 *
+	 * The loop above ran 9 times; each time, each word was augmented
+	 * by at most one extra word (in absolute value). Thus, the top
+	 * word must in fine fit in 39 bits, so the carry below will fit
+	 * on 9 bits.
+	 */
+	cc = 0;
+	for (i = 0; i < 9; i ++) {
+		x = s[i] + cc;
+		d[i] = (uint32_t)x & 0x3FFFFFFF;
+		cc = ARSHW(x, 30);
+	}
+
+	/*
+	 * All nine words fit on 30 bits, but there may be an extra
+	 * carry for a few bits (at most 9), and that carry may be
+	 * negative. Moreover, we want the result to fit on 257 bits.
+	 * The two lines below ensure that the word in d[] has length
+	 * 256 bits, and the (signed) carry (beyond 2^256) is in cc. The
+	 * significant length of cc is less than 24 bits, so we will be
+	 * able to switch to 32-bit operations.
+	 */
+	cc = ARSHW(x, 16); /* x still holds the unmasked value of word 8 */
+	d[8] &= 0xFFFF;
+
+	/*
+	 * One extra round of reduction, for cc*2^256, which means
+	 * adding cc*(2^224-2^192-2^96+1) to a 256-bit (nonnegative)
+	 * value. If cc is negative, then it may happen (rarely, but
+	 * not negligibly so) that the result would be negative. In
+	 * order to avoid that, if cc is negative, then we add the
+	 * modulus once. Note that if cc is negative, then propagating
+	 * that carry must yield a value lower than the modulus, so
+	 * adding the modulus once will keep the final result under
+	 * twice the modulus.
+	 */
+	z = (uint32_t)cc;
+	d[3] -= z << 6; /* - cc*2^96 */
+	d[6] -= (z << 12) & 0x3FFFFFFF; /* - cc*2^192, split over words 6 and 7 */
+	d[7] -= ARSH(z, 18);
+	d[7] += (z << 14) & 0x3FFFFFFF; /* + cc*2^224, split over words 7 and 8 */
+	d[8] += ARSH(z, 16);
+	c = z >> 31; /* c = 1 if cc is negative; the five lines below then add p */
+	d[0] -= c;
+	d[3] += c << 6;
+	d[6] += c << 12;
+	d[7] -= c << 14;
+	d[8] += c << 16;
+	for (i = 0; i < 9; i ++) { /* z starts as the "+ cc" term, then carries on */
+		uint32_t w;
+
+		w = d[i] + z;
+		d[i] = w & 0x3FFFFFFF;
+		z = ARSH(w, 30);
+	}
+}
+
+/*
+ * Compute a square in F256 (d = a * a). Source operand shall be less than
+ * twice the modulus.
+ */
+static void
+square_f256(uint32_t *d, const uint32_t *a)
+{
+	uint32_t t[18];
+	uint64_t s[18];
+	uint64_t cc, x;
+	uint32_t z, c;
+	int i;
+
+	square9(t, a); /* full 18-word square; d is written only afterwards */
+
+	/*
+	 * Modular reduction: each high word is added/subtracted where
+	 * necessary.
+	 *
+	 * The modulus is:
+	 *    p = 2^256 - 2^224 + 2^192 + 2^96 - 1
+	 * Therefore:
+	 *    2^256 = 2^224 - 2^192 - 2^96 + 1 mod p
+	 *
+	 * For a word x at bit offset n (n >= 256), we have:
+	 *    x*2^n = x*2^(n-32) - x*2^(n-64)
+	 *            - x*2^(n - 160) + x*2^(n-256) mod p
+	 *
+	 * Thus, we can nullify the high word if we reinject it at some
+	 * proper emplacements.
+	 *
+	 * We use 64-bit intermediate words to allow for carries to
+	 * accumulate easily, before performing the final propagation.
+	 */
+	for (i = 0; i < 18; i ++) {
+		s[i] = t[i];
+	}
+
+	for (i = 17; i >= 9; i --) { /* word i sits at bit offset n = 30*i */
+		uint64_t y;
+
+		y = s[i];
+		s[i - 1] += ARSHW(y, 2); /* + y*2^(n-32), split over two words */
+		s[i - 2] += (y << 28) & 0x3FFFFFFF;
+		s[i - 2] -= ARSHW(y, 4); /* - y*2^(n-64) */
+		s[i - 3] -= (y << 26) & 0x3FFFFFFF;
+		s[i - 5] -= ARSHW(y, 10); /* - y*2^(n-160) */
+		s[i - 6] -= (y << 20) & 0x3FFFFFFF;
+		s[i - 8] += ARSHW(y, 16); /* + y*2^(n-256) */
+		s[i - 9] += (y << 14) & 0x3FFFFFFF;
+	}
+
+	/*
+	 * Carry propagation must be signed. Moreover, we may have overdone
+	 * it a bit, and obtain a negative result.
+	 *
+	 * The loop above ran 9 times; each time, each word was augmented
+	 * by at most one extra word (in absolute value). Thus, the top
+	 * word must in fine fit in 39 bits, so the carry below will fit
+	 * on 9 bits.
+	 */
+	cc = 0;
+	for (i = 0; i < 9; i ++) {
+		x = s[i] + cc;
+		d[i] = (uint32_t)x & 0x3FFFFFFF;
+		cc = ARSHW(x, 30);
+	}
+
+	/*
+	 * All nine words fit on 30 bits, but there may be an extra
+	 * carry for a few bits (at most 9), and that carry may be
+	 * negative. Moreover, we want the result to fit on 257 bits.
+	 * The two lines below ensure that the word in d[] has length
+	 * 256 bits, and the (signed) carry (beyond 2^256) is in cc. The
+	 * significant length of cc is less than 24 bits, so we will be
+	 * able to switch to 32-bit operations.
+	 */
+	cc = ARSHW(x, 16); /* x still holds the unmasked value of word 8 */
+	d[8] &= 0xFFFF;
+
+	/*
+	 * One extra round of reduction, for cc*2^256, which means
+	 * adding cc*(2^224-2^192-2^96+1) to a 256-bit (nonnegative)
+	 * value. If cc is negative, then it may happen (rarely, but
+	 * not negligibly so) that the result would be negative. In
+	 * order to avoid that, if cc is negative, then we add the
+	 * modulus once. Note that if cc is negative, then propagating
+	 * that carry must yield a value lower than the modulus, so
+	 * adding the modulus once will keep the final result under
+	 * twice the modulus.
+	 */
+	z = (uint32_t)cc;
+	d[3] -= z << 6; /* - cc*2^96 */
+	d[6] -= (z << 12) & 0x3FFFFFFF; /* - cc*2^192, split over words 6 and 7 */
+	d[7] -= ARSH(z, 18);
+	d[7] += (z << 14) & 0x3FFFFFFF; /* + cc*2^224, split over words 7 and 8 */
+	d[8] += ARSH(z, 16);
+	c = z >> 31; /* c = 1 if cc is negative; the five lines below then add p */
+	d[0] -= c;
+	d[3] += c << 6;
+	d[6] += c << 12;
+	d[7] -= c << 14;
+	d[8] += c << 16;
+	for (i = 0; i < 9; i ++) { /* z starts as the "+ cc" term, then carries on */
+		uint32_t w;
+
+		w = d[i] + z;
+		d[i] = w & 0x3FFFFFFF;
+		z = ARSH(w, 30);
+	}
+}
+
+/*
+ * Perform a "final reduction" in field F256 (field for curve P-256).
+ * The source value must be less than twice the modulus. If the value
+ * is not lower than the modulus, then the modulus is subtracted and
+ * this function returns 1; otherwise, it leaves it untouched and it
+ * returns 0. The subtraction is always computed; CCOPY() selects it.
+ */
+static uint32_t
+reduce_final_f256(uint32_t *d)
+{
+	uint32_t t[9]; /* receives d - F256 */
+	uint32_t cc;
+	int i;
+
+	cc = 0;
+	for (i = 0; i < 9; i ++) {
+		uint32_t w;
+
+		w = d[i] - F256[i] - cc;
+		cc = w >> 31; /* borrow for the next word */
+		t[i] = w & 0x3FFFFFFF;
+	}
+	cc ^= 1; /* cc = 1 when there was no final borrow, i.e. d >= modulus */
+	CCOPY(cc, d, t, sizeof t);
+	return cc;
+}
+
+/*
+ * Jacobian coordinates for a point in P-256: affine coordinates (X,Y)
+ * are such that:
+ *   X = x / z^2
+ *   Y = y / z^3
+ * For the point at infinity, z = 0.
+ * Each point thus admits many possible representations.
+ *
+ * Coordinates are represented in arrays of 32-bit integers, each holding
+ * 30 bits of data. Values may also be slightly greater than the modulus,
+ * but they will always be lower than twice the modulus.
+ */
+typedef struct {
+	uint32_t x[9]; /* X * z^2 */
+	uint32_t y[9]; /* Y * z^3 */
+	uint32_t z[9]; /* 0 for the point at infinity */
+} p256_jacobian;
+
+/*
+ * Convert a point to affine coordinates, in place:
+ *  - If the point is the point at infinity, then all three coordinates
+ *    are set to 0.
+ *  - Otherwise, the 'z' coordinate is set to 1, and the 'x' and 'y'
+ *    coordinates are the 'X' and 'Y' affine coordinates.
+ * The coordinates are guaranteed to be lower than the modulus.
+ */
+static void
+p256_to_affine(p256_jacobian *P)
+{
+	uint32_t t1[9], t2[9];
+	int i;
+
+	/*
+	 * Invert z with a modular exponentiation: the modulus is
+	 * p = 2^256 - 2^224 + 2^192 + 2^96 - 1, and the exponent is
+	 * p-2. Exponent bit pattern (from high to low) is:
+	 *  - 32 bits of value 1
+	 *  - 31 bits of value 0
+	 *  - 1 bit of value 1
+	 *  - 96 bits of value 0
+	 *  - 94 bits of value 1
+	 *  - 1 bit of value 0
+	 *  - 1 bit of value 1
+	 * Thus, we precompute z^(2^31-1) to speed things up.
+	 *
+	 * If z = 0 (point at infinity) then the modular exponentiation
+	 * will yield 0, which leads to the expected result (all three
+	 * coordinates set to 0).
+	 */
+
+	/*
+	 * A simple square-and-multiply for z^(2^31-1). We could save about
+	 * two dozen multiplications here with an addition chain, but
+	 * this would require a bit more code, and extra stack buffers.
+	 */
+	memcpy(t1, P->z, sizeof P->z);
+	for (i = 0; i < 30; i ++) { /* each round appends one more 1 bit to the exponent */
+		square_f256(t1, t1);
+		mul_f256(t1, t1, P->z);
+	}
+
+	/*
+	 * Square-and-multiply. Apart from the squarings, we have a few
+	 * multiplications to set bits to 1; we multiply by the original z
+	 * for setting 1 bit, and by t1 for setting 31 bits.
+	 */
+	memcpy(t2, P->z, sizeof P->z); /* accounts for the top exponent bit */
+	for (i = 1; i < 256; i ++) {
+		square_f256(t2, t2);
+		switch (i) {
+		case 31: /* completes the run of 32 ones */
+		case 190: /* 31 bits of the run of 94 ones */
+		case 221: /* 31 more bits of that run */
+		case 252: /* 31 more bits of that run */
+			mul_f256(t2, t2, t1);
+			break;
+		case 63: /* the single 1 after the 31 zeros */
+		case 253: /* last bit of the run of 94 ones */
+		case 255: /* lowest exponent bit */
+			mul_f256(t2, t2, P->z);
+			break;
+		}
+	}
+
+	/*
+	 * Now that we have 1/z, multiply x by 1/z^2 and y by 1/z^3.
+	 */
+	mul_f256(t1, t2, t2); /* t1 = 1/z^2 */
+	mul_f256(P->x, t1, P->x);
+	mul_f256(t1, t1, t2); /* t1 = 1/z^3 */
+	mul_f256(P->y, t1, P->y);
+	reduce_final_f256(P->x);
+	reduce_final_f256(P->y);
+
+	/*
+	 * Multiply z by 1/z. If z = 0, then this will yield 0, otherwise
+	 * this will set z to 1.
+	 */
+	mul_f256(P->z, P->z, t2);
+	reduce_final_f256(P->z);
+}
+
+/*
+ * Double a point in P-256, in place. This function works for all valid
+ * points, including the point at infinity.
+ */
+static void
+p256_double(p256_jacobian *Q)
+{
+	/*
+	 * Doubling formulas are:
+	 *
+	 *   s = 4*x*y^2
+	 *   m = 3*(x + z^2)*(x - z^2)
+	 *   x' = m^2 - 2*s
+	 *   y' = m*(s - x') - 8*y^4
+	 *   z' = 2*y*z
+	 *
+	 * These formulas work for all points, including points of order 2
+	 * and points at infinity:
+	 *   - If y = 0 then z' = 0. But there is no such point in P-256
+	 *     anyway.
+	 *   - If z = 0 then z' = 0.
+	 */
+	uint32_t t1[9], t2[9], t3[9], t4[9];
+
+	/*
+	 * Compute z^2 in t1.
+	 */
+	square_f256(t1, Q->z);
+
+	/*
+	 * Compute x+z^2 in t2 and x-z^2 in t1.
+	 */
+	add_f256(t2, Q->x, t1);
+	sub_f256(t1, Q->x, t1);
+
+	/*
+	 * Compute 3*(x+z^2)*(x-z^2) in t1.
+	 */
+	mul_f256(t3, t1, t2);
+	add_f256(t1, t3, t3); /* t1 = 2*t3 */
+	add_f256(t1, t3, t1); /* t1 = 3*t3 = m */
+
+	/*
+	 * Compute 4*x*y^2 (in t2) and 2*y^2 (in t3).
+	 */
+	square_f256(t3, Q->y);
+	add_f256(t3, t3, t3);
+	mul_f256(t2, Q->x, t3); /* t2 = 2*x*y^2 */
+	add_f256(t2, t2, t2); /* t2 = s */
+
+	/*
+	 * Compute x' = m^2 - 2*s.
+	 */
+	square_f256(Q->x, t1);
+	sub_f256(Q->x, Q->x, t2);
+	sub_f256(Q->x, Q->x, t2);
+
+	/*
+	 * Compute z' = 2*y*z.
+	 */
+	mul_f256(t4, Q->y, Q->z); /* must use the old y: Q->y is updated last */
+	add_f256(Q->z, t4, t4);
+
+	/*
+	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
+	 * 2*y^2 in t3.
+	 */
+	sub_f256(t2, t2, Q->x);
+	mul_f256(Q->y, t1, t2);
+	square_f256(t4, t3); /* t4 = 4*y^4 */
+	add_f256(t4, t4, t4); /* t4 = 8*y^4 */
+	sub_f256(Q->y, Q->y, t4);
+}
+
+/*
+ * Add point P2 to point P1.
+ *
+ * This function computes the wrong result in the following cases:
+ *
+ *   - If P1 == 0 but P2 != 0
+ *   - If P1 != 0 but P2 == 0
+ *   - If P1 == P2
+ *
+ * In all three cases, P1 is set to the point at infinity.
+ *
+ * Returned value is 0 if one of the following occurs:
+ *
+ *   - P1 and P2 have the same Y coordinate
+ *   - P1 == 0 and P2 == 0
+ *   - The Y coordinate of one of the points is 0 and the other point is
+ *     the point at infinity.
+ *
+ * The third case cannot actually happen with valid points, since a point
+ * with Y == 0 is a point of order 2, and there is no point of order 2 on
+ * curve P-256.
+ *
+ * Therefore, assuming that P1 != 0 and P2 != 0 on input, then the caller
+ * can apply the following:
+ *
+ *   - If the result is not the point at infinity, then it is correct.
+ *   - Otherwise, if the returned value is 1, then this is a case of
+ *     P1+P2 == 0, so the result is indeed the point at infinity.
+ *   - Otherwise, P1 == P2, so a "double" operation should have been
+ *     performed.
+ */
+static uint32_t
+p256_add(p256_jacobian *P1, const p256_jacobian *P2)
+{
+	/*
+	 * Addition formulas are:
+	 *
+	 *   u1 = x1 * z2^2
+	 *   u2 = x2 * z1^2
+	 *   s1 = y1 * z2^3
+	 *   s2 = y2 * z1^3
+	 *   h = u2 - u1
+	 *   r = s2 - s1
+	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
+	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+	 *   z3 = h * z1 * z2
+	 */
+	uint32_t t1[9], t2[9], t3[9], t4[9], t5[9], t6[9], t7[9];
+	uint32_t ret;
+	int i;
+
+	/*
+	 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
+	 */
+	square_f256(t3, P2->z);
+	mul_f256(t1, P1->x, t3);
+	mul_f256(t4, P2->z, t3);
+	mul_f256(t3, P1->y, t4);
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	square_f256(t4, P1->z);
+	mul_f256(t2, P2->x, t4);
+	mul_f256(t5, P1->z, t4);
+	mul_f256(t4, P2->y, t5);
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 * We need to test whether r is zero, so we will do some extra
+	 * reduce.
+	 */
+	sub_f256(t2, t2, t1);
+	sub_f256(t4, t4, t3);
+	reduce_final_f256(t4);
+	ret = 0;
+	for (i = 0; i < 9; i ++) {
+		ret |= t4[i];
+	}
+	ret = (ret | -ret) >> 31; /* 1 if any word of r is non-zero, 0 otherwise */
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5);
+	 */
+	square_f256(t7, t2);
+	mul_f256(t6, t1, t7);
+	mul_f256(t5, t7, t2);
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 */
+	square_f256(P1->x, t4);
+	sub_f256(P1->x, P1->x, t5);
+	sub_f256(P1->x, P1->x, t6);
+	sub_f256(P1->x, P1->x, t6);
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	sub_f256(t6, t6, P1->x);
+	mul_f256(P1->y, t4, t6);
+	mul_f256(t1, t5, t3); /* t1 is reused: s1*h^3 */
+	sub_f256(P1->y, P1->y, t1);
+
+	/*
+	 * Compute z3 = h*z1*z2.
+	 */
+	mul_f256(t1, P1->z, P2->z);
+	mul_f256(P1->z, t1, t2);
+
+	return ret;
+}
+
+/*
+ * Add point P2 to point P1. This is a specialised function for the
+ * case when P2 is a non-zero point in affine coordinates.
+ *
+ * This function computes the wrong result in the following cases:
+ *
+ *   - If P1 == 0
+ *   - If P1 == P2
+ *
+ * In both cases, P1 is set to the point at infinity.
+ *
+ * Returned value is 0 if one of the following occurs:
+ *
+ *   - P1 and P2 have the same Y coordinate
+ *   - The Y coordinate of P2 is 0 and P1 is the point at infinity.
+ *
+ * The second case cannot actually happen with valid points, since a point
+ * with Y == 0 is a point of order 2, and there is no point of order 2 on
+ * curve P-256.
+ *
+ * Therefore, assuming that P1 != 0 on input, then the caller
+ * can apply the following:
+ *
+ *   - If the result is not the point at infinity, then it is correct.
+ *   - Otherwise, if the returned value is 1, then this is a case of
+ *     P1+P2 == 0, so the result is indeed the point at infinity.
+ *   - Otherwise, P1 == P2, so a "double" operation should have been
+ *     performed.
+ */
+static uint32_t
+p256_add_mixed(p256_jacobian *P1, const p256_jacobian *P2)
+{
+	/*
+	 * Addition formulas are:
+	 *
+	 *   u1 = x1
+	 *   u2 = x2 * z1^2
+	 *   s1 = y1
+	 *   s2 = y2 * z1^3
+	 *   h = u2 - u1
+	 *   r = s2 - s1
+	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
+	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+	 *   z3 = h * z1
+	 */
+	uint32_t t1[9], t2[9], t3[9], t4[9], t5[9], t6[9], t7[9];
+	uint32_t ret;
+	int i;
+
+	/*
+	 * Compute u1 = x1 (in t1) and s1 = y1 (in t3).
+	 */
+	memcpy(t1, P1->x, sizeof t1);
+	memcpy(t3, P1->y, sizeof t3);
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	square_f256(t4, P1->z);
+	mul_f256(t2, P2->x, t4);
+	mul_f256(t5, P1->z, t4);
+	mul_f256(t4, P2->y, t5);
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 * We need to test whether r is zero, so we will do some extra
+	 * reduce.
+	 */
+	sub_f256(t2, t2, t1);
+	sub_f256(t4, t4, t3);
+	reduce_final_f256(t4);
+	ret = 0;
+	for (i = 0; i < 9; i ++) {
+		ret |= t4[i];
+	}
+	ret = (ret | -ret) >> 31; /* 1 if any word of r is non-zero, 0 otherwise */
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5);
+	 */
+	square_f256(t7, t2);
+	mul_f256(t6, t1, t7);
+	mul_f256(t5, t7, t2);
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 */
+	square_f256(P1->x, t4);
+	sub_f256(P1->x, P1->x, t5);
+	sub_f256(P1->x, P1->x, t6);
+	sub_f256(P1->x, P1->x, t6);
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	sub_f256(t6, t6, P1->x);
+	mul_f256(P1->y, t4, t6);
+	mul_f256(t1, t5, t3); /* t1 is reused: s1*h^3 */
+	sub_f256(P1->y, P1->y, t1);
+
+	/*
+	 * Compute z3 = h*z1 (the z coordinate of P2 is not read here).
+	 */
+	mul_f256(P1->z, P1->z, t2);
+
+	return ret;
+}
+
+/*
+ * Decode a P-256 point (65 bytes at src) into P. This function does not support
+ * the point at infinity. Returned value is 0 if the point is invalid, 1 otherwise.
+ */
+static uint32_t
+p256_decode(p256_jacobian *P, const void *src, size_t len)
+{
+	const unsigned char *buf;
+	uint32_t tx[9], ty[9], t1[9], t2[9];
+	uint32_t bad;
+	int i;
+
+	if (len != 65) { /* early exit: P is not written in that case */
+		return 0;
+	}
+	buf = src;
+
+	/*
+	 * First byte must be 0x04 (uncompressed format). We could support
+	 * "hybrid format" (first byte is 0x06 or 0x07, and encodes the
+	 * least significant bit of the Y coordinate), but it is explicitly
+	 * forbidden by RFC 5480 (section 2.2).
+	 */
+	bad = NEQ(buf[0], 0x04);
+
+	/*
+	 * Decode the coordinates, and check that they are both lower
+	 * than the modulus.
+	 */
+	tx[8] = be8_to_le30(tx, buf + 1, 32); /* 8 full words, then the partial top word */
+	ty[8] = be8_to_le30(ty, buf + 33, 32);
+	bad |= reduce_final_f256(tx); /* returns 1 when the value was not below the modulus */
+	bad |= reduce_final_f256(ty);
+
+	/*
+	 * Check curve equation.
+	 */
+	square_f256(t1, tx);
+	mul_f256(t1, tx, t1); /* t1 = x^3 */
+	square_f256(t2, ty); /* t2 = y^2 */
+	sub_f256(t1, t1, tx);
+	sub_f256(t1, t1, tx);
+	sub_f256(t1, t1, tx); /* t1 = x^3 - 3*x */
+	add_f256(t1, t1, P256_B);
+	sub_f256(t1, t1, t2); /* t1 = x^3 - 3*x + b - y^2 */
+	reduce_final_f256(t1);
+	for (i = 0; i < 9; i ++) { /* any non-zero word marks the point as bad */
+		bad |= t1[i];
+	}
+
+	/*
+	 * Copy coordinates to the point structure.
+	 */
+	memcpy(P->x, tx, sizeof tx);
+	memcpy(P->y, ty, sizeof ty);
+	memset(P->z, 0, sizeof P->z);
+	P->z[0] = 1; /* z = 1 */
+	return EQ(bad, 0);
+}
+
+/*
+ * Encode a point into a buffer (65 bytes are written). This function assumes that
+ * the point is valid, in affine coordinates, and not the point at infinity.
+ */
+static void
+p256_encode(void *dst, const p256_jacobian *P)
+{
+	unsigned char *buf;
+
+	buf = dst;
+	buf[0] = 0x04; /* same leading byte as the one required by p256_decode() */
+	le30_to_be8(buf + 1, 32, P->x); /* x as 32 big-endian bytes */
+	le30_to_be8(buf + 33, 32, P->y); /* y as 32 big-endian bytes */
+}
+
+/*
+ * Multiply a curve point by an integer (xlen bytes at x, most significant
+ * byte first); P is replaced with the result. The integer is assumed to be
+ * lower than the curve order, and P must not be the point at infinity.
+ */
+static void
+p256_mul(p256_jacobian *P, const unsigned char *x, size_t xlen)
+{
+	/*
+	 * qz is a flag that is initially 1, and remains equal to 1
+	 * as long as the point is the point at infinity.
+	 *
+	 * We use a 2-bit window to handle multiplier bits by pairs.
+	 * The precomputed window really is the points P2 and P3.
+	 */
+	uint32_t qz;
+	p256_jacobian P2, P3, Q, T, U;
+
+	/*
+	 * Compute window values.
+	 */
+	P2 = *P;
+	p256_double(&P2); /* P2 = 2*P */
+	P3 = *P;
+	p256_add(&P3, &P2); /* P3 = P + 2*P */
+
+	/*
+	 * We start with Q = 0. We process multiplier bits 2 by 2.
+	 */
+	memset(&Q, 0, sizeof Q);
+	qz = 1;
+	while (xlen -- > 0) {
+		int k;
+
+		for (k = 6; k >= 0; k -= 2) { /* from the top bit pair of the byte down */
+			uint32_t bits;
+			uint32_t bnz;
+
+			p256_double(&Q);
+			p256_double(&Q);
+			T = *P;
+			U = Q;
+			bits = (*x >> k) & (uint32_t)3;
+			bnz = NEQ(bits, 0);
+			CCOPY(EQ(bits, 2), &T, &P2, sizeof T); /* T = P, P2 or P3 */
+			CCOPY(EQ(bits, 3), &T, &P3, sizeof T);
+			p256_add(&U, &T); /* always computed, whatever the bits */
+			CCOPY(bnz & qz, &Q, &T, sizeof Q); /* Q was still 0: take T itself */
+			CCOPY(bnz & ~qz, &Q, &U, sizeof Q); /* otherwise take Q + T */
+			qz &= ~bnz; /* cleared by the first non-zero bit pair */
+		}
+		x ++;
+	}
+	*P = Q;
+}
+
+/*
+ * Precomputed window: k*G points, where G is the curve generator, and k
+ * is an integer from 1 to 15 (inclusive); k*G is stored in Gwin[k - 1].
+ * Each row holds the X coordinate in its first 9 words and the Y
+ * coordinate in its last 9 words (30 bits each, little-endian order).
+ */
+static const uint32_t Gwin[15][18] = {
+
+	{ 0x1898C296, 0x1284E517, 0x1EB33A0F, 0x00DF604B,
+	  0x2440F277, 0x339B958E, 0x04247F8B, 0x347CB84B,
+	  0x00006B17, 0x37BF51F5, 0x2ED901A0, 0x3315ECEC,
+	  0x338CD5DA, 0x0F9E162B, 0x1FAD29F0, 0x27F9B8EE,
+	  0x10B8BF86, 0x00004FE3 },
+
+	{ 0x07669978, 0x182D23F1, 0x3F21B35A, 0x225A789D,
+	  0x351AC3C0, 0x08E00C12, 0x34F7E8A5, 0x1EC62340,
+	  0x00007CF2, 0x227873D1, 0x3812DE74, 0x0E982299,
+	  0x1F6B798F, 0x3430DBBA, 0x366B1A7D, 0x2D040293,
+	  0x154436E3, 0x00000777 },
+
+	{ 0x06E7FD6C, 0x2D05986F, 0x3ADA985F, 0x31ADC87B,
+	  0x0BF165E6, 0x1FBE5475, 0x30A44C8F, 0x3934698C,
+	  0x00005ECB, 0x227D5032, 0x29E6C49E, 0x04FB83D9,
+	  0x0AAC0D8E, 0x24A2ECD8, 0x2C1B3869, 0x0FF7E374,
+	  0x19031266, 0x00008734 },
+
+	{ 0x2B030852, 0x024C0911, 0x05596EF5, 0x07F8B6DE,
+	  0x262BD003, 0x3779967B, 0x08FBBA02, 0x128D4CB4,
+	  0x0000E253, 0x184ED8C6, 0x310B08FC, 0x30EE0055,
+	  0x3F25B0FC, 0x062D764E, 0x3FB97F6A, 0x33CC719D,
+	  0x15D69318, 0x0000E0F1 },
+
+	{ 0x03D033ED, 0x05552837, 0x35BE5242, 0x2320BF47,
+	  0x268FDFEF, 0x13215821, 0x140D2D78, 0x02DE9454,
+	  0x00005159, 0x3DA16DA4, 0x0742ED13, 0x0D80888D,
+	  0x004BC035, 0x0A79260D, 0x06FCDAFE, 0x2727D8AE,
+	  0x1F6A2412, 0x0000E0C1 },
+
+	{ 0x3C2291A9, 0x1AC2ABA4, 0x3B215B4C, 0x131D037A,
+	  0x17DDE302, 0x0C90B2E2, 0x0602C92D, 0x05CA9DA9,
+	  0x0000B01A, 0x0FC77FE2, 0x35F1214E, 0x07E16BDF,
+	  0x003DDC07, 0x2703791C, 0x3038B7EE, 0x3DAD56FE,
+	  0x041D0C8D, 0x0000E85C },
+
+	{ 0x3187B2A3, 0x0018A1C0, 0x00FEF5B3, 0x3E7E2E2A,
+	  0x01FB607E, 0x2CC199F0, 0x37B4625B, 0x0EDBE82F,
+	  0x00008E53, 0x01F400B4, 0x15786A1B, 0x3041B21C,
+	  0x31CD8CF2, 0x35900053, 0x1A7E0E9B, 0x318366D0,
+	  0x076F780C, 0x000073EB },
+
+	{ 0x1B6FB393, 0x13767707, 0x3CE97DBB, 0x348E2603,
+	  0x354CADC1, 0x09D0B4EA, 0x1B053404, 0x1DE76FBA,
+	  0x000062D9, 0x0F09957E, 0x295029A8, 0x3E76A78D,
+	  0x3B547DAE, 0x27CEE0A2, 0x0575DC45, 0x1D8244FF,
+	  0x332F647A, 0x0000AD5A },
+
+	{ 0x10949EE0, 0x1E7A292E, 0x06DF8B3D, 0x02B2E30B,
+	  0x31F8729E, 0x24E35475, 0x30B71878, 0x35EDBFB7,
+	  0x0000EA68, 0x0DD048FA, 0x21688929, 0x0DE823FE,
+	  0x1C53FAA9, 0x0EA0C84D, 0x052A592A, 0x1FCE7870,
+	  0x11325CB2, 0x00002A27 },
+
+	{ 0x04C5723F, 0x30D81A50, 0x048306E4, 0x329B11C7,
+	  0x223FB545, 0x085347A8, 0x2993E591, 0x1B5ACA8E,
+	  0x0000CEF6, 0x04AF0773, 0x28D2EEA9, 0x2751EEEC,
+	  0x037B4A7F, 0x3B4C1059, 0x08F37674, 0x2AE906E1,
+	  0x18A88A6A, 0x00008786 },
+
+	{ 0x34BC21D1, 0x0CCE474D, 0x15048BF4, 0x1D0BB409,
+	  0x021CDA16, 0x20DE76C3, 0x34C59063, 0x04EDE20E,
+	  0x00003ED1, 0x282A3740, 0x0BE3BBF3, 0x29889DAE,
+	  0x03413697, 0x34C68A09, 0x210EBE93, 0x0C8A224C,
+	  0x0826B331, 0x00009099 },
+
+	{ 0x0624E3C4, 0x140317BA, 0x2F82C99D, 0x260C0A2C,
+	  0x25D55179, 0x194DCC83, 0x3D95E462, 0x356F6A05,
+	  0x0000741D, 0x0D4481D3, 0x2657FC8B, 0x1BA5CA71,
+	  0x3AE44B0D, 0x07B1548E, 0x0E0D5522, 0x05FDC567,
+	  0x2D1AA70E, 0x00000770 },
+
+	{ 0x06072C01, 0x23857675, 0x1EAD58A9, 0x0B8A12D9,
+	  0x1EE2FC79, 0x0177CB61, 0x0495A618, 0x20DEB82B,
+	  0x0000177C, 0x2FC7BFD8, 0x310EEF8B, 0x1FB4DF39,
+	  0x3B8530E8, 0x0F4E7226, 0x0246B6D0, 0x2A558A24,
+	  0x163353AF, 0x000063BB },
+
+	{ 0x24D2920B, 0x1C249DCC, 0x2069C5E5, 0x09AB2F9E,
+	  0x36DF3CF1, 0x1991FD0C, 0x062B97A7, 0x1E80070E,
+	  0x000054E7, 0x20D0B375, 0x2E9F20BD, 0x35090081,
+	  0x1C7A9DDC, 0x22E7C371, 0x087E3016, 0x03175421,
+	  0x3C6ECA7D, 0x0000F599 },
+
+	{ 0x259B9D5F, 0x0D9A318F, 0x23A0EF16, 0x00EBE4B7,
+	  0x088265AE, 0x2CDE2666, 0x2BAE7ADF, 0x1371A5C6,
+	  0x0000F045, 0x0D034F36, 0x1F967378, 0x1B5FA3F4,
+	  0x0EC8739D, 0x1643E62A, 0x1653947E, 0x22D1F4E6,
+	  0x0FB8D64B, 0x0000B5B9 }
+};
+
+/*
+ * Constant-time fetch of Gwin[idx - 1] into T (with z[0] = 1); idx = 0 leaves x and y at zero.
+ */
+static void
+lookup_Gwin(p256_jacobian *T, uint32_t idx)
+{
+	uint32_t xy[18];  /* accumulator: x words start at xy[0], y words at xy[9] */
+	uint32_t k;
+	size_t u;
+
+	memset(xy, 0, sizeof xy);
+	for (k = 0; k < 15; k ++) {  /* all 15 entries are read, whatever idx is */
+		uint32_t m;
+
+		m = -EQ(idx, k + 1);  /* mask; presumably all-ones on a match, else 0 (EQ is defined elsewhere) */
+		for (u = 0; u < 18; u ++) {
+			xy[u] |= m & Gwin[k][u];  /* only the masked-in entry contributes bits */
+		}
+	}
+	memcpy(T->x, &xy[0], sizeof T->x);
+	memcpy(T->y, &xy[9], sizeof T->y);
+	memset(T->z, 0, sizeof T->z);
+	T->z[0] = 1;  /* z word 0 is 1 and all other z words are 0 */
+}
+
+/*
+ * Set *P to x*G (G = generator); x is xlen bytes, read first byte and high nibble
+ * first. The integer is assumed non-zero and lower than the curve order.
+ */
+static void
+p256_mulgen(p256_jacobian *P, const unsigned char *x, size_t xlen)
+{
+	/*
+	 * qz is a flag that is initially 1, and remains equal to 1
+	 * as long as the point is the point at infinity.
+	 *
+	 * We use a 4-bit window to handle multiplier bits by groups
+	 * of 4. The precomputed window is constant static data, with
+	 * points in affine coordinates; we use a constant-time lookup.
+	 */
+	p256_jacobian Q;
+	uint32_t qz;
+
+	memset(&Q, 0, sizeof Q);  /* accumulator starts with all coordinates zero */
+	qz = 1;
+	while (xlen -- > 0) {
+		int k;
+		unsigned bx;
+
+		bx = *x ++;
+		for (k = 0; k < 2; k ++) {  /* two 4-bit windows per byte */
+			uint32_t bits;
+			uint32_t bnz;
+			p256_jacobian T, U;
+
+			p256_double(&Q);  /* four doublings: Q <- 16*Q */
+			p256_double(&Q);
+			p256_double(&Q);
+			p256_double(&Q);
+			bits = (bx >> 4) & 0x0F;  /* current window = bits 4..7 of bx */
+			bnz = NEQ(bits, 0);  /* window non-zero? (NEQ is defined elsewhere) */
+			lookup_Gwin(&T, bits);
+			U = Q;
+			p256_add_mixed(&U, &T);  /* U = Q + T, computed for every window */
+			CCOPY(bnz & qz, &Q, &T, sizeof Q);  /* first non-zero window: Q takes T itself */
+			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);  /* later non-zero windows: Q takes Q + T */
+			qz &= ~bnz;  /* cleared once a non-zero window has been seen */
+			bx <<= 4;  /* bring the low nibble into bits 4..7 for the second pass */
+		}
+	}
+	*P = Q;
+}
+
+static const unsigned char P256_G[] = {  /* 65 bytes, first byte 0x04; handed out by api_generator() */
+	0x04, 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8,
+	0xBC, 0xE6, 0xE5, 0x63, 0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D,
+	0x81, 0x2D, 0xEB, 0x33, 0xA0, 0xF4, 0xA1, 0x39, 0x45, 0xD8,
+	0x98, 0xC2, 0x96, 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F,
+	0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C, 0x0F, 0x9E, 0x16, 0x2B,
+	0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, 0xCB, 0xB6, 0x40,
+	0x68, 0x37, 0xBF, 0x51, 0xF5
+};
+
+static const unsigned char P256_N[] = {  /* 32 bytes; handed out by api_order() */
+	0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xBC, 0xE6, 0xFA, 0xAD,
+	0xA7, 0x17, 0x9E, 0x84, 0xF3, 0xB9, 0xCA, 0xC2, 0xFC, 0x63,
+	0x25, 0x51
+};
+
+static const unsigned char *
+api_generator(int curve, size_t *len)  /* returns the P256_G bytes and their count */
+{
+	(void)curve;  /* the curve identifier is not consulted */
+	*len = sizeof P256_G;  /* out: size of the returned array, in bytes (65) */
+	return P256_G;  /* static constant data; nothing is allocated */
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)  /* returns the P256_N bytes and their count */
+{
+	(void)curve;  /* the curve identifier is not consulted */
+	*len = sizeof P256_N;  /* out: size of the returned array, in bytes (32) */
+	return P256_N;  /* static constant data; nothing is allocated */
+}
+
+static size_t
+api_xoff(int curve, size_t *len)  /* presumably offset/length of X in an encoded point (see bearssl_ec.h) */
+{
+	(void)curve;  /* the curve identifier is not consulted */
+	*len = 32;  /* out: always 32 */
+	return 1;  /* always 1 */
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,  /* G is both input and output buffer */
+	const unsigned char *x, size_t xlen, int curve)
+{
+	uint32_t r;
+	p256_jacobian P;
+
+	(void)curve;  /* the curve identifier is not consulted */
+	if (Glen != 65) {  /* any other length is rejected before G is read */
+		return 0;
+	}
+	r = p256_decode(&P, G, Glen);  /* decoding status is kept; the steps below run regardless */
+	p256_mul(&P, x, xlen);
+	p256_to_affine(&P);
+	p256_encode(G, &P);  /* the result overwrites the caller's buffer */
+	return r;  /* the value reported by p256_decode() */
+}
+
+static size_t
+api_mulgen(unsigned char *R,  /* R receives the encoded result */
+	const unsigned char *x, size_t xlen, int curve)
+{
+	p256_jacobian P;
+
+	(void)curve;  /* the curve identifier is not consulted */
+	p256_mulgen(&P, x, xlen);  /* x must satisfy p256_mulgen()'s assumptions (non-zero, below the order) */
+	p256_to_affine(&P);
+	p256_encode(R, &P);
+	return 65;  /* constant return value; matches the length api_mul() requires */
+}
+
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,  /* A is input and output */
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	p256_jacobian P, Q;
+	uint32_t r, t, z;
+	int i;
+
+	(void)curve;  /* the curve identifier is not consulted */
+	if (len != 65) {  /* len applies to A and, when present, to B */
+		return 0;
+	}
+	r = p256_decode(&P, A, len);
+	p256_mul(&P, x, xlen);  /* presumably P <- x*A (p256_mul is defined earlier in the file) */
+	if (B == NULL) {  /* a NULL B selects the generator as second point */
+		p256_mulgen(&Q, y, ylen);
+	} else {
+		r &= p256_decode(&Q, B, len);  /* both decodings must succeed for r to stay set */
+		p256_mul(&Q, y, ylen);
+	}
+
+	/*
+	 * The final addition may fail in case both points are equal.
+	 */
+	t = p256_add(&P, &Q);
+	reduce_final_f256(P.z);  /* reduced before the all-zero test below */
+	z = 0;
+	for (i = 0; i < 9; i ++) {
+		z |= P.z[i];  /* OR of the nine words of P.z */
+	}
+	z = EQ(z, 0);  /* z = 1 when every word of P.z is zero */
+	p256_double(&Q);  /* 2*Q is always computed; it is used only in the P = Q case */
+
+	/*
+	 * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
+	 * have the following:
+	 *
+	 *   z = 0, t = 0   return P (normal addition)
+	 *   z = 0, t = 1   return P (normal addition)
+	 *   z = 1, t = 0   return Q (a 'double' case)
+	 *   z = 1, t = 1   report an error (P+Q = 0)
+	 */
+	CCOPY(z & ~t, &P, &Q, sizeof Q);  /* the 'double' row of the table above */
+	p256_to_affine(&P);
+	p256_encode(A, &P);  /* the result overwrites the caller's buffer A */
+	r &= ~(z & t);  /* the error row of the table above clears the status */
+	return r;
+}
+
+/* see bearssl_ec.h; this table exposes the static api_* functions of this file */
+const br_ec_impl br_ec_p256_m31 = {
+	(uint32_t)0x00800000,  /* only bit 23 set; presumably the supported-curves mask — confirm in bearssl_ec.h */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
diff --git a/third_party/bearssl/src/ec_p256_m62.c b/third_party/bearssl/src/ec_p256_m62.c
new file mode 100644
index 0000000..a431790
--- /dev/null
+++ b/third_party/bearssl/src/ec_p256_m62.c
@@ -0,0 +1,1765 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+#if BR_UMUL128
+#include <intrin.h>
+#endif
+
+static const unsigned char P256_G[] = {  /* 65 bytes: 0x04 header, then 2 x 32 bytes; see api_generator() */
+	0x04, 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8,
+	0xBC, 0xE6, 0xE5, 0x63, 0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D,
+	0x81, 0x2D, 0xEB, 0x33, 0xA0, 0xF4, 0xA1, 0x39, 0x45, 0xD8,
+	0x98, 0xC2, 0x96, 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F,
+	0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C, 0x0F, 0x9E, 0x16, 0x2B,
+	0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, 0xCB, 0xB6, 0x40,
+	0x68, 0x37, 0xBF, 0x51, 0xF5
+};
+
+static const unsigned char P256_N[] = {  /* 32 bytes; see api_order() */
+	0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xBC, 0xE6, 0xFA, 0xAD,
+	0xA7, 0x17, 0x9E, 0x84, 0xF3, 0xB9, 0xCA, 0xC2, 0xFC, 0x63,
+	0x25, 0x51
+};
+
+static const unsigned char *
+api_generator(int curve, size_t *len)  /* hands out P256_G together with its size */
+{
+	(void)curve;  /* parameter unused */
+	*len = sizeof P256_G;  /* out: 65, the byte count of P256_G */
+	return P256_G;  /* pointer to static constant data */
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)  /* hands out P256_N together with its size */
+{
+	(void)curve;  /* parameter unused */
+	*len = sizeof P256_N;  /* out: 32, the byte count of P256_N */
+	return P256_N;  /* pointer to static constant data */
+}
+
+static size_t
+api_xoff(int curve, size_t *len)  /* position of X inside the 65-byte encoding written by point_encode() */
+{
+	(void)curve;  /* parameter unused */
+	*len = 32;  /* out: X occupies 32 bytes */
+	return 1;  /* X starts at offset 1, right after the 0x04 header byte */
+}
+
+/*
+ * A field element is encoded as five 64-bit integers, in basis 2^52.
+ * Limbs may occasionally exceed 2^52.
+ *
+ * A _partially reduced_ value is such that the following hold:
+ *   - top limb is less than 2^48 + 2^30
+ *   - the other limbs fit on 53 bits each
+ * In particular, such a value is less than twice the modulus p.
+ */
+
+#define BIT(n)   ((uint64_t)1 << (n))  /* 64-bit value with only bit n set */
+#define MASK48   (BIT(48) - BIT(0))  /* low 48 bits set */
+#define MASK52   (BIT(52) - BIT(0))  /* low 52 bits set, i.e. one limb in basis 2^52 */
+
+/* R = 2^260 mod p, five limbs with the least significant first; used as z = 1 by point_decode() */
+static const uint64_t F256_R[] = {
+	0x0000000000010, 0xF000000000000, 0xFFFFFFFFFFFFF,
+	0xFFEFFFFFFFFFF, 0x00000000FFFFF
+};
+
+/* Curve equation is y^2 = x^3 - 3*x + B. This constant is B*R mod p
+   (Montgomery representation of B); point_decode() subtracts it in its curve check. */
+static const uint64_t P256_B_MONTY[] = {
+	0xDF6229C4BDDFD, 0xCA8843090D89C, 0x212ED6ACF005C,
+	0x83415A220ABF7, 0x0C30061DD4874
+};
+
+/*
+ * Addition in the field: limb-wise sum, without carry propagation or
+ * reduction. On input, limbs may be up to 63 bits each; on output, they
+ * will be up to one bit more than on input. d may alias a and/or b.
+ */
+static inline void
+f256_add(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+	d[0] = a[0] + b[0];  /* each limb is summed independently of the others */
+	d[1] = a[1] + b[1];
+	d[2] = a[2] + b[2];
+	d[3] = a[3] + b[3];
+	d[4] = a[4] + b[4];
+}
+
+/*
+ * Partially reduce the provided value, in place.
+ * Input: limbs can go up to 61 bits each.
+ * Output: partially reduced (top limb < 2^48 + 2^30, other limbs on 53 bits).
+ */
+static inline void
+f256_partial_reduce(uint64_t *a)
+{
+	uint64_t w, cc, s;
+
+	/*
+	 * Propagate carries, so that limbs 0 to 3 fit on 52 bits each.
+	 */
+	w = a[0];
+	a[0] = w & MASK52;
+	cc = w >> 52;
+	w = a[1] + cc;
+	a[1] = w & MASK52;
+	cc = w >> 52;
+	w = a[2] + cc;
+	a[2] = w & MASK52;
+	cc = w >> 52;
+	w = a[3] + cc;
+	a[3] = w & MASK52;
+	cc = w >> 52;
+	a[4] += cc;  /* the top limb absorbs the last carry and is not masked here */
+
+	s = a[4] >> 48;             /* s < 2^14; part of weight >= 2^256, folded with 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p */
+	a[0] += s;                  /* a[0] < 2^52 + 2^14 */
+	w = a[1] - (s << 44);       /* subtract s*2^96 (limb 1 has weight 2^52) */
+	a[1] = w & MASK52;          /* a[1] < 2^52 */
+	cc = -(w >> 52) & 0xFFF;    /* borrow out of limb 1; cc <= 64 since s < 2^14 */
+	w = a[2] - cc;
+	a[2] = w & MASK52;          /* a[2] < 2^52 */
+	cc = w >> 63;               /* cc = 0 or 1 */
+	w = a[3] - cc - (s << 36);  /* subtract s*2^192 (limb 3 has weight 2^156) */
+	a[3] = w & MASK52;          /* a[3] < 2^52 */
+	cc = w >> 63;               /* cc = 0 or 1 */
+	w = a[4] & MASK48;
+	a[4] = w + (s << 16) - cc;  /* a[4] < 2^48 + 2^30 */
+}
+
+/*
+ * Subtraction in the field: d = a - b mod p. d may alias a and/or b.
+ * Input: limbs must fit on 60 bits each; in particular, the complete
+ * integer will be less than 2^268 + 2^217.
+ * Output: partially reduced.
+ */
+static inline void
+f256_sub(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+	uint64_t t[5], w, s, cc;
+
+	/*
+	 * We compute d = 2^13*p + a - b; this ensures a positive
+	 * intermediate value.
+	 *
+	 * Each individual addition/subtraction may yield a positive or
+	 * negative result; thus, we need to handle a signed carry, thus
+	 * with sign extension. We prefer not to use signed types (int64_t)
+	 * because conversion from unsigned to signed is cumbersome (a
+	 * direct cast with the top bit set is undefined behavior; instead,
+	 * we have to use pointer aliasing, using the guaranteed properties
+	 * of exact-width types, but this requires the compiler to optimize
+	 * away the writes and reads from RAM), and right-shifting a
+	 * signed negative value is implementation-defined. Therefore,
+	 * we use a custom sign extension.
+	 */
+
+	w = a[0] - b[0] - BIT(13);  /* the -2^13 term of 2^13*p */
+	t[0] = w & MASK52;
+	cc = w >> 52;
+	cc |= -(cc & BIT(11));  /* sign-extend the 12-bit carry to 64 bits */
+	w = a[1] - b[1] + cc;
+	t[1] = w & MASK52;
+	cc = w >> 52;
+	cc |= -(cc & BIT(11));
+	w = a[2] - b[2] + cc;
+	t[2] = (w & MASK52) + BIT(5);  /* the +2^13 * 2^96 term (limb 2 has weight 2^104) */
+	cc = w >> 52;
+	cc |= -(cc & BIT(11));
+	w = a[3] - b[3] + cc;
+	t[3] = (w & MASK52) + BIT(49);  /* the +2^13 * 2^192 term (limb 3 has weight 2^156) */
+	cc = w >> 52;
+	cc |= -(cc & BIT(11));
+	t[4] = (BIT(61) - BIT(29)) + a[4] - b[4] + cc;  /* the 2^13 * (2^256 - 2^224) term */
+
+	/*
+	 * Perform partial reduction. Rule is:
+	 *  2^256 = 2^224 - 2^192 - 2^96 + 1 mod p
+	 *
+	 * At that point:
+	 *    0 <= t[0] <= 2^52 - 1
+	 *    0 <= t[1] <= 2^52 - 1
+	 *    2^5 <= t[2] <= 2^52 + 2^5 - 1
+	 *    2^49 <= t[3] <= 2^52 + 2^49 - 1
+	 *    2^59 < t[4] <= 2^61 + 2^60 - 2^29
+	 *
+	 * Thus, the value 's' (t[4] / 2^48) will be necessarily
+	 * greater than 2048, and less than 12288.
+	 */
+	s = t[4] >> 48;
+
+	d[0] = t[0] + s;             /* d[0] <= 2^52 + 12287 */
+	w = t[1] - (s << 44);
+	d[1] = w & MASK52;           /* d[1] <= 2^52 - 1 */
+	cc = -(w >> 52) & 0xFFF;     /* cc <= 48 */
+	w = t[2] - cc;
+	cc = w >> 63;                /* cc = 0 or 1 */
+	d[2] = w + (cc << 52);       /* d[2] <= 2^52 + 31 */
+	w = t[3] - cc - (s << 36);
+	cc = w >> 63;                /* cc = 0 or 1 */
+	d[3] = w + (cc << 52);       /* d[3] <= 2^52 + 2^49 - 1 */
+	d[4] = (t[4] & MASK48) + (s << 16) - cc;  /* d[4] < 2^48 + 2^30 */
+
+	/*
+	 * If s = 0, then none of the limbs is modified, and there cannot
+	 * be an overflow; if s != 0, then (s << 16) > cc, and there is
+	 * no overflow either.
+	 */
+}
+
+/*
+ * Montgomery multiplication in the field: d = a*b/2^260 mod p. d may alias a or b.
+ * Input: limbs must fit on 56 bits each.
+ * Output: partially reduced.
+ */
+static void
+f256_montymul(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+#if BR_INT128
+
+	int i;
+	uint64_t t[5];  /* accumulator; copied to d only at the end, which is why aliasing is fine */
+
+	t[0] = 0;
+	t[1] = 0;
+	t[2] = 0;
+	t[3] = 0;
+	t[4] = 0;
+	for (i = 0; i < 5; i ++) {  /* one round per limb of a: 5 * 52 = 260 bits divided out */
+		uint64_t x, f, cc, w, s;
+		unsigned __int128 z;
+
+		/*
+		 * Since limbs of a[] and b[] fit on 56 bits each,
+		 * each individual product fits on 112 bits. Also,
+		 * the factor f fits on 52 bits, so f<<48 fits on
+		 * 112 bits too. This guarantees that carries (cc)
+		 * will fit on 62 bits, thus no overflow.
+		 *
+		 * The operations below compute:
+		 *   t <- (t + x*b + f*p) / 2^52
+		 */
+		x = a[i];
+		z = (unsigned __int128)b[0] * (unsigned __int128)x
+			+ (unsigned __int128)t[0];
+		f = (uint64_t)z & MASK52;  /* f*p cancels this low limb (p = -1 mod 2^52) */
+		cc = (uint64_t)(z >> 52);
+		z = (unsigned __int128)b[1] * (unsigned __int128)x
+			+ (unsigned __int128)t[1] + cc
+			+ ((unsigned __int128)f << 44);
+		t[0] = (uint64_t)z & MASK52;  /* stored one limb down: the division by 2^52 */
+		cc = (uint64_t)(z >> 52);
+		z = (unsigned __int128)b[2] * (unsigned __int128)x
+			+ (unsigned __int128)t[2] + cc;
+		t[1] = (uint64_t)z & MASK52;
+		cc = (uint64_t)(z >> 52);
+		z = (unsigned __int128)b[3] * (unsigned __int128)x
+			+ (unsigned __int128)t[3] + cc
+			+ ((unsigned __int128)f << 36);
+		t[2] = (uint64_t)z & MASK52;
+		cc = (uint64_t)(z >> 52);
+		z = (unsigned __int128)b[4] * (unsigned __int128)x
+			+ (unsigned __int128)t[4] + cc
+			+ ((unsigned __int128)f << 48)
+			- ((unsigned __int128)f << 16);
+		t[3] = (uint64_t)z & MASK52;
+		t[4] = (uint64_t)(z >> 52);
+
+		/*
+		 * t[4] may be up to 62 bits here; we need to do a
+		 * partial reduction. Note that limbs t[0] to t[3]
+		 * fit on 52 bits each.
+		 */
+		s = t[4] >> 48;             /* s < 2^14 */
+		t[0] += s;                  /* t[0] < 2^52 + 2^14 */
+		w = t[1] - (s << 44);
+		t[1] = w & MASK52;          /* t[1] < 2^52 */
+		cc = -(w >> 52) & 0xFFF;    /* cc <= 64 since s < 2^14 */
+		w = t[2] - cc;
+		t[2] = w & MASK52;          /* t[2] < 2^52 */
+		cc = w >> 63;               /* cc = 0 or 1 */
+		w = t[3] - cc - (s << 36);
+		t[3] = w & MASK52;          /* t[3] < 2^52 */
+		cc = w >> 63;               /* cc = 0 or 1 */
+		w = t[4] & MASK48;
+		t[4] = w + (s << 16) - cc;  /* t[4] < 2^48 + 2^30 */
+
+		/*
+		 * The final t[4] cannot overflow because cc is 0 or 1,
+		 * and cc can be 1 only if s != 0.
+		 */
+	}
+
+	d[0] = t[0];
+	d[1] = t[1];
+	d[2] = t[2];
+	d[3] = t[3];
+	d[4] = t[4];
+
+#elif BR_UMUL128
+
+	int i;
+	uint64_t t[5];  /* accumulator; copied to d only at the end, which is why aliasing is fine */
+
+	t[0] = 0;
+	t[1] = 0;
+	t[2] = 0;
+	t[3] = 0;
+	t[4] = 0;
+	for (i = 0; i < 5; i ++) {  /* same rounds as the BR_INT128 branch, with zh:zl as the 128-bit value */
+		uint64_t x, f, cc, w, s, zh, zl;
+		unsigned char k;
+
+		/*
+		 * Since limbs of a[] and b[] fit on 56 bits each,
+		 * each individual product fits on 112 bits. Also,
+		 * the factor f fits on 52 bits, so f<<48 fits on
+		 * 112 bits too. This guarantees that carries (cc)
+		 * will fit on 62 bits, thus no overflow.
+		 *
+		 * The operations below compute:
+		 *   t <- (t + x*b + f*p) / 2^52
+		 */
+		x = a[i];
+		zl = _umul128(b[0], x, &zh);
+		k = _addcarry_u64(0, t[0], zl, &zl);  /* 128-bit add: low word first ... */
+		(void)_addcarry_u64(k, 0, zh, &zh);  /* ... then the carry into the high word */
+		f = zl & MASK52;
+		cc = (zl >> 52) | (zh << 12);  /* (zh:zl) >> 52, truncated to 64 bits */
+
+		zl = _umul128(b[1], x, &zh);
+		k = _addcarry_u64(0, t[1], zl, &zl);
+		(void)_addcarry_u64(k, 0, zh, &zh);
+		k = _addcarry_u64(0, cc, zl, &zl);
+		(void)_addcarry_u64(k, 0, zh, &zh);
+		k = _addcarry_u64(0, f << 44, zl, &zl);  /* add f << 44 as a 128-bit value: low word ... */
+		(void)_addcarry_u64(k, f >> 20, zh, &zh);  /* ... and high word (f >> (64 - 44)) */
+		t[0] = zl & MASK52;
+		cc = (zl >> 52) | (zh << 12);
+
+		zl = _umul128(b[2], x, &zh);
+		k = _addcarry_u64(0, t[2], zl, &zl);
+		(void)_addcarry_u64(k, 0, zh, &zh);
+		k = _addcarry_u64(0, cc, zl, &zl);
+		(void)_addcarry_u64(k, 0, zh, &zh);
+		t[1] = zl & MASK52;
+		cc = (zl >> 52) | (zh << 12);
+
+		zl = _umul128(b[3], x, &zh);
+		k = _addcarry_u64(0, t[3], zl, &zl);
+		(void)_addcarry_u64(k, 0, zh, &zh);
+		k = _addcarry_u64(0, cc, zl, &zl);
+		(void)_addcarry_u64(k, 0, zh, &zh);
+		k = _addcarry_u64(0, f << 36, zl, &zl);
+		(void)_addcarry_u64(k, f >> 28, zh, &zh);
+		t[2] = zl & MASK52;
+		cc = (zl >> 52) | (zh << 12);
+
+		zl = _umul128(b[4], x, &zh);
+		k = _addcarry_u64(0, t[4], zl, &zl);
+		(void)_addcarry_u64(k, 0, zh, &zh);
+		k = _addcarry_u64(0, cc, zl, &zl);
+		(void)_addcarry_u64(k, 0, zh, &zh);
+		k = _addcarry_u64(0, f << 48, zl, &zl);
+		(void)_addcarry_u64(k, f >> 16, zh, &zh);
+		k = _subborrow_u64(0, zl, f << 16, &zl);  /* 128-bit subtraction of f << 16 */
+		(void)_subborrow_u64(k, zh, f >> 48, &zh);
+		t[3] = zl & MASK52;
+		t[4] = (zl >> 52) | (zh << 12);
+
+		/*
+		 * t[4] may be up to 62 bits here; we need to do a
+		 * partial reduction. Note that limbs t[0] to t[3]
+		 * fit on 52 bits each.
+		 */
+		s = t[4] >> 48;             /* s < 2^14 */
+		t[0] += s;                  /* t[0] < 2^52 + 2^14 */
+		w = t[1] - (s << 44);
+		t[1] = w & MASK52;          /* t[1] < 2^52 */
+		cc = -(w >> 52) & 0xFFF;    /* cc <= 64 since s < 2^14 */
+		w = t[2] - cc;
+		t[2] = w & MASK52;          /* t[2] < 2^52 */
+		cc = w >> 63;               /* cc = 0 or 1 */
+		w = t[3] - cc - (s << 36);
+		t[3] = w & MASK52;          /* t[3] < 2^52 */
+		cc = w >> 63;               /* cc = 0 or 1 */
+		w = t[4] & MASK48;
+		t[4] = w + (s << 16) - cc;  /* t[4] < 2^48 + 2^30 */
+
+		/*
+		 * The final t[4] cannot overflow because cc is 0 or 1,
+		 * and cc can be 1 only if s != 0.
+		 */
+	}
+
+	d[0] = t[0];
+	d[1] = t[1];
+	d[2] = t[2];
+	d[3] = t[3];
+	d[4] = t[4];
+
+#endif
+}
+
+/*
+ * Montgomery squaring in the field (d = a*a/2^260 mod p); currently a basic
+ * wrapper around multiplication (inline, should be optimized away).
+ * TODO: see if some extra speed can be gained here.
+ */
+static inline void
+f256_montysquare(uint64_t *d, const uint64_t *a)
+{
+	f256_montymul(d, a, a);  /* same input and output constraints as f256_montymul() */
+}
+
+/*
+ * Convert to Montgomery representation: d = a*R mod p, with R = 2^260 mod p.
+ */
+static void
+f256_tomonty(uint64_t *d, const uint64_t *a)
+{
+	/*
+	 * R2 = 2^520 mod p.
+	 * If R = 2^260 mod p, then R2 = R^2 mod p; and the Montgomery
+	 * multiplication of a by R2 is: a*R2/R = a*R mod p, i.e. the
+	 * conversion to Montgomery representation.
+	 */
+	static const uint64_t R2[] = {
+		0x0000000000300, 0xFFFFFFFF00000, 0xFFFFEFFFFFFFB,
+		0xFDFFFFFFFFFFF, 0x0000004FFFFFF
+	};
+
+	f256_montymul(d, a, R2);  /* output is partially reduced, as for any f256_montymul() result */
+}
+
+/*
+ * Convert from Montgomery representation: d = a/R mod p, with R = 2^260 mod p.
+ */
+static void
+f256_frommonty(uint64_t *d, const uint64_t *a)
+{
+	/*
+	 * Montgomery multiplication by 1 is division by 2^260 modulo p.
+	 */
+	static const uint64_t one[] = { 1, 0, 0, 0, 0 };  /* the plain integer 1, as five limbs */
+
+	f256_montymul(d, a, one);  /* output is only partially reduced; callers apply f256_final_reduce() */
+}
+
+/*
+ * Inversion in the field. If the source value is 0 modulo p, then this
+ * returns 0 or p. Input and output are in Montgomery representation; d may alias a.
+ */
+static void
+f256_invert(uint64_t *d, const uint64_t *a)
+{
+	/*
+	 * We compute a^(p-2) mod p. The exponent pattern (from high to
+	 * low) is:
+	 *  - 32 bits of value 1
+	 *  - 31 bits of value 0
+	 *  - 1 bit of value 1
+	 *  - 96 bits of value 0
+	 *  - 94 bits of value 1
+	 *  - 1 bit of value 0
+	 *  - 1 bit of value 1
+	 * To speed up the square-and-multiply algorithm, we precompute
+	 * a^(2^31-1).
+	 */
+
+	uint64_t r[5], t[5];
+	int i;
+
+	memcpy(t, a, sizeof t);
+	for (i = 0; i < 30; i ++) {  /* each round appends one 1 bit to the exponent */
+		f256_montysquare(t, t);
+		f256_montymul(t, t, a);
+	}  /* here t = a^(2^31-1): 31 exponent bits, all set */
+
+	memcpy(r, t, sizeof t);  /* r starts with the top 31 one-bits of the exponent */
+	for (i = 224; i >= 0; i --) {  /* i is the index of the exponent bit position being completed */
+		f256_montysquare(r, r);
+		switch (i) {
+		case 0:
+		case 2:
+		case 192:
+		case 224:
+			f256_montymul(r, r, a);  /* a single 1 bit at position i */
+			break;
+		case 3:
+		case 34:
+		case 65:
+			f256_montymul(r, r, t);  /* a run of 31 one-bits ending at position i */
+			break;
+		}
+	}
+	memcpy(d, r, sizeof r);  /* written last, so a is fully consumed before d changes */
+}
+
+/*
+ * Finalize reduction, in place; both candidates are computed and one is selected by mask.
+ * Input value should be partially reduced.
+ * On output, limbs a[0] to a[3] fit on 52 bits each, limb a[4] fits
+ * on 48 bits, and the integer is less than p.
+ */
+static inline void
+f256_final_reduce(uint64_t *a)
+{
+	uint64_t r[5], t[5], w, cc;
+	int i;
+
+	/*
+	 * Propagate carries to ensure that limbs 0 to 3 fit on 52 bits.
+	 */
+	cc = 0;
+	for (i = 0; i < 5; i ++) {
+		w = a[i] + cc;
+		r[i] = w & MASK52;
+		cc = w >> 52;
+	}
+
+	/*
+	 * We compute t = r + (2^256 - p) = r + 2^224 - 2^192 - 2^96 + 1.
+	 * If t < 2^256, then r < p, and we return r. Otherwise, we
+	 * want to return r - p = t - 2^256.
+	 */
+
+	/*
+	 * Add 2^224 + 1, and propagate carries to ensure that limbs
+	 * t[0] to t[3] fit in 52 bits each.
+	 */
+	w = r[0] + 1;
+	t[0] = w & MASK52;
+	cc = w >> 52;
+	w = r[1] + cc;
+	t[1] = w & MASK52;
+	cc = w >> 52;
+	w = r[2] + cc;
+	t[2] = w & MASK52;
+	cc = w >> 52;
+	w = r[3] + cc;
+	t[3] = w & MASK52;
+	cc = w >> 52;
+	t[4] = r[4] + cc + BIT(16);  /* BIT(16) in limb 4 (weight 2^208) is 2^224 */
+
+	/*
+	 * Subtract 2^192 + 2^96. Since we just added 2^224 + 1, the
+	 * result cannot be negative.
+	 */
+	w = t[1] - BIT(44);  /* BIT(44) in limb 1 (weight 2^52) is 2^96 */
+	t[1] = w & MASK52;
+	cc = w >> 63;
+	w = t[2] - cc;
+	t[2] = w & MASK52;
+	cc = w >> 63;
+	w = t[3] - BIT(36) - cc;  /* BIT(36) in limb 3 (weight 2^156) is 2^192 */
+	t[3] = w & MASK52;
+	cc = w >> 63;
+	t[4] -= cc;
+
+	/*
+	 * If the top limb t[4] fits on 48 bits, then r[] is already
+	 * in the proper range. Otherwise, t[] is the value to return
+	 * (truncated to 256 bits).
+	 */
+	cc = -(t[4] >> 48);  /* all-ones when t >= 2^256 (the extra bit is 0 or 1), else 0 */
+	t[4] &= MASK48;
+	for (i = 0; i < 5; i ++) {
+		a[i] = r[i] ^ (cc & (r[i] ^ t[i]));  /* branch-free select: t[i] when cc is all-ones, else r[i] */
+	}
+}
+
+/*
+ * Points in affine and Jacobian coordinates.
+ *
+ *  - In affine coordinates, the point-at-infinity cannot be encoded.
+ *  - Jacobian coordinates (X,Y,Z) correspond to affine (X/Z^2,Y/Z^3);
+ *    if Z = 0 then this is the point-at-infinity.
+ */
+typedef struct {  /* affine point; no encoding exists for the point-at-infinity */
+	uint64_t x[5];  /* field element, five limbs in basis 2^52 */
+	uint64_t y[5];  /* field element, five limbs in basis 2^52 */
+} p256_affine;
+
+typedef struct {  /* Jacobian point (X,Y,Z), standing for affine (X/Z^2, Y/Z^3) */
+	uint64_t x[5];  /* field element, five limbs in basis 2^52 */
+	uint64_t y[5];  /* field element, five limbs in basis 2^52 */
+	uint64_t z[5];  /* Z = 0 denotes the point-at-infinity */
+} p256_jacobian;
+
+/*
+ * Decode a field element from 32 bytes (unsigned big endian) into five limbs; no reduction is done.
+ */
+static void
+f256_decode(uint64_t *a, const unsigned char *buf)
+{
+	uint64_t w0, w1, w2, w3;
+
+	w3 = br_dec64be(buf +  0);  /* most significant 64 bits */
+	w2 = br_dec64be(buf +  8);
+	w1 = br_dec64be(buf + 16);
+	w0 = br_dec64be(buf + 24);  /* least significant 64 bits */
+	a[0] = w0 & MASK52;  /* bits 0..51 */
+	a[1] = ((w0 >> 52) | (w1 << 12)) & MASK52;  /* bits 52..103 */
+	a[2] = ((w1 >> 40) | (w2 << 24)) & MASK52;  /* bits 104..155 */
+	a[3] = ((w2 >> 28) | (w3 << 36)) & MASK52;  /* bits 156..207 */
+	a[4] = w3 >> 16;  /* bits 208..255, i.e. a 48-bit top limb */
+}
+
+/*
+ * Encode a field element into 32 bytes (unsigned big endian notation). The
+ * field element MUST be fully reduced (see f256_final_reduce()).
+ */
+static void
+f256_encode(unsigned char *buf, const uint64_t *a)
+{
+	uint64_t w0, w1, w2, w3;
+
+	w0 = a[0] | (a[1] << 52);  /* limbs are OR-ed together, hence the need for exact limb sizes */
+	w1 = (a[1] >> 12) | (a[2] << 40);
+	w2 = (a[2] >> 24) | (a[3] << 28);
+	w3 = (a[3] >> 36) | (a[4] << 16);
+	br_enc64be(buf +  0, w3);  /* most significant 64 bits come first */
+	br_enc64be(buf +  8, w2);
+	br_enc64be(buf + 16, w1);
+	br_enc64be(buf + 24, w0);
+}
+
+/*
+ * Decode a point. The returned point is in Jacobian coordinates, but
+ * with z = 1. If the encoding is invalid, or encodes a point which is
+ * not on the curve, or encodes the point at infinity, then this function
+ * returns 0. Otherwise, 1 is returned. *P is written in both cases.
+ *
+ * The buffer is assumed to have length exactly 65 bytes.
+ */
+static uint32_t
+point_decode(p256_jacobian *P, const unsigned char *buf)
+{
+	uint64_t x[5], y[5], t[5], x3[5], tt;
+	uint32_t r;
+
+	/*
+	 * Header byte shall be 0x04.
+	 */
+	r = EQ(buf[0], 0x04);
+
+	/*
+	 * Decode X and Y coordinates, and convert them into
+	 * Montgomery representation.
+	 */
+	f256_decode(x, buf +  1);
+	f256_decode(y, buf + 33);
+	f256_tomonty(x, x);
+	f256_tomonty(y, y);
+
+	/*
+	 * Verify y^2 = x^3 + A*x + B. In curve P-256, A = -3.
+	 * Note that the Montgomery representation of 0 is 0. We must
+	 * take care to apply the final reduction to make sure we have
+	 * 0 and not p.
+	 */
+	f256_montysquare(t, y);
+	f256_montysquare(x3, x);
+	f256_montymul(x3, x3, x);
+	f256_sub(t, t, x3);  /* t = y^2 - x^3 */
+	f256_add(t, t, x);  /* three additions of x: t = y^2 - x^3 + 3*x */
+	f256_add(t, t, x);
+	f256_add(t, t, x);
+	f256_sub(t, t, P256_B_MONTY);  /* t = y^2 - x^3 + 3*x - B, zero on the curve */
+	f256_final_reduce(t);
+	tt = t[0] | t[1] | t[2] | t[3] | t[4];
+	r &= EQ((uint32_t)(tt | (tt >> 32)), 0);  /* both 32-bit halves are folded into the tested value */
+
+	/*
+	 * Return the point in Jacobian coordinates (and Montgomery
+	 * representation).
+	 */
+	memcpy(P->x, x, sizeof x);
+	memcpy(P->y, y, sizeof y);
+	memcpy(P->z, F256_R, sizeof F256_R);  /* F256_R is the Montgomery representation of 1 */
+	return r;
+}
+
+/*
+ * Final conversion for a point:
+ *  - The point is converted back to affine coordinates.
+ *  - Final reduction is performed.
+ *  - The point is encoded into the provided buffer (65 bytes are written).
+ *
+ * If the point is the point-at-infinity, all operations are performed,
+ * but the buffer contents are indeterminate, and 0 is returned. Otherwise,
+ * the encoded point is written in the buffer, and 1 is returned.
+ */
+static uint32_t
+point_encode(unsigned char *buf, const p256_jacobian *P)
+{
+	uint64_t t1[5], t2[5], z;
+
+	/* Set t1 = 1/z^2 and t2 = 1/z^3. */
+	f256_invert(t2, P->z);
+	f256_montysquare(t1, t2);
+	f256_montymul(t2, t2, t1);
+
+	/* Compute affine coordinates x (in t1) and y (in t2). */
+	f256_montymul(t1, P->x, t1);
+	f256_montymul(t2, P->y, t2);
+
+	/* Convert back from Montgomery representation, and finalize
+	   reductions. */
+	f256_frommonty(t1, t1);
+	f256_frommonty(t2, t2);
+	f256_final_reduce(t1);
+	f256_final_reduce(t2);
+
+	/* Encode. */
+	buf[0] = 0x04;  /* the header byte that point_decode() checks for */
+	f256_encode(buf +  1, t1);  /* x in bytes 1..32 */
+	f256_encode(buf + 33, t2);  /* y in bytes 33..64 */
+
+	/* Return success if and only if P->z != 0. */
+	z = P->z[0] | P->z[1] | P->z[2] | P->z[3] | P->z[4];  /* P->z is tested as stored, without reduction */
+	return NEQ((uint32_t)(z | z >> 32), 0);
+}
+
+/*
+ * Point doubling in Jacobian coordinates: point P is doubled, in place.
+ * Note: if the source point is the point-at-infinity, then the result is
+ * still the point-at-infinity, which is correct. Moreover, if the three
+ * coordinates were zero, then they still are zero in the returned value.
+ */
+static void
+p256_double(p256_jacobian *P)
+{
+	/*
+	 * Doubling formulas are:
+	 *
+	 *   s = 4*x*y^2
+	 *   m = 3*(x + z^2)*(x - z^2)
+	 *   x' = m^2 - 2*s
+	 *   y' = m*(s - x') - 8*y^4
+	 *   z' = 2*y*z
+	 *
+	 * These formulas work for all points, including points of order 2
+	 * and points at infinity:
+	 *   - If y = 0 then z' = 0. But there is no such point in P-256
+	 *     anyway.
+	 *   - If z = 0 then z' = 0.
+	 */
+	uint64_t t1[5], t2[5], t3[5], t4[5];
+
+	/*
+	 * Compute z^2 in t1.
+	 */
+	f256_montysquare(t1, P->z);
+
+	/*
+	 * Compute x+z^2 in t2 and x-z^2 in t1.
+	 */
+	f256_add(t2, P->x, t1);
+	f256_sub(t1, P->x, t1);
+
+	/*
+	 * Compute 3*(x+z^2)*(x-z^2) in t1.
+	 */
+	f256_montymul(t3, t1, t2);
+	f256_add(t1, t3, t3);  /* t1 = 2*t3 */
+	f256_add(t1, t3, t1);  /* t1 = 3*t3 = m */
+
+	/*
+	 * Compute 4*x*y^2 (in t2) and 2*y^2 (in t3).
+	 */
+	f256_montysquare(t3, P->y);
+	f256_add(t3, t3, t3);
+	f256_montymul(t2, P->x, t3);  /* t2 = 2*x*y^2 */
+	f256_add(t2, t2, t2);  /* t2 = 4*x*y^2 = s */
+
+	/*
+	 * Compute x' = m^2 - 2*s.
+	 */
+	f256_montysquare(P->x, t1);
+	f256_sub(P->x, P->x, t2);  /* s is subtracted twice */
+	f256_sub(P->x, P->x, t2);
+
+	/*
+	 * Compute z' = 2*y*z.
+	 */
+	f256_montymul(t4, P->y, P->z);  /* uses the old y: P->y is overwritten only further down */
+	f256_add(P->z, t4, t4);
+	f256_partial_reduce(P->z);  /* f256_add() does not reduce; bring z back to partially reduced form */
+
+	/*
+	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
+	 * 2*y^2 in t3.
+	 */
+	f256_sub(t2, t2, P->x);
+	f256_montymul(P->y, t1, t2);
+	f256_montysquare(t4, t3);  /* t4 = (2*y^2)^2 = 4*y^4 */
+	f256_add(t4, t4, t4);  /* t4 = 8*y^4 */
+	f256_sub(P->y, P->y, t4);
+}
+
+/*
+ * Point addition (Jacobian coordinates): P1 is replaced with P1+P2.
+ * This function computes the wrong result in the following cases:
+ *
+ *   - If P1 == 0 but P2 != 0
+ *   - If P1 != 0 but P2 == 0
+ *   - If P1 == P2
+ *
+ * In all three cases, P1 is set to the point at infinity.
+ *
+ * Returned value is 0 if one of the following occurs:
+ *
+ *   - P1 and P2 have the same Y coordinate.
+ *   - P1 == 0 and P2 == 0.
+ *   - The Y coordinate of one of the points is 0 and the other point is
+ *     the point at infinity.
+ *
+ * The third case cannot actually happen with valid points, since a point
+ * with Y == 0 is a point of order 2, and there is no point of order 2 on
+ * curve P-256.
+ *
+ * Therefore, assuming that P1 != 0 and P2 != 0 on input, then the caller
+ * can apply the following:
+ *
+ *   - If the result is not the point at infinity, then it is correct.
+ *   - Otherwise, if the returned value is 1, then this is a case of
+ *     P1+P2 == 0, so the result is indeed the point at infinity.
+ *   - Otherwise, P1 == P2, so a "double" operation should have been
+ *     performed.
+ *
+ * Note that you can get a returned value of 0 with a correct result,
+ * e.g. if P1 and P2 have the same Y coordinate, but distinct X coordinates.
+ */
+static uint32_t
+p256_add(p256_jacobian *P1, const p256_jacobian *P2)
+{
+	/*
+	 * Addition formulas are:
+	 *
+	 *   u1 = x1 * z2^2
+	 *   u2 = x2 * z1^2
+	 *   s1 = y1 * z2^3
+	 *   s2 = y2 * z1^3
+	 *   h = u2 - u1
+	 *   r = s2 - s1
+	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
+	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+	 *   z3 = h * z1 * z2
+	 */
+	uint64_t t1[5], t2[5], t3[5], t4[5], t5[5], t6[5], t7[5], tt;
+	uint32_t ret;
+
+	/*
+	 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
+	 */
+	f256_montysquare(t3, P2->z);
+	f256_montymul(t1, P1->x, t3);
+	f256_montymul(t4, P2->z, t3);
+	f256_montymul(t3, P1->y, t4);
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	f256_montysquare(t4, P1->z);
+	f256_montymul(t2, P2->x, t4);
+	f256_montymul(t5, P1->z, t4);
+	f256_montymul(t4, P2->y, t5);
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 * We need to test whether r is zero, so we will do some extra
+	 * reduce.
+	 */
+	f256_sub(t2, t2, t1);
+	f256_sub(t4, t4, t3);
+	f256_final_reduce(t4);  /* makes r exactly 0 (never p) when it is zero modulo p */
+	tt = t4[0] | t4[1] | t4[2] | t4[3] | t4[4];
+	ret = (uint32_t)(tt | (tt >> 32));
+	ret = (ret | -ret) >> 31;  /* 1 if r != 0, 0 if r == 0, without branching */
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5);
+	 */
+	f256_montysquare(t7, t2);
+	f256_montymul(t6, t1, t7);
+	f256_montymul(t5, t7, t2);
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 */
+	f256_montysquare(P1->x, t4);
+	f256_sub(P1->x, P1->x, t5);
+	f256_sub(P1->x, P1->x, t6);  /* u1*h^2 is subtracted twice */
+	f256_sub(P1->x, P1->x, t6);
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	f256_sub(t6, t6, P1->x);
+	f256_montymul(P1->y, t4, t6);
+	f256_montymul(t1, t5, t3);
+	f256_sub(P1->y, P1->y, t1);
+
+	/*
+	 * Compute z3 = h*z1*z2.
+	 */
+	f256_montymul(t1, P1->z, P2->z);
+	f256_montymul(P1->z, t1, t2);
+
+	return ret;
+}
+
+/*
+ * Point addition (mixed coordinates): P1 is replaced with P1+P2.
+ * This is a specialised function for the case when P2 is a non-zero point
+ * in affine coordinates.
+ *
+ * This function computes the wrong result in the following cases:
+ *
+ *   - If P1 == 0
+ *   - If P1 == P2
+ *
+ * In both cases, P1 is set to the point at infinity.
+ *
+ * Returned value is 0 if one of the following occurs:
+ *
+ *   - P1 and P2 have the same Y (affine) coordinate.
+ *   - The Y coordinate of P2 is 0 and P1 is the point at infinity.
+ *
+ * The second case cannot actually happen with valid points, since a point
+ * with Y == 0 is a point of order 2, and there is no point of order 2 on
+ * curve P-256.
+ *
+ * Therefore, assuming that P1 != 0 on input, then the caller
+ * can apply the following:
+ *
+ *   - If the result is not the point at infinity, then it is correct.
+ *   - Otherwise, if the returned value is 1, then this is a case of
+ *     P1+P2 == 0, so the result is indeed the point at infinity.
+ *   - Otherwise, P1 == P2, so a "double" operation should have been
+ *     performed.
+ *
+ * Again, a value of 0 may be returned in some cases where the addition
+ * result is correct.
+ */
+static uint32_t
+p256_add_mixed(p256_jacobian *P1, const p256_affine *P2)
+{
+	/*
+	 * Addition formulas are (P2 is affine, i.e. z2 = 1):
+	 *
+	 *   u1 = x1
+	 *   u2 = x2 * z1^2
+	 *   s1 = y1
+	 *   s2 = y2 * z1^3
+	 *   h = u2 - u1
+	 *   r = s2 - s1
+	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
+	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+	 *   z3 = h * z1
+	 */
+	uint64_t t1[5], t2[5], t3[5], t4[5], t5[5], t6[5], t7[5], tt;
+	uint32_t ret;
+
+	/*
+	 * Compute u1 = x1 (in t1) and s1 = y1 (in t3).
+	 */
+	memcpy(t1, P1->x, sizeof t1);
+	memcpy(t3, P1->y, sizeof t3);
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	f256_montysquare(t4, P1->z);
+	f256_montymul(t2, P2->x, t4);
+	f256_montymul(t5, P1->z, t4);
+	f256_montymul(t4, P2->y, t5);
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 * We need to test whether r is zero, so we will do some extra
+	 * reduce.
+	 */
+	f256_sub(t2, t2, t1);
+	f256_sub(t4, t4, t3);
+	f256_final_reduce(t4);  /* makes r exactly 0 (never p) when it is zero modulo p */
+	tt = t4[0] | t4[1] | t4[2] | t4[3] | t4[4];
+	ret = (uint32_t)(tt | (tt >> 32));
+	ret = (ret | -ret) >> 31;  /* 1 if r != 0, 0 if r == 0, without branching */
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5);
+	 */
+	f256_montysquare(t7, t2);
+	f256_montymul(t6, t1, t7);
+	f256_montymul(t5, t7, t2);
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 */
+	f256_montysquare(P1->x, t4);
+	f256_sub(P1->x, P1->x, t5);
+	f256_sub(P1->x, P1->x, t6);  /* u1*h^2 is subtracted twice */
+	f256_sub(P1->x, P1->x, t6);
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	f256_sub(t6, t6, P1->x);
+	f256_montymul(P1->y, t4, t6);
+	f256_montymul(t1, t5, t3);
+	f256_sub(P1->y, P1->y, t1);
+
+	/*
+	 * Compute z3 = h*z1.
+	 */
+	f256_montymul(P1->z, P1->z, t2);
+
+	return ret;
+}
+
+#if 0
+/* unused */
+/*
+ * Point addition (mixed coordinates, complete): P1 is replaced with P1+P2.
+ * This is a specialised function for the case when P2 is a non-zero point
+ * in affine coordinates. The resulting point is correct in all cases.
+ *
+ * NOTE(review): declared uint32_t but no value is returned; fix before use.
+ */
+static uint32_t
+p256_add_complete_mixed(p256_jacobian *P1, const p256_affine *P2)
+{
+	/*
+	 * Addition formulas, in the general case, are:
+	 *
+	 *   u1 = x1
+	 *   u2 = x2 * z1^2
+	 *   s1 = y1
+	 *   s2 = y2 * z1^3
+	 *   h = u2 - u1
+	 *   r = s2 - s1
+	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
+	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+	 *   z3 = h * z1
+	 *
+	 * These formulas mishandle the two following cases:
+	 *
+	 *  - If P1 is the point-at-infinity (z1 = 0), then z3 is
+	 *    incorrectly set to 0.
+	 *
+	 *  - If P1 = P2, then u1 = u2 and s1 = s2, and x3, y3 and z3
+	 *    are all set to 0.
+	 *
+	 * However, if P1 + P2 = 0, then u1 = u2 but s1 != s2, and then
+	 * we correctly get z3 = 0 (the point-at-infinity).
+	 *
+	 * To fix the case P1 = 0, we perform at the end a copy of P2
+	 * over P1, conditional to z1 = 0.
+	 *
+	 * For P1 = P2: in that case, both h and r are set to 0, and
+	 * we get x3, y3 and z3 equal to 0. We can test for that
+	 * occurrence to make a mask which will be all-one if P1 = P2,
+	 * or all-zero otherwise; then we can compute the double of P2
+	 * and add it, combined with the mask, to (x3,y3,z3).
+	 *
+	 * Using the doubling formulas in p256_double() on (x2,y2),
+	 * simplifying since P2 is affine (i.e. z2 = 1, implicitly),
+	 * we get:
+	 *   s = 4*x2*y2^2
+	 *   m = 3*(x2 + 1)*(x2 - 1)
+	 *   x' = m^2 - 2*s
+	 *   y' = m*(s - x') - 8*y2^4
+	 *   z' = 2*y2
+	 * which requires only 6 multiplications. Added to the 11
+	 * multiplications of the normal mixed addition in Jacobian
+	 * coordinates, we get a cost of 17 multiplications in total.
+	 */
+	uint64_t t1[5], t2[5], t3[5], t4[5], t5[5], t6[5], t7[5], tt, zz;
+	int i;
+
+	/*
+	 * Set zz to -1 if P1 is the point at infinity, 0 otherwise.
+	 */
+	zz = P1->z[0] | P1->z[1] | P1->z[2] | P1->z[3] | P1->z[4];
+	zz = ((zz | -zz) >> 63) - (uint64_t)1;
+
+	/*
+	 * Compute u1 = x1 (in t1) and s1 = y1 (in t3).
+	 */
+	memcpy(t1, P1->x, sizeof t1);
+	memcpy(t3, P1->y, sizeof t3);
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	f256_montysquare(t4, P1->z);
+	f256_montymul(t2, P2->x, t4);
+	f256_montymul(t5, P1->z, t4);
+	f256_montymul(t4, P2->y, t5);
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4); both get
+	 * an extra reduction just below so that they can be tested for zero.
+	 */
+	f256_sub(t2, t2, t1);
+	f256_sub(t4, t4, t3);
+
+	/*
+	 * If both h = 0 and r = 0, then P1 = P2, and we want to set
+	 * the mask tt to -1; otherwise, the mask will be 0.
+	 */
+	f256_final_reduce(t2);
+	f256_final_reduce(t4);
+	tt = t2[0] | t2[1] | t2[2] | t2[3] | t2[4]
+		| t4[0] | t4[1] | t4[2] | t4[3] | t4[4];
+	tt = ((tt | -tt) >> 63) - (uint64_t)1;
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5);
+	 */
+	f256_montysquare(t7, t2);
+	f256_montymul(t6, t1, t7);
+	f256_montymul(t5, t7, t2);
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 */
+	f256_montysquare(P1->x, t4);
+	f256_sub(P1->x, P1->x, t5);
+	f256_sub(P1->x, P1->x, t6);
+	f256_sub(P1->x, P1->x, t6);
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	f256_sub(t6, t6, P1->x);
+	f256_montymul(P1->y, t4, t6);
+	f256_montymul(t1, t5, t3);
+	f256_sub(P1->y, P1->y, t1);
+
+	/*
+	 * Compute z3 = h*z1.
+	 */
+	f256_montymul(P1->z, P1->z, t2);
+
+	/*
+	 * The "double" result, in case P1 = P2.
+	 */
+
+	/*
+	 * Compute z' = 2*y2 (in t1).
+	 */
+	f256_add(t1, P2->y, P2->y);
+	f256_partial_reduce(t1);
+
+	/*
+	 * Compute 2*(y2^2) (in t2) and s = 4*x2*(y2^2) (in t3).
+	 */
+	f256_montysquare(t2, P2->y);
+	f256_add(t2, t2, t2);
+	f256_add(t3, t2, t2);
+	f256_montymul(t3, P2->x, t3);
+
+	/*
+	 * Compute m = 3*(x2^2 - 1) (in t4).
+	 */
+	f256_montysquare(t4, P2->x);
+	f256_sub(t4, t4, F256_R);
+	f256_add(t5, t4, t4);
+	f256_add(t4, t4, t5);
+
+	/*
+	 * Compute x' = m^2 - 2*s (in t5): s is subtracted twice.
+	 */
+	f256_montysquare(t5, t4);
+	f256_sub(t5, t5, t3);
+	f256_sub(t5, t5, t3);
+
+	/*
+	 * Compute y' = m*(s - x') - 8*y2^4 (in t6).
+	 */
+	f256_sub(t6, t3, t5);
+	f256_montymul(t6, t6, t4);
+	f256_montysquare(t7, t2);
+	f256_sub(t6, t6, t7);
+	f256_sub(t6, t6, t7);
+
+	/*
+	 * Merge the doubling result (t5,t6,t1) into (x3,y3,z3) under mask tt.
+	 * NOTE(review): the OR assumes x3,y3,z3 limbs are all-zero if P1 = P2.
+	 */
+	for (i = 0; i < 5; i ++) {
+		P1->x[i] |= tt & t5[i];
+		P1->y[i] |= tt & t6[i];
+		P1->z[i] |= tt & t1[i];
+	}
+
+	/*
+	 * If P1 = 0, then we get z3 = 0 (which is invalid); if z1 is 0,
+	 * then we want to replace the result with a copy of P2. The
+	 * test on z1 was done at the start, in the zz mask.
+	 */
+	for (i = 0; i < 5; i ++) {
+		P1->x[i] ^= zz & (P1->x[i] ^ P2->x[i]);
+		P1->y[i] ^= zz & (P1->y[i] ^ P2->y[i]);
+		P1->z[i] ^= zz & (P1->z[i] ^ F256_R[i]);
+	}
+}
+#endif
+
+/*
+ * Inner function for computing a point multiplication. W[n] must hold
+ * (n+1)*P for n = 0..14, in affine coordinates; k[] is consumed four
+ * bits at a time (first byte first, high nibble first); result in *R.
+ * Assumptions:
+ *  - All provided points are valid points on the curve.
+ *  - Multiplier is non-zero, and smaller than the curve order.
+ *  - Everything is in Montgomery representation.
+ */
+static void
+point_mul_inner(p256_jacobian *R, const p256_affine *W,
+	const unsigned char *k, size_t klen)
+{
+	p256_jacobian Q;
+	uint32_t qz;
+
+	memset(&Q, 0, sizeof Q);
+	qz = 1;	/* stays 1 until the first non-zero digit has been seen */
+	while (klen -- > 0) {
+		int i;
+		unsigned bk;
+
+		bk = *k ++;
+		for (i = 0; i < 2; i ++) {	/* two 4-bit digits per byte */
+			uint32_t bits;
+			uint32_t bnz;
+			p256_affine T;
+			p256_jacobian U;
+			uint32_t n;
+			int j;
+			uint64_t m;
+
+			p256_double(&Q);
+			p256_double(&Q);
+			p256_double(&Q);
+			p256_double(&Q);
+			bits = (bk >> 4) & 0x0F;	/* current digit */
+			bnz = NEQ(bits, 0);
+
+			/*
+			 * Lookup point in window. If the bits are 0,
+			 * we get something invalid, which is not a
+			 * problem because we will use it only if the
+			 * bits are non-zero. All 15 entries are read.
+			 */
+			memset(&T, 0, sizeof T);
+			for (n = 0; n < 15; n ++) {
+				m = -(uint64_t)EQ(bits, n + 1);
+				T.x[0] |= m & W[n].x[0];
+				T.x[1] |= m & W[n].x[1];
+				T.x[2] |= m & W[n].x[2];
+				T.x[3] |= m & W[n].x[3];
+				T.x[4] |= m & W[n].x[4];
+				T.y[0] |= m & W[n].y[0];
+				T.y[1] |= m & W[n].y[1];
+				T.y[2] |= m & W[n].y[2];
+				T.y[3] |= m & W[n].y[3];
+				T.y[4] |= m & W[n].y[4];
+			}
+
+			U = Q;
+			p256_add_mixed(&U, &T);	/* return value is not checked */
+
+			/*
+			 * If qz is still 1, then Q was all-zeros (conserved by
+			 * p256_double()): a non-zero digit then loads (T, F256_R).
+			 */
+			m = -(uint64_t)(bnz & qz);
+			for (j = 0; j < 5; j ++) {
+				Q.x[j] ^= m & (Q.x[j] ^ T.x[j]);
+				Q.y[j] ^= m & (Q.y[j] ^ T.y[j]);
+				Q.z[j] ^= m & (Q.z[j] ^ F256_R[j]);
+			}
+			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);	/* else keep Q+T */
+			qz &= ~bnz;
+			bk <<= 4;	/* bring the low nibble into position */
+		}
+	}
+	*R = Q;
+}
+
+/*
+ * Convert a window from Jacobian to affine coordinates. A single
+ * field inversion is used. This function is documented as working for
+ * windows up to 32 elements; NOTE(review): the caller below uses 15.
+ *
+ * The destination array (aff[]) and the source array (jac[]) may
+ * overlap, provided that the start of aff[] is not after the start of
+ * jac[]. Even if the arrays do _not_ overlap, the source array is
+ * modified.
+ */
+static void
+window_to_affine(p256_affine *aff, p256_jacobian *jac, int num)
+{
+	/*
+	 * Convert the window points to affine coordinates. We use the
+	 * following trick to mutualize the inversion computation: if
+	 * we have z1, z2, z3, and z4, and want to invert all of them,
+	 * we compute u = 1/(z1*z2*z3*z4), and then we have:
+	 *   1/z1 = u*z2*z3*z4
+	 *   1/z2 = u*z1*z3*z4
+	 *   1/z3 = u*z1*z2*z4
+	 *   1/z4 = u*z1*z2*z3
+	 *
+	 * The partial products are computed recursively:
+	 *
+	 *  - on input (z_1,z_2), return (z_2,z_1) and z_1*z_2
+	 *  - on input (z_1,z_2,... z_n):
+	 *       recurse on (z_1,z_2,... z_(n/2)) -> r1 and m1
+	 *       recurse on (z_(n/2+1),z_(n/2+2)... z_n) -> r2 and m2
+	 *       multiply elements of r1 by m2 -> s1
+	 *       multiply elements of r2 by m1 -> s2
+	 *       return s1||s2 and m1*m2
+	 *
+	 * In the example below, we suppose that we have 14 elements.
+	 * Let z1, z2,... zE be the 14 values to invert (index noted in
+	 * hexadecimal, starting at 1).
+	 *
+	 *  - Depth 1:
+	 *      swap(z1, z2); z12 = z1*z2
+	 *      swap(z3, z4); z34 = z3*z4
+	 *      swap(z5, z6); z56 = z5*z6
+	 *      swap(z7, z8); z78 = z7*z8
+	 *      swap(z9, zA); z9A = z9*zA
+	 *      swap(zB, zC); zBC = zB*zC
+	 *      swap(zD, zE); zDE = zD*zE
+	 *
+	 *  - Depth 2:
+	 *      z1 <- z1*z34, z2 <- z2*z34, z3 <- z3*z12, z4 <- z4*z12
+	 *      z1234 = z12*z34
+	 *      z5 <- z5*z78, z6 <- z6*z78, z7 <- z7*z56, z8 <- z8*z56
+	 *      z5678 = z56*z78
+	 *      z9 <- z9*zBC, zA <- zA*zBC, zB <- zB*z9A, zC <- zC*z9A
+	 *      z9ABC = z9A*zBC
+	 *
+	 *  - Depth 3:
+	 *      z1 <- z1*z5678, z2 <- z2*z5678, z3 <- z3*z5678, z4 <- z4*z5678
+	 *      z5 <- z5*z1234, z6 <- z6*z1234, z7 <- z7*z1234, z8 <- z8*z1234
+	 *      z12345678 = z1234*z5678
+	 *      z9 <- z9*zDE, zA <- zA*zDE, zB <- zB*zDE, zC <- zC*zDE
+	 *      zD <- zD*z9ABC, zE <- zE*z9ABC
+	 *      z9ABCDE = z9ABC*zDE
+	 *
+	 *  - Depth 4:
+	 *      multiply z1..z8 by z9ABCDE
+	 *      multiply z9..zE by z12345678
+	 *      final z = z12345678*z9ABCDE
+	 */
+
+	uint64_t z[16][5];	/* group products; last three rows are scratch */
+	int i, k, s;
+#define zt   (z[15])
+#define zu   (z[14])
+#define zv   (z[13])
+
+	/*
+	 * First recursion step (pairwise swapping and multiplication).
+	 * If there is an odd number of elements, then we "invent" an
+	 * extra one with coordinate Z = 1 (in Montgomery representation).
+	 */
+	for (i = 0; (i + 1) < num; i += 2) {
+		memcpy(zt, jac[i].z, sizeof zt);
+		memcpy(jac[i].z, jac[i + 1].z, sizeof zt);
+		memcpy(jac[i + 1].z, zt, sizeof zt);
+		f256_montymul(z[i >> 1], jac[i].z, jac[i + 1].z);
+	}
+	if ((num & 1) != 0) {
+		memcpy(z[num >> 1], jac[num - 1].z, sizeof zt);
+		memcpy(jac[num - 1].z, F256_R, sizeof F256_R);
+	}
+
+	/*
+	 * Perform further recursion steps. At the entry of each step, the
+	 * process has been done for groups of 's' points (k = log2(s)).
+	 * NOTE(review): with num = 14, i = 12 reads z[7], never set; confirm.
+	 */
+	for (k = 1, s = 2; s < num; k ++, s <<= 1) {
+		int n;
+
+		for (i = 0; i < num; i ++) {
+			f256_montymul(jac[i].z, jac[i].z, z[(i >> k) ^ 1]);
+		}
+		n = (num + s - 1) >> k;	/* number of groups of s points */
+		for (i = 0; i < (n >> 1); i ++) {
+			f256_montymul(z[i], z[i << 1], z[(i << 1) + 1]);
+		}
+		if ((n & 1) != 0) {
+			memmove(z[n >> 1], z[n], sizeof zt);
+		}
+	}
+
+	/*
+	 * Invert the final result, and convert all points.
+	 */
+	f256_invert(zt, z[0]);
+	for (i = 0; i < num; i ++) {
+		f256_montymul(zv, jac[i].z, zt);	/* zv = 1/Z_i */
+		f256_montysquare(zu, zv);	/* zu = 1/Z_i^2 */
+		f256_montymul(zv, zv, zu);	/* zv = 1/Z_i^3 */
+		f256_montymul(aff[i].x, jac[i].x, zu);
+		f256_montymul(aff[i].y, jac[i].y, zv);
+	}
+}
+
+/*
+ * Multiply the provided point by an integer (*P <- k*P, k in k[0..klen-1]).
+ * Assumptions:
+ *  - Source point is a valid curve point.
+ *  - Source point is not the point-at-infinity.
+ *  - Integer is not 0, and is lower than the curve order.
+ * If these conditions are not met, then the result is indeterminate
+ * (but the process is still constant-time).
+ */
+static void
+p256_mul(p256_jacobian *P, const unsigned char *k, size_t klen)
+{
+	union {
+		p256_affine aff[15];
+		p256_jacobian jac[15];
+	} window;	/* both views share storage; converted in place below */
+	int i;
+
+	/*
+	 * Compute window, in Jacobian coordinates: jac[i - 1] = i*P.
+	 */
+	window.jac[0] = *P;
+	for (i = 2; i < 16; i ++) {
+		window.jac[i - 1] = window.jac[(i >> 1) - 1];
+		if ((i & 1) == 0) {
+			p256_double(&window.jac[i - 1]);	/* 2*((i/2)*P) */
+		} else {
+			p256_add(&window.jac[i - 1], &window.jac[i >> 1]);
+		}
+	}
+
+	/*
+	 * Convert the window points to affine coordinates. Point
+	 * window[0] is the source point, already in affine coordinates.
+	 */
+	window_to_affine(window.aff, window.jac, 15);
+
+	/*
+	 * Perform point multiplication; the result is written back to *P.
+	 */
+	point_mul_inner(P, window.aff, k, klen);
+}
+
+/*
+ * Precomputed window for the conventional generator: P256_Gwin[n]
+ * contains (n+1)*G (affine coordinates, in Montgomery representation).
+ */
+static const p256_affine P256_Gwin[] = {
+	{
+		{ 0x30D418A9143C1, 0xC4FEDB60179E7, 0x62251075BA95F,
+		  0x5C669FB732B77, 0x08905F76B5375 },
+		{ 0x5357CE95560A8, 0x43A19E45CDDF2, 0x21F3258B4AB8E,
+		  0xD8552E88688DD, 0x0571FF18A5885 }
+	},
+	{
+		{ 0x46D410DDD64DF, 0x0B433827D8500, 0x1490D9AA6AE3C,
+		  0xA3A832205038D, 0x06BB32E52DCF3 },
+		{ 0x48D361BEE1A57, 0xB7B236FF82F36, 0x042DBE152CD7C,
+		  0xA3AA9A8FB0E92, 0x08C577517A5B8 }
+	},
+	{
+		{ 0x3F904EEBC1272, 0x9E87D81FBFFAC, 0xCBBC98B027F84,
+		  0x47E46AD77DD87, 0x06936A3FD6FF7 },
+		{ 0x5C1FC983A7EBD, 0xC3861FE1AB04C, 0x2EE98E583E47A,
+		  0xC06A88208311A, 0x05F06A2AB587C }
+	},
+	{
+		{ 0xB50D46918DCC5, 0xD7623C17374B0, 0x100AF24650A6E,
+		  0x76ABCDAACACE8, 0x077362F591B01 },
+		{ 0xF24CE4CBABA68, 0x17AD6F4472D96, 0xDDD22E1762847,
+		  0x862EB6C36DEE5, 0x04B14C39CC5AB }
+	},
+	{
+		{ 0x8AAEC45C61F5C, 0x9D4B9537DBE1B, 0x76C20C90EC649,
+		  0x3C7D41CB5AAD0, 0x0907960649052 },
+		{ 0x9B4AE7BA4F107, 0xF75EB882BEB30, 0x7A1F6873C568E,
+		  0x915C540A9877E, 0x03A076BB9DD1E }
+	},
+	{
+		{ 0x47373E77664A1, 0xF246CEE3E4039, 0x17A3AD55AE744,
+		  0x673C50A961A5B, 0x03074B5964213 },
+		{ 0x6220D377E44BA, 0x30DFF14B593D3, 0x639F11299C2B5,
+		  0x75F5424D44CEF, 0x04C9916DEA07F }
+	},
+	{
+		{ 0x354EA0173B4F1, 0x3C23C00F70746, 0x23BB082BD2021,
+		  0xE03E43EAAB50C, 0x03BA5119D3123 },
+		{ 0xD0303F5B9D4DE, 0x17DA67BDD2847, 0xC941956742F2F,
+		  0x8670F933BDC77, 0x0AEDD9164E240 }
+	},
+	{
+		{ 0x4CD19499A78FB, 0x4BF9B345527F1, 0x2CFC6B462AB5C,
+		  0x30CDF90F02AF0, 0x0763891F62652 },
+		{ 0xA3A9532D49775, 0xD7F9EBA15F59D, 0x60BBF021E3327,
+		  0xF75C23C7B84BE, 0x06EC12F2C706D }
+	},
+	{
+		{ 0x6E8F264E20E8E, 0xC79A7A84175C9, 0xC8EB00ABE6BFE,
+		  0x16A4CC09C0444, 0x005B3081D0C4E },
+		{ 0x777AA45F33140, 0xDCE5D45E31EB7, 0xB12F1A56AF7BE,
+		  0xF9B2B6E019A88, 0x086659CDFD835 }
+	},
+	{
+		{ 0xDBD19DC21EC8C, 0x94FCF81392C18, 0x250B4998F9868,
+		  0x28EB37D2CD648, 0x0C61C947E4B34 },
+		{ 0x407880DD9E767, 0x0C83FBE080C2B, 0x9BE5D2C43A899,
+		  0xAB4EF7D2D6577, 0x08719A555B3B4 }
+	},
+	{
+		{ 0x260A6245E4043, 0x53E7FDFE0EA7D, 0xAC1AB59DE4079,
+		  0x072EFF3A4158D, 0x0E7090F1949C9 },
+		{ 0x85612B944E886, 0xE857F61C81A76, 0xAD643D250F939,
+		  0x88DAC0DAA891E, 0x089300244125B }
+	},
+	{
+		{ 0x1AA7D26977684, 0x58A345A3304B7, 0x37385EABDEDEF,
+		  0x155E409D29DEE, 0x0EE1DF780B83E },
+		{ 0x12D91CBB5B437, 0x65A8956370CAC, 0xDE6D66170ED2F,
+		  0xAC9B8228CFA8A, 0x0FF57C95C3238 }
+	},
+	{
+		{ 0x25634B2ED7097, 0x9156FD30DCCC4, 0x9E98110E35676,
+		  0x7594CBCD43F55, 0x038477ACC395B },
+		{ 0x2B90C00EE17FF, 0xF842ED2E33575, 0x1F5BC16874838,
+		  0x7968CD06422BD, 0x0BC0876AB9E7B }
+	},
+	{
+		{ 0xA35BB0CF664AF, 0x68F9707E3A242, 0x832660126E48F,
+		  0x72D2717BF54C6, 0x0AAE7333ED12C },
+		{ 0x2DB7995D586B1, 0xE732237C227B5, 0x65E7DBBE29569,
+		  0xBBBD8E4193E2A, 0x052706DC3EAA1 }
+	},
+	{
+		{ 0xD8B7BC60055BE, 0xD76E27E4B72BC, 0x81937003CC23E,
+		  0xA090E337424E4, 0x02AA0E43EAD3D },
+		{ 0x524F6383C45D2, 0x422A41B2540B8, 0x8A4797D766355,
+		  0xDF444EFA6DE77, 0x0042170A9079A }
+	},
+};
+
+/*
+ * Multiply the conventional generator of the curve by the provided
+ * integer. The result is written in *P.
+ *
+ * Assumptions:
+ *  - Integer is not 0, and is lower than the curve order.
+ * If this condition is not met, then the result is indeterminate
+ * (but the process is still constant-time).
+ */
+static void
+p256_mulgen(p256_jacobian *P, const unsigned char *k, size_t klen)
+{
+	point_mul_inner(P, P256_Gwin, k, klen);	/* precomputed window */
+}
+
+/*
+ * Return 1 if all of the following hold:
+ *  - klen <= 32
+ *  - k != 0
+ *  - k is lower than the curve order (P256_N, compared byte by byte)
+ * Otherwise, return 0.
+ *
+ * Constant-time behaviour: only klen may be observable.
+ */
+static uint32_t
+check_scalar(const unsigned char *k, size_t klen)
+{
+	uint32_t z;	/* OR of all bytes of k */
+	int32_t c;	/* comparison status of k against the order */
+	size_t u;
+
+	if (klen > 32) {
+		return 0;
+	}
+	z = 0;
+	for (u = 0; u < klen; u ++) {
+		z |= k[u];
+	}
+	if (klen == 32) {
+		c = 0;	/* presumably ends as the first non-zero CMP() result */
+		for (u = 0; u < klen; u ++) {
+			c |= -(int32_t)EQ0(c) & CMP(k[u], P256_N[u]);
+		}
+	} else {
+		c = -1;	/* shorter than 32 bytes: treated as below the order */
+	}
+	return NEQ(z, 0) & LT0(c);
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *k, size_t klen, int curve)
+{
+	uint32_t r;	/* status, ANDed over all validation steps */
+	p256_jacobian P;
+
+	(void)curve;	/* the curve argument is not used */
+	if (Glen != 65) {	/* only 65-byte encoded points are accepted */
+		return 0;
+	}
+	r = check_scalar(k, klen);
+	r &= point_decode(&P, G);
+	p256_mul(&P, k, klen);	/* runs even when r is already 0 */
+	r &= point_encode(G, &P);	/* result overwrites G[] in place */
+	return r;
+}
+
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *k, size_t klen, int curve)
+{
+	p256_jacobian P;
+
+	(void)curve;	/* the curve argument is not used */
+	p256_mulgen(&P, k, klen);	/* the scalar is not validated here */
+	point_encode(R, &P);	/* return value is not checked */
+	return 65;	/* always reports a 65-byte encoded point */
+}
+
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	/*
+	 * We might want to use Shamir's trick here: make a composite
+	 * window of u*P+v*Q points, to merge the two doubling-ladders
+	 * into one. This, however, has some complications:
+	 *
+	 *  - During the computation, we may hit the point-at-infinity.
+	 *    Thus, we would need p256_add_complete_mixed() (complete
+	 *    formulas for point addition), with a higher cost (17 muls
+	 *    instead of 11).
+	 *
+	 *  - A 4-bit window would be too large, since it would involve
+	 *    16*16-1 = 255 points. For the same window size as in the
+	 *    p256_mul() case, we would need to reduce the window size
+	 *    to 2 bits, and thus perform twice as many non-doubling
+	 *    point additions.
+	 *
+	 *  - The window may itself contain the point-at-infinity, and
+	 *    thus cannot, in all generality, be made of affine points.
+	 *    Instead, we would need to make it a window of points in
+	 *    Jacobian coordinates. Even p256_add_complete_mixed() would
+	 *    be inappropriate.
+	 *
+	 * For these reasons, the code below performs two separate
+	 * point multiplications, then computes the final point addition
+	 * (which is both a "normal" addition, and a doubling, to handle
+	 * all cases).
+	 */
+
+	p256_jacobian P, Q;
+	uint32_t r, t, s;
+	uint64_t z;
+
+	(void)curve;	/* the curve argument is not used */
+	if (len != 65) {
+		return 0;
+	}
+	r = point_decode(&P, A);
+	p256_mul(&P, x, xlen);	/* P = x*A */
+	if (B == NULL) {	/* NULL B: the generator window is used */
+		p256_mulgen(&Q, y, ylen);
+	} else {
+		r &= point_decode(&Q, B);
+		p256_mul(&Q, y, ylen);	/* Q = y*B */
+	}
+
+	/*
+	 * The final addition may fail in case both points are equal.
+	 */
+	t = p256_add(&P, &Q);
+	f256_final_reduce(P.z);
+	z = P.z[0] | P.z[1] | P.z[2] | P.z[3] | P.z[4];
+	s = EQ((uint32_t)(z | (z >> 32)), 0);	/* tests all limbs of P.z */
+	p256_double(&Q);	/* candidate result for the P = Q case */
+
+	/*
+	 * If s is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
+	 * have the following:
+	 *
+	 *   s = 0, t = 0   return P (normal addition)
+	 *   s = 0, t = 1   return P (normal addition)
+	 *   s = 1, t = 0   return Q (a 'double' case)
+	 *   s = 1, t = 1   report an error (P+Q = 0)
+	 */
+	CCOPY(s & ~t, &P, &Q, sizeof Q);
+	point_encode(A, &P);	/* result overwrites A[] in place */
+	r &= ~(s & t);
+	return r;
+}
+
+/* see bearssl_ec.h -- implementation table wired to the functions above */
+const br_ec_impl br_ec_p256_m62 = {
+	(uint32_t)0x00800000,	/* bit 23 only; presumably a curve mask -- confirm */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
+
+/* see bearssl_ec.h -- accessor for the implementation table */
+const br_ec_impl *
+br_ec_p256_m62_get(void)
+{
+	return &br_ec_p256_m62;
+}
+
+#else
+
+/* see bearssl_ec.h -- #else branch: no implementation table in this build */
+const br_ec_impl *
+br_ec_p256_m62_get(void)
+{
+	return 0;	/* null pointer: nothing to hand out */
+}
+
+#endif
diff --git a/third_party/bearssl/src/ec_p256_m64.c b/third_party/bearssl/src/ec_p256_m64.c
new file mode 100644
index 0000000..71a527c
--- /dev/null
+++ b/third_party/bearssl/src/ec_p256_m64.c
@@ -0,0 +1,1781 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+#if BR_UMUL128
+#include <intrin.h>
+#endif
+
+static const unsigned char P256_G[] = {	/* 65 bytes; see api_generator() */
+	0x04, 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8,
+	0xBC, 0xE6, 0xE5, 0x63, 0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D,
+	0x81, 0x2D, 0xEB, 0x33, 0xA0, 0xF4, 0xA1, 0x39, 0x45, 0xD8,
+	0x98, 0xC2, 0x96, 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F,
+	0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C, 0x0F, 0x9E, 0x16, 0x2B,
+	0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, 0xCB, 0xB6, 0x40,
+	0x68, 0x37, 0xBF, 0x51, 0xF5
+};
+
+static const unsigned char P256_N[] = {	/* 32 bytes; see api_order() */
+	0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xBC, 0xE6, 0xFA, 0xAD,
+	0xA7, 0x17, 0x9E, 0x84, 0xF3, 0xB9, 0xCA, 0xC2, 0xFC, 0x63,
+	0x25, 0x51
+};
+
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	(void)curve;	/* the curve argument is not used */
+	*len = sizeof P256_G;	/* length of the returned array, in bytes */
+	return P256_G;
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	(void)curve;	/* the curve argument is not used */
+	*len = sizeof P256_N;	/* length of the returned array, in bytes */
+	return P256_N;
+}
+
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	(void)curve;	/* the curve argument is not used */
+	*len = 32;	/* presumably the X coordinate length -- see bearssl_ec.h */
+	return 1;	/* presumably the X offset in an encoded point -- confirm */
+}
+
+/*
+ * A field element is encoded as four 64-bit integers, in basis 2^64.
+ * Values may reach up to 2^256-1. Montgomery multiplication is used.
+ */
+
+/* R = 2^256 mod p; limbs are stored least significant first (R[0] = 1) */
+static const uint64_t F256_R[] = {
+	0x0000000000000001, 0xFFFFFFFF00000000,
+	0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFE
+};
+
+/* Curve equation is y^2 = x^3 - 3*x + B. This constant is B*R mod p
+   (Montgomery representation of B); presumably same limb order as F256_R. */
+static const uint64_t P256_B_MONTY[] = {
+	0xD89CDF6229C4BDDF, 0xACF005CD78843090,
+	0xE5A220ABF7212ED6, 0xDC30061D04874834
+};
+
+/*
+ * Addition in the field: d <- a + b (mod p), on four 64-bit limbs.
+ */
+static inline void
+f256_add(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+#if BR_INT128
+	unsigned __int128 w;
+	uint64_t t;
+
+	/*
+	 * Do the addition, with an extra carry in t.
+	 */
+	w = (unsigned __int128)a[0] + b[0];
+	d[0] = (uint64_t)w;
+	w = (unsigned __int128)a[1] + b[1] + (w >> 64);
+	d[1] = (uint64_t)w;
+	w = (unsigned __int128)a[2] + b[2] + (w >> 64);
+	d[2] = (uint64_t)w;
+	w = (unsigned __int128)a[3] + b[3] + (w >> 64);
+	d[3] = (uint64_t)w;
+	t = (uint64_t)(w >> 64);	/* bit 256 of the sum: 0 or 1 */
+
+	/*
+	 * Fold carry t, using: 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p.
+	 */
+	w = (unsigned __int128)d[0] + t;
+	d[0] = (uint64_t)w;
+	w = (unsigned __int128)d[1] + (w >> 64) - (t << 32);
+	d[1] = (uint64_t)w;
+	/* Here, carry "w >> 64" can only be 0 or -1 */
+	w = (unsigned __int128)d[2] - ((w >> 64) & 1);
+	d[2] = (uint64_t)w;
+	/* Again, carry is 0 or -1. But there can be carry only if t = 1,
+	   in which case the addition of (t << 32) - t is positive. */
+	w = (unsigned __int128)d[3] - ((w >> 64) & 1) + (t << 32) - t;
+	d[3] = (uint64_t)w;
+	t = (uint64_t)(w >> 64);
+
+	/*
+	 * There can be an extra carry here, which we must fold again.
+	 */
+	w = (unsigned __int128)d[0] + t;
+	d[0] = (uint64_t)w;
+	w = (unsigned __int128)d[1] + (w >> 64) - (t << 32);
+	d[1] = (uint64_t)w;
+	w = (unsigned __int128)d[2] - ((w >> 64) & 1);
+	d[2] = (uint64_t)w;
+	d[3] += (t << 32) - t - (uint64_t)((w >> 64) & 1);
+
+#elif BR_UMUL128
+
+	unsigned char cc;
+	uint64_t t;
+
+	cc = _addcarry_u64(0, a[0], b[0], &d[0]);
+	cc = _addcarry_u64(cc, a[1], b[1], &d[1]);
+	cc = _addcarry_u64(cc, a[2], b[2], &d[2]);
+	cc = _addcarry_u64(cc, a[3], b[3], &d[3]);
+
+	/*
+	 * If there is a carry, then we want to subtract p, which we
+	 * do by adding 2^256 - p (its low limb, 1, enters as carry-in cc).
+	 */
+	t = cc;
+	cc = _addcarry_u64(cc, d[0], 0, &d[0]);
+	cc = _addcarry_u64(cc, d[1], -(t << 32), &d[1]);
+	cc = _addcarry_u64(cc, d[2], -t, &d[2]);
+	cc = _addcarry_u64(cc, d[3], (t << 32) - (t << 1), &d[3]);
+
+	/*
+	 * We have to do it again if there still is a carry.
+	 */
+	t = cc;
+	cc = _addcarry_u64(cc, d[0], 0, &d[0]);
+	cc = _addcarry_u64(cc, d[1], -(t << 32), &d[1]);
+	cc = _addcarry_u64(cc, d[2], -t, &d[2]);
+	(void)_addcarry_u64(cc, d[3], (t << 32) - (t << 1), &d[3]);
+
+#endif
+}
+
+/*
+ * Subtraction in the field: d <- a - b (mod p), on four 64-bit limbs.
+ */
+static inline void
+f256_sub(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+#if BR_INT128
+
+	unsigned __int128 w;
+	uint64_t t;
+
+	w = (unsigned __int128)a[0] - b[0];
+	d[0] = (uint64_t)w;
+	w = (unsigned __int128)a[1] - b[1] - ((w >> 64) & 1);
+	d[1] = (uint64_t)w;
+	w = (unsigned __int128)a[2] - b[2] - ((w >> 64) & 1);
+	d[2] = (uint64_t)w;
+	w = (unsigned __int128)a[3] - b[3] - ((w >> 64) & 1);
+	d[3] = (uint64_t)w;
+	t = (uint64_t)(w >> 64) & 1;	/* final borrow: 0 or 1 */
+
+	/*
+	 * If there is a borrow (t = 1), then we must add the modulus
+	 * p = 2^256 - 2^224 + 2^192 + 2^96 - 1.
+	 */
+	w = (unsigned __int128)d[0] - t;
+	d[0] = (uint64_t)w;
+	w = (unsigned __int128)d[1] + (t << 32) - ((w >> 64) & 1);
+	d[1] = (uint64_t)w;
+	/* Here, carry "w >> 64" can only be 0 or +1 */
+	w = (unsigned __int128)d[2] + (w >> 64);
+	d[2] = (uint64_t)w;
+	/* Again, carry is 0 or +1 */
+	w = (unsigned __int128)d[3] + (w >> 64) - (t << 32) + t;
+	d[3] = (uint64_t)w;
+	t = (uint64_t)(w >> 64) & 1;
+
+	/*
+	 * There may be again a borrow, in which case we must add the
+	 * modulus again.
+	 */
+	w = (unsigned __int128)d[0] - t;
+	d[0] = (uint64_t)w;
+	w = (unsigned __int128)d[1] + (t << 32) - ((w >> 64) & 1);
+	d[1] = (uint64_t)w;
+	w = (unsigned __int128)d[2] + (w >> 64);
+	d[2] = (uint64_t)w;
+	d[3] += (uint64_t)(w >> 64) - (t << 32) + t;
+
+#elif BR_UMUL128
+
+	unsigned char cc;
+	uint64_t t;
+
+	cc = _subborrow_u64(0, a[0], b[0], &d[0]);
+	cc = _subborrow_u64(cc, a[1], b[1], &d[1]);
+	cc = _subborrow_u64(cc, a[2], b[2], &d[2]);
+	cc = _subborrow_u64(cc, a[3], b[3], &d[3]);
+
+	/*
+	 * If there is a borrow, then we need to add p. We (virtually)
+	 * add 2^256, then subtract 2^256 - p (limb by limb, scaled by t).
+	 */
+	t = cc;
+	cc = _subborrow_u64(0, d[0], t, &d[0]);
+	cc = _subborrow_u64(cc, d[1], -(t << 32), &d[1]);
+	cc = _subborrow_u64(cc, d[2], -t, &d[2]);
+	cc = _subborrow_u64(cc, d[3], (t << 32) - (t << 1), &d[3]);
+
+	/*
+	 * If there still is a borrow, then we need to add p again.
+	 */
+	t = cc;
+	cc = _subborrow_u64(0, d[0], t, &d[0]);
+	cc = _subborrow_u64(cc, d[1], -(t << 32), &d[1]);
+	cc = _subborrow_u64(cc, d[2], -t, &d[2]);
+	(void)_subborrow_u64(cc, d[3], (t << 32) - (t << 1), &d[3]);
+
+#endif
+}
+
+/*
+ * Montgomery multiplication in the field.
+ */
+static void
+f256_montymul(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+#if BR_INT128
+
+	uint64_t x, f, t0, t1, t2, t3, t4;
+	unsigned __int128 z, ff;
+	int i;
+
+	/*
+	 * When computing d <- d + a[u]*b, we also add f*p such
+	 * that d + a[u]*b + f*p is a multiple of 2^64. Since
+	 * p = -1 mod 2^64, we can compute f = d[0] + a[u]*b[0] mod 2^64.
+	 */
+
+	/*
+	 * Step 1: t <- (a[0]*b + f*p) / 2^64
+	 * We have f = a[0]*b[0] mod 2^64. Since p = -1 mod 2^64, this
+	 * ensures that (a[0]*b + f*p) is a multiple of 2^64.
+	 *
+	 * We also have: f*p = f*2^256 - f*2^224 + f*2^192 + f*2^96 - f.
+	 */
+	x = a[0];
+	z = (unsigned __int128)b[0] * x;
+	f = (uint64_t)z;
+	z = (unsigned __int128)b[1] * x + (z >> 64) + (uint64_t)(f << 32);
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)b[2] * x + (z >> 64) + (uint64_t)(f >> 32);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)b[3] * x + (z >> 64) + f;
+	t2 = (uint64_t)z;
+	t3 = (uint64_t)(z >> 64);
+	ff = ((unsigned __int128)f << 64) - ((unsigned __int128)f << 32);
+	z = (unsigned __int128)t2 + (uint64_t)ff;
+	t2 = (uint64_t)z;
+	z = (unsigned __int128)t3 + (z >> 64) + (ff >> 64);
+	t3 = (uint64_t)z;
+	t4 = (uint64_t)(z >> 64);
+
+	/*
+	 * Steps 2 to 4: t <- (t + a[i]*b + f*p) / 2^64
+	 */
+	for (i = 1; i < 4; i ++) {
+		x = a[i];
+
+		/* t <- (t + x*b - f) / 2^64 */
+		z = (unsigned __int128)b[0] * x + t0;
+		f = (uint64_t)z;
+		z = (unsigned __int128)b[1] * x + t1 + (z >> 64);
+		t0 = (uint64_t)z;
+		z = (unsigned __int128)b[2] * x + t2 + (z >> 64);
+		t1 = (uint64_t)z;
+		z = (unsigned __int128)b[3] * x + t3 + (z >> 64);
+		t2 = (uint64_t)z;
+		z = t4 + (z >> 64);
+		t3 = (uint64_t)z;
+		t4 = (uint64_t)(z >> 64);
+
+		/* t <- t + f*2^32, carry in the upper half of z */
+		z = (unsigned __int128)t0 + (uint64_t)(f << 32);
+		t0 = (uint64_t)z;
+		z = (z >> 64) + (unsigned __int128)t1 + (uint64_t)(f >> 32);
+		t1 = (uint64_t)z;
+
+		/* t <- t + f*2^192 - f*2^160 + f*2^128 */
+		ff = ((unsigned __int128)f << 64) 
+			- ((unsigned __int128)f << 32) + f;
+		z = (z >> 64) + (unsigned __int128)t2 + (uint64_t)ff;
+		t2 = (uint64_t)z;
+		z = (unsigned __int128)t3 + (z >> 64) + (ff >> 64);
+		t3 = (uint64_t)z;
+		t4 += (uint64_t)(z >> 64);
+	}
+
+	/*
+	 * At that point, we have computed t = (a*b + F*p) / 2^256, where
+	 * F is a 256-bit integer whose limbs are the "f" coefficients
+	 * in the steps above. We have:
+	 *   a <= 2^256-1
+	 *   b <= 2^256-1
+	 *   F <= 2^256-1
+	 * Hence:
+	 *   a*b + F*p <= (2^256-1)*(2^256-1) + p*(2^256-1)
+	 *   a*b + F*p <= 2^256*(2^256 - 2 + p) + 1 - p
+	 * Therefore:
+	 *   t < 2^256 + p - 2
+	 * Since p < 2^256, it follows that:
+	 *   t4 can be only 0 or 1
+	 *   t - p < 2^256
+	 * We can therefore subtract p from t, conditionally on t4, to
+	 * get a nonnegative result that fits on 256 bits.
+	 */
+	z = (unsigned __int128)t0 + t4;
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)t1 - (t4 << 32) + (z >> 64);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)t2 - (z >> 127);
+	t2 = (uint64_t)z;
+	t3 = t3 - (uint64_t)(z >> 127) - t4 + (t4 << 32);
+
+	d[0] = t0;
+	d[1] = t1;
+	d[2] = t2;
+	d[3] = t3;
+
+#elif BR_UMUL128
+
+	uint64_t x, f, t0, t1, t2, t3, t4;
+	uint64_t zl, zh, ffl, ffh;
+	unsigned char k, m;
+	int i;
+
+	/*
+	 * When computing d <- d + a[u]*b, we also add f*p such
+	 * that d + a[u]*b + f*p is a multiple of 2^64. Since
+	 * p = -1 mod 2^64, we can compute f = d[0] + a[u]*b[0] mod 2^64.
+	 */
+
+	/*
+	 * Step 1: t <- (a[0]*b + f*p) / 2^64
+	 * We have f = a[0]*b[0] mod 2^64. Since p = -1 mod 2^64, this
+	 * ensures that (a[0]*b + f*p) is a multiple of 2^64.
+	 *
+	 * We also have: f*p = f*2^256 - f*2^224 + f*2^192 + f*2^96 - f.
+	 */
+	x = a[0];
+
+	zl = _umul128(b[0], x, &zh);
+	f = zl;
+	t0 = zh;
+
+	zl = _umul128(b[1], x, &zh);
+	k = _addcarry_u64(0, zl, t0, &zl);
+	(void)_addcarry_u64(k, zh, 0, &zh);
+	k = _addcarry_u64(0, zl, f << 32, &zl);
+	(void)_addcarry_u64(k, zh, 0, &zh);
+	t0 = zl;
+	t1 = zh;
+
+	zl = _umul128(b[2], x, &zh);
+	k = _addcarry_u64(0, zl, t1, &zl);
+	(void)_addcarry_u64(k, zh, 0, &zh);
+	k = _addcarry_u64(0, zl, f >> 32, &zl);
+	(void)_addcarry_u64(k, zh, 0, &zh);
+	t1 = zl;
+	t2 = zh;
+
+	zl = _umul128(b[3], x, &zh);
+	k = _addcarry_u64(0, zl, t2, &zl);
+	(void)_addcarry_u64(k, zh, 0, &zh);
+	k = _addcarry_u64(0, zl, f, &zl);
+	(void)_addcarry_u64(k, zh, 0, &zh);
+	t2 = zl;
+	t3 = zh;
+
+	t4 = _addcarry_u64(0, t3, f, &t3);
+	k = _subborrow_u64(0, t2, f << 32, &t2);
+	k = _subborrow_u64(k, t3, f >> 32, &t3);
+	(void)_subborrow_u64(k, t4, 0, &t4);
+
+	/*
+	 * Steps 2 to 4: t <- (t + a[i]*b + f*p) / 2^64
+	 */
+	for (i = 1; i < 4; i ++) {
+		x = a[i];
+		/* f = t0 + x * b[0]; -- computed below */
+
+		/* t <- (t + x*b - f) / 2^64 */
+		zl = _umul128(b[0], x, &zh);
+		k = _addcarry_u64(0, zl, t0, &f);
+		(void)_addcarry_u64(k, zh, 0, &t0);
+
+		zl = _umul128(b[1], x, &zh);
+		k = _addcarry_u64(0, zl, t0, &zl);
+		(void)_addcarry_u64(k, zh, 0, &zh);
+		k = _addcarry_u64(0, zl, t1, &t0);
+		(void)_addcarry_u64(k, zh, 0, &t1);
+
+		zl = _umul128(b[2], x, &zh);
+		k = _addcarry_u64(0, zl, t1, &zl);
+		(void)_addcarry_u64(k, zh, 0, &zh);
+		k = _addcarry_u64(0, zl, t2, &t1);
+		(void)_addcarry_u64(k, zh, 0, &t2);
+
+		zl = _umul128(b[3], x, &zh);
+		k = _addcarry_u64(0, zl, t2, &zl);
+		(void)_addcarry_u64(k, zh, 0, &zh);
+		k = _addcarry_u64(0, zl, t3, &t2);
+		(void)_addcarry_u64(k, zh, 0, &t3);
+
+		t4 = _addcarry_u64(0, t3, t4, &t3);
+
+		/* t <- t + f*2^32, carry in k */
+		k = _addcarry_u64(0, t0, f << 32, &t0);
+		k = _addcarry_u64(k, t1, f >> 32, &t1);
+
+		/* t <- t + f*2^192 - f*2^160 + f*2^128 */
+		m = _subborrow_u64(0, f, f << 32, &ffl);
+		(void)_subborrow_u64(m, f, f >> 32, &ffh);
+		k = _addcarry_u64(k, t2, ffl, &t2);
+		k = _addcarry_u64(k, t3, ffh, &t3);
+		(void)_addcarry_u64(k, t4, 0, &t4);
+	}
+
+	/*
+	 * At that point, we have computed t = (a*b + F*p) / 2^256, where
+	 * F is a 256-bit integer whose limbs are the "f" coefficients
+	 * in the steps above. We have:
+	 *   a <= 2^256-1
+	 *   b <= 2^256-1
+	 *   F <= 2^256-1
+	 * Hence:
+	 *   a*b + F*p <= (2^256-1)*(2^256-1) + p*(2^256-1)
+	 *   a*b + F*p <= 2^256*(2^256 - 2 + p) + 1 - p
+	 * Therefore:
+	 *   t < 2^256 + p - 2
+	 * Since p < 2^256, it follows that:
+	 *   t4 can be only 0 or 1
+	 *   t - p < 2^256
+	 * We can therefore subtract p from t, conditionally on t4, to
+	 * get a nonnegative result that fits on 256 bits.
+	 */
+	k = _addcarry_u64(0, t0, t4, &t0);
+	k = _addcarry_u64(k, t1, -(t4 << 32), &t1);
+	k = _addcarry_u64(k, t2, -t4, &t2);
+	(void)_addcarry_u64(k, t3, (t4 << 32) - (t4 << 1), &t3);
+
+	d[0] = t0;
+	d[1] = t1;
+	d[2] = t2;
+	d[3] = t3;
+
+#endif
+}
+
+/*
+ * Montgomery squaring in the field: d <- a*a/2^256 modulo p, as a thin
+ * inline wrapper around f256_montymul(d, a, a). Callers may pass d == a.
+ * TODO: see if some extra speed can be gained here.
+ */
+static inline void
+f256_montysquare(uint64_t *d, const uint64_t *a)
+{
+	f256_montymul(d, a, a);
+}
+
+/*
+ * Convert to Montgomery representation: d <- a*R mod p, with R = 2^256.
+ */
+static void
+f256_tomonty(uint64_t *d, const uint64_t *a)
+{
+	/*
+	 * R2 = 2^512 mod p (limbs listed least significant first).
+	 * If R = 2^256 mod p, then R2 = R^2 mod p; and the Montgomery
+	 * multiplication of a by R2 is: a*R2/R = a*R mod p, i.e. the
+	 * conversion to Montgomery representation.
+	 */
+	static const uint64_t R2[] = {
+		0x0000000000000003,
+		0xFFFFFFFBFFFFFFFF,
+		0xFFFFFFFFFFFFFFFE,
+		0x00000004FFFFFFFD
+	};
+
+	f256_montymul(d, a, R2);
+}
+
+/*
+ * Convert from Montgomery representation: d <- a/2^256 modulo p.
+ */
+static void
+f256_frommonty(uint64_t *d, const uint64_t *a)
+{
+	/*
+	 * Montgomery multiplication by 1 is division by 2^256 modulo p.
+	 */
+	static const uint64_t one[] = { 1, 0, 0, 0 };
+
+	f256_montymul(d, a, one);
+}
+
+/*
+ * Inversion in the field (Montgomery representation in and out). If the
+ * source value is 0 modulo p, then this returns 0 or p.
+ */
+static void
+f256_invert(uint64_t *d, const uint64_t *a)
+{
+	/*
+	 * We compute a^(p-2) mod p. The exponent pattern (from high to
+	 * low) is:
+	 *  - 32 bits of value 1
+	 *  - 31 bits of value 0
+	 *  - 1 bit of value 1
+	 *  - 96 bits of value 0
+	 *  - 94 bits of value 1
+	 *  - 1 bit of value 0
+	 *  - 1 bit of value 1
+	 * To speed up the square-and-multiply algorithm, we precompute
+	 * t = a^(2^31-1), i.e. a run of 31 exponent bits of value 1.
+	 */
+
+	uint64_t r[4], t[4];
+	int i;
+
+	memcpy(t, a, sizeof t);
+	for (i = 0; i < 30; i ++) {
+		f256_montysquare(t, t);
+		f256_montymul(t, t, a);
+	}
+
+	memcpy(r, t, sizeof t);
+	for (i = 224; i >= 0; i --) {
+		f256_montysquare(r, r);
+		switch (i) {
+		case 0:
+		case 2:
+		case 192:
+		case 224:
+			f256_montymul(r, r, a);
+			break;
+		case 3:
+		case 34:
+		case 65:
+			f256_montymul(r, r, t);
+			break;
+		}
+	}
+	memcpy(d, r, sizeof r);
+}
+
+/*
+ * Finalize reduction (in place; the result is selected with a mask).
+ * Input value fits on 256 bits. This function subtracts p if and only
+ * if the input is greater than or equal to p.
+ */
+static inline void
+f256_final_reduce(uint64_t *a)
+{
+#if BR_INT128
+
+	uint64_t t0, t1, t2, t3, cc;
+	unsigned __int128 z;
+
+	/*
+	 * We add 2^224 - 2^192 - 2^96 + 1 (i.e. 2^256 - p) to a. If there
+	 * is no carry, then a < p; otherwise, the addition result we
+	 * computed (truncated to 256 bits) is a - p, the value to return.
+	 */
+	z = (unsigned __int128)a[0] + 1;
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)a[1] + (z >> 64) - ((uint64_t)1 << 32);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)a[2] - (z >> 127);
+	t2 = (uint64_t)z;
+	z = (unsigned __int128)a[3] - (z >> 127) + 0xFFFFFFFF;
+	t3 = (uint64_t)z;
+	cc = -(uint64_t)(z >> 64);
+
+	a[0] ^= cc & (a[0] ^ t0);
+	a[1] ^= cc & (a[1] ^ t1);
+	a[2] ^= cc & (a[2] ^ t2);
+	a[3] ^= cc & (a[3] ^ t3);
+
+#elif BR_UMUL128
+
+	uint64_t t0, t1, t2, t3, m;
+	unsigned char k;
+
+	k = _addcarry_u64(0, a[0], (uint64_t)1, &t0);
+	k = _addcarry_u64(k, a[1], -((uint64_t)1 << 32), &t1);
+	k = _addcarry_u64(k, a[2], -(uint64_t)1, &t2);
+	k = _addcarry_u64(k, a[3], ((uint64_t)1 << 32) - 2, &t3);
+	m = -(uint64_t)k;
+
+	a[0] ^= m & (a[0] ^ t0);
+	a[1] ^= m & (a[1] ^ t1);
+	a[2] ^= m & (a[2] ^ t2);
+	a[3] ^= m & (a[3] ^ t3);
+
+#endif
+}
+
+/*
+ * Points in affine and Jacobian coordinates. Each coordinate is held as
+ * four 64-bit limbs, least significant limb first.
+ *  - In affine coordinates, the point-at-infinity cannot be encoded.
+ *  - Jacobian coordinates (X,Y,Z) correspond to affine (X/Z^2,Y/Z^3);
+ *    if Z = 0 then this is the point-at-infinity.
+ */
+typedef struct {
+	uint64_t x[4];
+	uint64_t y[4];
+} p256_affine;
+
+typedef struct {
+	uint64_t x[4];
+	uint64_t y[4];
+	uint64_t z[4];
+} p256_jacobian;
+
+/*
+ * Decode an uncompressed point (0x04 || X || Y, big-endian) into *P, in
+ * Jacobian coordinates with z = 1 and Montgomery representation. Returns
+ * 1 if the header byte is 0x04 and (X,Y) satisfies the curve equation,
+ * 0 otherwise. *P is written in both cases.
+ *
+ * The buffer is assumed to have length exactly 65 bytes.
+ */
+static uint32_t
+point_decode(p256_jacobian *P, const unsigned char *buf)
+{
+	uint64_t x[4], y[4], t[4], x3[4], tt;
+	uint32_t r;
+
+	/*
+	 * Header byte shall be 0x04.
+	 */
+	r = EQ(buf[0], 0x04);
+
+	/*
+	 * Decode X and Y coordinates, and convert them into
+	 * Montgomery representation.
+	 */
+	x[3] = br_dec64be(buf +  1);
+	x[2] = br_dec64be(buf +  9);
+	x[1] = br_dec64be(buf + 17);
+	x[0] = br_dec64be(buf + 25);
+	y[3] = br_dec64be(buf + 33);
+	y[2] = br_dec64be(buf + 41);
+	y[1] = br_dec64be(buf + 49);
+	y[0] = br_dec64be(buf + 57);
+	f256_tomonty(x, x);
+	f256_tomonty(y, y);
+
+	/*
+	 * Verify y^2 = x^3 + A*x + B, with A = -3 in curve P-256: we
+	 * compute t = y^2 - x^3 + 3*x - B and check that it is zero.
+	 * The Montgomery representation of 0 is 0, but we must apply
+	 * the final reduction to make sure we have 0 and not p.
+	 */
+	f256_montysquare(t, y);
+	f256_montysquare(x3, x);
+	f256_montymul(x3, x3, x);
+	f256_sub(t, t, x3);
+	f256_add(t, t, x);
+	f256_add(t, t, x);
+	f256_add(t, t, x);
+	f256_sub(t, t, P256_B_MONTY);
+	f256_final_reduce(t);
+	tt = t[0] | t[1] | t[2] | t[3];
+	r &= EQ((uint32_t)(tt | (tt >> 32)), 0);
+
+	/*
+	 * Return the point in Jacobian coordinates (and Montgomery
+	 * representation).
+	 */
+	memcpy(P->x, x, sizeof x);
+	memcpy(P->y, y, sizeof y);
+	memcpy(P->z, F256_R, sizeof F256_R);
+	return r;
+}
+
+/*
+ * Final conversion for a point:
+ *  - The point is converted back to affine coordinates.
+ *  - Final reduction is performed.
+ *  - The point is encoded (65 bytes: 0x04 || X || Y) into the buffer.
+ *
+ * If the point is the point-at-infinity, all operations are performed,
+ * but the buffer contents are indeterminate, and 0 is returned. Otherwise,
+ * the encoded point is written in the buffer, and 1 is returned.
+ */
+static uint32_t
+point_encode(unsigned char *buf, const p256_jacobian *P)
+{
+	uint64_t t1[4], t2[4], z;
+
+	/* Set t1 = 1/z^2 and t2 = 1/z^3. */
+	f256_invert(t2, P->z);
+	f256_montysquare(t1, t2);
+	f256_montymul(t2, t2, t1);
+
+	/* Compute affine coordinates x (in t1) and y (in t2). */
+	f256_montymul(t1, P->x, t1);
+	f256_montymul(t2, P->y, t2);
+
+	/* Convert back from Montgomery representation, and finalize
+	   reductions so that each coordinate is in the 0..p-1 range. */
+	f256_frommonty(t1, t1);
+	f256_frommonty(t2, t2);
+	f256_final_reduce(t1);
+	f256_final_reduce(t2);
+
+	/* Encode (big-endian, most significant limb first). */
+	buf[0] = 0x04;
+	br_enc64be(buf +  1, t1[3]);
+	br_enc64be(buf +  9, t1[2]);
+	br_enc64be(buf + 17, t1[1]);
+	br_enc64be(buf + 25, t1[0]);
+	br_enc64be(buf + 33, t2[3]);
+	br_enc64be(buf + 41, t2[2]);
+	br_enc64be(buf + 49, t2[1]);
+	br_enc64be(buf + 57, t2[0]);
+
+	/* Return success if and only if P->z != 0. */
+	z = P->z[0] | P->z[1] | P->z[2] | P->z[3];
+	return NEQ((uint32_t)(z | z >> 32), 0);
+}
+
+/*
+ * Point doubling in Jacobian coordinates: point P is doubled.
+ * Note: if the source point is the point-at-infinity, then the result is
+ * still the point-at-infinity, which is correct. Moreover, if the three
+ * coordinates were zero, then they still are zero in the returned value.
+ *
+ * (Note: this is true even without the final reduction: if the three
+ * coordinates are encoded as four words of value zero each, then the
+ * result will also have all-zero coordinate encodings, not the alternate
+ * encoding as the integer p.)
+ */
+static void
+p256_double(p256_jacobian *P)
+{
+	/*
+	 * Doubling formulas are:
+	 *
+	 *   s = 4*x*y^2
+	 *   m = 3*(x + z^2)*(x - z^2)
+	 *   x' = m^2 - 2*s
+	 *   y' = m*(s - x') - 8*y^4
+	 *   z' = 2*y*z
+	 *
+	 * These formulas work for all points, including points of order 2
+	 * and points at infinity:
+	 *   - If y = 0 then z' = 0. But there is no such point in P-256
+	 *     anyway.
+	 *   - If z = 0 then z' = 0.
+	 */
+	uint64_t t1[4], t2[4], t3[4], t4[4];
+
+	/*
+	 * Compute z^2 in t1.
+	 */
+	f256_montysquare(t1, P->z);
+
+	/*
+	 * Compute x+z^2 in t2 and x-z^2 in t1.
+	 */
+	f256_add(t2, P->x, t1);
+	f256_sub(t1, P->x, t1);
+
+	/*
+	 * Compute m = 3*(x+z^2)*(x-z^2) in t1.
+	 */
+	f256_montymul(t3, t1, t2);
+	f256_add(t1, t3, t3);
+	f256_add(t1, t3, t1);
+
+	/*
+	 * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
+	 */
+	f256_montysquare(t3, P->y);
+	f256_add(t3, t3, t3);
+	f256_montymul(t2, P->x, t3);
+	f256_add(t2, t2, t2);
+
+	/*
+	 * Compute x' = m^2 - 2*s.
+	 */
+	f256_montysquare(P->x, t1);
+	f256_sub(P->x, P->x, t2);
+	f256_sub(P->x, P->x, t2);
+
+	/*
+	 * Compute z' = 2*y*z.
+	 */
+	f256_montymul(t4, P->y, P->z);
+	f256_add(P->z, t4, t4);
+
+	/*
+	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
+	 * 2*y^2 in t3.
+	 */
+	f256_sub(t2, t2, P->x);
+	f256_montymul(P->y, t1, t2);
+	f256_montysquare(t4, t3);
+	f256_add(t4, t4, t4);
+	f256_sub(P->y, P->y, t4);
+}
+
+/*
+ * Point addition (Jacobian coordinates): P1 is replaced with P1+P2.
+ * This function computes the wrong result in the following cases:
+ *
+ *   - If P1 == 0 but P2 != 0
+ *   - If P1 != 0 but P2 == 0
+ *   - If P1 == P2
+ *
+ * In all three cases, P1 is set to the point at infinity.
+ *
+ * Returned value is 0 if one of the following occurs:
+ *
+ *   - P1 and P2 have the same Y coordinate.
+ *   - P1 == 0 and P2 == 0.
+ *   - The Y coordinate of one of the points is 0 and the other point is
+ *     the point at infinity.
+ *
+ * The third case cannot actually happen with valid points, since a point
+ * with Y == 0 is a point of order 2, and there is no point of order 2 on
+ * curve P-256.
+ *
+ * Therefore, assuming that P1 != 0 and P2 != 0 on input, then the caller
+ * can apply the following:
+ *
+ *   - If the result is not the point at infinity, then it is correct.
+ *   - Otherwise, if the returned value is 1, then this is a case of
+ *     P1+P2 == 0, so the result is indeed the point at infinity.
+ *   - Otherwise, P1 == P2, so a "double" operation should have been
+ *     performed.
+ *
+ * Note that you can get a returned value of 0 with a correct result,
+ * e.g. if P1 and P2 have the same Y coordinate, but distinct X coordinates.
+ */
+static uint32_t
+p256_add(p256_jacobian *P1, const p256_jacobian *P2)
+{
+	/*
+	 * Addition formulas are:
+	 *
+	 *   u1 = x1 * z2^2
+	 *   u2 = x2 * z1^2
+	 *   s1 = y1 * z2^3
+	 *   s2 = y2 * z1^3
+	 *   h = u2 - u1
+	 *   r = s2 - s1
+	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
+	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+	 *   z3 = h * z1 * z2
+	 */
+	uint64_t t1[4], t2[4], t3[4], t4[4], t5[4], t6[4], t7[4], tt;
+	uint32_t ret;
+
+	/*
+	 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
+	 */
+	f256_montysquare(t3, P2->z);
+	f256_montymul(t1, P1->x, t3);
+	f256_montymul(t4, P2->z, t3);
+	f256_montymul(t3, P1->y, t4);
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	f256_montysquare(t4, P1->z);
+	f256_montymul(t2, P2->x, t4);
+	f256_montymul(t5, P1->z, t4);
+	f256_montymul(t4, P2->y, t5);
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 * We need to test whether r is zero, so we apply a final
+	 * reduction to it first; ret is then 1 if r != 0, 0 otherwise.
+	 */
+	f256_sub(t2, t2, t1);
+	f256_sub(t4, t4, t3);
+	f256_final_reduce(t4);
+	tt = t4[0] | t4[1] | t4[2] | t4[3];
+	ret = (uint32_t)(tt | (tt >> 32));
+	ret = (ret | -ret) >> 31;
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5);
+	 */
+	f256_montysquare(t7, t2);
+	f256_montymul(t6, t1, t7);
+	f256_montymul(t5, t7, t2);
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 */
+	f256_montysquare(P1->x, t4);
+	f256_sub(P1->x, P1->x, t5);
+	f256_sub(P1->x, P1->x, t6);
+	f256_sub(P1->x, P1->x, t6);
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	f256_sub(t6, t6, P1->x);
+	f256_montymul(P1->y, t4, t6);
+	f256_montymul(t1, t5, t3);
+	f256_sub(P1->y, P1->y, t1);
+
+	/*
+	 * Compute z3 = h*z1*z2.
+	 */
+	f256_montymul(t1, P1->z, P2->z);
+	f256_montymul(P1->z, t1, t2);
+
+	return ret;
+}
+
+/*
+ * Point addition (mixed coordinates): P1 is replaced with P1+P2.
+ * This is a specialised function for the case when P2 is a non-zero point
+ * in affine coordinates.
+ *
+ * This function computes the wrong result in the following cases:
+ *
+ *   - If P1 == 0
+ *   - If P1 == P2
+ *
+ * In both cases, P1 is set to the point at infinity.
+ *
+ * Returned value is 0 if one of the following occurs:
+ *
+ *   - P1 and P2 have the same Y (affine) coordinate.
+ *   - The Y coordinate of P2 is 0 and P1 is the point at infinity.
+ *
+ * The second case cannot actually happen with valid points, since a point
+ * with Y == 0 is a point of order 2, and there is no point of order 2 on
+ * curve P-256.
+ *
+ * Therefore, assuming that P1 != 0 on input, then the caller
+ * can apply the following:
+ *
+ *   - If the result is not the point at infinity, then it is correct.
+ *   - Otherwise, if the returned value is 1, then this is a case of
+ *     P1+P2 == 0, so the result is indeed the point at infinity.
+ *   - Otherwise, P1 == P2, so a "double" operation should have been
+ *     performed.
+ *
+ * Again, a value of 0 may be returned in some cases where the addition
+ * result is correct.
+ */
+static uint32_t
+p256_add_mixed(p256_jacobian *P1, const p256_affine *P2)
+{
+	/*
+	 * Addition formulas are:
+	 *
+	 *   u1 = x1
+	 *   u2 = x2 * z1^2
+	 *   s1 = y1
+	 *   s2 = y2 * z1^3
+	 *   h = u2 - u1
+	 *   r = s2 - s1
+	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
+	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+	 *   z3 = h * z1
+	 */
+	uint64_t t1[4], t2[4], t3[4], t4[4], t5[4], t6[4], t7[4], tt;
+	uint32_t ret;
+
+	/*
+	 * Compute u1 = x1 (in t1) and s1 = y1 (in t3).
+	 */
+	memcpy(t1, P1->x, sizeof t1);
+	memcpy(t3, P1->y, sizeof t3);
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	f256_montysquare(t4, P1->z);
+	f256_montymul(t2, P2->x, t4);
+	f256_montymul(t5, P1->z, t4);
+	f256_montymul(t4, P2->y, t5);
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 * We need to test whether r is zero, so we apply a final
+	 * reduction to it first; ret is then 1 if r != 0, 0 otherwise.
+	 */
+	f256_sub(t2, t2, t1);
+	f256_sub(t4, t4, t3);
+	f256_final_reduce(t4);
+	tt = t4[0] | t4[1] | t4[2] | t4[3];
+	ret = (uint32_t)(tt | (tt >> 32));
+	ret = (ret | -ret) >> 31;
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5);
+	 */
+	f256_montysquare(t7, t2);
+	f256_montymul(t6, t1, t7);
+	f256_montymul(t5, t7, t2);
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 */
+	f256_montysquare(P1->x, t4);
+	f256_sub(P1->x, P1->x, t5);
+	f256_sub(P1->x, P1->x, t6);
+	f256_sub(P1->x, P1->x, t6);
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	f256_sub(t6, t6, P1->x);
+	f256_montymul(P1->y, t4, t6);
+	f256_montymul(t1, t5, t3);
+	f256_sub(P1->y, P1->y, t1);
+
+	/*
+	 * Compute z3 = h*z1 (P2 is affine, so z2 = 1 implicitly).
+	 */
+	f256_montymul(P1->z, P1->z, t2);
+
+	return ret;
+}
+
+#if 0
+/* unused */
+/*
+ * Point addition (mixed coordinates, complete): P1 is replaced with P1+P2.
+ * This is a specialised function for the case when P2 is a non-zero point
+ * in affine coordinates. The correct result is computed in all cases.
+ *
+ * NOTE(review): declared uint32_t but returns no value; settle before use.
+ */
+static uint32_t
+p256_add_complete_mixed(p256_jacobian *P1, const p256_affine *P2)
+{
+	/*
+	 * Addition formulas, in the general case, are:
+	 *
+	 *   u1 = x1
+	 *   u2 = x2 * z1^2
+	 *   s1 = y1
+	 *   s2 = y2 * z1^3
+	 *   h = u2 - u1
+	 *   r = s2 - s1
+	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
+	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+	 *   z3 = h * z1
+	 *
+	 * These formulas mishandle the two following cases:
+	 *
+	 *  - If P1 is the point-at-infinity (z1 = 0), then z3 is
+	 *    incorrectly set to 0.
+	 *
+	 *  - If P1 = P2, then u1 = u2 and s1 = s2, and x3, y3 and z3
+	 *    are all set to 0.
+	 *
+	 * However, if P1 + P2 = 0, then u1 = u2 but s1 != s2, and then
+	 * we correctly get z3 = 0 (the point-at-infinity).
+	 *
+	 * To fix the case P1 = 0, we perform at the end a copy of P2
+	 * over P1, conditional to z1 = 0.
+	 *
+	 * For P1 = P2: in that case, both h and r are set to 0, and
+	 * we get x3, y3 and z3 equal to 0. We can test for that
+	 * occurrence to make a mask which will be all-one if P1 = P2,
+	 * or all-zero otherwise; then we can compute the double of P2
+	 * and add it, combined with the mask, to (x3,y3,z3).
+	 *
+	 * Using the doubling formulas in p256_double() on (x2,y2),
+	 * simplifying since P2 is affine (i.e. z2 = 1, implicitly),
+	 * we get:
+	 *   s = 4*x2*y2^2
+	 *   m = 3*(x2 + 1)*(x2 - 1)
+	 *   x' = m^2 - 2*s
+	 *   y' = m*(s - x') - 8*y2^4
+	 *   z' = 2*y2
+	 * which requires only 6 multiplications. Added to the 11
+	 * multiplications of the normal mixed addition in Jacobian
+	 * coordinates, we get a cost of 17 multiplications in total.
+	 */
+	uint64_t t1[4], t2[4], t3[4], t4[4], t5[4], t6[4], t7[4], tt, zz;
+	int i;
+
+	/*
+	 * Set zz to -1 if P1 is the point at infinity, 0 otherwise.
+	 */
+	zz = P1->z[0] | P1->z[1] | P1->z[2] | P1->z[3];
+	zz = ((zz | -zz) >> 63) - (uint64_t)1;
+
+	/*
+	 * Compute u1 = x1 (in t1) and s1 = y1 (in t3).
+	 */
+	memcpy(t1, P1->x, sizeof t1);
+	memcpy(t3, P1->y, sizeof t3);
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	f256_montysquare(t4, P1->z);
+	f256_montymul(t2, P2->x, t4);
+	f256_montymul(t5, P1->z, t4);
+	f256_montymul(t4, P2->y, t5);
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4); both are
+	 * fully reduced just below so that they can be tested for zero.
+	 */
+	f256_sub(t2, t2, t1);
+	f256_sub(t4, t4, t3);
+
+	/*
+	 * If both h = 0 and r = 0, then P1 = P2, and we want to set
+	 * the mask tt to -1; otherwise, the mask will be 0.
+	 */
+	f256_final_reduce(t2);
+	f256_final_reduce(t4);
+	tt = t2[0] | t2[1] | t2[2] | t2[3] | t4[0] | t4[1] | t4[2] | t4[3];
+	tt = ((tt | -tt) >> 63) - (uint64_t)1;
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5);
+	 */
+	f256_montysquare(t7, t2);
+	f256_montymul(t6, t1, t7);
+	f256_montymul(t5, t7, t2);
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 */
+	f256_montysquare(P1->x, t4);
+	f256_sub(P1->x, P1->x, t5);
+	f256_sub(P1->x, P1->x, t6);
+	f256_sub(P1->x, P1->x, t6);
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	f256_sub(t6, t6, P1->x);
+	f256_montymul(P1->y, t4, t6);
+	f256_montymul(t1, t5, t3);
+	f256_sub(P1->y, P1->y, t1);
+
+	/*
+	 * Compute z3 = h*z1.
+	 */
+	f256_montymul(P1->z, P1->z, t2);
+
+	/*
+	 * The "double" result, in case P1 = P2.
+	 */
+
+	/*
+	 * Compute z' = 2*y2 (in t1).
+	 */
+	f256_add(t1, P2->y, P2->y);
+
+	/*
+	 * Compute 2*(y2^2) (in t2) and s = 4*x2*(y2^2) (in t3).
+	 */
+	f256_montysquare(t2, P2->y);
+	f256_add(t2, t2, t2);
+	f256_add(t3, t2, t2);
+	f256_montymul(t3, P2->x, t3);
+
+	/*
+	 * Compute m = 3*(x2^2 - 1) (in t4).
+	 */
+	f256_montysquare(t4, P2->x);
+	f256_sub(t4, t4, F256_R);
+	f256_add(t5, t4, t4);
+	f256_add(t4, t4, t5);
+
+	/*
+	 * Compute x' = m^2 - 2*s (in t5), subtracting s twice in place.
+	 */
+	f256_montysquare(t5, t4);
+	f256_sub(t5, t5, t3);
+	f256_sub(t5, t5, t3);
+
+	/*
+	 * Compute y' = m*(s - x') - 8*y2^4 (in t6).
+	 */
+	f256_sub(t6, t3, t5);
+	f256_montymul(t6, t6, t4);
+	f256_montysquare(t7, t2);
+	f256_sub(t6, t6, t7);
+	f256_sub(t6, t6, t7);
+
+	/*
+	 * We now have the alternate (doubling) coordinates in (t5,t6,t1).
+	 * We combine them with (x3,y3,z3).
+	 */
+	for (i = 0; i < 4; i ++) {
+		P1->x[i] |= tt & t5[i];
+		P1->y[i] |= tt & t6[i];
+		P1->z[i] |= tt & t1[i];
+	}
+
+	/*
+	 * If P1 = 0, then we get z3 = 0 (which is invalid); if z1 is 0,
+	 * then we want to replace the result with a copy of P2. The
+	 * test on z1 was done at the start, in the zz mask.
+	 */
+	for (i = 0; i < 4; i ++) {
+		P1->x[i] ^= zz & (P1->x[i] ^ P2->x[i]);
+		P1->y[i] ^= zz & (P1->y[i] ^ P2->y[i]);
+		P1->z[i] ^= zz & (P1->z[i] ^ F256_R[i]);
+	}
+}
+#endif
+
+/*
+ * Inner function for computing a point multiplication. A window is
+ * provided, with points 1*P to 15*P in affine coordinates; the scalar
+ * k[] (klen bytes, big-endian) is consumed four bits at a time.
+ * Assumptions:
+ *  - All provided points are valid points on the curve.
+ *  - Multiplier is non-zero, and smaller than the curve order.
+ *  - Everything is in Montgomery representation.
+ */
+static void
+point_mul_inner(p256_jacobian *R, const p256_affine *W,
+	const unsigned char *k, size_t klen)
+{
+	p256_jacobian Q;
+	uint32_t qz;
+
+	memset(&Q, 0, sizeof Q);
+	qz = 1;
+	while (klen -- > 0) {
+		int i;
+		unsigned bk;
+
+		bk = *k ++;
+		for (i = 0; i < 2; i ++) {
+			uint32_t bits;
+			uint32_t bnz;
+			p256_affine T;
+			p256_jacobian U;
+			uint32_t n;
+			int j;
+			uint64_t m;
+
+			p256_double(&Q);
+			p256_double(&Q);
+			p256_double(&Q);
+			p256_double(&Q);
+			bits = (bk >> 4) & 0x0F;
+			bnz = NEQ(bits, 0);
+
+			/*
+			 * Lookup point in window: all 15 entries are
+			 * read and masked, whatever the bits. If the
+			 * bits are 0, we get something invalid, which
+			 * is fine since it is then not used.
+			 */
+			memset(&T, 0, sizeof T);
+			for (n = 0; n < 15; n ++) {
+				m = -(uint64_t)EQ(bits, n + 1);
+				T.x[0] |= m & W[n].x[0];
+				T.x[1] |= m & W[n].x[1];
+				T.x[2] |= m & W[n].x[2];
+				T.x[3] |= m & W[n].x[3];
+				T.y[0] |= m & W[n].y[0];
+				T.y[1] |= m & W[n].y[1];
+				T.y[2] |= m & W[n].y[2];
+				T.y[3] |= m & W[n].y[3];
+			}
+
+			U = Q;
+			p256_add_mixed(&U, &T);
+
+			/*
+			 * If qz is still 1, then Q was all-zeros, and this
+			 * is conserved through p256_double().
+			 */
+			m = -(uint64_t)(bnz & qz);
+			for (j = 0; j < 4; j ++) {
+				Q.x[j] |= m & T.x[j];
+				Q.y[j] |= m & T.y[j];
+				Q.z[j] |= m & F256_R[j];
+			}
+			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);
+			qz &= ~bnz;
+			bk <<= 4;
+		}
+	}
+	*R = Q;
+}
+
+/*
+ * Convert a window from Jacobian to affine coordinates. A single
+ * field inversion is used. Sized for windows up to 32 elements, but
+ * NOTE(review): if some level has an odd group count (e.g. num = 14),
+ * z[n] looks read before being set; the num = 15 caller is unaffected.
+ *
+ * The destination array (aff[]) and the source array (jac[]) may overlap,
+ * provided that the start of aff[] is not after the start of jac[]. The
+ * source array is modified even if the arrays do _not_ overlap.
+ */
+static void
+window_to_affine(p256_affine *aff, p256_jacobian *jac, int num)
+{
+	/*
+	 * Convert the window points to affine coordinates. We use the
+	 * following trick to mutualize the inversion computation: if
+	 * we have z1, z2, z3, and z4, and want to invert all of them,
+	 * we compute u = 1/(z1*z2*z3*z4), and then we have:
+	 *   1/z1 = u*z2*z3*z4
+	 *   1/z2 = u*z1*z3*z4
+	 *   1/z3 = u*z1*z2*z4
+	 *   1/z4 = u*z1*z2*z3
+	 *
+	 * The partial products are computed recursively:
+	 *
+	 *  - on input (z_1,z_2), return (z_2,z_1) and z_1*z_2
+	 *  - on input (z_1,z_2,... z_n):
+	 *       recurse on (z_1,z_2,... z_(n/2)) -> r1 and m1
+	 *       recurse on (z_(n/2+1),z_(n/2+2)... z_n) -> r2 and m2
+	 *       multiply elements of r1 by m2 -> s1
+	 *       multiply elements of r2 by m1 -> s2
+	 *       return s1||s2 and m1*m2
+	 *
+	 * In the example below, we suppose that we have 14 elements.
+	 * Let z1, z2,... zE be the 14 values to invert (index noted in
+	 * hexadecimal, starting at 1).
+	 *
+	 *  - Depth 1:
+	 *      swap(z1, z2); z12 = z1*z2
+	 *      swap(z3, z4); z34 = z3*z4
+	 *      swap(z5, z6); z56 = z5*z6
+	 *      swap(z7, z8); z78 = z7*z8
+	 *      swap(z9, zA); z9A = z9*zA
+	 *      swap(zB, zC); zBC = zB*zC
+	 *      swap(zD, zE); zDE = zD*zE
+	 *
+	 *  - Depth 2:
+	 *      z1 <- z1*z34, z2 <- z2*z34, z3 <- z3*z12, z4 <- z4*z12
+	 *      z1234 = z12*z34
+	 *      z5 <- z5*z78, z6 <- z6*z78, z7 <- z7*z56, z8 <- z8*z56
+	 *      z5678 = z56*z78
+	 *      z9 <- z9*zBC, zA <- zA*zBC, zB <- zB*z9A, zC <- zC*z9A
+	 *      z9ABC = z9A*zBC
+	 *
+	 *  - Depth 3:
+	 *      z1 <- z1*z5678, z2 <- z2*z5678, z3 <- z3*z5678, z4 <- z4*z5678
+	 *      z5 <- z5*z1234, z6 <- z6*z1234, z7 <- z7*z1234, z8 <- z8*z1234
+	 *      z12345678 = z1234*z5678
+	 *      z9 <- z9*zDE, zA <- zA*zDE, zB <- zB*zDE, zC <- zC*zDE
+	 *      zD <- zD*z9ABC, zE <- zE*z9ABC
+	 *      z9ABCDE = z9ABC*zDE
+	 *
+	 *  - Depth 4:
+	 *      multiply z1..z8 by z9ABCDE
+	 *      multiply z9..zE by z12345678
+	 *      final z = z12345678*z9ABCDE
+	 */
+
+	uint64_t z[16][4];
+	int i, k, s;
+#define zt   (z[15])
+#define zu   (z[14])
+#define zv   (z[13])
+
+	/*
+	 * First recursion step (pairwise swapping and multiplication).
+	 * If there is an odd number of elements, then we "invent" an
+	 * extra one with coordinate Z = 1 (in Montgomery representation).
+	 */
+	for (i = 0; (i + 1) < num; i += 2) {
+		memcpy(zt, jac[i].z, sizeof zt);
+		memcpy(jac[i].z, jac[i + 1].z, sizeof zt);
+		memcpy(jac[i + 1].z, zt, sizeof zt);
+		f256_montymul(z[i >> 1], jac[i].z, jac[i + 1].z);
+	}
+	if ((num & 1) != 0) {
+		memcpy(z[num >> 1], jac[num - 1].z, sizeof zt);
+		memcpy(jac[num - 1].z, F256_R, sizeof F256_R);
+	}
+
+	/*
+	 * Perform further recursion steps. At the entry of each step,
+	 * the process has been done for groups of 's' points. The
+	 * integer k is the log2 of s.
+	 */
+	for (k = 1, s = 2; s < num; k ++, s <<= 1) {
+		int n;
+
+		for (i = 0; i < num; i ++) {
+			f256_montymul(jac[i].z, jac[i].z, z[(i >> k) ^ 1]);
+		}
+		n = (num + s - 1) >> k;
+		for (i = 0; i < (n >> 1); i ++) {
+			f256_montymul(z[i], z[i << 1], z[(i << 1) + 1]);
+		}
+		if ((n & 1) != 0) {
+			memmove(z[n >> 1], z[n], sizeof zt);
+		}
+	}
+
+	/*
+	 * Invert the final result, and convert all points.
+	 */
+	f256_invert(zt, z[0]);
+	for (i = 0; i < num; i ++) {
+		f256_montymul(zv, jac[i].z, zt);
+		f256_montysquare(zu, zv);
+		f256_montymul(zv, zv, zu);
+		f256_montymul(aff[i].x, jac[i].x, zu);
+		f256_montymul(aff[i].y, jac[i].y, zv);
+	}
+}
+
+/*
+ * Multiply the provided point by an integer (P is replaced with k*P).
+ * Assumptions:
+ *  - Source point is a valid curve point.
+ *  - Source point is not the point-at-infinity.
+ *  - Integer is not 0, and is lower than the curve order.
+ * If these conditions are not met, then the result is indeterminate
+ * (but the process is still constant-time).
+ */
+static void
+p256_mul(p256_jacobian *P, const unsigned char *k, size_t klen)
+{
+	union {
+		p256_affine aff[15];
+		p256_jacobian jac[15];
+	} window;
+	int i;
+
+	/*
+	 * Compute window (Jacobian): jac[i-1] = i*P, by doubling (i/2)*P or adding (i>>1)*P and ((i>>1)+1)*P.
+	 */
+	window.jac[0] = *P;
+	for (i = 2; i < 16; i ++) {
+		window.jac[i - 1] = window.jac[(i >> 1) - 1];
+		if ((i & 1) == 0) {
+			p256_double(&window.jac[i - 1]);
+		} else {
+			p256_add(&window.jac[i - 1], &window.jac[i >> 1]);
+		}
+	}
+
+	/*
+	 * Convert the window points to affine coordinates, in place (the
+	 * aff[] and jac[] arrays share storage through the union).
+	 */
+	window_to_affine(window.aff, window.jac, 15);
+
+	/*
+	 * Perform point multiplication.
+	 */
+	point_mul_inner(P, window.aff, k, klen);
+}
+
+/*
+ * Precomputed window for the conventional generator G: for n = 0..14,
+ * P256_Gwin[n] is (n+1)*G (affine coordinates, Montgomery representation).
+ */
+static const p256_affine P256_Gwin[] = {
+	{
+		{ 0x79E730D418A9143C, 0x75BA95FC5FEDB601,
+		  0x79FB732B77622510, 0x18905F76A53755C6 },
+		{ 0xDDF25357CE95560A, 0x8B4AB8E4BA19E45C,
+		  0xD2E88688DD21F325, 0x8571FF1825885D85 }
+	},
+	{
+		{ 0x850046D410DDD64D, 0xAA6AE3C1A433827D,
+		  0x732205038D1490D9, 0xF6BB32E43DCF3A3B },
+		{ 0x2F3648D361BEE1A5, 0x152CD7CBEB236FF8,
+		  0x19A8FB0E92042DBE, 0x78C577510A5B8A3B }
+	},
+	{
+		{ 0xFFAC3F904EEBC127, 0xB027F84A087D81FB,
+		  0x66AD77DD87CBBC98, 0x26936A3FB6FF747E },
+		{ 0xB04C5C1FC983A7EB, 0x583E47AD0861FE1A,
+		  0x788208311A2EE98E, 0xD5F06A29E587CC07 }
+	},
+	{
+		{ 0x74B0B50D46918DCC, 0x4650A6EDC623C173,
+		  0x0CDAACACE8100AF2, 0x577362F541B0176B },
+		{ 0x2D96F24CE4CBABA6, 0x17628471FAD6F447,
+		  0x6B6C36DEE5DDD22E, 0x84B14C394C5AB863 }
+	},
+	{
+		{ 0xBE1B8AAEC45C61F5, 0x90EC649A94B9537D,
+		  0x941CB5AAD076C20C, 0xC9079605890523C8 },
+		{ 0xEB309B4AE7BA4F10, 0x73C568EFE5EB882B,
+		  0x3540A9877E7A1F68, 0x73A076BB2DD1E916 }
+	},
+	{
+		{ 0x403947373E77664A, 0x55AE744F346CEE3E,
+		  0xD50A961A5B17A3AD, 0x13074B5954213673 },
+		{ 0x93D36220D377E44B, 0x299C2B53ADFF14B5,
+		  0xF424D44CEF639F11, 0xA4C9916D4A07F75F }
+	},
+	{
+		{ 0x0746354EA0173B4F, 0x2BD20213D23C00F7,
+		  0xF43EAAB50C23BB08, 0x13BA5119C3123E03 },
+		{ 0x2847D0303F5B9D4D, 0x6742F2F25DA67BDD,
+		  0xEF933BDC77C94195, 0xEAEDD9156E240867 }
+	},
+	{
+		{ 0x27F14CD19499A78F, 0x462AB5C56F9B3455,
+		  0x8F90F02AF02CFC6B, 0xB763891EB265230D },
+		{ 0xF59DA3A9532D4977, 0x21E3327DCF9EBA15,
+		  0x123C7B84BE60BBF0, 0x56EC12F27706DF76 }
+	},
+	{
+		{ 0x75C96E8F264E20E8, 0xABE6BFED59A7A841,
+		  0x2CC09C0444C8EB00, 0xE05B3080F0C4E16B },
+		{ 0x1EB7777AA45F3314, 0x56AF7BEDCE5D45E3,
+		  0x2B6E019A88B12F1A, 0x086659CDFD835F9B }
+	},
+	{
+		{ 0x2C18DBD19DC21EC8, 0x98F9868A0FCF8139,
+		  0x737D2CD648250B49, 0xCC61C94724B3428F },
+		{ 0x0C2B407880DD9E76, 0xC43A8991383FBE08,
+		  0x5F7D2D65779BE5D2, 0x78719A54EB3B4AB5 }
+	},
+	{
+		{ 0xEA7D260A6245E404, 0x9DE407956E7FDFE0,
+		  0x1FF3A4158DAC1AB5, 0x3E7090F1649C9073 },
+		{ 0x1A7685612B944E88, 0x250F939EE57F61C8,
+		  0x0C0DAA891EAD643D, 0x68930023E125B88E }
+	},
+	{
+		{ 0x04B71AA7D2697768, 0xABDEDEF5CA345A33,
+		  0x2409D29DEE37385E, 0x4EE1DF77CB83E156 },
+		{ 0x0CAC12D91CBB5B43, 0x170ED2F6CA895637,
+		  0x28228CFA8ADE6D66, 0x7FF57C9553238ACA }
+	},
+	{
+		{ 0xCCC425634B2ED709, 0x0E356769856FD30D,
+		  0xBCBCD43F559E9811, 0x738477AC5395B759 },
+		{ 0x35752B90C00EE17F, 0x68748390742ED2E3,
+		  0x7CD06422BD1F5BC1, 0xFBC08769C9E7B797 }
+	},
+	{
+		{ 0xA242A35BB0CF664A, 0x126E48F77F9707E3,
+		  0x1717BF54C6832660, 0xFAAE7332FD12C72E },
+		{ 0x27B52DB7995D586B, 0xBE29569E832237C2,
+		  0xE8E4193E2A65E7DB, 0x152706DC2EAA1BBB }
+	},
+	{
+		{ 0x72BCD8B7BC60055B, 0x03CC23EE56E27E4B,
+		  0xEE337424E4819370, 0xE2AA0E430AD3DA09 },
+		{ 0x40B8524F6383C45D, 0xD766355442A41B25,
+		  0x64EFA6DE778A4797, 0x2042170A7079ADF4 }
+	}
+};
+
+/*
+ * Multiply the conventional generator of the curve by the provided
+ * integer. Return is written in *P.
+ *
+ * Assumptions:
+ *  - Integer is not 0, and is lower than the curve order.
+ * If this condition is not met, then the result is indeterminate
+ * (but the process is still constant-time).
+ */
+static void
+p256_mulgen(p256_jacobian *P, const unsigned char *k, size_t klen)
+{
+	point_mul_inner(P, P256_Gwin, k, klen);	/* shared ladder, fed with the precomputed generator window */
+}
+
+/*
+ * Return 1 if all of the following hold:
+ *  - klen <= 32
+ *  - k != 0
+ *  - k is lower than the curve order
+ * Otherwise, return 0.
+ *
+ * Constant-time behaviour: only klen may be observable.
+ */
+static uint32_t
+check_scalar(const unsigned char *k, size_t klen)
+{
+	uint32_t z;	/* OR of all scalar bytes: non-zero iff k != 0 */
+	int32_t c;	/* comparison of k with P256_N; negative means "lower" */
+	size_t u;
+
+	if (klen > 32) {	/* early exit depends on klen only, never on k[] */
+		return 0;
+	}
+	z = 0;
+	for (u = 0; u < klen; u ++) {
+		z |= k[u];
+	}
+	if (klen == 32) {
+		c = 0;	/* stays 0 while all bytes seen so far are equal */
+		for (u = 0; u < klen; u ++) {	/* all 32 bytes are always visited */
+			c |= -(int32_t)EQ0(c) & CMP(k[u], P256_N[u]);	/* mask is 0 once c != 0 (assuming EQ0 yields 0/1): first difference wins */
+		}
+	} else {
+		c = -1;	/* fewer than 32 bytes: treated as lower than the order */
+	}
+	return NEQ(z, 0) & LT0(c);	/* both conditions are combined without branching */
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *k, size_t klen, int curve)
+{
+	uint32_t r;	/* status accumulator, AND of every check below */
+	p256_jacobian P;
+
+	(void)curve;	/* curve identifier is ignored by this implementation */
+	if (Glen != 65) {	/* only 65-byte encoded points are accepted */
+		return 0;
+	}
+	r = check_scalar(k, klen);
+	r &= point_decode(&P, G);
+	p256_mul(&P, k, klen);	/* always executed, even when r is already 0 */
+	r &= point_encode(G, &P);	/* result overwrites G in place */
+	return r;
+}
+
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *k, size_t klen, int curve)
+{
+	p256_jacobian P;
+
+	(void)curve;	/* curve identifier is ignored by this implementation */
+	p256_mulgen(&P, k, klen);	/* note: no check_scalar() call on this path */
+	point_encode(R, &P);	/* status returned by point_encode is discarded */
+	return 65;	/* number of bytes written to R */
+}
+
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	/*
+	 * We might want to use Shamir's trick here: make a composite
+	 * window of u*P+v*Q points, to merge the two doubling-ladders
+	 * into one. This, however, has some complications:
+	 *
+	 *  - During the computation, we may hit the point-at-infinity.
+	 *    Thus, we would need p256_add_complete_mixed() (complete
+	 *    formulas for point addition), with a higher cost (17 muls
+	 *    instead of 11).
+	 *
+	 *  - A 4-bit window would be too large, since it would involve
+	 *    16*16-1 = 255 points. For the same window size as in the
+	 *    p256_mul() case, we would need to reduce the window size
+	 *    to 2 bits, and thus perform twice as many non-doubling
+	 *    point additions.
+	 *
+	 *  - The window may itself contain the point-at-infinity, and
+	 *    thus cannot in all generality be made of affine points.
+	 *    Instead, we would need to make it a window of points in
+	 *    Jacobian coordinates. Even p256_add_complete_mixed() would
+	 *    be inappropriate.
+	 *
+	 * For these reasons, the code below performs two separate
+	 * point multiplications, then computes the final point addition
+	 * (which is both a "normal" addition, and a doubling, to handle
+	 * all cases).
+	 */
+
+	p256_jacobian P, Q;
+	uint32_t r, t, s;	/* r: status; t: flag from p256_add; s: "sum has z == 0" */
+	uint64_t z;
+
+	(void)curve;	/* curve identifier is ignored by this implementation */
+	if (len != 65) {
+		return 0;
+	}
+	r = point_decode(&P, A);
+	p256_mul(&P, x, xlen);	/* P = x*A; x and y are not passed through check_scalar() here */
+	if (B == NULL) {	/* NULL selects the conventional generator */
+		p256_mulgen(&Q, y, ylen);
+	} else {
+		r &= point_decode(&Q, B);
+		p256_mul(&Q, y, ylen);
+	}
+
+	/*
+	 * The final addition may fail in case both points are equal.
+	 */
+	t = p256_add(&P, &Q);
+	f256_final_reduce(P.z);	/* reduce before testing the limbs for zero */
+	z = P.z[0] | P.z[1] | P.z[2] | P.z[3];
+	s = EQ((uint32_t)(z | (z >> 32)), 0);	/* fold 64 bits into 32, then compare with 0 */
+	p256_double(&Q);	/* computed unconditionally; kept only when s & ~t */
+
+	/*
+	 * If s is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
+	 * have the following:
+	 *
+	 *   s = 0, t = 0   return P (normal addition)
+	 *   s = 0, t = 1   return P (normal addition)
+	 *   s = 1, t = 0   return Q (a 'double' case)
+	 *   s = 1, t = 1   report an error (P+Q = 0)
+	 */
+	CCOPY(s & ~t, &P, &Q, sizeof Q);
+	point_encode(A, &P);	/* A is overwritten even when an error is reported */
+	r &= ~(s & t);
+	return r;
+}
+
+/* see bearssl_ec.h */
+const br_ec_impl br_ec_p256_m64 = {
+	(uint32_t)0x00800000,
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
+
+/* see bearssl_ec.h */
+const br_ec_impl *
+br_ec_p256_m64_get(void)
+{
+	return &br_ec_p256_m64;
+}
+
+#else
+
+/* see bearssl_ec.h */
+const br_ec_impl *
+br_ec_p256_m64_get(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/ec_prime_i15.c b/third_party/bearssl/src/ec_prime_i15.c
new file mode 100644
index 0000000..f86dbe6
--- /dev/null
+++ b/third_party/bearssl/src/ec_prime_i15.c
@@ -0,0 +1,824 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Parameters for supported curves:
+ *   - field modulus p
+ *   - R^2 mod p (R = 2^(15k) for the smallest k such that R >= p)
+ *   - b*R mod p (b is the second curve equation parameter)
+ */
+
+static const uint16_t P256_P[] = {
+	0x0111,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x003F, 0x0000,
+	0x0000, 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x4000, 0x7FFF,
+	0x7FFF, 0x0001
+};
+
+static const uint16_t P256_R2[] = {
+	0x0111,
+	0x0000, 0x6000, 0x0000, 0x0000, 0x0000, 0x0000, 0x7FFC, 0x7FFF,
+	0x7FBF, 0x7FFF, 0x7FBF, 0x7FFF, 0x7FFF, 0x7FFF, 0x77FF, 0x7FFF,
+	0x4FFF, 0x0000
+};
+
+static const uint16_t P256_B[] = {
+	0x0111,
+	0x770C, 0x5EEF, 0x29C4, 0x3EC4, 0x6273, 0x0486, 0x4543, 0x3993,
+	0x3C01, 0x6B56, 0x212E, 0x57EE, 0x4882, 0x204B, 0x7483, 0x3C16,
+	0x0187, 0x0000
+};
+
+static const uint16_t P384_P[] = {
+	0x0199,
+	0x7FFF, 0x7FFF, 0x0003, 0x0000, 0x0000, 0x0000, 0x7FC0, 0x7FFF,
+	0x7EFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x01FF
+};
+
+static const uint16_t P384_R2[] = {
+	0x0199,
+	0x1000, 0x0000, 0x0000, 0x7FFF, 0x7FFF, 0x0001, 0x0000, 0x0010,
+	0x0000, 0x0000, 0x0000, 0x7F00, 0x7FFF, 0x01FF, 0x0000, 0x1000,
+	0x0000, 0x2000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000
+};
+
+static const uint16_t P384_B[] = {
+	0x0199,
+	0x7333, 0x2096, 0x70D1, 0x2310, 0x3020, 0x6197, 0x1464, 0x35BB,
+	0x70CA, 0x0117, 0x1920, 0x4136, 0x5FC8, 0x5713, 0x4938, 0x7DD2,
+	0x4DD2, 0x4A71, 0x0220, 0x683E, 0x2C87, 0x4DB1, 0x7BFF, 0x6C09,
+	0x0452, 0x0084
+};
+
+static const uint16_t P521_P[] = {
+	0x022B,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x7FFF, 0x07FF
+};
+
+static const uint16_t P521_R2[] = {
+	0x022B,
+	0x0100, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0000
+};
+
+static const uint16_t P521_B[] = {
+	0x022B,
+	0x7002, 0x6A07, 0x751A, 0x228F, 0x71EF, 0x5869, 0x20F4, 0x1EFC,
+	0x7357, 0x37E0, 0x4EEC, 0x605E, 0x1652, 0x26F6, 0x31FA, 0x4A8F,
+	0x6193, 0x3C2A, 0x3C42, 0x48C7, 0x3489, 0x6771, 0x4C57, 0x5CCD,
+	0x2725, 0x545B, 0x503B, 0x5B42, 0x21A0, 0x2534, 0x687E, 0x70E4,
+	0x1618, 0x27D7, 0x0465
+};
+
+typedef struct {
+	const uint16_t *p;
+	const uint16_t *b;
+	const uint16_t *R2;
+	uint16_t p0i;
+	size_t point_len;
+} curve_params;
+
+static inline const curve_params *
+id_to_curve(int curve)
+{
+	static const curve_params pp[] = {	/* fields: p, b, R2, p0i, point_len */
+		{ P256_P, P256_B, P256_R2, 0x0001,  65 },
+		{ P384_P, P384_B, P384_R2, 0x0001,  97 },
+		{ P521_P, P521_B, P521_R2, 0x0001, 133 }
+	};
+
+	return &pp[curve - BR_EC_secp256r1];	/* no range check; assumes the three curve ids are consecutive -- confirm in bearssl_ec.h */
+}
+
+#define I15_LEN   ((BR_MAX_EC_SIZE + 29) / 15)
+
+/*
+ * Type for a point in Jacobian coordinates:
+ * -- three values, x, y and z, in Montgomery representation
+ * -- affine coordinates are X = x / z^2 and Y = y / z^3
+ * -- for the point at infinity, z = 0
+ */
+typedef struct {
+	uint16_t c[3][I15_LEN];
+} jacobian;
+
+/*
+ * We use a custom interpreter that uses a dozen registers, and
+ * only six operations:
+ *    MSET(d, a)       copy a into d
+ *    MADD(d, a)       d = d+a (modular)
+ *    MSUB(d, a)       d = d-a (modular)
+ *    MMUL(d, a, b)    d = a*b (Montgomery multiplication)
+ *    MINV(d, a, b)    invert d modulo p; a and b are used as scratch registers
+ *    MTZ(d)           clear return value if d = 0
+ * Destination of MMUL (d) must be distinct from operands (a and b).
+ * There is no such constraint for MSUB and MADD.
+ *
+ * Registers include the operand coordinates, and temporaries.
+ */
+#define MSET(d, a)      (0x0000 + ((d) << 8) + ((a) << 4))
+#define MADD(d, a)      (0x1000 + ((d) << 8) + ((a) << 4))
+#define MSUB(d, a)      (0x2000 + ((d) << 8) + ((a) << 4))
+#define MMUL(d, a, b)   (0x3000 + ((d) << 8) + ((a) << 4) + (b))
+#define MINV(d, a, b)   (0x4000 + ((d) << 8) + ((a) << 4) + (b))
+#define MTZ(d)          (0x5000 + ((d) << 8))
+#define ENDCODE         0
+
+/*
+ * Registers for the input operands.
+ */
+#define P1x    0
+#define P1y    1
+#define P1z    2
+#define P2x    3
+#define P2y    4
+#define P2z    5
+
+/*
+ * Alternate names for the first input operand.
+ */
+#define Px     0
+#define Py     1
+#define Pz     2
+
+/*
+ * Temporaries.
+ */
+#define t1     6
+#define t2     7
+#define t3     8
+#define t4     9
+#define t5    10
+#define t6    11
+#define t7    12
+
+/*
+ * Extra scratch registers available when there is no second operand (e.g.
+ * for "double" and "affine").
+ */
+#define t8     3
+#define t9     4
+#define t10    5
+
+/*
+ * Doubling formulas are:
+ *
+ *   s = 4*x*y^2
+ *   m = 3*(x + z^2)*(x - z^2)
+ *   x' = m^2 - 2*s
+ *   y' = m*(s - x') - 8*y^4
+ *   z' = 2*y*z
+ *
+ * If y = 0 (P has order 2) then this yields infinity (z' = 0), as it
+ * should. This case should not happen anyway, because our curves have
+ * prime order, and thus do not contain any point of order 2.
+ *
+ * If P is infinity (z = 0), then again the formulas yield infinity,
+ * which is correct. Thus, this code works for all points.
+ *
+ * Cost: 8 multiplications
+ */
+static const uint16_t code_double[] = {
+	/*
+	 * Compute z^2 (in t1).
+	 */
+	MMUL(t1, Pz, Pz),
+
+	/*
+	 * Compute x-z^2 (in t2) and then x+z^2 (in t1).
+	 */
+	MSET(t2, Px),
+	MSUB(t2, t1),
+	MADD(t1, Px),
+
+	/*
+	 * Compute m = 3*(x+z^2)*(x-z^2) (in t1).
+	 */
+	MMUL(t3, t1, t2),
+	MSET(t1, t3),
+	MADD(t1, t3),
+	MADD(t1, t3),
+
+	/*
+	 * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
+	 */
+	MMUL(t3, Py, Py),
+	MADD(t3, t3),
+	MMUL(t2, Px, t3),
+	MADD(t2, t2),
+
+	/*
+	 * Compute x' = m^2 - 2*s.
+	 */
+	MMUL(Px, t1, t1),
+	MSUB(Px, t2),
+	MSUB(Px, t2),
+
+	/*
+	 * Compute z' = 2*y*z.
+	 */
+	MMUL(t4, Py, Pz),
+	MSET(Pz, t4),
+	MADD(Pz, t4),
+
+	/*
+	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
+	 * 2*y^2 in t3.
+	 */
+	MSUB(t2, Px),
+	MMUL(Py, t1, t2),
+	MMUL(t4, t3, t3),
+	MSUB(Py, t4),
+	MSUB(Py, t4),
+
+	ENDCODE
+};
+
+/*
+ * Addition formulas are:
+ *
+ *   u1 = x1 * z2^2
+ *   u2 = x2 * z1^2
+ *   s1 = y1 * z2^3
+ *   s2 = y2 * z1^3
+ *   h = u2 - u1
+ *   r = s2 - s1
+ *   x3 = r^2 - h^3 - 2 * u1 * h^2
+ *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+ *   z3 = h * z1 * z2
+ *
+ * If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that
+ * z3 == 0, so the result is correct.
+ * If either of P1 or P2 is infinity, but not both, then z3 == 0, which is
+ * not correct.
+ * h == 0 only if u1 == u2; this happens in two cases:
+ * -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2
+ * -- if s1 != s2 then P1 + P2 == infinity (but neither P1 nor P2 is infinity)
+ *
+ * Thus, the following situations are not handled correctly:
+ * -- P1 = 0 and P2 != 0
+ * -- P1 != 0 and P2 = 0
+ * -- P1 = P2
+ * All other cases are properly computed. However, even in "incorrect"
+ * situations, the three coordinates still are properly formed field
+ * elements.
+ *
+ * The returned flag is cleared if r == 0. This happens in the following
+ * cases:
+ * -- Both points are on the same horizontal line (same Y coordinate).
+ * -- Both points are infinity.
+ * -- One point is infinity and the other is on line Y = 0.
+ * The third case cannot happen with our curves (there is no valid point
+ * on line Y = 0 since that would be a point of order 2). If the two
+ * source points are non-infinity, then remains only the case where the
+ * two points are on the same horizontal line.
+ *
+ * This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and
+ * P2 != 0:
+ * -- If the returned value is not the point at infinity, then it was properly
+ * computed.
+ * -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result
+ * is indeed the point at infinity.
+ * -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should
+ * use the 'double' code.
+ *
+ * Cost: 16 multiplications
+ */
+static const uint16_t code_add[] = {
+	/*
+	 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
+	 */
+	MMUL(t3, P2z, P2z),
+	MMUL(t1, P1x, t3),
+	MMUL(t4, P2z, t3),
+	MMUL(t3, P1y, t4),
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	MMUL(t4, P1z, P1z),
+	MMUL(t2, P2x, t4),
+	MMUL(t5, P1z, t4),
+	MMUL(t4, P2y, t5),
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 */
+	MSUB(t2, t1),
+	MSUB(t4, t3),
+
+	/*
+	 * Report cases where r = 0 through the returned flag.
+	 */
+	MTZ(t4),
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5).
+	 */
+	MMUL(t7, t2, t2),
+	MMUL(t6, t1, t7),
+	MMUL(t5, t7, t2),
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 * t1 and t7 can be used as scratch registers.
+	 */
+	MMUL(P1x, t4, t4),
+	MSUB(P1x, t5),
+	MSUB(P1x, t6),
+	MSUB(P1x, t6),
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	MSUB(t6, P1x),
+	MMUL(P1y, t4, t6),
+	MMUL(t1, t5, t3),
+	MSUB(P1y, t1),
+
+	/*
+	 * Compute z3 = h*z1*z2.
+	 */
+	MMUL(t1, P1z, P2z),
+	MMUL(P1z, t1, t2),
+
+	ENDCODE
+};
+
+/*
+ * Check that the point is on the curve. This code snippet assumes the
+ * following conventions:
+ * -- Coordinates x and y have been freshly decoded in P1 (but not
+ * converted to Montgomery coordinates yet).
+ * -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1.
+ */
+static const uint16_t code_check[] = {
+
+	/* Convert x and y to Montgomery representation. */
+	MMUL(t1, P1x, P2x),
+	MMUL(t2, P1y, P2x),
+	MSET(P1x, t1),
+	MSET(P1y, t2),
+
+	/* Compute x^3 in t1. */
+	MMUL(t2, P1x, P1x),
+	MMUL(t1, P1x, t2),
+
+	/* Subtract 3*x from t1. */
+	MSUB(t1, P1x),
+	MSUB(t1, P1x),
+	MSUB(t1, P1x),
+
+	/* Add b. */
+	MADD(t1, P2y),
+
+	/* Compute y^2 in t2. */
+	MMUL(t2, P1y, P1y),
+
+	/* Compare y^2 with x^3 - 3*x + b; they must match. */
+	MSUB(t1, t2),
+	MTZ(t1),
+
+	/* Set z to 1 (in Montgomery representation). */
+	MMUL(P1z, P2x, P2z),
+
+	ENDCODE
+};
+
+/*
+ * Conversion back to affine coordinates. This code snippet assumes that
+ * the z coordinate of P2 is set to 1 (not in Montgomery representation).
+ */
+static const uint16_t code_affine[] = {
+
+	/* Save z*R in t1. */
+	MSET(t1, P1z),
+
+	/* Compute z^3 in t2. */
+	MMUL(t2, P1z, P1z),
+	MMUL(t3, P1z, t2),
+	MMUL(t2, t3, P2z),
+
+	/* Invert to (1/z^3) in t2. */
+	MINV(t2, t3, t4),
+
+	/* Compute y. */
+	MSET(t3, P1y),
+	MMUL(P1y, t2, t3),
+
+	/* Compute (1/z^2) in t3. */
+	MMUL(t3, t2, t1),
+
+	/* Compute x. */
+	MSET(t2, P1x),
+	MMUL(P1x, t2, t3),
+
+	ENDCODE
+};
+
+static uint32_t
+run_code(jacobian *P1, const jacobian *P2,
+	const curve_params *cc, const uint16_t *code)
+{
+	uint32_t r;	/* returned flag: starts at 1, cleared by MTZ on a zero register */
+	uint16_t t[13][I15_LEN];	/* register file: 0..5 operands, 6..12 temporaries */
+	size_t u;
+
+	r = 1;
+
+	/*
+	 * Copy the two operands into the dedicated registers.
+	 */
+	memcpy(t[P1x], P1->c, 3 * I15_LEN * sizeof(uint16_t));
+	memcpy(t[P2x], P2->c, 3 * I15_LEN * sizeof(uint16_t));
+
+	/*
+	 * Run formulas.
+	 */
+	for (u = 0;; u ++) {
+		unsigned op, d, a, b;
+
+		op = code[u];
+		if (op == 0) {	/* ENDCODE; note MSET(0, 0) would encode to the same value */
+			break;
+		}
+		d = (op >> 8) & 0x0F;	/* destination register */
+		a = (op >> 4) & 0x0F;	/* first source register */
+		b = op & 0x0F;	/* second source register (MMUL / MINV only) */
+		op >>= 12;	/* keep only the opcode nibble */
+		switch (op) {
+			uint32_t ctl;
+			size_t plen;
+			unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3];
+
+		case 0:	/* MSET: t[d] = t[a] */
+			memcpy(t[d], t[a], I15_LEN * sizeof(uint16_t));
+			break;
+		case 1:	/* MADD: t[d] += t[a], then conditionally subtract p */
+			ctl = br_i15_add(t[d], t[a], 1);
+			ctl |= NOT(br_i15_sub(t[d], cc->p, 0));	/* control 0: presumably only probes whether t[d] >= p */
+			br_i15_sub(t[d], cc->p, ctl);
+			break;
+		case 2:	/* MSUB: t[d] -= t[a], adding p back under control of the subtraction's return value */
+			br_i15_add(t[d], cc->p, br_i15_sub(t[d], t[a], 1));
+			break;
+		case 3:	/* MMUL: t[d] = Montgomery product of t[a] and t[b] */
+			br_i15_montymul(t[d], t[a], t[b], cc->p, cc->p0i);
+			break;
+		case 4:	/* MINV: modular exponentiation of t[d]; t[a], t[b] are scratch */
+			plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;	/* byte length derived from the header word */
+			br_i15_encode(tp, plen, cc->p);
+			tp[plen - 1] -= 2;	/* exponent is p with 2 taken off its last byte (p - 2 if the encoding is big-endian) */
+			br_i15_modpow(t[d], tp, plen,
+				cc->p, cc->p0i, t[a], t[b]);
+			break;
+		default:	/* MTZ (and any opcode above 4): clear r when t[d] is zero */
+			r &= ~br_i15_iszero(t[d]);
+			break;
+		}
+	}
+
+	/*
+	 * Copy back result.
+	 */
+	memcpy(P1->c, t[P1x], 3 * I15_LEN * sizeof(uint16_t));	/* only the first operand is written back */
+	return r;
+}
+
+static void
+set_one(uint16_t *x, const uint16_t *p)
+{
+	size_t plen;
+
+	plen = (p[0] + 31) >> 4;	/* word count to clear, derived from the modulus header word */
+	memset(x, 0, plen * sizeof *x);
+	x[0] = p[0];	/* same header word as the modulus */
+	x[1] = 0x0001;	/* first word after the header is 1, all others stay 0 */
+}
+
+static void
+point_zero(jacobian *P, const curve_params *cc)
+{
+	memset(P, 0, sizeof *P);
+	P->c[0][0] = P->c[1][0] = P->c[2][0] = cc->p[0];
+}
+
+static inline void
+point_double(jacobian *P, const curve_params *cc)
+{
+	run_code(P, P, cc, code_double);
+}
+
+static inline uint32_t
+point_add(jacobian *P1, const jacobian *P2, const curve_params *cc)
+{
+	return run_code(P1, P2, cc, code_add);
+}
+
+static void
+point_mul(jacobian *P, const unsigned char *x, size_t xlen,
+	const curve_params *cc)
+{
+	/*
+	 * We do a simple double-and-add ladder with a 2-bit window
+	 * to make only one add every two doublings. We thus first
+	 * precompute 2P and 3P in some local buffers.
+	 *
+	 * We always perform two doublings and one addition; the
+	 * addition is with P, 2P and 3P and is done in a temporary
+	 * array.
+	 *
+	 * The addition code cannot handle cases where one of the
+	 * operands is infinity, which is the case at the start of the
+	 * ladder. We therefore need to maintain a flag that controls
+	 * this situation.
+	 */
+	uint32_t qz;	/* 1 while the accumulator Q is still the point at infinity */
+	jacobian P2, P3, Q, T, U;
+
+	memcpy(&P2, P, sizeof P2);
+	point_double(&P2, cc);	/* P2 = 2P */
+	memcpy(&P3, P, sizeof P3);
+	point_add(&P3, &P2, cc);	/* P3 = P + 2P */
+
+	point_zero(&Q, cc);
+	qz = 1;
+	while (xlen -- > 0) {	/* bytes are consumed in the order given */
+		int k;
+
+		for (k = 6; k >= 0; k -= 2) {	/* four 2-bit windows per byte, high bits first */
+			uint32_t bits;
+			uint32_t bnz;
+
+			point_double(&Q, cc);
+			point_double(&Q, cc);
+			memcpy(&T, P, sizeof T);	/* T defaults to P (the bits == 1 case) */
+			memcpy(&U, &Q, sizeof U);
+			bits = (*x >> k) & (uint32_t)3;
+			bnz = NEQ(bits, 0);
+			CCOPY(EQ(bits, 2), &T, &P2, sizeof T);	/* bits == 2: T = 2P */
+			CCOPY(EQ(bits, 3), &T, &P3, sizeof T);	/* bits == 3: T = 3P */
+			point_add(&U, &T, cc);	/* U = Q + T, computed even when it is not kept */
+			CCOPY(bnz & qz, &Q, &T, sizeof Q);	/* first non-zero window: Q = T */
+			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);	/* later non-zero windows: Q = Q + T */
+			qz &= ~bnz;	/* cleared for good by the first non-zero window */
+		}
+		x ++;
+	}
+	memcpy(P, &Q, sizeof Q);	/* result replaces the input point */
+}
+
+/*
+ * Decode point into Jacobian coordinates. This function does not support
+ * the point at infinity. If the point is invalid then this returns 0, but
+ * the coordinates are still set to properly formed field elements.
+ */
+static uint32_t
+point_decode(jacobian *P, const void *src, size_t len, const curve_params *cc)
+{
+	/*
+	 * Points must use uncompressed format:
+	 * -- first byte is 0x04;
+	 * -- coordinates X and Y use unsigned big-endian, with the same
+	 *    length as the field modulus.
+	 *
+	 * We don't support hybrid format (uncompressed, but first byte
+	 * has value 0x06 or 0x07, depending on the least significant bit
+	 * of Y) because it is rather useless, and explicitly forbidden
+	 * by PKIX (RFC 5480, section 2.2).
+	 *
+	 * We don't support compressed format either, because it is not
+	 * much used in practice (there are or were patent-related
+	 * concerns about point compression, which explains the lack of
+	 * generalised support). Also, point compression support would
+	 * need a bit more code.
+	 */
+	const unsigned char *buf;
+	size_t plen, zlen;	/* plen: coordinate length in bytes; zlen: bytes copied per constant */
+	uint32_t r;
+	jacobian Q;	/* carries the constants read by code_check */
+
+	buf = src;
+	point_zero(P, cc);	/* P is well-formed even on the early return below */
+	plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
+	if (len != 1 + (plen << 1)) {	/* one format byte plus two coordinates */
+		return 0;
+	}
+	r = br_i15_decode_mod(P->c[0], buf + 1, plen, cc->p);
+	r &= br_i15_decode_mod(P->c[1], buf + 1 + plen, plen, cc->p);
+
+	/*
+	 * Check first byte.
+	 */
+	r &= EQ(buf[0], 0x04);
+	/* obsolete
+	r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06)
+		& ~(uint32_t)(buf[0] ^ buf[plen << 1]));
+	*/
+
+	/*
+	 * Convert coordinates and check that the point is valid.
+	 */
+	zlen = ((cc->p[0] + 31) >> 4) * sizeof(uint16_t);
+	memcpy(Q.c[0], cc->R2, zlen);	/* P2x slot = R^2 */
+	memcpy(Q.c[1], cc->b, zlen);	/* P2y slot = b*R */
+	set_one(Q.c[2], cc->p);	/* P2z slot = 1 */
+	r &= ~run_code(P, &Q, cc, code_check);	/* inverted on purpose: run_code yields 0 when the MTZ operand is zero, i.e. when the curve equation holds */
+	return r;
+}
+
+/*
+ * Encode a point. This method assumes that the point is correct and is
+ * not the point at infinity. Encoded size is always 1+2*plen, where
+ * plen is the field modulus length, in bytes.
+ */
+static void
+point_encode(void *dst, const jacobian *P, const curve_params *cc)
+{
+	unsigned char *buf;
+	size_t plen;
+	jacobian Q, T;	/* Q: working copy of P; T: second operand for code_affine */
+
+	buf = dst;
+	plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;	/* coordinate length in bytes */
+	buf[0] = 0x04;	/* uncompressed-format marker */
+	memcpy(&Q, P, sizeof *P);	/* P itself is left untouched */
+	set_one(T.c[2], cc->p);	/* only the z slot of T is initialised; code_affine reads no other second-operand register */
+	run_code(&Q, &T, cc, code_affine);
+	br_i15_encode(buf + 1, plen, Q.c[0]);
+	br_i15_encode(buf + 1 + plen, plen, Q.c[1]);
+}
+
+static const br_ec_curve_def *
+id_to_curve_def(int curve)
+{
+	switch (curve) {
+	case BR_EC_secp256r1:
+		return &br_secp256r1;
+	case BR_EC_secp384r1:
+		return &br_secp384r1;
+	case BR_EC_secp521r1:
+		return &br_secp521r1;
+	}
+	return NULL;
+}
+
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	const br_ec_curve_def *cd;
+
+	cd = id_to_curve_def(curve);	/* NULL for an unsupported curve; not checked before use */
+	*len = cd->generator_len;
+	return cd->generator;
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	const br_ec_curve_def *cd;
+
+	cd = id_to_curve_def(curve);	/* NULL for an unsupported curve; not checked before use */
+	*len = cd->order_len;
+	return cd->order;
+}
+
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	api_generator(curve, len);	/* called only to load *len with the encoded generator length */
+	*len >>= 1;	/* halve, rounding down */
+	return 1;	/* reported offset is always 1 */
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	uint32_t r;
+	const curve_params *cc;
+	jacobian P;
+
+	cc = id_to_curve(curve);
+	if (Glen != cc->point_len) {	/* length must match the curve's encoded point size */
+		return 0;
+	}
+	r = point_decode(&P, G, Glen, cc);
+	point_mul(&P, x, xlen, cc);	/* runs regardless of r; the scalar is not range-checked here */
+	point_encode(G, &P, cc);	/* result overwrites G in place */
+	return r;	/* reflects point decoding only */
+}
+
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	const unsigned char *G;
+	size_t Glen;
+
+	G = api_generator(curve, &Glen);
+	memcpy(R, G, Glen);	/* R doubles as input (generator) and output buffer */
+	api_mul(R, Glen, x, xlen, curve);	/* status returned by api_mul is discarded */
+	return Glen;	/* number of bytes written to R */
+}
+
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	uint32_t r, t, z;	/* r: status; t: flag from point_add; z: "sum has z == 0" */
+	const curve_params *cc;
+	jacobian P, Q;
+
+	/*
+	 * TODO: see about merging the two ladders. Right now, we do
+	 * two independent point multiplications, which is a bit
+	 * wasteful of CPU resources (but yields short code).
+	 */
+
+	cc = id_to_curve(curve);
+	if (len != cc->point_len) {
+		return 0;
+	}
+	r = point_decode(&P, A, len, cc);
+	if (B == NULL) {	/* NULL selects the curve generator */
+		size_t Glen;	/* receives the generator length; not used afterwards */
+
+		B = api_generator(curve, &Glen);
+	}
+	r &= point_decode(&Q, B, len, cc);
+	point_mul(&P, x, xlen, cc);	/* P = x*A */
+	point_mul(&Q, y, ylen, cc);	/* Q = y*B */
+
+	/*
+	 * We want to compute P+Q. Since the base points A and B are distinct
+	 * from infinity, and the multipliers are non-zero and lower than the
+	 * curve order, then we know that P and Q are non-infinity. This
+	 * leaves two special situations to test for:
+	 * -- If P = Q then we must use point_double().
+	 * -- If P+Q = 0 then we must report an error.
+	 */
+	t = point_add(&P, &Q, cc);
+	point_double(&Q, cc);	/* computed unconditionally; kept only when z & ~t */
+	z = br_i15_iszero(P.c[2]);
+
+	/*
+	 * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
+	 * have the following:
+	 *
+	 *   z = 0, t = 0   return P (normal addition)
+	 *   z = 0, t = 1   return P (normal addition)
+	 *   z = 1, t = 0   return Q (a 'double' case)
+	 *   z = 1, t = 1   report an error (P+Q = 0)
+	 */
+	CCOPY(z & ~t, &P, &Q, sizeof Q);
+	point_encode(A, &P, cc);	/* A is overwritten even when an error is reported */
+	r &= ~(z & t);
+
+	return r;
+}
+
+/* see bearssl_ec.h */
+const br_ec_impl br_ec_prime_i15 = {
+	(uint32_t)0x03800000,
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
diff --git a/third_party/bearssl/src/ec_prime_i31.c b/third_party/bearssl/src/ec_prime_i31.c
new file mode 100644
index 0000000..b205f36
--- /dev/null
+++ b/third_party/bearssl/src/ec_prime_i31.c
@@ -0,0 +1,826 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Parameters for supported curves (field modulus, the R^2 constant used
+ * for conversion to Montgomery representation, and 'b' equation
+ * parameter; all use the 'i31' format, and 'b' is in Montgomery form).
+ */
+
+static const uint32_t P256_P[] = {	/* field modulus p */
+	0x00000108,	/* header word: 0x108 - (0x108 >> 5) = 256 bits */
+	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x00000007,
+	0x00000000, 0x00000000, 0x00000040, 0x7FFFFF80,
+	0x000000FF
+};
+
+static const uint32_t P256_R2[] = {	/* loaded as "R^2" by point_decode() */
+	0x00000108,
+	0x00014000, 0x00018000, 0x00000000, 0x7FF40000,
+	0x7FEFFFFF, 0x7FF7FFFF, 0x7FAFFFFF, 0x005FFFFF,
+	0x00000000
+};
+
+static const uint32_t P256_B[] = {	/* curve parameter b (Montgomery) */
+	0x00000108,
+	0x6FEE1803, 0x6229C4BD, 0x21B139BE, 0x327150AA,
+	0x3567802E, 0x3F7212ED, 0x012E4355, 0x782DD38D,
+	0x0000000E
+};
+
+static const uint32_t P384_P[] = {	/* field modulus p */
+	0x0000018C,	/* header word: 0x18C - (0x18C >> 5) = 384 bits */
+	0x7FFFFFFF, 0x00000001, 0x00000000, 0x7FFFFFF8,
+	0x7FFFFFEF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
+	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
+	0x00000FFF
+};
+
+static const uint32_t P384_R2[] = {	/* loaded as "R^2" by point_decode() */
+	0x0000018C,
+	0x00000000, 0x00000080, 0x7FFFFE00, 0x000001FF,
+	0x00000800, 0x00000000, 0x7FFFE000, 0x00001FFF,
+	0x00008000, 0x00008000, 0x00000000, 0x00000000,
+	0x00000000
+};
+
+static const uint32_t P384_B[] = {	/* curve parameter b (Montgomery) */
+	0x0000018C,
+	0x6E666840, 0x070D0392, 0x5D810231, 0x7651D50C,
+	0x17E218D6, 0x1B192002, 0x44EFE441, 0x3A524E2B,
+	0x2719BA5F, 0x41F02209, 0x36C5643E, 0x5813EFFE,
+	0x000008A5
+};
+
+static const uint32_t P521_P[] = {	/* field modulus p */
+	0x00000219,	/* header word: 0x219 - (0x219 >> 5) = 521 bits */
+	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
+	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
+	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
+	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
+	0x01FFFFFF
+};
+
+static const uint32_t P521_R2[] = {	/* loaded as "R^2" by point_decode() */
+	0x00000219,
+	0x00001000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000
+};
+
+static const uint32_t P521_B[] = {	/* curve parameter b (Montgomery) */
+	0x00000219,
+	0x540FC00A, 0x228FEA35, 0x2C34F1EF, 0x67BF107A,
+	0x46FC1CD5, 0x1605E9DD, 0x6937B165, 0x272A3D8F,
+	0x42785586, 0x44C8C778, 0x15F3B8B4, 0x64B73366,
+	0x03BA8B69, 0x0D05B42A, 0x21F929A2, 0x2C31C393,
+	0x00654FAE
+};
+
+typedef struct {	/* per-curve constants used by the interpreter */
+	const uint32_t *p;	/* field modulus (i31 format) */
+	const uint32_t *b;	/* curve 'b' parameter, Montgomery representation */
+	const uint32_t *R2;	/* R^2 constant, used to convert to Montgomery */
+	uint32_t p0i;	/* passed with p to br_i31_montymul()/br_i31_modpow() */
+	size_t point_len;	/* length in bytes of an encoded point */
+} curve_params;
+
+static inline const curve_params *
+id_to_curve(int curve)	/* map a curve ID to its parameter set */
+{
+	static const curve_params pp[] = {
+		{ P256_P, P256_B, P256_R2, 0x00000001,  65 },
+		{ P384_P, P384_B, P384_R2, 0x00000001,  97 },
+		{ P521_P, P521_B, P521_R2, 0x00000001, 133 }
+	};
+	/* No range check: curve must be one of the three IDs tabulated above. */
+	return &pp[curve - BR_EC_secp256r1];
+}
+
+#define I31_LEN   ((BR_MAX_EC_SIZE + 61) / 31)	/* 1 header + ceil(bits/31) */
+
+/*
+ * Type for a point in Jacobian coordinates:
+ * -- three values, x, y and z, in Montgomery representation
+ * -- affine coordinates are X = x / z^2 and Y = y / z^3
+ * -- for the point at infinity, z = 0
+ */
+typedef struct {
+	uint32_t c[3][I31_LEN];	/* c[0] = x, c[1] = y, c[2] = z */
+} jacobian;
+
+/*
+ * We use a custom interpreter that uses thirteen registers, and
+ * only six operations:
+ *    MSET(d, a)       copy a into d
+ *    MADD(d, a)       d = d+a (modular)
+ *    MSUB(d, a)       d = d-a (modular)
+ *    MMUL(d, a, b)    d = a*b (Montgomery multiplication)
+ *    MINV(d, a, b)    invert d modulo p; a and b are used as scratch registers
+ *    MTZ(d)           clear return value if d = 0
+ * Destination of MMUL (d) must be distinct from operands (a and b).
+ * There is no such constraint for MSUB and MADD.
+ * Encoding: opcode in bits 12-15, d in bits 8-11, a in bits 4-7, b in 0-3.
+ * Registers include the operand coordinates, and temporaries.
+ */
+#define MSET(d, a)      (0x0000 + ((d) << 8) + ((a) << 4))
+#define MADD(d, a)      (0x1000 + ((d) << 8) + ((a) << 4))
+#define MSUB(d, a)      (0x2000 + ((d) << 8) + ((a) << 4))
+#define MMUL(d, a, b)   (0x3000 + ((d) << 8) + ((a) << 4) + (b))
+#define MINV(d, a, b)   (0x4000 + ((d) << 8) + ((a) << 4) + (b))
+#define MTZ(d)          (0x5000 + ((d) << 8))
+#define ENDCODE         0	/* terminator; same value as MSET(0, 0) */
+
+/*
+ * Registers for the input operands (indices into run_code()'s array).
+ */
+#define P1x    0
+#define P1y    1
+#define P1z    2
+#define P2x    3
+#define P2y    4
+#define P2z    5
+
+/*
+ * Alternate names for the first input operand.
+ */
+#define Px     0
+#define Py     1
+#define Pz     2
+
+/*
+ * Temporaries.
+ */
+#define t1     6
+#define t2     7
+#define t3     8
+#define t4     9
+#define t5    10
+#define t6    11
+#define t7    12
+
+/*
+ * Extra scratch registers available when there is no second operand (e.g.
+ * for "double" and "affine").
+ */
+#define t8     3	/* same register as P2x */
+#define t9     4	/* same register as P2y */
+#define t10    5	/* same register as P2z */
+
+/*
+ * Doubling formulas are:
+ *
+ *   s = 4*x*y^2
+ *   m = 3*(x + z^2)*(x - z^2)   (curve equation uses -3*x, see code_check)
+ *   x' = m^2 - 2*s
+ *   y' = m*(s - x') - 8*y^4
+ *   z' = 2*y*z
+ *
+ * If y = 0 (P has order 2) then this yields infinity (z' = 0), as it
+ * should. This case should not happen anyway, because our curves have
+ * prime order, and thus do not contain any point of order 2.
+ *
+ * If P is infinity (z = 0), then again the formulas yield infinity,
+ * which is correct. Thus, this code works for all points.
+ *
+ * Cost: 8 multiplications
+ */
+static const uint16_t code_double[] = {
+	/*
+	 * Compute z^2 (in t1).
+	 */
+	MMUL(t1, Pz, Pz),
+
+	/*
+	 * Compute x-z^2 (in t2) and then x+z^2 (in t1).
+	 */
+	MSET(t2, Px),
+	MSUB(t2, t1),
+	MADD(t1, Px),
+
+	/*
+	 * Compute m = 3*(x+z^2)*(x-z^2) (in t1).
+	 */
+	MMUL(t3, t1, t2),	/* t3 = (x+z^2)*(x-z^2) */
+	MSET(t1, t3),
+	MADD(t1, t3),
+	MADD(t1, t3),	/* t1 = 3*t3 = m */
+
+	/*
+	 * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
+	 */
+	MMUL(t3, Py, Py),
+	MADD(t3, t3),	/* t3 = 2*y^2 */
+	MMUL(t2, Px, t3),
+	MADD(t2, t2),	/* t2 = 4*x*y^2 = s */
+
+	/*
+	 * Compute x' = m^2 - 2*s.
+	 */
+	MMUL(Px, t1, t1),
+	MSUB(Px, t2),
+	MSUB(Px, t2),
+
+	/*
+	 * Compute z' = 2*y*z.
+	 */
+	MMUL(t4, Py, Pz),	/* through t4: MMUL output must not be an input */
+	MSET(Pz, t4),
+	MADD(Pz, t4),
+
+	/*
+	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
+	 * 2*y^2 in t3.
+	 */
+	MSUB(t2, Px),
+	MMUL(Py, t1, t2),
+	MMUL(t4, t3, t3),	/* t4 = 4*y^4, subtracted twice below */
+	MSUB(Py, t4),
+	MSUB(Py, t4),
+
+	ENDCODE
+};
+
+/*
+ * Addition formulas are:
+ *
+ *   u1 = x1 * z2^2
+ *   u2 = x2 * z1^2
+ *   s1 = y1 * z2^3
+ *   s2 = y2 * z1^3
+ *   h = u2 - u1
+ *   r = s2 - s1
+ *   x3 = r^2 - h^3 - 2 * u1 * h^2
+ *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+ *   z3 = h * z1 * z2
+ *
+ * If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that
+ * z3 == 0, so the result is correct.
+ * If either of P1 or P2 is infinity, but not both, then z3 == 0, which is
+ * not correct.
+ * h == 0 only if u1 == u2; this happens in two cases:
+ * -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2
+ * -- if s1 != s2 then P1 + P2 == infinity (but neither P1 nor P2 is infinity)
+ *
+ * Thus, the following situations are not handled correctly:
+ * -- P1 = 0 and P2 != 0
+ * -- P1 != 0 and P2 = 0
+ * -- P1 = P2
+ * All other cases are properly computed. However, even in "incorrect"
+ * situations, the three coordinates still are properly formed field
+ * elements.
+ *
+ * The returned flag is cleared if r == 0. This happens in the following
+ * cases:
+ * -- Both points are on the same horizontal line (same Y coordinate).
+ * -- Both points are infinity.
+ * -- One point is infinity and the other is on line Y = 0.
+ * The third case cannot happen with our curves (there is no valid point
+ * on line Y = 0 since that would be a point of order 2). If the two
+ * source points are non-infinity, then remains only the case where the
+ * two points are on the same horizontal line.
+ *
+ * This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and
+ * P2 != 0:
+ * -- If the returned value is not the point at infinity, then it was properly
+ * computed.
+ * -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result
+ * is indeed the point at infinity.
+ * -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should
+ * use the 'double' code.
+ *
+ * Cost: 16 multiplications
+ */
+static const uint16_t code_add[] = {
+	/*
+	 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
+	 */
+	MMUL(t3, P2z, P2z),
+	MMUL(t1, P1x, t3),
+	MMUL(t4, P2z, t3),	/* t4 = z2^3 */
+	MMUL(t3, P1y, t4),
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	MMUL(t4, P1z, P1z),
+	MMUL(t2, P2x, t4),
+	MMUL(t5, P1z, t4),	/* t5 = z1^3 */
+	MMUL(t4, P2y, t5),
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 */
+	MSUB(t2, t1),
+	MSUB(t4, t3),
+
+	/*
+	 * Report cases where r = 0 through the returned flag.
+	 */
+	MTZ(t4),
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5).
+	 */
+	MMUL(t7, t2, t2),	/* t7 = h^2 */
+	MMUL(t6, t1, t7),
+	MMUL(t5, t7, t2),
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 * t1 and t7 can be used as scratch registers.
+	 */
+	MMUL(P1x, t4, t4),
+	MSUB(P1x, t5),
+	MSUB(P1x, t6),
+	MSUB(P1x, t6),
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	MSUB(t6, P1x),
+	MMUL(P1y, t4, t6),
+	MMUL(t1, t5, t3),	/* t1 = s1*h^3 */
+	MSUB(P1y, t1),
+
+	/*
+	 * Compute z3 = h*z1*z2.
+	 */
+	MMUL(t1, P1z, P2z),
+	MMUL(P1z, t1, t2),
+
+	ENDCODE
+};
+
+/*
+ * Check that the point is on the curve. This code snippet assumes the
+ * following conventions:
+ * -- Coordinates x and y have been freshly decoded in P1 (but not
+ * converted to Montgomery representation yet).
+ * -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1.
+ */
+static const uint16_t code_check[] = {
+
+	/* Convert x and y to Montgomery representation. */
+	MMUL(t1, P1x, P2x),
+	MMUL(t2, P1y, P2x),
+	MSET(P1x, t1),
+	MSET(P1y, t2),
+
+	/* Compute x^3 in t1. */
+	MMUL(t2, P1x, P1x),
+	MMUL(t1, P1x, t2),
+
+	/* Subtract 3*x from t1. */
+	MSUB(t1, P1x),
+	MSUB(t1, P1x),
+	MSUB(t1, P1x),
+
+	/* Add b. */
+	MADD(t1, P2y),
+
+	/* Compute y^2 in t2. */
+	MMUL(t2, P1y, P1y),
+
+	/* Compare y^2 with x^3 - 3*x + b; they must match. */
+	MSUB(t1, t2),
+	MTZ(t1),	/* flag is CLEARED on a match; point_decode() inverts it */
+
+	/* Set z to 1 (in Montgomery representation). */
+	MMUL(P1z, P2x, P2z),
+
+	ENDCODE
+};
+
+/*
+ * Conversion back to affine coordinates. This code snippet assumes that
+ * the z coordinate of P2 is set to 1 (not in Montgomery representation).
+ */
+static const uint16_t code_affine[] = {
+
+	/* Save z*R in t1. */
+	MSET(t1, P1z),
+
+	/* Compute z^3 in t2. */
+	MMUL(t2, P1z, P1z),
+	MMUL(t3, P1z, t2),
+	MMUL(t2, t3, P2z),	/* multiply by plain 1: leave Montgomery form */
+
+	/* Invert to (1/z^3) in t2. */
+	MINV(t2, t3, t4),	/* t3 and t4 are scratch only */
+
+	/* Compute y. */
+	MSET(t3, P1y),
+	MMUL(P1y, t2, t3),	/* y / z^3, no longer in Montgomery form */
+
+	/* Compute (1/z^2) in t3. */
+	MMUL(t3, t2, t1),
+
+	/* Compute x. */
+	MSET(t2, P1x),
+	MMUL(P1x, t2, t3),	/* x / z^2, no longer in Montgomery form */
+
+	ENDCODE
+};
+
+static uint32_t
+run_code(jacobian *P1, const jacobian *P2,
+	const curve_params *cc, const uint16_t *code)
+{
+	uint32_t r;
+	uint32_t t[13][I31_LEN];
+	size_t u;
+	/* Interpret a code_* program: P1 is input and output, P2 input only. */
+	r = 1;	/* returned flag; only the MTZ opcode can clear it */
+
+	/*
+	 * Copy the two operands into registers 0..2 (P1) and 3..5 (P2).
+	 */
+	memcpy(t[P1x], P1->c, 3 * I31_LEN * sizeof(uint32_t));
+	memcpy(t[P2x], P2->c, 3 * I31_LEN * sizeof(uint32_t));
+
+	/*
+	 * Run formulas until the ENDCODE (zero) word is reached.
+	 */
+	for (u = 0;; u ++) {
+		unsigned op, d, a, b;
+
+		op = code[u];
+		if (op == 0) {
+			break;
+		}
+		d = (op >> 8) & 0x0F;
+		a = (op >> 4) & 0x0F;
+		b = op & 0x0F;
+		op >>= 12;	/* keep only the opcode number */
+		switch (op) {
+			uint32_t ctl;
+			size_t plen;
+			unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3];
+
+		case 0:	/* MSET */
+			memcpy(t[d], t[a], I31_LEN * sizeof(uint32_t));
+			break;
+		case 1:	/* MADD: add, then subtract p on carry or when >= p */
+			ctl = br_i31_add(t[d], t[a], 1);
+			ctl |= NOT(br_i31_sub(t[d], cc->p, 0));
+			br_i31_sub(t[d], cc->p, ctl);
+			break;
+		case 2:	/* MSUB: subtract; sub's result controls adding p */
+			br_i31_add(t[d], cc->p, br_i31_sub(t[d], t[a], 1));
+			break;
+		case 3:	/* MMUL */
+			br_i31_montymul(t[d], t[a], t[b], cc->p, cc->p0i);
+			break;
+		case 4:	/* MINV: raise t[d] to the power p-2 */
+			plen = (cc->p[0] - (cc->p[0] >> 5) + 7) >> 3;
+			br_i31_encode(tp, plen, cc->p);
+			tp[plen - 1] -= 2;	/* exponent p-2; assumes big-endian tp */
+			br_i31_modpow(t[d], tp, plen,
+				cc->p, cc->p0i, t[a], t[b]);
+			break;
+		default:	/* MTZ (opcode 5) and any higher opcode value */
+			r &= ~br_i31_iszero(t[d]);
+			break;
+		}
+	}
+
+	/*
+	 * Copy back result (registers 0..2 only; P2 is never written).
+	 */
+	memcpy(P1->c, t[P1x], 3 * I31_LEN * sizeof(uint32_t));
+	return r;
+}
+
+static void
+set_one(uint32_t *x, const uint32_t *p)
+{
+	size_t plen;
+	/* Set x to the plain integer 1, using the same header word as p. */
+	plen = (p[0] + 63) >> 5;	/* word count, header word included */
+	memset(x, 0, plen * sizeof *x);
+	x[0] = p[0];
+	x[1] = 0x00000001;	/* first value word (index 0 is the header) */
+}
+
+static void
+point_zero(jacobian *P, const curve_params *cc)
+{
+	memset(P, 0, sizeof *P);	/* x = y = z = 0; z = 0 means infinity */
+	P->c[0][0] = P->c[1][0] = P->c[2][0] = cc->p[0];	/* header words */
+}
+
+static inline void
+point_double(jacobian *P, const curve_params *cc)	/* P = 2*P, in place */
+{
+	run_code(P, P, cc, code_double);	/* P2 = P: code_double ignores P2 */
+}
+
+static inline uint32_t
+point_add(jacobian *P1, const jacobian *P2, const curve_params *cc)
+{	/* P1 = P1+P2; the flag is 0 when r == 0, see code_add for caveats */
+	return run_code(P1, P2, cc, code_add);
+}
+
+static void
+point_mul(jacobian *P, const unsigned char *x, size_t xlen,
+	const curve_params *cc)
+{
+	/*
+	 * We do a simple double-and-add ladder with a 2-bit window
+	 * to make only one add every two doublings. We thus first
+	 * precompute 2P and 3P in some local buffers.
+	 *
+	 * We always perform two doublings and one addition; the
+	 * addition is with P, 2P and 3P and is done in a temporary
+	 * array.
+	 *
+	 * The addition code cannot handle cases where one of the
+	 * operands is infinity, which is the case at the start of the
+	 * ladder. We therefore need to maintain a flag that controls
+	 * this situation.
+	 */
+	uint32_t qz;
+	jacobian P2, P3, Q, T, U;
+
+	memcpy(&P2, P, sizeof P2);
+	point_double(&P2, cc);	/* P2 = 2*P */
+	memcpy(&P3, P, sizeof P3);
+	point_add(&P3, &P2, cc);	/* P3 = P + 2*P */
+
+	point_zero(&Q, cc);
+	qz = 1;	/* 1 while the accumulator Q is still infinity */
+	while (xlen -- > 0) {
+		int k;
+		/* Bytes are read first to last, bit pairs from the top down. */
+		for (k = 6; k >= 0; k -= 2) {
+			uint32_t bits;
+			uint32_t bnz;
+
+			point_double(&Q, cc);
+			point_double(&Q, cc);
+			memcpy(&T, P, sizeof T);
+			memcpy(&U, &Q, sizeof U);
+			bits = (*x >> k) & (uint32_t)3;
+			bnz = NEQ(bits, 0);
+			CCOPY(EQ(bits, 2), &T, &P2, sizeof T);
+			CCOPY(EQ(bits, 3), &T, &P3, sizeof T);	/* T = bits*P */
+			point_add(&U, &T, cc);	/* U = Q+T, computed every round */
+			CCOPY(bnz & qz, &Q, &T, sizeof Q);	/* Q was infinity */
+			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);	/* normal case */
+			qz &= ~bnz;
+		}
+		x ++;
+	}
+	memcpy(P, &Q, sizeof Q);
+}
+
+/*
+ * Decode point into Jacobian coordinates. This function does not support
+ * the point at infinity. If the point is invalid then this returns 0, but
+ * the coordinates are still set to properly formed field elements.
+ */
+static uint32_t
+point_decode(jacobian *P, const void *src, size_t len, const curve_params *cc)
+{
+	/*
+	 * Points must use uncompressed format:
+	 * -- first byte is 0x04;
+	 * -- coordinates X and Y use unsigned big-endian, with the same
+	 *    length as the field modulus.
+	 *
+	 * We don't support hybrid format (uncompressed, but first byte
+	 * has value 0x06 or 0x07, depending on the least significant bit
+	 * of Y) because it is rather useless, and explicitly forbidden
+	 * by PKIX (RFC 5480, section 2.2).
+	 *
+	 * We don't support compressed format either, because it is not
+	 * much used in practice (there are or were patent-related
+	 * concerns about point compression, which explains the lack of
+	 * generalised support). Also, point compression support would
+	 * need a bit more code.
+	 */
+	const unsigned char *buf;
+	size_t plen, zlen;
+	uint32_t r;
+	jacobian Q;
+
+	buf = src;
+	point_zero(P, cc);	/* P stays all-zero if the length test fails */
+	plen = (cc->p[0] - (cc->p[0] >> 5) + 7) >> 3;	/* modulus bytes */
+	if (len != 1 + (plen << 1)) {
+		return 0;
+	}
+	r = br_i31_decode_mod(P->c[0], buf + 1, plen, cc->p);
+	r &= br_i31_decode_mod(P->c[1], buf + 1 + plen, plen, cc->p);
+
+	/*
+	 * Check first byte.
+	 */
+	r &= EQ(buf[0], 0x04);
+	/* obsolete
+	r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06)
+		& ~(uint32_t)(buf[0] ^ buf[plen << 1]));
+	*/
+
+	/*
+	 * Convert coordinates and check that the point is valid.
+	 */
+	zlen = ((cc->p[0] + 63) >> 5) * sizeof(uint32_t);	/* bytes to copy */
+	memcpy(Q.c[0], cc->R2, zlen);
+	memcpy(Q.c[1], cc->b, zlen);
+	set_one(Q.c[2], cc->p);
+	r &= ~run_code(P, &Q, cc, code_check);	/* 0 from code_check = valid */
+	return r;
+}
+
+/*
+ * Encode a point. This method assumes that the point is correct and is
+ * not the point at infinity. Encoded size is always 1+2*plen, where
+ * plen is the field modulus length, in bytes.
+ */
+static void
+point_encode(void *dst, const jacobian *P, const curve_params *cc)
+{
+	unsigned char *buf;
+	uint32_t xbl;
+	size_t plen;
+	jacobian Q, T;
+
+	buf = dst;
+	xbl = cc->p[0];
+	xbl -= (xbl >> 5);	/* modulus length in bits */
+	plen = (xbl + 7) >> 3;
+	buf[0] = 0x04;	/* uncompressed format marker */
+	memcpy(&Q, P, sizeof *P);	/* work on a copy; P is const */
+	set_one(T.c[2], cc->p);	/* only T's z is set; code_affine reads no other */
+	run_code(&Q, &T, cc, code_affine);
+	br_i31_encode(buf + 1, plen, Q.c[0]);
+	br_i31_encode(buf + 1 + plen, plen, Q.c[1]);
+}
+
+static const br_ec_curve_def *
+id_to_curve_def(int curve)	/* map a curve ID to its descriptor */
+{
+	switch (curve) {
+	case BR_EC_secp256r1:
+		return &br_secp256r1;
+	case BR_EC_secp384r1:
+		return &br_secp384r1;
+	case BR_EC_secp521r1:
+		return &br_secp521r1;
+	}
+	return NULL;	/* any other ID; callers here do not test for this */
+}
+
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	const br_ec_curve_def *cd;
+	/* Return the curve's encoded generator and store its length in *len. */
+	cd = id_to_curve_def(curve);	/* NULL for other IDs; not checked */
+	*len = cd->generator_len;
+	return cd->generator;
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	const br_ec_curve_def *cd;
+	/* Return the curve's order bytes and store their length in *len. */
+	cd = id_to_curve_def(curve);	/* NULL for other IDs; not checked */
+	*len = cd->order_len;
+	return cd->order;
+}
+
+static size_t
+api_xoff(int curve, size_t *len)	/* where X sits in an encoded point */
+{
+	api_generator(curve, len);	/* called only for the encoded length */
+	*len >>= 1;	/* (1 + 2*plen) >> 1 = plen, the X coordinate length */
+	return 1;	/* X starts right after the 0x04 format byte */
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	uint32_t r;
+	const curve_params *cc;
+	jacobian P;
+	/* Replace the encoded point G with x*G; returns point_decode()'s flag. */
+	cc = id_to_curve(curve);
+	if (Glen != cc->point_len) {
+		return 0;	/* wrong encoded length: G is left untouched */
+	}
+	r = point_decode(&P, G, Glen, cc);
+	point_mul(&P, x, xlen, cc);
+	point_encode(G, &P, cc);	/* G is overwritten even when r is 0 */
+	return r;
+}
+
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	const unsigned char *G;
+	size_t Glen;
+	/* Write x*generator into R (Glen bytes) and return that length. */
+	G = api_generator(curve, &Glen);
+	memcpy(R, G, Glen);
+	api_mul(R, Glen, x, xlen, curve);	/* status flag is discarded */
+	return Glen;
+}
+
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	uint32_t r, t, z;
+	const curve_params *cc;
+	jacobian P, Q;
+	/* Compute x*A + y*B and encode the sum over A; NULL B = generator. */
+	/*
+	 * TODO: see about merging the two ladders. Right now, we do
+	 * two independent point multiplications, which is a bit
+	 * wasteful of CPU resources (but yields short code).
+	 */
+
+	cc = id_to_curve(curve);
+	if (len != cc->point_len) {
+		return 0;	/* wrong encoded length: A is left untouched */
+	}
+	r = point_decode(&P, A, len, cc);
+	if (B == NULL) {
+		size_t Glen;
+
+		B = api_generator(curve, &Glen);	/* Glen is not used */
+	}
+	r &= point_decode(&Q, B, len, cc);	/* r = 1 only if both are valid */
+	point_mul(&P, x, xlen, cc);	/* P = x*A */
+	point_mul(&Q, y, ylen, cc);	/* Q = y*B */
+
+	/*
+	 * We want to compute P+Q. Since the base points A and B are distinct
+	 * from infinity, and the multipliers are non-zero and lower than the
+	 * curve order, then we know that P and Q are non-infinity. This
+	 * leaves two special situations to test for:
+	 * -- If P = Q then we must use point_double().
+	 * -- If P+Q = 0 then we must report an error.
+	 */
+	t = point_add(&P, &Q, cc);	/* P = P+Q with the generic formulas */
+	point_double(&Q, cc);	/* Q = 2*Q, always computed, used only if P = Q */
+	z = br_i31_iszero(P.c[2]);	/* tests the z coordinate of the sum */
+
+	/*
+	 * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
+	 * have the following:
+	 *
+	 *   z = 0, t = 0   return P (normal addition)
+	 *   z = 0, t = 1   return P (normal addition)
+	 *   z = 1, t = 0   return Q (a 'double' case)
+	 *   z = 1, t = 1   report an error (P+Q = 0)
+	 */
+	CCOPY(z & ~t, &P, &Q, sizeof Q);	/* 'double' case: take 2*Q */
+	point_encode(A, &P, cc);	/* A is overwritten even when r ends up 0 */
+	r &= ~(z & t);	/* P+Q = 0: clear the returned flag */
+
+	return r;
+}
+
+/* Descriptor exporting the "i31" functions defined above; see bearssl_ec.h */
+const br_ec_impl br_ec_prime_i31 = {
+	(uint32_t)0x03800000,	/* bits 23..25 set; presumably supported_curves */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
diff --git a/third_party/bearssl/src/ec_pubkey.c b/third_party/bearssl/src/ec_pubkey.c
new file mode 100644
index 0000000..383ff28
--- /dev/null
+++ b/third_party/bearssl/src/ec_pubkey.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static const unsigned char POINT_LEN[] = {	/* indexed by curve ID */
+	  0,   /* 0: not a valid curve ID */
+	 43,   /* sect163k1 */
+	 43,   /* sect163r1 */
+	 43,   /* sect163r2 */
+	 51,   /* sect193r1 */
+	 51,   /* sect193r2 */
+	 61,   /* sect233k1 */
+	 61,   /* sect233r1 */
+	 61,   /* sect239k1 */
+	 73,   /* sect283k1 */
+	 73,   /* sect283r1 */
+	105,   /* sect409k1 */
+	105,   /* sect409r1 */
+	145,   /* sect571k1 */
+	145,   /* sect571r1 */
+	 41,   /* secp160k1 */
+	 41,   /* secp160r1 */
+	 41,   /* secp160r2 */
+	 49,   /* secp192k1 */
+	 49,   /* secp192r1 */
+	 57,   /* secp224k1 */
+	 57,   /* secp224r1 */
+	 65,   /* secp256k1 */
+	 65,   /* secp256r1 */
+	 97,   /* secp384r1 */
+	133,   /* secp521r1 */
+	 65,   /* brainpoolP256r1 */
+	 97,   /* brainpoolP384r1 */
+	129,   /* brainpoolP512r1 */
+	 32,   /* curve25519 */
+	 56,   /* curve448 */
+};
+
+/* see bearssl_ec.h */
+size_t
+br_ec_compute_pub(const br_ec_impl *impl, br_ec_public_key *pk,
+	void *kbuf, const br_ec_private_key *sk)
+{
+	int curve;
+	size_t len;
+	/* Compute sk's public point into kbuf; 0 means unsupported curve. */
+	curve = sk->curve;
+	if (curve < 0 || curve >= 32 || curve >= (int)(sizeof POINT_LEN)
+		|| ((impl->supported_curves >> curve) & 1) == 0)
+	{
+		return 0;	/* ID out of range, or not enabled in impl's mask */
+	}
+	if (kbuf == NULL) {
+		return POINT_LEN[curve];	/* size query: nothing is computed */
+	}
+	len = impl->mulgen(kbuf, sk->x, sk->xlen, curve);
+	if (pk != NULL) {
+		pk->curve = curve;
+		pk->q = kbuf;	/* pk points into kbuf; no copy is made */
+		pk->qlen = len;
+	}
+	return len;
+}
diff --git a/third_party/bearssl/src/ec_secp256r1.c b/third_party/bearssl/src/ec_secp256r1.c
new file mode 100644
index 0000000..a9d6c45
--- /dev/null
+++ b/third_party/bearssl/src/ec_secp256r1.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static const unsigned char P256_N[] = {	/* passed as the curve order */
+	0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xBC, 0xE6, 0xFA, 0xAD, 0xA7, 0x17, 0x9E, 0x84,
+	0xF3, 0xB9, 0xCA, 0xC2, 0xFC, 0x63, 0x25, 0x51
+};
+
+static const unsigned char P256_G[] = {	/* generator: 0x04, then X and Y */
+	0x04, 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42,
+	0x47, 0xF8, 0xBC, 0xE6, 0xE5, 0x63, 0xA4, 0x40,
+	0xF2, 0x77, 0x03, 0x7D, 0x81, 0x2D, 0xEB, 0x33,
+	0xA0, 0xF4, 0xA1, 0x39, 0x45, 0xD8, 0x98, 0xC2,
+	0x96, 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F,
+	0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C, 0x0F, 0x9E,
+	0x16, 0x2B, 0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E,
+	0xCE, 0xCB, 0xB6, 0x40, 0x68, 0x37, 0xBF, 0x51,
+	0xF5
+};
+
+/* see inner.h */
+const br_ec_curve_def br_secp256r1 = {
+	BR_EC_secp256r1,	/* curve ID */
+	P256_N, sizeof P256_N,	/* presumably order and order_len */
+	P256_G, sizeof P256_G	/* presumably generator and generator_len */
+};
diff --git a/third_party/bearssl/src/ec_secp384r1.c b/third_party/bearssl/src/ec_secp384r1.c
new file mode 100644
index 0000000..693d93e
--- /dev/null
+++ b/third_party/bearssl/src/ec_secp384r1.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static const unsigned char P384_N[] = {	/* passed as the curve order */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
+	0xC7, 0x63, 0x4D, 0x81, 0xF4, 0x37, 0x2D, 0xDF, 
+	0x58, 0x1A, 0x0D, 0xB2, 0x48, 0xB0, 0xA7, 0x7A, 
+	0xEC, 0xEC, 0x19, 0x6A, 0xCC, 0xC5, 0x29, 0x73
+};
+
+static const unsigned char P384_G[] = {	/* generator: 0x04, then X and Y */
+	0x04, 0xAA, 0x87, 0xCA, 0x22, 0xBE, 0x8B, 0x05,
+	0x37, 0x8E, 0xB1, 0xC7, 0x1E, 0xF3, 0x20, 0xAD,
+	0x74, 0x6E, 0x1D, 0x3B, 0x62, 0x8B, 0xA7, 0x9B,
+	0x98, 0x59, 0xF7, 0x41, 0xE0, 0x82, 0x54, 0x2A,
+	0x38, 0x55, 0x02, 0xF2, 0x5D, 0xBF, 0x55, 0x29,
+	0x6C, 0x3A, 0x54, 0x5E, 0x38, 0x72, 0x76, 0x0A,
+	0xB7, 0x36, 0x17, 0xDE, 0x4A, 0x96, 0x26, 0x2C,
+	0x6F, 0x5D, 0x9E, 0x98, 0xBF, 0x92, 0x92, 0xDC,
+	0x29, 0xF8, 0xF4, 0x1D, 0xBD, 0x28, 0x9A, 0x14,
+	0x7C, 0xE9, 0xDA, 0x31, 0x13, 0xB5, 0xF0, 0xB8,
+	0xC0, 0x0A, 0x60, 0xB1, 0xCE, 0x1D, 0x7E, 0x81,
+	0x9D, 0x7A, 0x43, 0x1D, 0x7C, 0x90, 0xEA, 0x0E,
+	0x5F
+};
+
+/* see inner.h */
+const br_ec_curve_def br_secp384r1 = {
+	BR_EC_secp384r1,	/* curve ID */
+	P384_N, sizeof P384_N,	/* presumably order and order_len */
+	P384_G, sizeof P384_G	/* presumably generator and generator_len */
+};
diff --git a/third_party/bearssl/src/ec_secp521r1.c b/third_party/bearssl/src/ec_secp521r1.c
new file mode 100644
index 0000000..161acd0
--- /dev/null
+++ b/third_party/bearssl/src/ec_secp521r1.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static const unsigned char P521_N[] = {	/* passed as the curve order */
+	0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFA, 0x51, 0x86, 0x87, 0x83, 0xBF, 0x2F,
+	0x96, 0x6B, 0x7F, 0xCC, 0x01, 0x48, 0xF7, 0x09,
+	0xA5, 0xD0, 0x3B, 0xB5, 0xC9, 0xB8, 0x89, 0x9C,
+	0x47, 0xAE, 0xBB, 0x6F, 0xB7, 0x1E, 0x91, 0x38,
+	0x64, 0x09
+};
+
+static const unsigned char P521_G[] = {	/* generator: 0x04, then X and Y */
+	0x04, 0x00, 0xC6, 0x85, 0x8E, 0x06, 0xB7, 0x04,
+	0x04, 0xE9, 0xCD, 0x9E, 0x3E, 0xCB, 0x66, 0x23,
+	0x95, 0xB4, 0x42, 0x9C, 0x64, 0x81, 0x39, 0x05,
+	0x3F, 0xB5, 0x21, 0xF8, 0x28, 0xAF, 0x60, 0x6B,
+	0x4D, 0x3D, 0xBA, 0xA1, 0x4B, 0x5E, 0x77, 0xEF,
+	0xE7, 0x59, 0x28, 0xFE, 0x1D, 0xC1, 0x27, 0xA2,
+	0xFF, 0xA8, 0xDE, 0x33, 0x48, 0xB3, 0xC1, 0x85,
+	0x6A, 0x42, 0x9B, 0xF9, 0x7E, 0x7E, 0x31, 0xC2,
+	0xE5, 0xBD, 0x66, 0x01, 0x18, 0x39, 0x29, 0x6A,
+	0x78, 0x9A, 0x3B, 0xC0, 0x04, 0x5C, 0x8A, 0x5F,
+	0xB4, 0x2C, 0x7D, 0x1B, 0xD9, 0x98, 0xF5, 0x44,
+	0x49, 0x57, 0x9B, 0x44, 0x68, 0x17, 0xAF, 0xBD,
+	0x17, 0x27, 0x3E, 0x66, 0x2C, 0x97, 0xEE, 0x72,
+	0x99, 0x5E, 0xF4, 0x26, 0x40, 0xC5, 0x50, 0xB9,
+	0x01, 0x3F, 0xAD, 0x07, 0x61, 0x35, 0x3C, 0x70,
+	0x86, 0xA2, 0x72, 0xC2, 0x40, 0x88, 0xBE, 0x94,
+	0x76, 0x9F, 0xD1, 0x66, 0x50
+};
+
+/* see inner.h */
+const br_ec_curve_def br_secp521r1 = {
+	BR_EC_secp521r1,	/* curve ID */
+	P521_N, sizeof P521_N,	/* presumably order and order_len */
+	P521_G, sizeof P521_G	/* presumably generator and generator_len */
+};
diff --git a/third_party/bearssl/src/ecdsa_atr.c b/third_party/bearssl/src/ecdsa_atr.c
new file mode 100644
index 0000000..3a11226
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_atr.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ec.h: in-place conversion of an ECDSA signature from ASN.1 to raw (r || s) format */
+size_t
+br_ecdsa_asn1_to_raw(void *sig, size_t sig_len)
+{
+	/*
+	 * Convert the ASN.1 ECDSA signature in sig[0..sig_len-1] (a
+	 * SEQUENCE of two INTEGER values, r then s) to "raw" format, in
+	 * place: leading zeros are dropped, then r and s are left-padded
+	 * to a common length and concatenated. Returned value is the raw
+	 * signature length in bytes, or 0 if the input is rejected.
+	 *
+	 * The parser is a bit lenient: it accepts some non-minimal (but
+	 * unambiguous) encodings of lengths and integers. Each INTEGER
+	 * must fit in 127 bytes, so the output is at most 254 bytes.
+	 */
+
+	unsigned char *buf, *r, *s;
+	size_t zlen, rlen, slen, off;
+	unsigned char tmp[254];	/* staging area: 2 * 127 bytes, the largest possible output */
+
+	buf = sig;
+	if (sig_len < 8) {	/* also keeps the header reads of buf[0..4] below in bounds */
+		return 0;
+	}
+
+	/*
+	 * The input must start with the SEQUENCE tag (0x30).
+	 */
+	if (buf[0] != 0x30) {
+		return 0;
+	}
+
+	/*
+	 * SEQUENCE length: short form, or 0x81 plus one byte (contents
+	 * capped at 255 bytes); it must cover the rest of the input.
+	 * NOTE(review): 0x80 is treated as short-form length 128, though
+	 * BER defines it as the indefinite form -- confirm intent.
+	 */
+	zlen = buf[1];
+	if (zlen > 0x80) {
+		if (zlen != 0x81) {
+			return 0;
+		}
+		zlen = buf[2];	/* at most 255: enough for subgroup orders up to 999 bits */
+		if (zlen != sig_len - 3) {
+			return 0;
+		}
+		off = 3;
+	} else {
+		if (zlen != sig_len - 2) {
+			return 0;
+		}
+		off = 2;
+	}
+
+	/*
+	 * First INTEGER (r): tag 0x02, then a one-byte (short form) length.
+	 */
+	if (buf[off ++] != 0x02) {
+		return 0;
+	}
+	rlen = buf[off ++];
+	if (rlen >= 0x80) {
+		return 0;
+	}
+	r = buf + off;
+	off += rlen;	/* not yet validated; the bound check just below covers it */
+
+	/*
+	 * Second INTEGER (s); it must end exactly at the end of the input.
+	 */
+	if (off + 2 > sig_len) {	/* r's bytes and the two header bytes of s must lie inside the input */
+		return 0;
+	}
+	if (buf[off ++] != 0x02) {
+		return 0;
+	}
+	slen = buf[off ++];
+	if (slen >= 0x80 || slen != sig_len - off) {
+		return 0;
+	}
+	s = buf + off;
+
+	/*
+	 * Skip leading zero bytes of r and s (either may become empty).
+	 */
+	while (rlen > 0 && *r == 0) {
+		rlen --;
+		r ++;
+	}
+	while (slen > 0 && *s == 0) {
+		slen --;
+		s ++;
+	}
+
+	/*
+	 * Pad both integers (with zeros on the left) to the longer of
+	 * the two lengths, assembling r || s in the temporary buffer;
+	 * then copy the result back over the signature buffer.
+	 */
+	zlen = rlen > slen ? rlen : slen;
+	sig_len = zlen << 1;	/* may exceed the input length: the caller's buffer must have room */
+	memset(tmp, 0, sig_len);
+	memcpy(tmp + zlen - rlen, r, rlen);	/* r right-aligned in the first half */
+	memcpy(tmp + sig_len - slen, s, slen);	/* s right-aligned in the second half */
+	memcpy(sig, tmp, sig_len);
+	return sig_len;	/* 0 when both r and s are empty after stripping */
+}
diff --git a/third_party/bearssl/src/ecdsa_default_sign_asn1.c b/third_party/bearssl/src/ecdsa_default_sign_asn1.c
new file mode 100644
index 0000000..afbf8ac
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_default_sign_asn1.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ec.h: returns the default ECDSA signing function producing ASN.1 signatures */
+br_ecdsa_sign
+br_ecdsa_sign_asn1_get_default(void)
+{
+#if BR_LOMUL	/* compile-time switch: pick the i15 implementation when set */
+	return &br_ecdsa_i15_sign_asn1;
+#else
+	return &br_ecdsa_i31_sign_asn1;
+#endif
+}
diff --git a/third_party/bearssl/src/ecdsa_default_sign_raw.c b/third_party/bearssl/src/ecdsa_default_sign_raw.c
new file mode 100644
index 0000000..287c970
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_default_sign_raw.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ec.h: returns the default ECDSA signing function producing raw (r || s) signatures */
+br_ecdsa_sign
+br_ecdsa_sign_raw_get_default(void)
+{
+#if BR_LOMUL	/* compile-time switch: pick the i15 implementation when set */
+	return &br_ecdsa_i15_sign_raw;
+#else
+	return &br_ecdsa_i31_sign_raw;
+#endif
+}
diff --git a/third_party/bearssl/src/ecdsa_default_vrfy_asn1.c b/third_party/bearssl/src/ecdsa_default_vrfy_asn1.c
new file mode 100644
index 0000000..fe0996e
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_default_vrfy_asn1.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ec.h: returns the default ECDSA verification function for ASN.1 signatures */
+br_ecdsa_vrfy
+br_ecdsa_vrfy_asn1_get_default(void)
+{
+#if BR_LOMUL	/* compile-time switch: pick the i15 implementation when set */
+	return &br_ecdsa_i15_vrfy_asn1;
+#else
+	return &br_ecdsa_i31_vrfy_asn1;
+#endif
+}
diff --git a/third_party/bearssl/src/ecdsa_default_vrfy_raw.c b/third_party/bearssl/src/ecdsa_default_vrfy_raw.c
new file mode 100644
index 0000000..e564a10
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_default_vrfy_raw.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ec.h: returns the default ECDSA verification function for raw (r || s) signatures */
+br_ecdsa_vrfy
+br_ecdsa_vrfy_raw_get_default(void)
+{
+#if BR_LOMUL	/* compile-time switch: pick the i15 implementation when set */
+	return &br_ecdsa_i15_vrfy_raw;
+#else
+	return &br_ecdsa_i31_vrfy_raw;
+#endif
+}
diff --git a/third_party/bearssl/src/ecdsa_i15_bits.c b/third_party/bearssl/src/ecdsa_i15_bits.c
new file mode 100644
index 0000000..402d14a
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_i15_bits.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: load a byte string (hash or DRBG output) into i15 integer x, truncated to the modulus bit length */
+void
+br_ecdsa_i15_bits2int(uint16_t *x,
+	const void *src, size_t len, uint32_t ebitlen)
+{
+	uint32_t bitlen, hbitlen;
+	int sc;
+
+	bitlen = ebitlen - (ebitlen >> 4);	/* maps 16*q + r to 15*q + r: presumably the true bit length for 15-bit words */
+	hbitlen = (uint32_t)len << 3;	/* input size in bits */
+	if (hbitlen > bitlen) {
+		len = (bitlen + 7) >> 3;	/* keep only the first ceil(bitlen / 8) input bytes */
+		sc = (int)((hbitlen - bitlen) & 7);	/* surplus low-order bits in the last kept byte */
+	} else {
+		sc = 0;
+	}
+	br_i15_zero(x, ebitlen);
+	br_i15_decode(x, src, len);
+	br_i15_rshift(x, sc);	/* drop the surplus bits */
+	x[0] = ebitlen;	/* header word; callers in this file pass n[0] so that x matches the modulus */
+}
diff --git a/third_party/bearssl/src/ecdsa_i15_sign_asn1.c b/third_party/bearssl/src/ecdsa_i15_sign_asn1.c
new file mode 100644
index 0000000..ab4a283
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_i15_sign_asn1.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define ORDER_LEN   ((BR_MAX_EC_SIZE + 7) >> 3)
+
+/* see bearssl_ec.h: ECDSA signature in ASN.1 format (i15); returns its length, or 0 on error */
+size_t
+br_ecdsa_i15_sign_asn1(const br_ec_impl *impl,
+	const br_hash_class *hf, const void *hash_value,
+	const br_ec_private_key *sk, void *sig)
+{
+	unsigned char rsig[(ORDER_LEN << 1) + 12];	/* raw r || s plus 12 spare bytes, presumably for the ASN.1 framing */
+	size_t sig_len;
+
+	sig_len = br_ecdsa_i15_sign_raw(impl, hf, hash_value, sk, rsig);
+	if (sig_len == 0) {	/* raw signing failed: propagate the error */
+		return 0;
+	}
+	sig_len = br_ecdsa_raw_to_asn1(rsig, sig_len);	/* re-encodes inside rsig and returns the new length */
+	memcpy(sig, rsig, sig_len);
+	return sig_len;
+}
diff --git a/third_party/bearssl/src/ecdsa_i15_sign_raw.c b/third_party/bearssl/src/ecdsa_i15_sign_raw.c
new file mode 100644
index 0000000..39b2e1d
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_i15_sign_raw.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define I15_LEN     ((BR_MAX_EC_SIZE + 29) / 15)
+#define POINT_LEN   (1 + (((BR_MAX_EC_SIZE + 7) >> 3) << 1))
+#define ORDER_LEN   ((BR_MAX_EC_SIZE + 7) >> 3)
+
+/* see bearssl_ec.h: ECDSA signature in raw (r || s) format, i15 code; returns 2 * order length, or 0 on error */
+size_t
+br_ecdsa_i15_sign_raw(const br_ec_impl *impl,
+	const br_hash_class *hf, const void *hash_value,
+	const br_ec_private_key *sk, void *sig)
+{
+	/*
+	 * IMPORTANT: this code is fit only for curves with a prime
+	 * order. This is needed so that modular reduction of the X
+	 * coordinate of a point can be done with a simple subtraction.
+	 * We also rely on the last byte of the curve order to be distinct
+	 * from 0 and 1.
+	 */
+	const br_ec_curve_def *cd;
+	uint16_t n[I15_LEN], r[I15_LEN], s[I15_LEN], x[I15_LEN];
+	uint16_t m[I15_LEN], k[I15_LEN], t1[I15_LEN], t2[I15_LEN];
+	unsigned char tt[ORDER_LEN << 1];	/* scratch: DRBG seed, then encoded k, then exponent n - 2 */
+	unsigned char eU[POINT_LEN];	/* receives the encoded point k*G */
+	size_t hash_len, nlen, ulen;
+	uint16_t n0i;
+	uint32_t ctl;
+	br_hmac_drbg_context drbg;
+
+	/*
+	 * If the curve is not supported, then exit with an error.
+	 */
+	if (((impl->supported_curves >> sk->curve) & 1) == 0) {
+		return 0;
+	}
+
+	/*
+	 * Get the curve parameters (generator and order).
+	 */
+	switch (sk->curve) {
+	case BR_EC_secp256r1:
+		cd = &br_secp256r1;
+		break;
+	case BR_EC_secp384r1:
+		cd = &br_secp384r1;
+		break;
+	case BR_EC_secp521r1:
+		cd = &br_secp521r1;
+		break;
+	default:
+		return 0;
+	}
+
+	/*
+	 * Decode the curve order n, and compute the value used by the
+	 * Montgomery multiplication routines.
+	 */
+	nlen = cd->order_len;
+	br_i15_decode(n, cd->order, nlen);
+	n0i = br_i15_ninv15(n[1]);
+
+	/*
+	 * Get private key as an i15 integer. This also checks that the
+	 * private key is well-defined (not zero, and less than the
+	 * curve order).
+	 */
+	if (!br_i15_decode_mod(x, sk->x, sk->xlen, n)) {
+		return 0;
+	}
+	if (br_i15_iszero(x)) {
+		return 0;
+	}
+
+	/*
+	 * Get hash output length (in bytes) from the hash descriptor.
+	 */
+	hash_len = (hf->desc >> BR_HASHDESC_OUT_OFF) & BR_HASHDESC_OUT_MASK;
+
+	/*
+	 * Truncate and reduce the hash value modulo the curve order.
+	 */
+	br_ecdsa_i15_bits2int(m, hash_value, hash_len, n[0]);
+	br_i15_sub(m, n, br_i15_sub(m, n, 0) ^ 1);	/* subtract n only if m >= n (inner call presumably just reports the borrow) */
+
+	/*
+	 * RFC 6979 generation of the "k" value.
+	 *
+	 * The process uses HMAC_DRBG (with the hash function used to
+	 * process the message that is to be signed). The seed is the
+	 * concatenation of the encodings of the private key and
+	 * the hash value (after truncation and modular reduction).
+	 */
+	br_i15_encode(tt, nlen, x);
+	br_i15_encode(tt + nlen, nlen, m);
+	br_hmac_drbg_init(&drbg, hf, tt, nlen << 1);
+	for (;;) {	/* draw candidates until one is non-zero and below n */
+		br_hmac_drbg_generate(&drbg, tt, nlen);
+		br_ecdsa_i15_bits2int(k, tt, nlen, n[0]);
+		if (br_i15_iszero(k)) {
+			continue;
+		}
+		if (br_i15_sub(k, n, 0)) {	/* presumably returns 1 (borrow) when k < n, leaving k unchanged */
+			break;
+		}
+	}
+
+	/*
+	 * Compute k*G and extract the X coordinate, then reduce it
+	 * modulo the curve order. Since we support only curves with
+	 * prime order, that reduction is only a matter of computing
+	 * a subtraction.
+	 */
+	br_i15_encode(tt, nlen, k);
+	ulen = impl->mulgen(eU, tt, nlen, sk->curve);
+	br_i15_zero(r, n[0]);
+	br_i15_decode(r, &eU[1], ulen >> 1);	/* skip the first byte of eU; decode the first half of the point as X */
+	r[0] = n[0];
+	br_i15_sub(r, n, br_i15_sub(r, n, 0) ^ 1);
+
+	/*
+	 * Compute 1/k in double-Montgomery representation. We do so by
+	 * first converting _from_ Montgomery representation (twice),
+	 * then using a modular exponentiation.
+	 */
+	br_i15_from_monty(k, n, n0i);
+	br_i15_from_monty(k, n, n0i);
+	memcpy(tt, cd->order, nlen);
+	tt[nlen - 1] -= 2;	/* tt = n - 2: exponent that inverts modulo the prime n; no borrow, see note at function start */
+	br_i15_modpow(k, tt, nlen, n, n0i, t1, t2);
+
+	/*
+	 * Compute s = (m+xr)/k (mod n).
+	 * The k[] array contains R^2/k (double-Montgomery representation);
+	 * we thus can use direct Montgomery multiplications and conversions
+	 * from Montgomery, avoiding any call to br_i15_to_monty() (which
+	 * is slower).
+	 */
+	br_i15_from_monty(m, n, n0i);
+	br_i15_montymul(t1, x, r, n, n0i);
+	ctl = br_i15_add(t1, m, 1);	/* t1 += m; ctl presumably receives the carry */
+	ctl |= br_i15_sub(t1, n, 0) ^ 1;	/* also reduce when t1 >= n */
+	br_i15_sub(t1, n, ctl);
+	br_i15_montymul(s, t1, k, n, n0i);
+
+	/*
+	 * Encode r and s in the signature, over nlen bytes each.
+	 */
+	br_i15_encode(sig, nlen, r);
+	br_i15_encode((unsigned char *)sig + nlen, nlen, s);
+	return nlen << 1;	/* NOTE(review): x, k and tt hold secret-derived data and are not wiped here -- confirm this is intended */
+}
diff --git a/third_party/bearssl/src/ecdsa_i15_vrfy_asn1.c b/third_party/bearssl/src/ecdsa_i15_vrfy_asn1.c
new file mode 100644
index 0000000..f4bef99
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_i15_vrfy_asn1.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define FIELD_LEN   ((BR_MAX_EC_SIZE + 7) >> 3)
+
+/* see bearssl_ec.h: verify an ASN.1 ECDSA signature (i15): convert a local copy to raw, then call the raw verifier */
+uint32_t
+br_ecdsa_i15_vrfy_asn1(const br_ec_impl *impl,
+	const void *hash, size_t hash_len,
+	const br_ec_public_key *pk,
+	const void *sig, size_t sig_len)
+{
+	/*
+	 * We use a double-sized buffer because a malformed ASN.1 signature
+	 * may trigger a size expansion when converting to "raw" format.
+	 */
+	unsigned char rsig[(FIELD_LEN << 2) + 24];
+
+	if (sig_len > ((sizeof rsig) >> 1)) {	/* input may use at most half of rsig */
+		return 0;
+	}
+	memcpy(rsig, sig, sig_len);	/* work on a copy: the conversion is done in place and sig is const */
+	sig_len = br_ecdsa_asn1_to_raw(rsig, sig_len);	/* 0 on parse failure; presumably rejected by the raw verifier (s == 0) */
+	return br_ecdsa_i15_vrfy_raw(impl, hash, hash_len, pk, rsig, sig_len);
+}
diff --git a/third_party/bearssl/src/ecdsa_i15_vrfy_raw.c b/third_party/bearssl/src/ecdsa_i15_vrfy_raw.c
new file mode 100644
index 0000000..14dd5e4
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_i15_vrfy_raw.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define I15_LEN     ((BR_MAX_EC_SIZE + 29) / 15)
+#define POINT_LEN   (1 + (((BR_MAX_EC_SIZE + 7) >> 3) << 1))
+
+/* see bearssl_ec.h: verify a raw (r || s) ECDSA signature with the i15 code; returns 1 if valid, 0 otherwise */
+uint32_t
+br_ecdsa_i15_vrfy_raw(const br_ec_impl *impl,
+	const void *hash, size_t hash_len,
+	const br_ec_public_key *pk,
+	const void *sig, size_t sig_len)
+{
+	/*
+	 * IMPORTANT: this code is fit only for curves with a prime
+	 * order. This is needed so that modular reduction of the X
+	 * coordinate of a point can be done with a simple subtraction.
+	 */
+	const br_ec_curve_def *cd;
+	uint16_t n[I15_LEN], r[I15_LEN], s[I15_LEN], t1[I15_LEN], t2[I15_LEN];
+	unsigned char tx[(BR_MAX_EC_SIZE + 7) >> 3];	/* exponent n - 2, then the encoded multiplier r/s */
+	unsigned char ty[(BR_MAX_EC_SIZE + 7) >> 3];	/* encoded multiplier hash/s */
+	unsigned char eU[POINT_LEN];	/* public point on input to muladd, result point on output */
+	size_t nlen, rlen, ulen;
+	uint16_t n0i;
+	uint32_t res;
+
+	/*
+	 * If the curve is not supported, then report an error.
+	 */
+	if (((impl->supported_curves >> pk->curve) & 1) == 0) {
+		return 0;
+	}
+
+	/*
+	 * Get the curve parameters (generator and order).
+	 */
+	switch (pk->curve) {
+	case BR_EC_secp256r1:
+		cd = &br_secp256r1;
+		break;
+	case BR_EC_secp384r1:
+		cd = &br_secp384r1;
+		break;
+	case BR_EC_secp521r1:
+		cd = &br_secp521r1;
+		break;
+	default:
+		return 0;
+	}
+
+	/*
+	 * Signature length must be even: r and s each use half of it.
+	 */
+	if (sig_len & 1) {
+		return 0;
+	}
+	rlen = sig_len >> 1;
+
+	/*
+	 * Public key point must have the proper size for this curve.
+	 */
+	if (pk->qlen != cd->generator_len) {
+		return 0;
+	}
+
+	/*
+	 * Get modulus; then decode the r and s values. They must be
+	 * lower than the modulus, and s must not be null.
+	 */
+	nlen = cd->order_len;
+	br_i15_decode(n, cd->order, nlen);
+	n0i = br_i15_ninv15(n[1]);
+	if (!br_i15_decode_mod(r, sig, rlen, n)) {
+		return 0;
+	}
+	if (!br_i15_decode_mod(s, (const unsigned char *)sig + rlen, rlen, n)) {
+		return 0;
+	}
+	if (br_i15_iszero(s)) {	/* r == 0 is not tested here; only s is checked against zero */
+		return 0;
+	}
+
+	/*
+	 * Invert s. We do that with a modular exponentiation; we use
+	 * the fact that for all the curves we support, the least
+	 * significant byte is not 0 or 1, so we can subtract 2 without
+	 * any carry to process.
+	 * We also want 1/s in Montgomery representation, which can be
+	 * done by converting _from_ Montgomery representation before
+	 * the inversion (because (1/s)*R = 1/(s/R)).
+	 */
+	br_i15_from_monty(s, n, n0i);
+	memcpy(tx, cd->order, nlen);
+	tx[nlen - 1] -= 2;	/* tx = n - 2, the inversion exponent */
+	br_i15_modpow(s, tx, nlen, n, n0i, t1, t2);
+
+	/*
+	 * Truncate the hash to the modulus length (in bits) and reduce
+	 * it modulo the curve order. The modular reduction can be done
+	 * with a subtraction since the truncation already reduced the
+	 * value to the modulus bit length.
+	 */
+	br_ecdsa_i15_bits2int(t1, hash, hash_len, n[0]);
+	br_i15_sub(t1, n, br_i15_sub(t1, n, 0) ^ 1);	/* subtract n only if t1 >= n */
+
+	/*
+	 * Multiply the (truncated, reduced) hash value with 1/s, result in
+	 * t2, encoded in ty.
+	 */
+	br_i15_montymul(t2, t1, s, n, n0i);
+	br_i15_encode(ty, nlen, t2);
+
+	/*
+	 * Multiply r with 1/s, result in t1, encoded in tx.
+	 */
+	br_i15_montymul(t1, r, s, n, n0i);
+	br_i15_encode(tx, nlen, t1);
+
+	/*
+	 * Compute the point x*Q + y*G (x is in tx, y is in ty).
+	 */
+	ulen = cd->generator_len;
+	memcpy(eU, pk->q, ulen);
+	res = impl->muladd(eU, NULL, ulen,	/* NULL second point presumably selects the curve generator G */
+		tx, nlen, ty, nlen, cd->curve);
+
+	/*
+	 * Get the X coordinate, reduce modulo the curve order, and
+	 * compare with the 'r' value.
+	 *
+	 * The modular reduction can be done with subtractions because
+	 * we work with curves of prime order, so the curve order is
+	 * close to the field order (Hasse's theorem).
+	 */
+	br_i15_zero(t1, n[0]);
+	br_i15_decode(t1, &eU[1], ulen >> 1);	/* skip the first byte of eU; decode the first half of the point as X */
+	t1[0] = n[0];
+	br_i15_sub(t1, n, br_i15_sub(t1, n, 0) ^ 1);
+	res &= ~br_i15_sub(t1, r, 1);	/* t1 -= r; a returned borrow (presumably 0 or 1) clears res */
+	res &= br_i15_iszero(t1);	/* res stays set only if the difference is zero, i.e. X mod n == r */
+	return res;
+}
diff --git a/third_party/bearssl/src/ecdsa_i31_bits.c b/third_party/bearssl/src/ecdsa_i31_bits.c
new file mode 100644
index 0000000..9a8d673
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_i31_bits.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: load a byte string (hash or DRBG output) into i31 integer x, truncated to the modulus bit length */
+void
+br_ecdsa_i31_bits2int(uint32_t *x,
+	const void *src, size_t len, uint32_t ebitlen)
+{
+	uint32_t bitlen, hbitlen;
+	int sc;
+
+	bitlen = ebitlen - (ebitlen >> 5);	/* maps 32*q + r to 31*q + r: presumably the true bit length for 31-bit words */
+	hbitlen = (uint32_t)len << 3;	/* input size in bits */
+	if (hbitlen > bitlen) {
+		len = (bitlen + 7) >> 3;	/* keep only the first ceil(bitlen / 8) input bytes */
+		sc = (int)((hbitlen - bitlen) & 7);	/* surplus low-order bits in the last kept byte */
+	} else {
+		sc = 0;
+	}
+	br_i31_zero(x, ebitlen);
+	br_i31_decode(x, src, len);
+	br_i31_rshift(x, sc);	/* drop the surplus bits */
+	x[0] = ebitlen;	/* header word is set to the caller-supplied encoded bit length */
+}
diff --git a/third_party/bearssl/src/ecdsa_i31_sign_asn1.c b/third_party/bearssl/src/ecdsa_i31_sign_asn1.c
new file mode 100644
index 0000000..cf0d351
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_i31_sign_asn1.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define ORDER_LEN   ((BR_MAX_EC_SIZE + 7) >> 3)
+
+/* see bearssl_ec.h: ECDSA signature in ASN.1 format (i31); returns its length, or 0 on error */
+size_t
+br_ecdsa_i31_sign_asn1(const br_ec_impl *impl,
+	const br_hash_class *hf, const void *hash_value,
+	const br_ec_private_key *sk, void *sig)
+{
+	unsigned char rsig[(ORDER_LEN << 1) + 12];	/* raw r || s plus 12 spare bytes, presumably for the ASN.1 framing */
+	size_t sig_len;
+
+	sig_len = br_ecdsa_i31_sign_raw(impl, hf, hash_value, sk, rsig);
+	if (sig_len == 0) {	/* raw signing failed: propagate the error */
+		return 0;
+	}
+	sig_len = br_ecdsa_raw_to_asn1(rsig, sig_len);	/* re-encodes inside rsig and returns the new length */
+	memcpy(sig, rsig, sig_len);
+	return sig_len;
+}
diff --git a/third_party/bearssl/src/ecdsa_i31_sign_raw.c b/third_party/bearssl/src/ecdsa_i31_sign_raw.c
new file mode 100644
index 0000000..1df98fe
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_i31_sign_raw.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define I31_LEN     ((BR_MAX_EC_SIZE + 61) / 31)   /* uint32_t words per i31 array */
+#define POINT_LEN   (1 + (((BR_MAX_EC_SIZE + 7) >> 3) << 1))   /* bytes in eU[] */
+#define ORDER_LEN   ((BR_MAX_EC_SIZE + 7) >> 3)   /* tt[] is twice this, in bytes */
+
+/* see bearssl_ec.h; returns 2*order_len (r then s), or 0 on error */
+size_t
+br_ecdsa_i31_sign_raw(const br_ec_impl *impl,
+	const br_hash_class *hf, const void *hash_value,
+	const br_ec_private_key *sk, void *sig)
+{
+	/*
+	 * IMPORTANT: this code is fit only for curves with a prime
+	 * order. This is needed so that modular reduction of the X
+	 * coordinate of a point can be done with a simple subtraction.
+	 * We also rely on the last byte of the curve order to be distinct
+	 * from 0 and 1.
+	 */
+	const br_ec_curve_def *cd;
+	uint32_t n[I31_LEN], r[I31_LEN], s[I31_LEN], x[I31_LEN];
+	uint32_t m[I31_LEN], k[I31_LEN], t1[I31_LEN], t2[I31_LEN];
+	unsigned char tt[ORDER_LEN << 1];
+	unsigned char eU[POINT_LEN];
+	size_t hash_len, nlen, ulen;
+	uint32_t n0i, ctl;
+	br_hmac_drbg_context drbg;
+
+	/*
+	 * If the curve is not supported, then exit with an error.
+	 */
+	if (((impl->supported_curves >> sk->curve) & 1) == 0) {
+		return 0;
+	}
+
+	/*
+	 * Get the curve parameters (generator and order).
+	 */
+	switch (sk->curve) {
+	case BR_EC_secp256r1:
+		cd = &br_secp256r1;
+		break;
+	case BR_EC_secp384r1:
+		cd = &br_secp384r1;
+		break;
+	case BR_EC_secp521r1:
+		cd = &br_secp521r1;
+		break;
+	default:
+		return 0;
+	}
+
+	/*
+	 * Get modulus.
+	 */
+	nlen = cd->order_len;
+	br_i31_decode(n, cd->order, nlen);
+	n0i = br_i31_ninv31(n[1]);
+
+	/*
+	 * Get private key as an i31 integer. This also checks that the
+	 * private key is well-defined (not zero, and less than the
+	 * curve order).
+	 */
+	if (!br_i31_decode_mod(x, sk->x, sk->xlen, n)) {
+		return 0;
+	}
+	if (br_i31_iszero(x)) {
+		return 0;
+	}
+
+	/*
+	 * Get hash output length from the hash function descriptor.
+	 */
+	hash_len = (hf->desc >> BR_HASHDESC_OUT_OFF) & BR_HASHDESC_OUT_MASK;
+
+	/*
+	 * Truncate and reduce the hash value modulo the curve order.
+	 */
+	br_ecdsa_i31_bits2int(m, hash_value, hash_len, n[0]);
+	br_i31_sub(m, n, br_i31_sub(m, n, 0) ^ 1);
+
+	/*
+	 * RFC 6979 generation of the "k" value.
+	 *
+	 * The process uses HMAC_DRBG (with the hash function used to
+	 * process the message that is to be signed). The seed is the
+	 * concatenation of the encodings of the private key and
+	 * the hash value (after truncation and modular reduction).
+	 */
+	br_i31_encode(tt, nlen, x);
+	br_i31_encode(tt + nlen, nlen, m);
+	br_hmac_drbg_init(&drbg, hf, tt, nlen << 1);
+	for (;;) {
+		br_hmac_drbg_generate(&drbg, tt, nlen);
+		br_ecdsa_i31_bits2int(k, tt, nlen, n[0]);
+		if (br_i31_iszero(k)) {
+			continue;	/* k = 0 is rejected: draw again */
+		}
+		if (br_i31_sub(k, n, 0)) {
+			break;
+		}
+	}
+
+	/*
+	 * Compute k*G and extract the X coordinate, then reduce it
+	 * modulo the curve order. Since we support only curves with
+	 * prime order, that reduction is only a matter of computing
+	 * a subtraction.
+	 */
+	br_i31_encode(tt, nlen, k);
+	ulen = impl->mulgen(eU, tt, nlen, sk->curve);
+	br_i31_zero(r, n[0]);
+	br_i31_decode(r, &eU[1], ulen >> 1);
+	r[0] = n[0];
+	br_i31_sub(r, n, br_i31_sub(r, n, 0) ^ 1);
+
+	/*
+	 * Compute 1/k in double-Montgomery representation. We do so by
+	 * first converting _from_ Montgomery representation (twice),
+	 * then using a modular exponentiation.
+	 */
+	br_i31_from_monty(k, n, n0i);
+	br_i31_from_monty(k, n, n0i);
+	memcpy(tt, cd->order, nlen);
+	tt[nlen - 1] -= 2;	/* exponent n-2; cannot borrow, see top comment */
+	br_i31_modpow(k, tt, nlen, n, n0i, t1, t2);
+
+	/*
+	 * Compute s = (m+xr)/k (mod n).
+	 * The k[] array contains R^2/k (double-Montgomery representation);
+	 * we thus can use direct Montgomery multiplications and conversions
+	 * from Montgomery, avoiding any call to br_i31_to_monty() (which
+	 * is slower).
+	 */
+	br_i31_from_monty(m, n, n0i);
+	br_i31_montymul(t1, x, r, n, n0i);
+	ctl = br_i31_add(t1, m, 1);
+	ctl |= br_i31_sub(t1, n, 0) ^ 1;
+	br_i31_sub(t1, n, ctl);
+	br_i31_montymul(s, t1, k, n, n0i);
+
+	/*
+	 * Encode r and s in the signature.
+	 */
+	br_i31_encode(sig, nlen, r);
+	br_i31_encode((unsigned char *)sig + nlen, nlen, s);
+	return nlen << 1;
+}
diff --git a/third_party/bearssl/src/ecdsa_i31_vrfy_asn1.c b/third_party/bearssl/src/ecdsa_i31_vrfy_asn1.c
new file mode 100644
index 0000000..4161aaa
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_i31_vrfy_asn1.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define FIELD_LEN   ((BR_MAX_EC_SIZE + 7) >> 3)   /* bytes: ceil(BR_MAX_EC_SIZE / 8) */
+
+/* see bearssl_ec.h */
+uint32_t
+br_ecdsa_i31_vrfy_asn1(const br_ec_impl *impl,
+	const void *hash, size_t hash_len,
+	const br_ec_public_key *pk,
+	const void *sig, size_t sig_len)
+{
+	/*
+	 * Double-sized: malformed ASN.1 input may expand in "raw" format.
+	 */
+	unsigned char work[(FIELD_LEN << 2) + 24];
+
+	if (sig_len <= ((sizeof work) >> 1)) {
+		memcpy(work, sig, sig_len);
+		sig_len = br_ecdsa_asn1_to_raw(work, sig_len);
+		return br_ecdsa_i31_vrfy_raw(impl,
+			hash, hash_len, pk, work, sig_len);
+	}
+	return 0;
+}
diff --git a/third_party/bearssl/src/ecdsa_i31_vrfy_raw.c b/third_party/bearssl/src/ecdsa_i31_vrfy_raw.c
new file mode 100644
index 0000000..259477f
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_i31_vrfy_raw.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define I31_LEN     ((BR_MAX_EC_SIZE + 61) / 31)   /* uint32_t words per i31 array */
+#define POINT_LEN   (1 + (((BR_MAX_EC_SIZE + 7) >> 3) << 1))   /* bytes in eU[] */
+
+/* see bearssl_ec.h; every rejection path below returns 0 */
+uint32_t
+br_ecdsa_i31_vrfy_raw(const br_ec_impl *impl,
+	const void *hash, size_t hash_len,
+	const br_ec_public_key *pk,
+	const void *sig, size_t sig_len)
+{
+	/*
+	 * IMPORTANT: this code is fit only for curves with a prime
+	 * order. This is needed so that modular reduction of the X
+	 * coordinate of a point can be done with a simple subtraction.
+	 */
+	const br_ec_curve_def *cd;
+	uint32_t n[I31_LEN], r[I31_LEN], s[I31_LEN], t1[I31_LEN], t2[I31_LEN];
+	unsigned char tx[(BR_MAX_EC_SIZE + 7) >> 3];
+	unsigned char ty[(BR_MAX_EC_SIZE + 7) >> 3];
+	unsigned char eU[POINT_LEN];
+	size_t nlen, rlen, ulen;
+	uint32_t n0i, res;
+
+	/*
+	 * If the curve is not supported, then report an error.
+	 */
+	if (((impl->supported_curves >> pk->curve) & 1) == 0) {
+		return 0;
+	}
+
+	/*
+	 * Get the curve parameters (generator and order).
+	 */
+	switch (pk->curve) {
+	case BR_EC_secp256r1:
+		cd = &br_secp256r1;
+		break;
+	case BR_EC_secp384r1:
+		cd = &br_secp384r1;
+		break;
+	case BR_EC_secp521r1:
+		cd = &br_secp521r1;
+		break;
+	default:
+		return 0;
+	}
+
+	/*
+	 * Signature length must be even.
+	 */
+	if (sig_len & 1) {
+		return 0;
+	}
+	rlen = sig_len >> 1;
+
+	/*
+	 * Public key point must have the proper size for this curve.
+	 */
+	if (pk->qlen != cd->generator_len) {
+		return 0;
+	}
+
+	/*
+	 * Get modulus; then decode the r and s values. They must be
+	 * lower than the modulus, and s must not be zero.
+	 */
+	nlen = cd->order_len;
+	br_i31_decode(n, cd->order, nlen);
+	n0i = br_i31_ninv31(n[1]);
+	if (!br_i31_decode_mod(r, sig, rlen, n)) {
+		return 0;
+	}
+	if (!br_i31_decode_mod(s, (const unsigned char *)sig + rlen, rlen, n)) {
+		return 0;
+	}
+	if (br_i31_iszero(s)) {
+		return 0;
+	}
+
+	/*
+	 * Invert s. We do that with a modular exponentiation; we use
+	 * the fact that for all the curves we support, the least
+	 * significant byte is not 0 or 1, so we can subtract 2 without
+	 * any carry to process.
+	 * We also want 1/s in Montgomery representation, which can be
+	 * done by converting _from_ Montgomery representation before
+	 * the inversion (because (1/s)*R = 1/(s/R)).
+	 */
+	br_i31_from_monty(s, n, n0i);
+	memcpy(tx, cd->order, nlen);
+	tx[nlen - 1] -= 2;
+	br_i31_modpow(s, tx, nlen, n, n0i, t1, t2);
+
+	/*
+	 * Truncate the hash to the modulus length (in bits) and reduce
+	 * it modulo the curve order. The modular reduction can be done
+	 * with a subtraction since the truncation already reduced the
+	 * value to the modulus bit length.
+	 */
+	br_ecdsa_i31_bits2int(t1, hash, hash_len, n[0]);
+	br_i31_sub(t1, n, br_i31_sub(t1, n, 0) ^ 1);
+
+	/*
+	 * Multiply the (truncated, reduced) hash value with 1/s, result in
+	 * t2, encoded in ty.
+	 */
+	br_i31_montymul(t2, t1, s, n, n0i);
+	br_i31_encode(ty, nlen, t2);
+
+	/*
+	 * Multiply r with 1/s, result in t1, encoded in tx.
+	 */
+	br_i31_montymul(t1, r, s, n, n0i);
+	br_i31_encode(tx, nlen, t1);
+
+	/*
+	 * Compute the point x*Q + y*G.
+	 */
+	ulen = cd->generator_len;
+	memcpy(eU, pk->q, ulen);
+	res = impl->muladd(eU, NULL, ulen,
+		tx, nlen, ty, nlen, cd->curve);
+
+	/*
+	 * Get the X coordinate, reduce modulo the curve order, and
+	 * compare with the 'r' value.
+	 *
+	 * The modular reduction can be done with subtractions because
+	 * we work with curves of prime order, so the curve order is
+	 * close to the field order (Hasse's theorem).
+	 */
+	br_i31_zero(t1, n[0]);
+	br_i31_decode(t1, &eU[1], ulen >> 1);
+	t1[0] = n[0];
+	br_i31_sub(t1, n, br_i31_sub(t1, n, 0) ^ 1);
+	res &= ~br_i31_sub(t1, r, 1);
+	res &= br_i31_iszero(t1);	/* equal iff the difference is zero */
+	return res;
+}
diff --git a/third_party/bearssl/src/ecdsa_rta.c b/third_party/bearssl/src/ecdsa_rta.c
new file mode 100644
index 0000000..005c62c
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_rta.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Length (in bytes) of the ASN.1 INTEGER contents for the unsigned
+ * big-endian value x[]: leading zeros are skipped (minimal length), and
+ * one byte is added if nothing remains or the top bit is set (signed).
+ */
+static size_t
+asn1_int_length(const unsigned char *x, size_t xlen)
+{
+	size_t u = 0;
+
+	while (u < xlen && x[u] == 0) {
+		u ++;
+	}
+	xlen -= u;
+	if (xlen == 0 || x[u] >= 0x80) {
+		xlen ++;
+	}
+	return xlen;
+}
+
+/*
+ * see bearssl_ec.h
+ *
+ * Convert, in place, a "raw" signature (r and s concatenated, each
+ * on sig_len/2 bytes) into its ASN.1 form. Returned value is the new
+ * length, or 0 if sig_len is odd or if either integer would need
+ * more than 125 content bytes. The buffer must be large enough for
+ * the ASN.1 output, which can be longer than the raw input.
+ */
+size_t
+br_ecdsa_raw_to_asn1(void *sig, size_t sig_len)
+{
+	/*
+	 * The staging area is sized for two INTEGER values of at most
+	 * 125 content bytes each (signed encoding), i.e. a curve order
+	 * of up to 999 bits. Under that limit every length field stays
+	 * "simple": a single byte, or 0x81 followed by one byte.
+	 */
+	unsigned char out[257];
+	const unsigned char *raw, *src;
+	size_t half, ilen[2], total, pos, u;
+
+	raw = sig;
+	if (sig_len & 1) {
+		return 0;
+	}
+	half = sig_len >> 1;
+
+	/*
+	 * Encoded content length of r (first half) and s (second half).
+	 */
+	for (u = 0; u < 2; u ++) {
+		ilen[u] = asn1_int_length(raw + u * half, half);
+		if (ilen[u] > 125) {
+			return 0;
+		}
+	}
+
+	/*
+	 * SEQUENCE header; the "+ 4" accounts for the tag and length
+	 * bytes of the two INTEGER values.
+	 */
+	total = ilen[0] + ilen[1] + 4;
+	pos = 0;
+	out[pos ++] = 0x30;
+	if (total >= 0x80) {
+		out[pos ++] = 0x81;
+	}
+	out[pos ++] = total;
+
+	/*
+	 * The two INTEGER values, r then s. When the encoded length
+	 * exceeds the half size, a 0x00 sign byte goes first, followed
+	 * by the whole half; otherwise only the trailing ilen[u] bytes
+	 * of the half are kept.
+	 */
+	for (u = 0; u < 2; u ++) {
+		src = raw + u * half;
+		out[pos ++] = 0x02;
+		out[pos ++] = ilen[u];
+		if (ilen[u] > half) {
+			out[pos] = 0x00;
+			memcpy(out + pos + 1, src, half);
+		} else {
+			memcpy(out + pos, src + half - ilen[u], ilen[u]);
+		}
+		pos += ilen[u];
+	}
+
+	/*
+	 * Overwrite the caller's buffer with the ASN.1 signature.
+	 */
+	memcpy(sig, out, pos);
+	return pos;
+}
diff --git a/third_party/bearssl/src/enc16be.c b/third_party/bearssl/src/enc16be.c
new file mode 100644
index 0000000..6e06652
--- /dev/null
+++ b/third_party/bearssl/src/enc16be.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: writes num values from v[] to dst, each with br_enc16be() */
+void
+br_range_enc16be(void *dst, const uint16_t *v, size_t num)
+{
+	unsigned char *out = dst;
+	size_t u;
+
+	/* Value u lands at byte offset 2*u of the destination. */
+	for (u = 0; u < num; u ++) {
+		br_enc16be(out + (u << 1), v[u]);
+	}
+}
diff --git a/third_party/bearssl/src/enc16le.c b/third_party/bearssl/src/enc16le.c
new file mode 100644
index 0000000..3e5049a
--- /dev/null
+++ b/third_party/bearssl/src/enc16le.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: writes num values from v[] to dst, each with br_enc16le() */
+void
+br_range_enc16le(void *dst, const uint16_t *v, size_t num)
+{
+	unsigned char *out = dst;
+	size_t u;
+
+	/* Value u lands at byte offset 2*u of the destination. */
+	for (u = 0; u < num; u ++) {
+		br_enc16le(out + (u << 1), v[u]);
+	}
+}
diff --git a/third_party/bearssl/src/enc32be.c b/third_party/bearssl/src/enc32be.c
new file mode 100644
index 0000000..97298b5
--- /dev/null
+++ b/third_party/bearssl/src/enc32be.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: writes num values from v[] to dst, each with br_enc32be() */
+void
+br_range_enc32be(void *dst, const uint32_t *v, size_t num)
+{
+	unsigned char *out = dst;
+	size_t u;
+
+	/* Value u lands at byte offset 4*u of the destination. */
+	for (u = 0; u < num; u ++) {
+		br_enc32be(out + (u << 2), v[u]);
+	}
+}
diff --git a/third_party/bearssl/src/enc32le.c b/third_party/bearssl/src/enc32le.c
new file mode 100644
index 0000000..9e9c856
--- /dev/null
+++ b/third_party/bearssl/src/enc32le.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: writes num values from v[] to dst, each with br_enc32le() */
+void
+br_range_enc32le(void *dst, const uint32_t *v, size_t num)
+{
+	unsigned char *out = dst;
+	size_t u;
+
+	/* Value u lands at byte offset 4*u of the destination. */
+	for (u = 0; u < num; u ++) {
+		br_enc32le(out + (u << 2), v[u]);
+	}
+}
diff --git a/third_party/bearssl/src/enc64be.c b/third_party/bearssl/src/enc64be.c
new file mode 100644
index 0000000..d548944
--- /dev/null
+++ b/third_party/bearssl/src/enc64be.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: writes num values from v[] to dst, each with br_enc64be() */
+void
+br_range_enc64be(void *dst, const uint64_t *v, size_t num)
+{
+	unsigned char *out = dst;
+	size_t u;
+
+	/* Value u lands at byte offset 8*u of the destination. */
+	for (u = 0; u < num; u ++) {
+		br_enc64be(out + (u << 3), v[u]);
+	}
+}
diff --git a/third_party/bearssl/src/enc64le.c b/third_party/bearssl/src/enc64le.c
new file mode 100644
index 0000000..1f1d68e
--- /dev/null
+++ b/third_party/bearssl/src/enc64le.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: writes num values from v[] to dst, each with br_enc64le() */
+void
+br_range_enc64le(void *dst, const uint64_t *v, size_t num)
+{
+	unsigned char *out = dst;
+	size_t u;
+
+	/* Value u lands at byte offset 8*u of the destination. */
+	for (u = 0; u < num; u ++) {
+		br_enc64le(out + (u << 3), v[u]);
+	}
+}
diff --git a/third_party/bearssl/src/encode_ec_pk8der.c b/third_party/bearssl/src/encode_ec_pk8der.c
new file mode 100644
index 0000000..53717ce
--- /dev/null
+++ b/third_party/bearssl/src/encode_ec_pk8der.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_x509.h; with dest == NULL, only the length is computed */
+size_t
+br_encode_ec_pkcs8_der(void *dest,
+	const br_ec_private_key *sk, const br_ec_public_key *pk)
+{
+	/*
+	 * ASN.1 format:
+	 *
+	 *   OneAsymmetricKey ::= SEQUENCE {
+	 *     version                   Version,
+	 *     privateKeyAlgorithm       PrivateKeyAlgorithmIdentifier,
+	 *     privateKey                PrivateKey,
+	 *     attributes            [0] Attributes OPTIONAL,
+	 *     ...,
+	 *     [[2: publicKey        [1] PublicKey OPTIONAL ]],
+	 *     ...
+	 *   }
+	 *
+	 * We don't include attributes or public key (the public key
+	 * is included in the private key value instead). The
+	 * 'version' field is an INTEGER that we will set to 0
+	 * (meaning 'v1', compatible with previous versions of PKCS#8).
+	 * The 'privateKeyAlgorithm' structure is an AlgorithmIdentifier
+	 * whose OID should be id-ecPublicKey, with, as parameters, the
+	 * curve OID. The 'privateKey' is an OCTET STRING, whose value
+	 * is the "raw DER" encoding of the key pair.
+	 */
+
+	/*
+	 * OID id-ecPublicKey (1.2.840.10045.2.1), DER-encoded (with
+	 * the tag).
+	 */
+	static const unsigned char OID_ECPUBKEY[] = {
+		0x06, 0x07, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x02, 0x01
+	};
+
+	size_t len_version, len_privateKeyAlgorithm, len_privateKeyValue;
+	size_t len_privateKey, len_seq;
+	const unsigned char *oid;
+
+	oid = br_get_curve_OID(sk->curve);
+	if (oid == NULL) {
+		return 0;	/* no OID known for this curve */
+	}
+	len_version = 3;	/* INTEGER: tag, length, one value byte */
+	len_privateKeyAlgorithm = 2 + sizeof OID_ECPUBKEY + 2 + oid[0];
+	len_privateKeyValue = br_encode_ec_raw_der_inner(NULL, sk, pk, 0);
+	len_privateKey = 1 + len_of_len(len_privateKeyValue)
+		+ len_privateKeyValue;
+	len_seq = len_version + len_privateKeyAlgorithm + len_privateKey;
+
+	if (dest == NULL) {
+		return 1 + len_of_len(len_seq) + len_seq;
+	} else {
+		unsigned char *buf;
+		size_t lenlen;
+
+		buf = dest;
+		*buf ++ = 0x30;  /* SEQUENCE tag */
+		lenlen = br_asn1_encode_length(buf, len_seq);
+		buf += lenlen;
+
+		/* version */
+		*buf ++ = 0x02;
+		*buf ++ = 0x01;
+		*buf ++ = 0x00;
+
+		/* privateKeyAlgorithm */
+		*buf ++ = 0x30;
+		*buf ++ = (sizeof OID_ECPUBKEY) + 2 + oid[0];
+		memcpy(buf, OID_ECPUBKEY, sizeof OID_ECPUBKEY);
+		buf += sizeof OID_ECPUBKEY;
+		*buf ++ = 0x06;	/* OID tag; oid[] begins with its length byte */
+		memcpy(buf, oid, 1 + oid[0]);
+		buf += 1 + oid[0];
+
+		/* privateKey */
+		*buf ++ = 0x04;	/* OCTET STRING tag */
+		buf += br_asn1_encode_length(buf, len_privateKeyValue);
+		br_encode_ec_raw_der_inner(buf, sk, pk, 0);
+
+		return 1 + lenlen + len_seq;
+	}
+}
diff --git a/third_party/bearssl/src/encode_ec_rawder.c b/third_party/bearssl/src/encode_ec_rawder.c
new file mode 100644
index 0000000..5985909
--- /dev/null
+++ b/third_party/bearssl/src/encode_ec_rawder.c
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; OID value preceded by its length byte, NULL if unsupported */
+const unsigned char *
+br_get_curve_OID(int curve)
+{
+	static const unsigned char OID_secp256r1[] = {
+		0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x03, 0x01, 0x07
+	};
+	static const unsigned char OID_secp384r1[] = {
+		0x05, 0x2b, 0x81, 0x04, 0x00, 0x22
+	};
+	static const unsigned char OID_secp521r1[] = {
+		0x05, 0x2b, 0x81, 0x04, 0x00, 0x23
+	};
+
+	switch (curve) {
+	case BR_EC_secp256r1:  return OID_secp256r1;
+	case BR_EC_secp384r1:  return OID_secp384r1;
+	case BR_EC_secp521r1:  return OID_secp521r1;
+	default:
+		return NULL;
+	}
+}
+
+/* see inner.h; dest == NULL: length only; returns 0 if curve OID unknown */
+size_t
+br_encode_ec_raw_der_inner(void *dest,
+	const br_ec_private_key *sk, const br_ec_public_key *pk,
+	int include_curve_oid)
+{
+	/*
+	 * ASN.1 format:
+	 *
+	 *   ECPrivateKey ::= SEQUENCE {
+	 *     version        INTEGER { ecPrivkeyVer1(1) } (ecPrivkeyVer1),
+	 *     privateKey     OCTET STRING,
+	 *     parameters [0] ECParameters {{ NamedCurve }} OPTIONAL,
+	 *     publicKey  [1] BIT STRING OPTIONAL
+	 *   }
+	 *
+	 * The tags '[0]' and '[1]' are explicit. The 'ECParameters'
+	 * is a CHOICE; in our case, it will always be an OBJECT IDENTIFIER
+	 * that identifies the curve.
+	 *
+	 * The value of the 'privateKey' field is the raw unsigned big-endian
+	 * encoding of the private key (integer modulo the curve subgroup
+	 * order); there is no INTEGER tag, and the leading bit may be 1.
+	 * Also, leading bytes of value 0x00 are _not_ removed.
+	 *
+	 * The 'publicKey' BIT STRING contents are one "unused bits" byte
+	 * (0x00) and then the raw encoded public key point, normally
+	 * uncompressed (0x04, then big-endian X and Y padded to the field
+	 * length); its length header thus covers pk->qlen + 1 bytes.
+	 */
+
+	size_t len_version, len_privateKey, len_parameters, len_publicKey;
+	size_t len_publicKey_bits, len_seq;
+	const unsigned char *oid;
+
+	if (include_curve_oid) {
+		oid = br_get_curve_OID(sk->curve);
+		if (oid == NULL) {
+			return 0;
+		}
+	} else {
+		oid = NULL;
+	}
+	len_version = 3;
+	len_privateKey = 1 + len_of_len(sk->xlen) + sk->xlen;
+	if (include_curve_oid) {
+		len_parameters = 4 + oid[0];
+	} else {
+		len_parameters = 0;
+	}
+	if (pk == NULL) {
+		len_publicKey = 0;
+		len_publicKey_bits = 0;
+	} else {
+		len_publicKey_bits = 2 + len_of_len(pk->qlen + 1) + pk->qlen;
+		len_publicKey = 1 + len_of_len(len_publicKey_bits)
+			+ len_publicKey_bits;
+	}
+	len_seq = len_version + len_privateKey + len_parameters + len_publicKey;
+	if (dest == NULL) {
+		return 1 + len_of_len(len_seq) + len_seq;
+	} else {
+		unsigned char *buf;
+		size_t lenlen;
+
+		buf = dest;
+		*buf ++ = 0x30;  /* SEQUENCE tag */
+		lenlen = br_asn1_encode_length(buf, len_seq);
+		buf += lenlen;
+
+		/* version */
+		*buf ++ = 0x02;
+		*buf ++ = 0x01;
+		*buf ++ = 0x01;
+
+		/* privateKey */
+		*buf ++ = 0x04;
+		buf += br_asn1_encode_length(buf, sk->xlen);
+		memcpy(buf, sk->x, sk->xlen);
+		buf += sk->xlen;
+
+		/* parameters */
+		if (include_curve_oid) {
+			*buf ++ = 0xA0;
+			*buf ++ = oid[0] + 2;
+			*buf ++ = 0x06;
+			memcpy(buf, oid, oid[0] + 1);
+			buf += oid[0] + 1;
+		}
+
+		/* publicKey */
+		if (pk != NULL) {
+			*buf ++ = 0xA1;
+			buf += br_asn1_encode_length(buf, len_publicKey_bits);
+			*buf ++ = 0x03;
+			buf += br_asn1_encode_length(buf, pk->qlen + 1);
+			*buf ++ = 0x00;
+			memcpy(buf, pk->q, pk->qlen);
+			/* buf += pk->qlen; */
+		}
+
+		return 1 + lenlen + len_seq;
+	}
+}
+
+/* see bearssl_x509.h; inner encoder with the curve OID always included */
+size_t
+br_encode_ec_raw_der(void *dest,
+	const br_ec_private_key *sk, const br_ec_public_key *pk)
+{
+	return br_encode_ec_raw_der_inner(dest, sk, pk, 1);
+}
diff --git a/third_party/bearssl/src/encode_rsa_pk8der.c b/third_party/bearssl/src/encode_rsa_pk8der.c
new file mode 100644
index 0000000..c053503
--- /dev/null
+++ b/third_party/bearssl/src/encode_rsa_pk8der.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_x509.h; dest == NULL returns the encoded length only */
+size_t
+br_encode_rsa_pkcs8_der(void *dest, const br_rsa_private_key *sk,
+	const br_rsa_public_key *pk, const void *d, size_t dlen)
+{
+	/*
+	 * ASN.1 format:
+	 *
+	 *   OneAsymmetricKey ::= SEQUENCE {
+	 *     version                   Version,
+	 *     privateKeyAlgorithm       PrivateKeyAlgorithmIdentifier,
+	 *     privateKey                PrivateKey,
+	 *     attributes            [0] Attributes OPTIONAL,
+	 *     ...,
+	 *     [[2: publicKey        [1] PublicKey OPTIONAL ]],
+	 *     ...
+	 *   }
+	 *
+	 * We don't include attributes or public key. The 'version' field
+	 * is an INTEGER that we will set to 0 (meaning 'v1', compatible
+	 * with previous versions of PKCS#8). The 'privateKeyAlgorithm'
+	 * structure is an AlgorithmIdentifier whose OID should be
+	 * rsaEncryption, with NULL parameters. The 'privateKey' is an
+	 * OCTET STRING, whose value is the "raw DER" encoding of the
+	 * key pair.
+	 *
+	 * Since the private key value comes last, this function really
+	 * adds a header, which is mostly fixed (only some lengths have
+	 * to be modified).
+	 */
+
+	/*
+	 * Concatenation of:
+	 *  - DER encoding of an INTEGER of value 0 (the 'version' field)
+	 *  - DER encoding of a PrivateKeyAlgorithmIdentifier that uses
+	 *    the rsaEncryption OID, and NULL parameters
+	 *  - An OCTET STRING tag
+	 */
+	static const unsigned char PK8_HEAD[] = {
+		0x02, 0x01, 0x00,
+		0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86,
+		0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05, 0x00,
+		0x04
+	};
+
+	size_t len_raw, len_seq;
+
+	len_raw = br_encode_rsa_raw_der(NULL, sk, pk, d, dlen);
+	len_seq = (sizeof PK8_HEAD) + len_of_len(len_raw) + len_raw;
+	if (dest == NULL) {
+		return 1 + len_of_len(len_seq) + len_seq;
+	} else {
+		unsigned char *buf;
+		size_t lenlen;
+
+		buf = dest;
+		*buf ++ = 0x30;  /* SEQUENCE tag */
+		lenlen = br_asn1_encode_length(buf, len_seq);
+		buf += lenlen;
+
+		/* version, privateKeyAlgorithm, privateKey tag */
+		memcpy(buf, PK8_HEAD, sizeof PK8_HEAD);
+		buf += sizeof PK8_HEAD;
+
+		/* privateKey */
+		buf += br_asn1_encode_length(buf, len_raw);
+		br_encode_rsa_raw_der(buf, sk, pk, d, dlen);
+
+		return 1 + lenlen + len_seq;
+	}
+}
diff --git a/third_party/bearssl/src/encode_rsa_rawder.c b/third_party/bearssl/src/encode_rsa_rawder.c
new file mode 100644
index 0000000..1a8052b
--- /dev/null
+++ b/third_party/bearssl/src/encode_rsa_rawder.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_x509.h; dest == NULL returns the encoded length only */
+size_t
+br_encode_rsa_raw_der(void *dest, const br_rsa_private_key *sk,
+	const br_rsa_public_key *pk, const void *d, size_t dlen)
+{
+	/*
+	 * ASN.1 format:
+	 *
+	 *   RSAPrivateKey ::= SEQUENCE {
+	 *       version           Version,
+	 *       modulus           INTEGER,  -- n
+	 *       publicExponent    INTEGER,  -- e
+	 *       privateExponent   INTEGER,  -- d
+	 *       prime1            INTEGER,  -- p
+	 *       prime2            INTEGER,  -- q
+	 *       exponent1         INTEGER,  -- d mod (p-1)
+	 *       exponent2         INTEGER,  -- d mod (q-1)
+	 *       coefficient       INTEGER,  -- (inverse of q) mod p
+	 *       otherPrimeInfos   OtherPrimeInfos OPTIONAL
+	 *   }
+	 *
+	 * The 'version' field is an INTEGER of value 0 (meaning: there
+	 * are exactly two prime factors), and 'otherPrimeInfos' will
+	 * be absent (because there are exactly two prime factors).
+	 */
+
+	br_asn1_uint num[9];
+	size_t u, slen;
+
+	/*
+	 * For all INTEGER values, in ASN.1 field order (num[0] is the
+	 * 'version'), get the pointer and length for the data bytes.
+	 */
+	num[0] = br_asn1_uint_prepare(NULL, 0);
+	num[1] = br_asn1_uint_prepare(pk->n, pk->nlen);
+	num[2] = br_asn1_uint_prepare(pk->e, pk->elen);
+	num[3] = br_asn1_uint_prepare(d, dlen);
+	num[4] = br_asn1_uint_prepare(sk->p, sk->plen);
+	num[5] = br_asn1_uint_prepare(sk->q, sk->qlen);
+	num[6] = br_asn1_uint_prepare(sk->dp, sk->dplen);
+	num[7] = br_asn1_uint_prepare(sk->dq, sk->dqlen);
+	num[8] = br_asn1_uint_prepare(sk->iq, sk->iqlen);
+
+	/*
+	 * Get the length of the SEQUENCE contents (tag + length + value).
+	 */
+	slen = 0;
+	for (u = 0; u < 9; u ++) {
+		uint32_t ilen;
+
+		ilen = num[u].asn1len;
+		slen += 1 + len_of_len(ilen) + ilen;
+	}
+
+	if (dest == NULL) {
+		return 1 + len_of_len(slen) + slen;
+	} else {
+		unsigned char *buf;
+		size_t lenlen;
+
+		buf = dest;
+		*buf ++ = 0x30;  /* SEQUENCE tag */
+		lenlen = br_asn1_encode_length(buf, slen);
+		buf += lenlen;
+		for (u = 0; u < 9; u ++) {
+			buf += br_asn1_encode_uint(buf, num[u]);
+		}
+		return 1 + lenlen + slen;
+	}
+}
diff --git a/third_party/bearssl/src/gcm.c b/third_party/bearssl/src/gcm.c
new file mode 100644
index 0000000..ede5f08
--- /dev/null
+++ b/third_party/bearssl/src/gcm.c
@@ -0,0 +1,318 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Implementation Notes
+ * ====================
+ *
+ * Since CTR and GHASH implementations can handle only full blocks, a
+ * 16-byte buffer (buf[]) is maintained in the context:
+ *
+ *  - When processing AAD, buf[] contains the 0-15 unprocessed bytes.
+ *
+ *  - When doing CTR encryption / decryption, buf[] contains the AES output
+ *    for the last partial block, to be used with the next few bytes of
+ *    data, as well as the already encrypted bytes. For instance, if the
+ *    processed data length so far is 21 bytes, then buf[0..4] contains
+ *    the five last encrypted bytes, and buf[5..15] contains the next 11
+ *    AES output bytes to be XORed with the next 11 bytes of input.
+ *
+ *    The recorded AES output bytes are used to complete the block when
+ *    the corresponding bytes are obtained. Note that buf[] always
+ *    contains the _encrypted_ bytes, whether we apply encryption or
+ *    decryption: these bytes are used as input to GHASH when the block
+ *    is complete.
+ *
+ * In both cases, the low bits of the data length counters (count_aad,
+ * count_ctr) are used to work out the current situation.
+ */
+
+/* see bearssl_aead.h; records the CTR cipher and GHASH, then derives h[] */
+void
+br_gcm_init(br_gcm_context *ctx, const br_block_ctr_class **bctx, br_ghash gh)
+{
+	unsigned char iv[12];
+
+	ctx->vtable = &br_gcm_vtable;
+	ctx->bctx = bctx;
+	ctx->gh = gh;
+
+	/*
+	 * The GHASH key h[] is the raw encryption of the all-zero
+	 * block. Since we only have a CTR implementation, we use it
+	 * with an all-zero IV and a zero counter, to CTR-encrypt an
+	 * all-zero block.
+	 */
+	memset(ctx->h, 0, sizeof ctx->h);
+	memset(iv, 0, sizeof iv);
+	(*bctx)->run(bctx, iv, 0, ctx->h, sizeof ctx->h);
+}
+
+/* see bearssl_aead.h; new message: sets J0 and jc, clears y[] and counters */
+void
+br_gcm_reset(br_gcm_context *ctx, const void *iv, size_t len)
+{
+	/*
+	 * If the provided nonce is 12 bytes, then this is the initial
+	 * IV for CTR mode; it will be used with a counter that starts
+	 * at 2 (value 1 is for encrypting the GHASH output into the tag).
+	 *
+	 * If the provided nonce has any other length, then it is hashed
+	 * (with GHASH) into a 16-byte value that will be the IV for CTR
+	 * (both 12-byte IV and 32-bit counter).
+	 */
+	if (len == 12) {
+		memcpy(ctx->j0_1, iv, 12);
+		ctx->j0_2 = 1;
+	} else {
+		unsigned char ty[16], tmp[16];
+
+		memset(ty, 0, sizeof ty);
+		ctx->gh(ty, ctx->h, iv, len);
+		memset(tmp, 0, 8);
+		br_enc64be(tmp + 8, (uint64_t)len << 3);
+		ctx->gh(ty, ctx->h, tmp, 16);
+		memcpy(ctx->j0_1, ty, 12);
+		ctx->j0_2 = br_dec32be(ty + 12);
+	}
+	ctx->jc = ctx->j0_2 + 1;
+	memset(ctx->y, 0, sizeof ctx->y);
+	ctx->count_aad = 0;
+	ctx->count_ctr = 0;
+}
+
+/* see bearssl_aead.h; GHASHes full AAD blocks, buffers the partial tail */
+void
+br_gcm_aad_inject(br_gcm_context *ctx, const void *data, size_t len)
+{
+	size_t ptr, dlen;
+
+	ptr = (size_t)ctx->count_aad & (size_t)15;
+	if (ptr != 0) {
+		/*
+		 * If there is a partial block, then we first try to
+		 * complete it.
+		 */
+		size_t clen;
+
+		clen = 16 - ptr;
+		if (len < clen) {
+			memcpy(ctx->buf + ptr, data, len);
+			ctx->count_aad += (uint64_t)len;
+			return;
+		}
+		memcpy(ctx->buf + ptr, data, clen);
+		ctx->gh(ctx->y, ctx->h, ctx->buf, 16);
+		data = (const unsigned char *)data + clen;
+		len -= clen;
+		ctx->count_aad += (uint64_t)clen;
+	}
+
+	/*
+	 * Now AAD is aligned on a 16-byte block (with regard to GHASH).
+	 * We process all complete blocks, and save the last partial
+	 * block.
+	 */
+	dlen = len & ~(size_t)15;
+	ctx->gh(ctx->y, ctx->h, data, dlen);
+	memcpy(ctx->buf, (const unsigned char *)data + dlen, len - dlen);
+	ctx->count_aad += (uint64_t)len;
+}
+
+/* see bearssl_aead.h; ends the AAD phase by hashing any buffered bytes */
+void
+br_gcm_flip(br_gcm_context *ctx)
+{
+	/*
+	 * We complete the GHASH computation if there is a partial block.
+	 * The GHASH implementation automatically applies padding with
+	 * zeros.
+	 */
+	size_t ptr;
+
+	ptr = (size_t)ctx->count_aad & (size_t)15;
+	if (ptr != 0) {
+		ctx->gh(ctx->y, ctx->h, ctx->buf, ptr);
+	}
+}
+
+/* see bearssl_aead.h; CTR-processes data[] in place, GHASHes ciphertext */
+void
+br_gcm_run(br_gcm_context *ctx, int encrypt, void *data, size_t len)
+{
+	unsigned char *buf;
+	size_t ptr, dlen;
+
+	buf = data;
+	ptr = (size_t)ctx->count_ctr & (size_t)15;
+	if (ptr != 0) {
+		/*
+		 * If we have a partial block, then we try to complete it.
+		 */
+		size_t u, clen;
+
+		clen = 16 - ptr;
+		if (len < clen) {
+			clen = len;
+		}
+		for (u = 0; u < clen; u ++) {
+			unsigned x, y;
+
+			x = buf[u];
+			y = x ^ ctx->buf[ptr + u];
+			ctx->buf[ptr + u] = encrypt ? y : x;
+			buf[u] = y;
+		}
+		ctx->count_ctr += (uint64_t)clen;
+		buf += clen;
+		len -= clen;
+		if (ptr + clen < 16) {
+			return;
+		}
+		ctx->gh(ctx->y, ctx->h, ctx->buf, 16);
+	}
+
+	/*
+	 * Process full blocks (GHASH always runs over the ciphertext).
+	 */
+	dlen = len & ~(size_t)15;
+	if (!encrypt) {
+		ctx->gh(ctx->y, ctx->h, buf, dlen);
+	}
+	ctx->jc = (*ctx->bctx)->run(ctx->bctx, ctx->j0_1, ctx->jc, buf, dlen);
+	if (encrypt) {
+		ctx->gh(ctx->y, ctx->h, buf, dlen);
+	}
+	buf += dlen;
+	len -= dlen;
+	ctx->count_ctr += (uint64_t)dlen;
+
+	if (len > 0) {
+		/*
+		 * There is a partial block.
+		 */
+		size_t u;
+
+		memset(ctx->buf, 0, sizeof ctx->buf);
+		ctx->jc = (*ctx->bctx)->run(ctx->bctx, ctx->j0_1,
+			ctx->jc, ctx->buf, 16);
+		for (u = 0; u < len; u ++) {
+			unsigned x, y;
+
+			x = buf[u];
+			y = x ^ ctx->buf[u];
+			ctx->buf[u] = encrypt ? y : x;
+			buf[u] = y;
+		}
+		ctx->count_ctr += (uint64_t)len;
+	}
+}
+
+/* see bearssl_aead.h; finishes GHASH and writes the 16-byte tag to tag[] */
+void
+br_gcm_get_tag(br_gcm_context *ctx, void *tag)
+{
+	size_t ptr;
+	unsigned char tmp[16];
+
+	ptr = (size_t)ctx->count_ctr & (size_t)15;
+	if (ptr > 0) {
+		/*
+		 * There is a partial block: encrypted/decrypted data has
+		 * been produced, but the encrypted bytes must still be
+		 * processed by GHASH.
+		 */
+		ctx->gh(ctx->y, ctx->h, ctx->buf, ptr);
+	}
+
+	/*
+	 * Final block for GHASH: the AAD and plaintext lengths (in bits).
+	 */
+	br_enc64be(tmp, ctx->count_aad << 3);
+	br_enc64be(tmp + 8, ctx->count_ctr << 3);
+	ctx->gh(ctx->y, ctx->h, tmp, 16);
+
+	/*
+	 * Tag is the GHASH output XORed with the encryption of the
+	 * nonce with the initial counter value.
+	 */
+	memcpy(tag, ctx->y, 16);
+	(*ctx->bctx)->run(ctx->bctx, ctx->j0_1, ctx->j0_2, tag, 16);
+}
+
+/* see bearssl_aead.h; first len bytes of the tag; len must not exceed 16 */
+void
+br_gcm_get_tag_trunc(br_gcm_context *ctx, void *tag, size_t len)
+{
+	unsigned char tmp[16];
+
+	br_gcm_get_tag(ctx, tmp);
+	memcpy(tag, tmp, len);
+}
+
+/* see bearssl_aead.h; compares len tag bytes (<= 16) with no early exit */
+uint32_t
+br_gcm_check_tag_trunc(br_gcm_context *ctx, const void *tag, size_t len)
+{
+	unsigned char tmp[16];
+	size_t u;
+	int x;
+
+	br_gcm_get_tag(ctx, tmp);
+	x = 0;
+	for (u = 0; u < len; u ++) {
+		x |= tmp[u] ^ ((const unsigned char *)tag)[u];
+	}
+	return EQ0(x);
+}
+
+/* see bearssl_aead.h; full-length (16-byte) variant of the tag check */
+uint32_t
+br_gcm_check_tag(br_gcm_context *ctx, const void *tag)
+{
+	return br_gcm_check_tag_trunc(ctx, tag, 16);
+}
+
+/* see bearssl_aead.h; GCM functions cast to the generic AEAD signatures */
+const br_aead_class br_gcm_vtable = {
+	16,
+	(void (*)(const br_aead_class **, const void *, size_t))
+		&br_gcm_reset,
+	(void (*)(const br_aead_class **, const void *, size_t))
+		&br_gcm_aad_inject,
+	(void (*)(const br_aead_class **))
+		&br_gcm_flip,
+	(void (*)(const br_aead_class **, int, void *, size_t))
+		&br_gcm_run,
+	(void (*)(const br_aead_class **, void *))
+		&br_gcm_get_tag,
+	(uint32_t (*)(const br_aead_class **, const void *))
+		&br_gcm_check_tag,
+	(void (*)(const br_aead_class **, void *, size_t))
+		&br_gcm_get_tag_trunc,
+	(uint32_t (*)(const br_aead_class **, const void *, size_t))
+		&br_gcm_check_tag_trunc
+};
diff --git a/third_party/bearssl/src/ghash_ctmul.c b/third_party/bearssl/src/ghash_ctmul.c
new file mode 100644
index 0000000..3623202
--- /dev/null
+++ b/third_party/bearssl/src/ghash_ctmul.c
@@ -0,0 +1,345 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * We compute "carryless multiplications" through normal integer
+ * multiplications, masking out enough bits to create "holes" in which
+ * carries may expand without altering our bits; we really use 8 data
+ * bits per 32-bit word, spaced every fourth bit. Accumulated carries
+ * may not exceed 8 in total, which fits in 4 bits.
+ *
+ * It would be possible to use a 3-bit spacing, allowing two operands,
+ * one with 7 non-zero data bits, the other one with 10 or 11 non-zero
+ * data bits; this asymmetric splitting makes the overall code more
+ * complex with thresholds and exceptions, and does not appear to be
+ * worth the effort.
+ */
+
+/*
+ * We cannot really autodetect whether multiplications are "slow" or
+ * not. A typical example is the ARM Cortex M0+, which exists in two
+ * versions: one with a 1-cycle multiplication opcode, the other with
+ * a 32-cycle multiplication opcode. They both use exactly the same
+ * architecture and ABI, and cannot be distinguished from each other
+ * at compile-time.
+ *
+ * Since most modern CPUs (even embedded ones) still have fast
+ * multiplications, we use the "fast mul" code by default.
+ */
+
+#if BR_SLOW_MUL
+
+/*
+ * This implementation uses Karatsuba-like reduction to make fewer
+ * integer multiplications (9 instead of 16), at the expense of extra
+ * logical operations (XOR, shifts...). On modern x86 CPUs that offer
+ * fast, pipelined multiplications, this code is about half as fast as
+ * the simpler code with 16 multiplications. This tendency may be
+ * reversed on low-end platforms with expensive multiplications.
+ */
+
+#define MUL32(h, l, x, y)   do { \
+		uint64_t mul32tmp = MUL(x, y); \
+		(h) = (uint32_t)(mul32tmp >> 32); \
+		(l) = (uint32_t)mul32tmp; \
+	} while (0)
+
+static inline void
+bmul(uint32_t *hi, uint32_t *lo, uint32_t x, uint32_t y)
+{
+	uint32_t x0, x1, x2, x3;
+	uint32_t y0, y1, y2, y3;
+	uint32_t a0, a1, a2, a3, a4, a5, a6, a7, a8;
+	uint32_t b0, b1, b2, b3, b4, b5, b6, b7, b8;
+
+	x0 = x & (uint32_t)0x11111111;
+	x1 = x & (uint32_t)0x22222222;
+	x2 = x & (uint32_t)0x44444444;
+	x3 = x & (uint32_t)0x88888888;
+	y0 = y & (uint32_t)0x11111111;
+	y1 = y & (uint32_t)0x22222222;
+	y2 = y & (uint32_t)0x44444444;
+	y3 = y & (uint32_t)0x88888888;
+
+	/*
+	 * (x0+W*x1)*(y0+W*y1) -> a0:b0
+	 * (x2+W*x3)*(y2+W*y3) -> a3:b3
+	 * ((x0+x2)+W*(x1+x3))*((y0+y2)+W*(y1+y3)) -> a6:b6
+	 */
+	a0 = x0;
+	b0 = y0;
+	a1 = x1 >> 1;
+	b1 = y1 >> 1;
+	a2 = a0 ^ a1;
+	b2 = b0 ^ b1;
+	a3 = x2 >> 2;
+	b3 = y2 >> 2;
+	a4 = x3 >> 3;
+	b4 = y3 >> 3;
+	a5 = a3 ^ a4;
+	b5 = b3 ^ b4;
+	a6 = a0 ^ a3;
+	b6 = b0 ^ b3;
+	a7 = a1 ^ a4;
+	b7 = b1 ^ b4;
+	a8 = a6 ^ a7;
+	b8 = b6 ^ b7;
+
+	MUL32(b0, a0, b0, a0);
+	MUL32(b1, a1, b1, a1);
+	MUL32(b2, a2, b2, a2);
+	MUL32(b3, a3, b3, a3);
+	MUL32(b4, a4, b4, a4);
+	MUL32(b5, a5, b5, a5);
+	MUL32(b6, a6, b6, a6);
+	MUL32(b7, a7, b7, a7);
+	MUL32(b8, a8, b8, a8);
+
+	a0 &= (uint32_t)0x11111111;
+	a1 &= (uint32_t)0x11111111;
+	a2 &= (uint32_t)0x11111111;
+	a3 &= (uint32_t)0x11111111;
+	a4 &= (uint32_t)0x11111111;
+	a5 &= (uint32_t)0x11111111;
+	a6 &= (uint32_t)0x11111111;
+	a7 &= (uint32_t)0x11111111;
+	a8 &= (uint32_t)0x11111111;
+	b0 &= (uint32_t)0x11111111;
+	b1 &= (uint32_t)0x11111111;
+	b2 &= (uint32_t)0x11111111;
+	b3 &= (uint32_t)0x11111111;
+	b4 &= (uint32_t)0x11111111;
+	b5 &= (uint32_t)0x11111111;
+	b6 &= (uint32_t)0x11111111;
+	b7 &= (uint32_t)0x11111111;
+	b8 &= (uint32_t)0x11111111;
+
+	a2 ^= a0 ^ a1;
+	b2 ^= b0 ^ b1;
+	a0 ^= (a2 << 1) ^ (a1 << 2);
+	b0 ^= (b2 << 1) ^ (b1 << 2);
+	a5 ^= a3 ^ a4;
+	b5 ^= b3 ^ b4;
+	a3 ^= (a5 << 1) ^ (a4 << 2);
+	b3 ^= (b5 << 1) ^ (b4 << 2);
+	a8 ^= a6 ^ a7;
+	b8 ^= b6 ^ b7;
+	a6 ^= (a8 << 1) ^ (a7 << 2);
+	b6 ^= (b8 << 1) ^ (b7 << 2);
+	a6 ^= a0 ^ a3;
+	b6 ^= b0 ^ b3;
+	*lo = a0 ^ (a6 << 2) ^ (a3 << 4);
+	*hi = b0 ^ (b6 << 2) ^ (b3 << 4) ^ (a6 >> 30) ^ (a3 >> 28);
+}
+
+#else
+
+/*
+ * Simple 32x32->64 multiplication in GF(2)[X], 16 integer multiplications.
+ */
+
+static inline void
+bmul(uint32_t *hi, uint32_t *lo, uint32_t x, uint32_t y)
+{
+	uint32_t x0, x1, x2, x3;
+	uint32_t y0, y1, y2, y3;
+	uint64_t z0, z1, z2, z3;
+	uint64_t z;
+
+	x0 = x & (uint32_t)0x11111111;
+	x1 = x & (uint32_t)0x22222222;
+	x2 = x & (uint32_t)0x44444444;
+	x3 = x & (uint32_t)0x88888888;
+	y0 = y & (uint32_t)0x11111111;
+	y1 = y & (uint32_t)0x22222222;
+	y2 = y & (uint32_t)0x44444444;
+	y3 = y & (uint32_t)0x88888888;
+	z0 = MUL(x0, y0) ^ MUL(x1, y3) ^ MUL(x2, y2) ^ MUL(x3, y1);
+	z1 = MUL(x0, y1) ^ MUL(x1, y0) ^ MUL(x2, y3) ^ MUL(x3, y2);
+	z2 = MUL(x0, y2) ^ MUL(x1, y1) ^ MUL(x2, y0) ^ MUL(x3, y3);
+	z3 = MUL(x0, y3) ^ MUL(x1, y2) ^ MUL(x2, y1) ^ MUL(x3, y0);
+	z0 &= (uint64_t)0x1111111111111111;
+	z1 &= (uint64_t)0x2222222222222222;
+	z2 &= (uint64_t)0x4444444444444444;
+	z3 &= (uint64_t)0x8888888888888888;
+	z = z0 | z1 | z2 | z3;
+	*lo = (uint32_t)z;
+	*hi = (uint32_t)(z >> 32);
+}
+
+#endif
+
+/* see bearssl_hash.h; updates y[] in place, zero-padding the last block */
+void
+br_ghash_ctmul(void *y, const void *h, const void *data, size_t len)
+{
+	const unsigned char *buf, *hb;
+	unsigned char *yb;
+	uint32_t yw[4];
+	uint32_t hw[4];
+
+	/*
+	 * Throughout the loop we handle the y and h values as arrays
+	 * of 32-bit words (index 0 holds the last four bytes).
+	 */
+	buf = data;
+	yb = y;
+	hb = h;
+	yw[3] = br_dec32be(yb);
+	yw[2] = br_dec32be(yb + 4);
+	yw[1] = br_dec32be(yb + 8);
+	yw[0] = br_dec32be(yb + 12);
+	hw[3] = br_dec32be(hb);
+	hw[2] = br_dec32be(hb + 4);
+	hw[1] = br_dec32be(hb + 8);
+	hw[0] = br_dec32be(hb + 12);
+	while (len > 0) {
+		const unsigned char *src;
+		unsigned char tmp[16];
+		int i;
+		uint32_t a[9], b[9], zw[8];
+		uint32_t c0, c1, c2, c3, d0, d1, d2, d3, e0, e1, e2, e3;
+
+		/*
+		 * Get the next 16-byte block (using zero-padding if
+		 * necessary).
+		 */
+		if (len >= 16) {
+			src = buf;
+			buf += 16;
+			len -= 16;
+		} else {
+			memcpy(tmp, buf, len);
+			memset(tmp + len, 0, (sizeof tmp) - len);
+			src = tmp;
+			len = 0;
+		}
+
+		/*
+		 * Decode the block. The GHASH standard mandates
+		 * big-endian encoding.
+		 */
+		yw[3] ^= br_dec32be(src);
+		yw[2] ^= br_dec32be(src + 4);
+		yw[1] ^= br_dec32be(src + 8);
+		yw[0] ^= br_dec32be(src + 12);
+
+		/*
+		 * We multiply two 128-bit field elements. We use
+		 * Karatsuba to turn that into three 64-bit
+		 * multiplications, which are themselves done with a
+		 * total of nine 32-bit multiplications.
+		 */
+
+		/*
+		 * y[0,1]*h[0,1] -> 0..2
+		 * y[2,3]*h[2,3] -> 3..5
+		 * (y[0,1]+y[2,3])*(h[0,1]+h[2,3]) -> 6..8
+		 */
+		a[0] = yw[0];
+		b[0] = hw[0];
+		a[1] = yw[1];
+		b[1] = hw[1];
+		a[2] = a[0] ^ a[1];
+		b[2] = b[0] ^ b[1];
+
+		a[3] = yw[2];
+		b[3] = hw[2];
+		a[4] = yw[3];
+		b[4] = hw[3];
+		a[5] = a[3] ^ a[4];
+		b[5] = b[3] ^ b[4];
+
+		a[6] = a[0] ^ a[3];
+		b[6] = b[0] ^ b[3];
+		a[7] = a[1] ^ a[4];
+		b[7] = b[1] ^ b[4];
+		a[8] = a[6] ^ a[7];
+		b[8] = b[6] ^ b[7];
+
+		for (i = 0; i < 9; i ++) {
+			bmul(&b[i], &a[i], b[i], a[i]);
+		}
+
+		c0 = a[0];
+		c1 = b[0] ^ a[2] ^ a[0] ^ a[1];
+		c2 = a[1] ^ b[2] ^ b[0] ^ b[1];
+		c3 = b[1];
+		d0 = a[3];
+		d1 = b[3] ^ a[5] ^ a[3] ^ a[4];
+		d2 = a[4] ^ b[5] ^ b[3] ^ b[4];
+		d3 = b[4];
+		e0 = a[6];
+		e1 = b[6] ^ a[8] ^ a[6] ^ a[7];
+		e2 = a[7] ^ b[8] ^ b[6] ^ b[7];
+		e3 = b[7];
+
+		e0 ^= c0 ^ d0;
+		e1 ^= c1 ^ d1;
+		e2 ^= c2 ^ d2;
+		e3 ^= c3 ^ d3;
+		c2 ^= e0;
+		c3 ^= e1;
+		d0 ^= e2;
+		d1 ^= e3;
+
+		/*
+		 * GHASH specification has the bits "reversed" (most
+		 * significant is in fact least significant), which does
+		 * not matter for a carryless multiplication, except that
+		 * the 255-bit result must be shifted by 1 bit.
+		 */
+		zw[0] = c0 << 1;
+		zw[1] = (c1 << 1) | (c0 >> 31);
+		zw[2] = (c2 << 1) | (c1 >> 31);
+		zw[3] = (c3 << 1) | (c2 >> 31);
+		zw[4] = (d0 << 1) | (c3 >> 31);
+		zw[5] = (d1 << 1) | (d0 >> 31);
+		zw[6] = (d2 << 1) | (d1 >> 31);
+		zw[7] = (d3 << 1) | (d2 >> 31);
+
+		/*
+		 * We now do the reduction modulo the field polynomial
+		 * to get back to 128 bits (kept in zw[4..7]).
+		 */
+		for (i = 0; i < 4; i ++) {
+			uint32_t lw;
+
+			lw = zw[i];
+			zw[i + 4] ^= lw ^ (lw >> 1) ^ (lw >> 2) ^ (lw >> 7);
+			zw[i + 3] ^= (lw << 31) ^ (lw << 30) ^ (lw << 25);
+		}
+		memcpy(yw, zw + 4, sizeof yw);
+	}
+
+	/*
+	 * Encode back the result.
+	 */
+	br_enc32be(yb, yw[3]);
+	br_enc32be(yb + 4, yw[2]);
+	br_enc32be(yb + 8, yw[1]);
+	br_enc32be(yb + 12, yw[0]);
+}
diff --git a/third_party/bearssl/src/ghash_ctmul32.c b/third_party/bearssl/src/ghash_ctmul32.c
new file mode 100644
index 0000000..c66af46
--- /dev/null
+++ b/third_party/bearssl/src/ghash_ctmul32.c
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * This implementation uses 32-bit multiplications, and only the low
+ * 32 bits for each multiplication result. This is meant primarily for
+ * the ARM Cortex M0 and M0+, whose multiplication opcode does not yield
+ * the upper 32 bits; but it might also be useful on architectures where
+ * access to the upper 32 bits requires use of specific registers that
+ * create contention (e.g. on i386, "mul" necessarily outputs the result
+ * in edx:eax, while "imul" can use any registers but is limited to the
+ * low 32 bits).
+ *
+ * The implementation trick that is used here is bit-reversing (bit 0
+ * is swapped with bit 31, bit 1 with bit 30, and so on). In GF(2)[X],
+ * for all values x and y, we have:
+ *    rev32(x) * rev32(y) = rev64(x * y)
+ * In other words, if we bit-reverse (over 32 bits) the operands, then we
+ * bit-reverse (over 64 bits) the result.
+ */
+
+/*
+ * Carry-less product of x and y in GF(2)[X], truncated to its low 32 bits.
+ */
+static inline uint32_t
+bmul32(uint32_t x, uint32_t y)
+{
+	uint32_t x0, x1, x2, x3;
+	uint32_t y0, y1, y2, y3;
+	uint32_t z0, z1, z2, z3;
+
+	x0 = x & (uint32_t)0x11111111;  /* bits of x whose index is 0 mod 4 */
+	x1 = x & (uint32_t)0x22222222;  /* bits of x whose index is 1 mod 4 */
+	x2 = x & (uint32_t)0x44444444;  /* bits of x whose index is 2 mod 4 */
+	x3 = x & (uint32_t)0x88888888;  /* bits of x whose index is 3 mod 4 */
+	y0 = y & (uint32_t)0x11111111;  /* same four-way split for y */
+	y1 = y & (uint32_t)0x22222222;
+	y2 = y & (uint32_t)0x44444444;
+	y3 = y & (uint32_t)0x88888888;
+	z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1);  /* index sums are 0 mod 4 */
+	z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2);  /* index sums are 1 mod 4 */
+	z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3);  /* index sums are 2 mod 4 */
+	z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0);  /* index sums are 3 mod 4 */
+	z0 &= (uint32_t)0x11111111;  /* keep only the bit positions each zK is responsible for */
+	z1 &= (uint32_t)0x22222222;  /* NOTE(review): the discarded bits presumably hold the */
+	z2 &= (uint32_t)0x44444444;  /* carries of the integer multiplications -- confirm */
+	z3 &= (uint32_t)0x88888888;
+	return z0 | z1 | z2 | z3;  /* the four masks are disjoint, so OR just merges them */
+}
+
+/*
+ * Bit-reverse a 32-bit word (bit i of x becomes bit 31-i of the result).
+ */
+static uint32_t
+rev32(uint32_t x)
+{
+#define RMS(m, s)   do { \
+		x = ((x & (uint32_t)(m)) << (s)) \
+			| ((x >> (s)) & (uint32_t)(m)); \
+	} while (0)
+
+	RMS(0x55555555, 1);  /* swap adjacent bits */
+	RMS(0x33333333, 2);  /* swap adjacent 2-bit groups */
+	RMS(0x0F0F0F0F, 4);  /* swap the two nibbles of each byte */
+	RMS(0x00FF00FF, 8);  /* swap the two bytes of each 16-bit half */
+	return (x << 16) | (x >> 16);  /* finally swap the two 16-bit halves */
+
+#undef RMS
+}
+
+/* GHASH update built on bmul32() and rev32(); see bearssl_hash.h */
+void
+br_ghash_ctmul32(void *y, const void *h, const void *data, size_t len)
+{
+	/*
+	 * Same structure as br_ghash_ctmul(), except that each
+	 * multiplication is done twice, with the "normal" and "bit
+	 * reversed" operands: eighteen 32-bit multiplications instead
+	 * of nine. The 16 bytes at y are read, then updated in place.
+	 */
+
+	const unsigned char *buf, *hb;
+	unsigned char *yb;
+	uint32_t yw[4];
+	uint32_t hw[4], hwr[4];
+
+	buf = data;
+	yb = y;
+	hb = h;
+	yw[3] = br_dec32be(yb);  /* first four bytes of y go to word 3 ... */
+	yw[2] = br_dec32be(yb + 4);
+	yw[1] = br_dec32be(yb + 8);
+	yw[0] = br_dec32be(yb + 12);  /* ... last four bytes go to word 0 */
+	hw[3] = br_dec32be(hb);  /* h uses the same word layout as y */
+	hw[2] = br_dec32be(hb + 4);
+	hw[1] = br_dec32be(hb + 8);
+	hw[0] = br_dec32be(hb + 12);
+	hwr[3] = rev32(hw[3]);  /* bit-reversed words of h, computed once for all blocks */
+	hwr[2] = rev32(hw[2]);
+	hwr[1] = rev32(hw[1]);
+	hwr[0] = rev32(hw[0]);
+	while (len > 0) {
+		const unsigned char *src;
+		unsigned char tmp[16];
+		int i;
+		uint32_t a[18], b[18], c[18];
+		uint32_t d0, d1, d2, d3, d4, d5, d6, d7;
+		uint32_t zw[8];
+
+		if (len >= 16) {
+			src = buf;  /* full block: read it in place */
+			buf += 16;
+			len -= 16;
+		} else {
+			memcpy(tmp, buf, len);  /* final partial block: copy it ... */
+			memset(tmp + len, 0, (sizeof tmp) - len);  /* ... and pad with zeros to 16 bytes */
+			src = tmp;
+			len = 0;
+		}
+		yw[3] ^= br_dec32be(src);  /* XOR the block into y before multiplying by h */
+		yw[2] ^= br_dec32be(src + 4);
+		yw[1] ^= br_dec32be(src + 8);
+		yw[0] ^= br_dec32be(src + 12);
+
+		/*
+		 * We are using Karatsuba: the 128x128 multiplication is
+		 * reduced to three 64x64 multiplications, hence nine
+		 * 32x32 multiplications. With the bit-reversal trick,
+		 * we have to perform 18 32x32 multiplications.
+		 */
+
+		/*
+		 * y[0,1]*h[0,1] -> 0,1,4
+		 * y[2,3]*h[2,3] -> 2,3,5
+		 * (y[0,1]+y[2,3])*(h[0,1]+h[2,3]) -> 6,7,8
+		 */
+
+		a[0] = yw[0];  /* a[0..8]: Karatsuba operands derived from y */
+		a[1] = yw[1];
+		a[2] = yw[2];
+		a[3] = yw[3];
+		a[4] = a[0] ^ a[1];
+		a[5] = a[2] ^ a[3];
+		a[6] = a[0] ^ a[2];
+		a[7] = a[1] ^ a[3];
+		a[8] = a[6] ^ a[7];
+
+		a[ 9] = rev32(yw[0]);  /* a[9..17]: same combinations of the bit-reversed words */
+		a[10] = rev32(yw[1]);
+		a[11] = rev32(yw[2]);
+		a[12] = rev32(yw[3]);
+		a[13] = a[ 9] ^ a[10];
+		a[14] = a[11] ^ a[12];
+		a[15] = a[ 9] ^ a[11];
+		a[16] = a[10] ^ a[12];
+		a[17] = a[15] ^ a[16];
+
+		b[0] = hw[0];  /* b[0..8]: Karatsuba operands derived from h */
+		b[1] = hw[1];
+		b[2] = hw[2];
+		b[3] = hw[3];
+		b[4] = b[0] ^ b[1];
+		b[5] = b[2] ^ b[3];
+		b[6] = b[0] ^ b[2];
+		b[7] = b[1] ^ b[3];
+		b[8] = b[6] ^ b[7];
+
+		b[ 9] = hwr[0];  /* b[9..17]: same combinations of the bit-reversed words of h */
+		b[10] = hwr[1];
+		b[11] = hwr[2];
+		b[12] = hwr[3];
+		b[13] = b[ 9] ^ b[10];
+		b[14] = b[11] ^ b[12];
+		b[15] = b[ 9] ^ b[11];
+		b[16] = b[10] ^ b[12];
+		b[17] = b[15] ^ b[16];
+
+		for (i = 0; i < 18; i ++) {
+			c[i] = bmul32(a[i], b[i]);  /* c[0..8]: plain products; c[9..17]: reversed products */
+		}
+
+		c[4] ^= c[0] ^ c[1];  /* Karatsuba middle terms, plain products */
+		c[5] ^= c[2] ^ c[3];
+		c[8] ^= c[6] ^ c[7];
+
+		c[13] ^= c[ 9] ^ c[10];  /* Karatsuba middle terms, reversed products */
+		c[14] ^= c[11] ^ c[12];
+		c[17] ^= c[15] ^ c[16];
+
+		/*
+		 * y[0,1]*h[0,1] -> 0,9^4,1^13,10
+		 * y[2,3]*h[2,3] -> 2,11^5,3^14,12
+		 * (y[0,1]+y[2,3])*(h[0,1]+h[2,3]) -> 6,15^8,7^17,16
+		 */
+		d0 = c[0];  /* d0..d7: words of the 256-bit product, d0 being the lowest */
+		d1 = c[4] ^ (rev32(c[9]) >> 1);  /* rev32(...) >> 1 turns a reversed product into upper product bits */
+		d2 = c[1] ^ c[0] ^ c[2] ^ c[6] ^ (rev32(c[13]) >> 1);
+		d3 = c[4] ^ c[5] ^ c[8]
+			^ (rev32(c[10] ^ c[9] ^ c[11] ^ c[15]) >> 1);
+		d4 = c[2] ^ c[1] ^ c[3] ^ c[7]
+			^ (rev32(c[13] ^ c[14] ^ c[17]) >> 1);
+		d5 = c[5] ^ (rev32(c[11] ^ c[10] ^ c[12] ^ c[16]) >> 1);
+		d6 = c[3] ^ (rev32(c[14]) >> 1);
+		d7 = rev32(c[12]) >> 1;
+
+		zw[0] = d0 << 1;  /* zw[] = d7..d0 shifted left by one bit as a single 256-bit value */
+		zw[1] = (d1 << 1) | (d0 >> 31);
+		zw[2] = (d2 << 1) | (d1 >> 31);
+		zw[3] = (d3 << 1) | (d2 >> 31);
+		zw[4] = (d4 << 1) | (d3 >> 31);
+		zw[5] = (d5 << 1) | (d4 >> 31);
+		zw[6] = (d6 << 1) | (d5 >> 31);
+		zw[7] = (d7 << 1) | (d6 >> 31);
+
+		for (i = 0; i < 4; i ++) {  /* fold the four low words into the words above them */
+			uint32_t lw;
+
+			lw = zw[i];
+			zw[i + 4] ^= lw ^ (lw >> 1) ^ (lw >> 2) ^ (lw >> 7);
+			zw[i + 3] ^= (lw << 31) ^ (lw << 30) ^ (lw << 25);
+		}
+		memcpy(yw, zw + 4, sizeof yw);  /* the four upper words are the new y */
+	}
+	br_enc32be(yb, yw[3]);  /* write y back with the same layout used when reading it */
+	br_enc32be(yb + 4, yw[2]);
+	br_enc32be(yb + 8, yw[1]);
+	br_enc32be(yb + 12, yw[0]);
+}
diff --git a/third_party/bearssl/src/ghash_ctmul64.c b/third_party/bearssl/src/ghash_ctmul64.c
new file mode 100644
index 0000000..a46f16f
--- /dev/null
+++ b/third_party/bearssl/src/ghash_ctmul64.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * This is the 64-bit variant of br_ghash_ctmul32(), with 64-bit operands
+ * and bit reversal of 64-bit words.
+ */
+
+static inline uint64_t
+bmul64(uint64_t x, uint64_t y)
+{
+	uint64_t x0, x1, x2, x3;
+	uint64_t y0, y1, y2, y3;
+	uint64_t z0, z1, z2, z3;
+
+	x0 = x & (uint64_t)0x1111111111111111;  /* bits of x whose index is 0 mod 4 */
+	x1 = x & (uint64_t)0x2222222222222222;  /* bits of x whose index is 1 mod 4 */
+	x2 = x & (uint64_t)0x4444444444444444;  /* bits of x whose index is 2 mod 4 */
+	x3 = x & (uint64_t)0x8888888888888888;  /* bits of x whose index is 3 mod 4 */
+	y0 = y & (uint64_t)0x1111111111111111;  /* same four-way split for y */
+	y1 = y & (uint64_t)0x2222222222222222;
+	y2 = y & (uint64_t)0x4444444444444444;
+	y3 = y & (uint64_t)0x8888888888888888;
+	z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1);  /* index sums are 0 mod 4 */
+	z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2);  /* index sums are 1 mod 4 */
+	z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3);  /* index sums are 2 mod 4 */
+	z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0);  /* index sums are 3 mod 4 */
+	z0 &= (uint64_t)0x1111111111111111;  /* keep only the bit positions each zK is responsible for */
+	z1 &= (uint64_t)0x2222222222222222;
+	z2 &= (uint64_t)0x4444444444444444;
+	z3 &= (uint64_t)0x8888888888888888;
+	return z0 | z1 | z2 | z3;  /* low 64 bits of the product; the masks are disjoint */
+}
+
+static uint64_t
+rev64(uint64_t x)
+{
+#define RMS(m, s)   do { \
+		x = ((x & (uint64_t)(m)) << (s)) \
+			| ((x >> (s)) & (uint64_t)(m)); \
+	} while (0)
+
+	RMS(0x5555555555555555,  1);  /* swap adjacent bits */
+	RMS(0x3333333333333333,  2);  /* swap adjacent 2-bit groups */
+	RMS(0x0F0F0F0F0F0F0F0F,  4);  /* swap the two nibbles of each byte */
+	RMS(0x00FF00FF00FF00FF,  8);  /* swap adjacent bytes */
+	RMS(0x0000FFFF0000FFFF, 16);  /* swap adjacent 16-bit groups */
+	return (x << 32) | (x >> 32);  /* swap the two 32-bit halves: x is now bit-reversed */
+
+#undef RMS
+}
+
+/* see bearssl_hash.h */
+void
+br_ghash_ctmul64(void *y, const void *h, const void *data, size_t len)
+{
+	const unsigned char *buf, *hb;
+	unsigned char *yb;
+	uint64_t y0, y1;
+	uint64_t h0, h1, h2, h0r, h1r, h2r;
+
+	buf = data;
+	yb = y;
+	hb = h;
+	y1 = br_dec64be(yb);  /* first eight bytes of y */
+	y0 = br_dec64be(yb + 8);  /* last eight bytes of y */
+	h1 = br_dec64be(hb);  /* h is split the same way */
+	h0 = br_dec64be(hb + 8);
+	h0r = rev64(h0);  /* bit-reversed halves of h, computed once for all blocks */
+	h1r = rev64(h1);
+	h2 = h0 ^ h1;  /* Karatsuba operand: XOR of the two halves */
+	h2r = h0r ^ h1r;
+	while (len > 0) {
+		const unsigned char *src;
+		unsigned char tmp[16];
+		uint64_t y0r, y1r, y2, y2r;
+		uint64_t z0, z1, z2, z0h, z1h, z2h;
+		uint64_t v0, v1, v2, v3;
+
+		if (len >= 16) {
+			src = buf;  /* full block: read it in place */
+			buf += 16;
+			len -= 16;
+		} else {
+			memcpy(tmp, buf, len);  /* final partial block: copy it ... */
+			memset(tmp + len, 0, (sizeof tmp) - len);  /* ... and pad with zeros to 16 bytes */
+			src = tmp;
+			len = 0;
+		}
+		y1 ^= br_dec64be(src);  /* XOR the block into y before multiplying by h */
+		y0 ^= br_dec64be(src + 8);
+
+		y0r = rev64(y0);
+		y1r = rev64(y1);
+		y2 = y0 ^ y1;
+		y2r = y0r ^ y1r;
+
+		z0 = bmul64(y0, h0);  /* low 64 bits of each of the three Karatsuba products */
+		z1 = bmul64(y1, h1);
+		z2 = bmul64(y2, h2);
+		z0h = bmul64(y0r, h0r);  /* same products on bit-reversed operands */
+		z1h = bmul64(y1r, h1r);
+		z2h = bmul64(y2r, h2r);
+		z2 ^= z0 ^ z1;  /* Karatsuba middle term */
+		z2h ^= z0h ^ z1h;
+		z0h = rev64(z0h) >> 1;  /* reverse back and shift: upper bits of each product */
+		z1h = rev64(z1h) >> 1;
+		z2h = rev64(z2h) >> 1;
+
+		v0 = z0;  /* v3:v2:v1:v0 is the 256-bit product, v0 being the lowest word */
+		v1 = z0h ^ z2;
+		v2 = z1 ^ z2h;
+		v3 = z1h;
+
+		v3 = (v3 << 1) | (v2 >> 63);  /* shift the 256-bit value left by one bit */
+		v2 = (v2 << 1) | (v1 >> 63);
+		v1 = (v1 << 1) | (v0 >> 63);
+		v0 = (v0 << 1);
+
+		v2 ^= v0 ^ (v0 >> 1) ^ (v0 >> 2) ^ (v0 >> 7);  /* fold v0 into v2:v1 */
+		v1 ^= (v0 << 63) ^ (v0 << 62) ^ (v0 << 57);
+		v3 ^= v1 ^ (v1 >> 1) ^ (v1 >> 2) ^ (v1 >> 7);  /* fold v1 into v3:v2 */
+		v2 ^= (v1 << 63) ^ (v1 << 62) ^ (v1 << 57);
+
+		y0 = v2;  /* the two upper words are the new y */
+		y1 = v3;
+	}
+
+	br_enc64be(yb, y1);  /* write y back with the same layout used when reading it */
+	br_enc64be(yb + 8, y0);
+}
diff --git a/third_party/bearssl/src/ghash_pclmul.c b/third_party/bearssl/src/ghash_pclmul.c
new file mode 100644
index 0000000..a58e7dc
--- /dev/null
+++ b/third_party/bearssl/src/ghash_pclmul.c
@@ -0,0 +1,389 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+/*
+ * This is the GHASH implementation that leverages the pclmulqdq opcode
+ * (from the AES-NI instructions).
+ */
+
+#if BR_AES_X86NI
+
+/*
+ * Test CPU support for PCLMULQDQ; the result comes straight from br_cpuid().
+ */
+static inline int
+pclmul_supported(void)
+{
+	/*
+	 * Bit mask for features in ECX (only the third argument is nonzero):
+	 *    1   PCLMULQDQ support (bit index 1, i.e. mask 0x00000002)
+	 */
+	return br_cpuid(0, 0, 0x00000002, 0);
+}
+
+/* Returns &br_ghash_pclmul if pclmul_supported() is nonzero, else 0; see bearssl_hash.h */
+br_ghash
+br_ghash_pclmul_get(void)
+{
+	return pclmul_supported() ? &br_ghash_pclmul : 0;
+}
+
+BR_TARGETS_X86_UP
+
+/*
+ * GHASH is defined over elements of GF(2^128) with "full little-endian"
+ * representation: leftmost byte is least significant, and, within each
+ * byte, leftmost _bit_ is least significant. The natural ordering in
+ * x86 is "mixed little-endian": bytes are ordered from least to most
+ * significant, but bits within a byte are in most-to-least significant
+ * order. Going to full little-endian representation would require
+ * reversing bits within each byte, which is doable but expensive.
+ *
+ * Instead, we go to full big-endian representation, by swapping bytes
+ * around, which is done with a single _mm_shuffle_epi8() opcode (it
+ * comes with SSSE3; all CPUs that offer pclmulqdq also have SSSE3). We
+ * can use a full big-endian representation because in a carryless
+ * multiplication, we have a nice bit reversal property:
+ *
+ *    rev_128(x) * rev_128(y) = rev_255(x * y)
+ *
+ * So by using full big-endian, we still get the right result, except
+ * that it is right-shifted by 1 bit. The left-shift is relatively
+ * inexpensive, and it can be mutualised.
+ *
+ *
+ * Since SSE2 opcodes do not have facilities for shifting full 128-bit
+ * values with bit precision, we have to break down values into 64-bit
+ * chunks. We number chunks from 0 to 3 in left to right order.
+ */
+
+/*
+ * Byte-swap a complete 128-bit value. This normally uses
+ * _mm_shuffle_epi8(), which gets translated to pshufb (an SSSE3 opcode).
+ * However, this crashes old Clang versions, so, for Clang before 3.8,
+ * we use an alternate (and less efficient) version.
+ */
+#if BR_CLANG && !BR_CLANG_3_8
+#define BYTESWAP_DECL
+#define BYTESWAP_PREP   (void)0
+#define BYTESWAP(x)   do { \
+		__m128i byteswap1, byteswap2; \
+		byteswap1 = (x); \
+		byteswap2 = _mm_srli_epi16(byteswap1, 8); \
+		byteswap1 = _mm_slli_epi16(byteswap1, 8); \
+		byteswap1 = _mm_or_si128(byteswap1, byteswap2); \
+		byteswap1 = _mm_shufflelo_epi16(byteswap1, 0x1B); \
+		byteswap1 = _mm_shufflehi_epi16(byteswap1, 0x1B); \
+		(x) = _mm_shuffle_epi32(byteswap1, 0x4E); \
+	} while (0)
+#else
+#define BYTESWAP_DECL   __m128i byteswap_index;
+#define BYTESWAP_PREP   do { \
+		byteswap_index = _mm_set_epi8( \
+			0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
+	} while (0)
+#define BYTESWAP(x)   do { \
+		(x) = _mm_shuffle_epi8((x), byteswap_index); \
+	} while (0)
+#endif
+
+/*
+ * Call pclmulqdq. Clang appears to have trouble with the intrinsic, so,
+ * for that compiler, we use inline assembly. Inline assembly is
+ * potentially a bit slower because the compiler does not understand
+ * what the opcode does, and thus cannot optimize instruction
+ * scheduling.
+ *
+ * We use a target of "sse2" only, so that Clang may still handle the
+ * '__m128i' type and allocate SSE2 registers.
+ */
+#if BR_CLANG
+BR_TARGET("sse2")
+static inline __m128i
+pclmulqdq00(__m128i x, __m128i y)
+{
+	__asm__ ("pclmulqdq $0x00, %1, %0" : "+x" (x) : "x" (y));
+	return x;
+}
+BR_TARGET("sse2")
+static inline __m128i
+pclmulqdq11(__m128i x, __m128i y)
+{
+	__asm__ ("pclmulqdq $0x11, %1, %0" : "+x" (x) : "x" (y));
+	return x;
+}
+#else
+#define pclmulqdq00(x, y)   _mm_clmulepi64_si128(x, y, 0x00)
+#define pclmulqdq11(x, y)   _mm_clmulepi64_si128(x, y, 0x11)
+#endif
+
+/*
+ * From a 128-bit value kw, compute kx as the XOR of the two 64-bit
+ * halves of kw (into the right half of kx; left half is unspecified).
+ */
+#define BK(kw, kx)   do { \
+		kx = _mm_xor_si128(kw, _mm_shuffle_epi32(kw, 0x0E)); \
+	} while (0)
+
+/*
+ * Combine two 64-bit values (k0:k1) into a 128-bit (kw) value and
+ * the XOR of the two values (kx).
+ */
+#define PBK(k0, k1, kw, kx)   do { \
+		kw = _mm_unpacklo_epi64(k1, k0); \
+		kx = _mm_xor_si128(k0, k1); \
+	} while (0)
+
+/*
+ * Left-shift by 1 bit a 256-bit value (in four 64-bit words).
+ */
+#define SL_256(x0, x1, x2, x3)   do { \
+		x0 = _mm_or_si128( \
+			_mm_slli_epi64(x0, 1), \
+			_mm_srli_epi64(x1, 63)); \
+		x1 = _mm_or_si128( \
+			_mm_slli_epi64(x1, 1), \
+			_mm_srli_epi64(x2, 63)); \
+		x2 = _mm_or_si128( \
+			_mm_slli_epi64(x2, 1), \
+			_mm_srli_epi64(x3, 63)); \
+		x3 = _mm_slli_epi64(x3, 1); \
+	} while (0)
+
+/*
+ * Perform reduction in GF(2^128). The 256-bit value is in x0..x3;
+ * result is written in x0..x1.
+ */
+#define REDUCE_F128(x0, x1, x2, x3)   do { \
+		x1 = _mm_xor_si128( \
+			x1, \
+			_mm_xor_si128( \
+				_mm_xor_si128( \
+					x3, \
+					_mm_srli_epi64(x3, 1)), \
+				_mm_xor_si128( \
+					_mm_srli_epi64(x3, 2), \
+					_mm_srli_epi64(x3, 7)))); \
+		x2 = _mm_xor_si128( \
+			_mm_xor_si128( \
+				x2, \
+				_mm_slli_epi64(x3, 63)), \
+			_mm_xor_si128( \
+				_mm_slli_epi64(x3, 62), \
+				_mm_slli_epi64(x3, 57))); \
+		x0 = _mm_xor_si128( \
+			x0, \
+			_mm_xor_si128( \
+				_mm_xor_si128( \
+					x2, \
+					_mm_srli_epi64(x2, 1)), \
+				_mm_xor_si128( \
+					_mm_srli_epi64(x2, 2), \
+					_mm_srli_epi64(x2, 7)))); \
+		x1 = _mm_xor_si128( \
+			_mm_xor_si128( \
+				x1, \
+				_mm_slli_epi64(x2, 63)), \
+			_mm_xor_si128( \
+				_mm_slli_epi64(x2, 62), \
+				_mm_slli_epi64(x2, 57))); \
+	} while (0)
+
+/*
+ * Square value kw into (dw,dx).
+ */
+#define SQUARE_F128(kw, dw, dx)   do { \
+		__m128i z0, z1, z2, z3; \
+		z1 = pclmulqdq11(kw, kw); \
+		z3 = pclmulqdq00(kw, kw); \
+		z0 = _mm_shuffle_epi32(z1, 0x0E); \
+		z2 = _mm_shuffle_epi32(z3, 0x0E); \
+		SL_256(z0, z1, z2, z3); \
+		REDUCE_F128(z0, z1, z2, z3); \
+		PBK(z0, z1, dw, dx); \
+	} while (0)
+
+/* GHASH update using pclmulqdq, four blocks at a time when possible; see bearssl_hash.h */
+BR_TARGET("ssse3,pclmul")
+void
+br_ghash_pclmul(void *y, const void *h, const void *data, size_t len)
+{
+	const unsigned char *buf1, *buf2;
+	unsigned char tmp[64];  /* holds the zero-padded tail when len is not a multiple of 16 */
+	size_t num4, num1;
+	__m128i yw, h1w, h1x;
+	BYTESWAP_DECL
+
+	/*
+	 * We split data into two chunks. First chunk starts at buf1
+	 * and contains num4 blocks of 64-byte values. Second chunk
+	 * starts at buf2 and contains num1 blocks of 16-byte values.
+	 * We want the first chunk to be as large as possible.
+	 */
+	buf1 = data;
+	num4 = len >> 6;
+	len &= 63;  /* bytes left over after the 64-byte groups */
+	buf2 = buf1 + (num4 << 6);
+	num1 = (len + 15) >> 4;  /* leftover 16-byte blocks, rounded up */
+	if ((len & 15) != 0) {
+		memcpy(tmp, buf2, len);  /* last block is partial: work on a padded copy */
+		memset(tmp + len, 0, (num1 << 4) - len);
+		buf2 = tmp;
+	}
+
+	/*
+	 * Preparatory step for endian conversions.
+	 */
+	BYTESWAP_PREP;
+
+	/*
+	 * Load y and h.
+	 */
+	yw = _mm_loadu_si128(y);
+	h1w = _mm_loadu_si128(h);
+	BYTESWAP(yw);
+	BYTESWAP(h1w);
+	BK(h1w, h1x);  /* h1x: XOR of the two 64-bit halves of h, for the middle product */
+
+	if (num4 > 0) {
+		__m128i h2w, h2x, h3w, h3x, h4w, h4x;
+		__m128i t0, t1, t2, t3;
+
+		/*
+		 * Compute h2 = h^2.
+		 */
+		SQUARE_F128(h1w, h2w, h2x);
+
+		/*
+		 * Compute h3 = h^3 = h*(h^2).
+		 */
+		t1 = pclmulqdq11(h1w, h2w);  /* product of the halves selected by 0x11 */
+		t3 = pclmulqdq00(h1w, h2w);  /* product of the halves selected by 0x00 */
+		t2 = _mm_xor_si128(pclmulqdq00(h1x, h2x),
+			_mm_xor_si128(t1, t3));  /* Karatsuba middle term */
+		t0 = _mm_shuffle_epi32(t1, 0x0E);  /* spread the three products over t0..t3 */
+		t1 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E));
+		t2 = _mm_xor_si128(t2, _mm_shuffle_epi32(t3, 0x0E));
+		SL_256(t0, t1, t2, t3);
+		REDUCE_F128(t0, t1, t2, t3);
+		PBK(t0, t1, h3w, h3x);
+
+		/*
+		 * Compute h4 = h^4 = (h^2)^2.
+		 */
+		SQUARE_F128(h2w, h4w, h4x);
+
+		while (num4 -- > 0) {
+			__m128i aw0, aw1, aw2, aw3;
+			__m128i ax0, ax1, ax2, ax3;
+
+			aw0 = _mm_loadu_si128((void *)(buf1 +  0));  /* four consecutive 16-byte blocks */
+			aw1 = _mm_loadu_si128((void *)(buf1 + 16));
+			aw2 = _mm_loadu_si128((void *)(buf1 + 32));
+			aw3 = _mm_loadu_si128((void *)(buf1 + 48));
+			BYTESWAP(aw0);
+			BYTESWAP(aw1);
+			BYTESWAP(aw2);
+			BYTESWAP(aw3);
+			buf1 += 64;
+
+			aw0 = _mm_xor_si128(aw0, yw);  /* current y is folded into the first block only */
+			BK(aw1, ax1);
+			BK(aw2, ax2);
+			BK(aw3, ax3);
+			BK(aw0, ax0);  /* done after the XOR so that ax0 includes y */
+
+			t1 = _mm_xor_si128(  /* blocks 0..3 are paired with h4, h3, h2, h1 respectively */
+				_mm_xor_si128(
+					pclmulqdq11(aw0, h4w),
+					pclmulqdq11(aw1, h3w)),
+				_mm_xor_si128(
+					pclmulqdq11(aw2, h2w),
+					pclmulqdq11(aw3, h1w)));
+			t3 = _mm_xor_si128(
+				_mm_xor_si128(
+					pclmulqdq00(aw0, h4w),
+					pclmulqdq00(aw1, h3w)),
+				_mm_xor_si128(
+					pclmulqdq00(aw2, h2w),
+					pclmulqdq00(aw3, h1w)));
+			t2 = _mm_xor_si128(
+				_mm_xor_si128(
+					pclmulqdq00(ax0, h4x),
+					pclmulqdq00(ax1, h3x)),
+				_mm_xor_si128(
+					pclmulqdq00(ax2, h2x),
+					pclmulqdq00(ax3, h1x)));
+			t2 = _mm_xor_si128(t2, _mm_xor_si128(t1, t3));  /* Karatsuba middle term */
+			t0 = _mm_shuffle_epi32(t1, 0x0E);
+			t1 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E));
+			t2 = _mm_xor_si128(t2, _mm_shuffle_epi32(t3, 0x0E));
+			SL_256(t0, t1, t2, t3);  /* one shift and one reduction for all four blocks */
+			REDUCE_F128(t0, t1, t2, t3);
+			yw = _mm_unpacklo_epi64(t1, t0);  /* repack the reduced t0:t1 as the new y */
+		}
+	}
+
+	while (num1 -- > 0) {
+		__m128i aw, ax;
+		__m128i t0, t1, t2, t3;
+
+		aw = _mm_loadu_si128((void *)buf2);
+		BYTESWAP(aw);
+		buf2 += 16;
+
+		aw = _mm_xor_si128(aw, yw);  /* XOR the block into y before multiplying by h */
+		BK(aw, ax);
+
+		t1 = pclmulqdq11(aw, h1w);
+		t3 = pclmulqdq00(aw, h1w);
+		t2 = pclmulqdq00(ax, h1x);
+		t2 = _mm_xor_si128(t2, _mm_xor_si128(t1, t3));  /* Karatsuba middle term */
+		t0 = _mm_shuffle_epi32(t1, 0x0E);
+		t1 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E));
+		t2 = _mm_xor_si128(t2, _mm_shuffle_epi32(t3, 0x0E));
+		SL_256(t0, t1, t2, t3);
+		REDUCE_F128(t0, t1, t2, t3);
+		yw = _mm_unpacklo_epi64(t1, t0);  /* repack the reduced t0:t1 as the new y */
+	}
+
+	BYTESWAP(yw);  /* back to the byte order y was loaded in */
+	_mm_storeu_si128(y, yw);
+}
+
+BR_TARGETS_X86_DOWN
+
+#else
+
+/* BR_AES_X86NI is not set: no pclmul implementation, always returns 0; see bearssl_hash.h */
+br_ghash
+br_ghash_pclmul_get(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/ghash_pwr8.c b/third_party/bearssl/src/ghash_pwr8.c
new file mode 100644
index 0000000..2e7b0f4
--- /dev/null
+++ b/third_party/bearssl/src/ghash_pwr8.c
@@ -0,0 +1,411 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS   1
+#include "inner.h"
+
+/*
+ * This is the GHASH implementation that leverages the POWER8 opcodes.
+ */
+
+#if BR_POWER8
+
+/*
+ * Some symbolic names for registers.
+ *   HB0 = 16 bytes of value 0
+ *   HB1 = 16 bytes of value 1
+ *   HB2 = 16 bytes of value 2
+ *   HB6 = 16 bytes of value 6
+ *   HB7 = 16 bytes of value 7
+ *   TT0, TT1 and TT2 are temporaries
+ *
+ * BSW holds the pattern for byteswapping 32-bit words; this is set only
+ * on little-endian systems. XBSW is the same register with the +32 offset
+ * for access with the VSX opcodes.
+ */
+#define HB0     0
+#define HB1     1
+#define HB2     2
+#define HB6     3
+#define HB7     4
+#define TT0     5
+#define TT1     6
+#define TT2     7
+
+#define BSW     8
+#define XBSW   40
+
+/*
+ * Macro to initialise the constants.
+ */
+#define INIT \
+		vxor(HB0, HB0, HB0) \
+		vspltisb(HB1, 1) \
+		vspltisb(HB2, 2) \
+		vspltisb(HB6, 6) \
+		vspltisb(HB7, 7) \
+		INIT_BSW
+
+/*
+ * Fix endianness of a value after reading it or before writing it, if
+ * necessary.
+ */
+#if BR_POWER8_LE
+#define INIT_BSW         lxvw4x(XBSW, 0, %[idx2be])
+#define FIX_ENDIAN(xx)   vperm(xx, xx, xx, BSW)
+#else
+#define INIT_BSW
+#define FIX_ENDIAN(xx)
+#endif
+
+/*
+ * Shift x0:x1 by one bit to the left. This is a corrective action
+ * needed because GHASH is defined in full little-endian specification,
+ * while the opcodes use full big-endian convention, so the 255-bit product
+ * ends up one bit to the right.
+ */
+#define SL_256(x0, x1) \
+		vsldoi(TT0, HB0, x1, 1) \
+		vsl(x0, x0, HB1) \
+		vsr(TT0, TT0, HB7) \
+		vsl(x1, x1, HB1) \
+		vxor(x0, x0, TT0)
+
+/*
+ * Reduce x0:x1 in GF(2^128), result in xd (register xd may be the same as
+ * x0 or x1, or a different register). x0 and x1 are modified.
+ */
+#define REDUCE_F128(xd, x0, x1) \
+		vxor(x0, x0, x1) \
+		vsr(TT0, x1, HB1) \
+		vsr(TT1, x1, HB2) \
+		vsr(TT2, x1, HB7) \
+		vxor(x0, x0, TT0) \
+		vxor(TT1, TT1, TT2) \
+		vxor(x0, x0, TT1) \
+		vsldoi(x1, x1, HB0, 15) \
+		vsl(TT1, x1, HB6) \
+		vsl(TT2, x1, HB1) \
+		vxor(x1, TT1, TT2) \
+		vsr(TT0, x1, HB1) \
+		vsr(TT1, x1, HB2) \
+		vsr(TT2, x1, HB7) \
+		vxor(x0, x0, x1) \
+		vxor(x0, x0, TT0) \
+		vxor(TT1, TT1, TT2) \
+		vxor(xd, x0, TT1)
+
+/* GHASH update using POWER8 opcodes through inline assembly; see bearssl_hash.h */
+void
+br_ghash_pwr8(void *y, const void *h, const void *data, size_t len)
+{
+	const unsigned char *buf1, *buf2;
+	size_t num4, num1;
+	unsigned char tmp[64];  /* holds the zero-padded tail when len is not a multiple of 16 */
+	long cc0, cc1, cc2, cc3;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	buf1 = data;
+
+	/*
+	 * Assembly code requires data to be split into two chunks; first
+	 * chunk must contain a number of blocks which is a multiple of 4.
+	 * Since the processing for the first chunk is faster, we want
+	 * to make it as big as possible.
+	 *
+	 * For the remainder, there are two possibilities:
+	 *  -- if the remainder size is a multiple of 16, then use it
+	 *     in place;
+	 *  -- otherwise, copy it to the tmp[] array and pad it with
+	 *     zeros.
+	 */
+	num4 = len >> 6;
+	buf2 = buf1 + (num4 << 6);
+	len &= 63;
+	num1 = (len + 15) >> 4;
+	if ((len & 15) != 0) {
+		memcpy(tmp, buf2, len);
+		memset(tmp + len, 0, (num1 << 4) - len);
+		buf2 = tmp;
+	}
+
+	cc0 =  0;  /* byte offsets of the four blocks within a 64-byte group */
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+		INIT
+
+		/*
+		 * Load current h (denoted hereafter h1) in v9.
+		 */
+		lxvw4x(41, 0, %[h])
+		FIX_ENDIAN(9)
+
+		/*
+		 * Load current y into v28.
+		 */
+		lxvw4x(60, 0, %[y])
+		FIX_ENDIAN(28)
+
+		/*
+		 * Split h1 into three registers:
+		 *   v17 = h1_1:h1_0
+		 *   v18 =    0:h1_0
+		 *   v19 = h1_1:0
+		 */
+		xxpermdi(49, 41, 41, 2)
+		vsldoi(18, HB0, 9, 8)
+		vsldoi(19, 9, HB0, 8)
+
+		/*
+		 * If num4 is 0, skip directly to the second chunk.
+		 */
+		cmpldi(%[num4], 0)
+		beq(chunk1)
+
+		/*
+		 * Compute h2 = h*h in v10.
+		 */
+		vpmsumd(10, 18, 18)
+		vpmsumd(11, 19, 19)
+		SL_256(10, 11)
+		REDUCE_F128(10, 10, 11)
+
+		/*
+		 * Compute h3 = h*h*h in v11.
+		 * We first split h2 into:
+		 *   v10 = h2_0:h2_1
+		 *   v11 =    0:h2_0
+		 *   v12 = h2_1:0
+		 * Then we do the product with h1, and reduce into v11.
+		 */
+		vsldoi(11, HB0, 10, 8)
+		vsldoi(12, 10, HB0, 8)
+		vpmsumd(13, 10, 17)
+		vpmsumd(11, 11, 18)
+		vpmsumd(12, 12, 19)
+		vsldoi(14, HB0, 13, 8)
+		vsldoi(15, 13, HB0, 8)
+		vxor(11, 11, 14)
+		vxor(12, 12, 15)
+		SL_256(11, 12)
+		REDUCE_F128(11, 11, 12)
+
+		/*
+		 * Compute h4 = h*h*h*h in v12. This is done by squaring h2.
+		 */
+		vsldoi(12, HB0, 10, 8)
+		vsldoi(13, 10, HB0, 8)
+		vpmsumd(12, 12, 12)
+		vpmsumd(13, 13, 13)
+		SL_256(12, 13)
+		REDUCE_F128(12, 12, 13)
+
+		/*
+		 * Repack h1, h2, h3 and h4:
+		 *   v13 = h4_0:h3_0
+		 *   v14 = h4_1:h3_1
+		 *   v15 = h2_0:h1_0
+		 *   v16 = h2_1:h1_1
+		 */
+		xxpermdi(45, 44, 43, 0)
+		xxpermdi(46, 44, 43, 3)
+		xxpermdi(47, 42, 41, 0)
+		xxpermdi(48, 42, 41, 3)
+
+		/*
+		 * Loop for each group of four blocks.
+		 */
+		mtctr(%[num4])
+	label(loop4)
+		/*
+		 * Read the four next blocks.
+		 *   v20 = y + a0 = b0
+		 *   v21 = a1     = b1
+		 *   v22 = a2     = b2
+		 *   v23 = a3     = b3
+		 */
+		lxvw4x(52, %[cc0], %[buf1])
+		lxvw4x(53, %[cc1], %[buf1])
+		lxvw4x(54, %[cc2], %[buf1])
+		lxvw4x(55, %[cc3], %[buf1])
+		FIX_ENDIAN(20)
+		FIX_ENDIAN(21)
+		FIX_ENDIAN(22)
+		FIX_ENDIAN(23)
+		addi(%[buf1], %[buf1], 64)
+		vxor(20, 20, 28)
+
+		/*
+		 * Repack the blocks into v9, v10, v11 and v12.
+		 *   v9  = b0_0:b1_0
+		 *   v10 = b0_1:b1_1
+		 *   v11 = b2_0:b3_0
+		 *   v12 = b2_1:b3_1
+		 */
+		xxpermdi(41, 52, 53, 0)
+		xxpermdi(42, 52, 53, 3)
+		xxpermdi(43, 54, 55, 0)
+		xxpermdi(44, 54, 55, 3)
+
+		/*
+		 * Compute the products.
+		 *   v20 = b0_0*h4_0 + b1_0*h3_0
+		 *   v21 = b0_1*h4_0 + b1_1*h3_0
+		 *   v22 = b0_0*h4_1 + b1_0*h3_1
+		 *   v23 = b0_1*h4_1 + b1_1*h3_1
+		 *   v24 = b2_0*h2_0 + b3_0*h1_0
+		 *   v25 = b2_1*h2_0 + b3_1*h1_0
+		 *   v26 = b2_0*h2_1 + b3_0*h1_1
+		 *   v27 = b2_1*h2_1 + b3_1*h1_1
+		 */
+		vpmsumd(20, 13,  9)
+		vpmsumd(21, 13, 10)
+		vpmsumd(22, 14,  9)
+		vpmsumd(23, 14, 10)
+		vpmsumd(24, 15, 11)
+		vpmsumd(25, 15, 12)
+		vpmsumd(26, 16, 11)
+		vpmsumd(27, 16, 12)
+
+		/*
+		 * Sum products into a single 256-bit result in v11:v12.
+		 */
+		vxor(11, 20, 24)
+		vxor(12, 23, 27)
+		vxor( 9, 21, 22)
+		vxor(10, 25, 26)
+		vxor(20,  9, 10)
+		vsldoi( 9, HB0, 20, 8)
+		vsldoi(10, 20, HB0, 8)
+		vxor(11, 11, 9)
+		vxor(12, 12, 10)
+
+		/*
+		 * Fix and reduce in GF(2^128); this is the new y (in v28).
+		 */
+		SL_256(11, 12)
+		REDUCE_F128(28, 11, 12)
+
+		/*
+		 * Loop for next group of four blocks.
+		 */
+		bdnz(loop4)
+
+		/*
+		 * Process second chunk, one block at a time.
+		 */
+	label(chunk1)
+		cmpldi(%[num1], 0)
+		beq(done)
+
+		mtctr(%[num1])
+	label(loop1)
+		/*
+		 * Load next data block and XOR it into y.
+		 */
+		lxvw4x(41, 0, %[buf2])
+#if BR_POWER8_LE
+		FIX_ENDIAN(9)
+#endif
+		addi(%[buf2], %[buf2], 16)
+		vxor(9, 28, 9)
+
+		/*
+		 * Split y into doublewords:
+		 *   v9  = y_0:y_1
+		 *   v10 =   0:y_0
+		 *   v11 = y_1:0
+		 */
+		vsldoi(10, HB0, 9, 8)
+		vsldoi(11, 9, HB0, 8)
+
+		/*
+		 * Compute products with h:
+		 *   v12 = y_0 * h_0
+		 *   v13 = y_1 * h_1
+		 *   v14 = y_1 * h_0 + y_0 * h_1
+		 */
+		vpmsumd(14,  9, 17)
+		vpmsumd(12, 10, 18)
+		vpmsumd(13, 11, 19)
+
+		/*
+		 * Propagate v14 into v12:v13 to finalise product.
+		 */
+		vsldoi(10, HB0, 14, 8)
+		vsldoi(11, 14, HB0, 8)
+		vxor(12, 12, 10)
+		vxor(13, 13, 11)
+
+		/*
+		 * Fix result and reduce into v28 (next value for y).
+		 */
+		SL_256(12, 13)
+		REDUCE_F128(28, 12, 13)
+		bdnz(loop1)
+
+	label(done)
+		/*
+		 * Write back the new y.
+		 */
+		FIX_ENDIAN(28)
+		stxvw4x(60, 0, %[y])
+
+: [buf1] "+b" (buf1), [buf2] "+b" (buf2)
+: [y] "b" (y), [h] "b" (h), [num4] "b" (num4), [num1] "b" (num1),
+  [cc0] "b" (cc0), [cc1] "b" (cc1), [cc2] "b" (cc2), [cc3] "b" (cc3)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"  /* NOTE(review): cmpldi is used above but no condition register is listed here -- confirm whether a "cr0"/"cc" clobber is needed */
+	);
+}
+
+/* BR_POWER8 build: unconditionally returns &br_ghash_pwr8; see bearssl_hash.h */
+br_ghash
+br_ghash_pwr8_get(void)
+{
+	return &br_ghash_pwr8;
+}
+
+#else
+
+/* BR_POWER8 is not set: no POWER8 implementation, always returns 0; see bearssl_hash.h */
+br_ghash
+br_ghash_pwr8_get(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/hkdf.c b/third_party/bearssl/src/hkdf.c
new file mode 100644
index 0000000..6a36851
--- /dev/null
+++ b/third_party/bearssl/src/hkdf.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+const unsigned char br_hkdf_no_salt = 0;	/* presumably backs BR_HKDF_NO_SALT -- TODO confirm */
+
+/* see bearssl_kdf.h (absent salt => all-zero salt of digest-output length) */
+void
+br_hkdf_init(br_hkdf_context *hc, const br_hash_class *digest_vtable,
+	const void *salt, size_t salt_len)
+{
+	unsigned char zero_salt[64];
+	br_hmac_key_context salt_key;
+
+	if (salt == BR_HKDF_NO_SALT) {
+		salt_len = br_digest_size(digest_vtable);
+		memset(zero_salt, 0, salt_len);
+		salt = zero_salt;
+	}
+	br_hmac_key_init(&salt_key, digest_vtable, salt, salt_len);
+	br_hmac_init(&hc->u.hmac_ctx, &salt_key, 0);
+	hc->dig_len = br_hmac_size(&hc->u.hmac_ctx);
+}
+
+/* see bearssl_kdf.h -- the keying material is simply fed to the HMAC */
+void
+br_hkdf_inject(br_hkdf_context *hc, const void *ikm, size_t ikm_len)
+{
+	br_hmac_update(&hc->u.hmac_ctx, ikm, ikm_len);
+}
+
+/* see bearssl_kdf.h -- HMAC output becomes the key for br_hkdf_produce() */
+void
+br_hkdf_flip(br_hkdf_context *hc)
+{
+	const br_hash_class *dig = br_hmac_get_digest(&hc->u.hmac_ctx);
+	unsigned char prk[64];
+
+	br_hmac_out(&hc->u.hmac_ctx, prk);
+	br_hmac_key_init(&hc->u.prk_ctx, dig, prk, hc->dig_len);
+	hc->chunk_num = 0;
+	hc->ptr = hc->dig_len;
+}
+
+/* see bearssl_kdf.h -- returns the byte count written (short past 255 chunks) */
+size_t
+br_hkdf_produce(br_hkdf_context *hc,
+	const void *info, size_t info_len, void *out, size_t out_len)
+{
+	size_t tlen = 0;
+
+	while (out_len > 0) {
+		size_t clen;
+
+		if (hc->ptr == hc->dig_len) {	/* hc->buf fully consumed */
+			br_hmac_context hmac_ctx;
+			unsigned char x;
+
+			/* Test first: the counter must stay pinned at 255
+			   rather than wrap around on a later call. */
+			if (hc->chunk_num >= 255) {
+				return tlen;
+			}
+			x = (unsigned char)++hc->chunk_num;
+			br_hmac_init(&hmac_ctx, &hc->u.prk_ctx, 0);
+			if (x != 1) {	/* first chunk has no predecessor */
+				br_hmac_update(&hmac_ctx, hc->buf, hc->dig_len);
+			}
+			br_hmac_update(&hmac_ctx, info, info_len);
+			br_hmac_update(&hmac_ctx, &x, 1);
+			br_hmac_out(&hmac_ctx, hc->buf);
+			hc->ptr = 0;
+		}
+		clen = hc->dig_len - hc->ptr;
+		if (clen > out_len) {
+			clen = out_len;
+		}
+		memcpy(out, hc->buf + hc->ptr, clen);
+		out = (unsigned char *)out + clen;
+		out_len -= clen;
+		hc->ptr += clen;
+		tlen += clen;
+	}
+	return tlen;
+}
diff --git a/third_party/bearssl/src/hmac.c b/third_party/bearssl/src/hmac.c
new file mode 100644
index 0000000..b438798
--- /dev/null
+++ b/third_party/bearssl/src/hmac.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Block length in bytes, decoded from the log2 field of dig->desc. */
+static inline size_t
+block_size(const br_hash_class *dig)
+{
+	unsigned log2_len = (unsigned)(dig->desc >> BR_HASHDESC_LBLEN_OFF)
+		& BR_HASHDESC_LBLEN_MASK;
+
+	return (size_t)1 << log2_len;
+}
+
+/* Save in ks the hash state after one block of (zero-padded key) XOR bb. */
+static void
+process_key(const br_hash_class **hc, void *ks,
+	const void *key, size_t key_len, unsigned bb)
+{
+	const unsigned char *kb = key;
+	unsigned char tmp[256];
+	size_t u, blen = block_size(*hc);
+
+	for (u = 0; u < key_len; u ++) {	/* no memcpy(): key may be NULL */
+		tmp[u] = kb[u] ^ (unsigned char)bb;
+	}
+	memset(tmp + key_len, bb, blen - key_len);
+	(*hc)->init(hc);
+	(*hc)->update(hc, tmp, blen);
+	(*hc)->state(hc, ks);
+}
+
+/* see bearssl.h */
+void
+br_hmac_key_init(br_hmac_key_context *kc,
+	const br_hash_class *dig, const void *key, size_t key_len)
+{
+	unsigned char hashed_key[64];
+	br_hash_compat_context hctx;
+	const br_hash_class **hv = &hctx.vtable;
+
+	kc->dig_vtable = hctx.vtable = dig;
+	if (key_len > block_size(dig)) {	/* long key: use its digest */
+		dig->init(hv);
+		dig->update(hv, key, key_len);
+		dig->out(hv, hashed_key);
+		key_len = br_digest_size(dig);
+		key = hashed_key;
+	}
+	process_key(hv, kc->ksi, key, key_len, 0x36);	/* inner pad */
+	process_key(hv, kc->kso, key, key_len, 0x5C);	/* outer pad */
+}
+
+/* see bearssl.h */
+void
+br_hmac_init(br_hmac_context *ctx,
+	const br_hmac_key_context *kc, size_t out_len)
+{
+	const br_hash_class *dig = kc->dig_vtable;
+	size_t full_len = br_digest_size(dig);
+
+	/*
+	 * Restart the inner hash from the state saved in kc->ksi.
+	 */
+	dig->init(&ctx->dig.vtable);
+	dig->set_state(&ctx->dig.vtable, kc->ksi, (uint64_t)block_size(dig));
+	memcpy(ctx->kso, kc->kso, sizeof kc->kso);
+
+	/* out_len == 0 (or anything too large) selects the full output. */
+	ctx->out_len = (out_len == 0 || out_len >= full_len)
+		? full_len : out_len;
+}
+
+/* see bearssl.h -- forwards the data to the running inner hash */
+void
+br_hmac_update(br_hmac_context *ctx, const void *data, size_t len)
+{
+	ctx->dig.vtable->update(&ctx->dig.vtable, data, len);
+}
+
+/* see bearssl.h */
+size_t
+br_hmac_out(const br_hmac_context *ctx, void *out)
+{
+	const br_hash_class *dig = ctx->dig.vtable;
+	br_hash_compat_context outer;
+	unsigned char digest[64];
+
+	/* Inner hash value, extracted through a const view of ctx. */
+	dig->out(&ctx->dig.vtable, digest);
+
+	/* Outer hash: start from the saved kso state, add the inner value. */
+	dig->init(&outer.vtable);
+	dig->set_state(&outer.vtable, ctx->kso, (uint64_t)block_size(dig));
+	dig->update(&outer.vtable, digest, br_digest_size(dig));
+	dig->out(&outer.vtable, digest);
+	/* Only the first ctx->out_len bytes are handed to the caller. */
+	memcpy(out, digest, ctx->out_len);
+	return ctx->out_len;
+}
diff --git a/third_party/bearssl/src/hmac_ct.c b/third_party/bearssl/src/hmac_ct.c
new file mode 100644
index 0000000..e1c1d80
--- /dev/null
+++ b/third_party/bearssl/src/hmac_ct.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Digest output length, in bytes (OUT field of the descriptor word). */
+static inline size_t
+hash_size(const br_hash_class *dig)
+{
+	return BR_HASHDESC_OUT_MASK & (unsigned)(dig->desc >> BR_HASHDESC_OUT_OFF);
+}
+
+/* Block length in bytes, decoded from the log2 field of dig->desc. */
+static inline size_t
+block_size(const br_hash_class *dig)
+{
+	unsigned log2_len = (unsigned)(dig->desc >> BR_HASHDESC_LBLEN_OFF)
+		& BR_HASHDESC_LBLEN_MASK;
+
+	return (size_t)1 << log2_len;
+}
+
+/* see bearssl.h -- HMAC output for data of secret length in [min_len, max_len] */
+size_t
+br_hmac_outCT(const br_hmac_context *ctx,
+	const void *data, size_t len, size_t min_len, size_t max_len,
+	void *out)
+{
+	/*
+	 * Method implemented here is inspired from the descriptions on:
+	 *    https://www.imperialviolet.org/2013/02/04/luckythirteen.html
+	 *
+	 * Principle: we input bytes one by one. We use a MUX to push
+	 * padding bytes instead of data bytes when appropriate. At each
+	 * block limit, we get the current hash function state: this is
+	 * a potential output, since we handle MD padding ourselves.
+	 *
+	 * be     1 for big-endian, 0 for little-endian
+	 * po     minimal MD padding length
+	 * bs     block size (always a power of 2)
+	 * hlen   hash output size
+	 */
+
+	const br_hash_class *dig;
+	br_hash_compat_context hc;
+	int be;
+	uint32_t po, bs;
+	uint32_t kr, km, kl, kz, u;
+	uint64_t count, ncount, bit_len;
+	unsigned char tmp1[64], tmp2[64];
+	size_t hlen;
+
+	/*
+	 * Copy the current hash context.
+	 */
+	hc = ctx->dig;
+
+	/*
+	 * Get function-specific information.
+	 */
+	dig = hc.vtable;
+	be = (dig->desc & BR_HASHDESC_MD_PADDING_BE) != 0;
+	po = 9;	/* one 0x80 byte + 8 bytes of encoded length */
+	if (dig->desc & BR_HASHDESC_MD_PADDING_128) {
+		po += 8;	/* presumably a 16-byte length field -- TODO confirm */
+	}
+	bs = block_size(dig);
+	hlen = hash_size(dig);
+
+	/*
+	 * Get current input length and compute total bit length.
+	 */
+	count = dig->state(&hc.vtable, tmp1);
+	bit_len = (count + (uint64_t)len) << 3;	/* bytes -> bits */
+
+	/*
+	 * We can input the blocks that we are sure we will use.
+	 * This offers better performance (no MUX for these blocks)
+	 * and also ensures that the remaining lengths fit on 32 bits.
+	 */
+	ncount = (count + (uint64_t)min_len) & ~(uint64_t)(bs - 1);
+	if (ncount > count) {
+		size_t zlen;
+
+		zlen = (size_t)(ncount - count);
+		dig->update(&hc.vtable, data, zlen);
+		data = (const unsigned char *)data + zlen;
+		len -= zlen;
+		max_len -= zlen;
+		count = ncount;
+	}
+
+	/*
+	 * At that point:
+	 * -- 'count' contains the number of bytes already processed
+	 * (in total).
+	 * -- We must input 'len' bytes. 'min_len' is unimportant: we
+	 * used it to know how many full blocks we could process
+	 * directly. Now only len and max_len matter.
+	 *
+	 * We compute kr, kl, kz and km.
+	 *  kr   number of input bytes already in the current block
+	 *  km   index of the first byte after the end of the last padding
+	 *       block, if length is max_len
+	 *  kz   index of the last byte of the actual last padding block
+	 *  kl   index of the start of the encoded length
+	 *
+	 * km, kz and kl are counted from the current offset in the
+	 * input data.
+	 */
+	kr = (uint32_t)count & (bs - 1);
+	kz = ((kr + (uint32_t)len + po + bs - 1) & ~(bs - 1)) - 1 - kr;
+	kl = kz - 7;
+	km = ((kr + (uint32_t)max_len + po + bs - 1) & ~(bs - 1)) - kr;
+
+	/*
+	 * We must now process km bytes. For index u from 0 to km-1:
+	 *   d is from data[] if u < max_len, 0x00 otherwise
+	 *   e is an encoded length byte or 0x00, depending on u
+	 * The tests for d and e need not be constant-time, since
+	 * they relate only to u and max_len, not to the actual length.
+	 *
+	 * Actual input byte is then:
+	 *   d      if u < len
+	 *   0x80   if u == len
+	 *   0x00   if u > len and u < kl
+	 *   e      if u >= kl
+	 *
+	 * Hash state is obtained whenever we reach a full block. This
+	 * is the result we want if and only if u == kz.
+	 */
+	memset(tmp2, 0, sizeof tmp2);	/* will receive the selected state */
+	for (u = 0; u < km; u ++) {
+		uint32_t v;
+		uint32_t d, e, x0, x1;
+		unsigned char x[1];
+
+		d = (u < max_len) ? ((const unsigned char *)data)[u] : 0x00;
+		v = (kr + u) & (bs - 1);	/* offset within the block */
+		if (v >= (bs - 8)) {
+			unsigned j;
+
+			j = (v - (bs - 8)) << 3;
+			if (be) {
+				e = (uint32_t)(bit_len >> (56 - j));
+			} else {
+				e = (uint32_t)(bit_len >> j);
+			}
+			e &= 0xFF;
+		} else {
+			e = 0x00;
+		}
+		x0 = MUX(EQ(u, (uint32_t)len), 0x80, d);
+		x1 = MUX(LT(u, kl), 0x00, e);
+		x[0] = MUX(LE(u, (uint32_t)len), x0, x1);
+		dig->update(&hc.vtable, x, 1);
+		if (v == (bs - 1)) {	/* block boundary reached */
+			dig->state(&hc.vtable, tmp1);
+			CCOPY(EQ(u, kz), tmp2, tmp1, hlen);
+		}
+	}
+
+	/*
+	 * Inner hash output is in tmp2[]; we finish processing.
+	 */
+	dig->init(&hc.vtable);
+	dig->set_state(&hc.vtable, ctx->kso, (uint64_t)bs);
+	dig->update(&hc.vtable, tmp2, hlen);
+	dig->out(&hc.vtable, tmp2);
+	memcpy(out, tmp2, ctx->out_len);
+	return ctx->out_len;
+}
diff --git a/third_party/bearssl/src/hmac_drbg.c b/third_party/bearssl/src/hmac_drbg.c
new file mode 100644
index 0000000..d746756
--- /dev/null
+++ b/third_party/bearssl/src/hmac_drbg.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl.h */
+void
+br_hmac_drbg_init(br_hmac_drbg_context *ctx,
+	const br_hash_class *digest_class, const void *seed, size_t len)
+{
+	size_t out_len = br_digest_size(digest_class);
+
+	ctx->vtable = &br_hmac_drbg_vtable;
+	ctx->digest_class = digest_class;
+	/* Starting state is K = 00..00 and V = 01..01; then mix in the seed. */
+	memset(ctx->V, 0x01, out_len);
+	memset(ctx->K, 0x00, out_len);
+	br_hmac_drbg_update(ctx, seed, len);
+}
+
+/* see bearssl.h */
+void
+br_hmac_drbg_generate(br_hmac_drbg_context *ctx, void *out, size_t len)
+{
+	const br_hash_class *dig = ctx->digest_class;
+	size_t hlen = br_digest_size(dig);
+	unsigned char *dst = out;
+	unsigned char zero = 0x00;
+	br_hmac_key_context kc;
+	br_hmac_context hc;
+
+	/*
+	 * The output is the concatenation of successive values of
+	 * V = HMAC(K, V); the last one is truncated if len is not a
+	 * multiple of the digest size. K is the same for the whole
+	 * request.
+	 */
+	br_hmac_key_init(&kc, dig, ctx->K, hlen);
+	while (len > 0) {
+		size_t take = (len < hlen) ? len : hlen;
+
+		br_hmac_init(&hc, &kc, 0);
+		br_hmac_update(&hc, ctx->V, hlen);
+		br_hmac_out(&hc, ctx->V);
+		memcpy(dst, ctx->V, take);
+		dst += take;
+		len -= take;
+	}
+
+	/*
+	 * Refresh the state for the next request. This is the same
+	 * computation as br_hmac_drbg_update() with an empty seed, written
+	 * out here so that the key context already on the stack is reused
+	 * instead of pushing a second one:
+	 *   K = HMAC(K, V || 0x00)
+	 *   V = HMAC(K, V)
+	 */
+	br_hmac_init(&hc, &kc, 0);
+	br_hmac_update(&hc, ctx->V, hlen);
+	br_hmac_update(&hc, &zero, 1);
+	br_hmac_out(&hc, ctx->K);
+
+	br_hmac_key_init(&kc, dig, ctx->K, hlen);
+	br_hmac_init(&hc, &kc, 0);
+	br_hmac_update(&hc, ctx->V, hlen);
+	br_hmac_out(&hc, ctx->V);
+}
+
+/*
+ * see bearssl.h
+ *
+ * Mixes an additional seed into the DRBG state held in ctx->K and ctx->V.
+ *
+ *   ctx    DRBG context to update
+ *   seed   additional seed bytes
+ *   len    seed length, in bytes
+ *
+ * One or two rounds are run, the round number being 0x00, then 0x01:
+ *   K = HMAC(K, V || round || seed)
+ *   V = HMAC(K, V)
+ * The second round is skipped when the seed is empty (len == 0).
+ */
+void
+br_hmac_drbg_update(br_hmac_drbg_context *ctx, const void *seed, size_t len)
+{
+	const br_hash_class *dig;
+	br_hmac_key_context kc;
+	br_hmac_context hc;
+	unsigned char round;
+	size_t hlen;
+
+	dig = ctx->digest_class;
+	hlen = br_digest_size(dig);
+
+	/*
+	 * kc always holds the key schedule for the current K: it is set up
+	 * here, then rebuilt each time K is replaced.
+	 */
+	br_hmac_key_init(&kc, dig, ctx->K, hlen);
+	for (round = 0x00; round <= 0x01; round ++) {
+		/*
+		 * K = HMAC(K, V || round || seed)
+		 */
+		br_hmac_init(&hc, &kc, 0);
+		br_hmac_update(&hc, ctx->V, hlen);
+		br_hmac_update(&hc, &round, 1);
+		br_hmac_update(&hc, seed, len);
+		br_hmac_out(&hc, ctx->K);
+		br_hmac_key_init(&kc, dig, ctx->K, hlen);
+
+		/*
+		 * V = HMAC(K, V), under the new K
+		 */
+		br_hmac_init(&hc, &kc, 0);
+		br_hmac_update(&hc, ctx->V, hlen);
+		br_hmac_out(&hc, ctx->V);
+
+		/*
+		 * With an empty seed, a single round is all there is.
+		 */
+		if (len == 0) {
+			break;
+		}
+	}
+}
+
+/* see bearssl.h -- the three functions are cast to br_prng_class-based types */
+const br_prng_class br_hmac_drbg_vtable = {
+	sizeof(br_hmac_drbg_context),	/* size of the concrete context */
+	(void (*)(const br_prng_class **, const void *, const void *, size_t))
+		&br_hmac_drbg_init,	/* takes (ctx, digest class, seed, len) */
+	(void (*)(const br_prng_class **, void *, size_t))
+		&br_hmac_drbg_generate,	/* takes (ctx, out, len) */
+	(void (*)(const br_prng_class **, const void *, size_t))
+		&br_hmac_drbg_update	/* takes (ctx, seed, len) */
+};
diff --git a/third_party/bearssl/src/i15_add.c b/third_party/bearssl/src/i15_add.c
new file mode 100644
index 0000000..97e29b8
--- /dev/null
+++ b/third_party/bearssl/src/i15_add.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h */
+uint32_t
+br_i15_add(uint16_t *a, const uint16_t *b, uint32_t ctl)
+{
+	uint32_t carry = 0;
+	size_t k, end = (a[0] + 31) >> 4;
+
+	/*
+	 * a[0] gives the word count; value words start at index 1. The
+	 * sum and the carry are always computed: ctl only selects whether
+	 * each sum word is stored back into a[].
+	 */
+	for (k = 1; k < end; k ++) {
+		uint32_t sum = (uint32_t)a[k] + b[k] + carry;
+
+		carry = sum >> 15;
+		a[k] = MUX(ctl, sum & 0x7FFF, a[k]);
+	}
+	return carry;
+}
diff --git a/third_party/bearssl/src/i15_bitlen.c b/third_party/bearssl/src/i15_bitlen.c
new file mode 100644
index 0000000..ad74467
--- /dev/null
+++ b/third_party/bearssl/src/i15_bitlen.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h */
+uint32_t
+br_i15_bit_length(uint16_t *x, size_t xlen)
+{
+	uint32_t top = 0, top_idx = 0;
+	size_t k;
+
+	/*
+	 * Scan from the top word down, latching the first non-zero word.
+	 */
+	for (k = xlen; k > 0; k --) {
+		uint32_t unset = EQ(top, 0);
+
+		top_idx = MUX(unset, (uint32_t)(k - 1), top_idx);
+		top = MUX(unset, x[k - 1], top);
+	}
+	return (top_idx << 4) + BIT_LENGTH(top);
+}
diff --git a/third_party/bearssl/src/i15_decmod.c b/third_party/bearssl/src/i15_decmod.c
new file mode 100644
index 0000000..6076c57
--- /dev/null
+++ b/third_party/bearssl/src/i15_decmod.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- decodes src[] into x[]; returns 1 only if the value is below m */
+uint32_t
+br_i15_decode_mod(uint16_t *x, const void *src, size_t len, const uint16_t *m)
+{
+	/*
+	 * Two-pass algorithm: in the first pass, we determine whether the
+	 * value fits; in the second pass, we do the actual write.
+	 *
+	 * During the first pass, 'r' contains the comparison result so
+	 * far:
+	 *  0x00000000   value is equal to the modulus
+	 *  0x00000001   value is greater than the modulus
+	 *  0xFFFFFFFF   value is lower than the modulus
+	 *
+	 * Since we iterate starting with the least significant bytes (at
+	 * the end of src[]), each new comparison overrides the previous
+	 * except when the comparison yields 0 (equal).
+	 *
+	 * During the second pass, 'r' is either 0xFFFFFFFF (value fits)
+	 * or 0x00000000 (value does not fit).
+	 *
+	 * We must iterate over all bytes of the source, _and_ possibly
+	 * some extra virtual bytes (with value 0) so as to cover the
+	 * complete modulus as well. We also add 4 such extra bytes beyond
+	 * the modulus length because it then guarantees that no accumulated
+	 * partial word remains to be processed.
+	 */
+	const unsigned char *buf;
+	size_t mlen, tlen;
+	int pass;
+	uint32_t r;
+
+	buf = src;
+	mlen = (m[0] + 15) >> 4;	/* number of 15-bit words in m */
+	tlen = (mlen << 1);	/* two bytes cover any 15-bit word */
+	if (tlen < len) {
+		tlen = len;
+	}
+	tlen += 4;
+	r = 0;
+	for (pass = 0; pass < 2; pass ++) {
+		size_t u, v;
+		uint32_t acc;
+		int acc_len;
+
+		v = 1;
+		acc = 0;
+		acc_len = 0;
+		for (u = 0; u < tlen; u ++) {
+			uint32_t b;
+
+			if (u < len) {
+				b = buf[len - 1 - u];
+			} else {
+				b = 0;	/* virtual padding byte */
+			}
+			acc |= (b << acc_len);
+			acc_len += 8;
+			if (acc_len >= 15) {
+				uint32_t xw;
+
+				xw = acc & (uint32_t)0x7FFF;
+				acc_len -= 15;
+				acc = b >> (8 - acc_len);	/* bits of b not used by xw */
+				if (v <= mlen) {
+					if (pass) {
+						x[v] = r & xw;	/* all-zero words if the value did not fit */
+					} else {
+						uint32_t cc;
+
+						cc = (uint32_t)CMP(xw, m[v]);
+						r = MUX(EQ(cc, 0), r, cc);
+					}
+				} else {
+					if (!pass) {
+						r = MUX(EQ(xw, 0), r, 1);	/* non-zero word beyond m: too large */
+					}
+				}
+				v ++;
+			}
+		}
+
+		/*
+		 * When we reach this point at the end of the first pass:
+		 * r is either 0, 1 or -1; we want to set r to 0 if it
+		 * is equal to 0 or 1, and leave it to -1 otherwise.
+		 *
+		 * When we reach this point at the end of the second pass:
+		 * r is either 0 or -1; we want to leave that value
+		 * untouched. This is a subcase of the previous.
+		 */
+		r >>= 1;	/* 0 or 1 -> 0; 0xFFFFFFFF -> 0x7FFFFFFF */
+		r |= (r << 1);	/* 0x7FFFFFFF -> 0xFFFFFFFF; 0 stays 0 */
+	}
+
+	x[0] = m[0];	/* x[0] is copied from the modulus header word */
+	return r & (uint32_t)1;
+}
diff --git a/third_party/bearssl/src/i15_decode.c b/third_party/bearssl/src/i15_decode.c
new file mode 100644
index 0000000..fc2c0be
--- /dev/null
+++ b/third_party/bearssl/src/i15_decode.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h */
+void
+br_i15_decode(uint16_t *x, const void *src, size_t len)
+{
+	const unsigned char *bytes = src;
+	uint16_t *wp = x + 1;
+	uint32_t pending = 0;
+	int npending = 0;
+	size_t k;
+
+	/*
+	 * src[] is big-endian: consume it from its last byte, packing bits
+	 * into 15-bit words stored from x[1] upwards.
+	 */
+	for (k = len; k > 0; k --) {
+		pending |= (uint32_t)bytes[k - 1] << npending;
+		npending += 8;
+		if (npending >= 15) {
+			*wp ++ = pending & 0x7FFF;
+			pending >>= 15;
+			npending -= 15;
+		}
+	}
+
+	/* Left-over bits (fewer than 15) make up a final, partial word. */
+	if (npending != 0) {
+		*wp ++ = pending;
+	}
+	x[0] = br_i15_bit_length(x + 1, (size_t)(wp - (x + 1)));
+}
diff --git a/third_party/bearssl/src/i15_decred.c b/third_party/bearssl/src/i15_decred.c
new file mode 100644
index 0000000..81e7dd1
--- /dev/null
+++ b/third_party/bearssl/src/i15_decred.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- decodes src[] (any length) into x[], reducing it modulo m */
+void
+br_i15_decode_reduce(uint16_t *x,
+	const void *src, size_t len, const uint16_t *m)
+{
+	uint32_t m_ebitlen, m_rbitlen;
+	size_t mblen, k;
+	const unsigned char *buf;
+	uint32_t acc;
+	int acc_len;
+
+	/*
+	 * Get the encoded bit length.
+	 */
+	m_ebitlen = m[0];
+
+	/*
+	 * Special case for an invalid (null) modulus.
+	 */
+	if (m_ebitlen == 0) {
+		x[0] = 0;
+		return;
+	}
+
+	/*
+	 * Clear the destination.
+	 */
+	br_i15_zero(x, m_ebitlen);
+
+	/*
+	 * First decode directly as many bytes as possible. This requires
+	 * computing the actual bit length.
+	 */
+	m_rbitlen = m_ebitlen >> 4;	/* number of full 15-bit words */
+	m_rbitlen = (m_ebitlen & 15) + (m_rbitlen << 4) - m_rbitlen;	/* 15 * words + bits */
+	mblen = (m_rbitlen + 7) >> 3;	/* modulus length in bytes */
+	k = mblen - 1;	/* one byte fewer than the modulus */
+	if (k >= len) {
+		br_i15_decode(x, src, len);
+		x[0] = m_ebitlen;
+		return;
+	}
+	buf = src;
+	br_i15_decode(x, buf, k);
+	x[0] = m_ebitlen;
+
+	/*
+	 * Input remaining bytes, using 15-bit words.
+	 */
+	acc = 0;
+	acc_len = 0;
+	while (k < len) {
+		uint32_t v;
+
+		v = buf[k ++];
+		acc = (acc << 8) | v;
+		acc_len += 8;
+		if (acc_len >= 15) {
+			br_i15_muladd_small(x, acc >> (acc_len - 15), m);
+			acc_len -= 15;
+			acc &= ~((uint32_t)-1 << acc_len);	/* keep the acc_len low bits */
+		}
+	}
+
+	/*
+	 * We may have some bits accumulated. We then perform a shift to
+	 * be able to inject these bits as a full 15-bit word.
+	 */
+	if (acc_len != 0) {
+		acc = (acc | (x[1] << acc_len)) & 0x7FFF;
+		br_i15_rshift(x, 15 - acc_len);
+		br_i15_muladd_small(x, acc, m);
+	}
+}
diff --git a/third_party/bearssl/src/i15_encode.c b/third_party/bearssl/src/i15_encode.c
new file mode 100644
index 0000000..50668f4
--- /dev/null
+++ b/third_party/bearssl/src/i15_encode.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h */
+void
+br_i15_encode(void *dst, size_t len, const uint16_t *x)
+{
+	unsigned char *out = dst;
+	size_t nwords = (x[0] + 15) >> 4;
+	size_t k, next = 1;
+	uint32_t bits = 0;
+	int nbits = 0;
+
+	/* x[0] == 0 means there are no value words: output is all zeros. */
+	if (nwords == 0) {
+		memset(dst, 0, len);
+		return;
+	}
+	/*
+	 * Fill dst[] from its last byte (least significant) backwards;
+	 * once the words are used up, zeros are shifted in instead.
+	 */
+	for (k = len; k > 0; k --) {
+		if (nbits < 8) {
+			uint32_t w = (next <= nwords) ? x[next ++] : 0;
+			bits += w << nbits;
+			nbits += 15;
+		}
+		out[k - 1] = (unsigned char)bits;
+		bits >>= 8;
+		nbits -= 8;
+	}
+}
diff --git a/third_party/bearssl/src/i15_fmont.c b/third_party/bearssl/src/i15_fmont.c
new file mode 100644
index 0000000..3450b72
--- /dev/null
+++ b/third_party/bearssl/src/i15_fmont.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* In-place Montgomery reduction of x by m: one word shift per round. inner.h */
+void
+br_i15_from_monty(uint16_t *x, const uint16_t *m, uint16_t m0i)
+{
+	size_t len, u, v;
+
+	len = (m[0] + 15) >> 4; /* value word count, from header m[0] */
+	for (u = 0; u < len; u ++) {
+		uint32_t f, cc;
+
+		f = MUL15(x[1], m0i) & 0x7FFF; /* multiple of m to add */
+		cc = 0;
+		for (v = 0; v < len; v ++) {
+			uint32_t z;
+
+			z = (uint32_t)x[v + 1] + MUL15(f, m[v + 1]) + cc;
+			cc = z >> 15;
+			if (v != 0) { /* low word is dropped, not stored */
+				x[v] = z & 0x7FFF; /* stored one word lower */
+			}
+		}
+		x[len] = cc; /* last carry becomes the new top word */
+	}
+
+	/*
+	 * We may have to do an extra subtraction, but only if the
+	 * value in x[] is indeed greater than or equal to that of m[],
+	 * which is why we must do two calls (first call computes the
+	 * carry, second call performs the subtraction only if the carry
+	 * is 0).
+	 */
+	br_i15_sub(x, m, NOT(br_i15_sub(x, m, 0)));
+}
diff --git a/third_party/bearssl/src/i15_iszero.c b/third_party/bearssl/src/i15_iszero.c
new file mode 100644
index 0000000..d4b6f10
--- /dev/null
+++ b/third_party/bearssl/src/i15_iszero.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Returns 1 if every value word of x is 0, and 0 otherwise; see inner.h */
+uint32_t
+br_i15_iszero(const uint16_t *x)
+{
+	uint32_t z;
+	size_t u;
+
+	z = 0;
+	for (u = (x[0] + 15) >> 4; u > 0; u --) { /* header word x[0] skipped */
+		z |= x[u]; /* OR of all value words, no early exit */
+	}
+	return ~(z | -z) >> 31; /* top bit of (z | -z) is set iff z != 0 */
+}
diff --git a/third_party/bearssl/src/i15_moddiv.c b/third_party/bearssl/src/i15_moddiv.c
new file mode 100644
index 0000000..45af756
--- /dev/null
+++ b/third_party/bearssl/src/i15_moddiv.c
@@ -0,0 +1,465 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * In this file, we handle big integers with a custom format, i.e.
+ * without the usual one-word header. Value is split into 15-bit words,
+ * each stored in a 16-bit slot (top bit is zero) in little-endian
+ * order. The length (in words) is provided explicitly. In some cases,
+ * the value can be negative (using two's complement representation). In
+ * some cases, the top word is allowed to have a 16th bit.
+ */
+
+/*
+ * Negate big integer conditionally. The value consists of 'len' words,
+ * with 15 bits in each word (the top bit of each word should be 0,
+ * except possibly for the last word). If 'ctl' is 1, the negation is
+ * computed; otherwise, if 'ctl' is 0, then the value is unchanged.
+ */
+static void
+cond_negate(uint16_t *a, size_t len, uint32_t ctl)
+{
+	size_t k;
+	uint32_t cc, xm;
+
+	cc = ctl; /* the "+1" of two's complement, only when negating */
+	xm = 0x7FFF & -ctl; /* 15-bit all-ones mask if ctl = 1, else 0 */
+	for (k = 0; k < len; k ++) {
+		uint32_t aw;
+
+		aw = a[k];
+		aw = (aw ^ xm) + cc; /* conditional bit flip, plus carry */
+		a[k] = aw & 0x7FFF; /* every word is written back on 15 bits */
+		cc = (aw >> 15) & 1; /* carry into the next word */
+	}
+}
+
+/*
+ * Finish modular reduction. Rules on input parameters:
+ *
+ *   if neg = 1, then -m <= a < 0
+ *   if neg = 0, then 0 <= a < 2*m
+ *
+ * If neg = 0, then the top word of a[] may use 16 bits.
+ *
+ * Also, modulus m must be odd.
+ */
+static void
+finish_mod(uint16_t *a, size_t len, const uint16_t *m, uint32_t neg)
+{
+	size_t k;
+	uint32_t cc, xm, ym;
+
+	/*
+	 * First pass: compare a (assumed nonnegative) with m.
+	 */
+	cc = 0;
+	for (k = 0; k < len; k ++) {
+		uint32_t aw, mw;
+
+		aw = a[k];
+		mw = m[k];
+		cc = (aw - mw - cc) >> 31; /* borrow out of this word */
+	}
+
+	/*
+	 * At this point:
+	 *   if neg = 1, then we must add m (regardless of cc)
+	 *   if neg = 0 and cc = 0, then we must subtract m
+	 *   if neg = 0 and cc = 1, then we must do nothing
+	 */
+	xm = 0x7FFF & -neg; /* flips the bits of m when adding */
+	ym = -(neg | (1 - cc)); /* all-ones unless nothing must be done */
+	cc = neg; /* with flipped m: a - (~m) - 1 = a + m */
+	for (k = 0; k < len; k ++) {
+		uint32_t aw, mw;
+
+		aw = a[k];
+		mw = (m[k] ^ xm) & ym;
+		aw = aw - mw - cc;
+		a[k] = aw & 0x7FFF; /* result words are back on 15 bits */
+		cc = aw >> 31; /* borrow into the next word */
+	}
+}
+
+/*
+ * Compute:
+ *   a <- (a*pa+b*pb)/(2^15)
+ *   b <- (a*qa+b*qb)/(2^15)
+ * The division is assumed to be exact (i.e. the low word is dropped).
+ * If the final a is negative, then it is negated. Similarly for b.
+ * Returned value is the combination of two bits:
+ *   bit 0: 1 if a had to be negated, 0 otherwise
+ *   bit 1: 1 if b had to be negated, 0 otherwise
+ *
+ * Factors pa, pb, qa and qb must be at most 2^15 in absolute value.
+ * Source integers a and b must be nonnegative; top word is not allowed
+ * to contain an extra 16th bit.
+ */
+static uint32_t
+co_reduce(uint16_t *a, uint16_t *b, size_t len,
+	int32_t pa, int32_t pb, int32_t qa, int32_t qb)
+{
+	size_t k;
+	int32_t cca, ccb;
+	uint32_t nega, negb;
+
+	cca = 0;
+	ccb = 0;
+	for (k = 0; k < len; k ++) {
+		uint32_t wa, wb, za, zb;
+		uint16_t tta, ttb;
+
+		/*
+		 * Since:
+		 *   |pa| <= 2^15
+		 *   |pb| <= 2^15
+		 *   0 <= wa <= 2^15 - 1
+		 *   0 <= wb <= 2^15 - 1
+		 *   |cca| <= 2^16 - 1
+		 * Then:
+		 *   |za| <= (2^15-1)*(2^16) + (2^16-1) = 2^31 - 1
+		 *
+		 * Thus, the new value of cca is such that |cca| <= 2^16 - 1.
+		 * The same applies to ccb.
+		 */
+		wa = a[k];
+		wb = b[k];
+		za = wa * (uint32_t)pa + wb * (uint32_t)pb + (uint32_t)cca;
+		zb = wa * (uint32_t)qa + wb * (uint32_t)qb + (uint32_t)ccb;
+		if (k > 0) { /* word 0 of each sum is never stored */
+			a[k - 1] = za & 0x7FFF; /* stored one word lower */
+			b[k - 1] = zb & 0x7FFF;
+		}
+		tta = za >> 15; /* truncated to 16 bits by the assignment */
+		ttb = zb >> 15;
+		cca = *(int16_t *)&tta; /* same 16 bits, read as signed */
+		ccb = *(int16_t *)&ttb;
+	}
+	a[len - 1] = (uint16_t)cca; /* final carry is the new top word */
+	b[len - 1] = (uint16_t)ccb;
+	nega = (uint32_t)cca >> 31; /* sign bit of the final carry */
+	negb = (uint32_t)ccb >> 31;
+	cond_negate(a, len, nega);
+	cond_negate(b, len, negb);
+	return nega | (negb << 1);
+}
+
+/*
+ * Compute:
+ *   a <- (a*pa+b*pb)/(2^15) mod m
+ *   b <- (a*qa+b*qb)/(2^15) mod m
+ * A multiple of m (fa*m, resp. fb*m) is added to each sum beforehand.
+ * m0i is equal to -1/m[0] mod 2^15; m[] is indexed from 0 here (the
+ * words m[0..len-1] are used, with no separate header word).
+ * Factors pa, pb, qa and qb must be at most 2^15 in absolute value.
+ * Source integers a and b must be nonnegative; top word is not allowed
+ * to contain an extra 16th bit.
+ */
+static void
+co_reduce_mod(uint16_t *a, uint16_t *b, size_t len,
+	int32_t pa, int32_t pb, int32_t qa, int32_t qb,
+	const uint16_t *m, uint16_t m0i)
+{
+	size_t k;
+	int32_t cca, ccb, fa, fb;
+
+	cca = 0;
+	ccb = 0;
+	fa = ((a[0] * (uint32_t)pa + b[0] * (uint32_t)pb) * m0i) & 0x7FFF;
+	fb = ((a[0] * (uint32_t)qa + b[0] * (uint32_t)qb) * m0i) & 0x7FFF;
+	for (k = 0; k < len; k ++) {
+		uint32_t wa, wb, za, zb;
+		uint32_t tta, ttb;
+
+		/*
+		 * In this loop, carries 'cca' and 'ccb' always fit on
+		 * 17 bits (in absolute value).
+		 */
+		wa = a[k];
+		wb = b[k];
+		za = wa * (uint32_t)pa + wb * (uint32_t)pb
+			+ m[k] * (uint32_t)fa + (uint32_t)cca;
+		zb = wa * (uint32_t)qa + wb * (uint32_t)qb
+			+ m[k] * (uint32_t)fb + (uint32_t)ccb;
+		if (k > 0) { /* word 0 of each sum is never stored */
+			a[k - 1] = za & 0x7FFF; /* stored one word lower */
+			b[k - 1] = zb & 0x7FFF;
+		}
+
+		/*
+		 * The XOR-and-sub construction below does an arithmetic
+		 * right shift in a portable way (technically, right-shifting
+		 * a negative signed value is implementation-defined in C).
+		 */
+#define M   ((uint32_t)1 << 16)
+		tta = za >> 15; /* 17 significant bits remain */
+		ttb = zb >> 15;
+		tta = (tta ^ M) - M; /* sign-extend from bit 16 */
+		ttb = (ttb ^ M) - M;
+		cca = *(int32_t *)&tta; /* same 32 bits, read as signed */
+		ccb = *(int32_t *)&ttb;
+#undef M
+	}
+	a[len - 1] = (uint32_t)cca; /* final carry is the new top word */
+	b[len - 1] = (uint32_t)ccb;
+
+	/*
+	 * At this point:
+	 *   -m <= a < 2*m
+	 *   -m <= b < 2*m
+	 * (this is a case of Montgomery reduction)
+	 * The top word of 'a' and 'b' may have a 16-th bit set.
+	 * We may have to add or subtract the modulus.
+	 */
+	finish_mod(a, len, m, (uint32_t)cca >> 31);
+	finish_mod(b, len, m, (uint32_t)ccb >> 31);
+}
+
+/* x <- x/y mod m (m odd), t = scratch; returns 1 iff GCD(y,m) = 1. inner.h */
+uint32_t
+br_i15_moddiv(uint16_t *x, const uint16_t *y, const uint16_t *m, uint16_t m0i,
+	uint16_t *t)
+{
+	/*
+	 * Algorithm is an extended binary GCD. We maintain four values
+	 * a, b, u and v, with the following invariants:
+	 *
+	 *   a * x = y * u mod m
+	 *   b * x = y * v mod m
+	 *
+	 * Starting values are:
+	 *
+	 *   a = y
+	 *   b = m
+	 *   u = x
+	 *   v = 0
+	 *
+	 * The formal definition of the algorithm is a sequence of steps:
+	 *
+	 *   - If a is even, then a <- a/2 and u <- u/2 mod m.
+	 *   - Otherwise, if b is even, then b <- b/2 and v <- v/2 mod m.
+	 *   - Otherwise, if a > b, then a <- (a-b)/2 and u <- (u-v)/2 mod m.
+	 *   - Otherwise, b <- (b-a)/2 and v <- (v-u)/2 mod m.
+	 *
+	 * Algorithm stops when a = b. At that point, they both are equal
+	 * to GCD(y,m); the modular division succeeds if that value is 1.
+	 * The result of the modular division is then u (or v: both are
+	 * equal at that point).
+	 *
+	 * Each step makes either a or b shrink by at least one bit; hence,
+	 * if m has bit length k bits, then 2k-2 steps are sufficient.
+	 *
+	 *
+	 * Though complexity is quadratic in the size of m, the bit-by-bit
+	 * processing is not very efficient. We can speed up processing by
+	 * remarking that the decisions are taken based only on observation
+	 * of the top and low bits of a and b.
+	 *
+	 * In the loop below, at each iteration, we use the two top words
+	 * of a and b, and the low words of a and b, to compute reduction
+	 * parameters pa, pb, qa and qb such that the new values for a
+	 * and b are:
+	 *
+	 *   a' = (a*pa + b*pb) / (2^15)
+	 *   b' = (a*qa + b*qb) / (2^15)
+	 *
+	 * the division being exact.
+	 *
+	 * Since the choices are based on the top words, they may be slightly
+	 * off, requiring an optional correction: if a' < 0, then we replace
+	 * pa with -pa, and pb with -pb. The total length of a and b is
+	 * thus reduced by at least 14 bits at each iteration.
+	 *
+	 * The stopping conditions are still the same, though: when a
+	 * and b become equal, they must be both odd (since m is odd,
+	 * the GCD cannot be even), therefore the next operation is a
+	 * subtraction, and one of the values becomes 0. At that point,
+	 * nothing else happens, i.e. one value is stuck at 0, and the
+	 * other one is the GCD.
+	 */
+	size_t len, k;
+	uint16_t *a, *b, *u, *v;
+	uint32_t num, r;
+
+	len = (m[0] + 15) >> 4; /* value word count, from header m[0] */
+	a = t; /* a, b and v occupy the first 3*len words of t[] */
+	b = a + len;
+	u = x + 1; /* u aliases the value words of x: result lands there */
+	v = b + len;
+	memcpy(a, y + 1, len * sizeof *y);
+	memcpy(b, m + 1, len * sizeof *m);
+	memset(v, 0, len * sizeof *v);
+
+	/*
+	 * Loop below ensures that a and b are reduced by some bits each,
+	 * for a total of at least 14 bits.
+	 */
+	for (num = ((m[0] - (m[0] >> 4)) << 1) + 14; num >= 14; num -= 14) {
+		size_t j;
+		uint32_t c0, c1;
+		uint32_t a0, a1, b0, b1;
+		uint32_t a_hi, b_hi, a_lo, b_lo;
+		int32_t pa, pb, qa, qb;
+		int i;
+
+		/*
+		 * Extract top words of a and b. If j is the highest
+		 * index >= 1 such that a[j] != 0 or b[j] != 0, then we want
+		 * (a[j] << 15) + a[j - 1], and (b[j] << 15) + b[j - 1].
+		 * If a and b are down to one word each, then we use a[0]
+		 * and b[0].
+		 */
+		c0 = (uint32_t)-1;
+		c1 = (uint32_t)-1;
+		a0 = 0;
+		a1 = 0;
+		b0 = 0;
+		b1 = 0;
+		j = len;
+		while (j -- > 0) { /* always scans all len words */
+			uint32_t aw, bw;
+
+			aw = a[j];
+			bw = b[j];
+			a0 ^= (a0 ^ aw) & c0; /* masked select, no branch */
+			a1 ^= (a1 ^ aw) & c1;
+			b0 ^= (b0 ^ bw) & c0;
+			b1 ^= (b1 ^ bw) & c1;
+			c1 = c0;
+			c0 &= (((aw | bw) + 0xFFFF) >> 16) - (uint32_t)1;
+		}
+
+		/*
+		 * If c1 = 0, then we grabbed two words for a and b.
+		 * If c1 != 0 but c0 = 0, then we grabbed one word. It
+		 * is not possible that c1 != 0 and c0 != 0, because that
+		 * would mean that both integers are zero.
+		 */
+		a1 |= a0 & c1;
+		a0 &= ~c1;
+		b1 |= b0 & c1;
+		b0 &= ~c1;
+		a_hi = (a0 << 15) + a1;
+		b_hi = (b0 << 15) + b1;
+		a_lo = a[0];
+		b_lo = b[0];
+
+		/*
+		 * Compute reduction factors:
+		 *
+		 *   a' = a*pa + b*pb
+		 *   b' = a*qa + b*qb
+		 *
+		 * such that a' and b' are both multiple of 2^15, but are
+		 * only marginally larger than a and b.
+		 */
+		pa = 1;
+		pb = 0;
+		qa = 0;
+		qb = 1;
+		for (i = 0; i < 15; i ++) {
+			/*
+			 * At each iteration:
+			 *
+			 *   a <- (a-b)/2 if: a is odd, b is odd, a_hi > b_hi
+			 *   b <- (b-a)/2 if: a is odd, b is odd, a_hi <= b_hi
+			 *   a <- a/2 if: a is even
+			 *   b <- b/2 if: a is odd, b is even
+			 *
+			 * We multiply a_lo and b_lo by 2 at each
+			 * iteration, thus a division by 2 really is a
+			 * non-multiplication by 2.
+			 */
+			uint32_t r, oa, ob, cAB, cBA, cA; /* r shadows outer r */
+
+			/*
+			 * cAB = 1 if b must be subtracted from a
+			 * cBA = 1 if a must be subtracted from b
+			 * cA = 1 if a is divided by 2, 0 otherwise
+			 *
+			 * Rules:
+			 *
+			 *   cAB and cBA cannot be both 1.
+			 *   if a is not divided by 2, b is.
+			 */
+			r = GT(a_hi, b_hi);
+			oa = (a_lo >> i) & 1; /* bit i: the low words were */
+			ob = (b_lo >> i) & 1; /* doubled instead of halved */
+			cAB = oa & ob & r;
+			cBA = oa & ob & NOT(r);
+			cA = cAB | NOT(oa);
+
+			/*
+			 * Conditional subtractions.
+			 */
+			a_lo -= b_lo & -cAB;
+			a_hi -= b_hi & -cAB;
+			pa -= qa & -(int32_t)cAB;
+			pb -= qb & -(int32_t)cAB;
+			b_lo -= a_lo & -cBA;
+			b_hi -= a_hi & -cBA;
+			qa -= pa & -(int32_t)cBA;
+			qb -= pb & -(int32_t)cBA;
+
+			/*
+			 * Shifting.
+			 */
+			a_lo += a_lo & (cA - 1);
+			pa += pa & ((int32_t)cA - 1);
+			pb += pb & ((int32_t)cA - 1);
+			a_hi ^= (a_hi ^ (a_hi >> 1)) & -cA;
+			b_lo += b_lo & -cA;
+			qa += qa & -(int32_t)cA;
+			qb += qb & -(int32_t)cA;
+			b_hi ^= (b_hi ^ (b_hi >> 1)) & (cA - 1);
+		}
+
+		/*
+		 * Replace a and b with new values a' and b'.
+		 */
+		r = co_reduce(a, b, len, pa, pb, qa, qb);
+		pa -= pa * ((r & 1) << 1); /* pa <- -pa if a was negated */
+		pb -= pb * ((r & 1) << 1);
+		qa -= qa * (r & 2); /* qa <- -qa if b was negated */
+		qb -= qb * (r & 2);
+		co_reduce_mod(u, v, len, pa, pb, qa, qb, m + 1, m0i);
+	}
+
+	/*
+	 * Now one of the arrays should be 0, and the other contains
+	 * the GCD. If a is 0, then u is 0 as well, and v contains
+	 * the division result.
+	 * Result is correct if and only if GCD is 1.
+	 */
+	r = (a[0] | b[0]) ^ 1; /* r ends up 0 iff (a | b) is exactly 1 */
+	u[0] |= v[0]; /* merge u and v into x */
+	for (k = 1; k < len; k ++) {
+		r |= a[k] | b[k];
+		u[k] |= v[k];
+	}
+	return EQ0(r);
+}
diff --git a/third_party/bearssl/src/i15_modpow.c b/third_party/bearssl/src/i15_modpow.c
new file mode 100644
index 0000000..9bf304e
--- /dev/null
+++ b/third_party/bearssl/src/i15_modpow.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* x <- x^e mod m; e is big-endian, its bits are used low to high. inner.h */
+void
+br_i15_modpow(uint16_t *x,
+	const unsigned char *e, size_t elen,
+	const uint16_t *m, uint16_t m0i, uint16_t *t1, uint16_t *t2)
+{
+	size_t mlen;
+	unsigned k;
+
+	mlen = ((m[0] + 31) >> 4) * sizeof m[0]; /* bytes: header + words */
+	memcpy(t1, x, mlen);
+	br_i15_to_monty(t1, m); /* t1 holds the running square of the base */
+	br_i15_zero(x, m[0]);
+	x[1] = 1; /* accumulator x starts at 1 */
+	for (k = 0; k < ((unsigned)elen << 3); k ++) {
+		uint32_t ctl;
+
+		ctl = (e[elen - 1 - (k >> 3)] >> (k & 7)) & 1; /* bit k of e */
+		br_i15_montymul(t2, x, t1, m, m0i); /* computed for every bit */
+		CCOPY(ctl, x, t2, mlen); /* presumably kept only when ctl = 1 */
+		br_i15_montymul(t2, t1, t1, m, m0i);
+		memcpy(t1, t2, mlen); /* t1 <- t1 squared */
+	}
+}
diff --git a/third_party/bearssl/src/i15_modpow2.c b/third_party/bearssl/src/i15_modpow2.c
new file mode 100644
index 0000000..4b32118
--- /dev/null
+++ b/third_party/bearssl/src/i15_modpow2.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* x <- x^e mod m, window of 1 to 5 bits; returns 0 if tmp is too small. */
+uint32_t
+br_i15_modpow_opt(uint16_t *x,
+	const unsigned char *e, size_t elen,
+	const uint16_t *m, uint16_t m0i, uint16_t *tmp, size_t twlen)
+{
+	size_t mlen, mwlen;
+	uint16_t *t1, *t2, *base;
+	size_t u, v;
+	uint32_t acc;
+	int acc_len, win_len;
+
+	/*
+	 * Get modulus size.
+	 */
+	mwlen = (m[0] + 31) >> 4; /* header word + value words */
+	mlen = mwlen * sizeof m[0]; /* same size, in bytes */
+	mwlen += (mwlen & 1); /* rounded up to an even word count */
+	t1 = tmp;
+	t2 = tmp + mwlen;
+
+	/*
+	 * Compute possible window size, with a maximum of 5 bits.
+	 * When the window has size 1 bit, we use a specific code
+	 * that requires only two temporaries. Otherwise, for a
+	 * window of k bits, we need 2^k+1 temporaries.
+	 */
+	if (twlen < (mwlen << 1)) {
+		return 0; /* not even two temporaries: x is left untouched */
+	}
+	for (win_len = 5; win_len > 1; win_len --) {
+		if ((((uint32_t)1 << win_len) + 1) * mwlen <= twlen) {
+			break;
+		}
+	}
+
+	/*
+	 * Everything is done in Montgomery representation.
+	 */
+	br_i15_to_monty(x, m);
+
+	/*
+	 * Compute window contents. If the window has size one bit only,
+	 * then t2 is set to x; otherwise, t2[0] is left untouched, and
+	 * t2[k] is set to x^k (for k >= 1).
+	 */
+	if (win_len == 1) {
+		memcpy(t2, x, mlen);
+	} else {
+		memcpy(t2 + mwlen, x, mlen);
+		base = t2 + mwlen;
+		for (u = 2; u < ((unsigned)1 << win_len); u ++) {
+			br_i15_montymul(base + mwlen, base, x, m, m0i);
+			base += mwlen; /* next window slot */
+		}
+	}
+
+	/*
+	 * We need to set x to 1, in Montgomery representation. This can
+	 * be done efficiently by setting the high word to 1, then doing
+	 * one word-sized shift.
+	 */
+	br_i15_zero(x, m[0]);
+	x[(m[0] + 15) >> 4] = 1;
+	br_i15_muladd_small(x, 0, m);
+
+	/*
+	 * We process bits from most to least significant. At each
+	 * loop iteration, we have acc_len bits in acc.
+	 */
+	acc = 0;
+	acc_len = 0;
+	while (acc_len > 0 || elen > 0) {
+		int i, k;
+		uint32_t bits;
+
+		/*
+		 * Get the next bits.
+		 */
+		k = win_len;
+		if (acc_len < win_len) {
+			if (elen > 0) {
+				acc = (acc << 8) | *e ++;
+				elen --;
+				acc_len += 8;
+			} else {
+				k = acc_len; /* exponent exhausted: short window */
+			}
+		}
+		bits = (acc >> (acc_len - k)) & (((uint32_t)1 << k) - 1);
+		acc_len -= k;
+
+		/*
+		 * We could get exactly k bits. Compute k squarings.
+		 */
+		for (i = 0; i < k; i ++) {
+			br_i15_montymul(t1, x, x, m, m0i);
+			memcpy(x, t1, mlen);
+		}
+
+		/*
+		 * Window lookup: we want to set t2 to the window
+		 * lookup value, assuming the bits are non-zero. If
+		 * the window length is 1 bit only, then t2 is
+		 * already set; otherwise, we do a constant-time lookup.
+		 */
+		if (win_len > 1) {
+			br_i15_zero(t2, m[0]);
+			base = t2 + mwlen;
+			for (u = 1; u < ((uint32_t)1 << k); u ++) {
+				uint32_t mask;
+
+				mask = -EQ(u, bits); /* all-ones for the match */
+				for (v = 1; v < mwlen; v ++) {
+					t2[v] |= mask & base[v];
+				}
+				base += mwlen;
+			}
+		}
+
+		/*
+		 * Multiply with the looked-up value. We keep the
+		 * product only if the exponent bits are not all-zero.
+		 */
+		br_i15_montymul(t1, x, t2, m, m0i);
+		CCOPY(NEQ(bits, 0), x, t1, mlen);
+	}
+
+	/*
+	 * Convert back from Montgomery representation, and exit.
+	 */
+	br_i15_from_monty(x, m, m0i);
+	return 1;
+}
diff --git a/third_party/bearssl/src/i15_montmul.c b/third_party/bearssl/src/i15_montmul.c
new file mode 100644
index 0000000..e98bc32
--- /dev/null
+++ b/third_party/bearssl/src/i15_montmul.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Montgomery product of x and y modulo m into d (d is cleared first). */
+void
+br_i15_montymul(uint16_t *d, const uint16_t *x, const uint16_t *y,
+	const uint16_t *m, uint16_t m0i)
+{
+	size_t len, len4, u, v;
+	uint32_t dh;
+
+	len = (m[0] + 15) >> 4; /* value word count, from header m[0] */
+	len4 = len & ~(size_t)3; /* len rounded down to a multiple of 4 */
+	br_i15_zero(d, m[0]); /* done before x, y are read: keep d distinct */
+	dh = 0;
+	for (u = 0; u < len; u ++) {
+		uint32_t f, xu, r, zh;
+
+		xu = x[u + 1];
+		f = MUL15((d[1] + MUL15(x[u + 1], y[1])) & 0x7FFF, m0i)
+			& 0x7FFF;
+#if BR_ARMEL_CORTEXM_GCC
+		if (len4 != 0) {
+			uint16_t *limit;
+
+			limit = d + len4;
+			asm volatile (
+"\n\
+	@ carry: r=r2                                              \n\
+	@ multipliers: xu=r3 f=r4                                  \n\
+	@ base registers: d+v=r5 y+v=r6 m+v=r7                     \n\
+	@ r8 contains 0x7FFF                                       \n\
+	@ r9 contains d+len4                                       \n\
+	ldr	r0, %[limit]                                       \n\
+	ldr	r3, %[xu]                                          \n\
+	mov	r9, r0                                             \n\
+	ldr	r4, %[f]                                           \n\
+	eor	r2, r2                                             \n\
+	ldr	r5, %[d]                                           \n\
+	sub	r1, r2, #1                                         \n\
+	ldr	r6, %[y]                                           \n\
+	lsr	r1, r1, #17                                        \n\
+	ldr	r7, %[m]                                           \n\
+	mov	r8, r1                                             \n\
+loop%=:                                                            \n\
+	ldrh	r0, [r6, #2]                                       \n\
+	ldrh	r1, [r7, #2]                                       \n\
+	mul	r0, r3                                             \n\
+	mul	r1, r4                                             \n\
+	add	r2, r0, r2                                         \n\
+	ldrh	r0, [r5, #2]                                       \n\
+	add	r2, r1, r2                                         \n\
+	mov	r1, r8                                             \n\
+	add	r2, r0, r2                                         \n\
+	and	r1, r2                                             \n\
+	lsr	r2, r2, #15                                        \n\
+	strh	r1, [r5, #0]                                       \n\
+		                                                   \n\
+	ldrh	r0, [r6, #4]                                       \n\
+	ldrh	r1, [r7, #4]                                       \n\
+	mul	r0, r3                                             \n\
+	mul	r1, r4                                             \n\
+	add	r2, r0, r2                                         \n\
+	ldrh	r0, [r5, #4]                                       \n\
+	add	r2, r1, r2                                         \n\
+	mov	r1, r8                                             \n\
+	add	r2, r0, r2                                         \n\
+	and	r1, r2                                             \n\
+	lsr	r2, r2, #15                                        \n\
+	strh	r1, [r5, #2]                                       \n\
+		                                                   \n\
+	ldrh	r0, [r6, #6]                                       \n\
+	ldrh	r1, [r7, #6]                                       \n\
+	mul	r0, r3                                             \n\
+	mul	r1, r4                                             \n\
+	add	r2, r0, r2                                         \n\
+	ldrh	r0, [r5, #6]                                       \n\
+	add	r2, r1, r2                                         \n\
+	mov	r1, r8                                             \n\
+	add	r2, r0, r2                                         \n\
+	and	r1, r2                                             \n\
+	lsr	r2, r2, #15                                        \n\
+	strh	r1, [r5, #4]                                       \n\
+		                                                   \n\
+	ldrh	r0, [r6, #8]                                       \n\
+	ldrh	r1, [r7, #8]                                       \n\
+	mul	r0, r3                                             \n\
+	mul	r1, r4                                             \n\
+	add	r2, r0, r2                                         \n\
+	ldrh	r0, [r5, #8]                                       \n\
+	add	r2, r1, r2                                         \n\
+	mov	r1, r8                                             \n\
+	add	r2, r0, r2                                         \n\
+	and	r1, r2                                             \n\
+	lsr	r2, r2, #15                                        \n\
+	strh	r1, [r5, #6]                                       \n\
+		                                                   \n\
+	add	r5, r5, #8                                         \n\
+	add	r6, r6, #8                                         \n\
+	add	r7, r7, #8                                         \n\
+	cmp	r5, r9                                             \n\
+	bne	loop%=                                             \n\
+		                                                   \n\
+	str	r2, %[carry]                                       \n\
+"
+: [carry] "=m" (r)
+: [xu] "m" (xu), [f] "m" (f), [d] "m" (d), [y] "m" (y),
+	[m] "m" (m), [limit] "m" (limit)
+: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" );
+		} else {
+			r = 0;
+		}
+		v = len4; /* the tail loop below handles the remaining words */
+#else
+		r = 0;
+		for (v = 0; v < len4; v += 4) { /* four words per iteration */
+			uint32_t z;
+
+			z = d[v + 1] + MUL15(xu, y[v + 1])
+				+ MUL15(f, m[v + 1]) + r;
+			r = z >> 15;
+			d[v + 0] = z & 0x7FFF;
+			z = d[v + 2] + MUL15(xu, y[v + 2])
+				+ MUL15(f, m[v + 2]) + r;
+			r = z >> 15;
+			d[v + 1] = z & 0x7FFF;
+			z = d[v + 3] + MUL15(xu, y[v + 3])
+				+ MUL15(f, m[v + 3]) + r;
+			r = z >> 15;
+			d[v + 2] = z & 0x7FFF;
+			z = d[v + 4] + MUL15(xu, y[v + 4])
+				+ MUL15(f, m[v + 4]) + r;
+			r = z >> 15;
+			d[v + 3] = z & 0x7FFF;
+		}
+#endif
+		for (; v < len; v ++) { /* leftover len - len4 words */
+			uint32_t z;
+
+			z = d[v + 1] + MUL15(xu, y[v + 1])
+				+ MUL15(f, m[v + 1]) + r;
+			r = z >> 15;
+			d[v + 0] = z & 0x7FFF; /* stored one word lower */
+		}
+
+		zh = dh + r; /* previous extra top bits plus this round's carry */
+		d[len] = zh & 0x7FFF;
+		dh = zh >> 15; /* overflow beyond the top word */
+	}
+
+	/*
+	 * Restore the bit length (it was overwritten in the loop above).
+	 */
+	d[0] = m[0];
+
+	/*
+	 * d[] may be greater than m[], but it is still lower than twice
+	 * the modulus.
+	 */
+	br_i15_sub(d, m, NEQ(dh, 0) | NOT(br_i15_sub(d, m, 0)));
+}
diff --git a/third_party/bearssl/src/i15_mulacc.c b/third_party/bearssl/src/i15_mulacc.c
new file mode 100644
index 0000000..7a073ac
--- /dev/null
+++ b/third_party/bearssl/src/i15_mulacc.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Multiply-accumulate on 15-bit words: adds a[]*b[] onto d[] in place (see inner.h). */
+void
+br_i15_mulacc(uint16_t *d, const uint16_t *a, const uint16_t *b)
+{
+	size_t alen, blen, u;
+	unsigned dl, dh;
+
+	alen = (a[0] + 15) >> 4;	/* number of value words in a[], from its header word */
+	blen = (b[0] + 15) >> 4;	/* number of value words in b[] */
+
+	/*
+	 * Announced bit length of d[] will be the sum of the announced
+	 * bit lengths of a[] and b[]; but the lengths are encoded.
+	 */
+	dl = (a[0] & 15) + (b[0] & 15);	/* sum of the low 4 bits of both headers */
+	dh = (a[0] >> 4) + (b[0] >> 4);	/* sum of the upper header bits */
+	d[0] = (dh << 4) + dl + (~(uint32_t)(dl - 15) >> 31);	/* last term is 1 iff dl >= 15 */
+
+	for (u = 0; u < blen; u ++) {
+		uint32_t f;
+		size_t v;
+		uint32_t cc;
+
+		f = b[1 + u];	/* multiplier word for this row */
+		cc = 0;
+		for (v = 0; v < alen; v ++) {
+			uint32_t z;
+
+			z = (uint32_t)d[1 + u + v] + MUL15(f, a[1 + v]) + cc;
+			cc = z >> 15;
+			d[1 + u + v] = z & 0x7FFF;
+		}
+		d[1 + u + alen] = cc;	/* stored, not added: prior content of this word is ignored */
+	}
+}
diff --git a/third_party/bearssl/src/i15_muladd.c b/third_party/bearssl/src/i15_muladd.c
new file mode 100644
index 0000000..c4b7216
--- /dev/null
+++ b/third_party/bearssl/src/i15_muladd.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Constant-time division: returns x / d and, if r is not NULL, stores x % d
+ * in *r. The divisor must fit on 16 bits and the quotient on 17 bits.
+ */
+static uint32_t
+divrem16(uint32_t x, uint32_t d, uint32_t *r)
+{
+	int i;
+	uint32_t q;
+
+	q = 0;
+	d <<= 16;	/* line the divisor up with quotient bit 16 */
+	for (i = 16; i >= 0; i --) {	/* always 17 rounds, one quotient bit each */
+		uint32_t ctl;
+
+		ctl = LE(d, x);	/* used as a 0/1 flag: shifted divisor fits in x */
+		q |= ctl << i;
+		x -= (-ctl) & d;	/* masked subtraction instead of a branch */
+		d >>= 1;
+	}
+	if (r != NULL) {
+		*r = x;	/* what is left of x is the remainder */
+	}
+	return q;
+}
+
+/* Replaces x with (x * 2^15 + z) mod m, in place; z enters as the new low word (see inner.h). */
+void
+br_i15_muladd_small(uint16_t *x, uint16_t z, const uint16_t *m)
+{
+	/*
+	 * Constant-time: we accept to leak the exact bit length of the
+	 * modulus m.
+	 */
+	unsigned m_bitlen, mblr;
+	size_t u, mlen;
+	uint32_t hi, a0, a, b, q;
+	uint32_t cc, tb, over, under;
+
+	/*
+	 * Simple case: the modulus fits on one word.
+	 */
+	m_bitlen = m[0];
+	if (m_bitlen == 0) {
+		return;	/* empty modulus: x is left untouched */
+	}
+	if (m_bitlen <= 15) {
+		uint32_t rem;
+
+		divrem16(((uint32_t)x[1] << 15) | z, m[1], &rem);
+		x[1] = rem;
+		return;
+	}
+	mlen = (m_bitlen + 15) >> 4;
+	mblr = m_bitlen & 15;	/* low 4 bits of the header; 0 selects the "top word is full" path */
+
+	/*
+	 * Principle: we estimate the quotient (x*2^15+z)/m by
+	 * doing a 30/15 division with the high words.
+	 *
+	 * Let:
+	 *   w = 2^15
+	 *   a = (w*a0 + a1) * w^N + a2
+	 *   b = b0 * w^N + b2
+	 * such that:
+	 *   0 <= a0 < w
+	 *   0 <= a1 < w
+	 *   0 <= a2 < w^N
+	 *   w/2 <= b0 < w
+	 *   0 <= b2 < w^N
+	 *   a < w*b
+	 * I.e. the two top words of a are a0:a1, the top word of b is
+	 * b0, we ensured that b0 is "full" (high bit set), and a is
+	 * such that the quotient q = a/b fits on one word (0 <= q < w).
+	 *
+	 * If a = b*q + r (with 0 <= r < b), then we can estimate q by
+	 * using a division on the top words:
+	 *   a0*w + a1 = b0*u + v (with 0 <= v < b0)
+	 * Then the following holds:
+	 *   0 <= u <= w
+	 *   u-2 <= q <= u
+	 */
+	hi = x[mlen];	/* top word of x, saved before the shift below drops it */
+	if (mblr == 0) {
+		a0 = x[mlen];
+		memmove(x + 2, x + 1, (mlen - 1) * sizeof *x);	/* move every word up one slot */
+		x[1] = z;	/* z becomes the new low word */
+		a = (a0 << 15) + x[mlen];
+		b = m[mlen];
+	} else {
+		a0 = (x[mlen] << (15 - mblr)) | (x[mlen - 1] >> mblr);
+		memmove(x + 2, x + 1, (mlen - 1) * sizeof *x);	/* move every word up one slot */
+		x[1] = z;	/* z becomes the new low word */
+		a = (a0 << 15) | (((x[mlen] << (15 - mblr))
+			| (x[mlen - 1] >> mblr)) & 0x7FFF);
+		b = (m[mlen] << (15 - mblr)) | (m[mlen - 1] >> mblr);
+	}
+	q = divrem16(a, b, NULL);
+
+	/*
+	 * We computed an estimate for q, but the real one may be q,
+	 * q-1 or q-2; moreover, the division may have returned a value
+	 * 8000 or even 8001 if the two high words were identical, and
+	 * we want to avoid values beyond 7FFF. We thus adjust q so
+	 * that the "true" multiplier will be q+1, q or q-1, and q is
+	 * in the 0000..7FFF range.
+	 */
+	q = MUX(EQ(b, a0), 0x7FFF, q - 1 + ((q - 1) >> 31));
+
+	/*
+	 * We subtract q*m from x (x has an extra high word of value 'hi').
+	 * Since q may be off by 1 (in either direction), we may have to
+	 * add or subtract m afterwards.
+	 *
+	 * The 'tb' flag will be true (1) at the end of the loop if the
+	 * result is greater than or equal to the modulus (not counting
+	 * 'hi' or the carry).
+	 */
+	cc = 0;
+	tb = 1;
+	for (u = 1; u <= mlen; u ++) {
+		uint32_t mw, zl, xw, nxw;
+
+		mw = m[u];
+		zl = MUL15(mw, q) + cc;
+		cc = zl >> 15;
+		zl &= 0x7FFF;
+		xw = x[u];
+		nxw = xw - zl;
+		cc += nxw >> 31;	/* a wrap below zero sets bit 31: fold the borrow into the carry */
+		nxw &= 0x7FFF;
+		x[u] = nxw;
+		tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw));	/* equal words keep the verdict of lower words */
+	}
+
+	/*
+	 * If we underestimated q, then either cc < hi (one extra bit
+	 * beyond the top array word), or cc == hi and tb is true (no
+	 * extra bit, but the result is not lower than the modulus).
+	 *
+	 * If we overestimated q, then cc > hi.
+	 */
+	over = GT(cc, hi);
+	under = ~over & (tb | LT(cc, hi));
+	br_i15_add(x, m, over);	/* q was too large: add m back */
+	br_i15_sub(x, m, under);	/* q was too small: subtract m once more */
+}
diff --git a/third_party/bearssl/src/i15_ninv15.c b/third_party/bearssl/src/i15_ninv15.c
new file mode 100644
index 0000000..de3a3ba
--- /dev/null
+++ b/third_party/bearssl/src/i15_ninv15.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Returns -(1/x) mod 2^15 when x is odd, and 0 when x is even (see inner.h). */
+uint16_t
+br_i15_ninv15(uint16_t x)
+{
+	uint32_t y;
+
+	y = 2 - x;	/* for odd x, x*y = 1 mod 4: a 2-bit inverse to start from */
+	y = MUL15(y, 2 - MUL15(x, y));	/* refinement step; presumably doubles the valid low bits */
+	y = MUL15(y, 2 - MUL15(x, y));	/* same step again */
+	y = MUL15(y, 2 - MUL15(x, y));	/* third step: meant to cover all 15 bits */
+	return MUX(x & 1, -y, 0) & 0x7FFF;	/* negate and keep 15 bits; even x selects 0 */
+}
diff --git a/third_party/bearssl/src/i15_reduce.c b/third_party/bearssl/src/i15_reduce.c
new file mode 100644
index 0000000..0931b10
--- /dev/null
+++ b/third_party/bearssl/src/i15_reduce.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Reduces a modulo m into x; x[0] receives the header word of m (see inner.h). */
+void
+br_i15_reduce(uint16_t *x, const uint16_t *a, const uint16_t *m)
+{
+	uint32_t m_bitlen, a_bitlen;
+	size_t mlen, alen, u;
+
+	m_bitlen = m[0];
+	mlen = (m_bitlen + 15) >> 4;
+
+	x[0] = m_bitlen;
+	if (m_bitlen == 0) {
+		return;	/* empty modulus: only the header word of x is written */
+	}
+
+	/*
+	 * If the source is shorter, then simply copy all words from a[]
+	 * and zero out the upper words.
+	 */
+	a_bitlen = a[0];
+	alen = (a_bitlen + 15) >> 4;
+	if (a_bitlen < m_bitlen) {
+		memcpy(x + 1, a + 1, alen * sizeof *a);
+		for (u = alen; u < mlen; u ++) {
+			x[u + 1] = 0;
+		}
+		return;
+	}
+
+	/*
+	 * The source length is at least equal to that of the modulus.
+	 * We must thus copy N-1 words, and input the remaining words
+	 * one by one.
+	 */
+	memcpy(x + 1, a + 2 + (alen - mlen), (mlen - 1) * sizeof *a);	/* top mlen-1 words of a */
+	x[mlen] = 0;
+	for (u = 1 + alen - mlen; u > 0; u --) {	/* remaining words, most significant first */
+		br_i15_muladd_small(x, a[u], m);
+	}
+}
diff --git a/third_party/bearssl/src/i15_rshift.c b/third_party/bearssl/src/i15_rshift.c
new file mode 100644
index 0000000..f9991ab
--- /dev/null
+++ b/third_party/bearssl/src/i15_rshift.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Shifts x right by 'count' bits in place, across its 15-bit words (see inner.h). */
+void
+br_i15_rshift(uint16_t *x, int count)
+{
+	size_t u, len;
+	unsigned r;
+
+	len = (x[0] + 15) >> 4;	/* number of value words, from the header word */
+	if (len == 0) {
+		return;
+	}
+	r = x[1] >> count;	/* surviving bits of the lowest word */
+	for (u = 2; u <= len; u ++) {
+		unsigned w;
+
+		w = x[u];
+		x[u - 1] = ((w << (15 - count)) | r) & 0x7FFF;	/* NOTE(review): assumes 0 <= count <= 15 - confirm in inner.h */
+		r = w >> count;
+	}
+	x[len] = r;	/* top word keeps only its shifted-down bits */
+}
diff --git a/third_party/bearssl/src/i15_sub.c b/third_party/bearssl/src/i15_sub.c
new file mode 100644
index 0000000..1983c4d
--- /dev/null
+++ b/third_party/bearssl/src/i15_sub.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Computes a - b, stores it in a only when ctl is 1, and returns the borrow (see inner.h). */
+uint32_t
+br_i15_sub(uint16_t *a, const uint16_t *b, uint32_t ctl)
+{
+	uint32_t cc;
+	size_t u, m;
+
+	cc = 0;
+	m = (a[0] + 31) >> 4;	/* word count plus one, so u runs over 1..words */
+	for (u = 1; u < m; u ++) {
+		uint32_t aw, bw, naw;
+
+		aw = a[u];
+		bw = b[u];
+		naw = aw - bw - cc;
+		cc = naw >> 31;	/* a wrap below zero sets bit 31: that is the borrow */
+		a[u] = MUX(ctl, naw & 0x7FFF, aw);	/* the store always happens; ctl picks new or old value */
+	}
+	return cc;
+}
diff --git a/third_party/bearssl/src/i15_tmont.c b/third_party/bearssl/src/i15_tmont.c
new file mode 100644
index 0000000..d5c4b8b
--- /dev/null
+++ b/third_party/bearssl/src/i15_tmont.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Multiplies x by 2^15 modulo m once per word of m, in place (see inner.h). */
+void
+br_i15_to_monty(uint16_t *x, const uint16_t *m)
+{
+	unsigned k;
+
+	for (k = (m[0] + 15) >> 4; k > 0; k --) {	/* k starts at the word count of m */
+		br_i15_muladd_small(x, 0, m);	/* injects a zero low word: x * 2^15 mod m */
+	}
+}
diff --git a/third_party/bearssl/src/i31_add.c b/third_party/bearssl/src/i31_add.c
new file mode 100644
index 0000000..2ca47c6
--- /dev/null
+++ b/third_party/bearssl/src/i31_add.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Computes a + b, stores it in a only when ctl is 1, and returns the carry (see inner.h). */
+uint32_t
+br_i31_add(uint32_t *a, const uint32_t *b, uint32_t ctl)
+{
+	uint32_t cc;
+	size_t u, m;
+
+	cc = 0;
+	m = (a[0] + 63) >> 5;	/* word count plus one, so u runs over 1..words */
+	for (u = 1; u < m; u ++) {
+		uint32_t aw, bw, naw;
+
+		aw = a[u];
+		bw = b[u];
+		naw = aw + bw + cc;
+		cc = naw >> 31;	/* bit 31 of the sum is the carry out of this 31-bit word */
+		a[u] = MUX(ctl, naw & (uint32_t)0x7FFFFFFF, aw);	/* the store always happens; ctl picks the value */
+	}
+	return cc;
+}
diff --git a/third_party/bearssl/src/i31_bitlen.c b/third_party/bearssl/src/i31_bitlen.c
new file mode 100644
index 0000000..3e127c2
--- /dev/null
+++ b/third_party/bearssl/src/i31_bitlen.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Returns the encoded bit length of the xlen value words at x[], no header word (see inner.h). */
+uint32_t
+br_i31_bit_length(uint32_t *x, size_t xlen)
+{
+	uint32_t tw, twk;
+
+	tw = 0;
+	twk = 0;
+	while (xlen -- > 0) {	/* top word down; every word is visited, no early exit */
+		uint32_t w, c;
+
+		c = EQ(tw, 0);	/* no non-zero word recorded so far */
+		w = x[xlen];
+		tw = MUX(c, w, tw);	/* keep the first non-zero word met from the top ... */
+		twk = MUX(c, (uint32_t)xlen, twk);	/* ... together with its index */
+	}
+	return (twk << 5) + BIT_LENGTH(tw);	/* word index in the upper bits, bit count in the low 5 */
+}
diff --git a/third_party/bearssl/src/i31_decmod.c b/third_party/bearssl/src/i31_decmod.c
new file mode 100644
index 0000000..3cd7bfe
--- /dev/null
+++ b/third_party/bearssl/src/i31_decmod.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Decodes len big-endian bytes into x if the value is below m; returns 1 then, else 0 with x zeroed. */
+uint32_t
+br_i31_decode_mod(uint32_t *x, const void *src, size_t len, const uint32_t *m)
+{
+	/*
+	 * Two-pass algorithm: in the first pass, we determine whether the
+	 * value fits; in the second pass, we do the actual write.
+	 *
+	 * During the first pass, 'r' contains the comparison result so
+	 * far:
+	 *  0x00000000   value is equal to the modulus
+	 *  0x00000001   value is greater than the modulus
+	 *  0xFFFFFFFF   value is lower than the modulus
+	 *
+	 * Since we iterate starting with the least significant bytes (at
+	 * the end of src[]), each new comparison overrides the previous
+	 * except when the comparison yields 0 (equal).
+	 *
+	 * During the second pass, 'r' is either 0xFFFFFFFF (value fits)
+	 * or 0x00000000 (value does not fit).
+	 *
+	 * We must iterate over all bytes of the source, _and_ possibly
+	 * some extra virtual bytes (with value 0) so as to cover the
+	 * complete modulus as well. We also add 4 such extra bytes beyond
+	 * the modulus length because it then guarantees that no accumulated
+	 * partial word remains to be processed.
+	 */
+	const unsigned char *buf;
+	size_t mlen, tlen;
+	int pass;
+	uint32_t r;
+
+	buf = src;
+	mlen = (m[0] + 31) >> 5;
+	tlen = (mlen << 2);	/* four bytes per word of m */
+	if (tlen < len) {
+		tlen = len;
+	}
+	tlen += 4;
+	r = 0;
+	for (pass = 0; pass < 2; pass ++) {
+		size_t u, v;
+		uint32_t acc;
+		int acc_len;
+
+		v = 1;
+		acc = 0;
+		acc_len = 0;
+		for (u = 0; u < tlen; u ++) {
+			uint32_t b;
+
+			if (u < len) {
+				b = buf[len - 1 - u];	/* walk src from its last byte backwards */
+			} else {
+				b = 0;	/* virtual zero byte past the source */
+			}
+			acc |= (b << acc_len);
+			acc_len += 8;
+			if (acc_len >= 31) {
+				uint32_t xw;
+
+				xw = acc & (uint32_t)0x7FFFFFFF;
+				acc_len -= 31;
+				acc = b >> (8 - acc_len);	/* bits of b that did not fit in xw */
+				if (v <= mlen) {
+					if (pass) {
+						x[v] = r & xw;	/* r is all-ones or zero: copy or clear */
+					} else {
+						uint32_t cc;
+
+						cc = (uint32_t)CMP(xw, m[v]);
+						r = MUX(EQ(cc, 0), r, cc);
+					}
+				} else {
+					if (!pass) {
+						r = MUX(EQ(xw, 0), r, 1);	/* non-zero word above m: too large */
+					}
+				}
+				v ++;
+			}
+		}
+
+		/*
+		 * When we reach this point at the end of the first pass:
+		 * r is either 0, 1 or -1; we want to set r to 0 if it
+		 * is equal to 0 or 1, and leave it to -1 otherwise.
+		 *
+		 * When we reach this point at the end of the second pass:
+		 * r is either 0 or -1; we want to leave that value
+		 * untouched. This is a subcase of the previous.
+		 */
+		r >>= 1;
+		r |= (r << 1);
+	}
+
+	x[0] = m[0];
+	return r & (uint32_t)1;
+}
diff --git a/third_party/bearssl/src/i31_decode.c b/third_party/bearssl/src/i31_decode.c
new file mode 100644
index 0000000..8ec6d90
--- /dev/null
+++ b/third_party/bearssl/src/i31_decode.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Decodes len big-endian bytes from src into 31-bit words at x[1..]; x[0] gets the bit length. */
+void
+br_i31_decode(uint32_t *x, const void *src, size_t len)
+{
+	const unsigned char *buf;
+	size_t u, v;
+	uint32_t acc;
+	int acc_len;
+
+	buf = src;
+	u = len;
+	v = 1;
+	acc = 0;
+	acc_len = 0;
+	while (u -- > 0) {	/* last byte first: it holds the least significant bits */
+		uint32_t b;
+
+		b = buf[u];
+		acc |= (b << acc_len);
+		acc_len += 8;
+		if (acc_len >= 31) {
+			x[v ++] = acc & (uint32_t)0x7FFFFFFF;	/* emit one full 31-bit word */
+			acc_len -= 31;
+			acc = b >> (8 - acc_len);	/* keep the bits of b that did not fit */
+		}
+	}
+	if (acc_len != 0) {
+		x[v ++] = acc;	/* final partial word */
+	}
+	x[0] = br_i31_bit_length(x + 1, v - 1);	/* header computed over the v-1 words written */
+}
diff --git a/third_party/bearssl/src/i31_decred.c b/third_party/bearssl/src/i31_decred.c
new file mode 100644
index 0000000..43db662
--- /dev/null
+++ b/third_party/bearssl/src/i31_decred.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Decodes len big-endian bytes from src and reduces the value modulo m into x (see inner.h). */
+void
+br_i31_decode_reduce(uint32_t *x,
+	const void *src, size_t len, const uint32_t *m)
+{
+	uint32_t m_ebitlen, m_rbitlen;
+	size_t mblen, k;
+	const unsigned char *buf;
+	uint32_t acc;
+	int acc_len;
+
+	/*
+	 * Get the encoded bit length.
+	 */
+	m_ebitlen = m[0];
+
+	/*
+	 * Special case for an invalid (null) modulus.
+	 */
+	if (m_ebitlen == 0) {
+		x[0] = 0;
+		return;
+	}
+
+	/*
+	 * Clear the destination.
+	 */
+	br_i31_zero(x, m_ebitlen);
+
+	/*
+	 * First decode directly as many bytes as possible. This requires
+	 * computing the actual bit length.
+	 */
+	m_rbitlen = m_ebitlen >> 5;
+	m_rbitlen = (m_ebitlen & 31) + (m_rbitlen << 5) - m_rbitlen;	/* 31 bits per word + low 5 bits */
+	mblen = (m_rbitlen + 7) >> 3;	/* byte length of m */
+	k = mblen - 1;	/* k bytes hold fewer bits than m, so that prefix needs no reduction */
+	if (k >= len) {
+		br_i31_decode(x, src, len);	/* whole input is short enough: plain decode */
+		x[0] = m_ebitlen;
+		return;
+	}
+	buf = src;
+	br_i31_decode(x, buf, k);
+	x[0] = m_ebitlen;
+
+	/*
+	 * Input remaining bytes, using 31-bit words.
+	 */
+	acc = 0;
+	acc_len = 0;
+	while (k < len) {
+		uint32_t v;
+
+		v = buf[k ++];
+		if (acc_len >= 23) {	/* this byte completes a 31-bit word */
+			acc_len -= 23;
+			acc <<= (8 - acc_len);
+			acc |= v >> acc_len;
+			br_i31_muladd_small(x, acc, m);
+			acc = v & (0xFF >> (8 - acc_len));	/* low bits of v not yet injected */
+		} else {
+			acc = (acc << 8) | v;
+			acc_len += 8;
+		}
+	}
+
+	/*
+	 * We may have some bits accumulated. We then perform a shift to
+	 * be able to inject these bits as a full 31-bit word.
+	 */
+	if (acc_len != 0) {
+		acc = (acc | (x[1] << acc_len)) & 0x7FFFFFFF;
+		br_i31_rshift(x, 31 - acc_len);
+		br_i31_muladd_small(x, acc, m);
+	}
+}
diff --git a/third_party/bearssl/src/i31_encode.c b/third_party/bearssl/src/i31_encode.c
new file mode 100644
index 0000000..b6b40c4
--- /dev/null
+++ b/third_party/bearssl/src/i31_encode.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Writes x as exactly len big-endian bytes at dst: zero-padded, or cut to its low-order bytes. */
+void
+br_i31_encode(void *dst, size_t len, const uint32_t *x)
+{
+	unsigned char *buf;
+	size_t k, xlen;
+	uint32_t acc;
+	int acc_len;
+
+	xlen = (x[0] + 31) >> 5;
+	if (xlen == 0) {
+		memset(dst, 0, len);	/* no value words: output is all zeros */
+		return;
+	}
+	buf = (unsigned char *)dst + len;	/* start past the end and fill backwards */
+	k = 1;
+	acc = 0;
+	acc_len = 0;
+	while (len != 0) {
+		uint32_t w;
+
+		w = (k <= xlen) ? x[k] : 0;	/* past the top word, feed zeros */
+		k ++;
+		if (acc_len == 0) {
+			acc = w;
+			acc_len = 31;
+		} else {
+			uint32_t z;
+
+			z = acc | (w << acc_len);	/* 32 output bits: leftover plus low bits of w */
+			acc_len --;	/* 31 bits in, 32 bits out: leftover shrinks by one */
+			acc = w >> (31 - acc_len);
+			if (len >= 4) {
+				buf -= 4;
+				len -= 4;
+				br_enc32be(buf, z);
+			} else {
+				switch (len) {	/* 1 to 3 bytes left: emit the low bytes of z only */
+				case 3:
+					buf[-3] = (unsigned char)(z >> 16);
+					/* fall through */
+				case 2:
+					buf[-2] = (unsigned char)(z >> 8);
+					/* fall through */
+				case 1:
+					buf[-1] = (unsigned char)z;
+					break;
+				}
+				return;
+			}
+		}
+	}
+}
diff --git a/third_party/bearssl/src/i31_fmont.c b/third_party/bearssl/src/i31_fmont.c
new file mode 100644
index 0000000..c24b417
--- /dev/null
+++ b/third_party/bearssl/src/i31_fmont.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: x <- x / 2^(31*len) mod m, len = word count of m */
+void
+br_i31_from_monty(uint32_t *x, const uint32_t *m, uint32_t m0i)
+{
+	size_t len, u, v;
+
+	len = (m[0] + 31) >> 5;
+	for (u = 0; u < len; u ++) {
+		uint32_t f;
+		uint64_t cc;
+
+		f = MUL31_lo(x[1], m0i); /* x + f*m = 0 mod 2^31 if m0i = -1/m[1] */
+		cc = 0;
+		for (v = 0; v < len; v ++) {
+			uint64_t z;
+
+			z = (uint64_t)x[v + 1] + MUL31(f, m[v + 1]) + cc;
+			cc = z >> 31;
+			if (v != 0) { /* low word is dropped: division by 2^31 */
+				x[v] = (uint32_t)z & 0x7FFFFFFF;
+			}
+		}
+		x[len] = (uint32_t)cc; /* last carry becomes the top word */
+	}
+
+	/*
+	 * We may have to do an extra subtraction, but only if the
+	 * value in x[] is indeed greater than or equal to that of m[],
+	 * which is why we must do two calls: the first call (ctl = 0)
+	 * only computes the carry, and the second call performs the
+	 * subtraction only if that carry is 0.
+	 */
+	br_i31_sub(x, m, NOT(br_i31_sub(x, m, 0)));
+}
diff --git a/third_party/bearssl/src/i31_iszero.c b/third_party/bearssl/src/i31_iszero.c
new file mode 100644
index 0000000..8a7ea44
--- /dev/null
+++ b/third_party/bearssl/src/i31_iszero.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: returns 1 if all value words of x are zero, 0 otherwise */
+uint32_t
+br_i31_iszero(const uint32_t *x)
+{
+	uint32_t z;
+	size_t u;
+
+	z = 0;
+	for (u = (x[0] + 31) >> 5; u > 0; u --) {
+		z |= x[u]; /* OR of all words; no early exit on the value */
+	}
+	return ~(z | -z) >> 31; /* top bit of (z | -z) is set iff z != 0 */
+}
diff --git a/third_party/bearssl/src/i31_moddiv.c b/third_party/bearssl/src/i31_moddiv.c
new file mode 100644
index 0000000..9950591
--- /dev/null
+++ b/third_party/bearssl/src/i31_moddiv.c
@@ -0,0 +1,488 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * In this file, we handle big integers with a custom format, i.e.
+ * without the usual one-word header. Value is split into 31-bit words,
+ * each stored in a 32-bit slot (top bit is zero) in little-endian
+ * order. The length (in words) is provided explicitly. In some cases,
+ * the value can be negative (using two's complement representation). In
+ * some cases, the top word is allowed to have a 32nd bit.
+ */
+
+/*
+ * Negate big integer conditionally. The value consists of 'len' words,
+ * with 31 bits in each word (the top bit of each word should be 0,
+ * except possibly for the last word). If 'ctl' is 1, the negation is
+ * computed; otherwise, if 'ctl' is 0, then the value is unchanged.
+ */
+static void
+cond_negate(uint32_t *a, size_t len, uint32_t ctl)
+{
+	size_t k;
+	uint32_t cc, xm;
+
+	cc = ctl; /* the "+1" of two's complement, only if ctl = 1 */
+	xm = -ctl >> 1; /* 0x7FFFFFFF if ctl = 1, else 0 */
+	for (k = 0; k < len; k ++) {
+		uint32_t aw;
+
+		aw = a[k];
+		aw = (aw ^ xm) + cc; /* invert the 31 low bits, add carry */
+		a[k] = aw & 0x7FFFFFFF;
+		cc = aw >> 31; /* carry into the next word */
+	}
+}
+
+/*
+ * Finish modular reduction. Rules on input parameters:
+ *
+ *   if neg = 1, then -m <= a < 0
+ *   if neg = 0, then 0 <= a < 2*m
+ *
+ * If neg = 0, then the top word of a[] may use 32 bits.
+ *
+ * Also, modulus m must be odd.
+ */
+static void
+finish_mod(uint32_t *a, size_t len, const uint32_t *m, uint32_t neg)
+{
+	size_t k;
+	uint32_t cc, xm, ym;
+
+	/*
+	 * First pass: compare a (assumed nonnegative) with m.
+	 * Note that if the final word uses the top extra bit, then
+	 * subtracting m must yield a value less than 2^31, since we
+	 * assumed that a < 2*m.
+	 */
+	cc = 0;
+	for (k = 0; k < len; k ++) {
+		uint32_t aw, mw;
+
+		aw = a[k];
+		mw = m[k];
+		cc = (aw - mw - cc) >> 31; /* borrow out of this word */
+	}
+
+	/*
+	 * At this point:
+	 *   if neg = 1, then we must add m (regardless of cc)
+	 *   if neg = 0 and cc = 0, then we must subtract m
+	 *   if neg = 0 and cc = 1, then we must do nothing
+	 */
+	xm = -neg >> 1; /* 0x7FFFFFFF if neg = 1, else 0 */
+	ym = -(neg | (1 - cc)); /* all-ones if m is added or subtracted */
+	cc = neg; /* with xm: a - (~m) - 1 = a + m */
+	for (k = 0; k < len; k ++) {
+		uint32_t aw, mw;
+
+		aw = a[k];
+		mw = (m[k] ^ xm) & ym;
+		aw = aw - mw - cc;
+		a[k] = aw & 0x7FFFFFFF;
+		cc = aw >> 31;
+	}
+}
+
+/*
+ * Compute:
+ *   a <- (a*pa+b*pb)/(2^31)
+ *   b <- (a*qa+b*qb)/(2^31)
+ * The division is assumed to be exact (i.e. the low word is dropped).
+ * If the final a is negative, then it is negated. Similarly for b.
+ * Returned value is the combination of two bits:
+ *   bit 0: 1 if a had to be negated, 0 otherwise
+ *   bit 1: 1 if b had to be negated, 0 otherwise
+ *
+ * Factors pa, pb, qa and qb must be at most 2^31 in absolute value.
+ * Source integers a and b must be nonnegative; top word is not allowed
+ * to contain an extra 32nd bit.
+ */
+static uint32_t
+co_reduce(uint32_t *a, uint32_t *b, size_t len,
+	int64_t pa, int64_t pb, int64_t qa, int64_t qb)
+{
+	size_t k;
+	int64_t cca, ccb;
+	uint32_t nega, negb;
+
+	cca = 0;
+	ccb = 0;
+	for (k = 0; k < len; k ++) {
+		uint32_t wa, wb;
+		uint64_t za, zb;
+		uint64_t tta, ttb;
+
+		/*
+		 * Since:
+		 *   |pa| <= 2^31
+		 *   |pb| <= 2^31
+		 *   0 <= wa <= 2^31 - 1
+		 *   0 <= wb <= 2^31 - 1
+		 *   |cca| <= 2^32 - 1
+		 * Then:
+		 *   |za| <= (2^31-1)*(2^32) + (2^32-1) = 2^63 - 1
+		 *
+		 * Thus, the new value of cca is such that |cca| <= 2^32 - 1.
+		 * The same applies to ccb.
+		 */
+		wa = a[k];
+		wb = b[k];
+		za = wa * (uint64_t)pa + wb * (uint64_t)pb + (uint64_t)cca;
+		zb = wa * (uint64_t)qa + wb * (uint64_t)qb + (uint64_t)ccb;
+		if (k > 0) { /* word 0 is dropped: exact division by 2^31 */
+			a[k - 1] = za & 0x7FFFFFFF;
+			b[k - 1] = zb & 0x7FFFFFFF;
+		}
+
+		/*
+		 * For the new values of cca and ccb, we need a signed
+		 * right-shift; since, in C, right-shifting a signed
+		 * negative value is implementation-defined, we use a
+		 * custom portable sign extension expression.
+		 */
+#define M   ((uint64_t)1 << 32)
+		tta = za >> 31;
+		ttb = zb >> 31;
+		tta = (tta ^ M) - M; /* sign-extend the 33-bit value */
+		ttb = (ttb ^ M) - M;
+		cca = *(int64_t *)&tta; /* same bits, read as signed */
+		ccb = *(int64_t *)&ttb;
+#undef M
+	}
+	a[len - 1] = (uint32_t)cca; /* last carry is the new top word */
+	b[len - 1] = (uint32_t)ccb;
+
+	nega = (uint32_t)((uint64_t)cca >> 63); /* sign bit of new a */
+	negb = (uint32_t)((uint64_t)ccb >> 63); /* sign bit of new b */
+	cond_negate(a, len, nega);
+	cond_negate(b, len, negb);
+	return nega | (negb << 1);
+}
+
+/*
+ * Compute:
+ *   a <- (a*pa+b*pb)/(2^31) mod m
+ *   b <- (a*qa+b*qb)/(2^31) mod m
+ *
+ * m0i is equal to -1/m[0] mod 2^31; it yields the factors fa and fb
+ * such that adding fa*m (resp. fb*m) makes the low word zero.
+ * Factors pa, pb, qa and qb must be at most 2^31 in absolute value.
+ * Source integers a and b must be nonnegative; top word is not allowed
+ * to contain an extra 32nd bit.
+ */
+static void
+co_reduce_mod(uint32_t *a, uint32_t *b, size_t len,
+	int64_t pa, int64_t pb, int64_t qa, int64_t qb,
+	const uint32_t *m, uint32_t m0i)
+{
+	size_t k;
+	int64_t cca, ccb;
+	uint32_t fa, fb;
+
+	cca = 0;
+	ccb = 0;
+	fa = ((a[0] * (uint32_t)pa + b[0] * (uint32_t)pb) * m0i) & 0x7FFFFFFF;
+	fb = ((a[0] * (uint32_t)qa + b[0] * (uint32_t)qb) * m0i) & 0x7FFFFFFF;
+	for (k = 0; k < len; k ++) {
+		uint32_t wa, wb;
+		uint64_t za, zb;
+		uint64_t tta, ttb;
+
+		/*
+		 * In this loop, carries 'cca' and 'ccb' always fit on
+		 * 33 bits (in absolute value).
+		 */
+		wa = a[k];
+		wb = b[k];
+		za = wa * (uint64_t)pa + wb * (uint64_t)pb
+			+ m[k] * (uint64_t)fa + (uint64_t)cca;
+		zb = wa * (uint64_t)qa + wb * (uint64_t)qb
+			+ m[k] * (uint64_t)fb + (uint64_t)ccb;
+		if (k > 0) { /* word 0 is dropped: division by 2^31 */
+			a[k - 1] = (uint32_t)za & 0x7FFFFFFF;
+			b[k - 1] = (uint32_t)zb & 0x7FFFFFFF;
+		}
+
+#define M   ((uint64_t)1 << 32)
+		tta = za >> 31;
+		ttb = zb >> 31;
+		tta = (tta ^ M) - M; /* sign-extend the 33-bit value */
+		ttb = (ttb ^ M) - M;
+		cca = *(int64_t *)&tta; /* same bits, read as signed */
+		ccb = *(int64_t *)&ttb;
+#undef M
+	}
+	a[len - 1] = (uint32_t)cca;
+	b[len - 1] = (uint32_t)ccb;
+
+	/*
+	 * At this point:
+	 *   -m <= a < 2*m
+	 *   -m <= b < 2*m
+	 * (this is a case of Montgomery reduction)
+	 * The top word of 'a' and 'b' may have a 32nd bit set.
+	 * We may have to add or subtract the modulus.
+	 */
+	finish_mod(a, len, m, (uint32_t)((uint64_t)cca >> 63));
+	finish_mod(b, len, m, (uint32_t)((uint64_t)ccb >> 63));
+}
+
+/* see inner.h: x <- x/y mod m (m odd); returns 1 iff GCD(y,m) = 1 */
+uint32_t
+br_i31_moddiv(uint32_t *x, const uint32_t *y, const uint32_t *m, uint32_t m0i,
+	uint32_t *t)
+{
+	/*
+	 * Algorithm is an extended binary GCD. We maintain four values
+	 * a, b, u and v, with the following invariants:
+	 *
+	 *   a * x = y * u mod m
+	 *   b * x = y * v mod m
+	 *
+	 * Starting values are:
+	 *
+	 *   a = y
+	 *   b = m
+	 *   u = x
+	 *   v = 0
+	 *
+	 * The formal definition of the algorithm is a sequence of steps:
+	 *
+	 *   - If a is even, then a <- a/2 and u <- u/2 mod m.
+	 *   - Otherwise, if b is even, then b <- b/2 and v <- v/2 mod m.
+	 *   - Otherwise, if a > b, then a <- (a-b)/2 and u <- (u-v)/2 mod m.
+	 *   - Otherwise, b <- (b-a)/2 and v <- (v-u)/2 mod m.
+	 *
+	 * Algorithm stops when a = b. At that point, they both are equal
+	 * to GCD(y,m); the modular division succeeds if that value is 1.
+	 * The result of the modular division is then u (or v: both are
+	 * equal at that point).
+	 *
+	 * Each step makes either a or b shrink by at least one bit; hence,
+	 * if m has bit length k bits, then 2k-2 steps are sufficient.
+	 *
+	 *
+	 * Though complexity is quadratic in the size of m, the bit-by-bit
+	 * processing is not very efficient. We can speed up processing by
+	 * remarking that the decisions are taken based only on observation
+	 * of the top and low bits of a and b.
+	 *
+	 * In the loop below, at each iteration, we use the two top words
+	 * of a and b, and the low words of a and b, to compute reduction
+	 * parameters pa, pb, qa and qb such that the new values for a
+	 * and b are:
+	 *
+	 *   a' = (a*pa + b*pb) / (2^31)
+	 *   b' = (a*qa + b*qb) / (2^31)
+	 *
+	 * the division being exact.
+	 *
+	 * Since the choices are based on the top words, they may be slightly
+	 * off, requiring an optional correction: if a' < 0, then we replace
+	 * pa with -pa, and pb with -pb. The total length of a and b is
+	 * thus reduced by at least 30 bits at each iteration.
+	 *
+	 * The stopping conditions are still the same, though: when a
+	 * and b become equal, they must be both odd (since m is odd,
+	 * the GCD cannot be even), therefore the next operation is a
+	 * subtraction, and one of the values becomes 0. At that point,
+	 * nothing else happens, i.e. one value is stuck at 0, and the
+	 * other one is the GCD.
+	 */
+	size_t len, k;
+	uint32_t *a, *b, *u, *v;
+	uint32_t num, r;
+
+	len = (m[0] + 31) >> 5;
+	a = t;
+	b = a + len;
+	u = x + 1; /* u is x itself (header word skipped) */
+	v = b + len; /* t[] holds a, b and v: 3*len words are used */
+	memcpy(a, y + 1, len * sizeof *y);
+	memcpy(b, m + 1, len * sizeof *m);
+	memset(v, 0, len * sizeof *v);
+
+	/*
+	 * Each iteration reduces a and b by at least 30 bits in total;
+	 * num starts at twice (m[0] - (m[0] >> 5)), the bit length of m.
+	 */
+	for (num = ((m[0] - (m[0] >> 5)) << 1) + 30; num >= 30; num -= 30) {
+		size_t j;
+		uint32_t c0, c1;
+		uint32_t a0, a1, b0, b1;
+		uint64_t a_hi, b_hi;
+		uint32_t a_lo, b_lo;
+		int64_t pa, pb, qa, qb;
+		int i;
+
+		/*
+		 * Extract top words of a and b. If j is the highest
+		 * index >= 1 such that a[j] != 0 or b[j] != 0, then we want
+		 * (a[j] << 31) + a[j - 1], and (b[j] << 31) + b[j - 1].
+		 * If a and b are down to one word each, then we use a[0]
+		 * and b[0].
+		 */
+		c0 = (uint32_t)-1;
+		c1 = (uint32_t)-1;
+		a0 = 0;
+		a1 = 0;
+		b0 = 0;
+		b1 = 0;
+		j = len;
+		while (j -- > 0) {
+			uint32_t aw, bw;
+
+			aw = a[j];
+			bw = b[j];
+			a0 ^= (a0 ^ aw) & c0;
+			a1 ^= (a1 ^ aw) & c1;
+			b0 ^= (b0 ^ bw) & c0;
+			b1 ^= (b1 ^ bw) & c1;
+			c1 = c0;
+			c0 &= (((aw | bw) + 0x7FFFFFFF) >> 31) - (uint32_t)1;
+		}
+
+		/*
+		 * If c1 = 0, then we grabbed two words for a and b.
+		 * If c1 != 0 but c0 = 0, then we grabbed one word. It
+		 * is not possible that c1 != 0 and c0 != 0, because that
+		 * would mean that both integers are zero.
+		 */
+		a1 |= a0 & c1;
+		a0 &= ~c1;
+		b1 |= b0 & c1;
+		b0 &= ~c1;
+		a_hi = ((uint64_t)a0 << 31) + a1;
+		b_hi = ((uint64_t)b0 << 31) + b1;
+		a_lo = a[0];
+		b_lo = b[0];
+
+		/*
+		 * Compute reduction factors:
+		 *
+		 *   a' = a*pa + b*pb
+		 *   b' = a*qa + b*qb
+		 *
+		 * such that a' and b' are both multiple of 2^31, but are
+		 * only marginally larger than a and b.
+		 */
+		pa = 1;
+		pb = 0;
+		qa = 0;
+		qb = 1;
+		for (i = 0; i < 31; i ++) {
+			/*
+			 * At each iteration:
+			 *
+			 *   a <- (a-b)/2 if: a is odd, b is odd, a_hi > b_hi
+			 *   b <- (b-a)/2 if: a is odd, b is odd, a_hi <= b_hi
+			 *   a <- a/2 if: a is even
+			 *   b <- b/2 if: a is odd, b is even
+			 *
+			 * We multiply a_lo and b_lo by 2 at each
+			 * iteration, thus a division by 2 really is a
+			 * non-multiplication by 2.
+			 */
+			uint32_t r, oa, ob, cAB, cBA, cA;
+			uint64_t rz;
+
+			/*
+			 * r = GT(a_hi, b_hi)
+			 * But the GT() function works on uint32_t operands,
+			 * so we inline a 64-bit version here.
+			 */
+			rz = b_hi - a_hi;
+			r = (uint32_t)((rz ^ ((a_hi ^ b_hi)
+				& (a_hi ^ rz))) >> 63);
+
+			/*
+			 * cAB = 1 if b must be subtracted from a
+			 * cBA = 1 if a must be subtracted from b
+			 * cA = 1 if a is divided by 2, 0 otherwise
+			 *
+			 * Rules:
+			 *
+			 *   cAB and cBA cannot be both 1.
+			 *   if a is not divided by 2, b is.
+			 */
+			oa = (a_lo >> i) & 1;
+			ob = (b_lo >> i) & 1;
+			cAB = oa & ob & r;
+			cBA = oa & ob & NOT(r);
+			cA = cAB | NOT(oa);
+
+			/*
+			 * Conditional subtractions.
+			 */
+			a_lo -= b_lo & -cAB;
+			a_hi -= b_hi & -(uint64_t)cAB;
+			pa -= qa & -(int64_t)cAB;
+			pb -= qb & -(int64_t)cAB;
+			b_lo -= a_lo & -cBA;
+			b_hi -= a_hi & -(uint64_t)cBA;
+			qa -= pa & -(int64_t)cBA;
+			qb -= pb & -(int64_t)cBA;
+
+			/*
+			 * Shifting.
+			 */
+			a_lo += a_lo & (cA - 1);
+			pa += pa & ((int64_t)cA - 1);
+			pb += pb & ((int64_t)cA - 1);
+			a_hi ^= (a_hi ^ (a_hi >> 1)) & -(uint64_t)cA;
+			b_lo += b_lo & -cA;
+			qa += qa & -(int64_t)cA;
+			qb += qb & -(int64_t)cA;
+			b_hi ^= (b_hi ^ (b_hi >> 1)) & ((uint64_t)cA - 1);
+		}
+
+		/*
+		 * Replace a and b with new values a' and b'.
+		 */
+		r = co_reduce(a, b, len, pa, pb, qa, qb);
+		pa -= pa * ((r & 1) << 1); /* pa <- -pa if a' was negated */
+		pb -= pb * ((r & 1) << 1);
+		qa -= qa * (r & 2); /* qa <- -qa if b' was negated */
+		qb -= qb * (r & 2);
+		co_reduce_mod(u, v, len, pa, pb, qa, qb, m + 1, m0i);
+	}
+
+	/*
+	 * Now one of the arrays should be 0, and the other contains
+	 * the GCD. If a is 0, then u is 0 as well, and v contains
+	 * the division result.
+	 * Result is correct if and only if GCD is 1.
+	 */
+	r = (a[0] | b[0]) ^ 1; /* 0 iff the low word of the GCD is 1 */
+	u[0] |= v[0]; /* merge u and v into x (one of them is zero) */
+	for (k = 1; k < len; k ++) {
+		r |= a[k] | b[k];
+		u[k] |= v[k];
+	}
+	return EQ0(r);
+}
diff --git a/third_party/bearssl/src/i31_modpow.c b/third_party/bearssl/src/i31_modpow.c
new file mode 100644
index 0000000..4ef3f5d
--- /dev/null
+++ b/third_party/bearssl/src/i31_modpow.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: x <- x^e mod m; e[] is elen bytes, big-endian */
+void
+br_i31_modpow(uint32_t *x,
+	const unsigned char *e, size_t elen,
+	const uint32_t *m, uint32_t m0i, uint32_t *t1, uint32_t *t2)
+{
+	size_t mlen;
+	uint32_t k;
+
+	/*
+	 * 'mlen' is the length of m[] expressed in bytes (including
+	 * the "bit length" first field).
+	 */
+	mlen = ((m[0] + 63) >> 5) * sizeof m[0];
+
+	/*
+	 * Throughout the algorithm:
+	 * -- t1[] is in Montgomery representation; it contains x, x^2,
+	 * x^4, x^8...
+	 * -- The result is accumulated, in normal representation, in
+	 * the x[] array.
+	 * -- t2[] is used as destination buffer for each multiplication.
+	 * Multiplying x (normal) by t1 (Montgomery) yields a normal value,
+	 * hence there is no need to call br_i31_from_monty().
+	 */
+	memcpy(t1, x, mlen);
+	br_i31_to_monty(t1, m);
+	br_i31_zero(x, m[0]);
+	x[1] = 1;
+	for (k = 0; k < ((uint32_t)elen << 3); k ++) {
+		uint32_t ctl;
+
+		ctl = (e[elen - 1 - (k >> 3)] >> (k & 7)) & 1; /* bit k of e */
+		br_i31_montymul(t2, x, t1, m, m0i);
+		CCOPY(ctl, x, t2, mlen); /* keep the product only if ctl = 1 */
+		br_i31_montymul(t2, t1, t1, m, m0i);
+		memcpy(t1, t2, mlen);
+	}
+}
diff --git a/third_party/bearssl/src/i31_modpow2.c b/third_party/bearssl/src/i31_modpow2.c
new file mode 100644
index 0000000..0b8f8cf
--- /dev/null
+++ b/third_party/bearssl/src/i31_modpow2.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: x <- x^e mod m (windowed); returns 0 if tmp[] is too small */
+uint32_t
+br_i31_modpow_opt(uint32_t *x,
+	const unsigned char *e, size_t elen,
+	const uint32_t *m, uint32_t m0i, uint32_t *tmp, size_t twlen)
+{
+	size_t mlen, mwlen;
+	uint32_t *t1, *t2, *base;
+	size_t u, v;
+	uint32_t acc;
+	int acc_len, win_len;
+
+	/*
+	 * Get modulus size.
+	 */
+	mwlen = (m[0] + 63) >> 5; /* words in m[], header included */
+	mlen = mwlen * sizeof m[0];
+	mwlen += (mwlen & 1); /* round up to an even number of words */
+	t1 = tmp;
+	t2 = tmp + mwlen;
+
+	/*
+	 * Compute possible window size, with a maximum of 5 bits.
+	 * When the window has size 1 bit, we use a specific code
+	 * that requires only two temporaries. Otherwise, for a
+	 * window of k bits, we need 2^k+1 temporaries.
+	 */
+	if (twlen < (mwlen << 1)) {
+		return 0;
+	}
+	for (win_len = 5; win_len > 1; win_len --) {
+		if ((((uint32_t)1 << win_len) + 1) * mwlen <= twlen) {
+			break;
+		}
+	}
+
+	/*
+	 * Everything is done in Montgomery representation.
+	 */
+	br_i31_to_monty(x, m);
+
+	/*
+	 * Compute window contents. If the window has size one bit only,
+	 * then t2 is set to x; otherwise, t2[0] is left untouched, and
+	 * t2[k] is set to x^k (for k >= 1).
+	 */
+	if (win_len == 1) {
+		memcpy(t2, x, mlen);
+	} else {
+		memcpy(t2 + mwlen, x, mlen);
+		base = t2 + mwlen;
+		for (u = 2; u < ((unsigned)1 << win_len); u ++) {
+			br_i31_montymul(base + mwlen, base, x, m, m0i);
+			base += mwlen;
+		}
+	}
+
+	/*
+	 * We need to set x to 1, in Montgomery representation. This can
+	 * be done efficiently by setting the high word to 1, then doing
+	 * one word-sized shift.
+	 */
+	br_i31_zero(x, m[0]);
+	x[(m[0] + 31) >> 5] = 1;
+	br_i31_muladd_small(x, 0, m);
+
+	/*
+	 * We process bits from most to least significant. At each
+	 * loop iteration, we have acc_len bits in acc.
+	 */
+	acc = 0;
+	acc_len = 0;
+	while (acc_len > 0 || elen > 0) {
+		int i, k;
+		uint32_t bits;
+
+		/*
+		 * Get the next bits.
+		 */
+		k = win_len;
+		if (acc_len < win_len) {
+			if (elen > 0) {
+				acc = (acc << 8) | *e ++;
+				elen --;
+				acc_len += 8;
+			} else {
+				k = acc_len; /* final, shorter window */
+			}
+		}
+		bits = (acc >> (acc_len - k)) & (((uint32_t)1 << k) - 1);
+		acc_len -= k;
+
+		/*
+		 * We now have exactly k bits. Compute k squarings.
+		 */
+		for (i = 0; i < k; i ++) {
+			br_i31_montymul(t1, x, x, m, m0i);
+			memcpy(x, t1, mlen);
+		}
+
+		/*
+		 * Window lookup: we want to set t2 to the window
+		 * lookup value, assuming the bits are non-zero. If
+		 * the window length is 1 bit only, then t2 is
+		 * already set; otherwise, we do a constant-time lookup.
+		 */
+		if (win_len > 1) {
+			br_i31_zero(t2, m[0]);
+			base = t2 + mwlen;
+			for (u = 1; u < ((uint32_t)1 << k); u ++) {
+				uint32_t mask;
+
+				mask = -EQ(u, bits); /* all-ones iff u == bits */
+				for (v = 1; v < mwlen; v ++) { /* skip word 0 */
+					t2[v] |= mask & base[v];
+				}
+				base += mwlen;
+			}
+		}
+
+		/*
+		 * Multiply with the looked-up value. We keep the
+		 * product only if the exponent bits are not all-zero.
+		 */
+		br_i31_montymul(t1, x, t2, m, m0i);
+		CCOPY(NEQ(bits, 0), x, t1, mlen);
+	}
+
+	/*
+	 * Convert back from Montgomery representation, and exit.
+	 */
+	br_i31_from_monty(x, m, m0i);
+	return 1;
+}
diff --git a/third_party/bearssl/src/i31_montmul.c b/third_party/bearssl/src/i31_montmul.c
new file mode 100644
index 0000000..758f8f4
--- /dev/null
+++ b/third_party/bearssl/src/i31_montmul.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: d <- x*y / 2^(31*len) mod m, len = word count of m */
+void
+br_i31_montymul(uint32_t *d, const uint32_t *x, const uint32_t *y,
+	const uint32_t *m, uint32_t m0i)
+{
+	/*
+	 * Each outer loop iteration computes:
+	 *   d <- (d + xu*y + f*m) / 2^31
+	 * We have xu <= 2^31-1 and f <= 2^31-1.
+	 * Thus, if d <= 2*m-1 on input, then:
+	 *   2*m-1 + 2*(2^31-1)*m <= (2^32)*m-1
+	 * and the new d value is less than 2*m.
+	 *
+	 * We represent d over 31-bit words, with an extra word 'dh'
+	 * which can thus be only 0 or 1.
+	 */
+	size_t len, len4, u, v;
+	uint32_t dh;
+
+	len = (m[0] + 31) >> 5;
+	len4 = len & ~(size_t)3; /* len rounded down to a multiple of 4 */
+	br_i31_zero(d, m[0]);
+	dh = 0;
+	for (u = 0; u < len; u ++) {
+		/*
+		 * The carry for each operation fits on 32 bits:
+		 *   d[v+1] <= 2^31-1
+		 *   xu*y[v+1] <= (2^31-1)*(2^31-1)
+		 *   f*m[v+1] <= (2^31-1)*(2^31-1)
+		 *   r <= 2^32-1
+		 *   (2^31-1) + 2*(2^31-1)*(2^31-1) + (2^32-1) = 2^63 - 2^31
+		 * After division by 2^31, the new r is then at most 2^32-1
+		 *
+		 * Using a 32-bit carry has performance benefits on 32-bit
+		 * systems; however, on 64-bit architectures, we prefer to
+		 * keep the carry (r) in a 64-bit register, thus avoiding some
+		 * "clear high bits" operations.
+		 */
+		uint32_t f, xu;
+#if BR_64
+		uint64_t r;
+#else
+		uint32_t r;
+#endif
+
+		xu = x[u + 1];
+		f = MUL31_lo((d[1] + MUL31_lo(x[u + 1], y[1])), m0i);
+
+		r = 0;
+		for (v = 0; v < len4; v += 4) { /* unrolled four words at a time */
+			uint64_t z;
+
+			z = (uint64_t)d[v + 1] + MUL31(xu, y[v + 1])
+				+ MUL31(f, m[v + 1]) + r;
+			r = z >> 31;
+			d[v + 0] = (uint32_t)z & 0x7FFFFFFF;
+			z = (uint64_t)d[v + 2] + MUL31(xu, y[v + 2])
+				+ MUL31(f, m[v + 2]) + r;
+			r = z >> 31;
+			d[v + 1] = (uint32_t)z & 0x7FFFFFFF;
+			z = (uint64_t)d[v + 3] + MUL31(xu, y[v + 3])
+				+ MUL31(f, m[v + 3]) + r;
+			r = z >> 31;
+			d[v + 2] = (uint32_t)z & 0x7FFFFFFF;
+			z = (uint64_t)d[v + 4] + MUL31(xu, y[v + 4])
+				+ MUL31(f, m[v + 4]) + r;
+			r = z >> 31;
+			d[v + 3] = (uint32_t)z & 0x7FFFFFFF;
+		}
+		for (; v < len; v ++) { /* remaining 0 to 3 words */
+			uint64_t z;
+
+			z = (uint64_t)d[v + 1] + MUL31(xu, y[v + 1])
+				+ MUL31(f, m[v + 1]) + r;
+			r = z >> 31;
+			d[v] = (uint32_t)z & 0x7FFFFFFF;
+		}
+
+		/*
+		 * Since the new dh can only be 0 or 1, the addition of
+		 * the old dh with the carry MUST fit on 32 bits, and
+		 * thus can be done into dh itself.
+		 */
+		dh += r;
+		d[len] = dh & 0x7FFFFFFF;
+		dh >>= 31;
+	}
+
+	/*
+	 * We must write back the bit length because it was overwritten in
+	 * the loop (not overwriting it would require a test in the loop,
+	 * which would yield bigger and slower code).
+	 */
+	d[0] = m[0];
+
+	/*
+	 * d[] may still be greater than m[] at that point; notably, the
+	 * 'dh' word may be non-zero.
+	 */
+	br_i31_sub(d, m, NEQ(dh, 0) | NOT(br_i31_sub(d, m, 0)));
+}
diff --git a/third_party/bearssl/src/i31_mulacc.c b/third_party/bearssl/src/i31_mulacc.c
new file mode 100644
index 0000000..7410e54
--- /dev/null
+++ b/third_party/bearssl/src/i31_mulacc.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: d <- d + a*b; only the low alen words of d are read */
+void
+br_i31_mulacc(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	size_t alen, blen, u;
+	uint32_t dl, dh;
+
+	alen = (a[0] + 31) >> 5;
+	blen = (b[0] + 31) >> 5;
+
+	/*
+	 * We want to add the two bit lengths, but these are encoded,
+	 * which requires some extra care.
+	 */
+	dl = (a[0] & 31) + (b[0] & 31);
+	dh = (a[0] >> 5) + (b[0] >> 5);
+	d[0] = (dh << 5) + dl + (~(uint32_t)(dl - 31) >> 31); /* +1 if dl >= 31 */
+
+	for (u = 0; u < blen; u ++) {
+		uint32_t f;
+		size_t v;
+
+		/*
+		 * Carry always fits on 31 bits; we want to keep it in a
+		 * 32-bit register on 32-bit architectures (on a 64-bit
+		 * architecture, cast down from 64 to 32 bits means
+		 * clearing the high bits, which is not free; on a 32-bit
+		 * architecture, the same operation really means ignoring
+		 * the top register, which has negative or zero cost).
+		 */
+#if BR_64
+		uint64_t cc;
+#else
+		uint32_t cc;
+#endif
+
+		f = b[1 + u];
+		cc = 0;
+		for (v = 0; v < alen; v ++) {
+			uint64_t z;
+
+			z = (uint64_t)d[1 + u + v] + MUL31(f, a[1 + v]) + cc;
+			cc = z >> 31;
+			d[1 + u + v] = (uint32_t)z & 0x7FFFFFFF;
+		}
+		d[1 + u + alen] = (uint32_t)cc; /* overwrites, does not add */
+	}
+}
diff --git a/third_party/bearssl/src/i31_muladd.c b/third_party/bearssl/src/i31_muladd.c
new file mode 100644
index 0000000..eecd9e2
--- /dev/null
+++ b/third_party/bearssl/src/i31_muladd.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- replaces x with (x * 2^31 + z) mod m */
+void
+br_i31_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m)
+{
+	uint32_t m_bitlen;
+	unsigned mblr;
+	size_t u, mlen;
+	uint32_t a0, a1, b0, hi, g, q, tb;
+	uint32_t under, over;
+	uint32_t cc;
+
+	/*
+	 * We can test on the modulus bit length since we accept to
+	 * leak that length.
+	 */
+	m_bitlen = m[0];
+	if (m_bitlen == 0) {
+		return;
+	}
+	if (m_bitlen <= 31) {
+		uint32_t lo;
+
+		hi = x[1] >> 1;	/* hi:lo = x[1] * 2^31 + z */
+		lo = (x[1] << 31) | z;
+		x[1] = br_rem(hi, lo, m[1]);
+		return;
+	}
+	mlen = (m_bitlen + 31) >> 5;
+	mblr = (unsigned)m_bitlen & 31;
+
+	/*
+	 * Principle: we estimate the quotient (x*2^31+z)/m by
+	 * doing a 64/32 division with the high words.
+	 *
+	 * Let:
+	 *   w = 2^31
+	 *   a = (w*a0 + a1) * w^N + a2
+	 *   b = b0 * w^N + b2
+	 * such that:
+	 *   0 <= a0 < w
+	 *   0 <= a1 < w
+	 *   0 <= a2 < w^N
+	 *   w/2 <= b0 < w
+	 *   0 <= b2 < w^N
+	 *   a < w*b
+	 * I.e. the two top words of a are a0:a1, the top word of b is
+	 * b0, we ensured that b0 is "full" (high bit set), and a is
+	 * such that the quotient q = a/b fits on one word (0 <= q < w).
+	 *
+	 * If a = b*q + r (with 0 <= r < b), we can estimate q by
+	 * doing an Euclidean division on the top words:
+	 *   a0*w+a1 = b0*u + v  (with 0 <= v < b0)
+	 * Then the following holds:
+	 *   0 <= u <= w
+	 *   u-2 <= q <= u
+	 */
+	hi = x[mlen];	/* top word, shifted out of x[] below */
+	if (mblr == 0) {
+		a0 = x[mlen];
+		memmove(x + 2, x + 1, (mlen - 1) * sizeof *x);
+		x[1] = z;
+		a1 = x[mlen];
+		b0 = m[mlen];
+	} else {
+		a0 = ((x[mlen] << (31 - mblr)) | (x[mlen - 1] >> mblr))
+			& 0x7FFFFFFF;
+		memmove(x + 2, x + 1, (mlen - 1) * sizeof *x);
+		x[1] = z;
+		a1 = ((x[mlen] << (31 - mblr)) | (x[mlen - 1] >> mblr))
+			& 0x7FFFFFFF;
+		b0 = ((m[mlen] << (31 - mblr)) | (m[mlen - 1] >> mblr))
+			& 0x7FFFFFFF;
+	}
+
+	/*
+	 * We estimate a quotient q. If the quotient returned by br_div()
+	 * is g:
+	 * -- If a0 == b0 then g == 0; we want q = 0x7FFFFFFF.
+	 * -- Otherwise:
+	 *    -- if g == 0 then we set q = 0;
+	 *    -- otherwise, we set q = g - 1.
+	 * The properties described above then ensure that the true
+	 * quotient is q-1, q or q+1.
+	 *
+	 * Take care that a0, a1 and b0 are 31-bit words, not 32-bit. We
+	 * must adjust the parameters to br_div() accordingly.
+	 */
+	g = br_div(a0 >> 1, a1 | (a0 << 31), b0);
+	q = MUX(EQ(a0, b0), 0x7FFFFFFF, MUX(EQ(g, 0), 0, g - 1));
+
+	/*
+	 * We subtract q*m from x (with the extra high word of value 'hi').
+	 * Since q may be off by 1 (in either direction), we may have to
+	 * add or subtract m afterwards.
+	 *
+	 * The 'tb' flag will be true (1) at the end of the loop if the
+	 * result is greater than or equal to the modulus (not counting
+	 * 'hi' or the carry).
+	 */
+	cc = 0;
+	tb = 1;
+	for (u = 1; u <= mlen; u ++) {
+		uint32_t mw, zw, xw, nxw;
+		uint64_t zl;
+
+		mw = m[u];
+		zl = MUL31(mw, q) + cc;
+		cc = (uint32_t)(zl >> 31);
+		zw = (uint32_t)zl & (uint32_t)0x7FFFFFFF;
+		xw = x[u];
+		nxw = xw - zw;
+		cc += nxw >> 31;	/* borrow of this word joins the carry */
+		nxw &= 0x7FFFFFFF;
+		x[u] = nxw;
+		tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw));
+	}
+
+	/*
+	 * If we underestimated q, then either cc < hi (one extra bit
+	 * beyond the top array word), or cc == hi and tb is true (no
+	 * extra bit, but the result is not lower than the modulus). In
+	 * these cases we must subtract m once.
+	 *
+	 * Otherwise, we may have overestimated, which will show as
+	 * cc > hi (thus a negative result). Correction is adding m once.
+	 */
+	over = GT(cc, hi);
+	under = ~over & (tb | LT(cc, hi));
+	br_i31_add(x, m, over);
+	br_i31_sub(x, m, under);
+}
diff --git a/third_party/bearssl/src/i31_ninv31.c b/third_party/bearssl/src/i31_ninv31.c
new file mode 100644
index 0000000..dd83c96
--- /dev/null
+++ b/third_party/bearssl/src/i31_ninv31.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- -1/x modulo 2^31 for odd x, and 0 for even x */
+uint32_t
+br_i31_ninv31(uint32_t x)
+{
+	uint32_t inv;
+	int round;
+
+	inv = 2 - x;
+	for (round = 0; round < 4; round ++) {
+		inv *= 2 - inv * x;
+	}
+	return MUX(x & 1, -inv, 0) & 0x7FFFFFFF;
+}
diff --git a/third_party/bearssl/src/i31_reduce.c b/third_party/bearssl/src/i31_reduce.c
new file mode 100644
index 0000000..5c9523e
--- /dev/null
+++ b/third_party/bearssl/src/i31_reduce.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- x[] <- a[] reduced modulo m[], sized like m[] */
+void
+br_i31_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m)
+{
+	uint32_t mod_bits, src_bits;
+	size_t mod_words, src_words, k;
+
+	mod_bits = m[0];
+	mod_words = (mod_bits + 31) >> 5;
+
+	x[0] = mod_bits;
+	if (mod_bits == 0) {
+		return;
+	}
+
+	/*
+	 * A source announced as shorter than the modulus is taken
+	 * as is; the words of x[] above it are cleared.
+	 */
+	src_bits = a[0];
+	src_words = (src_bits + 31) >> 5;
+	if (src_bits < mod_bits) {
+		memcpy(x + 1, a + 1, src_words * sizeof *a);
+		for (k = src_words + 1; k <= mod_words; k ++) {
+			x[k] = 0;
+		}
+		return;
+	}
+
+	/*
+	 * Otherwise the top mod_words - 1 source words seed x[], and
+	 * every lower word is then fed in, most significant first.
+	 */
+	k = src_words - mod_words;
+	memcpy(x + 1, a + 2 + k, (mod_words - 1) * sizeof *a);
+	x[mod_words] = 0;
+	for (k ++; k > 0; k --) {
+		br_i31_muladd_small(x, a[k], m);
+	}
+}
diff --git a/third_party/bearssl/src/i31_rshift.c b/third_party/bearssl/src/i31_rshift.c
new file mode 100644
index 0000000..db6ba0b
--- /dev/null
+++ b/third_party/bearssl/src/i31_rshift.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- shifts the value words of x[] right by count bits */
+void
+br_i31_rshift(uint32_t *x, int count)
+{
+	size_t idx, nwords;
+	uint32_t low;
+
+	nwords = (x[0] + 31) >> 5;
+	if (nwords == 0) {
+		return;
+	}
+	low = x[1] >> count;
+	for (idx = 1; idx < nwords; idx ++) {
+		uint32_t next;
+
+		next = x[idx + 1];
+		x[idx] = (low | (next << (31 - count))) & 0x7FFFFFFF;
+		low = next >> count;
+	}
+	x[nwords] = low;
+}
diff --git a/third_party/bearssl/src/i31_sub.c b/third_party/bearssl/src/i31_sub.c
new file mode 100644
index 0000000..3910895
--- /dev/null
+++ b/third_party/bearssl/src/i31_sub.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- a <- a - b if ctl is 1; returns the borrow either way */
+uint32_t
+br_i31_sub(uint32_t *a, const uint32_t *b, uint32_t ctl)
+{
+	uint32_t borrow;
+	size_t idx, end;
+
+	end = (a[0] + 63) >> 5;
+	borrow = 0;
+	for (idx = 1; idx < end; idx ++) {
+		uint32_t cur, diff;
+
+		cur = a[idx];
+		diff = cur - b[idx] - borrow;
+		borrow = diff >> 31;
+		diff &= 0x7FFFFFFF;
+		a[idx] = MUX(ctl, diff, cur);
+	}
+	return borrow;
+}
diff --git a/third_party/bearssl/src/i31_tmont.c b/third_party/bearssl/src/i31_tmont.c
new file mode 100644
index 0000000..4798ff6
--- /dev/null
+++ b/third_party/bearssl/src/i31_tmont.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- injects one zero word into x modulo m per word of m */
+void
+br_i31_to_monty(uint32_t *x, const uint32_t *m)
+{
+	uint32_t left = (m[0] + 31) >> 5;
+
+	while (left -- > 0) {
+		br_i31_muladd_small(x, 0, m);
+	}
+}
diff --git a/third_party/bearssl/src/i32_add.c b/third_party/bearssl/src/i32_add.c
new file mode 100644
index 0000000..620baff
--- /dev/null
+++ b/third_party/bearssl/src/i32_add.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- a <- a + b if ctl is 1; returns the carry either way */
+uint32_t
+br_i32_add(uint32_t *a, const uint32_t *b, uint32_t ctl)
+{
+	uint32_t carry;
+	size_t idx, end;
+
+	end = (a[0] + 63) >> 5;
+	carry = 0;
+	for (idx = 1; idx < end; idx ++) {
+		uint32_t cur, addend, sum;
+
+		cur = a[idx];
+		addend = b[idx];
+		sum = cur + addend + carry;
+		a[idx] = MUX(ctl, sum, cur);
+
+		/*
+		 * The sum wrapped if it came out below the original
+		 * word, or equal to it while a carry was coming in.
+		 */
+		carry = LT(sum, cur) | (EQ(sum, cur) & carry);
+	}
+	return carry;
+}
diff --git a/third_party/bearssl/src/i32_bitlen.c b/third_party/bearssl/src/i32_bitlen.c
new file mode 100644
index 0000000..40ce9fa
--- /dev/null
+++ b/third_party/bearssl/src/i32_bitlen.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- bit length of the xlen-word value x[], scanned top-down */
+uint32_t
+br_i32_bit_length(uint32_t *x, size_t xlen)
+{
+	uint32_t top, top_idx;
+	size_t k;
+
+	top = 0;
+	top_idx = 0;
+	for (k = xlen; k > 0; k --) {
+		uint32_t unset;
+
+		unset = EQ(top, 0);
+		top_idx = MUX(unset, (uint32_t)(k - 1), top_idx);
+		top = MUX(unset, x[k - 1], top);
+	}
+	return BIT_LENGTH(top) + (top_idx << 5);
+}
diff --git a/third_party/bearssl/src/i32_decmod.c b/third_party/bearssl/src/i32_decmod.c
new file mode 100644
index 0000000..a859af1
--- /dev/null
+++ b/third_party/bearssl/src/i32_decmod.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- decodes big-endian src into x[] only if it is below m */
+uint32_t
+br_i32_decode_mod(uint32_t *x, const void *src, size_t len, const uint32_t *m)
+{
+	const unsigned char *buf;
+	uint32_t r;
+	size_t u, v, mlen;
+
+	buf = src;
+
+	/*
+	 * First pass: determine whether the value fits. The 'r' value
+	 * will contain the comparison result, as 0x00000000 (value is
+	 * equal to the modulus), 0x00000001 (value is greater than the
+	 * modulus), or 0xFFFFFFFF (value is lower than the modulus).
+	 */
+	mlen = (m[0] + 7) >> 3;	/* modulus length in bytes */
+	r = 0;
+	for (u = (mlen > len) ? mlen : len; u > 0; u --) {
+		uint32_t mb, xb;
+
+		v = u - 1;	/* byte rank, 0 = least significant */
+		if (v >= mlen) {
+			mb = 0;
+		} else {
+			mb = (m[1 + (v >> 2)] >> ((v & 3) << 3)) & 0xFF;
+		}
+		if (v >= len) {
+			xb = 0;
+		} else {
+			xb = buf[len - u];
+		}
+		r = MUX(EQ(r, 0), (uint32_t)CMP(xb, mb), r);
+	}
+
+	/*
+	 * Only r == 0xFFFFFFFF is acceptable. We want to set r to 0xFF if
+	 * the value fits, 0x00 otherwise.
+	 */
+	r >>= 24;
+	br_i32_zero(x, m[0]);
+	u = (mlen > len) ? len : mlen;
+	while (u > 0) {
+		uint32_t xb;
+
+		xb = buf[len - u] & r;	/* forced to 0 when rejected */
+		u --;
+		x[1 + (u >> 2)] |= xb << ((u & 3) << 3);
+	}
+	return r >> 7;	/* 0xFF -> 1 (accepted), 0x00 -> 0 */
+}
diff --git a/third_party/bearssl/src/i32_decode.c b/third_party/bearssl/src/i32_decode.c
new file mode 100644
index 0000000..f289038
--- /dev/null
+++ b/third_party/bearssl/src/i32_decode.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- big-endian bytes to 32-bit words, low word first */
+void
+br_i32_decode(uint32_t *x, const void *src, size_t len)
+{
+	const unsigned char *bytes;
+	size_t rem, widx;
+
+	bytes = src;
+	widx = 1;
+
+	/*
+	 * Whole words come off the tail of the buffer, where the
+	 * least significant bytes are.
+	 */
+	for (rem = len; rem >= 4; rem -= 4) {
+		x[widx ++] = br_dec32be(bytes + rem - 4);
+	}
+
+	/*
+	 * Up to three leading bytes may remain; they make up one
+	 * last, partial word.
+	 */
+	switch (rem) {
+	case 3:
+		x[widx ++] = ((uint32_t)bytes[0] << 16)
+			| br_dec16be(bytes + 1);
+		break;
+	case 2:
+		x[widx ++] = br_dec16be(bytes);
+		break;
+	case 1:
+		x[widx ++] = bytes[0];
+		break;
+	}
+
+	x[0] = br_i32_bit_length(x + 1, widx - 1);
+}
diff --git a/third_party/bearssl/src/i32_decred.c b/third_party/bearssl/src/i32_decred.c
new file mode 100644
index 0000000..dc476db
--- /dev/null
+++ b/third_party/bearssl/src/i32_decred.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- decodes big-endian src and reduces it modulo m into x[] */
+void
+br_i32_decode_reduce(uint32_t *x,
+	const void *src, size_t len, const uint32_t *m)
+{
+	uint32_t m_bitlen;
+	size_t mblen, k, q;
+	const unsigned char *buf;
+
+	m_bitlen = m[0];
+
+	/*
+	 * Special case for an invalid modulus.
+	 */
+	if (m_bitlen == 0) {
+		x[0] = 0;
+		return;
+	}
+
+	/*
+	 * Clear the destination.
+	 */
+	br_i32_zero(x, m_bitlen);
+
+	/*
+	 * First decode directly as many bytes as possible without
+	 * reduction, taking care to leave a number of bytes which
+	 * is a multiple of 4.
+	 */
+	mblen = (m_bitlen + 7) >> 3;	/* modulus length in bytes */
+	k = mblen - 1;
+
+	/*
+	 * Up to k bytes can be safely decoded.
+	 */
+	if (k >= len) {
+		br_i32_decode(x, src, len);
+		x[0] = m_bitlen;	/* replaces the length set by decode */
+		return;
+	}
+
+	/*
+	 * We want to first inject some bytes with direct decoding,
+	 * then extra bytes by whole 32-bit words. First compute
+	 * the size that should be injected that way.
+	 */
+	buf = src;
+	q = (len - k + 3) & ~(size_t)3;	/* len - k, rounded up to 4 */
+
+	/*
+	 * It may happen that this is more than what we already have
+	 * (by at most 3 bytes). Such a case may happen only with
+	 * a very short modulus. In that case, we must process the first
+	 * bytes "manually".
+	 */
+	if (q > len) {
+		int i;
+		uint32_t w;
+
+		w = 0;
+		for (i = 0; i < 4; i ++) {
+			w <<= 8;
+			if (q <= len) {
+				w |= buf[len - q];
+			}
+			q --;	/* missing leading bytes count as zero */
+		}
+		br_i32_muladd_small(x, w, m);
+	} else {
+		br_i32_decode(x, buf, len - q);
+		x[0] = m_bitlen;
+	}
+
+	/*
+	 * At that point, we have exactly q bytes to inject, and q is
+	 * a multiple of 4.
+	 */
+	for (k = len - q; k < len; k += 4) {
+		br_i32_muladd_small(x, br_dec32be(buf + k), m);
+	}
+}
diff --git a/third_party/bearssl/src/i32_div32.c b/third_party/bearssl/src/i32_div32.c
new file mode 100644
index 0000000..d8b8023
--- /dev/null
+++ b/third_party/bearssl/src/i32_div32.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- divides hi:lo by d bit by bit; remainder goes to *r */
+uint32_t
+br_divrem(uint32_t hi, uint32_t lo, uint32_t d, uint32_t *r)
+{
+	/* TODO: optimize this */
+	uint32_t q;
+	uint32_t ch, cf;
+	int k;
+
+	q = 0;
+	ch = EQ(hi, d);
+	hi = MUX(ch, 0, hi);	/* hi == d is handled as hi == 0 */
+	for (k = 31; k > 0; k --) {
+		int j;
+		uint32_t w, ctl, hi2, lo2;
+
+		j = 32 - k;
+		w = (hi << j) | (lo >> k);	/* bits k..k+31 of hi:lo */
+		ctl = GE(w, d) | (hi >> k);
+		hi2 = (w - d) >> j;	/* hi:lo - (d << k), high half */
+		lo2 = lo - (d << k);	/* hi:lo - (d << k), low half */
+		hi = MUX(ctl, hi2, hi);
+		lo = MUX(ctl, lo2, lo);
+		q |= ctl << k;	/* quotient bit k */
+	}
+	cf = GE(lo, d) | hi;
+	q |= cf;	/* quotient bit 0 */
+	*r = MUX(cf, lo - d, lo);
+	return q;
+}
diff --git a/third_party/bearssl/src/i32_encode.c b/third_party/bearssl/src/i32_encode.c
new file mode 100644
index 0000000..303652f
--- /dev/null
+++ b/third_party/bearssl/src/i32_encode.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- writes x[] as exactly len big-endian bytes into dst */
+void
+br_i32_encode(void *dst, size_t len, const uint32_t *x)
+{
+	unsigned char *buf;
+	size_t k;
+
+	buf = dst;
+
+	/*
+	 * Compute the announced size of x in bytes; extra bytes are
+	 * filled with zeros.
+	 */
+	k = (x[0] + 7) >> 3;
+	while (len > k) {
+		*buf ++ = 0;
+		len --;
+	}
+
+	/*
+	 * Now we use k as index within x[]. That index starts at 1;
+	 * we initialize it to the topmost complete word, and process
+	 * any remaining incomplete word.
+	 */
+	k = (len + 3) >> 2;
+	switch (len & 3) {
+	case 3:
+		*buf ++ = x[k] >> 16;	/* stores keep the low 8 bits */
+		/* fall through */
+	case 2:
+		*buf ++ = x[k] >> 8;
+		/* fall through */
+	case 1:
+		*buf ++ = x[k];
+		k --;	/* partial word done; k is now a full word */
+	}
+
+	/*
+	 * Encode all complete words.
+	 */
+	while (k > 0) {
+		br_enc32be(buf, x[k]);
+		k --;
+		buf += 4;
+	}
+}
diff --git a/third_party/bearssl/src/i32_fmont.c b/third_party/bearssl/src/i32_fmont.c
new file mode 100644
index 0000000..dc1c934
--- /dev/null
+++ b/third_party/bearssl/src/i32_fmont.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- len passes, each adding f*m to x and dropping a word */
+void
+br_i32_from_monty(uint32_t *x, const uint32_t *m, uint32_t m0i)
+{
+	size_t len, u, v;
+
+	len = (m[0] + 31) >> 5;
+	for (u = 0; u < len; u ++) {
+		uint32_t f;
+		uint64_t cc;
+
+		f = x[1] * m0i;	/* presumably zeroes the low word; confirm m0i */
+		cc = 0;
+		for (v = 0; v < len; v ++) {
+			uint64_t z;
+
+			z = (uint64_t)x[v + 1] + MUL(f, m[v + 1]) + cc;
+			cc = z >> 32;
+			if (v != 0) {
+				x[v] = (uint32_t)z;	/* one word lower */
+			}
+		}
+		x[len] = (uint32_t)cc;	/* last carry is the new top word */
+	}
+
+	/*
+	 * We may have to do an extra subtraction, but only if the
+	 * value in x[] is indeed greater than or equal to that of m[],
+	 * which is why we must do two calls (first call computes the
+	 * carry, second call performs the subtraction only if the carry
+	 * is 0).
+	 */
+	br_i32_sub(x, m, NOT(br_i32_sub(x, m, 0)));
+}
diff --git a/third_party/bearssl/src/i32_iszero.c b/third_party/bearssl/src/i32_iszero.c
new file mode 100644
index 0000000..659df7f
--- /dev/null
+++ b/third_party/bearssl/src/i32_iszero.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; returns 1 if all value words of x[] are zero, 0 otherwise */
+uint32_t
+br_i32_iszero(const uint32_t *x)
+{
+	uint32_t z;
+	size_t u;
+
+	z = 0;
+	for (u = (x[0] + 31) >> 5; u > 0; u --) { /* x[0] is the bit length */
+		z |= x[u]; /* OR every word together; there is no early exit */
+	}
+	return ~(z | -z) >> 31; /* top bit of (z | -z) is set iff z != 0 */
+}
diff --git a/third_party/bearssl/src/i32_modpow.c b/third_party/bearssl/src/i32_modpow.c
new file mode 100644
index 0000000..034aba0
--- /dev/null
+++ b/third_party/bearssl/src/i32_modpow.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; x = x^e mod m, e being elen bytes in big-endian order */
+void
+br_i32_modpow(uint32_t *x,
+	const unsigned char *e, size_t elen,
+	const uint32_t *m, uint32_t m0i, uint32_t *t1, uint32_t *t2)
+{
+	size_t mlen;
+	uint32_t k;
+
+	/*
+	 * 'mlen' is the length of m[] expressed in bytes (including
+	 * the "bit length" first field).
+	 */
+	mlen = ((m[0] + 63) >> 5) * sizeof m[0];
+
+	/*
+	 * Throughout the algorithm:
+	 * -- t1[] is in Montgomery representation; it contains x, x^2,
+	 * x^4, x^8...
+	 * -- The result is accumulated, in normal representation, in
+	 * the x[] array.
+	 * -- t2[] is used as destination buffer for each multiplication.
+	 *
+	 * Note that there is no need to call br_i32_from_monty().
+	 */
+	memcpy(t1, x, mlen);
+	br_i32_to_monty(t1, m); /* t1 = x, in Montgomery representation */
+	br_i32_zero(x, m[0]);
+	x[1] = 1; /* lowest value word set to 1: accumulator starts at 1 */
+	for (k = 0; k < ((uint32_t)elen << 3); k ++) { /* one pass per bit */
+		uint32_t ctl;
+
+		ctl = (e[elen - 1 - (k >> 3)] >> (k & 7)) & 1; /* bit k, LSB first */
+		br_i32_montymul(t2, x, t1, m, m0i);
+		CCOPY(ctl, x, t2, mlen); /* presumably: x = t2 only when ctl is 1 */
+		br_i32_montymul(t2, t1, t1, m, m0i); /* square t1 for next bit */
+		memcpy(t1, t2, mlen);
+	}
+}
diff --git a/third_party/bearssl/src/i32_montmul.c b/third_party/bearssl/src/i32_montmul.c
new file mode 100644
index 0000000..7edb376
--- /dev/null
+++ b/third_party/bearssl/src/i32_montmul.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; Montgomery product of x[] and y[] modulo m[], written to d[] */
+void
+br_i32_montymul(uint32_t *d, const uint32_t *x, const uint32_t *y,
+	const uint32_t *m, uint32_t m0i)
+{
+	size_t len, u, v;
+	uint64_t dh;
+
+	len = (m[0] + 31) >> 5; /* 32-bit word count, from bit length m[0] */
+	br_i32_zero(d, m[0]); /* presumably also sets d[0]; br_i32_sub reads it */
+	dh = 0; /* extra word above d[len] */
+	for (u = 0; u < len; u ++) { /* one word of x[] per round */
+		uint32_t f, xu;
+		uint64_t r1, r2, zh;
+
+		xu = x[u + 1];
+		f = (d[1] + x[u + 1] * y[1]) * m0i; /* multiple of m[] to add */
+		r1 = 0;
+		r2 = 0;
+		for (v = 0; v < len; v ++) {
+			uint64_t z;
+			uint32_t t;
+
+			z = (uint64_t)d[v + 1] + MUL(xu, y[v + 1]) + r1; /* d + xu*y */
+			r1 = z >> 32;
+			t = (uint32_t)z;
+			z = (uint64_t)t + MUL(f, m[v + 1]) + r2; /* ... + f*m */
+			r2 = z >> 32;
+			if (v != 0) { /* the lowest word of the sum is dropped */
+				d[v] = (uint32_t)z; /* others move down one slot */
+			}
+		}
+		zh = dh + r1 + r2; /* merge both carry chains with the high word */
+		d[len] = (uint32_t)zh;
+		dh = zh >> 32;
+	}
+
+	/*
+	 * d[] may still be greater than m[] at that point; notably, the
+	 * 'dh' word may be non-zero.
+	 */
+	br_i32_sub(d, m, NEQ(dh, 0) | NOT(br_i32_sub(d, m, 0)));
+}
diff --git a/third_party/bearssl/src/i32_mulacc.c b/third_party/bearssl/src/i32_mulacc.c
new file mode 100644
index 0000000..55da385
--- /dev/null
+++ b/third_party/bearssl/src/i32_mulacc.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; adds the product a[]*b[] onto the low words already in d[] */
+void
+br_i32_mulacc(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	size_t alen, blen, u;
+
+	alen = (a[0] + 31) >> 5; /* word counts, from the bit lengths */
+	blen = (b[0] + 31) >> 5;
+	d[0] = a[0] + b[0]; /* header of d[]: sum of both bit lengths */
+	for (u = 0; u < blen; u ++) { /* one row per word of b[] */
+		uint32_t f;
+		size_t v;
+#if BR_64
+		uint64_t cc;
+#else
+		uint32_t cc;
+#endif
+
+		f = b[1 + u];
+		cc = 0;
+		for (v = 0; v < alen; v ++) {
+			uint64_t z;
+
+			z = (uint64_t)d[1 + u + v] + MUL(f, a[1 + v]) + cc;
+			cc = z >> 32; /* always below 2^32: both cc types can hold it */
+			d[1 + u + v] = (uint32_t)z;
+		}
+		d[1 + u + alen] = (uint32_t)cc; /* overwritten, not added to */
+	}
+}
diff --git a/third_party/bearssl/src/i32_muladd.c b/third_party/bearssl/src/i32_muladd.c
new file mode 100644
index 0000000..dd526ad
--- /dev/null
+++ b/third_party/bearssl/src/i32_muladd.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; computes x = (x * 2^32 + z) mod m, in place */
+void
+br_i32_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m)
+{
+	uint32_t m_bitlen;
+	size_t u, mlen;
+	uint32_t a0, a1, b0, hi, g, q, tb;
+	uint32_t chf, clow, under, over;
+	uint64_t cc;
+
+	/*
+	 * We can test on the modulus bit length since we accept to
+	 * leak that length.
+	 */
+	m_bitlen = m[0];
+	if (m_bitlen == 0) {
+		return;
+	}
+	if (m_bitlen <= 32) { /* single-word modulus: one division is enough */
+		x[1] = br_rem(x[1], z, m[1]); /* presumably (x[1]:z) mod m[1] */
+		return;
+	}
+	mlen = (m_bitlen + 31) >> 5;
+
+	/*
+	 * Principle: we estimate the quotient (x*2^32+z)/m by
+	 * doing a 64/32 division with the high words.
+	 *
+	 * Let:
+	 *   w = 2^32
+	 *   a = (w*a0 + a1) * w^N + a2
+	 *   b = b0 * w^N + b2
+	 * such that:
+	 *   0 <= a0 < w
+	 *   0 <= a1 < w
+	 *   0 <= a2 < w^N
+	 *   w/2 <= b0 < w
+	 *   0 <= b2 < w^N
+	 *   a < w*b
+	 * I.e. the two top words of a are a0:a1, the top word of b is
+	 * b0, we ensured that b0 is "full" (high bit set), and a is
+	 * such that the quotient q = a/b fits on one word (0 <= q < w).
+	 *
+	 * If a = b*q + r (with 0 <= r < b), we can estimate q by
+	 * doing a Euclidean division on the top words:
+	 *   a0*w+a1 = b0*u + v  (with 0 <= v < b0)
+	 * Then the following holds:
+	 *   0 <= u <= w
+	 *   u-2 <= q <= u
+	 */
+	a0 = br_i32_word(x, m_bitlen - 32); /* read before x[] is shifted */
+	hi = x[mlen]; /* top word, overwritten by the shift below */
+	memmove(x + 2, x + 1, (mlen - 1) * sizeof *x); /* shift up one word */
+	x[1] = z; /* z becomes the new lowest word */
+	a1 = br_i32_word(x, m_bitlen - 32); /* same position, after the shift */
+	b0 = br_i32_word(m, m_bitlen - 32);
+
+	/*
+	 * We estimate the quotient q. If the quotient returned by br_div()
+	 * is g:
+	 * -- If a0 == b0 then g == 0; we want q = 0xFFFFFFFF.
+	 * -- Otherwise:
+	 *    -- if g == 0 then we set q = 0;
+	 *    -- otherwise, we set q = g - 1.
+	 * The properties described above then ensure that the true
+	 * quotient is q-1, q or q+1.
+	 */
+	g = br_div(a0, a1, b0);
+	q = MUX(EQ(a0, b0), 0xFFFFFFFF, MUX(EQ(g, 0), 0, g - 1));
+
+	/*
+	 * We subtract q*m from x (with the extra high word of value 'hi').
+	 * Since q may be off by 1 (in either direction), we may have to
+	 * add or subtract m afterwards.
+	 *
+	 * The 'tb' flag will be true (1) at the end of the loop if the
+	 * result is greater than or equal to the modulus (not counting
+	 * 'hi' or the carry).
+	 */
+	cc = 0;
+	tb = 1; /* starts at 1, so an exact tie counts as "not lower" */
+	for (u = 1; u <= mlen; u ++) {
+		uint32_t mw, zw, xw, nxw;
+		uint64_t zl;
+
+		mw = m[u];
+		zl = MUL(mw, q) + cc;
+		cc = (uint32_t)(zl >> 32);
+		zw = (uint32_t)zl;
+		xw = x[u];
+		nxw = xw - zw;
+		cc += (uint64_t)GT(nxw, xw); /* borrow: the subtraction wrapped */
+		x[u] = nxw;
+		tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw)); /* highest differing word wins */
+	}
+
+	/*
+	 * If we underestimated q, then either cc < hi (one extra bit
+	 * beyond the top array word), or cc == hi and tb is true (no
+	 * extra bit, but the result is not lower than the modulus). In
+	 * these cases we must subtract m once.
+	 *
+	 * Otherwise, we may have overestimated, which will show as
+	 * cc > hi (thus a negative result). Correction is adding m once.
+	 */
+	chf = (uint32_t)(cc >> 32);
+	clow = (uint32_t)cc;
+	over = chf | GT(clow, hi);
+	under = ~over & (tb | (~chf & LT(clow, hi))); /* masked off when over is set */
+	br_i32_add(x, m, over);
+	br_i32_sub(x, m, under);
+}
diff --git a/third_party/bearssl/src/i32_ninv32.c b/third_party/bearssl/src/i32_ninv32.c
new file mode 100644
index 0000000..6564434
--- /dev/null
+++ b/third_party/bearssl/src/i32_ninv32.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; returns -(1/x) mod 2^32 when x is odd, and 0 when x is even */
+uint32_t
+br_i32_ninv32(uint32_t x)
+{
+	uint32_t y;
+
+	y = 2 - x; /* for odd x: x*y == 1 mod 4 (2 correct bits) */
+	y *= 2 - y * x; /* each such step doubles the correct bits: 4 */
+	y *= 2 - y * x; /* 8 */
+	y *= 2 - y * x; /* 16 */
+	y *= 2 - y * x; /* 32: y is now the inverse of x modulo 2^32 */
+	return MUX(x & 1, -y, 0); /* an even x has no inverse: 0 is returned */
+}
diff --git a/third_party/bearssl/src/i32_reduce.c b/third_party/bearssl/src/i32_reduce.c
new file mode 100644
index 0000000..90fff09
--- /dev/null
+++ b/third_party/bearssl/src/i32_reduce.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; sets x[] to the value of a[] reduced modulo m[] */
+void
+br_i32_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m)
+{
+	uint32_t m_bitlen, a_bitlen;
+	size_t mlen, alen, u;
+
+	m_bitlen = m[0];
+	mlen = (m_bitlen + 31) >> 5;
+
+	x[0] = m_bitlen; /* the result carries the bit length of the modulus */
+	if (m_bitlen == 0) {
+		return; /* empty modulus: only the header word is written */
+	}
+
+	/*
+	 * If the source is shorter, then simply copy all words from a[]
+	 * and zero out the upper words.
+	 */
+	a_bitlen = a[0];
+	alen = (a_bitlen + 31) >> 5;
+	if (a_bitlen < m_bitlen) {
+		memcpy(x + 1, a + 1, alen * sizeof *a);
+		for (u = alen; u < mlen; u ++) {
+			x[u + 1] = 0;
+		}
+		return;
+	}
+
+	/*
+	 * The source length is at least equal to that of the modulus.
+	 * We must thus copy N-1 words, and input the remaining words
+	 * one by one.
+	 */
+	memcpy(x + 1, a + 2 + (alen - mlen), (mlen - 1) * sizeof *a); /* top words */
+	x[mlen] = 0;
+	for (u = 1 + alen - mlen; u > 0; u --) { /* from a[1 + alen - mlen] down to a[1] */
+		br_i32_muladd_small(x, a[u], m);
+	}
+}
diff --git a/third_party/bearssl/src/i32_sub.c b/third_party/bearssl/src/i32_sub.c
new file mode 100644
index 0000000..9c50023
--- /dev/null
+++ b/third_party/bearssl/src/i32_sub.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; a -= b when ctl is 1 (a[] kept when 0); returns the final carry */
+uint32_t
+br_i32_sub(uint32_t *a, const uint32_t *b, uint32_t ctl)
+{
+	uint32_t cc;
+	size_t u, m;
+
+	cc = 0;
+	m = (a[0] + 63) >> 5; /* one past the index of the last value word */
+	for (u = 1; u < m; u ++) {
+		uint32_t aw, bw, naw;
+
+		aw = a[u];
+		bw = b[u];
+		naw = aw - bw - cc;
+
+		/*
+		 * Carry is 1 if naw > aw. Carry is 1 also if naw == aw
+		 * AND the carry was already 1.
+		 */
+		cc = (cc & EQ(naw, aw)) | GT(naw, aw);
+		a[u] = MUX(ctl, naw, aw); /* the carry above is computed either way */
+	}
+	return cc;
+}
diff --git a/third_party/bearssl/src/i32_tmont.c b/third_party/bearssl/src/i32_tmont.c
new file mode 100644
index 0000000..058cd88
--- /dev/null
+++ b/third_party/bearssl/src/i32_tmont.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; converts x[] to Montgomery representation modulo m[], in place */
+void
+br_i32_to_monty(uint32_t *x, const uint32_t *m)
+{
+	uint32_t k;
+
+	for (k = (m[0] + 31) >> 5; k > 0; k --) { /* once per word of m[] */
+		br_i32_muladd_small(x, 0, m); /* x = x * 2^32 mod m */
+	}
+}
diff --git a/third_party/bearssl/src/i62_modpow2.c b/third_party/bearssl/src/i62_modpow2.c
new file mode 100644
index 0000000..2db537f
--- /dev/null
+++ b/third_party/bearssl/src/i62_modpow2.c
@@ -0,0 +1,493 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+#if BR_INT128
+
+/*
+ * Compute x*y+v1+v2 (which always fits on 128 bits). Operands are 64-bit;
+ * the high word of the result goes to "hi" and the low word to "lo".
+ */
+#define FMA1(hi, lo, x, y, v1, v2)   do { \
+		unsigned __int128 fmaz; \
+		fmaz = (unsigned __int128)(x) * (unsigned __int128)(y) \
+			+ (unsigned __int128)(v1) + (unsigned __int128)(v2); \
+		(hi) = (uint64_t)(fmaz >> 64); \
+		(lo) = (uint64_t)fmaz; \
+	} while (0)
+
+/*
+ * Compute x1*y1+x2*y2+v1+v2. Operands are 64-bit, and result is 128-bit,
+ * with high word in "hi" and low word in "lo".
+ *
+ * Callers should ensure that the two inner products, and the v1 and v2
+ * operands, are multiples of 4 (this is not used by this specific definition
+ * but may help other implementations).
+ */
+#define FMA2(hi, lo, x1, y1, x2, y2, v1, v2)   do { \
+		unsigned __int128 fmaz; \
+		fmaz = (unsigned __int128)(x1) * (unsigned __int128)(y1) \
+			+ (unsigned __int128)(x2) * (unsigned __int128)(y2) \
+			+ (unsigned __int128)(v1) + (unsigned __int128)(v2); \
+		(hi) = (uint64_t)(fmaz >> 64); \
+		(lo) = (uint64_t)fmaz; \
+	} while (0)
+
+#elif BR_UMUL128
+
+#include <intrin.h>
+
+#define FMA1(hi, lo, x, y, v1, v2)   do { \
+		uint64_t fmahi, fmalo; \
+		unsigned char fmacc; \
+		fmalo = _umul128((x), (y), &fmahi); /* fmahi:fmalo = x*y */ \
+		fmacc = _addcarry_u64(0, fmalo, (v1), &fmalo); /* low += v1 */ \
+		_addcarry_u64(fmacc, fmahi, 0, &fmahi); /* carry into high */ \
+		fmacc = _addcarry_u64(0, fmalo, (v2), &(lo)); /* low += v2 */ \
+		_addcarry_u64(fmacc, fmahi, 0, &(hi)); /* carry into high */ \
+	} while (0)
+
+/*
+ * Normally we should use _addcarry_u64() for FMA2 too, but it makes
+ * Visual Studio crash. Instead we use this version, which leverages
+ * the fact that the vx operands, and the products, are multiples of 4.
+ * This is unfortunately slower.
+ */
+#define FMA2(hi, lo, x1, y1, x2, y2, v1, v2)   do { \
+		uint64_t fma1hi, fma1lo; \
+		uint64_t fma2hi, fma2lo; \
+		uint64_t fmatt; \
+		fma1lo = _umul128((x1), (y1), &fma1hi); \
+		fma2lo = _umul128((x2), (y2), &fma2hi); \
+		fmatt = (fma1lo >> 2) + (fma2lo >> 2) \
+			+ ((v1) >> 2) + ((v2) >> 2); \
+		(lo) = fmatt << 2; \
+		(hi) = fma1hi + fma2hi + (fmatt >> 62); \
+	} while (0)
+
+/*
+ * The FMA2 macro definition we would prefer to use, but it triggers
+ * an internal compiler error in Visual Studio 2015.
+ *
+#define FMA2(hi, lo, x1, y1, x2, y2, v1, v2)   do { \
+		uint64_t fma1hi, fma1lo; \
+		uint64_t fma2hi, fma2lo; \
+		unsigned char fmacc; \
+		fma1lo = _umul128((x1), (y1), &fma1hi); \
+		fma2lo = _umul128((x2), (y2), &fma2hi); \
+		fmacc = _addcarry_u64(0, fma1lo, (v1), &fma1lo); \
+		_addcarry_u64(fmacc, fma1hi, 0, &fma1hi); \
+		fmacc = _addcarry_u64(0, fma2lo, (v2), &fma2lo); \
+		_addcarry_u64(fmacc, fma2hi, 0, &fma2hi); \
+		fmacc = _addcarry_u64(0, fma1lo, fma2lo, &(lo)); \
+		_addcarry_u64(fmacc, fma1hi, fma2hi, &(hi)); \
+	} while (0)
+ */
+
+#endif
+
+#define MASK62           ((uint64_t)0x3FFFFFFFFFFFFFFF) /* 2^62 - 1 */
+#define MUL62_lo(x, y)   (((uint64_t)(x) * (uint64_t)(y)) & MASK62) /* x*y mod 2^62 */
+
+/*
+ * Subtract b from a, and return the final carry. If 'ctl32' is 0, then
+ * a[] is kept unmodified, but the final carry is still computed and
+ * returned.
+ */
+static uint32_t
+i62_sub(uint64_t *a, const uint64_t *b, size_t num, uint32_t ctl32)
+{
+	uint64_t cc, mask;
+	size_t u;
+
+	cc = 0;
+	ctl32 = -ctl32; /* 0 stays 0, 1 becomes 0xFFFFFFFF */
+	mask = (uint64_t)ctl32 | ((uint64_t)ctl32 << 32); /* widen to 64 bits */
+	for (u = 0; u < num; u ++) {
+		uint64_t aw, bw, dw;
+
+		aw = a[u];
+		bw = b[u];
+		dw = aw - bw - cc;
+		cc = dw >> 63; /* with 62-bit words, a borrow shows in the top bit */
+		dw &= MASK62;
+		a[u] = aw ^ (mask & (dw ^ aw)); /* dw if mask is all-ones, else aw */
+	}
+	return (uint32_t)cc;
+}
+
+/*
+ * Montgomery multiplication, over arrays of 62-bit values. The
+ * destination array (d) must be distinct from the other operands
+ * (x, y and m). All arrays are in little-endian format (least
+ * significant word comes first) over 'num' words.
+ */
+static void
+montymul(uint64_t *d, const uint64_t *x, const uint64_t *y,
+	const uint64_t *m, size_t num, uint64_t m0i)
+{
+	uint64_t dh;
+	size_t u, num4;
+
+	num4 = 1 + ((num - 1) & ~(size_t)3); /* end of the 4-way unrolled part */
+	memset(d, 0, num * sizeof *d);
+	dh = 0; /* extra word above d[num - 1] */
+	for (u = 0; u < num; u ++) {
+		size_t v;
+		uint64_t f, xu;
+		uint64_t r, zh;
+		uint64_t hi, lo;
+
+		xu = x[u] << 2; /* pre-shifted: FMA2 wants products multiple of 4 */
+		f = MUL62_lo(d[0] + MUL62_lo(x[u], y[0]), m0i) << 2;
+
+		FMA2(hi, lo, xu, y[0], f, m[0], d[0] << 2, 0);
+		r = hi; /* the low word of this first sum is not stored */
+
+		for (v = 1; v < num4; v += 4) {
+			FMA2(hi, lo, xu, y[v + 0],
+				f, m[v + 0], d[v + 0] << 2, r << 2);
+			r = hi + (r >> 62); /* r << 2 lost the top 2 bits of r */
+			d[v - 1] = lo >> 2; /* undo the shift; store one slot lower */
+			FMA2(hi, lo, xu, y[v + 1],
+				f, m[v + 1], d[v + 1] << 2, r << 2);
+			r = hi + (r >> 62);
+			d[v + 0] = lo >> 2;
+			FMA2(hi, lo, xu, y[v + 2],
+				f, m[v + 2], d[v + 2] << 2, r << 2);
+			r = hi + (r >> 62);
+			d[v + 1] = lo >> 2;
+			FMA2(hi, lo, xu, y[v + 3],
+				f, m[v + 3], d[v + 3] << 2, r << 2);
+			r = hi + (r >> 62);
+			d[v + 2] = lo >> 2;
+		}
+		for (; v < num; v ++) { /* remaining words, one at a time */
+			FMA2(hi, lo, xu, y[v], f, m[v], d[v] << 2, r << 2);
+			r = hi + (r >> 62);
+			d[v - 1] = lo >> 2;
+		}
+
+		zh = dh + r;
+		d[num - 1] = zh & MASK62;
+		dh = zh >> 62;
+	}
+	i62_sub(d, m, num, (uint32_t)dh | NOT(i62_sub(d, m, num, 0))); /* final conditional subtraction of m */
+}
+
+/*
+ * Conversion back from Montgomery representation, in place, over 'num' words.
+ */
+static void
+frommonty(uint64_t *x, const uint64_t *m, size_t num, uint64_t m0i)
+{
+	size_t u, v;
+
+	for (u = 0; u < num; u ++) { /* one reduction round per 62-bit word */
+		uint64_t f, cc;
+
+		f = MUL62_lo(x[0], m0i) << 2; /* pre-shifted by 2, like x[v] below */
+		cc = 0;
+		for (v = 0; v < num; v ++) {
+			uint64_t hi, lo;
+
+			FMA1(hi, lo, f, m[v], x[v] << 2, cc);
+			cc = hi << 2; /* the carry is kept multiplied by 4 as well */
+			if (v != 0) { /* the lowest word of the sum is dropped */
+				x[v - 1] = lo >> 2; /* undo the shift; one slot lower */
+			}
+		}
+		x[num - 1] = cc >> 2; /* last carry becomes the new top word */
+	}
+	i62_sub(x, m, num, NOT(i62_sub(x, m, num, 0))); /* subtract m if x >= m */
+}
+
+/* see inner.h; returns 1 on success, 0 if tmp[] (twlen words) is too small */
+uint32_t
+br_i62_modpow_opt(uint32_t *x31, const unsigned char *e, size_t elen,
+	const uint32_t *m31, uint32_t m0i31, uint64_t *tmp, size_t twlen)
+{
+	size_t u, mw31num, mw62num;
+	uint64_t *x, *m, *t1, *t2;
+	uint64_t m0i;
+	uint32_t acc;
+	int win_len, acc_len;
+
+	/*
+	 * Get modulus size, in words.
+	 */
+	mw31num = (m31[0] + 31) >> 5;
+	mw62num = (mw31num + 1) >> 1; /* two 31-bit words per 62-bit word */
+
+	/*
+	 * In order to apply this function, we must have enough room to
+	 * copy the operand and modulus into the temporary array, along
+	 * with at least two temporaries. If there is not enough room,
+	 * switch to br_i31_modpow(). We also use br_i31_modpow() if the
+	 * modulus length is not at least four words (94 bits or more).
+	 */
+	if (mw31num < 4 || (mw62num << 2) > twlen) {
+		/*
+		 * We assume here that we can split an aligned uint64_t
+		 * into two properly aligned uint32_t. Since both types
+		 * are supposed to have an exact width with no padding,
+		 * then this property must hold.
+		 */
+		size_t txlen;
+
+		txlen = mw31num + 1; /* 32-bit words per temporary */
+		if (twlen < txlen) {
+			return 0; /* no room for two such temporaries */
+		}
+		br_i31_modpow(x31, e, elen, m31, m0i31,
+			(uint32_t *)tmp, (uint32_t *)tmp + txlen);
+		return 1;
+	}
+
+	/*
+	 * Convert x to Montgomery representation: this means that
+	 * we replace x with x*2^z mod m, where z is the smallest multiple
+	 * of the word size such that 2^z >= m. We want to reuse the 31-bit
+	 * functions here (for constant-time operation), but we need z
+	 * for a 62-bit word size.
+	 */
+	for (u = 0; u < mw62num; u ++) { /* two 31-bit steps per 62-bit word */
+		br_i31_muladd_small(x31, 0, m31);
+		br_i31_muladd_small(x31, 0, m31);
+	}
+
+	/*
+	 * Assemble operands into arrays of 62-bit words. Note that
+	 * all the arrays of 62-bit words that we will handle here
+	 * are without any leading size word.
+	 *
+	 * We also adjust tmp and twlen to account for the words used
+	 * for these extra arrays.
+	 */
+	m = tmp;
+	x = tmp + mw62num;
+	tmp += (mw62num << 1);
+	twlen -= (mw62num << 1);
+	for (u = 0; u < mw31num; u += 2) {
+		size_t v;
+
+		v = u >> 1;
+		if ((u + 1) == mw31num) { /* odd count: last word stands alone */
+			m[v] = (uint64_t)m31[u + 1];
+			x[v] = (uint64_t)x31[u + 1];
+		} else {
+			m[v] = (uint64_t)m31[u + 1]
+				+ ((uint64_t)m31[u + 2] << 31);
+			x[v] = (uint64_t)x31[u + 1]
+				+ ((uint64_t)x31[u + 2] << 31);
+		}
+	}
+
+	/*
+	 * Compute window size. We support windows up to 5 bits; for a
+	 * window of size k bits, we need 2^k+1 temporaries (for k = 1,
+	 * we use special code that uses only 2 temporaries).
+	 */
+	for (win_len = 5; win_len > 1; win_len --) {
+		if ((((uint32_t)1 << win_len) + 1) * mw62num <= twlen) {
+			break;
+		}
+	}
+
+	t1 = tmp;
+	t2 = tmp + mw62num; /* with a window, the table entries follow t2 */
+
+	/*
+	 * Compute m0i, which is equal to -(1/m0) mod 2^62. We were
+	 * provided with m0i31, which already fulfills this property
+	 * modulo 2^31; the single expression below is then sufficient.
+	 */
+	m0i = (uint64_t)m0i31;
+	m0i = MUL62_lo(m0i, (uint64_t)2 + MUL62_lo(m0i, m[0]));
+
+	/*
+	 * Compute window contents. If the window has size one bit only,
+	 * then t2 is set to x; otherwise, t2[0] is left untouched, and
+	 * t2[k] is set to x^k (for k >= 1).
+	 */
+	if (win_len == 1) {
+		memcpy(t2, x, mw62num * sizeof *x);
+	} else {
+		uint64_t *base;
+
+		memcpy(t2 + mw62num, x, mw62num * sizeof *x);
+		base = t2 + mw62num;
+		for (u = 2; u < ((unsigned)1 << win_len); u ++) {
+			montymul(base + mw62num, base, x, m, mw62num, m0i);
+			base += mw62num;
+		}
+	}
+
+	/*
+	 * Set x to 1, in Montgomery representation. We again use the
+	 * 31-bit code.
+	 */
+	br_i31_zero(x31, m31[0]);
+	x31[(m31[0] + 31) >> 5] = 1; /* only the top 31-bit word is non-zero */
+	br_i31_muladd_small(x31, 0, m31);
+	if (mw31num & 1) { /* odd count: one more step to fill the last 62-bit word */
+		br_i31_muladd_small(x31, 0, m31);
+	}
+	for (u = 0; u < mw31num; u += 2) {
+		size_t v;
+
+		v = u >> 1;
+		if ((u + 1) == mw31num) {
+			x[v] = (uint64_t)x31[u + 1];
+		} else {
+			x[v] = (uint64_t)x31[u + 1]
+				+ ((uint64_t)x31[u + 2] << 31);
+		}
+	}
+
+	/*
+	 * We process bits from most to least significant. At each
+	 * loop iteration, we have acc_len bits in acc.
+	 */
+	acc = 0;
+	acc_len = 0;
+	while (acc_len > 0 || elen > 0) {
+		int i, k;
+		uint32_t bits;
+		uint64_t mask1, mask2;
+
+		/*
+		 * Get the next bits.
+		 */
+		k = win_len;
+		if (acc_len < win_len) {
+			if (elen > 0) {
+				acc = (acc << 8) | *e ++; /* refill with the next byte */
+				elen --;
+				acc_len += 8;
+			} else {
+				k = acc_len; /* final chunk, shorter than a window */
+			}
+		}
+		bits = (acc >> (acc_len - k)) & (((uint32_t)1 << k) - 1);
+		acc_len -= k;
+
+		/*
+		 * We could get exactly k bits. Compute k squarings.
+		 */
+		for (i = 0; i < k; i ++) {
+			montymul(t1, x, x, m, mw62num, m0i);
+			memcpy(x, t1, mw62num * sizeof *x);
+		}
+
+		/*
+		 * Window lookup: we want to set t2 to the window
+		 * lookup value, assuming the bits are non-zero. If
+		 * the window length is 1 bit only, then t2 is
+		 * already set; otherwise, we do a constant-time lookup.
+		 */
+		if (win_len > 1) {
+			uint64_t *base;
+
+			memset(t2, 0, mw62num * sizeof *t2);
+			base = t2 + mw62num;
+			for (u = 1; u < ((uint32_t)1 << k); u ++) {
+				uint64_t mask;
+				size_t v;
+
+				mask = -(uint64_t)EQ(u, bits); /* all-ones only on a match */
+				for (v = 0; v < mw62num; v ++) {
+					t2[v] |= mask & base[v];
+				}
+				base += mw62num;
+			}
+		}
+
+		/*
+		 * Multiply with the looked-up value. We keep the product
+		 * only if the exponent bits are not all-zero.
+		 */
+		montymul(t1, x, t2, m, mw62num, m0i);
+		mask1 = -(uint64_t)EQ(bits, 0); /* all-ones when bits == 0: keep x */
+		mask2 = ~mask1;
+		for (u = 0; u < mw62num; u ++) {
+			x[u] = (mask1 & x[u]) | (mask2 & t1[u]);
+		}
+	}
+
+	/*
+	 * Convert back from Montgomery representation.
+	 */
+	frommonty(x, m, mw62num, m0i);
+
+	/*
+	 * Convert result into 31-bit words.
+	 */
+	for (u = 0; u < mw31num; u += 2) {
+		uint64_t zw;
+
+		zw = x[u >> 1];
+		x31[u + 1] = (uint32_t)zw & 0x7FFFFFFF; /* low 31 bits */
+		if ((u + 1) < mw31num) {
+			x31[u + 2] = (uint32_t)(zw >> 31); /* high 31 bits */
+		}
+	}
+	return 1;
+}
+
+#else
+
+/* see inner.h; variant without 128-bit products: calls br_i31_modpow_opt() */
+uint32_t
+br_i62_modpow_opt(uint32_t *x31, const unsigned char *e, size_t elen,
+	const uint32_t *m31, uint32_t m0i31, uint64_t *tmp, size_t twlen)
+{
+	size_t mwlen;
+
+	mwlen = (m31[0] + 63) >> 5; /* word count of m31[], plus one */
+	if (twlen < mwlen) {
+		return 0; /* tmp[] rejected as too small */
+	}
+	return br_i31_modpow_opt(x31, e, elen, m31, m0i31,
+		(uint32_t *)tmp, twlen << 1); /* length now in 32-bit words */
+}
+
+#endif
+
+/* see inner.h; wrapper over br_i62_modpow_opt() taking tmp[] as 32-bit words */
+uint32_t
+br_i62_modpow_opt_as_i31(uint32_t *x31, const unsigned char *e, size_t elen,
+	const uint32_t *m31, uint32_t m0i31, uint32_t *tmp, size_t twlen)
+{
+	/*
+	 * As documented, this function expects the 'tmp' argument to be
+	 * 64-bit aligned. This is OK since this function is internal (it
+	 * is not part of BearSSL's public API).
+	 */
+	return br_i62_modpow_opt(x31, e, elen, m31, m0i31,
+		(uint64_t *)tmp, twlen >> 1); /* 32-bit count to 64-bit count */
+}
diff --git a/third_party/bearssl/src/inner.h b/third_party/bearssl/src/inner.h
new file mode 100644
index 0000000..0d40825
--- /dev/null
+++ b/third_party/bearssl/src/inner.h
@@ -0,0 +1,2559 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef INNER_H__
+#define INNER_H__
+
+#include <string.h>
+#include <limits.h>
+
+#include "config.h"
+#include "bearssl.h"
+
+/*
+ * On MSVC, disable the warning about applying unary minus on an
+ * unsigned type: it is standard, we do it all the time, and for
+ * good reasons.
+ */
+#if _MSC_VER
+#pragma warning( disable : 4146 )
+#endif
+
+/*
+ * Maximum size for a RSA modulus (in bits). Allocated stack buffers
+ * depend on that size, so this value should be kept small. Currently,
+ * 2048-bit RSA keys offer adequate security, and should still do so for
+ * the next few decades; however, a number of widespread PKI have
+ * already set their root keys to RSA-4096, so we should be able to
+ * process such keys.
+ *
+ * This value MUST be a multiple of 64. This value MUST NOT exceed 47666
+ * (some computations in RSA key generation rely on the factor size being
+ * no more than 23833 bits). RSA key sizes beyond 3072 bits don't make a
+ * lot of sense anyway.
+ */
+#define BR_MAX_RSA_SIZE   4096
+
+/*
+ * Minimum size for a RSA modulus (in bits); this value is used only to
+ * filter out invalid parameters for key pair generation. Normally,
+ * applications should not use RSA keys smaller than 2048 bits; but some
+ * specific cases might need shorter keys, for legacy or research
+ * purposes.
+ */
+#define BR_MIN_RSA_SIZE   512
+
+/*
+ * Maximum size for a RSA factor (in bits). This is for RSA private-key
+ * operations. Default is to support factors up to a bit more than half
+ * the maximum modulus size.
+ *
+ * This value MUST be a multiple of 32.
+ */
+#define BR_MAX_RSA_FACTOR   ((BR_MAX_RSA_SIZE + 64) >> 1)
+
+/*
+ * Maximum size for an EC curve (modulus or order), in bits. Size of
+ * stack buffers depends on that parameter. This size MUST be a multiple
+ * of 8 (so that decoding an integer with that many bytes does not
+ * overflow).
+ */
+#define BR_MAX_EC_SIZE   528
+
+/*
+ * Some macros to recognize the current architecture. Right now, we are
+ * interested in automatically recognizing architectures with efficient
+ * 64-bit types so that we may automatically use implementations that
+ * use 64-bit registers in that case. Future versions may detect, e.g.,
+ * availability of SSE2 intrinsics.
+ *
+ * If 'unsigned long' is a 64-bit type, then we assume that 64-bit types
+ * are efficient. Otherwise, we rely on macros that depend on compiler,
+ * OS and architecture. In any case, failure to detect the architecture
+ * as 64-bit means that the 32-bit code will be used, and that code
+ * works also on 64-bit architectures (the 64-bit code may simply be
+ * more efficient).
+ *
+ * The test on 'unsigned long' should already catch most cases, the one
+ * notable exception being Windows code where 'unsigned long' is kept to
+ * 32-bit for compatibility with all the legacy code that liberally uses
+ * the 'DWORD' type for 32-bit values.
+ *
+ * Macro names are taken from: http://nadeausoftware.com/articles/2012/02/c_c_tip_how_detect_processor_type_using_compiler_predefined_macros
+ */
+#ifndef BR_64
+#if ((ULONG_MAX >> 31) >> 31) == 3
+#define BR_64   1
+#elif defined(__ia64) || defined(__itanium__) || defined(_M_IA64)
+#define BR_64   1
+#elif defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) \
+	|| defined(__64BIT__) || defined(_LP64) || defined(__LP64__)
+#define BR_64   1
+#elif defined(__sparc64__)
+#define BR_64   1
+#elif defined(__x86_64__) || defined(_M_X64)
+#define BR_64   1
+#elif defined(__aarch64__) || defined(_M_ARM64)
+#define BR_64   1
+#elif defined(__mips64)
+#define BR_64   1
+#endif
+#endif
+
+/*
+ * Set BR_LOMUL on platforms where it makes sense.
+ */
+#ifndef BR_LOMUL
+#if BR_ARMEL_CORTEXM_GCC
+#define BR_LOMUL   1
+#endif
+#endif
+
+/*
+ * Architecture detection.
+ */
+#ifndef BR_i386
+#if __i386__ || _M_IX86
+#define BR_i386   1
+#endif
+#endif
+
+#ifndef BR_amd64
+#if __x86_64__ || _M_X64
+#define BR_amd64   1
+#endif
+#endif
+
+/*
+ * Compiler brand and version.
+ *
+ * Implementations that use intrinsics need to detect the compiler type
+ * and version because some specific actions may be needed to activate
+ * the corresponding opcodes, both for header inclusion, and when using
+ * them in a function.
+ *
+ * BR_GCC, BR_CLANG and BR_MSC will be set to 1 for, respectively, GCC,
+ * Clang and MS Visual C. For each of them, sub-macros will be defined
+ * for versions; each sub-macro is set whenever the compiler version is
+ * at least as recent as the one corresponding to the macro.
+ */
+
+/*
+ * GCC thresholds are on versions 4.4 to 4.9 and 5.0.
+ */
+#ifndef BR_GCC
+#if __GNUC__ && !__clang__
+#define BR_GCC   1
+
+#if __GNUC__ > 4
+#define BR_GCC_5_0   1
+#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9
+#define BR_GCC_4_9   1
+#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 8
+#define BR_GCC_4_8   1
+#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 7
+#define BR_GCC_4_7   1
+#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 6
+#define BR_GCC_4_6   1
+#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 5
+#define BR_GCC_4_5   1
+#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 4
+#define BR_GCC_4_4   1
+#endif
+
+#if BR_GCC_5_0
+#define BR_GCC_4_9   1
+#endif
+#if BR_GCC_4_9
+#define BR_GCC_4_8   1
+#endif
+#if BR_GCC_4_8
+#define BR_GCC_4_7   1
+#endif
+#if BR_GCC_4_7
+#define BR_GCC_4_6   1
+#endif
+#if BR_GCC_4_6
+#define BR_GCC_4_5   1
+#endif
+#if BR_GCC_4_5
+#define BR_GCC_4_4   1
+#endif
+
+#endif
+#endif
+
+/*
+ * Clang thresholds are on versions 3.7.0 and 3.8.0.
+ */
+#ifndef BR_CLANG
+#if __clang__
+#define BR_CLANG   1
+
+#if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 8)
+#define BR_CLANG_3_8   1
+#elif __clang_major__ == 3 && __clang_minor__ >= 7
+#define BR_CLANG_3_7   1
+#endif
+
+#if BR_CLANG_3_8
+#define BR_CLANG_3_7   1
+#endif
+
+#endif
+#endif
+
+/*
+ * MS Visual C thresholds are on Visual Studio 2005 to 2015.
+ */
+#ifndef BR_MSC
+#if _MSC_VER
+#define BR_MSC   1
+
+#if _MSC_VER >= 1900
+#define BR_MSC_2015   1
+#elif _MSC_VER >= 1800
+#define BR_MSC_2013   1
+#elif _MSC_VER >= 1700
+#define BR_MSC_2012   1
+#elif _MSC_VER >= 1600
+#define BR_MSC_2010   1
+#elif _MSC_VER >= 1500
+#define BR_MSC_2008   1
+#elif _MSC_VER >= 1400
+#define BR_MSC_2005   1
+#endif
+
+#if BR_MSC_2015
+#define BR_MSC_2013   1
+#endif
+#if BR_MSC_2013
+#define BR_MSC_2012   1
+#endif
+#if BR_MSC_2012
+#define BR_MSC_2010   1
+#endif
+#if BR_MSC_2010
+#define BR_MSC_2008   1
+#endif
+#if BR_MSC_2008
+#define BR_MSC_2005   1
+#endif
+
+#endif
+#endif
+
+/*
+ * GCC 4.4+ and Clang 3.7+ allow tagging specific functions with a
+ * 'target' attribute that activates support for specific opcodes.
+ */
+#if BR_GCC_4_4 || BR_CLANG_3_7
+#define BR_TARGET(x)   __attribute__((target(x)))
+#else
+#define BR_TARGET(x)
+#endif
+
+/*
+ * AES-NI intrinsics are available on x86 (32-bit and 64-bit) with
+ * GCC 4.8+, Clang 3.7+ and MSC 2012+.
+ */
+#ifndef BR_AES_X86NI
+#if (BR_i386 || BR_amd64) && (BR_GCC_4_8 || BR_CLANG_3_7 || BR_MSC_2012)
+#define BR_AES_X86NI   1
+#endif
+#endif
+
+/*
+ * SSE2 intrinsics are available on x86 (32-bit and 64-bit) with
+ * GCC 4.4+, Clang 3.7+ and MSC 2005+.
+ */
+#ifndef BR_SSE2
+#if (BR_i386 || BR_amd64) && (BR_GCC_4_4 || BR_CLANG_3_7 || BR_MSC_2005)
+#define BR_SSE2   1
+#endif
+#endif
+
+/*
+ * RDRAND intrinsics are available on x86 (32-bit and 64-bit) with
+ * GCC 4.6+, Clang 3.7+ and MSC 2012+.
+ */
+#ifndef BR_RDRAND
+#if (BR_i386 || BR_amd64) && (BR_GCC_4_6 || BR_CLANG_3_7 || BR_MSC_2012)
+#define BR_RDRAND   1
+#endif
+#endif
+
+/*
+ * Determine type of OS for random number generation. Macro names and
+ * values are documented on:
+ *    https://sourceforge.net/p/predef/wiki/OperatingSystems/
+ *
+ * Win32's CryptGenRandom() should be available on Windows systems.
+ *
+ * /dev/urandom should work on all Unix-like systems (including macOS X).
+ *
+ * getentropy() is present on Linux (Glibc 2.25+), FreeBSD (12.0+) and
+ * OpenBSD (5.6+). For OpenBSD, there does not seem to be easy to use
+ * macros to test the minimum version, so we just assume that it is
+ * recent enough (last version without getentropy() has gone out of
+ * support in May 2015).
+ *
+ * Ideally we should use getentropy() on macOS (10.12+) too, but I don't
+ * know how to test the exact OS version with preprocessor macros.
+ *
+ * TODO: enrich the list of detected systems.
+ */
+
+#ifndef BR_USE_URANDOM
+#if defined _AIX \
+	|| defined __ANDROID__ \
+	|| defined __FreeBSD__ \
+	|| defined __NetBSD__ \
+	|| defined __OpenBSD__ \
+	|| defined __DragonFly__ \
+	|| defined __linux__ \
+	|| (defined __sun && (defined __SVR4 || defined __svr4__)) \
+	|| (defined __APPLE__ && defined __MACH__)
+#define BR_USE_URANDOM   1
+#endif
+#endif
+
+#ifndef BR_USE_GETENTROPY
+#if (defined __linux__ \
+	&& (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 25))) \
+	|| (defined __FreeBSD__ && __FreeBSD__ >= 12) \
+	|| defined __OpenBSD__
+#define BR_USE_GETENTROPY   1
+#endif
+#endif
+
+#ifndef BR_USE_WIN32_RAND
+#if defined _WIN32 || defined _WIN64
+#define BR_USE_WIN32_RAND   1
+#endif
+#endif
+
+/*
+ * POWER8 crypto support. We rely on compiler macros for the
+ * architecture, since we do not have a reliable, simple way to detect
+ * the required support at runtime (we could try running an opcode, and
+ * trapping the exception or signal on illegal instruction, but this
+ * induces some non-trivial OS dependencies that we would prefer to
+ * avoid if possible).
+ */
+#ifndef BR_POWER8
+#if __GNUC__ && ((_ARCH_PWR8 || _ARCH_PPC) && __CRYPTO__)
+#define BR_POWER8   1
+#endif
+#endif
+
+/*
+ * Detect endianness on POWER8.
+ */
+#if BR_POWER8
+#if defined BR_POWER8_LE
+#undef BR_POWER8_BE
+#if BR_POWER8_LE
+#define BR_POWER8_BE   0
+#else
+#define BR_POWER8_BE   1
+#endif
+#elif defined BR_POWER8_BE
+#undef BR_POWER8_LE
+#if BR_POWER8_BE
+#define BR_POWER8_LE   0
+#else
+#define BR_POWER8_LE   1
+#endif
+#else
+#if __LITTLE_ENDIAN__
+#define BR_POWER8_LE   1
+#define BR_POWER8_BE   0
+#else
+#define BR_POWER8_LE   0
+#define BR_POWER8_BE   1
+#endif
+#endif
+#endif
+
+/*
+ * Detect support for 128-bit integers.
+ */
+#if !defined BR_INT128 && !defined BR_UMUL128
+#ifdef __SIZEOF_INT128__
+#define BR_INT128    1
+#elif _M_X64
+#define BR_UMUL128   1
+#endif
+#endif
+
+/*
+ * Detect support for unaligned accesses with known endianness.
+ *
+ *  x86 (both 32-bit and 64-bit) is little-endian and allows unaligned
+ *  accesses.
+ *
+ *  POWER/PowerPC allows unaligned accesses when big-endian. POWER8 and
+ *  later also allow unaligned accesses when little-endian.
+ */
+#if !defined BR_LE_UNALIGNED && !defined BR_BE_UNALIGNED
+
+#if __i386 || __i386__ || __x86_64__ || _M_IX86 || _M_X64
+#define BR_LE_UNALIGNED   1
+#elif BR_POWER8_BE
+#define BR_BE_UNALIGNED   1
+#elif BR_POWER8_LE
+#define BR_LE_UNALIGNED   1
+#elif (__powerpc__ || __powerpc64__ || _M_PPC || _ARCH_PPC || _ARCH_PPC64) \
+	&& __BIG_ENDIAN__
+#define BR_BE_UNALIGNED   1
+#endif
+
+#endif
+
+/* ==================================================================== */
+/*
+ * Encoding/decoding functions.
+ *
+ * 32-bit and 64-bit decoding, both little-endian and big-endian, is
+ * implemented with the inline functions below.
+ *
+ * When allowed by some compile-time options (autodetected or provided),
+ * optimised code is used, to perform direct memory access when the
+ * underlying architecture supports it, both for endianness and
+ * alignment. This, however, may trigger strict aliasing issues; the
+ * code below uses unions to perform (supposedly) safe type punning.
+ * Since the C aliasing rules are relatively complex and were amended,
+ * or at least re-explained with different phrasing, in all successive
+ * versions of the C standard, it is always a bit risky to bet that any
+ * specific version of a C compiler got it right, for some notion of
+ * "right".
+ */
+
+typedef union {
+	uint16_t u;
+	unsigned char b[sizeof(uint16_t)];
+} br_union_u16;
+
+typedef union {
+	uint32_t u;
+	unsigned char b[sizeof(uint32_t)];
+} br_union_u32;
+
+typedef union {
+	uint64_t u;
+	unsigned char b[sizeof(uint64_t)];
+} br_union_u64;
+
+static inline void
+br_enc16le(void *dst, unsigned x)
+{
+#if BR_LE_UNALIGNED
+	((br_union_u16 *)dst)->u = x;
+#else
+	unsigned char *buf;
+
+	buf = dst;
+	buf[0] = (unsigned char)x;
+	buf[1] = (unsigned char)(x >> 8);
+#endif
+}
+
+static inline void
+br_enc16be(void *dst, unsigned x)
+{
+#if BR_BE_UNALIGNED
+	((br_union_u16 *)dst)->u = x;
+#else
+	unsigned char *buf;
+
+	buf = dst;
+	buf[0] = (unsigned char)(x >> 8);
+	buf[1] = (unsigned char)x;
+#endif
+}
+
+static inline unsigned
+br_dec16le(const void *src)
+{
+#if BR_LE_UNALIGNED
+	return ((const br_union_u16 *)src)->u;
+#else
+	const unsigned char *buf;
+
+	buf = src;
+	return (unsigned)buf[0] | ((unsigned)buf[1] << 8);
+#endif
+}
+
+static inline unsigned
+br_dec16be(const void *src)
+{
+#if BR_BE_UNALIGNED
+	return ((const br_union_u16 *)src)->u;
+#else
+	const unsigned char *buf;
+
+	buf = src;
+	return ((unsigned)buf[0] << 8) | (unsigned)buf[1];
+#endif
+}
+
+static inline void
+br_enc32le(void *dst, uint32_t x)
+{
+#if BR_LE_UNALIGNED
+	((br_union_u32 *)dst)->u = x;
+#else
+	unsigned char *buf;
+
+	buf = dst;
+	buf[0] = (unsigned char)x;
+	buf[1] = (unsigned char)(x >> 8);
+	buf[2] = (unsigned char)(x >> 16);
+	buf[3] = (unsigned char)(x >> 24);
+#endif
+}
+
+static inline void
+br_enc32be(void *dst, uint32_t x)
+{
+#if BR_BE_UNALIGNED
+	((br_union_u32 *)dst)->u = x;
+#else
+	unsigned char *buf;
+
+	buf = dst;
+	buf[0] = (unsigned char)(x >> 24);
+	buf[1] = (unsigned char)(x >> 16);
+	buf[2] = (unsigned char)(x >> 8);
+	buf[3] = (unsigned char)x;
+#endif
+}
+
+static inline uint32_t
+br_dec32le(const void *src)
+{
+#if BR_LE_UNALIGNED
+	return ((const br_union_u32 *)src)->u;
+#else
+	const unsigned char *buf;
+
+	buf = src;
+	return (uint32_t)buf[0]
+		| ((uint32_t)buf[1] << 8)
+		| ((uint32_t)buf[2] << 16)
+		| ((uint32_t)buf[3] << 24);
+#endif
+}
+
+static inline uint32_t
+br_dec32be(const void *src)
+{
+#if BR_BE_UNALIGNED
+	return ((const br_union_u32 *)src)->u;
+#else
+	const unsigned char *buf;
+
+	buf = src;
+	return ((uint32_t)buf[0] << 24)
+		| ((uint32_t)buf[1] << 16)
+		| ((uint32_t)buf[2] << 8)
+		| (uint32_t)buf[3];
+#endif
+}
+
+static inline void
+br_enc64le(void *dst, uint64_t x)
+{
+#if BR_LE_UNALIGNED
+	((br_union_u64 *)dst)->u = x;
+#else
+	unsigned char *buf;
+
+	buf = dst;
+	br_enc32le(buf, (uint32_t)x);
+	br_enc32le(buf + 4, (uint32_t)(x >> 32));
+#endif
+}
+
+static inline void
+br_enc64be(void *dst, uint64_t x)
+{
+#if BR_BE_UNALIGNED
+	((br_union_u64 *)dst)->u = x;
+#else
+	unsigned char *buf;
+
+	buf = dst;
+	br_enc32be(buf, (uint32_t)(x >> 32));
+	br_enc32be(buf + 4, (uint32_t)x);
+#endif
+}
+
+static inline uint64_t
+br_dec64le(const void *src)
+{
+#if BR_LE_UNALIGNED
+	return ((const br_union_u64 *)src)->u;
+#else
+	const unsigned char *buf;
+
+	buf = src;
+	return (uint64_t)br_dec32le(buf)
+		| ((uint64_t)br_dec32le(buf + 4) << 32);
+#endif
+}
+
+static inline uint64_t
+br_dec64be(const void *src)
+{
+#if BR_BE_UNALIGNED
+	return ((const br_union_u64 *)src)->u;
+#else
+	const unsigned char *buf;
+
+	buf = src;
+	return ((uint64_t)br_dec32be(buf) << 32)
+		| (uint64_t)br_dec32be(buf + 4);
+#endif
+}
+
+/*
+ * Range decoding and encoding (for several successive values).
+ */
+void br_range_dec16le(uint16_t *v, size_t num, const void *src);
+void br_range_dec16be(uint16_t *v, size_t num, const void *src);
+void br_range_enc16le(void *dst, const uint16_t *v, size_t num);
+void br_range_enc16be(void *dst, const uint16_t *v, size_t num);
+
+void br_range_dec32le(uint32_t *v, size_t num, const void *src);
+void br_range_dec32be(uint32_t *v, size_t num, const void *src);
+void br_range_enc32le(void *dst, const uint32_t *v, size_t num);
+void br_range_enc32be(void *dst, const uint32_t *v, size_t num);
+
+void br_range_dec64le(uint64_t *v, size_t num, const void *src);
+void br_range_dec64be(uint64_t *v, size_t num, const void *src);
+void br_range_enc64le(void *dst, const uint64_t *v, size_t num);
+void br_range_enc64be(void *dst, const uint64_t *v, size_t num);
+
+/*
+ * Byte-swap a 32-bit integer.
+ */
+static inline uint32_t
+br_swap32(uint32_t x)
+{
+	x = ((x & (uint32_t)0x00FF00FF) << 8)
+		| ((x >> 8) & (uint32_t)0x00FF00FF);
+	return (x << 16) | (x >> 16);
+}
+
+/* ==================================================================== */
+/*
+ * Support code for hash functions.
+ */
+
+/*
+ * IV for MD5, SHA-1, SHA-224 and SHA-256.
+ */
+extern const uint32_t br_md5_IV[];
+extern const uint32_t br_sha1_IV[];
+extern const uint32_t br_sha224_IV[];
+extern const uint32_t br_sha256_IV[];
+
+/*
+ * Round functions for MD5, SHA-1, SHA-224 and SHA-256 (SHA-224 and
+ * SHA-256 use the same round function).
+ */
+void br_md5_round(const unsigned char *buf, uint32_t *val);
+void br_sha1_round(const unsigned char *buf, uint32_t *val);
+void br_sha2small_round(const unsigned char *buf, uint32_t *val);
+
+/*
+ * The core function for the TLS PRF. It computes
+ * P_hash(secret, label + seed), and XORs the result into the dst buffer.
+ */
+void br_tls_phash(void *dst, size_t len,
+	const br_hash_class *dig,
+	const void *secret, size_t secret_len, const char *label,
+	size_t seed_num, const br_tls_prf_seed_chunk *seed);
+
+/*
+ * Copy all configured hash implementations from a multihash context
+ * to another.
+ */
+static inline void
+br_multihash_copyimpl(br_multihash_context *dst,
+	const br_multihash_context *src)
+{
+	memcpy((void *)dst->impl, src->impl, sizeof src->impl);
+}
+
+/* ==================================================================== */
+/*
+ * Constant-time primitives. These functions manipulate 32-bit values in
+ * order to provide constant-time comparisons and multiplexers.
+ *
+ * Boolean values (the "ctl" bits) MUST have value 0 or 1.
+ *
+ * Implementation notes:
+ * =====================
+ *
+ * The uintN_t types are unsigned and with width exactly N bits; the C
+ * standard guarantees that computations are performed modulo 2^N, and
+ * there can be no overflow. Negation (unary '-') works on unsigned types
+ * as well.
+ *
+ * The intN_t types are guaranteed to have width exactly N bits, with no
+ * padding bit, and using two's complement representation. Casting
+ * intN_t to uintN_t really is conversion modulo 2^N. Beware that intN_t
+ * types, being signed, trigger undefined behaviour on arithmetic
+ * overflow (possibly raising some signal): with GCC, while modular
+ * arithmetics are usually applied, the optimizer may assume that
+ * overflows don't occur (unless the -fwrapv command-line option is
+ * added); Clang has the additional -ftrapv option to explicitly trap on
+ * integer overflow or underflow.
+ */
+
+/*
+ * Negate a boolean.
+ */
+static inline uint32_t
+NOT(uint32_t ctl)
+{
+	return ctl ^ 1;
+}
+
+/*
+ * Multiplexer: returns x if ctl == 1, y if ctl == 0.
+ */
+static inline uint32_t
+MUX(uint32_t ctl, uint32_t x, uint32_t y)
+{
+	return y ^ (-ctl & (x ^ y));
+}
+
+/*
+ * Equality check: returns 1 if x == y, 0 otherwise.
+ */
+static inline uint32_t
+EQ(uint32_t x, uint32_t y)
+{
+	uint32_t q;
+
+	q = x ^ y;
+	return NOT((q | -q) >> 31);
+}
+
+/*
+ * Inequality check: returns 1 if x != y, 0 otherwise.
+ */
+static inline uint32_t
+NEQ(uint32_t x, uint32_t y)
+{
+	uint32_t q;
+
+	q = x ^ y;
+	return (q | -q) >> 31;
+}
+
+/*
+ * Comparison: returns 1 if x > y, 0 otherwise.
+ */
+static inline uint32_t
+GT(uint32_t x, uint32_t y)
+{
+	/*
+	 * If both x < 2^31 and y < 2^31, then y-x will have its high
+	 * bit set if x > y, cleared otherwise.
+	 *
+	 * If either x >= 2^31 or y >= 2^31 (but not both), then the
+	 * result is the high bit of x.
+	 *
+	 * If both x >= 2^31 and y >= 2^31, then we can virtually
+	 * subtract 2^31 from both, and we are back to the first case.
+	 * Since (y-2^31)-(x-2^31) = y-x, the subtraction is already
+	 * fine.
+	 */
+	uint32_t z;
+
+	z = y - x;
+	return (z ^ ((x ^ y) & (x ^ z))) >> 31;
+}
+
+/*
+ * Other comparisons (greater-or-equal, lower-than, lower-or-equal).
+ */
+#define GE(x, y)   NOT(GT(y, x))
+#define LT(x, y)   GT(y, x)
+#define LE(x, y)   NOT(GT(x, y))
+
+/*
+ * General comparison: returned value is -1, 0 or 1, depending on
+ * whether x is lower than, equal to, or greater than y.
+ */
+static inline int32_t
+CMP(uint32_t x, uint32_t y)
+{
+	return (int32_t)GT(x, y) | -(int32_t)GT(y, x);
+}
+
+/*
+ * Returns 1 if x == 0, 0 otherwise. Take care that the operand is signed.
+ */
+static inline uint32_t
+EQ0(int32_t x)
+{
+	uint32_t q;
+
+	q = (uint32_t)x;
+	return ~(q | -q) >> 31;
+}
+
+/*
+ * Returns 1 if x > 0, 0 otherwise. Take care that the operand is signed.
+ */
+static inline uint32_t
+GT0(int32_t x)
+{
+	/*
+	 * High bit of -x is 0 if x == 0, but 1 if x > 0.
+	 */
+	uint32_t q;
+
+	q = (uint32_t)x;
+	return (~q & -q) >> 31;
+}
+
+/*
+ * Returns 1 if x >= 0, 0 otherwise. Take care that the operand is signed.
+ */
+static inline uint32_t
+GE0(int32_t x)
+{
+	return ~(uint32_t)x >> 31;
+}
+
+/*
+ * Returns 1 if x < 0, 0 otherwise. Take care that the operand is signed.
+ */
+static inline uint32_t
+LT0(int32_t x)
+{
+	return (uint32_t)x >> 31;
+}
+
+/*
+ * Returns 1 if x <= 0, 0 otherwise. Take care that the operand is signed.
+ */
+static inline uint32_t
+LE0(int32_t x)
+{
+	uint32_t q;
+
+	/*
+	 * ~-x has its high bit set if and only if -x is nonnegative (as
+	 * a signed int), i.e. x is in the -(2^31-1) to 0 range. We must
+	 * do an OR with x itself to account for x = -2^31.
+	 */
+	q = (uint32_t)x;
+	return (q | ~-q) >> 31;
+}
+
+/*
+ * Conditional copy: src[] is copied into dst[] if and only if ctl is 1.
+ * dst[] and src[] may overlap completely (but not partially).
+ */
+void br_ccopy(uint32_t ctl, void *dst, const void *src, size_t len);
+
+#define CCOPY   br_ccopy
+
+/*
+ * Compute the bit length of a 32-bit integer. Returned value is between 0
+ * and 32 (inclusive).
+ */
+static inline uint32_t
+BIT_LENGTH(uint32_t x)
+{
+	uint32_t k, c;
+
+	k = NEQ(x, 0);
+	c = GT(x, 0xFFFF); x = MUX(c, x >> 16, x); k += c << 4;
+	c = GT(x, 0x00FF); x = MUX(c, x >>  8, x); k += c << 3;
+	c = GT(x, 0x000F); x = MUX(c, x >>  4, x); k += c << 2;
+	c = GT(x, 0x0003); x = MUX(c, x >>  2, x); k += c << 1;
+	k += GT(x, 0x0001);
+	return k;
+}
+
+/*
+ * Compute the minimum of x and y.
+ */
+static inline uint32_t
+MIN(uint32_t x, uint32_t y)
+{
+	return MUX(GT(x, y), y, x);
+}
+
+/*
+ * Compute the maximum of x and y.
+ */
+static inline uint32_t
+MAX(uint32_t x, uint32_t y)
+{
+	return MUX(GT(x, y), x, y);
+}
+
+/*
+ * Multiply two 32-bit integers, with a 64-bit result. This default
+ * implementation assumes that the basic multiplication operator
+ * yields constant-time code.
+ */
+#define MUL(x, y)   ((uint64_t)(x) * (uint64_t)(y))
+
+#if BR_CT_MUL31
+
+/*
+ * Alternate implementation of MUL31, that will be constant-time on some
+ * (old) platforms where the default MUL31 is not. Unfortunately, it is
+ * also substantially slower, and yields larger code, on more modern
+ * platforms, which is why it is deactivated by default.
+ *
+ * MUL31_lo() must do some extra work because on some platforms, the
+ * _signed_ multiplication may return early if the top bits are 1.
+ * Simply truncating (casting) the output of MUL31() would not be
+ * sufficient, because the compiler may notice that we keep only the low
+ * word, and then replace automatically the unsigned multiplication with
+ * a signed multiplication opcode.
+ */
+#define MUL31(x, y)   ((uint64_t)((x) | (uint32_t)0x80000000) \
+                       * (uint64_t)((y) | (uint32_t)0x80000000) \
+                       - ((uint64_t)(x) << 31) - ((uint64_t)(y) << 31) \
+                       - ((uint64_t)1 << 62))
+static inline uint32_t
+MUL31_lo(uint32_t x, uint32_t y)
+{
+	uint32_t xl, xh;
+	uint32_t yl, yh;
+
+	xl = (x & 0xFFFF) | (uint32_t)0x80000000;
+	xh = (x >> 16) | (uint32_t)0x80000000;
+	yl = (y & 0xFFFF) | (uint32_t)0x80000000;
+	yh = (y >> 16) | (uint32_t)0x80000000;
+	return (xl * yl + ((xl * yh + xh * yl) << 16)) & (uint32_t)0x7FFFFFFF;
+}
+
+#else
+
+/*
+ * Multiply two 31-bit integers, with a 62-bit result. This default
+ * implementation assumes that the basic multiplication operator
+ * yields constant-time code.
+ * The MUL31_lo() macro returns only the low 31 bits of the product.
+ */
+#define MUL31(x, y)     ((uint64_t)(x) * (uint64_t)(y))
+#define MUL31_lo(x, y)  (((uint32_t)(x) * (uint32_t)(y)) & (uint32_t)0x7FFFFFFF)
+
+#endif
+
+/*
+ * Multiply two words together; the sum of the lengths of the two
+ * operands must not exceed 31 (for instance, one operand may use 16
+ * bits if the other fits on 15). If BR_CT_MUL15 is non-zero, then the
+ * macro will contain some extra operations that help in making the
+ * operation constant-time on some platforms, where the basic 32-bit
+ * multiplication is not constant-time.
+ */
+#if BR_CT_MUL15
+#define MUL15(x, y)   (((uint32_t)(x) | (uint32_t)0x80000000) \
+                       * ((uint32_t)(y) | (uint32_t)0x80000000) \
+		       & (uint32_t)0x7FFFFFFF)
+#else
+#define MUL15(x, y)   ((uint32_t)(x) * (uint32_t)(y))
+#endif
+
+/*
+ * Arithmetic right shift (sign bit is copied). What happens when
+ * right-shifting a negative value is _implementation-defined_, so it
+ * does not trigger undefined behaviour, but it is still up to each
+ * compiler to define (and document) what it does. Most/all compilers
+ * will do an arithmetic shift, the sign bit being used to fill the
+ * holes; this is a native operation on the underlying CPU, and it would
+ * make little sense for the compiler to do otherwise. GCC explicitly
+ * documents that it follows that convention.
+ *
+ * Still, if BR_NO_ARITH_SHIFT is defined (and non-zero), then an
+ * alternate version will be used, that does not rely on such
+ * implementation-defined behaviour. Unfortunately, it is also slower
+ * and yields bigger code, which is why it is deactivated by default.
+ */
+#if BR_NO_ARITH_SHIFT
+#define ARSH(x, n)   (((uint32_t)(x) >> (n)) \
+                      | ((-((uint32_t)(x) >> 31)) << (32 - (n))))
+#else
+#define ARSH(x, n)   ((*(int32_t *)&(x)) >> (n))
+#endif
+
+/*
+ * Constant-time division. The dividend hi:lo is divided by the
+ * divisor d; the quotient is returned and the remainder is written
+ * in *r. If hi == d, then the quotient does not fit on 32 bits;
+ * returned value is thus truncated. If hi > d, returned values are
+ * indeterminate.
+ */
+uint32_t br_divrem(uint32_t hi, uint32_t lo, uint32_t d, uint32_t *r);
+
+/*
+ * Wrapper for br_divrem(); the remainder is returned, and the quotient
+ * is discarded.
+ */
+static inline uint32_t
+br_rem(uint32_t hi, uint32_t lo, uint32_t d)
+{
+	uint32_t r;
+
+	br_divrem(hi, lo, d, &r);
+	return r;
+}
+
+/*
+ * Wrapper for br_divrem(); the quotient is returned, and the remainder
+ * is discarded.
+ */
+static inline uint32_t
+br_div(uint32_t hi, uint32_t lo, uint32_t d)
+{
+	uint32_t r;
+
+	return br_divrem(hi, lo, d, &r);
+}
+
+/* ==================================================================== */
+
+/*
+ * Integers 'i32'
+ * --------------
+ *
+ * The 'i32' functions implement computations on big integers using
+ * an internal representation as an array of 32-bit integers. For
+ * an array x[]:
+ *  -- x[0] contains the "announced bit length" of the integer
+ *  -- x[1], x[2]... contain the value in little-endian order (x[1]
+ *     contains the least significant 32 bits)
+ *
+ * Multiplications rely on the elementary 32x32->64 multiplication.
+ *
+ * The announced bit length specifies the number of bits that are
+ * significant in the subsequent 32-bit words. Unused bits in the
+ * last (most significant) word are set to 0; subsequent words are
+ * uninitialized and need not exist at all.
+ *
+ * The execution time and memory access patterns of all computations
+ * depend on the announced bit length, but not on the actual word
+ * values. For modular integers, the announced bit length of any integer
+ * modulo n is equal to the actual bit length of n; thus, computations
+ * on modular integers are "constant-time" (only the modulus length may
+ * leak).
+ */
+
+/*
+ * Compute the actual bit length of an integer. The argument x should
+ * point to the first (least significant) value word of the integer.
+ * The len 'xlen' contains the number of 32-bit words to access.
+ *
+ * CT: value or length of x does not leak.
+ */
+uint32_t br_i32_bit_length(uint32_t *x, size_t xlen);
+
+/*
+ * Decode an integer from its big-endian unsigned representation. The
+ * "true" bit length of the integer is computed, but all words of x[]
+ * corresponding to the full 'len' bytes of the source are set.
+ *
+ * CT: value or length of x does not leak.
+ */
+void br_i32_decode(uint32_t *x, const void *src, size_t len);
+
+/*
+ * Decode an integer from its big-endian unsigned representation. The
+ * integer MUST be lower than m[]; the announced bit length written in
+ * x[] will be equal to that of m[]. All 'len' bytes from the source are
+ * read.
+ *
+ * Returned value is 1 if the decoded value fits within the modulus, 0
+ * otherwise. In the latter case, the x[] buffer will be set to 0 (but
+ * still with the announced bit length of m[]).
+ *
+ * CT: value or length of x does not leak. Memory access pattern depends
+ * only on 'len' and the announced bit length of m. Whether x fits or
+ * not does not leak either.
+ */
+uint32_t br_i32_decode_mod(uint32_t *x,
+	const void *src, size_t len, const uint32_t *m);
+
+/*
+ * Reduce an integer (a[]) modulo another (m[]). The result is written
+ * in x[] and its announced bit length is set to be equal to that of m[].
+ *
+ * x[] MUST be distinct from a[] and m[].
+ *
+ * CT: only announced bit lengths leak, not values of x, a or m.
+ */
+void br_i32_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m);
+
+/*
+ * Decode an integer from its big-endian unsigned representation, and
+ * reduce it modulo the provided modulus m[]. The announced bit length
+ * of the result is set to be equal to that of the modulus.
+ *
+ * x[] MUST be distinct from m[].
+ */
+void br_i32_decode_reduce(uint32_t *x,
+	const void *src, size_t len, const uint32_t *m);
+
+/*
+ * Encode an integer into its big-endian unsigned representation. The
+ * output length in bytes is provided (parameter 'len'); if the length
+ * is too short then the integer is appropriately truncated; if it is
+ * too long then the extra bytes are set to 0.
+ */
+void br_i32_encode(void *dst, size_t len, const uint32_t *x);
+
+/*
+ * Multiply x[] by 2^32 and then add integer z, modulo m[]. This
+ * function assumes that x[] and m[] have the same announced bit
+ * length, and the announced bit length of m[] matches its true
+ * bit length.
+ *
+ * x[] and m[] MUST be distinct arrays.
+ *
+ * CT: only the common announced bit length of x and m leaks, not
+ * the values of x, z or m.
+ */
+void br_i32_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m);
+
+/*
+ * Extract one word from an integer. The offset is counted in bits.
+ * The word MUST entirely fit within the word elements corresponding
+ * to the announced bit length of a[].
+ */
+static inline uint32_t
+br_i32_word(const uint32_t *a, uint32_t off)
+{
+	size_t u;
+	unsigned j;
+
+	u = (size_t)(off >> 5) + 1; /* index of the word holding bit 'off'; +1 skips a[0] */
+	j = (unsigned)off & 31; /* position of bit 'off' inside a[u] */
+	if (j == 0) {
+		return a[u]; /* aligned: no a[u + 1] read, no shift by 32 */
+	} else {
+		return (a[u] >> j) | (a[u + 1] << (32 - j)); /* straddles two words */
+	}
+}
+
+/*
+ * Test whether an integer is zero.
+ */
+uint32_t br_i32_iszero(const uint32_t *x);
+
+/*
+ * Add b[] to a[] and return the carry (0 or 1). If ctl is 0, then a[]
+ * is unmodified, but the carry is still computed and returned. The
+ * arrays a[] and b[] MUST have the same announced bit length.
+ *
+ * a[] and b[] MAY be the same array, but partial overlap is not allowed.
+ */
+uint32_t br_i32_add(uint32_t *a, const uint32_t *b, uint32_t ctl);
+
+/*
+ * Subtract b[] from a[] and return the carry (0 or 1). If ctl is 0,
+ * then a[] is unmodified, but the carry is still computed and returned.
+ * The arrays a[] and b[] MUST have the same announced bit length.
+ *
+ * a[] and b[] MAY be the same array, but partial overlap is not allowed.
+ */
+uint32_t br_i32_sub(uint32_t *a, const uint32_t *b, uint32_t ctl);
+
+/*
+ * Compute d+a*b, result in d. The initial announced bit length of d[]
+ * MUST match that of a[]. The d[] array MUST be large enough to
+ * accommodate the full result, plus (possibly) an extra word. The
+ * resulting announced bit length of d[] will be the sum of the announced
+ * bit lengths of a[] and b[] (therefore, it may be larger than the actual
+ * bit length of the numerical result).
+ *
+ * a[] and b[] may be the same array. d[] must be disjoint from both a[]
+ * and b[].
+ */
+void br_i32_mulacc(uint32_t *d, const uint32_t *a, const uint32_t *b);
+
+/*
+ * Zeroize an integer. The announced bit length is set to the provided
+ * value, and the corresponding words are set to 0.
+ */
+static inline void
+br_i32_zero(uint32_t *x, uint32_t bit_len)
+{
+	*x ++ = bit_len; /* x[0] receives the announced bit length */
+	memset(x, 0, ((bit_len + 31) >> 5) * sizeof *x); /* ceil(bit_len / 32) value words */
+}
+
+/*
+ * Compute -(1/x) mod 2^32. If x is even, then this function returns 0.
+ */
+uint32_t br_i32_ninv32(uint32_t x);
+
+/*
+ * Convert a modular integer to Montgomery representation. The integer x[]
+ * MUST be lower than m[], but with the same announced bit length.
+ */
+void br_i32_to_monty(uint32_t *x, const uint32_t *m);
+
+/*
+ * Convert a modular integer back from Montgomery representation. The
+ * integer x[] MUST be lower than m[], but with the same announced bit
+ * length. The "m0i" parameter is equal to -(1/m0) mod 2^32, where m0 is
+ * the least significant value word of m[] (this works only if m[] is
+ * an odd integer).
+ */
+void br_i32_from_monty(uint32_t *x, const uint32_t *m, uint32_t m0i);
+
+/*
+ * Compute a modular Montgomery multiplication. d[] is filled with the
+ * value of x*y/R modulo m[] (where R is the Montgomery factor). The
+ * array d[] MUST be distinct from x[], y[] and m[]. x[] and y[] MUST be
+ * numerically lower than m[]. x[] and y[] MAY be the same array. The
+ * "m0i" parameter is equal to -(1/m0) mod 2^32, where m0 is the least
+ * significant value word of m[] (this works only if m[] is an odd
+ * integer).
+ */
+void br_i32_montymul(uint32_t *d, const uint32_t *x, const uint32_t *y,
+	const uint32_t *m, uint32_t m0i);
+
+/*
+ * Compute a modular exponentiation. x[] MUST be an integer modulo m[]
+ * (same announced bit length, lower value). m[] MUST be odd. The
+ * exponent is in big-endian unsigned notation, over 'elen' bytes. The
+ * "m0i" parameter is equal to -(1/m0) mod 2^32, where m0 is the least
+ * significant value word of m[] (this works only if m[] is an odd
+ * integer). The t1[] and t2[] parameters must be temporary arrays,
+ * each large enough to accommodate an integer with the same size as m[].
+ */
+void br_i32_modpow(uint32_t *x, const unsigned char *e, size_t elen,
+	const uint32_t *m, uint32_t m0i, uint32_t *t1, uint32_t *t2);
+
+/* ==================================================================== */
+
+/*
+ * Integers 'i31'
+ * --------------
+ *
+ * The 'i31' functions implement computations on big integers using
+ * an internal representation as an array of 32-bit integers. For
+ * an array x[]:
+ *  -- x[0] encodes the array length and the "announced bit length"
+ *     of the integer: namely, if the announced bit length is k,
+ *     then x[0] = ((k / 31) << 5) + (k % 31).
+ *  -- x[1], x[2]... contain the value in little-endian order, 31
+ *     bits per word (x[1] contains the least significant 31 bits).
+ *     The upper bit of each word is 0.
+ *
+ * Multiplications rely on the elementary 32x32->64 multiplication.
+ *
+ * The announced bit length specifies the number of bits that are
+ * significant in the subsequent 32-bit words. Unused bits in the
+ * last (most significant) word are set to 0; subsequent words are
+ * uninitialized and need not exist at all.
+ *
+ * The execution time and memory access patterns of all computations
+ * depend on the announced bit length, but not on the actual word
+ * values. For modular integers, the announced bit length of any integer
+ * modulo n is equal to the actual bit length of n; thus, computations
+ * on modular integers are "constant-time" (only the modulus length may
+ * leak).
+ */
+
+/*
+ * Test whether an integer is zero.
+ */
+uint32_t br_i31_iszero(const uint32_t *x);
+
+/*
+ * Add b[] to a[] and return the carry (0 or 1). If ctl is 0, then a[]
+ * is unmodified, but the carry is still computed and returned. The
+ * arrays a[] and b[] MUST have the same announced bit length.
+ *
+ * a[] and b[] MAY be the same array, but partial overlap is not allowed.
+ */
+uint32_t br_i31_add(uint32_t *a, const uint32_t *b, uint32_t ctl);
+
+/*
+ * Subtract b[] from a[] and return the carry (0 or 1). If ctl is 0,
+ * then a[] is unmodified, but the carry is still computed and returned.
+ * The arrays a[] and b[] MUST have the same announced bit length.
+ *
+ * a[] and b[] MAY be the same array, but partial overlap is not allowed.
+ */
+uint32_t br_i31_sub(uint32_t *a, const uint32_t *b, uint32_t ctl);
+
+/*
+ * Compute the ENCODED actual bit length of an integer. The argument x
+ * should point to the first (least significant) value word of the
+ * integer. The len 'xlen' contains the number of 32-bit words to
+ * access. The upper bit of each value word MUST be 0.
+ * Returned value is ((k / 31) << 5) + (k % 31) if the bit length is k.
+ *
+ * CT: value or length of x does not leak.
+ */
+uint32_t br_i31_bit_length(uint32_t *x, size_t xlen);
+
+/*
+ * Decode an integer from its big-endian unsigned representation. The
+ * "true" bit length of the integer is computed and set in the encoded
+ * announced bit length (x[0]), but all words of x[] corresponding to
+ * the full 'len' bytes of the source are set.
+ *
+ * CT: value or length of x does not leak.
+ */
+void br_i31_decode(uint32_t *x, const void *src, size_t len);
+
+/*
+ * Decode an integer from its big-endian unsigned representation. The
+ * integer MUST be lower than m[]; the (encoded) announced bit length
+ * written in x[] will be equal to that of m[]. All 'len' bytes from the
+ * source are read.
+ *
+ * Returned value is 1 if the decoded value fits within the modulus, 0
+ * otherwise. In the latter case, the x[] buffer will be set to 0 (but
+ * still with the announced bit length of m[]).
+ *
+ * CT: value or length of x does not leak. Memory access pattern depends
+ * only on 'len' and the announced bit length of m. Whether x fits or
+ * not does not leak either.
+ */
+uint32_t br_i31_decode_mod(uint32_t *x,
+	const void *src, size_t len, const uint32_t *m);
+
+/*
+ * Zeroize an integer. The announced bit length is set to the provided
+ * value, and the corresponding words are set to 0. The ENCODED bit length
+ * is expected here.
+ */
+static inline void
+br_i31_zero(uint32_t *x, uint32_t bit_len)
+{
+	*x ++ = bit_len; /* x[0] receives the ENCODED bit length, stored as given */
+	memset(x, 0, ((bit_len + 31) >> 5) * sizeof *x); /* encoded = (words << 5) + extra bits; rounds up to whole words */
+}
+
+/*
+ * Right-shift an integer. The shift amount must be lower than 31
+ * bits.
+ */
+void br_i31_rshift(uint32_t *x, int count);
+
+/*
+ * Reduce an integer (a[]) modulo another (m[]). The result is written
+ * in x[] and its announced bit length is set to be equal to that of m[].
+ *
+ * x[] MUST be distinct from a[] and m[].
+ *
+ * CT: only announced bit lengths leak, not values of x, a or m.
+ */
+void br_i31_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m);
+
+/*
+ * Decode an integer from its big-endian unsigned representation, and
+ * reduce it modulo the provided modulus m[]. The announced bit length
+ * of the result is set to be equal to that of the modulus.
+ *
+ * x[] MUST be distinct from m[].
+ */
+void br_i31_decode_reduce(uint32_t *x,
+	const void *src, size_t len, const uint32_t *m);
+
+/*
+ * Multiply x[] by 2^31 and then add integer z, modulo m[]. This
+ * function assumes that x[] and m[] have the same announced bit
+ * length, and that the announced bit length of m[] matches its true
+ * bit length.
+ *
+ * x[] and m[] MUST be distinct arrays. z MUST fit in 31 bits (upper
+ * bit set to 0).
+ *
+ * CT: only the common announced bit length of x and m leaks, not
+ * the values of x, z or m.
+ */
+void br_i31_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m);
+
+/*
+ * Encode an integer into its big-endian unsigned representation. The
+ * output length in bytes is provided (parameter 'len'); if the length
+ * is too short then the integer is appropriately truncated; if it is
+ * too long then the extra bytes are set to 0.
+ */
+void br_i31_encode(void *dst, size_t len, const uint32_t *x);
+
+/*
+ * Compute -(1/x) mod 2^31. If x is even, then this function returns 0.
+ */
+uint32_t br_i31_ninv31(uint32_t x);
+
+/*
+ * Compute a modular Montgomery multiplication. d[] is filled with the
+ * value of x*y/R modulo m[] (where R is the Montgomery factor). The
+ * array d[] MUST be distinct from x[], y[] and m[]. x[] and y[] MUST be
+ * numerically lower than m[]. x[] and y[] MAY be the same array. The
+ * "m0i" parameter is equal to -(1/m0) mod 2^31, where m0 is the least
+ * significant value word of m[] (this works only if m[] is an odd
+ * integer).
+ */
+void br_i31_montymul(uint32_t *d, const uint32_t *x, const uint32_t *y,
+	const uint32_t *m, uint32_t m0i);
+
+/*
+ * Convert a modular integer to Montgomery representation. The integer x[]
+ * MUST be lower than m[], but with the same announced bit length.
+ */
+void br_i31_to_monty(uint32_t *x, const uint32_t *m);
+
+/*
+ * Convert a modular integer back from Montgomery representation. The
+ * integer x[] MUST be lower than m[], but with the same announced bit
+ * length. The "m0i" parameter is equal to -(1/m0) mod 2^31, where m0 is
+ * the least significant value word of m[] (this works only if m[] is
+ * an odd integer).
+ */
+void br_i31_from_monty(uint32_t *x, const uint32_t *m, uint32_t m0i);
+
+/*
+ * Compute a modular exponentiation. x[] MUST be an integer modulo m[]
+ * (same announced bit length, lower value). m[] MUST be odd. The
+ * exponent is in big-endian unsigned notation, over 'elen' bytes. The
+ * "m0i" parameter is equal to -(1/m0) mod 2^31, where m0 is the least
+ * significant value word of m[] (this works only if m[] is an odd
+ * integer). The t1[] and t2[] parameters must be temporary arrays,
+ * each large enough to accommodate an integer with the same size as m[].
+ */
+void br_i31_modpow(uint32_t *x, const unsigned char *e, size_t elen,
+	const uint32_t *m, uint32_t m0i, uint32_t *t1, uint32_t *t2);
+
+/*
+ * Compute a modular exponentiation. x[] MUST be an integer modulo m[]
+ * (same announced bit length, lower value). m[] MUST be odd. The
+ * exponent is in big-endian unsigned notation, over 'elen' bytes. The
+ * "m0i" parameter is equal to -(1/m0) mod 2^31, where m0 is the least
+ * significant value word of m[] (this works only if m[] is an odd
+ * integer). The tmp[] array is used for temporaries, and has size
+ * 'twlen' words; it must be large enough to accommodate at least two
+ * temporary values with the same size as m[] (including the leading
+ * "bit length" word). If there is room for more temporaries, then this
+ * function may use the extra room for window-based optimisation,
+ * resulting in faster computations.
+ *
+ * Returned value is 1 on success, 0 on error. An error is reported if
+ * the provided tmp[] array is too short.
+ */
+uint32_t br_i31_modpow_opt(uint32_t *x, const unsigned char *e, size_t elen,
+	const uint32_t *m, uint32_t m0i, uint32_t *tmp, size_t twlen);
+
+/*
+ * Compute d+a*b, result in d. The initial announced bit length of d[]
+ * MUST match that of a[]. The d[] array MUST be large enough to
+ * accommodate the full result, plus (possibly) an extra word. The
+ * resulting announced bit length of d[] will be the sum of the announced
+ * bit lengths of a[] and b[] (therefore, it may be larger than the actual
+ * bit length of the numerical result).
+ *
+ * a[] and b[] may be the same array. d[] must be disjoint from both a[]
+ * and b[].
+ */
+void br_i31_mulacc(uint32_t *d, const uint32_t *a, const uint32_t *b);
+
+/*
+ * Compute x/y mod m, result in x. Values x and y must be between 0 and
+ * m-1, and have the same announced bit length as m. Modulus m must be
+ * odd. The "m0i" parameter is equal to -1/m mod 2^31. The array 't'
+ * must point to a temporary area that can hold at least three integers
+ * of the size of m.
+ *
+ * m may not overlap x and y. x and y may overlap each other (this can
+ * be useful to test whether a value is invertible modulo m). t must be
+ * disjoint from all other arrays.
+ *
+ * Returned value is 1 on success, 0 otherwise. Success is attained if
+ * y is invertible modulo m.
+ */
+uint32_t br_i31_moddiv(uint32_t *x, const uint32_t *y,
+	const uint32_t *m, uint32_t m0i, uint32_t *t);
+
+/* ==================================================================== */
+
+/*
+ * FIXME: document "i15" functions.
+ */
+
+static inline void
+br_i15_zero(uint16_t *x, uint16_t bit_len)
+{
+	*x ++ = bit_len; /* x[0] receives bit_len unchanged */
+	memset(x, 0, ((bit_len + 15) >> 4) * sizeof *x); /* clears (bit_len + 15) >> 4 words; NOTE(review): bit_len presumably ENCODED as for i31 -- confirm */
+}
+
+uint32_t br_i15_iszero(const uint16_t *x);
+
+uint16_t br_i15_ninv15(uint16_t x);
+
+uint32_t br_i15_add(uint16_t *a, const uint16_t *b, uint32_t ctl);
+
+uint32_t br_i15_sub(uint16_t *a, const uint16_t *b, uint32_t ctl);
+
+void br_i15_muladd_small(uint16_t *x, uint16_t z, const uint16_t *m);
+
+void br_i15_montymul(uint16_t *d, const uint16_t *x, const uint16_t *y,
+	const uint16_t *m, uint16_t m0i);
+
+void br_i15_to_monty(uint16_t *x, const uint16_t *m);
+
+void br_i15_modpow(uint16_t *x, const unsigned char *e, size_t elen,
+	const uint16_t *m, uint16_t m0i, uint16_t *t1, uint16_t *t2);
+
+uint32_t br_i15_modpow_opt(uint16_t *x, const unsigned char *e, size_t elen,
+	const uint16_t *m, uint16_t m0i, uint16_t *tmp, size_t twlen);
+
+void br_i15_encode(void *dst, size_t len, const uint16_t *x);
+
+uint32_t br_i15_decode_mod(uint16_t *x,
+	const void *src, size_t len, const uint16_t *m);
+
+void br_i15_rshift(uint16_t *x, int count);
+
+uint32_t br_i15_bit_length(uint16_t *x, size_t xlen);
+
+void br_i15_decode(uint16_t *x, const void *src, size_t len);
+
+void br_i15_from_monty(uint16_t *x, const uint16_t *m, uint16_t m0i);
+
+void br_i15_decode_reduce(uint16_t *x,
+	const void *src, size_t len, const uint16_t *m);
+
+void br_i15_reduce(uint16_t *x, const uint16_t *a, const uint16_t *m);
+
+void br_i15_mulacc(uint16_t *d, const uint16_t *a, const uint16_t *b);
+
+uint32_t br_i15_moddiv(uint16_t *x, const uint16_t *y,
+	const uint16_t *m, uint16_t m0i, uint16_t *t);
+
+/*
+ * Variant of br_i31_modpow_opt() that internally uses 64x64->128
+ * multiplications. It expects the same parameters as br_i31_modpow_opt(),
+ * except that the temporaries should be 64-bit integers, not 32-bit
+ * integers.
+ */
+uint32_t br_i62_modpow_opt(uint32_t *x31, const unsigned char *e, size_t elen,
+	const uint32_t *m31, uint32_t m0i31, uint64_t *tmp, size_t twlen);
+
+/*
+ * Type for a function with the same API as br_i31_modpow_opt() (some
+ * implementations of this type may have stricter alignment requirements
+ * on the temporaries).
+ */
+typedef uint32_t (*br_i31_modpow_opt_type)(uint32_t *x,
+	const unsigned char *e, size_t elen,
+	const uint32_t *m, uint32_t m0i, uint32_t *tmp, size_t twlen);
+
+/*
+ * Wrapper for br_i62_modpow_opt() that uses the same type as
+ * br_i31_modpow_opt(); however, it requires its 'tmp' argument to be
+ * 64-bit aligned.
+ */
+uint32_t br_i62_modpow_opt_as_i31(uint32_t *x,
+	const unsigned char *e, size_t elen,
+	const uint32_t *m, uint32_t m0i, uint32_t *tmp, size_t twlen);
+
+/* ==================================================================== */
+
+static inline size_t
+br_digest_size(const br_hash_class *digest_class)
+{
+	return (size_t)(digest_class->desc >> BR_HASHDESC_OUT_OFF) /* shift the field down to bit 0 */
+		& BR_HASHDESC_OUT_MASK; /* keep only that field; presumably the digest output size -- confirm */
+}
+
+/*
+ * Get the output size (in bytes) of a hash function.
+ */
+size_t br_digest_size_by_ID(int digest_id);
+
+/*
+ * Get the OID (encoded OBJECT IDENTIFIER value, without tag and length)
+ * for a hash function. If digest_id is not a supported digest identifier
+ * (in particular if it is equal to 0, i.e. br_md5sha1_ID), then NULL is
+ * returned and *len is set to 0.
+ */
+const unsigned char *br_digest_OID(int digest_id, size_t *len);
+
+/* ==================================================================== */
+/*
+ * DES support functions.
+ */
+
+/*
+ * Apply DES Initial Permutation.
+ */
+void br_des_do_IP(uint32_t *xl, uint32_t *xr);
+
+/*
+ * Apply DES Final Permutation (inverse of IP).
+ */
+void br_des_do_invIP(uint32_t *xl, uint32_t *xr);
+
+/*
+ * Key schedule unit: for a DES key (8 bytes), compute 16 subkeys. Each
+ * subkey is two 28-bit words represented as two 32-bit words; the PC-2
+ * bit extraction is NOT applied.
+ */
+void br_des_keysched_unit(uint32_t *skey, const void *key);
+
+/*
+ * Reversal of 16 DES sub-keys (for decryption).
+ */
+void br_des_rev_skey(uint32_t *skey);
+
+/*
+ * DES/3DES key schedule for 'des_tab' (encryption direction). Returned
+ * value is the number of rounds.
+ */
+unsigned br_des_tab_keysched(uint32_t *skey, const void *key, size_t key_len);
+
+/*
+ * DES/3DES key schedule for 'des_ct' (encryption direction). Returned
+ * value is the number of rounds.
+ */
+unsigned br_des_ct_keysched(uint32_t *skey, const void *key, size_t key_len);
+
+/*
+ * DES/3DES subkey decompression (from the compressed bitsliced subkeys).
+ */
+void br_des_ct_skey_expand(uint32_t *sk_exp,
+	unsigned num_rounds, const uint32_t *skey);
+
+/*
+ * DES/3DES block encryption/decryption ('des_tab').
+ */
+void br_des_tab_process_block(unsigned num_rounds,
+	const uint32_t *skey, void *block);
+
+/*
+ * DES/3DES block encryption/decryption ('des_ct').
+ */
+void br_des_ct_process_block(unsigned num_rounds,
+	const uint32_t *skey, void *block);
+
+/* ==================================================================== */
+/*
+ * AES support functions.
+ */
+
+/*
+ * The AES S-box (256-byte table).
+ */
+extern const unsigned char br_aes_S[];
+
+/*
+ * AES key schedule. skey[] is filled with n+1 128-bit subkeys, where n
+ * is the number of rounds (10 to 14, depending on key size). The number
+ * of rounds is returned. If the key size is invalid (not 16, 24 or 32),
+ * then 0 is returned.
+ *
+ * This implementation uses a 256-byte table and is NOT constant-time.
+ */
+unsigned br_aes_keysched(uint32_t *skey, const void *key, size_t key_len);
+
+/*
+ * AES key schedule for decryption ('aes_big' implementation).
+ */
+unsigned br_aes_big_keysched_inv(uint32_t *skey,
+	const void *key, size_t key_len);
+
+/*
+ * AES block encryption with the 'aes_big' implementation (fast, but
+ * not constant-time). This function encrypts a single block "in place".
+ */
+void br_aes_big_encrypt(unsigned num_rounds, const uint32_t *skey, void *data);
+
+/*
+ * AES block decryption with the 'aes_big' implementation (fast, but
+ * not constant-time). This function decrypts a single block "in place".
+ */
+void br_aes_big_decrypt(unsigned num_rounds, const uint32_t *skey, void *data);
+
+/*
+ * AES block encryption with the 'aes_small' implementation (small, but
+ * slow and not constant-time). This function encrypts a single block
+ * "in place".
+ */
+void br_aes_small_encrypt(unsigned num_rounds,
+	const uint32_t *skey, void *data);
+
+/*
+ * AES block decryption with the 'aes_small' implementation (small, but
+ * slow and not constant-time). This function decrypts a single block
+ * "in place".
+ */
+void br_aes_small_decrypt(unsigned num_rounds,
+	const uint32_t *skey, void *data);
+
+/*
+ * The constant-time implementation is "bitsliced": the 128-bit state is
+ * split over eight 32-bit words q* in the following way:
+ *
+ * -- Input block consists in 16 bytes:
+ *    a00 a10 a20 a30 a01 a11 a21 a31 a02 a12 a22 a32 a03 a13 a23 a33
+ * In the terminology of FIPS 197, this is a 4x4 matrix which is read
+ * column by column.
+ *
+ * -- Each byte is split into eight bits which are distributed over the
+ * eight words, at the same rank. Thus, for a byte x at rank k, bit 0
+ * (least significant) of x will be at rank k in q0 (if that bit is b,
+ * then it contributes "b << k" to the value of q0), bit 1 of x will be
+ * at rank k in q1, and so on.
+ *
+ * -- Ranks given to bits are in "row order" and are either all even, or
+ * all odd. Two independent AES states are thus interleaved, one using
+ * the even ranks, the other the odd ranks. Row order means:
+ *    a00 a01 a02 a03 a10 a11 a12 a13 a20 a21 a22 a23 a30 a31 a32 a33
+ *
+ * Converting input bytes from two AES blocks to bitslice representation
+ * is done in the following way:
+ * -- Decode first block into the four words q0 q2 q4 q6, in that order,
+ * using little-endian convention.
+ * -- Decode second block into the four words q1 q3 q5 q7, in that order,
+ * using little-endian convention.
+ * -- Call br_aes_ct_ortho().
+ *
+ * Converting back to bytes is done by using the reverse operations. Note
+ * that br_aes_ct_ortho() is its own inverse.
+ */
+
+/*
+ * Perform bytewise orthogonalization of eight 32-bit words. Bytes
+ * of q0..q7 are spread over all words: for a byte x that occurs
+ * at rank i in q[j] (byte x uses bits 8*i to 8*i+7 in q[j]), the bit
+ * of rank k in x (0 <= k <= 7) goes to q[k] at rank 8*i+j.
+ *
+ * This operation is an involution.
+ */
+void br_aes_ct_ortho(uint32_t *q);
+
+/*
+ * The AES S-box, as a bitsliced constant-time version. The input array
+ * consists in eight 32-bit words; 32 S-box instances are computed in
+ * parallel. Bits 0 to 7 of each S-box input (bit 0 is least significant)
+ * are spread over the words 0 to 7, at the same rank.
+ */
+void br_aes_ct_bitslice_Sbox(uint32_t *q);
+
+/*
+ * Like br_aes_ct_bitslice_Sbox(), but for the inverse S-box.
+ */
+void br_aes_ct_bitslice_invSbox(uint32_t *q);
+
+/*
+ * Compute AES encryption on bitsliced data. Since input is stored on
+ * eight 32-bit words, two block encryptions are actually performed
+ * in parallel.
+ */
+void br_aes_ct_bitslice_encrypt(unsigned num_rounds,
+	const uint32_t *skey, uint32_t *q);
+
+/*
+ * Compute AES decryption on bitsliced data. Since input is stored on
+ * eight 32-bit words, two block decryptions are actually performed
+ * in parallel.
+ */
+void br_aes_ct_bitslice_decrypt(unsigned num_rounds,
+	const uint32_t *skey, uint32_t *q);
+
+/*
+ * AES key schedule, constant-time version. skey[] is filled with n+1
+ * 128-bit subkeys, where n is the number of rounds (10 to 14, depending
+ * on key size). The number of rounds is returned. If the key size is
+ * invalid (not 16, 24 or 32), then 0 is returned.
+ */
+unsigned br_aes_ct_keysched(uint32_t *comp_skey,
+	const void *key, size_t key_len);
+
+/*
+ * Expand AES subkeys as produced by br_aes_ct_keysched(), into
+ * a larger array suitable for br_aes_ct_bitslice_encrypt() and
+ * br_aes_ct_bitslice_decrypt().
+ */
+void br_aes_ct_skey_expand(uint32_t *skey,
+	unsigned num_rounds, const uint32_t *comp_skey);
+
+/*
+ * For the ct64 implementation, the same bitslicing technique is used,
+ * but four instances are interleaved. First instance uses bits 0, 4,
+ * 8, 12,... of each word; second instance uses bits 1, 5, 9, 13,...
+ * and so on.
+ */
+
+/*
+ * Perform bytewise orthogonalization of eight 64-bit words. Bytes
+ * of q0..q7 are spread over all words: for a byte x that occurs
+ * at rank i in q[j] (byte x uses bits 8*i to 8*i+7 in q[j]), the bit
+ * of rank k in x (0 <= k <= 7) goes to q[k] at rank 8*i+j.
+ *
+ * This operation is an involution.
+ */
+void br_aes_ct64_ortho(uint64_t *q);
+
+/*
+ * Interleave bytes for an AES input block. If input bytes are
+ * denoted 0123456789ABCDEF, and have been decoded with little-endian
+ * convention (w[0] contains 0123, with '3' being most significant;
+ * w[1] contains 4567, and so on), then output word q0 will be
+ * set to 08192A3B (again little-endian convention) and q1 will
+ * be set to 4C5D6E7F.
+ */
+void br_aes_ct64_interleave_in(uint64_t *q0, uint64_t *q1, const uint32_t *w);
+
+/*
+ * Perform the opposite of br_aes_ct64_interleave_in().
+ */
+void br_aes_ct64_interleave_out(uint32_t *w, uint64_t q0, uint64_t q1);
+
+/*
+ * The AES S-box, as a bitsliced constant-time version. The input array
+ * consists in eight 64-bit words; 64 S-box instances are computed in
+ * parallel. Bits 0 to 7 of each S-box input (bit 0 is least significant)
+ * are spread over the words 0 to 7, at the same rank.
+ */
+void br_aes_ct64_bitslice_Sbox(uint64_t *q);
+
+/*
+ * Like br_aes_ct64_bitslice_Sbox(), but for the inverse S-box.
+ */
+void br_aes_ct64_bitslice_invSbox(uint64_t *q);
+
+/*
+ * Compute AES encryption on bitsliced data. Since input is stored on
+ * eight 64-bit words, four block encryptions are actually performed
+ * in parallel.
+ */
+void br_aes_ct64_bitslice_encrypt(unsigned num_rounds,
+	const uint64_t *skey, uint64_t *q);
+
+/*
+ * Compute AES decryption on bitsliced data. Since input is stored on
+ * eight 64-bit words, four block decryptions are actually performed
+ * in parallel.
+ */
+void br_aes_ct64_bitslice_decrypt(unsigned num_rounds,
+	const uint64_t *skey, uint64_t *q);
+
+/*
+ * AES key schedule, constant-time version. skey[] is filled with n+1
+ * 128-bit subkeys, where n is the number of rounds (10 to 14, depending
+ * on key size). The number of rounds is returned. If the key size is
+ * invalid (not 16, 24 or 32), then 0 is returned.
+ */
+unsigned br_aes_ct64_keysched(uint64_t *comp_skey,
+	const void *key, size_t key_len);
+
+/*
+ * Expand AES subkeys as produced by br_aes_ct64_keysched(), into
+ * a larger array suitable for br_aes_ct64_bitslice_encrypt() and
+ * br_aes_ct64_bitslice_decrypt().
+ */
+void br_aes_ct64_skey_expand(uint64_t *skey,
+	unsigned num_rounds, const uint64_t *comp_skey);
+
+/*
+ * Test support for AES-NI opcodes.
+ */
+int br_aes_x86ni_supported(void);
+
+/*
+ * AES key schedule, using x86 AES-NI instructions. This yields the
+ * subkeys in the encryption direction. Number of rounds is returned.
+ * Key size MUST be 16, 24 or 32 bytes; otherwise, 0 is returned.
+ */
+unsigned br_aes_x86ni_keysched_enc(unsigned char *skni,
+	const void *key, size_t len);
+
+/*
+ * AES key schedule, using x86 AES-NI instructions. This yields the
+ * subkeys in the decryption direction. Number of rounds is returned.
+ * Key size MUST be 16, 24 or 32 bytes; otherwise, 0 is returned.
+ */
+unsigned br_aes_x86ni_keysched_dec(unsigned char *skni,
+	const void *key, size_t len);
+
+/*
+ * Test support for AES POWER8 opcodes.
+ */
+int br_aes_pwr8_supported(void);
+
+/*
+ * AES key schedule, using POWER8 instructions. This yields the
+ * subkeys in the encryption direction. Number of rounds is returned.
+ * Key size MUST be 16, 24 or 32 bytes; otherwise, 0 is returned.
+ */
+unsigned br_aes_pwr8_keysched(unsigned char *skni,
+	const void *key, size_t len);
+
+/* ==================================================================== */
+/*
+ * RSA.
+ */
+
+/*
+ * Apply proper PKCS#1 v1.5 padding (for signatures). 'hash_oid' is
+ * the encoded hash function OID, or NULL.
+ */
+uint32_t br_rsa_pkcs1_sig_pad(const unsigned char *hash_oid,
+	const unsigned char *hash, size_t hash_len,
+	uint32_t n_bitlen, unsigned char *x);
+
+/*
+ * Check PKCS#1 v1.5 padding (for signatures). 'hash_oid' is the encoded
+ * hash function OID, or NULL. The provided 'sig' value is _after_ the
+ * modular exponentiation, i.e. it should be the padded hash. On
+ * success, the hashed message is extracted.
+ */
+uint32_t br_rsa_pkcs1_sig_unpad(const unsigned char *sig, size_t sig_len,
+	const unsigned char *hash_oid, size_t hash_len,
+	unsigned char *hash_out);
+
+/*
+ * Apply proper PSS padding. The 'x' buffer is output only: it
+ * receives the value that is to be exponentiated.
+ */
+uint32_t br_rsa_pss_sig_pad(const br_prng_class **rng,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const unsigned char *hash, size_t salt_len,
+	uint32_t n_bitlen, unsigned char *x);
+
+/*
+ * Check PSS padding. The provided value is the one _after_
+ * the modular exponentiation; it is modified by this function.
+ * This function infers the signature length from the public key
+ * size, i.e. it assumes that this has already been verified (as
+ * part of the exponentiation).
+ */
+uint32_t br_rsa_pss_sig_unpad(
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const unsigned char *hash, size_t salt_len,
+	const br_rsa_public_key *pk, unsigned char *x);
+
+/*
+ * Apply OAEP padding. Returned value is the actual padded string length,
+ * or zero on error.
+ */
+size_t br_rsa_oaep_pad(const br_prng_class **rnd, const br_hash_class *dig,
+	const void *label, size_t label_len, const br_rsa_public_key *pk,
+	void *dst, size_t dst_nax_len, const void *src, size_t src_len);
+
+/*
+ * Unravel and check OAEP padding. If the padding is correct, then 1 is
+ * returned, '*len' is adjusted to the length of the message, and the
+ * data is moved to the start of the 'data' buffer. If the padding is
+ * incorrect, then 0 is returned and '*len' is untouched. Either way,
+ * the complete buffer contents are altered.
+ */
+uint32_t br_rsa_oaep_unpad(const br_hash_class *dig,
+	const void *label, size_t label_len, void *data, size_t *len);
+
+/*
+ * Compute MGF1 for a given seed, and XOR the output into the provided
+ * buffer.
+ */
+void br_mgf1_xor(void *data, size_t len,
+	const br_hash_class *dig, const void *seed, size_t seed_len);
+
+/*
+ * Inner function for RSA key generation; used by the "i31" and "i62"
+ * implementations.
+ */
+uint32_t br_rsa_i31_keygen_inner(const br_prng_class **rng,
+	br_rsa_private_key *sk, void *kbuf_priv,
+	br_rsa_public_key *pk, void *kbuf_pub,
+	unsigned size, uint32_t pubexp, br_i31_modpow_opt_type mp31);
+
+/* ==================================================================== */
+/*
+ * Elliptic curves.
+ */
+
+/*
+ * Type for generic EC parameters: curve order (unsigned big-endian
+ * encoding) and encoded conventional generator.
+ */
+typedef struct {
+	int curve; /* curve identifier; NOTE(review): value space not shown here -- confirm */
+	const unsigned char *order; /* curve order, unsigned big-endian encoding */
+	size_t order_len; /* length of order[] */
+	const unsigned char *generator; /* encoded conventional generator */
+	size_t generator_len; /* length of generator[] */
+} br_ec_curve_def;
+
+extern const br_ec_curve_def br_secp256r1;
+extern const br_ec_curve_def br_secp384r1;
+extern const br_ec_curve_def br_secp521r1;
+
+/*
+ * For Curve25519, the advertised "order" really is 2^255-1, since the
+ * point multiplication function really works over arbitrary 255-bit
+ * scalars. This value is only meant as a hint for ECDH key generation;
+ * only ECDSA uses the exact curve order, and ECDSA is not used with
+ * that specific curve.
+ */
+extern const br_ec_curve_def br_curve25519;
+
+/*
+ * Decode some bytes as an i31 integer, with truncation (corresponding
+ * to the 'bits2int' operation in RFC 6979). The target ENCODED bit
+ * length is provided as last parameter. The resulting value will have
+ * this declared bit length, and consists of the big-endian unsigned decoding
+ * of exactly that many bits in the source (capped at the source length).
+ */
+void br_ecdsa_i31_bits2int(uint32_t *x,
+	const void *src, size_t len, uint32_t ebitlen);
+
+/*
+ * Decode some bytes as an i15 integer, with truncation (corresponding
+ * to the 'bits2int' operation in RFC 6979). The target ENCODED bit
+ * length is provided as last parameter. The resulting value will have
+ * this declared bit length, and consists of the big-endian unsigned decoding
+ * of exactly that many bits in the source (capped at the source length).
+ */
+void br_ecdsa_i15_bits2int(uint16_t *x,
+	const void *src, size_t len, uint32_t ebitlen);
+
+/* ==================================================================== */
+/*
+ * ASN.1 support functions.
+ */
+
+/*
+ * A br_asn1_uint structure contains encoding information about an
+ * INTEGER nonnegative value: pointer to the integer contents (unsigned
+ * big-endian representation), length of the integer contents,
+ * and length of the encoded value. The data shall have minimal length:
+ *  - If the integer value is zero, then 'len' must be zero.
+ *  - If the integer value is not zero, then data[0] must be non-zero.
+ *
+ * Under these conditions, 'asn1len' is necessarily equal to either len
+ * or len+1.
+ */
+typedef struct {
+	const unsigned char *data;
+	size_t len;
+	size_t asn1len;
+} br_asn1_uint;
+
+/*
+ * Given an encoded integer (unsigned big-endian, with possible leading
+ * bytes of value 0), return the "prepared INTEGER" structure.
+ */
+br_asn1_uint br_asn1_uint_prepare(const void *xdata, size_t xlen);
+
+/*
+ * Encode an ASN.1 length. The length of the encoded length is returned.
+ * If 'dest' is NULL, then no encoding is performed, but the length of
+ * the encoded length is still computed and returned.
+ */
+size_t br_asn1_encode_length(void *dest, size_t len);
+
+/*
+ * Convenient macro for computing lengths of lengths.
+ */
+#define len_of_len(len)   br_asn1_encode_length(NULL, len)
+
+/*
+ * Encode a (prepared) ASN.1 INTEGER. The encoded length is returned.
+ * If 'dest' is NULL, then no encoding is performed, but the length of
+ * the encoded integer is still computed and returned.
+ */
+size_t br_asn1_encode_uint(void *dest, br_asn1_uint pp);
+
+/*
+ * Get the OID that identifies an elliptic curve. Returned value is
+ * the DER-encoded OID, with the length (always one byte) but without
+ * the tag. Thus, the first byte of the returned buffer contains the
+ * number of subsequent bytes in the value. If the curve is not
+ * recognised, NULL is returned.
+ */
+const unsigned char *br_get_curve_OID(int curve);
+
+/*
+ * Inner function for EC private key encoding. This is equivalent to
+ * the API function br_encode_ec_raw_der(), except for an extra
+ * parameter: if 'include_curve_oid' is zero, then the curve OID is
+ * _not_ included in the output blob (this is for PKCS#8 support).
+ */
+size_t br_encode_ec_raw_der_inner(void *dest,
+	const br_ec_private_key *sk, const br_ec_public_key *pk,
+	int include_curve_oid);
+
+/* ==================================================================== */
+/*
+ * SSL/TLS support functions.
+ */
+
+/*
+ * Record types.
+ */
+#define BR_SSL_CHANGE_CIPHER_SPEC    20
+#define BR_SSL_ALERT                 21
+#define BR_SSL_HANDSHAKE             22
+#define BR_SSL_APPLICATION_DATA      23
+
+/*
+ * Handshake message types.
+ */
+#define BR_SSL_HELLO_REQUEST          0
+#define BR_SSL_CLIENT_HELLO           1
+#define BR_SSL_SERVER_HELLO           2
+#define BR_SSL_CERTIFICATE           11
+#define BR_SSL_SERVER_KEY_EXCHANGE   12
+#define BR_SSL_CERTIFICATE_REQUEST   13
+#define BR_SSL_SERVER_HELLO_DONE     14
+#define BR_SSL_CERTIFICATE_VERIFY    15
+#define BR_SSL_CLIENT_KEY_EXCHANGE   16
+#define BR_SSL_FINISHED              20
+
+/*
+ * Alert levels.
+ */
+#define BR_LEVEL_WARNING   1
+#define BR_LEVEL_FATAL     2
+
+/*
+ * Low-level I/O state.
+ */
+#define BR_IO_FAILED   0
+#define BR_IO_IN       1
+#define BR_IO_OUT      2
+#define BR_IO_INOUT    3
+
+/*
+ * Mark a SSL engine as failed. The provided error code is recorded if
+ * the engine was not already marked as failed. If 'err' is 0, then the
+ * engine is marked as closed (without error).
+ */
+void br_ssl_engine_fail(br_ssl_engine_context *cc, int err);
+
+/*
+ * Return 1 if the engine is closed (normally or as a failure), else 0.
+ */
+static inline int
+br_ssl_engine_closed(const br_ssl_engine_context *cc)
+{
+	return cc->iomode == BR_IO_FAILED; /* same iomode for both cases */
+}
+
+/*
+ * Configure a new maximum fragment length. If possible, the maximum
+ * length for outgoing records is immediately adjusted (if there are
+ * not already too many buffered bytes for that).
+ */
+void br_ssl_engine_new_max_frag_len(
+	br_ssl_engine_context *rc, unsigned max_frag_len);
+
+/*
+ * Test whether the current incoming record has been fully received
+ * or not. This function returns 0 only if a complete record header
+ * has been received, but some of the (possibly encrypted) payload
+ * has not yet been obtained.
+ */
+int br_ssl_engine_recvrec_finished(const br_ssl_engine_context *rc);
+
+/*
+ * Flush the current record (if not empty). This is meant to be called
+ * from the handshake processor only.
+ */
+void br_ssl_engine_flush_record(br_ssl_engine_context *cc);
+
+/*
+ * Return 1 if some accumulated payload is waiting to be sent, else 0.
+ */
+static inline int
+br_ssl_engine_has_pld_to_send(const br_ssl_engine_context *rc)
+{
+	return rc->oxa != rc->oxb && rc->oxa != rc->oxc;
+}
+
+/*
+ * Initialize RNG in engine. Returned value is 1 on success, 0 on error.
+ * This function will try to use the OS-provided RNG, if available. If
+ * there is no OS-provided RNG, or if it failed, and no entropy was
+ * injected by the caller, then a failure will be reported. On error,
+ * the context error code is set.
+ */
+int br_ssl_engine_init_rand(br_ssl_engine_context *cc);
+
+/*
+ * Reset the handshake-related parts of the engine.
+ */
+void br_ssl_engine_hs_reset(br_ssl_engine_context *cc,
+	void (*hsinit)(void *), void (*hsrun)(void *));
+
+/*
+ * Get the PRF to use for this context, for the provided PRF hash
+ * function ID.
+ */
+br_tls_prf_impl br_ssl_engine_get_PRF(br_ssl_engine_context *cc, int prf_id);
+
+/*
+ * Consume the provided pre-master secret and compute the corresponding
+ * master secret. The 'prf_id' is the ID of the hash function to use
+ * with the TLS 1.2 PRF (ignored if the version is TLS 1.0 or 1.1).
+ */
+void br_ssl_engine_compute_master(br_ssl_engine_context *cc,
+	int prf_id, const void *pms, size_t len);
+
+/*
+ * Switch to CBC decryption for incoming records.
+ *    cc               the engine context
+ *    is_client        non-zero for a client, zero for a server
+ *    prf_id           id of hash function for PRF (ignored if not TLS 1.2+)
+ *    mac_id           id of hash function for HMAC
+ *    bc_impl          block cipher implementation (CBC decryption)
+ *    cipher_key_len   block cipher key length (in bytes)
+ */
+void br_ssl_engine_switch_cbc_in(br_ssl_engine_context *cc,
+	int is_client, int prf_id, int mac_id,
+	const br_block_cbcdec_class *bc_impl, size_t cipher_key_len);
+
+/*
+ * Switch to CBC encryption for outgoing records.
+ *    cc               the engine context
+ *    is_client        non-zero for a client, zero for a server
+ *    prf_id           id of hash function for PRF (ignored if not TLS 1.2+)
+ *    mac_id           id of hash function for HMAC
+ *    bc_impl          block cipher implementation (CBC encryption)
+ *    cipher_key_len   block cipher key length (in bytes)
+ */
+void br_ssl_engine_switch_cbc_out(br_ssl_engine_context *cc,
+	int is_client, int prf_id, int mac_id,
+	const br_block_cbcenc_class *bc_impl, size_t cipher_key_len);
+
+/*
+ * Switch to GCM decryption for incoming records.
+ *    cc               the engine context
+ *    is_client        non-zero for a client, zero for a server
+ *    prf_id           id of hash function for PRF
+ *    bc_impl          block cipher implementation (CTR)
+ *    cipher_key_len   block cipher key length (in bytes)
+ */
+void br_ssl_engine_switch_gcm_in(br_ssl_engine_context *cc,
+	int is_client, int prf_id,
+	const br_block_ctr_class *bc_impl, size_t cipher_key_len);
+
+/*
+ * Switch to GCM encryption for outgoing records.
+ *    cc               the engine context
+ *    is_client        non-zero for a client, zero for a server
+ *    prf_id           id of hash function for PRF
+ *    bc_impl          block cipher implementation (CTR)
+ *    cipher_key_len   block cipher key length (in bytes)
+ */
+void br_ssl_engine_switch_gcm_out(br_ssl_engine_context *cc,
+	int is_client, int prf_id,
+	const br_block_ctr_class *bc_impl, size_t cipher_key_len);
+
+/*
+ * Switch to ChaCha20+Poly1305 decryption for incoming records.
+ *    cc               the engine context
+ *    is_client        non-zero for a client, zero for a server
+ *    prf_id           id of hash function for PRF
+ */
+void br_ssl_engine_switch_chapol_in(br_ssl_engine_context *cc,
+	int is_client, int prf_id);
+
+/*
+ * Switch to ChaCha20+Poly1305 encryption for outgoing records.
+ *    cc               the engine context
+ *    is_client        non-zero for a client, zero for a server
+ *    prf_id           id of hash function for PRF
+ */
+void br_ssl_engine_switch_chapol_out(br_ssl_engine_context *cc,
+	int is_client, int prf_id);
+
+/*
+ * Switch to CCM decryption for incoming records.
+ *    cc               the engine context
+ *    is_client        non-zero for a client, zero for a server
+ *    prf_id           id of hash function for PRF
+ *    bc_impl          block cipher implementation (CTR+CBC)
+ *    cipher_key_len   block cipher key length (in bytes)
+ *    tag_len          tag length (in bytes)
+ */
+void br_ssl_engine_switch_ccm_in(br_ssl_engine_context *cc,
+	int is_client, int prf_id,
+	const br_block_ctrcbc_class *bc_impl,
+	size_t cipher_key_len, size_t tag_len);
+
+/*
+ * Switch to CCM encryption for outgoing records.
+ *    cc               the engine context
+ *    is_client        non-zero for a client, zero for a server
+ *    prf_id           id of hash function for PRF
+ *    bc_impl          block cipher implementation (CTR+CBC)
+ *    cipher_key_len   block cipher key length (in bytes)
+ *    tag_len          tag length (in bytes)
+ */
+void br_ssl_engine_switch_ccm_out(br_ssl_engine_context *cc,
+	int is_client, int prf_id,
+	const br_block_ctrcbc_class *bc_impl,
+	size_t cipher_key_len, size_t tag_len);
+
+/*
+ * Calls to T0-generated code.
+ */
+void br_ssl_hs_client_init_main(void *ctx);
+void br_ssl_hs_client_run(void *ctx);
+void br_ssl_hs_server_init_main(void *ctx);
+void br_ssl_hs_server_run(void *ctx);
+
+/*
+ * Get the hash function to use for signatures, given a bit mask of
+ * supported hash functions. This implements a strict choice order
+ * (namely SHA-256, SHA-384, SHA-512, SHA-224, SHA-1). If the mask
+ * does not document support of any of these hash functions, then this
+ * function returns 0.
+ */
+int br_ssl_choose_hash(unsigned bf);
+
+/* ==================================================================== */
+
+/*
+ * PowerPC / POWER assembly stuff. The special BR_POWER_ASM_MACROS macro
+ * must be defined before including this file; this is done by source
+ * files that use some inline assembly for PowerPC / POWER machines.
+ */
+
+#if BR_POWER_ASM_MACROS
+
+#define lxvw4x(xt, ra, rb)        lxvw4x_(xt, ra, rb)
+#define stxvw4x(xt, ra, rb)       stxvw4x_(xt, ra, rb)
+
+#define bdnz(foo)                 bdnz_(foo)
+#define bdz(foo)                  bdz_(foo)
+#define beq(foo)                  beq_(foo)
+
+#define li(rx, value)             li_(rx, value)
+#define addi(rx, ra, imm)         addi_(rx, ra, imm)
+#define cmpldi(rx, imm)           cmpldi_(rx, imm)
+#define mtctr(rx)                 mtctr_(rx)
+#define vspltb(vrt, vrb, uim)     vspltb_(vrt, vrb, uim)
+#define vspltw(vrt, vrb, uim)     vspltw_(vrt, vrb, uim)
+#define vspltisb(vrt, imm)        vspltisb_(vrt, imm)
+#define vspltisw(vrt, imm)        vspltisw_(vrt, imm)
+#define vrlw(vrt, vra, vrb)       vrlw_(vrt, vra, vrb)
+#define vsbox(vrt, vra)           vsbox_(vrt, vra)
+#define vxor(vrt, vra, vrb)       vxor_(vrt, vra, vrb)
+#define vand(vrt, vra, vrb)       vand_(vrt, vra, vrb)
+#define vsro(vrt, vra, vrb)       vsro_(vrt, vra, vrb)
+#define vsl(vrt, vra, vrb)        vsl_(vrt, vra, vrb)
+#define vsldoi(vt, va, vb, sh)    vsldoi_(vt, va, vb, sh)
+#define vsr(vrt, vra, vrb)        vsr_(vrt, vra, vrb)
+#define vaddcuw(vrt, vra, vrb)    vaddcuw_(vrt, vra, vrb)
+#define vadduwm(vrt, vra, vrb)    vadduwm_(vrt, vra, vrb)
+#define vsububm(vrt, vra, vrb)    vsububm_(vrt, vra, vrb)
+#define vsubuwm(vrt, vra, vrb)    vsubuwm_(vrt, vra, vrb)
+#define vsrw(vrt, vra, vrb)       vsrw_(vrt, vra, vrb)
+#define vcipher(vt, va, vb)       vcipher_(vt, va, vb)
+#define vcipherlast(vt, va, vb)   vcipherlast_(vt, va, vb)
+#define vncipher(vt, va, vb)      vncipher_(vt, va, vb)
+#define vncipherlast(vt, va, vb)  vncipherlast_(vt, va, vb)
+#define vperm(vt, va, vb, vc)     vperm_(vt, va, vb, vc)
+#define vpmsumd(vt, va, vb)       vpmsumd_(vt, va, vb)
+#define xxpermdi(vt, va, vb, d)   xxpermdi_(vt, va, vb, d)
+
+#define lxvw4x_(xt, ra, rb)       "\tlxvw4x\t" #xt "," #ra "," #rb "\n"
+#define stxvw4x_(xt, ra, rb)      "\tstxvw4x\t" #xt "," #ra "," #rb "\n"
+
+#define label(foo)                #foo "%=:\n"
+#define bdnz_(foo)                "\tbdnz\t" #foo "%=\n"
+#define bdz_(foo)                 "\tbdz\t" #foo "%=\n"
+#define beq_(foo)                 "\tbeq\t" #foo "%=\n"
+
+#define li_(rx, value)            "\tli\t" #rx "," #value "\n"
+#define addi_(rx, ra, imm)        "\taddi\t" #rx "," #ra "," #imm "\n"
+#define cmpldi_(rx, imm)          "\tcmpldi\t" #rx "," #imm "\n"
+#define mtctr_(rx)                "\tmtctr\t" #rx "\n"
+#define vspltb_(vrt, vrb, uim)    "\tvspltb\t" #vrt "," #vrb "," #uim "\n"
+#define vspltw_(vrt, vrb, uim)    "\tvspltw\t" #vrt "," #vrb "," #uim "\n"
+#define vspltisb_(vrt, imm)       "\tvspltisb\t" #vrt "," #imm "\n"
+#define vspltisw_(vrt, imm)       "\tvspltisw\t" #vrt "," #imm "\n"
+#define vrlw_(vrt, vra, vrb)      "\tvrlw\t" #vrt "," #vra "," #vrb "\n"
+#define vsbox_(vrt, vra)          "\tvsbox\t" #vrt "," #vra "\n"
+#define vxor_(vrt, vra, vrb)      "\tvxor\t" #vrt "," #vra "," #vrb "\n"
+#define vand_(vrt, vra, vrb)      "\tvand\t" #vrt "," #vra "," #vrb "\n"
+#define vsro_(vrt, vra, vrb)      "\tvsro\t" #vrt "," #vra "," #vrb "\n"
+#define vsl_(vrt, vra, vrb)       "\tvsl\t" #vrt "," #vra "," #vrb "\n"
+#define vsldoi_(vt, va, vb, sh)   "\tvsldoi\t" #vt "," #va "," #vb "," #sh "\n"
+#define vsr_(vrt, vra, vrb)       "\tvsr\t" #vrt "," #vra "," #vrb "\n"
+#define vaddcuw_(vrt, vra, vrb)   "\tvaddcuw\t" #vrt "," #vra "," #vrb "\n"
+#define vadduwm_(vrt, vra, vrb)   "\tvadduwm\t" #vrt "," #vra "," #vrb "\n"
+#define vsububm_(vrt, vra, vrb)   "\tvsububm\t" #vrt "," #vra "," #vrb "\n"
+#define vsubuwm_(vrt, vra, vrb)   "\tvsubuwm\t" #vrt "," #vra "," #vrb "\n"
+#define vsrw_(vrt, vra, vrb)      "\tvsrw\t" #vrt "," #vra "," #vrb "\n"
+#define vcipher_(vt, va, vb)      "\tvcipher\t" #vt "," #va "," #vb "\n"
+#define vcipherlast_(vt, va, vb)  "\tvcipherlast\t" #vt "," #va "," #vb "\n"
+#define vncipher_(vt, va, vb)     "\tvncipher\t" #vt "," #va "," #vb "\n"
+#define vncipherlast_(vt, va, vb) "\tvncipherlast\t" #vt "," #va "," #vb "\n"
+#define vperm_(vt, va, vb, vc)    "\tvperm\t" #vt "," #va "," #vb "," #vc "\n"
+#define vpmsumd_(vt, va, vb)      "\tvpmsumd\t" #vt "," #va "," #vb "\n"
+#define xxpermdi_(vt, va, vb, d)  "\txxpermdi\t" #vt "," #va "," #vb "," #d "\n"
+
+#endif
+
+/* ==================================================================== */
+/*
+ * Special "activate intrinsics" code, needed for some compiler versions.
+ * This is defined at the end of this file, so that it won't impact any
+ * of the inline functions defined previously; and it is controlled by
+ * a specific macro defined in the caller code.
+ *
+ * Calling code conventions:
+ *
+ *  - Caller must define BR_ENABLE_INTRINSICS before including "inner.h".
+ *  - Functions that use intrinsics must be enclosed in an "enabled"
+ *    region (between BR_TARGETS_X86_UP and BR_TARGETS_X86_DOWN).
+ *  - Functions that use intrinsics must be tagged with the appropriate
+ *    BR_TARGET().
+ */
+
+#if BR_ENABLE_INTRINSICS && (BR_GCC_4_4 || BR_CLANG_3_7 || BR_MSC_2005)
+
+/*
+ * x86 intrinsics (both 32-bit and 64-bit).
+ */
+#if BR_i386 || BR_amd64
+
+/*
+ * On GCC before version 5.0, we need to use the pragma to enable the
+ * target options globally, because the 'target' function attribute
+ * appears to be unreliable. Before 4.6 we must also avoid the
+ * push_options / pop_options mechanism, because it tends to trigger
+ * some internal compiler errors.
+ */
+#if BR_GCC && !BR_GCC_5_0
+#if BR_GCC_4_6
+#define BR_TARGETS_X86_UP \
+	_Pragma("GCC push_options") \
+	_Pragma("GCC target(\"sse2,ssse3,sse4.1,aes,pclmul,rdrnd\")")
+#define BR_TARGETS_X86_DOWN \
+	_Pragma("GCC pop_options")
+#else
+#define BR_TARGETS_X86_UP \
+	_Pragma("GCC target(\"sse2,ssse3,sse4.1,aes,pclmul\")")
+#define BR_TARGETS_X86_DOWN
+#endif
+#pragma GCC diagnostic ignored "-Wpsabi"
+#endif
+
+#if BR_CLANG && !BR_CLANG_3_8
+#undef __SSE2__
+#undef __SSE3__
+#undef __SSSE3__
+#undef __SSE4_1__
+#undef __AES__
+#undef __PCLMUL__
+#undef __RDRND__
+#define __SSE2__     1
+#define __SSE3__     1
+#define __SSSE3__    1
+#define __SSE4_1__   1
+#define __AES__      1
+#define __PCLMUL__   1
+#define __RDRND__    1
+#endif
+
+#ifndef BR_TARGETS_X86_UP
+#define BR_TARGETS_X86_UP
+#endif
+#ifndef BR_TARGETS_X86_DOWN
+#define BR_TARGETS_X86_DOWN
+#endif
+
+#if BR_GCC || BR_CLANG
+BR_TARGETS_X86_UP
+#include <x86intrin.h>
+#include <cpuid.h>
+#define br_bswap32   __builtin_bswap32
+BR_TARGETS_X86_DOWN
+#endif
+
+#if BR_MSC
+#include <stdlib.h>
+#include <intrin.h>
+#include <immintrin.h>
+#define br_bswap32   _byteswap_ulong
+#endif
+/* Return 1 if CPUID leaf 1 has all bits of the four masks set, else 0. */
+static inline int
+br_cpuid(uint32_t mask_eax, uint32_t mask_ebx,
+	uint32_t mask_ecx, uint32_t mask_edx)
+{
+#if BR_GCC || BR_CLANG
+	unsigned eax, ebx, ecx, edx;
+
+	if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
+		if ((eax & mask_eax) == mask_eax
+			&& (ebx & mask_ebx) == mask_ebx
+			&& (ecx & mask_ecx) == mask_ecx
+			&& (edx & mask_edx) == mask_edx)
+		{
+			return 1;
+		}
+	}
+#elif BR_MSC
+	int info[4]; /* filled as eax, ebx, ecx, edx (see checks below) */
+
+	__cpuid(info, 1);
+	if (((uint32_t)info[0] & mask_eax) == mask_eax
+		&& ((uint32_t)info[1] & mask_ebx) == mask_ebx
+		&& ((uint32_t)info[2] & mask_ecx) == mask_ecx
+		&& ((uint32_t)info[3] & mask_edx) == mask_edx)
+	{
+		return 1;
+	}
+#endif
+	return 0; /* query failed, a bit is missing, or unknown compiler */
+}
+
+#endif
+
+#endif
+
+/* ==================================================================== */
+
+#endif
diff --git a/third_party/bearssl/src/md5.c b/third_party/bearssl/src/md5.c
new file mode 100644
index 0000000..0df7abe
--- /dev/null
+++ b/third_party/bearssl/src/md5.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define F(B, C, D)     ((((C) ^ (D)) & (B)) ^ (D))
+#define G(B, C, D)     ((((C) ^ (B)) & (D)) ^ (C))
+#define H(B, C, D)     ((B) ^ (C) ^ (D))
+#define I(B, C, D)     ((C) ^ ((B) | ~(D)))
+
+#define ROTL(x, n)    (((x) << (n)) | ((x) >> (32 - (n))))
+
+/* see inner.h */
+const uint32_t br_md5_IV[4] = {
+	0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476
+};
+
+static const uint32_t K[64] = {
+	0xD76AA478, 0xE8C7B756, 0x242070DB, 0xC1BDCEEE,
+	0xF57C0FAF, 0x4787C62A, 0xA8304613, 0xFD469501,
+	0x698098D8, 0x8B44F7AF, 0xFFFF5BB1, 0x895CD7BE,
+	0x6B901122, 0xFD987193, 0xA679438E, 0x49B40821,
+
+	0xF61E2562, 0xC040B340, 0x265E5A51, 0xE9B6C7AA,
+	0xD62F105D, 0x02441453, 0xD8A1E681, 0xE7D3FBC8,
+	0x21E1CDE6, 0xC33707D6, 0xF4D50D87, 0x455A14ED,
+	0xA9E3E905, 0xFCEFA3F8, 0x676F02D9, 0x8D2A4C8A,
+
+	0xFFFA3942, 0x8771F681, 0x6D9D6122, 0xFDE5380C,
+	0xA4BEEA44, 0x4BDECFA9, 0xF6BB4B60, 0xBEBFBC70,
+	0x289B7EC6, 0xEAA127FA, 0xD4EF3085, 0x04881D05,
+	0xD9D4D039, 0xE6DB99E5, 0x1FA27CF8, 0xC4AC5665,
+
+	0xF4292244, 0x432AFF97, 0xAB9423A7, 0xFC93A039,
+	0x655B59C3, 0x8F0CCC92, 0xFFEFF47D, 0x85845DD1,
+	0x6FA87E4F, 0xFE2CE6E0, 0xA3014314, 0x4E0811A1,
+	0xF7537E82, 0xBD3AF235, 0x2AD7D2BB, 0xEB86D391
+};
+
+static const unsigned char MP[48] = {
+	1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12,
+	5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2,
+	0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9
+};
+
+/* see inner.h: apply the MD5 compression function for one block to val[] */
+void
+br_md5_round(const unsigned char *buf, uint32_t *val)
+{
+	uint32_t m[16];
+	uint32_t a, b, c, d;
+	int i;
+
+	a = val[0];
+	b = val[1];
+	c = val[2];
+	d = val[3];
+	/*
+	 * Decode the input block into sixteen 32-bit little-endian
+	 * words; br_range_dec32le() replaces an older per-word loop
+	 * that called br_dec32le() on buf + 4*i.
+	 */
+	br_range_dec32le(m, 16, buf);
+
+	for (i = 0; i < 16; i += 4) { /* round 1: F, words in natural order */
+		a = b + ROTL(a + F(b, c, d) + m[i + 0] + K[i + 0],  7);
+		d = a + ROTL(d + F(a, b, c) + m[i + 1] + K[i + 1], 12);
+		c = d + ROTL(c + F(d, a, b) + m[i + 2] + K[i + 2], 17);
+		b = c + ROTL(b + F(c, d, a) + m[i + 3] + K[i + 3], 22);
+	}
+	for (i = 16; i < 32; i += 4) { /* round 2: G, words picked via MP[] */
+		a = b + ROTL(a + G(b, c, d) + m[MP[i - 16]] + K[i + 0],  5);
+		d = a + ROTL(d + G(a, b, c) + m[MP[i - 15]] + K[i + 1],  9);
+		c = d + ROTL(c + G(d, a, b) + m[MP[i - 14]] + K[i + 2], 14);
+		b = c + ROTL(b + G(c, d, a) + m[MP[i - 13]] + K[i + 3], 20);
+	}
+	for (i = 32; i < 48; i += 4) { /* round 3: H, words picked via MP[] */
+		a = b + ROTL(a + H(b, c, d) + m[MP[i - 16]] + K[i + 0],  4);
+		d = a + ROTL(d + H(a, b, c) + m[MP[i - 15]] + K[i + 1], 11);
+		c = d + ROTL(c + H(d, a, b) + m[MP[i - 14]] + K[i + 2], 16);
+		b = c + ROTL(b + H(c, d, a) + m[MP[i - 13]] + K[i + 3], 23);
+	}
+	for (i = 48; i < 64; i += 4) { /* round 4: I, words picked via MP[] */
+		a = b + ROTL(a + I(b, c, d) + m[MP[i - 16]] + K[i + 0],  6);
+		d = a + ROTL(d + I(a, b, c) + m[MP[i - 15]] + K[i + 1], 10);
+		c = d + ROTL(c + I(d, a, b) + m[MP[i - 14]] + K[i + 2], 15);
+		b = c + ROTL(b + I(c, d, a) + m[MP[i - 13]] + K[i + 3], 21);
+	}
+
+	val[0] += a; /* add the working variables back into the state */
+	val[1] += b;
+	val[2] += c;
+	val[3] += d;
+}
+
+/* see bearssl.h: set vtable, load the MD5 IV, zero the byte count */
+void
+br_md5_init(br_md5_context *cc)
+{
+	cc->vtable = &br_md5_vtable;
+	memcpy(cc->val, br_md5_IV, sizeof cc->val);
+	cc->count = 0;
+}
+
+/* see bearssl.h: buffer input; run br_md5_round() on each full 64 bytes */
+void
+br_md5_update(br_md5_context *cc, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	size_t ptr;
+
+	buf = data;
+	ptr = (size_t)cc->count & 63; /* bytes already held in cc->buf */
+	while (len > 0) {
+		size_t clen;
+
+		clen = 64 - ptr; /* room left in cc->buf */
+		if (clen > len) {
+			clen = len;
+		}
+		memcpy(cc->buf + ptr, buf, clen);
+		ptr += clen;
+		buf += clen;
+		len -= clen;
+		cc->count += (uint64_t)clen;
+		if (ptr == 64) { /* buffer full: process it and start over */
+			br_md5_round(cc->buf, cc->val);
+			ptr = 0;
+		}
+	}
+}
+
+/* see bearssl.h: pad a copy of the state and write the digest to dst */
+void
+br_md5_out(const br_md5_context *cc, void *dst)
+{
+	unsigned char buf[64];
+	uint32_t val[4];
+	size_t ptr;
+
+	ptr = (size_t)cc->count & 63; /* bytes pending in cc->buf */
+	memcpy(buf, cc->buf, ptr); /* work on copies: cc is left unchanged */
+	memcpy(val, cc->val, sizeof val);
+	buf[ptr ++] = 0x80; /* padding starts with a single 1 bit */
+	if (ptr > 56) { /* no room for the 8-byte length: extra block */
+		memset(buf + ptr, 0, 64 - ptr);
+		br_md5_round(buf, val);
+		memset(buf, 0, 56);
+	} else {
+		memset(buf + ptr, 0, 56 - ptr);
+	}
+	br_enc64le(buf + 56, cc->count << 3); /* byte count -> bit count */
+	br_md5_round(buf, val);
+	br_range_enc32le(dst, val, 4);
+}
+
+/* see bearssl.h: export the 4 state words (little-endian); return count */
+uint64_t
+br_md5_state(const br_md5_context *cc, void *dst)
+{
+	br_range_enc32le(dst, cc->val, 4);
+	return cc->count;
+}
+
+/* see bearssl.h: load 4 little-endian state words and the byte count */
+void
+br_md5_set_state(br_md5_context *cc, const void *stb, uint64_t count)
+{
+	br_range_dec32le(cc->val, 4, stb);
+	cc->count = count;
+}
+
+/* see bearssl.h: MD5 class descriptor (context size, flags, methods) */
+const br_hash_class br_md5_vtable = {
+	sizeof(br_md5_context),
+	BR_HASHDESC_ID(br_md5_ID)
+		| BR_HASHDESC_OUT(16)
+		| BR_HASHDESC_STATE(16)
+		| BR_HASHDESC_LBLEN(6)
+		| BR_HASHDESC_MD_PADDING,
+	(void (*)(const br_hash_class **))&br_md5_init,
+	(void (*)(const br_hash_class **, const void *, size_t))&br_md5_update,
+	(void (*)(const br_hash_class *const *, void *))&br_md5_out,
+	(uint64_t (*)(const br_hash_class *const *, void *))&br_md5_state,
+	(void (*)(const br_hash_class **, const void *, uint64_t))
+		&br_md5_set_state
+};
diff --git a/third_party/bearssl/src/md5sha1.c b/third_party/bearssl/src/md5sha1.c
new file mode 100644
index 0000000..f701aee
--- /dev/null
+++ b/third_party/bearssl/src/md5sha1.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl.h: set vtable, load the MD5 and SHA-1 IVs, zero the count */
+void
+br_md5sha1_init(br_md5sha1_context *cc)
+{
+	cc->vtable = &br_md5sha1_vtable;
+	memcpy(cc->val_md5, br_md5_IV, sizeof cc->val_md5);
+	memcpy(cc->val_sha1, br_sha1_IV, sizeof cc->val_sha1);
+	cc->count = 0;
+}
+
+/* see bearssl.h: buffer input; feed each full block to MD5 and SHA-1 */
+void
+br_md5sha1_update(br_md5sha1_context *cc, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	size_t ptr;
+
+	buf = data;
+	ptr = (size_t)cc->count & 63; /* bytes already held in cc->buf */
+	while (len > 0) {
+		size_t clen;
+
+		clen = 64 - ptr; /* room left in cc->buf */
+		if (clen > len) {
+			clen = len;
+		}
+		memcpy(cc->buf + ptr, buf, clen);
+		ptr += clen;
+		buf += clen;
+		len -= clen;
+		cc->count += (uint64_t)clen;
+		if (ptr == 64) { /* one shared buffer drives both hashes */
+			br_md5_round(cc->buf, cc->val_md5);
+			br_sha1_round(cc->buf, cc->val_sha1);
+			ptr = 0;
+		}
+	}
+}
+
+/* see bearssl.h: write the MD5 output at dst, the SHA-1 output at dst+16 */
+void
+br_md5sha1_out(const br_md5sha1_context *cc, void *dst)
+{
+	unsigned char buf[64];
+	uint32_t val_md5[4];
+	uint32_t val_sha1[5];
+	size_t ptr;
+	unsigned char *out;
+	uint64_t count;
+
+	count = cc->count;
+	ptr = (size_t)count & 63; /* bytes pending in cc->buf */
+	memcpy(buf, cc->buf, ptr); /* work on copies: cc is left unchanged */
+	memcpy(val_md5, cc->val_md5, sizeof val_md5);
+	memcpy(val_sha1, cc->val_sha1, sizeof val_sha1);
+	buf[ptr ++] = 0x80; /* padding starts with a single 1 bit */
+	if (ptr > 56) { /* no room for the 8-byte length: extra block */
+		memset(buf + ptr, 0, 64 - ptr);
+		br_md5_round(buf, val_md5);
+		br_sha1_round(buf, val_sha1);
+		memset(buf, 0, 56);
+	} else {
+		memset(buf + ptr, 0, 56 - ptr);
+	}
+	count <<= 3; /* byte count -> bit count */
+	br_enc64le(buf + 56, count); /* MD5 block: little-endian length */
+	br_md5_round(buf, val_md5);
+	br_enc64be(buf + 56, count); /* SHA-1 block: big-endian length */
+	br_sha1_round(buf, val_sha1);
+	out = dst;
+	br_range_enc32le(out, val_md5, 4);
+	br_range_enc32be(out + 16, val_sha1, 5);
+}
+
+/* see bearssl.h: export MD5 words (LE) then SHA-1 words (BE); return count */
+uint64_t
+br_md5sha1_state(const br_md5sha1_context *cc, void *dst)
+{
+	unsigned char *out;
+
+	out = dst;
+	br_range_enc32le(out, cc->val_md5, 4);
+	br_range_enc32be(out + 16, cc->val_sha1, 5);
+	return cc->count;
+}
+
+/* see bearssl.h: load MD5 words (LE), SHA-1 words (BE) and the byte count */
+void
+br_md5sha1_set_state(br_md5sha1_context *cc, const void *stb, uint64_t count)
+{
+	const unsigned char *buf;
+
+	buf = stb;
+	br_range_dec32le(cc->val_md5, 4, buf);
+	br_range_dec32be(cc->val_sha1, 5, buf + 16);
+	cc->count = count;
+}
+
+/* see bearssl.h: MD5+SHA-1 class descriptor (context size, flags, methods) */
+const br_hash_class br_md5sha1_vtable = {
+	sizeof(br_md5sha1_context),
+	BR_HASHDESC_ID(br_md5sha1_ID)
+		| BR_HASHDESC_OUT(36)
+		| BR_HASHDESC_STATE(36)
+		| BR_HASHDESC_LBLEN(6),
+	(void (*)(const br_hash_class **))&br_md5sha1_init,
+	(void (*)(const br_hash_class **, const void *, size_t))
+		&br_md5sha1_update,
+	(void (*)(const br_hash_class *const *, void *))
+		&br_md5sha1_out,
+	(uint64_t (*)(const br_hash_class *const *, void *))
+		&br_md5sha1_state,
+	(void (*)(const br_hash_class **, const void *, uint64_t))
+		&br_md5sha1_set_state
+};
diff --git a/third_party/bearssl/src/mgf1.c b/third_party/bearssl/src/mgf1.c
new file mode 100644
index 0000000..7a23588
--- /dev/null
+++ b/third_party/bearssl/src/mgf1.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: XOR the MGF1(seed) stream into data[0..len-1] */
+void
+br_mgf1_xor(void *data, size_t len,
+	const br_hash_class *dig, const void *seed, size_t seed_len)
+{
+	unsigned char *buf;
+	size_t u, hlen;
+	uint32_t c;
+
+	buf = data;
+	hlen = br_digest_size(dig);
+	for (u = 0, c = 0; u < len; u += hlen, c ++) { /* c: block counter */
+		br_hash_compat_context hc;
+		unsigned char tmp[64]; /* NOTE(review): assumes hlen <= 64 */
+		size_t v;
+
+		hc.vtable = dig;
+		dig->init(&hc.vtable);
+		dig->update(&hc.vtable, seed, seed_len);
+		br_enc32be(tmp, c); /* counter as 4 big-endian bytes */
+		dig->update(&hc.vtable, tmp, 4);
+		dig->out(&hc.vtable, tmp); /* tmp = hash of seed || counter */
+		for (v = 0; v < hlen; v ++) {
+			if ((u + v) >= len) { /* last chunk may be partial */
+				break;
+			}
+			buf[u + v] ^= tmp[v];
+		}
+	}
+}
diff --git a/third_party/bearssl/src/multihash.c b/third_party/bearssl/src/multihash.c
new file mode 100644
index 0000000..b6df2e0
--- /dev/null
+++ b/third_party/bearssl/src/multihash.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * An aggregate context that is large enough for all supported hash
+ * functions.
+ */
+typedef union {
+	const br_hash_class *vtable;	/* the code in this file passes &g.vtable to the hash methods */
+	br_md5_context md5;
+	br_sha1_context sha1;
+	br_sha224_context sha224;
+	br_sha256_context sha256;
+	br_sha384_context sha384;
+	br_sha512_context sha512;
+} gen_hash_context;
+
+/*
+ * Get the offset to the state for a specific hash function within the
+ * context structure. This shall be called only for the supported hash
+ * functions (identifiers 1 to 6, as listed in the comments below).
+ */
+static size_t
+get_state_offset(int id)
+{
+	if (id >= 5) {
+		/*
+		 * SHA-384 has id 5, and SHA-512 has id 6. Both use
+		 * eight 64-bit words for their state.
+		 */
+		return offsetof(br_multihash_context, val_64)
+			+ ((size_t)(id - 5) * (8 * sizeof(uint64_t)));
+	} else {
+		/*
+		 * MD5 has id 1, SHA-1 has id 2, SHA-224 has id 3 and
+		 * SHA-256 has id 4. They use 32-bit words for their
+		 * states (4 words for MD5, 5 for SHA-1, 8 for SHA-224
+		 * and 8 for SHA-256).
+		 */
+		unsigned x;
+
+		x = id - 1;
+		x = ((x + (x & (x >> 1))) << 2) + (x >> 1);	/* x = 0, 1, 2, 3 yields word offsets 0, 4, 9, 17 */
+		return offsetof(br_multihash_context, val_32)
+			+ x * sizeof(uint32_t);
+	}
+}
+
+/* see bearssl_hash.h: fills the whole context structure with zero bytes */
+void
+br_multihash_zero(br_multihash_context *ctx)
+{
+	/*
+	 * This is not standard, but yields very short and efficient code,
+	 * and it works "everywhere".
+	 */
+	memset(ctx, 0, sizeof *ctx);
+}
+
+/* see bearssl_hash.h: resets the byte count and stores a fresh state for each configured hash */
+void
+br_multihash_init(br_multihash_context *ctx)
+{
+	int i;
+
+	ctx->count = 0;
+	for (i = 1; i <= 6; i ++) {
+		const br_hash_class *hc;
+
+		hc = ctx->impl[i - 1];	/* identifiers are 1-based, impl[] is 0-based */
+		if (hc != NULL) {
+			gen_hash_context g;	/* scratch context, discarded after the state is saved */
+
+			hc->init(&g.vtable);
+			hc->state(&g.vtable,
+				(unsigned char *)ctx + get_state_offset(i));
+		}
+	}
+}
+
+/* see bearssl_hash.h: buffers input in 128-byte chunks; each full chunk is fed to every configured hash */
+void
+br_multihash_update(br_multihash_context *ctx, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	size_t ptr;
+
+	buf = data;
+	ptr = (size_t)ctx->count & 127;	/* number of bytes already pending in ctx->buf */
+	while (len > 0) {
+		size_t clen;
+
+		clen = 128 - ptr;	/* room left in the buffer */
+		if (clen > len) {
+			clen = len;
+		}
+		memcpy(ctx->buf + ptr, buf, clen);
+		ptr += clen;
+		buf += clen;
+		len -= clen;
+		ctx->count += (uint64_t)clen;
+		if (ptr == 128) {
+			int i;
+
+			for (i = 1; i <= 6; i ++) {
+				const br_hash_class *hc;
+
+				hc = ctx->impl[i - 1];
+				if (hc != NULL) {
+					gen_hash_context g;
+					unsigned char *state;
+
+					state = (unsigned char *)ctx
+						+ get_state_offset(i);
+					hc->set_state(&g.vtable,
+						state, ctx->count - 128);	/* count - 128: bytes processed before this chunk */
+					hc->update(&g.vtable, ctx->buf, 128);
+					hc->state(&g.vtable, state);	/* write the updated state back into ctx */
+				}
+			}
+			ptr = 0;
+		}
+	}
+}
+
+/* see bearssl_hash.h: writes the current output of hash 'id' into dst; returns 0 if that hash is not configured */
+size_t
+br_multihash_out(const br_multihash_context *ctx, int id, void *dst)
+{
+	const br_hash_class *hc;
+	gen_hash_context g;
+	const unsigned char *state;
+
+	hc = ctx->impl[id - 1];	/* NOTE(review): id is not range-checked here; callers presumably pass 1..6 */
+	if (hc == NULL) {
+		return 0;
+	}
+	state = (const unsigned char *)ctx + get_state_offset(id);
+	hc->set_state(&g.vtable, state, ctx->count & ~(uint64_t)127);	/* saved state covers the full 128-byte chunks */
+	hc->update(&g.vtable, ctx->buf, ctx->count & (uint64_t)127);	/* then inject the pending partial chunk */
+	hc->out(&g.vtable, dst);
+	return (hc->desc >> BR_HASHDESC_OUT_OFF) & BR_HASHDESC_OUT_MASK;	/* output size field of the hash descriptor (per macro names) */
+}
diff --git a/third_party/bearssl/src/poly1305_ctmul.c b/third_party/bearssl/src/poly1305_ctmul.c
new file mode 100644
index 0000000..150e610
--- /dev/null
+++ b/third_party/bearssl/src/poly1305_ctmul.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Perform the inner processing of blocks for Poly1305. The accumulator
+ * and the r key are provided as arrays of 26-bit words (these words
+ * are allowed to have an extra bit, i.e. use 27 bits).
+ *
+ * On output, all accumulator words fit on 26 bits, except acc[1], which
+ * may be slightly larger (but by a very small amount only).
+ */
+static void
+poly1305_inner(uint32_t *acc, const uint32_t *r, const void *data, size_t len)
+{
+	/*
+	 * Implementation notes: we split the 130-bit values into five
+	 * 26-bit words. This gives us some space for carries.
+	 *
+	 * This code is inspired by the public-domain code available
+	 * on:
+	 *      https://github.com/floodyberry/poly1305-donna
+	 *
+	 * Since we compute modulo 2^130-5, the "upper words" become
+	 * low words with a factor of 5; that is, x*2^130 = x*5 mod p.
+	 */
+	const unsigned char *buf;
+	uint32_t a0, a1, a2, a3, a4;
+	uint32_t r0, r1, r2, r3, r4;
+	uint32_t u1, u2, u3, u4;
+
+	r0 = r[0];
+	r1 = r[1];
+	r2 = r[2];
+	r3 = r[3];
+	r4 = r[4];
+
+	u1 = r1 * 5;	/* u1..u4: r words with the wrap-around factor 5 pre-applied */
+	u2 = r2 * 5;
+	u3 = r3 * 5;
+	u4 = r4 * 5;
+
+	a0 = acc[0];
+	a1 = acc[1];
+	a2 = acc[2];
+	a3 = acc[3];
+	a4 = acc[4];
+
+	buf = data;
+	while (len > 0) {
+		uint64_t w0, w1, w2, w3, w4;
+		uint64_t c;
+		unsigned char tmp[16];
+
+		/*
+		 * If there is a partial block, right-pad it with zeros.
+		 */
+		if (len < 16) {
+			memset(tmp, 0, sizeof tmp);
+			memcpy(tmp, buf, len);
+			buf = tmp;
+			len = 16;	/* the padded block is the last one: the loop ends after it */
+		}
+
+		/*
+		 * Decode next block and apply the "high bit"; that value
+		 * is added to the accumulator.
+		 */
+		a0 += br_dec32le(buf) & 0x03FFFFFF;
+		a1 += (br_dec32le(buf +  3) >> 2) & 0x03FFFFFF;
+		a2 += (br_dec32le(buf +  6) >> 4) & 0x03FFFFFF;
+		a3 += (br_dec32le(buf +  9) >> 6) & 0x03FFFFFF;
+		a4 += (br_dec32le(buf + 12) >> 8) | 0x01000000;	/* bit 24 of word 4 is bit 128 of the block */
+
+		/*
+		 * Compute multiplication.
+		 */
+#define M(x, y)   ((uint64_t)(x) * (uint64_t)(y))
+
+		w0 = M(a0, r0) + M(a1, u4) + M(a2, u3) + M(a3, u2) + M(a4, u1);
+		w1 = M(a0, r1) + M(a1, r0) + M(a2, u4) + M(a3, u3) + M(a4, u2);
+		w2 = M(a0, r2) + M(a1, r1) + M(a2, r0) + M(a3, u4) + M(a4, u3);
+		w3 = M(a0, r3) + M(a1, r2) + M(a2, r1) + M(a3, r0) + M(a4, u4);
+		w4 = M(a0, r4) + M(a1, r3) + M(a2, r2) + M(a3, r1) + M(a4, r0);
+
+#undef M
+		/*
+		 * Perform some (partial) modular reduction. This step is
+		 * enough to keep values in ranges such that there won't
+		 * be carry overflows. Most of the reduction was done in
+		 * the multiplication step (by using the 'u*' values, and
+		 * using the fact that 2^130 = 5 mod p); here we perform
+		 * some carry propagation.
+		 */
+		c = w0 >> 26;
+		a0 = (uint32_t)w0 & 0x3FFFFFF;
+		w1 += c;
+		c = w1 >> 26;
+		a1 = (uint32_t)w1 & 0x3FFFFFF;
+		w2 += c;
+		c = w2 >> 26;
+		a2 = (uint32_t)w2 & 0x3FFFFFF;
+		w3 += c;
+		c = w3 >> 26;
+		a3 = (uint32_t)w3 & 0x3FFFFFF;
+		w4 += c;
+		c = w4 >> 26;
+		a4 = (uint32_t)w4 & 0x3FFFFFF;
+		a0 += (uint32_t)c * 5;	/* carry out of the top word wraps around with factor 5 */
+		a1 += a0 >> 26;	/* a1 is not re-masked: it may slightly exceed 26 bits */
+		a0 &= 0x3FFFFFF;
+
+		buf += 16;
+		len -= 16;
+	}
+
+	acc[0] = a0;
+	acc[1] = a1;
+	acc[2] = a2;
+	acc[3] = a3;
+	acc[4] = a4;
+}
+
+/* see bearssl_block.h: ChaCha20+Poly1305 run; data[] is processed in place and the 16-byte tag is written to tag[] */
+void
+br_poly1305_ctmul_run(const void *key, const void *iv,
+	void *data, size_t len, const void *aad, size_t aad_len,
+	void *tag, br_chacha20_run ichacha, int encrypt)
+{
+	unsigned char pkey[32], foot[16];
+	uint32_t r[5], acc[5], cc, ctl, hi;
+	uint64_t w;
+	int i;
+
+	/*
+	 * Compute the MAC key. The 'r' value is the first 16 bytes of
+	 * pkey[].
+	 */
+	memset(pkey, 0, sizeof pkey);
+	ichacha(key, iv, 0, pkey, sizeof pkey);
+
+	/*
+	 * If encrypting, ChaCha20 must run first, followed by Poly1305.
+	 * When decrypting, the operations are reversed.
+	 */
+	if (encrypt) {
+		ichacha(key, iv, 1, data, len);
+	}
+
+	/*
+	 * Run Poly1305. We must process the AAD, then ciphertext, then
+	 * the footer (with the lengths). Note that the AAD and ciphertext
+	 * are meant to be padded with zeros up to the next multiple of 16,
+	 * and the length of the footer is 16 bytes as well.
+	 */
+
+	/*
+	 * Decode the 'r' value into 26-bit words, with the "clamping"
+	 * operation applied.
+	 */
+	r[0] = br_dec32le(pkey) & 0x03FFFFFF;
+	r[1] = (br_dec32le(pkey +  3) >> 2) & 0x03FFFF03;
+	r[2] = (br_dec32le(pkey +  6) >> 4) & 0x03FFC0FF;
+	r[3] = (br_dec32le(pkey +  9) >> 6) & 0x03F03FFF;
+	r[4] = (br_dec32le(pkey + 12) >> 8) & 0x000FFFFF;
+
+	/*
+	 * Accumulator is 0.
+	 */
+	memset(acc, 0, sizeof acc);
+
+	/*
+	 * Process the additional authenticated data, ciphertext, and
+	 * footer in due order.
+	 */
+	br_enc64le(foot, (uint64_t)aad_len);
+	br_enc64le(foot + 8, (uint64_t)len);
+	poly1305_inner(acc, r, aad, aad_len);
+	poly1305_inner(acc, r, data, len);
+	poly1305_inner(acc, r, foot, sizeof foot);
+
+	/*
+	 * Finalise modular reduction. This is done with carry propagation
+	 * and applying the '2^130 = 5 mod p' rule: the carry out of acc[4]
+	 * has weight 2^130, so it re-enters acc[0] multiplied by 5. The
+	 * output of poly1305_inner() is already mostly reduced, since only
+	 * acc[1] may be (very slightly) above 2^26.
+	 */
+	cc = 0;
+	for (i = 1; i <= 6; i ++) {
+		int j;
+
+		j = (i >= 5) ? i - 5 : i;
+		acc[j] += (i == 5) ? cc * 5 : cc;	/* i is a public loop counter, not secret data */
+		cc = acc[j] >> 26;
+		acc[j] &= 0x03FFFFFF;
+	}
+
+	/*
+	 * We may still have a value in the 2^130-5..2^130-1 range, in
+	 * which case we must reduce it again. The code below selects,
+	 * in constant-time, between 'acc' and 'acc-p'.
+	 */
+	ctl = GT(acc[0], 0x03FFFFFA);
+	for (i = 1; i < 5; i ++) {
+		ctl &= EQ(acc[i], 0x03FFFFFF);
+	}
+	cc = 5;	/* acc - p is computed as acc + 5, truncated to 130 bits */
+	for (i = 0; i < 5; i ++) {
+		uint32_t t;
+
+		t = (acc[i] + cc);
+		cc = t >> 26;
+		t &= 0x03FFFFFF;
+		acc[i] = MUX(ctl, t, acc[i]);
+	}
+
+	/*
+	 * Convert back the accumulator to 32-bit words, and add the
+	 * 's' value (second half of pkey[]). That addition is done
+	 * modulo 2^128.
+	 */
+	w = (uint64_t)acc[0] + ((uint64_t)acc[1] << 26) + br_dec32le(pkey + 16);
+	br_enc32le((unsigned char *)tag, (uint32_t)w);
+	w = (w >> 32) + ((uint64_t)acc[2] << 20) + br_dec32le(pkey + 20);
+	br_enc32le((unsigned char *)tag + 4, (uint32_t)w);
+	w = (w >> 32) + ((uint64_t)acc[3] << 14) + br_dec32le(pkey + 24);
+	br_enc32le((unsigned char *)tag + 8, (uint32_t)w);
+	hi = (uint32_t)(w >> 32) + (acc[4] << 8) + br_dec32le(pkey + 28);	/* 32-bit arithmetic drops bits 128 and above */
+	br_enc32le((unsigned char *)tag + 12, hi);
+
+	/*
+	 * If decrypting, then ChaCha20 runs _after_ Poly1305.
+	 */
+	if (!encrypt) {
+		ichacha(key, iv, 1, data, len);
+	}
+}
diff --git a/third_party/bearssl/src/poly1305_ctmul32.c b/third_party/bearssl/src/poly1305_ctmul32.c
new file mode 100644
index 0000000..15d9635
--- /dev/null
+++ b/third_party/bearssl/src/poly1305_ctmul32.c
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Perform the inner processing of blocks for Poly1305.
+ */
+static void
+poly1305_inner(uint32_t *a, const uint32_t *r, const void *data, size_t len)
+{
+	/*
+	 * Implementation notes: we split the 130-bit values into ten
+	 * 13-bit words. This gives us some space for carries and allows
+	 * using only 32x32->32 multiplications, which are way faster than
+	 * 32x32->64 multiplications on the ARM Cortex-M0/M0+, and also
+	 * help in making constant-time code on the Cortex-M3.
+	 *
+	 * Since we compute modulo 2^130-5, the "upper words" become
+	 * low words with a factor of 5; that is, x*2^130 = x*5 mod p.
+	 * This has already been integrated in the r[] array, which
+	 * is extended to the 0..18 range.
+	 *
+	 * In each loop iteration, a[] and r[] words are 13-bit each,
+	 * except a[1] which may use 14 bits.
+	 */
+	const unsigned char *buf;
+
+	buf = data;
+	while (len > 0) {
+		unsigned char tmp[16];
+		uint32_t b[10];
+		unsigned u, v;
+		uint32_t z, cc1, cc2;
+
+		/*
+		 * If there is a partial block, right-pad it with zeros.
+		 */
+		if (len < 16) {
+			memset(tmp, 0, sizeof tmp);
+			memcpy(tmp, buf, len);
+			buf = tmp;
+			len = 16;	/* the padded block is the last one: the loop ends after it */
+		}
+
+		/*
+		 * Decode next block and apply the "high bit"; that value
+		 * is added to the accumulator.
+		 */
+		v = br_dec16le(buf);
+		a[0] += v & 0x01FFF;
+		v >>= 13;
+		v |= buf[2] << 3;
+		v |= buf[3] << 11;
+		a[1] += v & 0x01FFF;
+		v >>= 13;
+		v |= buf[4] << 6;
+		a[2] += v & 0x01FFF;
+		v >>= 13;
+		v |= buf[5] << 1;
+		v |= buf[6] << 9;
+		a[3] += v & 0x01FFF;
+		v >>= 13;
+		v |= buf[7] << 4;
+		v |= buf[8] << 12;
+		a[4] += v & 0x01FFF;
+		v >>= 13;
+		v |= buf[9] << 7;
+		a[5] += v & 0x01FFF;
+		v >>= 13;
+		v |= buf[10] << 2;
+		v |= buf[11] << 10;
+		a[6] += v & 0x01FFF;
+		v >>= 13;
+		v |= buf[12] << 5;
+		a[7] += v & 0x01FFF;
+		v = br_dec16le(buf + 13);	/* bit 104 falls on a byte boundary: restart decoding from byte 13 */
+		a[8] += v & 0x01FFF;
+		v >>= 13;
+		v |= buf[15] << 3;
+		a[9] += v | 0x00800;	/* bit 11 of word 9 is bit 128 of the block */
+
+		/*
+		 * At that point, all a[] values fit on 14 bits, while
+		 * all r[] values fit on 13 bits. Thus products fit on
+		 * 27 bits, and we can accumulate up to 31 of them in
+		 * a 32-bit word and still have some room for carries.
+		 */
+
+		/*
+		 * Now a[] contains words with values up to 14 bits each.
+		 * We perform the multiplication with r[].
+		 *
+		 * The extended words of r[] may be larger than 13 bits
+		 * (they are 5 times a 13-bit word) so the full summation
+		 * may yield values up to 46 times a 27-bit word, which
+		 * does not fit on a 32-bit word. To avoid that issue, we
+		 * must split the loop below in two, with a carry
+		 * propagation operation in the middle.
+		 */
+		cc1 = 0;
+		for (u = 0; u < 10; u ++) {	/* first half: contributions of a[0]..a[4] */
+			uint32_t s;
+
+			s = cc1
+				+ MUL15(a[0], r[u + 9 - 0])
+				+ MUL15(a[1], r[u + 9 - 1])
+				+ MUL15(a[2], r[u + 9 - 2])
+				+ MUL15(a[3], r[u + 9 - 3])
+				+ MUL15(a[4], r[u + 9 - 4]);
+			b[u] = s & 0x1FFF;
+			cc1 = s >> 13;
+		}
+		cc2 = 0;
+		for (u = 0; u < 10; u ++) {	/* second half: contributions of a[5]..a[9] */
+			uint32_t s;
+
+			s = b[u] + cc2
+				+ MUL15(a[5], r[u + 9 - 5])
+				+ MUL15(a[6], r[u + 9 - 6])
+				+ MUL15(a[7], r[u + 9 - 7])
+				+ MUL15(a[8], r[u + 9 - 8])
+				+ MUL15(a[9], r[u + 9 - 9]);
+			b[u] = s & 0x1FFF;
+			cc2 = s >> 13;
+		}
+		memcpy(a, b, sizeof b);
+
+		/*
+		 * The two carries "loop back" with a factor of 5. We
+		 * propagate them into a[0] and a[1].
+		 */
+		z = cc1 + cc2;
+		z += (z << 2) + a[0];	/* z = 5 * (cc1 + cc2) + a[0] */
+		a[0] = z & 0x1FFF;
+		a[1] += z >> 13;
+
+		buf += 16;
+		len -= 16;
+	}
+}
+
+/* see bearssl_block.h: ChaCha20+Poly1305 run; data[] is processed in place and the 16-byte tag is written to tag[] */
+void
+br_poly1305_ctmul32_run(const void *key, const void *iv,
+	void *data, size_t len, const void *aad, size_t aad_len,
+	void *tag, br_chacha20_run ichacha, int encrypt)
+{
+	unsigned char pkey[32], foot[16];
+	uint32_t z, r[19], acc[10], cc, ctl;
+	int i;
+
+	/*
+	 * Compute the MAC key. The 'r' value is the first 16 bytes of
+	 * pkey[].
+	 */
+	memset(pkey, 0, sizeof pkey);
+	ichacha(key, iv, 0, pkey, sizeof pkey);
+
+	/*
+	 * If encrypting, ChaCha20 must run first, followed by Poly1305.
+	 * When decrypting, the operations are reversed.
+	 */
+	if (encrypt) {
+		ichacha(key, iv, 1, data, len);
+	}
+
+	/*
+	 * Run Poly1305. We must process the AAD, then ciphertext, then
+	 * the footer (with the lengths). Note that the AAD and ciphertext
+	 * are meant to be padded with zeros up to the next multiple of 16,
+	 * and the length of the footer is 16 bytes as well.
+	 */
+
+	/*
+	 * Decode the 'r' value into 13-bit words, with the "clamping"
+	 * operation applied.
+	 */
+	z = br_dec32le(pkey) & 0x03FFFFFF;
+	r[9] = z & 0x1FFF;	/* the ten words of r are stored in r[9]..r[18] */
+	r[10] = z >> 13;
+	z = (br_dec32le(pkey +  3) >> 2) & 0x03FFFF03;
+	r[11] = z & 0x1FFF;
+	r[12] = z >> 13;
+	z = (br_dec32le(pkey +  6) >> 4) & 0x03FFC0FF;
+	r[13] = z & 0x1FFF;
+	r[14] = z >> 13;
+	z = (br_dec32le(pkey +  9) >> 6) & 0x03F03FFF;
+	r[15] = z & 0x1FFF;
+	r[16] = z >> 13;
+	z = (br_dec32le(pkey + 12) >> 8) & 0x000FFFFF;
+	r[17] = z & 0x1FFF;
+	r[18] = z >> 13;
+
+	/*
+	 * Extend r[] with the 5x factor pre-applied.
+	 */
+	for (i = 0; i < 9; i ++) {
+		r[i] = MUL15(5, r[i + 10]);	/* r[0]..r[8] = 5 * r[10]..r[18] */
+	}
+
+	/*
+	 * Accumulator is 0.
+	 */
+	memset(acc, 0, sizeof acc);
+
+	/*
+	 * Process the additional authenticated data, ciphertext, and
+	 * footer in due order.
+	 */
+	br_enc64le(foot, (uint64_t)aad_len);
+	br_enc64le(foot + 8, (uint64_t)len);
+	poly1305_inner(acc, r, aad, aad_len);
+	poly1305_inner(acc, r, data, len);
+	poly1305_inner(acc, r, foot, sizeof foot);
+
+	/*
+	 * Finalise modular reduction. This is done with carry propagation
+	 * and applying the '2^130 = 5 mod p' rule. Note that the output
+	 * of poly1305_inner() is already mostly reduced, since only
+	 * acc[1] may be (very slightly) above 2^13. A single loop back
+	 * to acc[1] will be enough to make the value fit in 130 bits.
+	 */
+	cc = 0;
+	for (i = 1; i < 10; i ++) {
+		z = acc[i] + cc;
+		acc[i] = z & 0x1FFF;
+		cc = z >> 13;
+	}
+	z = acc[0] + cc + (cc << 2);	/* carry out of acc[9] re-enters acc[0] multiplied by 5 */
+	acc[0] = z & 0x1FFF;
+	acc[1] += z >> 13;
+
+	/*
+	 * We may still have a value in the 2^130-5..2^130-1 range, in
+	 * which case we must reduce it again. The code below selects,
+	 * in constant-time, between 'acc' and 'acc-p'.
+	 */
+	ctl = GT(acc[0], 0x1FFA);
+	for (i = 1; i < 10; i ++) {
+		ctl &= EQ(acc[i], 0x1FFF);
+	}
+	acc[0] = MUX(ctl, acc[0] - 0x1FFB, acc[0]);
+	for (i = 1; i < 10; i ++) {
+		acc[i] &= ~(-ctl);	/* clears the word when ctl == 1, keeps it when ctl == 0 */
+	}
+
+	/*
+	 * Convert back the accumulator to 32-bit words, and add the
+	 * 's' value (second half of pkey[]). That addition is done
+	 * modulo 2^128.
+	 */
+	z = acc[0] + (acc[1] << 13) + br_dec16le(pkey + 16);
+	br_enc16le((unsigned char *)tag, z & 0xFFFF);
+	z = (z >> 16) + (acc[2] << 10) + br_dec16le(pkey + 18);
+	br_enc16le((unsigned char *)tag + 2, z & 0xFFFF);
+	z = (z >> 16) + (acc[3] << 7) + br_dec16le(pkey + 20);
+	br_enc16le((unsigned char *)tag + 4, z & 0xFFFF);
+	z = (z >> 16) + (acc[4] << 4) + br_dec16le(pkey + 22);
+	br_enc16le((unsigned char *)tag + 6, z & 0xFFFF);
+	z = (z >> 16) + (acc[5] << 1) + (acc[6] << 14) + br_dec16le(pkey + 24);
+	br_enc16le((unsigned char *)tag + 8, z & 0xFFFF);
+	z = (z >> 16) + (acc[7] << 11) + br_dec16le(pkey + 26);
+	br_enc16le((unsigned char *)tag + 10, z & 0xFFFF);
+	z = (z >> 16) + (acc[8] << 8) + br_dec16le(pkey + 28);
+	br_enc16le((unsigned char *)tag + 12, z & 0xFFFF);
+	z = (z >> 16) + (acc[9] << 5) + br_dec16le(pkey + 30);
+	br_enc16le((unsigned char *)tag + 14, z & 0xFFFF);	/* bits 128 and above are dropped */
+
+	/*
+	 * If decrypting, then ChaCha20 runs _after_ Poly1305.
+	 */
+	if (!encrypt) {
+		ichacha(key, iv, 1, data, len);
+	}
+}
diff --git a/third_party/bearssl/src/poly1305_ctmulq.c b/third_party/bearssl/src/poly1305_ctmulq.c
new file mode 100644
index 0000000..b00683a
--- /dev/null
+++ b/third_party/bearssl/src/poly1305_ctmulq.c
@@ -0,0 +1,475 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+#if BR_INT128	/* 64x64->128 multiplication through the 'unsigned __int128' type */
+
+#define MUL128(hi, lo, x, y)   do { \
+		unsigned __int128 mul128tmp; \
+		mul128tmp = (unsigned __int128)(x) * (unsigned __int128)(y); \
+		(hi) = (uint64_t)(mul128tmp >> 64); \
+		(lo) = (uint64_t)mul128tmp; \
+	} while (0)
+
+#elif BR_UMUL128	/* 64x64->128 multiplication through the _umul128() intrinsic */
+
+#include <intrin.h>
+
+#define MUL128(hi, lo, x, y)   do { \
+		(lo) = _umul128((x), (y), &(hi)); \
+	} while (0)
+
+#endif
+
+#define MASK42   ((uint64_t)0x000003FFFFFFFFFF)	/* low 42 bits set */
+#define MASK44   ((uint64_t)0x00000FFFFFFFFFFF)	/* low 44 bits set */
+
+/*
+ * The "accumulator" word is nominally a 130-bit value. We split it into
+ * words of 44 bits, each held in a 64-bit variable.
+ *
+ * If the current accumulator is a = a0 + a1*W + a2*W^2 (where W = 2^44)
+ * and r = r0 + r1*W + r2*W^2, then:
+ *
+ *   a*r = (a0*r0)
+ *       + (a0*r1 + a1*r0) * W
+ *       + (a0*r2 + a1*r1 + a2*r0) * W^2
+ *       + (a1*r2 + a2*r1) * W^3
+ *       + (a2*r2) * W^4
+ *
+ * We want to reduce that value modulo p = 2^130-5, so W^3 = 20 mod p,
+ * and W^4 = 20*W mod p. Thus, if we define u1 = 20*r1 and u2 = 20*r2,
+ * then the equations above become:
+ *
+ *  b0 = a0*r0 + a1*u2 + a2*u1
+ *  b1 = a0*r1 + a1*r0 + a2*u2
+ *  b2 = a0*r2 + a1*r1 + a2*r0
+ *
+ * In order to make u1 fit in 44 bits, we can change these equations
+ * into:
+ *
+ *  b0 = a0*r0 + a1*u2 + a2*t1
+ *  b1 = a0*r1 + a1*r0 + a2*t2
+ *  b2 = a0*r2 + a1*r1 + a2*r0
+ *
+ * Where t1 is u1 truncated to 44 bits, and t2 is u2 added to the extra
+ * bits of u1. Note that since r is clamped down to a 124-bit value, the
+ * values u2 and t2 fit on 44 bits too.
+ *
+ * The bx values are larger than 44 bits, so we may split them into a
+ * lower half (cx, 44 bits) and an upper half (dx). The new values for
+ * the accumulator are then:
+ *
+ *  e0 = c0 + 20*d2
+ *  e1 = c1 + d0
+ *  e2 = c2 + d1
+ *
+ * The equations allow for some room, i.e. the ax values may be larger
+ * than 44 bits. Similarly, the ex values will usually be larger than
+ * the ax. Thus, some sort of carry propagation must be done regularly,
+ * though not necessarily at each iteration. In particular, we do not
+ * need to compute the additions (for the bx values) over 128-bit
+ * quantities; we can stick to 64-bit computations.
+ *
+ *
+ * Since the 128-bit result of a 64x64 multiplication is actually
+ * represented over two 64-bit registers, it is cheaper to arrange for
+ * any split that happens between the "high" and "low" halves to be on
+ * that 64-bit boundary. This is done by left shifting the rx, ux and tx
+ * by 20 bits (since they all fit on 44 bits each, this shift is
+ * always possible).
+ */
+
+static void
+poly1305_inner_big(uint64_t *acc, uint64_t *r, const void *data, size_t len)
+{
+
+#define MX(hi, lo, m0, m1, m2)   do { \
+		uint64_t mxhi, mxlo; \
+		MUL128(mxhi, mxlo, a0, m0); \
+		(hi) = mxhi; \
+		(lo) = mxlo >> 20; \
+		MUL128(mxhi, mxlo, a1, m1); \
+		(hi) += mxhi; \
+		(lo) += mxlo >> 20; \
+		MUL128(mxhi, mxlo, a2, m2); \
+		(hi) += mxhi; \
+		(lo) += mxlo >> 20; \
+	} while (0)
+
+	const unsigned char *buf;
+	uint64_t a0, a1, a2;
+	uint64_t r0, r1, r2, t1, t2, u2;
+
+	r0 = r[0];
+	r1 = r[1];
+	r2 = r[2];
+	t1 = r[3];
+	t2 = r[4];
+	u2 = r[5];
+	a0 = acc[0];
+	a1 = acc[1];
+	a2 = acc[2];
+	buf = data;
+
+	while (len > 0) {	/* 64 bytes (four 16-byte blocks) per iteration; len must be a multiple of 64 */
+		uint64_t v0, v1, v2;
+		uint64_t c0, c1, c2, d0, d1, d2;
+
+		v0 = br_dec64le(buf + 0);
+		v1 = br_dec64le(buf + 8);
+		v2 = v1 >> 24;	/* split the 128-bit block into 44 + 44 + 40 bits */
+		v1 = ((v0 >> 44) | (v1 << 20)) & MASK44;
+		v0 &= MASK44;
+		a0 += v0;
+		a1 += v1;
+		a2 += v2 + ((uint64_t)1 << 40);	/* bit 40 of the third word is bit 128 of the block */
+		MX(d0, c0, r0, u2, t1);
+		MX(d1, c1, r1, r0, t2);
+		MX(d2, c2, r2, r1, r0);
+		a0 = c0 + 20 * d2;	/* no carry propagation here: done once per four blocks, below */
+		a1 = c1 + d0;
+		a2 = c2 + d1;
+
+		v0 = br_dec64le(buf + 16);
+		v1 = br_dec64le(buf + 24);
+		v2 = v1 >> 24;
+		v1 = ((v0 >> 44) | (v1 << 20)) & MASK44;
+		v0 &= MASK44;
+		a0 += v0;
+		a1 += v1;
+		a2 += v2 + ((uint64_t)1 << 40);
+		MX(d0, c0, r0, u2, t1);
+		MX(d1, c1, r1, r0, t2);
+		MX(d2, c2, r2, r1, r0);
+		a0 = c0 + 20 * d2;
+		a1 = c1 + d0;
+		a2 = c2 + d1;
+
+		v0 = br_dec64le(buf + 32);
+		v1 = br_dec64le(buf + 40);
+		v2 = v1 >> 24;
+		v1 = ((v0 >> 44) | (v1 << 20)) & MASK44;
+		v0 &= MASK44;
+		a0 += v0;
+		a1 += v1;
+		a2 += v2 + ((uint64_t)1 << 40);
+		MX(d0, c0, r0, u2, t1);
+		MX(d1, c1, r1, r0, t2);
+		MX(d2, c2, r2, r1, r0);
+		a0 = c0 + 20 * d2;
+		a1 = c1 + d0;
+		a2 = c2 + d1;
+
+		v0 = br_dec64le(buf + 48);
+		v1 = br_dec64le(buf + 56);
+		v2 = v1 >> 24;
+		v1 = ((v0 >> 44) | (v1 << 20)) & MASK44;
+		v0 &= MASK44;
+		a0 += v0;
+		a1 += v1;
+		a2 += v2 + ((uint64_t)1 << 40);
+		MX(d0, c0, r0, u2, t1);
+		MX(d1, c1, r1, r0, t2);
+		MX(d2, c2, r2, r1, r0);
+		a0 = c0 + 20 * d2;
+		a1 = c1 + d0;
+		a2 = c2 + d1;
+
+		a1 += a0 >> 44;	/* carry propagation, once per group of four blocks */
+		a0 &= MASK44;
+		a2 += a1 >> 44;
+		a1 &= MASK44;
+		a0 += 20 * (a2 >> 44);
+		a2 &= MASK44;
+
+		buf += 64;
+		len -= 64;
+	}
+	acc[0] = a0;
+	acc[1] = a1;
+	acc[2] = a2;
+
+#undef MX
+}
+
+static void
+poly1305_inner_small(uint64_t *acc, uint64_t *r, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	uint64_t a0, a1, a2;
+	uint64_t r0, r1, r2, t1, t2, u2;
+
+	r0 = r[0];
+	r1 = r[1];
+	r2 = r[2];
+	t1 = r[3];
+	t2 = r[4];
+	u2 = r[5];
+	a0 = acc[0];
+	a1 = acc[1];
+	a2 = acc[2];
+	buf = data;
+
+	while (len > 0) {	/* one 16-byte block per iteration, with carry propagation after each block */
+		uint64_t v0, v1, v2;
+		uint64_t c0, c1, c2, d0, d1, d2;
+		unsigned char tmp[16];
+
+		if (len < 16) {	/* partial final block: right-pad it with zeros */
+			memcpy(tmp, buf, len);
+			memset(tmp + len, 0, (sizeof tmp) - len);
+			buf = tmp;
+			len = 16;
+		}
+		v0 = br_dec64le(buf + 0);
+		v1 = br_dec64le(buf + 8);
+
+		v2 = v1 >> 24;	/* split the 128-bit block into 44 + 44 + 40 bits */
+		v1 = ((v0 >> 44) | (v1 << 20)) & MASK44;
+		v0 &= MASK44;
+
+		a0 += v0;
+		a1 += v1;
+		a2 += v2 + ((uint64_t)1 << 40);	/* bit 40 of the third word is bit 128 of the block */
+
+#define MX(hi, lo, m0, m1, m2)   do { \
+		uint64_t mxhi, mxlo; \
+		MUL128(mxhi, mxlo, a0, m0); \
+		(hi) = mxhi; \
+		(lo) = mxlo >> 20; \
+		MUL128(mxhi, mxlo, a1, m1); \
+		(hi) += mxhi; \
+		(lo) += mxlo >> 20; \
+		MUL128(mxhi, mxlo, a2, m2); \
+		(hi) += mxhi; \
+		(lo) += mxlo >> 20; \
+	} while (0)
+
+		MX(d0, c0, r0, u2, t1);
+		MX(d1, c1, r1, r0, t2);
+		MX(d2, c2, r2, r1, r0);
+
+#undef MX
+
+		a0 = c0 + 20 * d2;
+		a1 = c1 + d0;
+		a2 = c2 + d1;
+
+		a1 += a0 >> 44;
+		a0 &= MASK44;
+		a2 += a1 >> 44;
+		a1 &= MASK44;
+		a0 += 20 * (a2 >> 44);
+		a2 &= MASK44;
+
+		buf += 16;
+		len -= 16;
+	}
+	acc[0] = a0;
+	acc[1] = a1;
+	acc[2] = a2;
+}
+
+static inline void
+poly1305_inner(uint64_t *acc, uint64_t *r, const void *data, size_t len)
+{
+	if (len >= 64) {	/* bulk part: the largest multiple of 64 bytes goes through the unrolled code */
+		size_t len2;
+
+		len2 = len & ~(size_t)63;
+		poly1305_inner_big(acc, r, data, len2);
+		data = (const unsigned char *)data + len2;
+		len -= len2;
+	}
+	if (len > 0) {	/* remaining 1 to 63 bytes, processed block by block */
+		poly1305_inner_small(acc, r, data, len);
+	}
+}
+
+/* see bearssl_block.h: ChaCha20+Poly1305 run; data[] is processed in place and the 16-byte tag is written to tag[] */
+void
+br_poly1305_ctmulq_run(const void *key, const void *iv,
+	void *data, size_t len, const void *aad, size_t aad_len,
+	void *tag, br_chacha20_run ichacha, int encrypt)
+{
+	unsigned char pkey[32], foot[16];
+	uint64_t r[6], acc[3], r0, r1;
+	uint32_t v0, v1, v2, v3, v4;
+	uint64_t w0, w1, w2, w3;
+	uint32_t ctl;
+
+	/*
+	 * Compute the MAC key. The 'r' value is the first 16 bytes of
+	 * pkey[].
+	 */
+	memset(pkey, 0, sizeof pkey);
+	ichacha(key, iv, 0, pkey, sizeof pkey);
+
+	/*
+	 * If encrypting, ChaCha20 must run first, followed by Poly1305.
+	 * When decrypting, the operations are reversed.
+	 */
+	if (encrypt) {
+		ichacha(key, iv, 1, data, len);
+	}
+
+	/*
+	 * Run Poly1305. We must process the AAD, then ciphertext, then
+	 * the footer (with the lengths). Note that the AAD and ciphertext
+	 * are meant to be padded with zeros up to the next multiple of 16,
+	 * and the length of the footer is 16 bytes as well.
+	 */
+
+	/*
+	 * Apply the "clamping" on r.
+	 */
+	pkey[ 3] &= 0x0F;
+	pkey[ 4] &= 0xFC;
+	pkey[ 7] &= 0x0F;
+	pkey[ 8] &= 0xFC;
+	pkey[11] &= 0x0F;
+	pkey[12] &= 0xFC;
+	pkey[15] &= 0x0F;
+
+	/*
+	 * Decode the 'r' value into 44-bit words, left-shifted by 20 bits.
+	 * Also compute the t1, t2 and u2 values (r[3], r[4] and r[5]).
+	 */
+	r0 = br_dec64le(pkey +  0);
+	r1 = br_dec64le(pkey +  8);
+	r[0] = r0 << 20;
+	r[1] = ((r0 >> 24) | (r1 << 40)) & ~(uint64_t)0xFFFFF;
+	r[2] = (r1 >> 4) & ~(uint64_t)0xFFFFF;
+	r1 = 20 * (r[1] >> 20);	/* r1 is reused: it now holds u1 = 20*r1 (unshifted) */
+	r[3] = r1 << 20;	/* t1: low 44 bits of u1 (upper bits fall off the 64-bit word) */
+	r[5] = 20 * r[2];	/* u2 */
+	r[4] = (r[5] + (r1 >> 24)) & ~(uint64_t)0xFFFFF;	/* t2: u2 plus the bits of u1 above 44 */
+
+	/*
+	 * Accumulator is 0.
+	 */
+	acc[0] = 0;
+	acc[1] = 0;
+	acc[2] = 0;
+
+	/*
+	 * Process the additional authenticated data, ciphertext, and
+	 * footer in due order.
+	 */
+	br_enc64le(foot, (uint64_t)aad_len);
+	br_enc64le(foot + 8, (uint64_t)len);
+	poly1305_inner(acc, r, aad, aad_len);
+	poly1305_inner(acc, r, data, len);
+	poly1305_inner_small(acc, r, foot, sizeof foot);
+
+	/*
+	 * Finalise modular reduction. At that point, the value consists
+	 * in three 44-bit values (the lowest one might be slightly above
+	 * 2^44). Two loops shall be sufficient.
+	 */
+	acc[1] += (acc[0] >> 44);
+	acc[0] &= MASK44;
+	acc[2] += (acc[1] >> 44);
+	acc[1] &= MASK44;
+	acc[0] += 5 * (acc[2] >> 42);	/* bits at 2^130 and above wrap around with factor 5 */
+	acc[2] &= MASK42;
+	acc[1] += (acc[0] >> 44);
+	acc[0] &= MASK44;
+	acc[2] += (acc[1] >> 44);
+	acc[1] &= MASK44;
+	acc[0] += 5 * (acc[2] >> 42);
+	acc[2] &= MASK42;
+
+	/*
+	 * The value may still fall in the 2^130-5..2^130-1 range, in
+	 * which case we must reduce it again. The code below selects,
+	 * in constant-time, between 'acc' and 'acc-p'. We encode the
+	 * value over four 32-bit integers to finish the operation.
+	 */
+	v0 = (uint32_t)acc[0];
+	v1 = (uint32_t)(acc[0] >> 32) | ((uint32_t)acc[1] << 12);
+	v2 = (uint32_t)(acc[1] >> 20) | ((uint32_t)acc[2] << 24);
+	v3 = (uint32_t)(acc[2] >> 8);
+	v4 = (uint32_t)(acc[2] >> 40);	/* bits 128 and 129; only used for the range test below */
+
+	ctl = GT(v0, 0xFFFFFFFA);
+	ctl &= EQ(v1, 0xFFFFFFFF);
+	ctl &= EQ(v2, 0xFFFFFFFF);
+	ctl &= EQ(v3, 0xFFFFFFFF);
+	ctl &= EQ(v4, 0x00000003);
+	v0 = MUX(ctl, v0 + 5, v0);	/* acc - p = acc + 5 - 2^130; the 32-bit addition wraps */
+	v1 = MUX(ctl, 0, v1);
+	v2 = MUX(ctl, 0, v2);
+	v3 = MUX(ctl, 0, v3);
+
+	/*
+	 * Add the "s" value. This is done modulo 2^128. Don't forget
+	 * carry propagation...
+	 */
+	w0 = (uint64_t)v0 + (uint64_t)br_dec32le(pkey + 16);
+	w1 = (uint64_t)v1 + (uint64_t)br_dec32le(pkey + 20) + (w0 >> 32);
+	w2 = (uint64_t)v2 + (uint64_t)br_dec32le(pkey + 24) + (w1 >> 32);
+	w3 = (uint64_t)v3 + (uint64_t)br_dec32le(pkey + 28) + (w2 >> 32);
+	v0 = (uint32_t)w0;
+	v1 = (uint32_t)w1;
+	v2 = (uint32_t)w2;
+	v3 = (uint32_t)w3;	/* the carry out of w3 is dropped (addition modulo 2^128) */
+
+	/*
+	 * Encode the tag.
+	 */
+	br_enc32le((unsigned char *)tag +  0, v0);
+	br_enc32le((unsigned char *)tag +  4, v1);
+	br_enc32le((unsigned char *)tag +  8, v2);
+	br_enc32le((unsigned char *)tag + 12, v3);
+
+	/*
+	 * If decrypting, then ChaCha20 runs _after_ Poly1305.
+	 */
+	if (!encrypt) {
+		ichacha(key, iv, 1, data, len);
+	}
+}
+
+/* see bearssl_block.h: 128-bit multiplication support is available in this build, return the implementation */
+br_poly1305_run
+br_poly1305_ctmulq_get(void)
+{
+	return &br_poly1305_ctmulq_run;
+}
+
+#else
+
+/* see bearssl_block.h: neither BR_INT128 nor BR_UMUL128 is set, so no ctmulq implementation is compiled in */
+br_poly1305_run
+br_poly1305_ctmulq_get(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/poly1305_i15.c b/third_party/bearssl/src/poly1305_i15.c
new file mode 100644
index 0000000..6f89212
--- /dev/null
+++ b/third_party/bearssl/src/poly1305_i15.c
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * This is a "reference" implementation of Poly1305 that uses the
+ * generic "i15" code for big integers. It is slow, but it handles all
+ * big-integer operations with generic code, thereby avoiding most
+ * tricky situations with carry propagation and modular reduction.
+ */
+
+/*
+ * Modulus p = 2^130-5: a leading 0x008A word, then nine 15-bit words.
+ */
+static const uint16_t P1305[] = {
+	0x008A,	/* same leading value as R2[] and as passed to br_i15_zero() */
+	0x7FFB, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x03FF
+};	/* least significant word first: 0x7FFB = 2^15-5, last word holds 10 bits */
+
+/*
+ * -1/p mod 2^15 (0x4CCD * 0x7FFB = -1 mod 2^15); passed to br_i15_montymul().
+ */
+#define P0I   0x4CCD
+
+/*
+ * R^2 mod p, for conversion to Montgomery representation; R = 2^135 (nine
+ * 15-bit words). 2^130 = 5 mod p, so R = 160 mod p and R^2 = 25600 = 0x6400.
+ */
+static const uint16_t R2[] = {
+	0x008A,	/* same leading word as P1305[] */
+	0x6400, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
+};
+
+/*
+ * Absorb data[0..len) into the accumulator, one 16-byte block at a time:
+ * a = (a + block) * r mod p. "r" is in Montgomery representation, "a" is not.
+ */
+static void
+poly1305_inner(uint16_t *a, const uint16_t *r, const void *data, size_t len)
+{
+	const unsigned char *buf;
+
+	buf = data;
+	while (len > 0) {
+		unsigned char tmp[16], rev[16];
+		uint16_t b[10];
+		uint32_t ctl;
+		int i;
+
+		/*
+		 * A trailing partial block is copied to tmp[] and right-padded with zeros.
+		 */
+		if (len < 16) {
+			memset(tmp, 0, sizeof tmp);
+			memcpy(tmp, buf, len);
+			buf = tmp;
+			len = 16;	/* so that "len -= 16" below ends the loop */
+		}
+
+		/*
+		 * Decode next block and apply the "high bit". The block bytes are
+		 * little-endian, so they are reversed into rev[] before decoding.
+		 */
+		for (i = 0; i < 16; i ++) {
+			rev[i] = buf[15 - i];
+		}
+		br_i15_decode_mod(b, rev, sizeof rev, P1305);
+		b[9] |= 0x0100;	/* bit 8 of the ninth 15-bit word: 8*15 + 8 = 128 */
+
+		/*
+		 * b = (b + a) mod p with no branch: ctl comes from the add and from
+		 * a trial subtraction (last argument 0), then gates the real one.
+		 */
+		ctl = br_i15_add(b, a, 1);
+		ctl |= NOT(br_i15_sub(b, P1305, 0));
+		br_i15_sub(b, P1305, ctl);
+
+		/*
+		 * Multiply by r, result is the new accumulator value (written to a).
+		 */
+		br_i15_montymul(a, b, r, P1305, P0I);
+
+		buf += 16;
+		len -= 16;
+	}
+}
+
+/*
+ * Reverse the 16 bytes at buf in place (buf[i] trades places with buf[15 - i]).
+ */
+static void
+byteswap16(unsigned char *buf)
+{
+	int i;
+
+	for (i = 0; i < 8; i ++) {	/* 8 exchanges cover all 16 bytes */
+		unsigned x;
+
+		x = buf[i];
+		buf[i] = buf[15 - i];
+		buf[15 - i] = x;
+	}
+}
+
+/* ChaCha20 + Poly1305 pass built on the generic i15 big-integer code; see bearssl_block.h */
+void
+br_poly1305_i15_run(const void *key, const void *iv,
+	void *data, size_t len, const void *aad, size_t aad_len,
+	void *tag, br_chacha20_run ichacha, int encrypt)
+{
+	unsigned char pkey[32], foot[16];
+	uint16_t t[10], r[10], acc[10];
+
+	/*
+	 * Compute the MAC key by running ichacha (third argument 0) over 32
+	 * zero bytes: 'r' is pkey[0..15], 's' is pkey[16..31].
+	 */
+	memset(pkey, 0, sizeof pkey);
+	ichacha(key, iv, 0, pkey, sizeof pkey);
+
+	/*
+	 * If encrypting, ChaCha20 must run first, followed by Poly1305.
+	 * When decrypting, the operations are reversed.
+	 */
+	if (encrypt) {
+		ichacha(key, iv, 1, data, len);	/* data[] is transformed in place */
+	}
+
+	/*
+	 * Run Poly1305. We must process the AAD, then ciphertext, then
+	 * the footer (with the lengths). Note that the AAD and ciphertext
+	 * are meant to be padded with zeros up to the next multiple of 16,
+	 * and the length of the footer is 16 bytes as well.
+	 */
+
+	/*
+	 * Clamp 'r': clear the top 4 bits of bytes 3/7/11/15 and the low 2 bits of bytes 4/8/12.
+	 */
+	pkey[ 3] &= 0x0F;
+	pkey[ 7] &= 0x0F;
+	pkey[11] &= 0x0F;
+	pkey[15] &= 0x0F;
+	pkey[ 4] &= 0xFC;
+	pkey[ 8] &= 0xFC;
+	pkey[12] &= 0xFC;
+
+	/*
+	 * Decode the clamped 'r' value. It is stored little-endian in pkey[],
+	 * so pkey[0..15] is byte-reversed in place before decoding.
+	 */
+	byteswap16(pkey);
+	br_i15_decode_mod(t, pkey, 16, P1305);
+
+	/*
+	 * Convert 'r' to Montgomery representation (Montgomery product with R2).
+	 */
+	br_i15_montymul(r, t, R2, P1305, P0I);
+
+	/*
+	 * Accumulator is 0 (0x8A is the same leading word as P1305[0]).
+	 */
+	br_i15_zero(acc, 0x8A);
+
+	/*
+	 * Process the AAD, the ciphertext, then the 16-byte footer
+	 * (aad_len followed by len, each encoded over 64 bits, little-endian).
+	 */
+	br_enc64le(foot, (uint64_t)aad_len);
+	br_enc64le(foot + 8, (uint64_t)len);
+	poly1305_inner(acc, r, aad, aad_len);
+	poly1305_inner(acc, r, data, len);
+	poly1305_inner(acc, r, foot, sizeof foot);
+
+	/*
+	 * Decode the value 's' (pkey[16..31]). Again, a byteswap is needed.
+	 */
+	byteswap16(pkey + 16);
+	br_i15_decode_mod(t, pkey + 16, 16, P1305);
+
+	/*
+	 * Add the value 's' to the accumulator. That addition is done
+	 * modulo 2^128, so we just ignore the carry.
+	 */
+	br_i15_add(acc, t, 1);	/* return value deliberately unused */
+
+	/*
+	 * Encode the result (128 low bits) to the 16-byte tag, then reverse
+	 * the bytes in place so that the tag is little-endian.
+	 */
+	br_i15_encode(tag, 16, acc);
+	byteswap16(tag);
+
+	/*
+	 * If decrypting, then ChaCha20 runs _after_ Poly1305.
+	 */
+	if (!encrypt) {
+		ichacha(key, iv, 1, data, len);	/* the MAC above covered the ciphertext */
+	}
+}
diff --git a/third_party/bearssl/src/prf.c b/third_party/bearssl/src/prf.c
new file mode 100644
index 0000000..f04a5fb
--- /dev/null
+++ b/third_party/bearssl/src/prf.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* XOR an HMAC-derived stream (key: secret; input: label + seed chunks) into dst[0..len); see inner.h */
+void
+br_tls_phash(void *dst, size_t len,
+	const br_hash_class *dig,
+	const void *secret, size_t secret_len, const char *label,
+	size_t seed_num, const br_tls_prf_seed_chunk *seed)
+{
+	unsigned char *buf;
+	unsigned char tmp[64], a[64];	/* NOTE(review): assumes digest output <= 64 bytes */
+	br_hmac_key_context kc;
+	br_hmac_context hc;
+	size_t label_len, hlen, u;
+
+	if (len == 0) {
+		return;
+	}
+	buf = dst;
+	for (label_len = 0; label[label_len]; label_len ++);	/* length of NUL-terminated label */
+	hlen = br_digest_size(dig);
+	br_hmac_key_init(&kc, dig, secret, secret_len);
+	br_hmac_init(&hc, &kc, 0);	/* first chaining value: a = HMAC(label + seed) */
+	br_hmac_update(&hc, label, label_len);
+	for (u = 0; u < seed_num; u ++) {
+		br_hmac_update(&hc, seed[u].data, seed[u].len);
+	}
+	br_hmac_out(&hc, a);
+	for (;;) {
+		br_hmac_init(&hc, &kc, 0);	/* output chunk: tmp = HMAC(a + label + seed) */
+		br_hmac_update(&hc, a, hlen);
+		br_hmac_update(&hc, label, label_len);
+		for (u = 0; u < seed_num; u ++) {
+			br_hmac_update(&hc, seed[u].data, seed[u].len);
+		}
+		br_hmac_out(&hc, tmp);
+		for (u = 0; u < hlen && u < len; u ++) {
+			buf[u] ^= tmp[u];	/* XOR, not store: dst contents are combined with the output */
+		}
+		buf += u;
+		len -= u;
+		if (len == 0) {
+			return;
+		}
+		br_hmac_init(&hc, &kc, 0);	/* next chaining value: a = HMAC(a) */
+		br_hmac_update(&hc, a, hlen);
+		br_hmac_out(&hc, a);
+	}
+}
diff --git a/third_party/bearssl/src/prf_md5sha1.c b/third_party/bearssl/src/prf_md5sha1.c
new file mode 100644
index 0000000..3212833
--- /dev/null
+++ b/third_party/bearssl/src/prf_md5sha1.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Fill dst[0..len) with phash(MD5, first half of secret) XOR phash(SHA-1, last half); see bearssl.h */
+void
+br_tls10_prf(void *dst, size_t len,
+	const void *secret, size_t secret_len, const char *label,
+	size_t seed_num, const br_tls_prf_seed_chunk *seed)
+{
+	const unsigned char *s1;
+	size_t slen;
+
+	s1 = secret;
+	slen = (secret_len + 1) >> 1;	/* half length, rounded up */
+	memset(dst, 0, len);	/* br_tls_phash() XORs into dst, so start from zeros */
+	br_tls_phash(dst, len, &br_md5_vtable,
+		s1, slen, label, seed_num, seed);
+	br_tls_phash(dst, len, &br_sha1_vtable,
+		s1 + secret_len - slen, slen, label, seed_num, seed);	/* halves share one byte if secret_len is odd */
+}
diff --git a/third_party/bearssl/src/prf_sha256.c b/third_party/bearssl/src/prf_sha256.c
new file mode 100644
index 0000000..76041de
--- /dev/null
+++ b/third_party/bearssl/src/prf_sha256.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Fill dst[0..len) with br_tls_phash() output computed with SHA-256; see bearssl.h */
+void
+br_tls12_sha256_prf(void *dst, size_t len,
+	const void *secret, size_t secret_len, const char *label,
+	size_t seed_num, const br_tls_prf_seed_chunk *seed)
+{
+	memset(dst, 0, len);	/* br_tls_phash() XORs into dst, so start from zeros */
+	br_tls_phash(dst, len, &br_sha256_vtable,
+		secret, secret_len, label, seed_num, seed);
+}
diff --git a/third_party/bearssl/src/prf_sha384.c b/third_party/bearssl/src/prf_sha384.c
new file mode 100644
index 0000000..c20c4e6
--- /dev/null
+++ b/third_party/bearssl/src/prf_sha384.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Fill dst[0..len) with br_tls_phash() output computed with SHA-384; see bearssl.h */
+void
+br_tls12_sha384_prf(void *dst, size_t len,
+	const void *secret, size_t secret_len, const char *label,
+	size_t seed_num, const br_tls_prf_seed_chunk *seed)
+{
+	memset(dst, 0, len);	/* br_tls_phash() XORs into dst, so start from zeros */
+	br_tls_phash(dst, len, &br_sha384_vtable,
+		secret, secret_len, label, seed_num, seed);
+}
diff --git a/third_party/bearssl/src/rsa_default_keygen.c b/third_party/bearssl/src/rsa_default_keygen.c
new file mode 100644
index 0000000..f2e83c8
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_keygen.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Return the br_rsa_keygen implementation chosen at compile time; see bearssl_rsa.h */
+br_rsa_keygen
+br_rsa_keygen_get_default(void)
+{
+#if BR_INT128 || BR_UMUL128	/* either flag set: i62 variant */
+	return &br_rsa_i62_keygen;
+#elif BR_LOMUL	/* otherwise, BR_LOMUL set: i15 variant */
+	return &br_rsa_i15_keygen;
+#else	/* none of the flags set: i31 variant */
+	return &br_rsa_i31_keygen;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_default_modulus.c b/third_party/bearssl/src/rsa_default_modulus.c
new file mode 100644
index 0000000..57d4be5
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_modulus.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Return the br_rsa_compute_modulus implementation chosen at compile time; see bearssl_rsa.h */
+br_rsa_compute_modulus
+br_rsa_compute_modulus_get_default(void)
+{
+#if BR_LOMUL	/* BR_LOMUL set: i15 variant */
+	return &br_rsa_i15_compute_modulus;
+#else	/* otherwise: i31 variant (no i62 choice in this selector) */
+	return &br_rsa_i31_compute_modulus;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_default_oaep_decrypt.c b/third_party/bearssl/src/rsa_default_oaep_decrypt.c
new file mode 100644
index 0000000..7345d64
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_oaep_decrypt.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Return the br_rsa_oaep_decrypt implementation chosen at compile time; see bearssl_rsa.h */
+br_rsa_oaep_decrypt
+br_rsa_oaep_decrypt_get_default(void)
+{
+#if BR_INT128 || BR_UMUL128	/* either flag set: i62 variant */
+	return &br_rsa_i62_oaep_decrypt;
+#elif BR_LOMUL	/* otherwise, BR_LOMUL set: i15 variant */
+	return &br_rsa_i15_oaep_decrypt;
+#else	/* none of the flags set: i31 variant */
+	return &br_rsa_i31_oaep_decrypt;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_default_oaep_encrypt.c b/third_party/bearssl/src/rsa_default_oaep_encrypt.c
new file mode 100644
index 0000000..ae33fcc
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_oaep_encrypt.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Return the br_rsa_oaep_encrypt implementation chosen at compile time; see bearssl_rsa.h */
+br_rsa_oaep_encrypt
+br_rsa_oaep_encrypt_get_default(void)
+{
+#if BR_INT128 || BR_UMUL128	/* either flag set: i62 variant */
+	return &br_rsa_i62_oaep_encrypt;
+#elif BR_LOMUL	/* otherwise, BR_LOMUL set: i15 variant */
+	return &br_rsa_i15_oaep_encrypt;
+#else	/* none of the flags set: i31 variant */
+	return &br_rsa_i31_oaep_encrypt;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_default_pkcs1_sign.c b/third_party/bearssl/src/rsa_default_pkcs1_sign.c
new file mode 100644
index 0000000..e926704
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_pkcs1_sign.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Return the br_rsa_pkcs1_sign implementation chosen at compile time; see bearssl_rsa.h */
+br_rsa_pkcs1_sign
+br_rsa_pkcs1_sign_get_default(void)
+{
+#if BR_INT128 || BR_UMUL128	/* either flag set: i62 variant */
+	return &br_rsa_i62_pkcs1_sign;
+#elif BR_LOMUL	/* otherwise, BR_LOMUL set: i15 variant */
+	return &br_rsa_i15_pkcs1_sign;
+#else	/* none of the flags set: i31 variant */
+	return &br_rsa_i31_pkcs1_sign;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_default_pkcs1_vrfy.c b/third_party/bearssl/src/rsa_default_pkcs1_vrfy.c
new file mode 100644
index 0000000..b3dbeb7
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_pkcs1_vrfy.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Return the br_rsa_pkcs1_vrfy implementation chosen at compile time; see bearssl_rsa.h */
+br_rsa_pkcs1_vrfy
+br_rsa_pkcs1_vrfy_get_default(void)
+{
+#if BR_INT128 || BR_UMUL128	/* either flag set: i62 variant */
+	return &br_rsa_i62_pkcs1_vrfy;
+#elif BR_LOMUL	/* otherwise, BR_LOMUL set: i15 variant */
+	return &br_rsa_i15_pkcs1_vrfy;
+#else	/* none of the flags set: i31 variant */
+	return &br_rsa_i31_pkcs1_vrfy;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_default_priv.c b/third_party/bearssl/src/rsa_default_priv.c
new file mode 100644
index 0000000..bb0b2c0
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_priv.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Return the br_rsa_private implementation chosen at compile time; see bearssl_rsa.h */
+br_rsa_private
+br_rsa_private_get_default(void)
+{
+#if BR_INT128 || BR_UMUL128	/* either flag set: i62 variant */
+	return &br_rsa_i62_private;
+#elif BR_LOMUL	/* otherwise, BR_LOMUL set: i15 variant */
+	return &br_rsa_i15_private;
+#else	/* none of the flags set: i31 variant */
+	return &br_rsa_i31_private;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_default_privexp.c b/third_party/bearssl/src/rsa_default_privexp.c
new file mode 100644
index 0000000..cda4555
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_privexp.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Return the br_rsa_compute_privexp implementation chosen at compile time; see bearssl_rsa.h */
+br_rsa_compute_privexp
+br_rsa_compute_privexp_get_default(void)
+{
+#if BR_LOMUL	/* BR_LOMUL set: i15 variant */
+	return &br_rsa_i15_compute_privexp;
+#else	/* otherwise: i31 variant (no i62 choice in this selector) */
+	return &br_rsa_i31_compute_privexp;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_default_pss_sign.c b/third_party/bearssl/src/rsa_default_pss_sign.c
new file mode 100644
index 0000000..ce4f3e0
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_pss_sign.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Return the br_rsa_pss_sign implementation chosen at compile time; see bearssl_rsa.h */
+br_rsa_pss_sign
+br_rsa_pss_sign_get_default(void)
+{
+#if BR_INT128 || BR_UMUL128	/* either flag set: i62 variant */
+	return &br_rsa_i62_pss_sign;
+#elif BR_LOMUL	/* otherwise, BR_LOMUL set: i15 variant */
+	return &br_rsa_i15_pss_sign;
+#else	/* none of the flags set: i31 variant */
+	return &br_rsa_i31_pss_sign;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_default_pss_vrfy.c b/third_party/bearssl/src/rsa_default_pss_vrfy.c
new file mode 100644
index 0000000..e3a9ad9
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_pss_vrfy.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Return the br_rsa_pss_vrfy implementation chosen at compile time; see bearssl_rsa.h */
+br_rsa_pss_vrfy
+br_rsa_pss_vrfy_get_default(void)
+{
+#if BR_INT128 || BR_UMUL128	/* either flag set: i62 variant */
+	return &br_rsa_i62_pss_vrfy;
+#elif BR_LOMUL	/* otherwise, BR_LOMUL set: i15 variant */
+	return &br_rsa_i15_pss_vrfy;
+#else	/* none of the flags set: i31 variant */
+	return &br_rsa_i31_pss_vrfy;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_default_pub.c b/third_party/bearssl/src/rsa_default_pub.c
new file mode 100644
index 0000000..a1f03ef
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_pub.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Return the br_rsa_public implementation chosen at compile time; see bearssl_rsa.h */
+br_rsa_public
+br_rsa_public_get_default(void)
+{
+#if BR_INT128 || BR_UMUL128	/* either flag set: i62 variant */
+	return &br_rsa_i62_public;
+#elif BR_LOMUL	/* otherwise, BR_LOMUL set: i15 variant */
+	return &br_rsa_i15_public;
+#else	/* none of the flags set: i31 variant */
+	return &br_rsa_i31_public;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_default_pubexp.c b/third_party/bearssl/src/rsa_default_pubexp.c
new file mode 100644
index 0000000..47bc000
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_pubexp.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; chosen at build time (i15 if BR_LOMUL, else i31) */
+br_rsa_compute_pubexp
+br_rsa_compute_pubexp_get_default(void)
+{
+#if BR_LOMUL
+	return &br_rsa_i15_compute_pubexp;
+#else
+	return &br_rsa_i31_compute_pubexp;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_i15_keygen.c b/third_party/bearssl/src/rsa_i15_keygen.c
new file mode 100644
index 0000000..e8da419
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_keygen.c
@@ -0,0 +1,583 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Fill x[1..] with a random integer of the provided size (esize is an
+ * encoded bit length). The header word x[0] is left untouched.
+ */
+static void
+mkrand(const br_prng_class **rng, uint16_t *x, uint32_t esize)
+{
+	size_t u, len;
+	unsigned m;
+
+	len = (esize + 15) >> 4;	/* number of value words */
+	(*rng)->generate(rng, x + 1, len * sizeof(uint16_t));
+	for (u = 1; u < len; u ++) {
+		x[u] &= 0x7FFF;	/* keep 15 bits in each lower word */
+	}
+	m = esize & 15;	/* bits kept in the top word (0 means 15) */
+	if (m == 0) {
+		x[len] &= 0x7FFF;
+	} else {
+		x[len] &= 0x7FFF >> (15 - m);
+	}
+}
+
+/*
+ * Big-endian unsigned representation of the product of all small
+ * primes from 13 to 1481. trial_divisions() reduces it modulo x.
+ */
+static const unsigned char SMALL_PRIMES[] = {
+	0x2E, 0xAB, 0x92, 0xD1, 0x8B, 0x12, 0x47, 0x31, 0x54, 0x0A,
+	0x99, 0x5D, 0x25, 0x5E, 0xE2, 0x14, 0x96, 0x29, 0x1E, 0xB7,
+	0x78, 0x70, 0xCC, 0x1F, 0xA5, 0xAB, 0x8D, 0x72, 0x11, 0x37,
+	0xFB, 0xD8, 0x1E, 0x3F, 0x5B, 0x34, 0x30, 0x17, 0x8B, 0xE5,
+	0x26, 0x28, 0x23, 0xA1, 0x8A, 0xA4, 0x29, 0xEA, 0xFD, 0x9E,
+	0x39, 0x60, 0x8A, 0xF3, 0xB5, 0xA6, 0xEB, 0x3F, 0x02, 0xB6,
+	0x16, 0xC3, 0x96, 0x9D, 0x38, 0xB0, 0x7D, 0x82, 0x87, 0x0C,
+	0xF7, 0xBE, 0x24, 0xE5, 0x5F, 0x41, 0x04, 0x79, 0x76, 0x40,
+	0xE7, 0x00, 0x22, 0x7E, 0xB5, 0x85, 0x7F, 0x8D, 0x01, 0x50,
+	0xE9, 0xD3, 0x29, 0x42, 0x08, 0xB3, 0x51, 0x40, 0x7B, 0xD7,
+	0x8D, 0xCC, 0x10, 0x01, 0x64, 0x59, 0x28, 0xB6, 0x53, 0xF3,
+	0x50, 0x4E, 0xB1, 0xF2, 0x58, 0xCD, 0x6E, 0xF5, 0x56, 0x3E,
+	0x66, 0x2F, 0xD7, 0x07, 0x7F, 0x52, 0x4C, 0x13, 0x24, 0xDC,
+	0x8E, 0x8D, 0xCC, 0xED, 0x77, 0xC4, 0x21, 0xD2, 0xFD, 0x08,
+	0xEA, 0xD7, 0xC0, 0x5C, 0x13, 0x82, 0x81, 0x31, 0x2F, 0x2B,
+	0x08, 0xE4, 0x80, 0x04, 0x7A, 0x0C, 0x8A, 0x3C, 0xDC, 0x22,
+	0xE4, 0x5A, 0x7A, 0xB0, 0x12, 0x5E, 0x4A, 0x76, 0x94, 0x77,
+	0xC2, 0x0E, 0x92, 0xBA, 0x8A, 0xA0, 0x1F, 0x14, 0x51, 0x1E,
+	0x66, 0x6C, 0x38, 0x03, 0x6C, 0xC7, 0x4A, 0x4B, 0x70, 0x80,
+	0xAF, 0xCA, 0x84, 0x51, 0xD8, 0xD2, 0x26, 0x49, 0xF5, 0xA8,
+	0x5E, 0x35, 0x4B, 0xAC, 0xCE, 0x29, 0x92, 0x33, 0xB7, 0xA2,
+	0x69, 0x7D, 0x0C, 0xE0, 0x9C, 0xDB, 0x04, 0xD6, 0xB4, 0xBC,
+	0x39, 0xD7, 0x7F, 0x9E, 0x9D, 0x78, 0x38, 0x7F, 0x51, 0x54,
+	0x50, 0x8B, 0x9E, 0x9C, 0x03, 0x6C, 0xF5, 0x9D, 0x2C, 0x74,
+	0x57, 0xF0, 0x27, 0x2A, 0xC3, 0x47, 0xCA, 0xB9, 0xD7, 0x5C,
+	0xFF, 0xC2, 0xAC, 0x65, 0x4E, 0xBD
+};
+
+/*
+ * We need temporary values for at least 7 integers of the same size
+ * as a factor (including header word); more space speeds up modular
+ * exponentiations, but we prefer to stay under 2 kilobytes in total to
+ * save stack space. TEMPS is a count of 16-bit words; it exceeds 1024
+ * only when BR_MAX_RSA_SIZE is greater than 4350 (default value is
+ * 4096, so the 2-kB limit holds unless BR_MAX_RSA_SIZE was modified).
+ * MAX() evaluates its arguments twice: use constant expressions only.
+ */
+#define MAX(x, y)   ((x) > (y) ? (x) : (y))
+#define TEMPS       MAX(1024, 7 * ((((BR_MAX_RSA_SIZE + 1) >> 1) + 29) / 15))
+
+/*
+ * Perform trial division on a candidate prime. This computes
+ * y = SMALL_PRIMES mod x, then tries to compute y/y mod x. The
+ * br_i15_moddiv() function will report an error if y is not invertible
+ * modulo x. Returned value is 1 on success (none of the small primes
+ * divides x), 0 on error (a non-trivial GCD is obtained).
+ * The scratch buffer t holds y first; the rest goes to br_i15_moddiv().
+ * This function assumes that x is odd.
+ */
+static uint32_t
+trial_divisions(const uint16_t *x, uint16_t *t)
+{
+	uint16_t *y;
+	uint16_t x0i;
+
+	y = t;
+	t += 1 + ((x[0] + 15) >> 4);
+	x0i = br_i15_ninv15(x[1]);
+	br_i15_decode_reduce(y, SMALL_PRIMES, sizeof SMALL_PRIMES, x);
+	return br_i15_moddiv(y, y, x, x0i, t);
+}
+
+/*
+ * Perform n rounds of Miller-Rabin on the candidate prime x. This
+ * function assumes that x = 3 mod 4. The buffer t (tlen words) is
+ * scratch space for (x-1)/2, the random base and the exponentiation.
+ * Returned value is 1 on success (all rounds completed successfully),
+ * 0 otherwise.
+ */
+static uint32_t
+miller_rabin(const br_prng_class **rng, const uint16_t *x, int n,
+	uint16_t *t, size_t tlen)
+{
+	/*
+	 * Since x = 3 mod 4, the Miller-Rabin test is simple:
+	 *  - get a random base a (such that 1 < a < x-1)
+	 *  - compute z = a^((x-1)/2) mod x
+	 *  - if z != 1 and z != x-1, the number x is composite
+	 *
+	 * We generate bases 'a' randomly with a size which is
+	 * one bit less than x, which ensures that a < x-1. It
+	 * is not useful to verify that a > 1 because the probability
+	 * that we get a value a equal to 0 or 1 is much smaller
+	 * than the probability of our Miller-Rabin tests failing to
+	 * detect a composite, which is already much smaller than the
+	 * probability of the hardware misbehaving and returning a
+	 * composite integer because of some glitch (e.g. bad RAM
+	 * or ill-timed cosmic ray).
+	 */
+	unsigned char *xm1d2;
+	size_t xlen, xm1d2_len, xm1d2_len_u16, u;
+	uint32_t asize;
+	unsigned cc;
+	uint16_t x0i;
+
+	/*
+	 * Compute (x-1)/2 (encoded): x is odd, so a one-bit right shift.
+	 */
+	xm1d2 = (unsigned char *)t;
+	xm1d2_len = ((x[0] - (x[0] >> 4)) + 7) >> 3;
+	br_i15_encode(xm1d2, xm1d2_len, x);
+	cc = 0;
+	for (u = 0; u < xm1d2_len; u ++) {
+		unsigned w;
+
+		w = xm1d2[u];
+		xm1d2[u] = (unsigned char)((w >> 1) | cc);
+		cc = w << 7;
+	}
+
+	/*
+	 * We used some words of the provided buffer for (x-1)/2.
+	 */
+	xm1d2_len_u16 = (xm1d2_len + 1) >> 1;
+	t += xm1d2_len_u16;
+	tlen -= xm1d2_len_u16;
+
+	xlen = (x[0] + 15) >> 4;
+	asize = x[0] - 1 - EQ0(x[0] & 15);
+	x0i = br_i15_ninv15(x[1]);
+	while (n -- > 0) {
+		uint16_t *a;
+		uint32_t eq1, eqm1;
+
+		/*
+		 * Generate a random base. We don't need the base to be
+		 * really uniform modulo x, so we just get a random
+		 * number which is one bit shorter than x.
+		 */
+		a = t;
+		a[0] = x[0];
+		a[xlen] = 0;
+		mkrand(rng, a, asize);
+
+		/*
+		 * Compute a^((x-1)/2) mod x. We assume here that the
+		 * function will not fail (the temporary array is large
+		 * enough).
+		 */
+		br_i15_modpow_opt(a, xm1d2, xm1d2_len,
+			x, x0i, t + 1 + xlen, tlen - 1 - xlen);
+
+		/*
+		 * We must obtain either 1 or x-1. Note that x is odd,
+		 * hence x-1 differs from x only in its low word (no
+		 * carry).
+		 */
+		eq1 = a[1] ^ 1;
+		eqm1 = a[1] ^ (x[1] - 1);
+		for (u = 2; u <= xlen; u ++) {
+			eq1 |= a[u];
+			eqm1 |= a[u] ^ x[u];
+		}
+
+		if ((EQ0(eq1) | EQ0(eqm1)) == 0) {
+			return 0;
+		}
+	}
+	return 1;
+}
+
+/*
+ * Create a random prime of the provided size. 'esize' is the _encoded_
+ * bit length. The two top bits and the two bottom bits are set to 1.
+ */
+static void
+mkprime(const br_prng_class **rng, uint16_t *x, uint32_t esize,
+	uint32_t pubexp, uint16_t *t, size_t tlen)
+{
+	size_t len;
+
+	x[0] = esize;
+	len = (esize + 15) >> 4;
+	for (;;) {
+		size_t u;
+		uint32_t m3, m5, m7, m11;
+		int rounds;
+
+		/*
+		 * Generate random bits. We force the two top bits and the
+		 * two bottom bits to 1.
+		 */
+		mkrand(rng, x, esize);
+		if ((esize & 15) == 0) {
+			x[len] |= 0x6000;
+		} else if ((esize & 15) == 1) {
+			x[len] |= 0x0001;
+			x[len - 1] |= 0x4000;
+		} else {
+			x[len] |= 0x0003 << ((esize & 15) - 2);
+		}
+		x[1] |= 0x0003;
+
+		/*
+		 * Trial division with low primes (3, 5, 7 and 11). We
+		 * use the following properties:
+		 *
+		 *   2^2 = 1 mod 3
+		 *   2^4 = 1 mod 5
+		 *   2^3 = 1 mod 7
+		 *   2^10 = 1 mod 11
+		 */
+		m3 = 0;
+		m5 = 0;
+		m7 = 0;
+		m11 = 0;
+		for (u = 0; u < len; u ++) {
+			uint32_t w;
+
+			w = x[1 + u];
+			m3 += w << (u & 1);
+			m3 = (m3 & 0xFF) + (m3 >> 8);
+			m5 += w << ((4 - u) & 3);
+			m5 = (m5 & 0xFF) + (m5 >> 8);
+			m7 += w;
+			m7 = (m7 & 0x1FF) + (m7 >> 9);
+			m11 += w << (5 & -(u & 1));
+			m11 = (m11 & 0x3FF) + (m11 >> 10);
+		}
+
+		/*
+		 * Maximum values of m* at this point:
+		 *  m3:   511
+		 *  m5:   2310
+		 *  m7:   510
+		 *  m11:  2047
+		 * We use the same properties to make further reductions.
+		 */
+
+		m3 = (m3 & 0x0F) + (m3 >> 4);      /* max: 46 */
+		m3 = (m3 & 0x0F) + (m3 >> 4);      /* max: 16 */
+		m3 = ((m3 * 43) >> 5) & 3;
+
+		m5 = (m5 & 0xFF) + (m5 >> 8);      /* max: 263 */
+		m5 = (m5 & 0x0F) + (m5 >> 4);      /* max: 30 */
+		m5 = (m5 & 0x0F) + (m5 >> 4);      /* max: 15 */
+		m5 -= 10 & -GT(m5, 9);
+		m5 -= 5 & -GT(m5, 4);
+
+		m7 = (m7 & 0x3F) + (m7 >> 6);      /* max: 69 */
+		m7 = (m7 & 7) + (m7 >> 3);         /* max: 14 */
+		m7 = ((m7 * 147) >> 7) & 7;
+
+		/*
+		 * 2^5 = 32 = -1 mod 11.
+		 */
+		m11 = (m11 & 0x1F) + 66 - (m11 >> 5);   /* max: 97 */
+		m11 -= 88 & -GT(m11, 87);
+		m11 -= 44 & -GT(m11, 43);
+		m11 -= 22 & -GT(m11, 21);
+		m11 -= 11 & -GT(m11, 10);
+
+		/*
+		 * If any of these residues is 0, then the candidate is
+		 * not prime. Also, if pubexp is 3, 5, 7 or 11, and the
+		 * corresponding residue is 1, then the candidate must
+		 * be rejected, because we need e to be invertible
+		 * modulo p-1. We can use simple comparisons here
+		 * because they won't leak information on a candidate
+		 * that we keep, only on one that we reject (and is thus
+		 * not secret).
+		 */
+		if (m3 == 0 || m5 == 0 || m7 == 0 || m11 == 0) {
+			continue;
+		}
+		if ((pubexp == 3 && m3 == 1)
+			|| (pubexp == 5 && m5 == 1)
+			|| (pubexp == 7 && m7 == 1)
+			|| (pubexp == 11 && m11 == 1))
+		{
+			continue;
+		}
+
+		/*
+		 * More trial divisions.
+		 */
+		if (!trial_divisions(x, t)) {
+			continue;
+		}
+
+		/*
+		 * Miller-Rabin algorithm. Since we selected a random
+		 * integer, not a maliciously crafted integer, we can use
+		 * relatively few rounds to lower the risk of a false
+		 * positive (i.e. declaring prime a non-prime) under
+		 * 2^(-80). It is not useful to lower the probability much
+		 * below that, since that would be substantially below
+		 * the probability of the hardware misbehaving. Sufficient
+		 * numbers of rounds are extracted from the Handbook of
+		 * Applied Cryptography, note 4.49 (page 149).
+		 *
+		 * Since we work on the encoded size (esize), we need to
+		 * compare with encoded thresholds.
+		 */
+		if (esize < 320) {
+			rounds = 12;
+		} else if (esize < 480) {
+			rounds = 9;
+		} else if (esize < 693) {
+			rounds = 6;
+		} else if (esize < 906) {
+			rounds = 4;
+		} else if (esize < 1386) {
+			rounds = 3;
+		} else {
+			rounds = 2;
+		}
+
+		if (miller_rabin(rng, x, rounds, t, tlen)) {
+			return;
+		}
+	}
+}
+
+/*
+ * Let p be a prime (p > 2^33, p = 3 mod 4). Let m = (p-1)/2, provided
+ * as parameter (with announced bit length equal to that of p). This
+ * function computes d = 1/e mod p-1 (for an odd integer e). Returned
+ * value is 1 on success, 0 on error (an error is reported if e is not
+ * invertible modulo p-1).
+ *
+ * The temporary buffer (t) must have room for at least 4 integers of
+ * the size of p.
+ */
+static uint32_t
+invert_pubexp(uint16_t *d, const uint16_t *m, uint32_t e, uint16_t *t)
+{
+	uint16_t *f;
+	uint32_t r;
+
+	f = t;
+	t += 1 + ((m[0] + 15) >> 4);
+
+	/*
+	 * Compute d = 1/e mod m. Since p = 3 mod 4, m is odd.
+	 */
+	br_i15_zero(d, m[0]);
+	d[1] = 1;
+	br_i15_zero(f, m[0]);
+	f[1] = e & 0x7FFF;
+	f[2] = (e >> 15) & 0x7FFF;
+	f[3] = e >> 30;
+	r = br_i15_moddiv(d, f, m, br_i15_ninv15(m[1]), t);
+
+	/*
+	 * We really want d = 1/e mod p-1, with p-1 = 2m. By the CRT,
+	 * the result is either the d we got, or d + m.
+	 *
+	 * Let's write e*d = 1 + k*m, for some integer k. Integers e
+	 * and m are odd. If d is odd, then e*d is odd, which implies
+	 * that k must be even; in that case, e*d = 1 + (k/2)*2m, and
+	 * thus d is already fine. Conversely, if d is even, then k
+	 * is odd, and we must add m to d in order to get the correct
+	 * result.
+	 */
+	br_i15_add(d, m, (uint32_t)(1 - (d[1] & 1)));
+
+	return r;
+}
+
+/*
+ * Swap the contents (len bytes) of two buffers. They must be disjoint.
+ */
+static void
+bufswap(void *b1, void *b2, size_t len)
+{
+	size_t u;
+	unsigned char *buf1, *buf2;
+
+	buf1 = b1;
+	buf2 = b2;
+	for (u = 0; u < len; u ++) {
+		unsigned w;
+
+		w = buf1[u];
+		buf1[u] = buf2[u];
+		buf2[u] = w;
+	}
+}
+
+/* see bearssl_rsa.h */
+uint32_t
+br_rsa_i15_keygen(const br_prng_class **rng,
+	br_rsa_private_key *sk, void *kbuf_priv,
+	br_rsa_public_key *pk, void *kbuf_pub,
+	unsigned size, uint32_t pubexp)
+{
+	uint32_t esize_p, esize_q;
+	size_t plen, qlen, tlen;
+	uint16_t *p, *q, *t;
+	uint16_t tmp[TEMPS];
+	uint32_t r;
+
+	if (size < BR_MIN_RSA_SIZE || size > BR_MAX_RSA_SIZE) {
+		return 0;
+	}
+	if (pubexp == 0) {
+		pubexp = 3;
+	} else if (pubexp == 1 || (pubexp & 1) == 0) {
+		return 0;
+	}
+
+	esize_p = (size + 1) >> 1;
+	esize_q = size - esize_p;
+	sk->n_bitlen = size;
+	sk->p = kbuf_priv;
+	sk->plen = (esize_p + 7) >> 3;
+	sk->q = sk->p + sk->plen;
+	sk->qlen = (esize_q + 7) >> 3;
+	sk->dp = sk->q + sk->qlen;
+	sk->dplen = sk->plen;
+	sk->dq = sk->dp + sk->dplen;
+	sk->dqlen = sk->qlen;
+	sk->iq = sk->dq + sk->dqlen;
+	sk->iqlen = sk->plen;
+
+	if (pk != NULL) {
+		pk->n = kbuf_pub;
+		pk->nlen = (size + 7) >> 3;
+		pk->e = pk->n + pk->nlen;
+		pk->elen = 4;
+		br_enc32be(pk->e, pubexp);
+		while (*pk->e == 0) {
+			pk->e ++;
+			pk->elen --;
+		}
+	}
+
+	/*
+	 * We now switch to encoded sizes.
+	 *
+	 * floor((x * 17477) / (2^18)) is equal to floor(x/15) for all
+	 * integers x from 0 to 23833.
+	 */
+	esize_p += MUL15(esize_p, 17477) >> 18;
+	esize_q += MUL15(esize_q, 17477) >> 18;
+	plen = (esize_p + 15) >> 4;
+	qlen = (esize_q + 15) >> 4;
+	p = tmp;
+	q = p + 1 + plen;
+	t = q + 1 + qlen;
+	tlen = ((sizeof tmp) / sizeof(uint16_t)) - (2 + plen + qlen);
+
+	/*
+	 * When looking for primes p and q, we temporarily divide
+	 * candidates by 2, in order to compute the inverse of the
+	 * public exponent.
+	 */
+
+	for (;;) {
+		mkprime(rng, p, esize_p, pubexp, t, tlen);
+		br_i15_rshift(p, 1);
+		if (invert_pubexp(t, p, pubexp, t + 1 + plen)) {
+			br_i15_add(p, p, 1);
+			p[1] |= 1;
+			br_i15_encode(sk->p, sk->plen, p);
+			br_i15_encode(sk->dp, sk->dplen, t);
+			break;
+		}
+	}
+
+	for (;;) {
+		mkprime(rng, q, esize_q, pubexp, t, tlen);
+		br_i15_rshift(q, 1);
+		if (invert_pubexp(t, q, pubexp, t + 1 + qlen)) {
+			br_i15_add(q, q, 1);
+			q[1] |= 1;
+			br_i15_encode(sk->q, sk->qlen, q);
+			br_i15_encode(sk->dq, sk->dqlen, t);
+			break;
+		}
+	}
+
+	/*
+	 * If p and q have the same size, then it is possible that q > p
+	 * (when the target modulus size is odd, we generate p with a
+	 * greater bit length than q). If q > p, we want to swap p and q
+	 * (and also dp and dq) for two reasons:
+	 *  - The final step below (inversion of q modulo p) is easier if
+	 *    p > q.
+	 *  - While BearSSL's RSA code is perfectly happy with RSA keys such
+	 *    that p < q, some other implementations have restrictions and
+	 *    require p > q.
+	 *
+	 * Note that we can do a simple non-constant-time swap here,
+	 * because the only information we leak here is that we insist on
+	 * returning p and q such that p > q, which is not a secret.
+	 */
+	if (esize_p == esize_q && br_i15_sub(p, q, 0) == 1) {
+		bufswap(p, q, (1 + plen) * sizeof *p);
+		bufswap(sk->p, sk->q, sk->plen);
+		bufswap(sk->dp, sk->dq, sk->dplen);
+	}
+
+	/*
+	 * We have produced p, q, dp and dq. We can now compute iq = 1/q mod p.
+	 *
+	 * We ensured that p >= q, so this is just a matter of updating the
+	 * header word for q (and possibly adding an extra word).
+	 *
+	 * Theoretically, the call below may fail, in case we were
+	 * extraordinarily unlucky, and p = q. Another failure case is if
+	 * Miller-Rabin failed us _twice_, and p and q are non-prime and
+	 * have a factor in common. We report the error mostly because it
+	 * is cheap and we can, but in practice this never happens (or, at
+	 * least, it happens way less often than hardware glitches).
+	 */
+	q[0] = p[0];
+	if (plen > qlen) {
+		q[plen] = 0;
+		t ++;
+		tlen --;
+	}
+	br_i15_zero(t, p[0]);
+	t[1] = 1;
+	r = br_i15_moddiv(t, q, p, br_i15_ninv15(p[1]), t + 1 + plen);
+	br_i15_encode(sk->iq, sk->iqlen, t);
+
+	/*
+	 * Compute the public modulus too, if required.
+	 */
+	if (pk != NULL) {
+		br_i15_zero(t, p[0]);
+		br_i15_mulacc(t, p, q);
+		br_i15_encode(pk->n, pk->nlen, t);
+	}
+
+	return r;
+}
diff --git a/third_party/bearssl/src/rsa_i15_modulus.c b/third_party/bearssl/src/rsa_i15_modulus.c
new file mode 100644
index 0000000..16458c3
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_modulus.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; returns the modulus byte length, 0 if tmp[] is too small */
+size_t
+br_rsa_i15_compute_modulus(void *n, const br_rsa_private_key *sk)
+{
+	uint16_t tmp[4 * (((BR_MAX_RSA_SIZE / 2) + 14) / 15) + 5];
+	uint16_t *t, *p, *q;
+	const unsigned char *pbuf, *qbuf;
+	size_t nlen, plen, qlen, tlen;
+
+	/*
+	 * Compute actual byte lengths of p and q (leading zeros skipped).
+	 */
+	pbuf = sk->p;
+	plen = sk->plen;
+	while (plen > 0 && *pbuf == 0) {
+		pbuf ++;
+		plen --;
+	}
+	qbuf = sk->q;
+	qlen = sk->qlen;
+	while (qlen > 0 && *qbuf == 0) {
+		qbuf ++;
+		qlen --;
+	}
+
+	t = tmp;
+	tlen = (sizeof tmp) / (sizeof tmp[0]);
+
+	/*
+	 * Decode p.
+	 */
+	if ((15 * tlen) < (plen << 3) + 15) {
+		return 0;
+	}
+	br_i15_decode(t, pbuf, plen);
+	p = t;
+	plen = (p[0] + 31) >> 4;
+	t += plen;
+	tlen -= plen;
+
+	/*
+	 * Decode q.
+	 */
+	if ((15 * tlen) < (qlen << 3) + 15) {
+		return 0;
+	}
+	br_i15_decode(t, qbuf, qlen);
+	q = t;
+	qlen = (q[0] + 31) >> 4;
+	t += qlen;
+	tlen -= qlen;
+
+	/*
+	 * Computation can proceed only if we have enough room for the
+	 * modulus.
+	 */
+	if (tlen < (plen + qlen + 1)) {
+		return 0;
+	}
+
+	/*
+	 * Private key already contains the modulus bit length, from which
+	 * we can infer the output length. Even if n is NULL, we still had
+	 * to decode p and q to make sure that the product can be computed.
+	 */
+	nlen = (sk->n_bitlen + 7) >> 3;
+	if (n != NULL) {
+		br_i15_zero(t, p[0]);
+		br_i15_mulacc(t, p, q);
+		br_i15_encode(n, nlen, t);
+	}
+	return nlen;
+}
diff --git a/third_party/bearssl/src/rsa_i15_oaep_decrypt.c b/third_party/bearssl/src/rsa_i15_oaep_decrypt.c
new file mode 100644
index 0000000..927eecd
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_oaep_decrypt.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; *len must equal the modulus length in bytes */
+uint32_t
+br_rsa_i15_oaep_decrypt(const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_private_key *sk, void *data, size_t *len)
+{
+	uint32_t r;
+
+	if (*len != ((sk->n_bitlen + 7) >> 3)) {
+		return 0;
+	}
+	r = br_rsa_i15_private(data, sk);
+	r &= br_rsa_oaep_unpad(dig, label, label_len, data, len);
+	return r;
+}
diff --git a/third_party/bearssl/src/rsa_i15_oaep_encrypt.c b/third_party/bearssl/src/rsa_i15_oaep_encrypt.c
new file mode 100644
index 0000000..b9a6cfa
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_oaep_encrypt.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; 0 if br_rsa_oaep_pad() or br_rsa_i15_public() yields 0 */
+size_t
+br_rsa_i15_oaep_encrypt(
+	const br_prng_class **rnd, const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_public_key *pk,
+	void *dst, size_t dst_max_len,
+	const void *src, size_t src_len)
+{
+	size_t dlen;
+
+	dlen = br_rsa_oaep_pad(rnd, dig, label, label_len,
+		pk, dst, dst_max_len, src, src_len);
+	if (dlen == 0) {
+		return 0;
+	}
+	return dlen & -(size_t)br_rsa_i15_public(dst, dlen, pk);
+}
diff --git a/third_party/bearssl/src/rsa_i15_pkcs1_sign.c b/third_party/bearssl/src/rsa_i15_pkcs1_sign.c
new file mode 100644
index 0000000..f519423
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_pkcs1_sign.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; pads into x, then applies the private key in place */
+uint32_t
+br_rsa_i15_pkcs1_sign(const unsigned char *hash_oid,
+	const unsigned char *hash, size_t hash_len,
+	const br_rsa_private_key *sk, unsigned char *x)
+{
+	if (!br_rsa_pkcs1_sig_pad(hash_oid, hash, hash_len, sk->n_bitlen, x)) {
+		return 0;
+	}
+	return br_rsa_i15_private(x, sk);
+}
diff --git a/third_party/bearssl/src/rsa_i15_pkcs1_vrfy.c b/third_party/bearssl/src/rsa_i15_pkcs1_vrfy.c
new file mode 100644
index 0000000..2c35184
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_pkcs1_vrfy.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; works on a stack copy, so x itself is not modified */
+uint32_t
+br_rsa_i15_pkcs1_vrfy(const unsigned char *x, size_t xlen,
+	const unsigned char *hash_oid, size_t hash_len,
+	const br_rsa_public_key *pk, unsigned char *hash_out)
+{
+	unsigned char sig[BR_MAX_RSA_SIZE >> 3];
+
+	if (xlen > (sizeof sig)) {
+		return 0;
+	}
+	memcpy(sig, x, xlen);
+	if (!br_rsa_i15_public(sig, xlen, pk)) {
+		return 0;
+	}
+	return br_rsa_pkcs1_sig_unpad(sig, xlen, hash_oid, hash_len, hash_out);
+}
diff --git a/third_party/bearssl/src/rsa_i15_priv.c b/third_party/bearssl/src/rsa_i15_priv.c
new file mode 100644
index 0000000..177cc3a
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_priv.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define U      (2 + ((BR_MAX_RSA_FACTOR + 14) / 15))
+#define TLEN   (8 * U)
+
+/* see bearssl_rsa.h; x is overwritten in place with the result */
+uint32_t
+br_rsa_i15_private(unsigned char *x, const br_rsa_private_key *sk)
+{
+	const unsigned char *p, *q;
+	size_t plen, qlen;
+	size_t fwlen;
+	uint16_t p0i, q0i;
+	size_t xlen, u;
+	uint16_t tmp[1 + TLEN];
+	long z;
+	uint16_t *mp, *mq, *s1, *s2, *t1, *t2, *t3;
+	uint32_t r;
+
+	/*
+	 * Compute the actual lengths of p and q, in bytes.
+	 * These lengths are not considered secret (we cannot really hide
+	 * them anyway in constant-time code).
+	 */
+	p = sk->p;
+	plen = sk->plen;
+	while (plen > 0 && *p == 0) {
+		p ++;
+		plen --;
+	}
+	q = sk->q;
+	qlen = sk->qlen;
+	while (qlen > 0 && *q == 0) {
+		q ++;
+		qlen --;
+	}
+
+	/*
+	 * Compute the maximum factor length, in words.
+	 */
+	z = (long)(plen > qlen ? plen : qlen) << 3;
+	fwlen = 1;
+	while (z > 0) {
+		z -= 15;
+		fwlen ++;
+	}
+	/*
+	 * Round up the word length to an even number.
+	 */
+	fwlen += (fwlen & 1);
+
+	/*
+	 * We need to fit at least 6 values in the stack buffer.
+	 */
+	if (6 * fwlen > TLEN) {
+		return 0;
+	}
+
+	/*
+	 * Compute signature length (in bytes).
+	 */
+	xlen = (sk->n_bitlen + 7) >> 3;
+
+	/*
+	 * Ensure 32-bit alignment for value words.
+	 */
+	mq = tmp;
+	if (((uintptr_t)mq & 2) == 0) {
+		mq ++;
+	}
+
+	/*
+	 * Decode q.
+	 */
+	br_i15_decode(mq, q, qlen);
+
+	/*
+	 * Decode p.
+	 */
+	t1 = mq + fwlen;
+	br_i15_decode(t1, p, plen);
+
+	/*
+	 * Compute the modulus (product of the two factors), to compare
+	 * it with the source value. We use br_i15_mulacc(), since it's
+	 * already used later on.
+	 */
+	t2 = mq + 2 * fwlen;
+	br_i15_zero(t2, mq[0]);
+	br_i15_mulacc(t2, mq, t1);
+
+	/*
+	 * We encode the modulus into bytes, to perform the comparison
+	 * with bytes. We know that the product length, in bytes, is
+	 * exactly xlen.
+	 * The comparison actually computes the carry when subtracting
+	 * the modulus from the source value; that carry must be 1 for
+	 * a value in the correct range. We keep it in r, which is our
+	 * accumulator for the error code.
+	 */
+	t3 = mq + 4 * fwlen;
+	br_i15_encode(t3, xlen, t2);
+	u = xlen;
+	r = 0;
+	while (u > 0) {
+		uint32_t wn, wx;
+
+		u --;
+		wn = ((unsigned char *)t3)[u];
+		wx = x[u];
+		r = ((wx - (wn + r)) >> 8) & 1;
+	}
+
+	/*
+	 * Move the decoded p to another temporary buffer.
+	 */
+	mp = mq + 2 * fwlen;
+	memmove(mp, t1, fwlen * sizeof *t1);
+
+	/*
+	 * Compute s2 = x^dq mod q.
+	 */
+	q0i = br_i15_ninv15(mq[1]);
+	s2 = mq + fwlen;
+	br_i15_decode_reduce(s2, x, xlen, mq);
+	r &= br_i15_modpow_opt(s2, sk->dq, sk->dqlen, mq, q0i,
+		mq + 3 * fwlen, TLEN - 3 * fwlen);
+
+	/*
+	 * Compute s1 = x^dp mod p.
+	 */
+	p0i = br_i15_ninv15(mp[1]);
+	s1 = mq + 3 * fwlen;
+	br_i15_decode_reduce(s1, x, xlen, mp);
+	r &= br_i15_modpow_opt(s1, sk->dp, sk->dplen, mp, p0i,
+		mq + 4 * fwlen, TLEN - 4 * fwlen);
+
+	/*
+	 * Compute:
+	 *   h = (s1 - s2)*(1/q) mod p
+	 * s1 is an integer modulo p, but s2 is modulo q. PKCS#1 is
+	 * unclear about whether p may be lower than q (some existing,
+	 * widely deployed implementations of RSA don't tolerate p < q),
+	 * but we want to support that occurrence, so we need to use the
+	 * reduction function.
+	 *
+	 * Since we use br_i15_decode_reduce() for iq (purportedly, the
+	 * inverse of q modulo p), we also tolerate improperly large
+	 * values for this parameter.
+	 */
+	t1 = mq + 4 * fwlen;
+	t2 = mq + 5 * fwlen;
+	br_i15_reduce(t2, s2, mp);
+	br_i15_add(s1, mp, br_i15_sub(s1, t2, 1));
+	br_i15_to_monty(s1, mp);
+	br_i15_decode_reduce(t1, sk->iq, sk->iqlen, mp);
+	br_i15_montymul(t2, s1, t1, mp, p0i);
+
+	/*
+	 * h is now in t2. We compute the final result:
+	 *   s = s2 + q*h
+	 * All these operations are non-modular.
+	 *
+	 * We need mq, s2 and t2. We use the t3 buffer as destination.
+	 * The buffers mp, s1 and t1 are no longer needed, so we can
+	 * reuse them for t3. Moreover, the first step of the computation
+	 * is to copy s2 into t3, after which s2 is not needed. Right
+	 * now, mq is in slot 0, s2 is in slot 1, and t2 in slot 5.
+	 * Therefore, we have ample room for t3 by simply using s2.
+	 */
+	t3 = s2;
+	br_i15_mulacc(t3, mq, t2);
+
+	/*
+	 * Encode the result. Since we already checked the value of xlen,
+	 * we can just use it right away.
+	 */
+	br_i15_encode(x, xlen, t3);
+
+	/*
+	 * The only error conditions remaining at that point are invalid
+	 * values for p and q (even integers).
+	 */
+	return p0i & q0i & r;
+}
diff --git a/third_party/bearssl/src/rsa_i15_privexp.c b/third_party/bearssl/src/rsa_i15_privexp.c
new file mode 100644
index 0000000..57d6918
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_privexp.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h (returns length of d in bytes, 0 on invalid e/key) */
+size_t
+br_rsa_i15_compute_privexp(void *d,
+	const br_rsa_private_key *sk, uint32_t e)
+{
+	/*
+	 * We want to invert e modulo phi = (p-1)(q-1). This first
+	 * requires computing phi, which is easy since we have the factors
+	 * p and q in the private key structure.
+	 *
+	 * Since p = 3 mod 4 and q = 3 mod 4, phi/4 is an odd integer.
+	 * We could invert e modulo phi/4 then patch the result to
+	 * modulo phi, but this would involve assembling three modulus-wide
+	 * values (phi/4, 1 and e) and calling moddiv, that requires
+	 * three more temporaries, for a total of six big integers, or
+	 * slightly more than 3 kB of stack space for RSA-4096. This
+	 * exceeds our stack requirements.
+	 *
+	 * Instead, we first use one step of the extended GCD:
+	 *
+	 *   - We compute phi = k*e + r  (Euclidean division of phi by e).
+	 *     If public exponent e is correct, then r != 0 (e must be
+	 *     invertible modulo phi). We also have k != 0 since we
+	 *     enforce non-ridiculously-small factors.
+	 *
+	 *   - We find small u, v such that u*e - v*r = 1  (using a
+	 *     binary GCD; we can arrange for u < r and v < e, i.e. all
+	 *     values fit on 32 bits).
+	 *
+	 *   - Solution is: d = u + v*k
+	 *     This last computation is exact: since u < r and v < e,
+	 *     the above implies d < r + e*((phi-r)/e) = phi
+	 */
+
+	uint16_t tmp[4 * ((BR_MAX_RSA_FACTOR + 14) / 15) + 12];
+	uint16_t *p, *q, *k, *m, *z, *phi;
+	const unsigned char *pbuf, *qbuf;
+	size_t plen, qlen, u, len, dlen;
+	uint32_t r, a, b, u0, v0, u1, v1, he, hr;
+	int i;
+
+	/*
+	 * Check that e is correct (odd, and at least 3).
+	 */
+	if (e < 3 || (e & 1) == 0) {
+		return 0;
+	}
+
+	/*
+	 * Check lengths of p and q, and that they are both odd.
+	 */
+	pbuf = sk->p;
+	plen = sk->plen;
+	while (plen > 0 && *pbuf == 0) {
+		pbuf ++;
+		plen --;
+	}
+	if (plen < 5 || plen > (BR_MAX_RSA_FACTOR / 8)
+		|| (pbuf[plen - 1] & 1) != 1)
+	{
+		return 0;
+	}
+	qbuf = sk->q;
+	qlen = sk->qlen;
+	while (qlen > 0 && *qbuf == 0) {
+		qbuf ++;
+		qlen --;
+	}
+	if (qlen < 5 || qlen > (BR_MAX_RSA_FACTOR / 8)
+		|| (qbuf[qlen - 1] & 1) != 1)
+	{
+		return 0;
+	}
+
+	/*
+	 * Output length is that of the modulus; a NULL d is a length query.
+	 */
+	dlen = (sk->n_bitlen + 7) >> 3;
+	if (d == NULL) {
+		return dlen;
+	}
+
+	p = tmp;
+	br_i15_decode(p, pbuf, plen);
+	plen = (p[0] + 15) >> 4;
+	q = p + 1 + plen;
+	br_i15_decode(q, qbuf, qlen);
+	qlen = (q[0] + 15) >> 4;
+
+	/*
+	 * Compute phi = (p-1)*(q-1), then move it over p-1 and q-1 (that
+	 * we do not need anymore). The mulacc function sets the announced
+	 * bit length of phi to be the sum of the announced bit lengths of
+	 * p-1 and q-1, which is usually exact but may overshoot by one
+	 * bit in some cases; we readjust it to its true length.
+	 */
+	p[1] --;   /* p was checked to be odd: no borrow */
+	q[1] --;   /* q was checked to be odd: no borrow */
+	phi = q + 1 + qlen;
+	br_i15_zero(phi, p[0]);
+	br_i15_mulacc(phi, p, q);
+	len = (phi[0] + 15) >> 4;
+	memmove(tmp, phi, (1 + len) * sizeof *phi);
+	phi = tmp;
+	phi[0] = br_i15_bit_length(phi + 1, len);
+	len = (phi[0] + 15) >> 4;
+
+	/*
+	 * Divide phi by public exponent e. The final remainder r must be
+	 * non-zero (otherwise, the key is invalid). The quotient is k,
+	 * which we write over phi, since we don't need phi after that.
+	 */
+	r = 0;
+	for (u = len; u >= 1; u --) {
+		/*
+		 * Upon entry, r < e, and phi[u] < 2^15; hence,
+		 * hi:lo < e*2^15. Thus, the produced word k[u]
+		 * must be lower than 2^15, and the new remainder r
+		 * is lower than e.
+		 */
+		uint32_t hi, lo;
+
+		hi = r >> 17;
+		lo = (r << 15) + phi[u];
+		phi[u] = br_divrem(hi, lo, e, &r);
+	}
+	if (r == 0) {
+		return 0;
+	}
+	k = phi;
+
+	/*
+	 * Compute u and v such that u*e - v*r = GCD(e,r). We use
+	 * a binary GCD algorithm, with 6 extra integers a, b,
+	 * u0, u1, v0 and v1. Initial values are:
+	 *   a = e    u0 = 1   v0 = 0
+	 *   b = r    u1 = r   v1 = e-1
+	 * The following invariants are maintained:
+	 *   a = u0*e - v0*r
+	 *   b = u1*e - v1*r
+	 *   0 < a <= e
+	 *   0 < b <= r
+	 *   0 <= u0 <= r
+	 *   0 <= v0 <= e
+	 *   0 <= u1 <= r
+	 *   0 <= v1 <= e
+	 *
+	 * At each iteration, we reduce either a or b by one bit, and
+	 * adjust u0, u1, v0 and v1 to maintain the invariants:
+	 *  - if a is even, then a <- a/2
+	 *  - otherwise, if b is even, then b <- b/2
+	 *  - otherwise, if a > b, then a <- (a-b)/2
+	 *  - otherwise, if b > a, then b <- (b-a)/2
+	 * Algorithm stops when a = b. At that point, the common value
+	 * is the GCD of e and r; it must be 1 (otherwise, the private
+	 * key or public exponent is not valid). The (u0,v0) or (u1,v1)
+	 * pairs are the solution we are looking for.
+	 *
+	 * Since either a or b is reduced by at least 1 bit at each
+	 * iteration, 62 iterations are enough to reach the end
+	 * condition.
+	 *
+	 * To maintain the invariants, we must compute the same operations
+	 * on the u* and v* values that we do on a and b:
+	 *  - When a is divided by 2, u0 and v0 must be divided by 2.
+	 *  - When b is divided by 2, u1 and v1 must be divided by 2.
+	 *  - When b is subtracted from a, u1 and v1 are subtracted from
+	 *    u0 and v0, respectively.
+	 *  - When a is subtracted from b, u0 and v0 are subtracted from
+	 *    u1 and v1, respectively.
+	 *
+	 * However, we want to keep the u* and v* values in their proper
+	 * ranges. The following remarks apply:
+	 *
+	 *  - When a is divided by 2, then a is even. Therefore:
+	 *
+	 *     * If r is odd, then u0 and v0 must have the same parity;
+	 *       if they are both odd, then adding r to u0 and e to v0
+	 *       makes them both even, and the division by 2 brings them
+	 *       back to the proper range.
+	 *
+	 *     * If r is even, then u0 must be even; if v0 is odd, then
+	 *       adding r to u0 and e to v0 makes them both even, and the
+	 *       division by 2 brings them back to the proper range.
+	 *
+	 *    Thus, all we need to do is to look at the parity of v0,
+	 *    and add (r,e) to (u0,v0) when v0 is odd. In order to avoid
+	 *    a 32-bit overflow, we can add ((r+1)/2,(e/2)+1) after the
+	 *    division (r+1 does not overflow since r < e; and (e/2)+1
+	 *    is equal to (e+1)/2 since e is odd).
+	 *
+	 *  - When we subtract b from a, three cases may occur:
+	 *
+	 *     * u1 <= u0 and v1 <= v0: just do the subtractions
+	 *
+	 *     * u1 > u0 and v1 > v0: compute:
+	 *         (u0, v0) <- (u0 + r - u1, v0 + e - v1)
+	 *
+	 *     * u1 <= u0 and v1 > v0: compute:
+	 *         (u0, v0) <- (u0 + r - u1, v0 + e - v1)
+	 *
+	 *    The fourth case (u1 > u0 and v1 <= v0) is not possible
+	 *    because it would contradict "b < a" (which is the reason
+	 *    why we subtract b from a).
+	 *
+	 *    The tricky case is the third one: from the equations, it
+	 *    seems that u0 may go out of range. However, the invariants
+	 *    and ranges of other values imply that, in that case, the
+	 *    new u0 does not actually exceed the range.
+	 *
+	 *    We can thus handle the subtraction by adding (r,e) based
+	 *    solely on the comparison between v0 and v1.
+	 */
+	a = e;
+	b = r;
+	u0 = 1;
+	v0 = 0;
+	u1 = r;
+	v1 = e - 1;
+	hr = (r + 1) >> 1;
+	he = (e >> 1) + 1;
+	for (i = 0; i < 62; i ++) {
+		uint32_t oa, ob, agtb, bgta;
+		uint32_t sab, sba, da, db;
+		uint32_t ctl;
+
+		oa = a & 1;                  /* 1 if a is odd */
+		ob = b & 1;                  /* 1 if b is odd */
+		agtb = GT(a, b);             /* 1 if a > b */
+		bgta = GT(b, a);             /* 1 if b > a */
+
+		sab = oa & ob & agtb;        /* 1 if a <- a-b */
+		sba = oa & ob & bgta;        /* 1 if b <- b-a */
+
+		/* a <- a-b, u0 <- u0-u1 mod r, v0 <- v0-v1 mod e */
+		ctl = GT(v1, v0);
+		a -= b & -sab;
+		u0 -= (u1 - (r & -ctl)) & -sab;
+		v0 -= (v1 - (e & -ctl)) & -sab;
+
+		/* b <- b-a, u1 <- u1-u0 mod r, v1 <- v1-v0 mod e */
+		ctl = GT(v0, v1);
+		b -= a & -sba;
+		u1 -= (u0 - (r & -ctl)) & -sba;
+		v1 -= (v0 - (e & -ctl)) & -sba;
+
+		da = NOT(oa) | sab;          /* 1 if a <- a/2 */
+		db = (oa & NOT(ob)) | sba;   /* 1 if b <- b/2 */
+
+		/* a <- a/2, u0 <- u0/2 mod r, v0 <- v0/2 mod e */
+		ctl = v0 & 1;
+		a ^= (a ^ (a >> 1)) & -da;
+		u0 ^= (u0 ^ ((u0 >> 1) + (hr & -ctl))) & -da;
+		v0 ^= (v0 ^ ((v0 >> 1) + (he & -ctl))) & -da;
+
+		/* b <- b/2, u1 <- u1/2 mod r, v1 <- v1/2 mod e */
+		ctl = v1 & 1;
+		b ^= (b ^ (b >> 1)) & -db;
+		u1 ^= (u1 ^ ((u1 >> 1) + (hr & -ctl))) & -db;
+		v1 ^= (v1 ^ ((v1 >> 1) + (he & -ctl))) & -db;
+	}
+
+	/*
+	 * Check that the GCD is indeed 1. If not, then the key is invalid
+	 * (and there's no harm in leaking that piece of information).
+	 */
+	if (a != 1) {
+		return 0;
+	}
+
+	/*
+	 * Now we have u0*e - v0*r = 1. Let's compute the result as:
+	 *   d = u0 + v0*k
+	 * We still have k in the tmp[] array, and its announced bit
+	 * length is that of phi.
+	 */
+	m = k + 1 + len;
+	m[0] = (2 << 4) + 2;  /* 32 bits, encoded (2 words + 2 bits) */
+	m[1] = v0 & 0x7FFF;
+	m[2] = (v0 >> 15) & 0x7FFF;
+	m[3] = v0 >> 30;
+	z = m + 4;
+	br_i15_zero(z, k[0]);
+	z[1] = u0 & 0x7FFF;
+	z[2] = (u0 >> 15) & 0x7FFF;
+	z[3] = u0 >> 30;
+	br_i15_mulacc(z, k, m);
+
+	/*
+	 * Encode the result.
+	 */
+	br_i15_encode(d, dlen, z);
+	return dlen;
+}
diff --git a/third_party/bearssl/src/rsa_i15_pss_sign.c b/third_party/bearssl/src/rsa_i15_pss_sign.c
new file mode 100644
index 0000000..dd9385b
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_pss_sign.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h: pad hash into x[] (PSS), then apply private key to x[] */
+uint32_t
+br_rsa_i15_pss_sign(const br_prng_class **rng,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const unsigned char *hash, size_t salt_len,
+	const br_rsa_private_key *sk, unsigned char *x)
+{
+	if (!br_rsa_pss_sig_pad(rng, hf_data, hf_mgf1, hash,
+		salt_len, sk->n_bitlen, x))
+	{
+		return 0;
+	}
+	return br_rsa_i15_private(x, sk);
+}
diff --git a/third_party/bearssl/src/rsa_i15_pss_vrfy.c b/third_party/bearssl/src/rsa_i15_pss_vrfy.c
new file mode 100644
index 0000000..7d9f2cb
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_pss_vrfy.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h: copy x[] locally, apply public key, check PSS padding */
+uint32_t
+br_rsa_i15_pss_vrfy(const unsigned char *x, size_t xlen,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const void *hash, size_t salt_len, const br_rsa_public_key *pk)
+{
+	unsigned char sig[BR_MAX_RSA_SIZE >> 3];
+
+	if (xlen > (sizeof sig)) {
+		return 0;
+	}
+	memcpy(sig, x, xlen);
+	if (!br_rsa_i15_public(sig, xlen, pk)) {
+		return 0;
+	}
+	return br_rsa_pss_sig_unpad(hf_data, hf_mgf1,
+		hash, salt_len, pk, sig);
+}
diff --git a/third_party/bearssl/src/rsa_i15_pub.c b/third_party/bearssl/src/rsa_i15_pub.c
new file mode 100644
index 0000000..9eab5e8
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_pub.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * As a strict minimum, we need four buffers that can hold a
+ * modular integer. TLEN is a count of 16-bit words.
+ */
+#define TLEN   (4 * (2 + ((BR_MAX_RSA_SIZE + 14) / 15)))
+
+/* see bearssl_rsa.h (x[] is replaced in place; returned value is 1 or 0) */
+uint32_t
+br_rsa_i15_public(unsigned char *x, size_t xlen,
+	const br_rsa_public_key *pk)
+{
+	const unsigned char *n;
+	size_t nlen;
+	uint16_t tmp[1 + TLEN];
+	uint16_t *m, *a, *t;
+	size_t fwlen;
+	long z;
+	uint16_t m0i;
+	uint32_t r;
+
+	/*
+	 * Get the actual length of the modulus, and see if it fits within
+	 * our stack buffer. We also check that the length of x[] is valid.
+	 */
+	n = pk->n;
+	nlen = pk->nlen;
+	while (nlen > 0 && *n == 0) {
+		n ++;
+		nlen --;
+	}
+	if (nlen == 0 || nlen > (BR_MAX_RSA_SIZE >> 3) || xlen != nlen) {
+		return 0;
+	}
+	z = (long)nlen << 3;
+	fwlen = 1;
+	while (z > 0) {
+		z -= 15;
+		fwlen ++;
+	}
+	/*
+	 * Round up length to an even number.
+	 */
+	fwlen += (fwlen & 1);
+
+	/*
+	 * The modulus gets decoded into m[].
+	 * The value to exponentiate goes into a[].
+	 * The temporaries for modular exponentiations are in t[].
+	 *
+	 * We want the first value word of each integer to be aligned
+	 * on a 32-bit boundary (tmp[] has one extra word to allow that).
+	 */
+	m = tmp;
+	if (((uintptr_t)m & 2) == 0) {
+		m ++;
+	}
+	a = m + fwlen;
+	t = m + 2 * fwlen;
+
+	/*
+	 * Decode the modulus.
+	 */
+	br_i15_decode(m, n, nlen);
+	m0i = br_i15_ninv15(m[1]);
+
+	/*
+	 * Note: if m[] is even, then m0i == 0. Otherwise, m0i must be
+	 * an odd integer.
+	 */
+	r = m0i & 1;
+
+	/*
+	 * Decode x[] into a[]; we also check that its value is proper.
+	 */
+	r &= br_i15_decode_mod(a, x, xlen, m);
+
+	/*
+	 * Compute the modular exponentiation (returned value is ignored).
+	 */
+	br_i15_modpow_opt(a, pk->e, pk->elen, m, m0i, t, TLEN - 2 * fwlen);
+
+	/*
+	 * Encode the result.
+	 */
+	br_i15_encode(x, xlen, a);
+	return r;
+}
diff --git a/third_party/bearssl/src/rsa_i15_pubexp.c b/third_party/bearssl/src/rsa_i15_pubexp.c
new file mode 100644
index 0000000..803bff7
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_pubexp.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Recompute public exponent, based on factor p and reduced private
+ * exponent dp. Returned value is 0 on error (invalid/unsupported values).
+ */
+static uint32_t
+get_pubexp(const unsigned char *pbuf, size_t plen,
+	const unsigned char *dpbuf, size_t dplen)
+{
+	/*
+	 * dp is the inverse of e modulo p-1. If p = 3 mod 4, then
+	 * p-1 = 2*((p-1)/2). Taken modulo 2, e is odd and has inverse 1;
+	 * thus, dp must be odd.
+	 *
+	 * We compute the inverse of dp modulo (p-1)/2. This requires
+	 * first reducing dp modulo (p-1)/2 (this can be done with a
+	 * conditional subtract, no need to use the generic modular
+	 * reduction function); then, we use moddiv.
+	 */
+
+	uint16_t tmp[6 * ((BR_MAX_RSA_FACTOR + 29) / 15)];
+	uint16_t *p, *dp, *x;
+	size_t len;
+	uint32_t e;
+
+	/*
+	 * Compute actual factor length (in bytes) and check that it fits
+	 * under our size constraints.
+	 */
+	while (plen > 0 && *pbuf == 0) {
+		pbuf ++;
+		plen --;
+	}
+	if (plen == 0 || plen < 5 || plen > (BR_MAX_RSA_FACTOR / 8)) {
+		return 0;
+	}
+
+	/*
+	 * Compute actual reduced exponent length (in bytes) and check that
+	 * it is not longer than p (at equal length, top bytes are compared).
+	 */
+	while (dplen > 0 && *dpbuf == 0) {
+		dpbuf ++;
+		dplen --;
+	}
+	if (dplen > plen || dplen == 0
+		|| (dplen == plen && dpbuf[0] > pbuf[0]))
+	{
+		return 0;
+	}
+
+	/*
+	 * Verify that p = 3 mod 4 and that dp is odd.
+	 */
+	if ((pbuf[plen - 1] & 3) != 3 || (dpbuf[dplen - 1] & 1) != 1) {
+		return 0;
+	}
+
+	/*
+	 * Decode p and compute (p-1)/2 (p is odd). len includes the header.
+	 */
+	p = tmp;
+	br_i15_decode(p, pbuf, plen);
+	len = (p[0] + 31) >> 4;
+	br_i15_rshift(p, 1);
+
+	/*
+	 * Decode dp and make sure its announced bit length matches that of
+	 * p (we already know that the size of dp, in bits, does not exceed
+	 * the size of p, so we just have to copy the header word).
+	 */
+	dp = p + len;
+	memset(dp, 0, len * sizeof *dp);
+	br_i15_decode(dp, dpbuf, dplen);
+	dp[0] = p[0];
+
+	/*
+	 * Subtract (p-1)/2 from dp if necessary.
+	 */
+	br_i15_sub(dp, p, NOT(br_i15_sub(dp, p, 0)));
+
+	/*
+	 * If another subtraction is needed, then this means that the
+	 * value was invalid. We don't care to leak information about
+	 * invalid keys.
+	 */
+	if (br_i15_sub(dp, p, 0) == 0) {
+		return 0;
+	}
+
+	/*
+	 * Invert dp modulo (p-1)/2. If the inversion fails, then the
+	 * key value was invalid.
+	 */
+	x = dp + len;
+	br_i15_zero(x, p[0]);
+	x[1] = 1;
+	if (br_i15_moddiv(x, dp, p, br_i15_ninv15(p[1]), x + len) == 0) {
+		return 0;
+	}
+
+	/*
+	 * We now have an inverse. We must set it to zero (error) if its
+	 * length is greater than 32 bits and/or if it is an even integer.
+	 * Take care that the bit_length function returns an encoded
+	 * bit length (35 is the encoding of 33 bits).
+	 */
+	e = (uint32_t)x[1] | ((uint32_t)x[2] << 15) | ((uint32_t)x[3] << 30);
+	e &= -LT(br_i15_bit_length(x + 1, len - 1), 35);
+	e &= -(e & 1);
+	return e;
+}
+
+/* see bearssl_rsa.h */
+uint32_t
+br_rsa_i15_compute_pubexp(const br_rsa_private_key *sk)
+{
+	/*
+	 * Get the public exponent from both p and q. This is the right
+	 * exponent if we get twice the same value; otherwise we return 0.
+	 */
+	uint32_t ep, eq;
+
+	ep = get_pubexp(sk->p, sk->plen, sk->dp, sk->dplen);
+	eq = get_pubexp(sk->q, sk->qlen, sk->dq, sk->dqlen);
+	return ep & -EQ(ep, eq);
+}
diff --git a/third_party/bearssl/src/rsa_i31_keygen.c b/third_party/bearssl/src/rsa_i31_keygen.c
new file mode 100644
index 0000000..77708f8
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_keygen.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h: delegates to keygen_inner with br_i31_modpow_opt */
+uint32_t
+br_rsa_i31_keygen(const br_prng_class **rng,
+	br_rsa_private_key *sk, void *kbuf_priv,
+	br_rsa_public_key *pk, void *kbuf_pub,
+	unsigned size, uint32_t pubexp)
+{
+	return br_rsa_i31_keygen_inner(rng,
+		sk, kbuf_priv, pk, kbuf_pub, size, pubexp,
+		&br_i31_modpow_opt);
+}
diff --git a/third_party/bearssl/src/rsa_i31_keygen_inner.c b/third_party/bearssl/src/rsa_i31_keygen_inner.c
new file mode 100644
index 0000000..98df445
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_keygen_inner.c
@@ -0,0 +1,608 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Make a random integer of the provided size (esize is an encoded bit
+ * length). Only value words are written; header word x[0] is untouched.
+ */
+static void
+mkrand(const br_prng_class **rng, uint32_t *x, uint32_t esize)
+{
+	size_t u, len;
+	unsigned m;
+
+	len = (esize + 31) >> 5;   /* number of value words */
+	(*rng)->generate(rng, x + 1, len * sizeof(uint32_t));
+	for (u = 1; u < len; u ++) {
+		x[u] &= 0x7FFFFFFF;
+	}
+	m = esize & 31;            /* bits in top word; 0 means a full word */
+	if (m == 0) {
+		x[len] &= 0x7FFFFFFF;
+	} else {
+		x[len] &= 0x7FFFFFFF >> (31 - m);
+	}
+}
+
+/*
+ * This is the big-endian unsigned representation of the product of
+ * all small primes from 13 to 1481 (used by trial_divisions(); the
+ * primes 3, 5, 7 and 11 are tested separately in mkprime()).
+ */
+static const unsigned char SMALL_PRIMES[] = {
+	0x2E, 0xAB, 0x92, 0xD1, 0x8B, 0x12, 0x47, 0x31, 0x54, 0x0A,
+	0x99, 0x5D, 0x25, 0x5E, 0xE2, 0x14, 0x96, 0x29, 0x1E, 0xB7,
+	0x78, 0x70, 0xCC, 0x1F, 0xA5, 0xAB, 0x8D, 0x72, 0x11, 0x37,
+	0xFB, 0xD8, 0x1E, 0x3F, 0x5B, 0x34, 0x30, 0x17, 0x8B, 0xE5,
+	0x26, 0x28, 0x23, 0xA1, 0x8A, 0xA4, 0x29, 0xEA, 0xFD, 0x9E,
+	0x39, 0x60, 0x8A, 0xF3, 0xB5, 0xA6, 0xEB, 0x3F, 0x02, 0xB6,
+	0x16, 0xC3, 0x96, 0x9D, 0x38, 0xB0, 0x7D, 0x82, 0x87, 0x0C,
+	0xF7, 0xBE, 0x24, 0xE5, 0x5F, 0x41, 0x04, 0x79, 0x76, 0x40,
+	0xE7, 0x00, 0x22, 0x7E, 0xB5, 0x85, 0x7F, 0x8D, 0x01, 0x50,
+	0xE9, 0xD3, 0x29, 0x42, 0x08, 0xB3, 0x51, 0x40, 0x7B, 0xD7,
+	0x8D, 0xCC, 0x10, 0x01, 0x64, 0x59, 0x28, 0xB6, 0x53, 0xF3,
+	0x50, 0x4E, 0xB1, 0xF2, 0x58, 0xCD, 0x6E, 0xF5, 0x56, 0x3E,
+	0x66, 0x2F, 0xD7, 0x07, 0x7F, 0x52, 0x4C, 0x13, 0x24, 0xDC,
+	0x8E, 0x8D, 0xCC, 0xED, 0x77, 0xC4, 0x21, 0xD2, 0xFD, 0x08,
+	0xEA, 0xD7, 0xC0, 0x5C, 0x13, 0x82, 0x81, 0x31, 0x2F, 0x2B,
+	0x08, 0xE4, 0x80, 0x04, 0x7A, 0x0C, 0x8A, 0x3C, 0xDC, 0x22,
+	0xE4, 0x5A, 0x7A, 0xB0, 0x12, 0x5E, 0x4A, 0x76, 0x94, 0x77,
+	0xC2, 0x0E, 0x92, 0xBA, 0x8A, 0xA0, 0x1F, 0x14, 0x51, 0x1E,
+	0x66, 0x6C, 0x38, 0x03, 0x6C, 0xC7, 0x4A, 0x4B, 0x70, 0x80,
+	0xAF, 0xCA, 0x84, 0x51, 0xD8, 0xD2, 0x26, 0x49, 0xF5, 0xA8,
+	0x5E, 0x35, 0x4B, 0xAC, 0xCE, 0x29, 0x92, 0x33, 0xB7, 0xA2,
+	0x69, 0x7D, 0x0C, 0xE0, 0x9C, 0xDB, 0x04, 0xD6, 0xB4, 0xBC,
+	0x39, 0xD7, 0x7F, 0x9E, 0x9D, 0x78, 0x38, 0x7F, 0x51, 0x54,
+	0x50, 0x8B, 0x9E, 0x9C, 0x03, 0x6C, 0xF5, 0x9D, 0x2C, 0x74,
+	0x57, 0xF0, 0x27, 0x2A, 0xC3, 0x47, 0xCA, 0xB9, 0xD7, 0x5C,
+	0xFF, 0xC2, 0xAC, 0x65, 0x4E, 0xBD
+};
+
+/*
+ * We need temporary values for at least 7 integers of the same size
+ * as a factor (including header word); more space helps with performance
+ * (in modular exponentiations), but we much prefer to remain under
+ * 2 kilobytes in total, to save stack space. The macro TEMPS below
+ * exceeds 512 (which is a count in 32-bit words) when BR_MAX_RSA_SIZE
+ * is greater than 4464 (default value is 4096, so the 2-kB limit is
+ * maintained unless BR_MAX_RSA_SIZE was modified).
+ */
+#define MAX(x, y)   ((x) > (y) ? (x) : (y))   /* evaluates arguments twice */
+#define ROUND2(x)   ((((x) + 1) >> 1) << 1)   /* round up to an even value */
+
+#define TEMPS   MAX(512, ROUND2(7 * ((((BR_MAX_RSA_SIZE + 1) >> 1) + 61) / 31)))
+
+/*
+ * Perform trial division on a candidate prime. This computes
+ * y = SMALL_PRIMES mod x, then tries to compute y/y mod x. The
+ * br_i31_moddiv() function will report an error if y is not invertible
+ * modulo x. Returned value is 1 on success (none of the small primes
+ * divides x), 0 on error (a non-trivial GCD is obtained).
+ *
+ * This function assumes that x is odd; t[] is used as scratch space.
+ */
+static uint32_t
+trial_divisions(const uint32_t *x, uint32_t *t)
+{
+	uint32_t *y;
+	uint32_t x0i;
+
+	y = t;
+	t += 1 + ((x[0] + 31) >> 5);   /* skip the words reserved for y */
+	x0i = br_i31_ninv31(x[1]);
+	br_i31_decode_reduce(y, SMALL_PRIMES, sizeof SMALL_PRIMES, x);
+	return br_i31_moddiv(y, y, x, x0i, t);
+}
+
+/*
+ * Perform n rounds of Miller-Rabin on the candidate prime x. This
+ * function assumes that x = 3 mod 4. t[] (tlen words) is scratch space.
+ *
+ * Returned value is 1 on success (all rounds completed successfully),
+ * 0 otherwise.
+ */
+static uint32_t
+miller_rabin(const br_prng_class **rng, const uint32_t *x, int n,
+	uint32_t *t, size_t tlen, br_i31_modpow_opt_type mp31)
+{
+	/*
+	 * Since x = 3 mod 4, the Miller-Rabin test is simple:
+	 *  - get a random base a (such that 1 < a < x-1)
+	 *  - compute z = a^((x-1)/2) mod x
+	 *  - if z != 1 and z != x-1, the number x is composite
+	 *
+	 * We generate bases 'a' randomly with a size which is
+	 * one bit less than x, which ensures that a < x-1. It
+	 * is not useful to verify that a > 1 because the probability
+	 * that we get a value a equal to 0 or 1 is much smaller
+	 * than the probability of our Miller-Rabin tests not to
+	 * detect a composite, which is already quite smaller than the
+	 * probability of the hardware misbehaving and returning a
+	 * composite integer because of some glitch (e.g. bad RAM
+	 * or ill-timed cosmic ray).
+	 */
+	unsigned char *xm1d2;
+	size_t xlen, xm1d2_len, xm1d2_len_u32, u;
+	uint32_t asize;
+	unsigned cc;
+	uint32_t x0i;
+
+	/*
+	 * Compute (x-1)/2 as bytes: x is odd, so this is x shifted right.
+	 */
+	xm1d2 = (unsigned char *)t;
+	xm1d2_len = ((x[0] - (x[0] >> 5)) + 7) >> 3;
+	br_i31_encode(xm1d2, xm1d2_len, x);
+	cc = 0;
+	for (u = 0; u < xm1d2_len; u ++) {
+		unsigned w;
+
+		w = xm1d2[u];
+		xm1d2[u] = (unsigned char)((w >> 1) | cc);
+		cc = w << 7;
+	}
+
+	/*
+	 * We used some words of the provided buffer for (x-1)/2.
+	 */
+	xm1d2_len_u32 = (xm1d2_len + 3) >> 2;
+	t += xm1d2_len_u32;
+	tlen -= xm1d2_len_u32;
+
+	xlen = (x[0] + 31) >> 5;
+	asize = x[0] - 1 - EQ0(x[0] & 31);   /* encoded; one bit less than x */
+	x0i = br_i31_ninv31(x[1]);
+	while (n -- > 0) {
+		uint32_t *a, *t2;
+		uint32_t eq1, eqm1;
+		size_t t2len;
+
+		/*
+		 * Generate a random base. We don't need the base to be
+		 * really uniform modulo x, so we just get a random
+		 * number which is one bit shorter than x.
+		 */
+		a = t;
+		a[0] = x[0];
+		a[xlen] = 0;
+		mkrand(rng, a, asize);
+
+		/*
+		 * Compute a^((x-1)/2) mod x. We assume here that the
+		 * function will not fail (the temporary array is large
+		 * enough).
+		 */
+		t2 = t + 1 + xlen;
+		t2len = tlen - 1 - xlen;
+		if ((t2len & 1) != 0) {
+			/*
+			 * Since the source array is 64-bit aligned and
+			 * has an even number of elements (TEMPS), we
+			 * can use the parity of the remaining length to
+			 * detect and adjust alignment.
+			 */
+			t2 ++;
+			t2len --;
+		}
+		mp31(a, xm1d2, xm1d2_len, x, x0i, t2, t2len);
+
+		/*
+		 * We must obtain either 1 or x-1. Note that x is odd,
+		 * hence x-1 differs from x only in its low word (no
+		 * carry).
+		 */
+		eq1 = a[1] ^ 1;
+		eqm1 = a[1] ^ (x[1] - 1);
+		for (u = 2; u <= xlen; u ++) {
+			eq1 |= a[u];
+			eqm1 |= a[u] ^ x[u];
+		}
+
+		if ((EQ0(eq1) | EQ0(eqm1)) == 0) {
+			return 0;
+		}
+	}
+	return 1;
+}
+
+/*
+ * Create a random prime in x[]; 'esize' is its _encoded_ bit length.
+ * The two top and two bottom bits are set to 1; t[] is scratch space.
+ */
+static void
+mkprime(const br_prng_class **rng, uint32_t *x, uint32_t esize,
+	uint32_t pubexp, uint32_t *t, size_t tlen, br_i31_modpow_opt_type mp31)
+{
+	size_t len;
+
+	x[0] = esize;                /* header word */
+	len = (esize + 31) >> 5;     /* value words are x[1..len] */
+	for (;;) {
+		size_t u;
+		uint32_t m3, m5, m7, m11;
+		int rounds, s7, s11;
+
+		/*
+		 * Generate random bits. We force the two top bits and the
+		 * two bottom bits to 1.
+		 */
+		mkrand(rng, x, esize);
+		if ((esize & 31) == 0) {
+			x[len] |= 0x60000000;      /* top word is full */
+		} else if ((esize & 31) == 1) {
+			x[len] |= 0x00000001;
+			x[len - 1] |= 0x40000000;  /* second bit: word below */
+		} else {
+			x[len] |= 0x00000003 << ((esize & 31) - 2);
+		}
+		x[1] |= 0x00000003;
+
+		/*
+		 * Trial division with low primes (3, 5, 7 and 11). We
+		 * use the following properties:
+		 *
+		 *   2^2 = 1 mod 3
+		 *   2^4 = 1 mod 5
+		 *   2^3 = 1 mod 7
+		 *   2^10 = 1 mod 11
+		 */
+		m3 = 0;
+		m5 = 0;
+		m7 = 0;
+		m11 = 0;
+		s7 = 0;
+		s11 = 0;
+		for (u = 0; u < len; u ++) {
+			uint32_t w, w3, w5, w7, w11;
+
+			w = x[1 + u];
+			w3 = (w & 0xFFFF) + (w >> 16);     /* max: 98302 */
+			w5 = (w & 0xFFFF) + (w >> 16);     /* max: 98302 */
+			w7 = (w & 0x7FFF) + (w >> 15);     /* max: 98302 */
+			w11 = (w & 0xFFFFF) + (w >> 20);   /* max: 1050622 */
+
+			m3 += w3 << (u & 1);               /* 2^31 = 2 mod 3 */
+			m3 = (m3 & 0xFF) + (m3 >> 8);      /* max: 1025 */
+
+			m5 += w5 << ((4 - u) & 3);         /* 2^31 = 2^-1 mod 5 */
+			m5 = (m5 & 0xFFF) + (m5 >> 12);    /* max: 4479 */
+
+			m7 += w7 << s7;                    /* 2^31 = 2 mod 7 */
+			m7 = (m7 & 0x1FF) + (m7 >> 9);     /* max: 1280 */
+			if (++ s7 == 3) {
+				s7 = 0;
+			}
+
+			m11 += w11 << s11;                 /* 2^31 = 2 mod 11 */
+			if (++ s11 == 10) {
+				s11 = 0;
+			}
+			m11 = (m11 & 0x3FF) + (m11 >> 10); /* max: 526847 */
+		}
+
+		m3 = (m3 & 0x3F) + (m3 >> 6);      /* max: 78 */
+		m3 = (m3 & 0x0F) + (m3 >> 4);      /* max: 18 */
+		m3 = ((m3 * 43) >> 5) & 3;         /* m3 mod 3 (for m3 <= 18) */
+
+		m5 = (m5 & 0xFF) + (m5 >> 8);      /* max: 271 */
+		m5 = (m5 & 0x0F) + (m5 >> 4);      /* max: 31 */
+		m5 -= 20 & -GT(m5, 19);
+		m5 -= 10 & -GT(m5, 9);
+		m5 -= 5 & -GT(m5, 4);
+
+		m7 = (m7 & 0x3F) + (m7 >> 6);      /* max: 82 */
+		m7 = (m7 & 0x07) + (m7 >> 3);      /* max: 16 */
+		m7 = ((m7 * 147) >> 7) & 7;        /* m7 mod 7 (for m7 <= 16) */
+
+		/*
+		 * 2^5 = 32 = -1 mod 11.
+		 */
+		m11 = (m11 & 0x3FF) + (m11 >> 10);      /* max: 1536 */
+		m11 = (m11 & 0x3FF) + (m11 >> 10);      /* max: 1023 */
+		m11 = (m11 & 0x1F) + 33 - (m11 >> 5);   /* max: 64 */
+		m11 -= 44 & -GT(m11, 43);
+		m11 -= 22 & -GT(m11, 21);
+		m11 -= 11 & -GT(m11, 10);
+
+		/*
+		 * If any of these remainders is 0, then the candidate is
+		 * not prime. Also, if pubexp is 3, 5, 7 or 11, and the
+		 * corresponding remainder is 1, then the candidate must
+		 * be rejected, because we need e to be invertible
+		 * modulo p-1. We can use simple comparisons here
+		 * because they won't leak information on a candidate
+		 * that we keep, only on one that we reject (and is thus
+		 * not secret).
+		 */
+		if (m3 == 0 || m5 == 0 || m7 == 0 || m11 == 0) {
+			continue;
+		}
+		if ((pubexp == 3 && m3 == 1)
+			|| (pubexp == 5 && m5 == 1)
+			|| (pubexp == 7 && m7 == 1)
+			|| (pubexp == 11 && m11 == 1))
+		{
+			continue;
+		}
+
+		/*
+		 * More trial divisions.
+		 */
+		if (!trial_divisions(x, t)) {
+			continue;
+		}
+
+		/*
+		 * Miller-Rabin algorithm. Since we selected a random
+		 * integer, not a maliciously crafted integer, we can use
+		 * relatively few rounds to lower the risk of a false
+		 * positive (i.e. declaring prime a non-prime) under
+		 * 2^(-80). It is not useful to lower the probability much
+		 * below that, since that would be substantially below
+		 * the probability of the hardware misbehaving. Sufficient
+		 * numbers of rounds are extracted from the Handbook of
+		 * Applied Cryptography, note 4.49 (page 149).
+		 *
+		 * Since we work on the encoded size (esize), we need to
+		 * compare with encoded thresholds.
+		 */
+		if (esize < 309) {
+			rounds = 12;
+		} else if (esize < 464) {
+			rounds = 9;
+		} else if (esize < 670) {
+			rounds = 6;
+		} else if (esize < 877) {
+			rounds = 4;
+		} else if (esize < 1341) {
+			rounds = 3;
+		} else {
+			rounds = 2;
+		}
+
+		if (miller_rabin(rng, x, rounds, t, tlen, mp31)) {
+			return;
+		}
+	}
+}
+
+/*
+ * Invert the public exponent modulo p-1, for a prime p such that
+ * p > 2^33 and p = 3 mod 4. The caller provides m = (p-1)/2 (with an
+ * announced bit length equal to that of p) and the odd integer e; on
+ * output, d = 1/e mod p-1. Returned value is 1 on success, 0 on error
+ * (an error is reported if e is not invertible modulo p-1).
+ *
+ * The temporary buffer (t) must have room for at least 4 integers of
+ * the size of p.
+ */
+static uint32_t
+invert_pubexp(uint32_t *d, const uint32_t *m, uint32_t e, uint32_t *t)
+{
+	uint32_t *ev, *scratch;
+	uint32_t m0i, status;
+
+	/*
+	 * The first words of t receive e as a big integer; whatever
+	 * follows is scratch space for the modular division.
+	 */
+	ev = t;
+	scratch = t + 1 + ((m[0] + 31) >> 5);
+
+	/*
+	 * Compute d = 1/e mod m. Since p = 3 mod 4, m is odd.
+	 */
+	br_i31_zero(d, m[0]);
+	d[1] = 1;
+	br_i31_zero(ev, m[0]);
+	ev[1] = e & 0x7FFFFFFF;
+	ev[2] = e >> 31;
+	m0i = br_i31_ninv31(m[1]);
+	status = br_i31_moddiv(d, ev, m, m0i, scratch);
+
+	/*
+	 * d is now correct modulo m, but we want it modulo p-1 = 2m:
+	 * by the CRT, the result is either d or d + m. Write
+	 * e*d = 1 + k*m; e and m are odd. If d is odd, then k is
+	 * even, e*d = 1 + (k/2)*2m, and d is already fine. If d is
+	 * even, then k is odd, and m must be added to d.
+	 */
+	br_i31_add(d, m, (d[1] & 1) ^ 1);
+	return status;
+}
+
+/*
+ * Exchange the contents of two memory areas of 'len' bytes each.
+ * The areas must be disjoint; bytes are swapped one at a time, in
+ * increasing address order.
+ */
+static void
+bufswap(void *b1, void *b2, size_t len)
+{
+	unsigned char *left = b1;
+	unsigned char *right = b2;
+
+	while (len -- > 0) {
+		unsigned char tmp;
+
+		tmp = *left;
+		*left ++ = *right;
+		*right ++ = tmp;
+	}
+}
+
+/* see inner.h; produces p, q, dp, dq, iq (and n, e when pk != NULL) */
+uint32_t
+br_rsa_i31_keygen_inner(const br_prng_class **rng,
+	br_rsa_private_key *sk, void *kbuf_priv,
+	br_rsa_public_key *pk, void *kbuf_pub,
+	unsigned size, uint32_t pubexp, br_i31_modpow_opt_type mp31)
+{
+	uint32_t esize_p, esize_q;
+	size_t plen, qlen, tlen;
+	uint32_t *p, *q, *t;
+	union {
+		uint32_t t32[TEMPS];
+		uint64_t t64[TEMPS >> 1];  /* for 64-bit alignment */
+	} tmp;
+	uint32_t r;
+
+	if (size < BR_MIN_RSA_SIZE || size > BR_MAX_RSA_SIZE) {
+		return 0;
+	}
+	if (pubexp == 0) {
+		pubexp = 3;  /* 0 selects the default exponent */
+	} else if (pubexp == 1 || (pubexp & 1) == 0) {
+		return 0;    /* exponent must be odd and greater than 1 */
+	}
+
+	esize_p = (size + 1) >> 1;  /* p gets the extra bit if size is odd */
+	esize_q = size - esize_p;
+	sk->n_bitlen = size;
+	sk->p = kbuf_priv;          /* kbuf_priv layout: p | q | dp | dq | iq */
+	sk->plen = (esize_p + 7) >> 3;
+	sk->q = sk->p + sk->plen;
+	sk->qlen = (esize_q + 7) >> 3;
+	sk->dp = sk->q + sk->qlen;
+	sk->dplen = sk->plen;
+	sk->dq = sk->dp + sk->dplen;
+	sk->dqlen = sk->qlen;
+	sk->iq = sk->dq + sk->dqlen;
+	sk->iqlen = sk->plen;
+
+	if (pk != NULL) {
+		pk->n = kbuf_pub;       /* kbuf_pub layout: n | e */
+		pk->nlen = (size + 7) >> 3;
+		pk->e = pk->n + pk->nlen;
+		pk->elen = 4;
+		br_enc32be(pk->e, pubexp);
+		while (*pk->e == 0) {   /* skip the leading zero bytes of e */
+			pk->e ++;
+			pk->elen --;
+		}
+	}
+
+	/*
+	 * We now switch to encoded sizes.
+	 *
+	 * floor((x * 16913) / (2^19)) is equal to floor(x/31) for all
+	 * integers x from 0 to 34966; the intermediate product fits on
+	 * 30 bits, thus we can use MUL31().
+	 */
+	esize_p += MUL31(esize_p, 16913) >> 19;
+	esize_q += MUL31(esize_q, 16913) >> 19;
+	plen = (esize_p + 31) >> 5;
+	qlen = (esize_q + 31) >> 5;
+	p = tmp.t32;
+	q = p + 1 + plen;
+	t = q + 1 + qlen;
+	tlen = ((sizeof tmp.t32) / sizeof(uint32_t)) - (2 + plen + qlen);
+
+	/*
+	 * When looking for primes p and q, we temporarily divide
+	 * candidates by 2, in order to compute the inverse of the
+	 * public exponent.
+	 */
+
+	for (;;) {
+		mkprime(rng, p, esize_p, pubexp, t, tlen, mp31);
+		br_i31_rshift(p, 1);            /* p is odd: now (p-1)/2 */
+		if (invert_pubexp(t, p, pubexp, t + 1 + plen)) {
+			br_i31_add(p, p, 1);
+			p[1] |= 1;              /* back to the original p */
+			br_i31_encode(sk->p, sk->plen, p);
+			br_i31_encode(sk->dp, sk->dplen, t);
+			break;
+		}
+	}
+
+	for (;;) {
+		mkprime(rng, q, esize_q, pubexp, t, tlen, mp31);
+		br_i31_rshift(q, 1);            /* q is odd: now (q-1)/2 */
+		if (invert_pubexp(t, q, pubexp, t + 1 + qlen)) {
+			br_i31_add(q, q, 1);
+			q[1] |= 1;              /* back to the original q */
+			br_i31_encode(sk->q, sk->qlen, q);
+			br_i31_encode(sk->dq, sk->dqlen, t);
+			break;
+		}
+	}
+
+	/*
+	 * If p and q have the same size, then it is possible that q > p
+	 * (when the target modulus size is odd, we generate p with a
+	 * greater bit length than q). If q > p, we want to swap p and q
+	 * (and also dp and dq) for two reasons:
+	 *  - The final step below (inversion of q modulo p) is easier if
+	 *    p > q.
+	 *  - While BearSSL's RSA code is perfectly happy with RSA keys such
+	 *    that p < q, some other implementations have restrictions and
+	 *    require p > q.
+	 *
+	 * Note that we can do a simple non-constant-time swap here,
+	 * because the only information we leak here is that we insist on
+	 * returning p and q such that p > q, which is not a secret.
+	 */
+	if (esize_p == esize_q && br_i31_sub(p, q, 0) == 1) {
+		bufswap(p, q, (1 + plen) * sizeof *p);
+		bufswap(sk->p, sk->q, sk->plen);
+		bufswap(sk->dp, sk->dq, sk->dplen);
+	}
+
+	/*
+	 * We have produced p, q, dp and dq. We can now compute iq = 1/q mod p.
+	 *
+	 * We ensured that p >= q, so this is just a matter of updating the
+	 * header word for q (and possibly adding an extra word).
+	 *
+	 * Theoretically, the call below may fail, in case we were
+	 * extraordinarily unlucky, and p = q. Another failure case is if
+	 * Miller-Rabin failed us _twice_, and p and q are non-prime and
+	 * have a factor in common. We report the error mostly because it
+	 * is cheap and we can, but in practice this never happens (or, at
+	 * least, it happens way less often than hardware glitches).
+	 */
+	q[0] = p[0];
+	if (plen > qlen) {
+		q[plen] = 0;
+		t ++;
+		tlen --;
+	}
+	br_i31_zero(t, p[0]);
+	t[1] = 1;
+	r = br_i31_moddiv(t, q, p, br_i31_ninv31(p[1]), t + 1 + plen);
+	br_i31_encode(sk->iq, sk->iqlen, t);
+
+	/*
+	 * Compute the public modulus too, if required.
+	 */
+	if (pk != NULL) {
+		br_i31_zero(t, p[0]);
+		br_i31_mulacc(t, p, q);
+		br_i31_encode(pk->n, pk->nlen, t);
+	}
+
+	return r;
+}
diff --git a/third_party/bearssl/src/rsa_i31_modulus.c b/third_party/bearssl/src/rsa_i31_modulus.c
new file mode 100644
index 0000000..f5f997f
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_modulus.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; recomputes n = p*q from the private key factors */
+size_t
+br_rsa_i31_compute_modulus(void *n, const br_rsa_private_key *sk)
+{
+	uint32_t tmp[4 * (((BR_MAX_RSA_SIZE / 2) + 30) / 31) + 5];
+	uint32_t *t, *p, *q;
+	const unsigned char *pbuf, *qbuf;
+	size_t nlen, plen, qlen, tlen;
+
+	/*
+	 * Skip leading zero bytes to get the actual byte lengths of p and q.
+	 */
+	pbuf = sk->p;
+	plen = sk->plen;
+	while (plen > 0 && *pbuf == 0) {
+		pbuf ++;
+		plen --;
+	}
+	qbuf = sk->q;
+	qlen = sk->qlen;
+	while (qlen > 0 && *qbuf == 0) {
+		qbuf ++;
+		qlen --;
+	}
+
+	t = tmp;
+	tlen = (sizeof tmp) / (sizeof tmp[0]);
+
+	/*
+	 * Decode p (after checking that the free words can hold it).
+	 */
+	if ((31 * tlen) < (plen << 3) + 31) {
+		return 0;
+	}
+	br_i31_decode(t, pbuf, plen);
+	p = t;
+	plen = (p[0] + 63) >> 5;  /* now in words, header word included */
+	t += plen;
+	tlen -= plen;
+
+	/*
+	 * Decode q (after checking that the free words can hold it).
+	 */
+	if ((31 * tlen) < (qlen << 3) + 31) {
+		return 0;
+	}
+	br_i31_decode(t, qbuf, qlen);
+	q = t;
+	qlen = (q[0] + 63) >> 5;  /* now in words, header word included */
+	t += qlen;
+	tlen -= qlen;
+
+	/*
+	 * Computation can proceed only if we have enough room for the
+	 * modulus.
+	 */
+	if (tlen < (plen + qlen + 1)) {
+		return 0;
+	}
+
+	/*
+	 * Private key already contains the modulus bit length, from which
+	 * we can infer the output length. Even if n is NULL, we still had
+	 * to decode p and q to make sure that the product can be computed.
+	 */
+	nlen = (sk->n_bitlen + 7) >> 3;
+	if (n != NULL) {
+		br_i31_zero(t, p[0]);
+		br_i31_mulacc(t, p, q);
+		br_i31_encode(n, nlen, t);
+	}
+	return nlen;
+}
diff --git a/third_party/bearssl/src/rsa_i31_oaep_decrypt.c b/third_party/bearssl/src/rsa_i31_oaep_decrypt.c
new file mode 100644
index 0000000..06fdd93
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_oaep_decrypt.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; br_rsa_i31_private(), then br_rsa_oaep_unpad() */
+uint32_t
+br_rsa_i31_oaep_decrypt(const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_private_key *sk, void *data, size_t *len)
+{
+	uint32_t priv_ok, unpad_ok;
+
+	if (((sk->n_bitlen + 7) >> 3) != *len) {
+		return 0;
+	}
+	priv_ok = br_rsa_i31_private(data, sk);
+	unpad_ok = br_rsa_oaep_unpad(dig, label, label_len, data, len);
+	return priv_ok & unpad_ok;
+}
diff --git a/third_party/bearssl/src/rsa_i31_oaep_encrypt.c b/third_party/bearssl/src/rsa_i31_oaep_encrypt.c
new file mode 100644
index 0000000..367008c
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_oaep_encrypt.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; OAEP-pad into dst, then apply br_rsa_i31_public() */
+size_t
+br_rsa_i31_oaep_encrypt(
+	const br_prng_class **rnd, const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_public_key *pk,
+	void *dst, size_t dst_max_len,
+	const void *src, size_t src_len)
+{
+	size_t out_len;
+
+	out_len = br_rsa_oaep_pad(rnd, dig, label, label_len,
+		pk, dst, dst_max_len, src, src_len);
+	if (out_len != 0) {
+		out_len &= -(size_t)br_rsa_i31_public(dst, out_len, pk);
+	}
+	return out_len;
+}
diff --git a/third_party/bearssl/src/rsa_i31_pkcs1_sign.c b/third_party/bearssl/src/rsa_i31_pkcs1_sign.c
new file mode 100644
index 0000000..784d3c2
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_pkcs1_sign.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; br_rsa_pkcs1_sig_pad(), then br_rsa_i31_private() */
+uint32_t
+br_rsa_i31_pkcs1_sign(const unsigned char *hash_oid,
+	const unsigned char *hash, size_t hash_len,
+	const br_rsa_private_key *sk, unsigned char *x)
+{
+	/* x goes to the padding routine first, then to the private key. */
+	return br_rsa_pkcs1_sig_pad(hash_oid, hash, hash_len, sk->n_bitlen, x)
+		? br_rsa_i31_private(x, sk)
+		: 0;
+}
diff --git a/third_party/bearssl/src/rsa_i31_pkcs1_vrfy.c b/third_party/bearssl/src/rsa_i31_pkcs1_vrfy.c
new file mode 100644
index 0000000..e79a002
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_pkcs1_vrfy.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; works on a stack copy of x, since x is const */
+uint32_t
+br_rsa_i31_pkcs1_vrfy(const unsigned char *x, size_t xlen,
+	const unsigned char *hash_oid, size_t hash_len,
+	const br_rsa_public_key *pk, unsigned char *hash_out)
+{
+	unsigned char buf[BR_MAX_RSA_SIZE >> 3];
+
+	if (xlen > (sizeof buf)) {
+		return 0;
+	}
+	memcpy(buf, x, xlen);
+	return br_rsa_i31_public(buf, xlen, pk)
+		? br_rsa_pkcs1_sig_unpad(buf, xlen,
+			hash_oid, hash_len, hash_out)
+		: 0;
+}
diff --git a/third_party/bearssl/src/rsa_i31_priv.c b/third_party/bearssl/src/rsa_i31_priv.c
new file mode 100644
index 0000000..b1e1244
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_priv.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define U      (2 + ((BR_MAX_RSA_FACTOR + 30) / 31))  /* factor words + 2 */
+#define TLEN   (8 * U)  /* size of the stack buffer, in 32-bit words */
+
+/* see bearssl_rsa.h; RSA private-key operation on x, in place, with CRT */
+uint32_t
+br_rsa_i31_private(unsigned char *x, const br_rsa_private_key *sk)
+{
+	const unsigned char *p, *q;
+	size_t plen, qlen;
+	size_t fwlen;
+	uint32_t p0i, q0i;
+	size_t xlen, u;
+	uint32_t tmp[1 + TLEN];
+	long z;
+	uint32_t *mp, *mq, *s1, *s2, *t1, *t2, *t3;
+	uint32_t r;
+
+	/*
+	 * Compute the actual lengths of p and q, in bytes.
+	 * These lengths are not considered secret (we cannot really hide
+	 * them anyway in constant-time code).
+	 */
+	p = sk->p;
+	plen = sk->plen;
+	while (plen > 0 && *p == 0) {
+		p ++;
+		plen --;
+	}
+	q = sk->q;
+	qlen = sk->qlen;
+	while (qlen > 0 && *q == 0) {
+		q ++;
+		qlen --;
+	}
+
+	/*
+	 * Compute the maximum factor length, in words.
+	 */
+	z = (long)(plen > qlen ? plen : qlen) << 3;  /* length in bits */
+	fwlen = 1;                                   /* extra word for header */
+	while (z > 0) {
+		z -= 31;
+		fwlen ++;
+	}
+
+	/*
+	 * Round up the word length to an even number.
+	 */
+	fwlen += (fwlen & 1);
+
+	/*
+	 * We need to fit at least 6 values in the stack buffer.
+	 */
+	if (6 * fwlen > TLEN) {
+		return 0;
+	}
+
+	/*
+	 * Compute modulus length (in bytes).
+	 */
+	xlen = (sk->n_bitlen + 7) >> 3;
+
+	/*
+	 * Decode q.
+	 */
+	mq = tmp;
+	br_i31_decode(mq, q, qlen);
+
+	/*
+	 * Decode p.
+	 */
+	t1 = mq + fwlen;
+	br_i31_decode(t1, p, plen);
+
+	/*
+	 * Compute the modulus (product of the two factors), to compare
+	 * it with the source value. We use br_i31_mulacc(), since it's
+	 * already used later on.
+	 */
+	t2 = mq + 2 * fwlen;
+	br_i31_zero(t2, mq[0]);
+	br_i31_mulacc(t2, mq, t1);
+
+	/*
+	 * We encode the modulus into bytes, to perform the comparison
+	 * with bytes. We know that the product length, in bytes, is
+	 * exactly xlen.
+	 * The comparison actually computes the carry when subtracting
+	 * the modulus from the source value; that carry must be 1 for
+	 * a value in the correct range. We keep it in r, which is our
+	 * accumulator for the error code.
+	 */
+	t3 = mq + 4 * fwlen;
+	br_i31_encode(t3, xlen, t2);
+	u = xlen;
+	r = 0;
+	while (u > 0) {
+		uint32_t wn, wx;
+
+		u --;  /* walk from the last byte to the first one */
+		wn = ((unsigned char *)t3)[u];
+		wx = x[u];
+		r = ((wx - (wn + r)) >> 8) & 1;  /* borrow out of this byte */
+	}
+
+	/*
+	 * Move the decoded p to another temporary buffer.
+	 */
+	mp = mq + 2 * fwlen;
+	memmove(mp, t1, fwlen * sizeof *t1);
+
+	/*
+	 * Compute s2 = x^dq mod q.
+	 */
+	q0i = br_i31_ninv31(mq[1]);
+	s2 = mq + fwlen;
+	br_i31_decode_reduce(s2, x, xlen, mq);
+	r &= br_i31_modpow_opt(s2, sk->dq, sk->dqlen, mq, q0i,
+		mq + 3 * fwlen, TLEN - 3 * fwlen);
+
+	/*
+	 * Compute s1 = x^dp mod p.
+	 */
+	p0i = br_i31_ninv31(mp[1]);
+	s1 = mq + 3 * fwlen;
+	br_i31_decode_reduce(s1, x, xlen, mp);
+	r &= br_i31_modpow_opt(s1, sk->dp, sk->dplen, mp, p0i,
+		mq + 4 * fwlen, TLEN - 4 * fwlen);
+
+	/*
+	 * Compute:
+	 *   h = (s1 - s2)*(1/q) mod p
+	 * s1 is an integer modulo p, but s2 is modulo q. PKCS#1 is
+	 * unclear about whether p may be lower than q (some existing,
+	 * widely deployed implementations of RSA don't tolerate p < q),
+	 * but we want to support that occurrence, so we need to use the
+	 * reduction function.
+	 *
+	 * Since we use br_i31_decode_reduce() for iq (purportedly, the
+	 * inverse of q modulo p), we also tolerate improperly large
+	 * values for this parameter.
+	 */
+	t1 = mq + 4 * fwlen;
+	t2 = mq + 5 * fwlen;
+	br_i31_reduce(t2, s2, mp);
+	br_i31_add(s1, mp, br_i31_sub(s1, t2, 1));
+	br_i31_to_monty(s1, mp);
+	br_i31_decode_reduce(t1, sk->iq, sk->iqlen, mp);
+	br_i31_montymul(t2, s1, t1, mp, p0i);
+
+	/*
+	 * h is now in t2. We compute the final result:
+	 *   s = s2 + q*h
+	 * All these operations are non-modular.
+	 *
+	 * We need mq, s2 and t2. We use the t3 buffer as destination.
+	 * The buffers mp, s1 and t1 are no longer needed, so we can
+	 * reuse them for t3. Moreover, the first step of the computation
+	 * is to copy s2 into t3, after which s2 is not needed. Right
+	 * now, mq is in slot 0, s2 is in slot 1, and t2 is in slot 5.
+	 * Therefore, we have ample room for t3 by simply using s2.
+	 */
+	t3 = s2;
+	br_i31_mulacc(t3, mq, t2);
+
+	/*
+	 * Encode the result. Since we already checked the value of xlen,
+	 * we can just use it right away.
+	 */
+	br_i31_encode(x, xlen, t3);
+
+	/*
+	 * The only error conditions remaining at that point are invalid
+	 * values for p and q (even integers).
+	 */
+	return p0i & q0i & r;
+}
diff --git a/third_party/bearssl/src/rsa_i31_privexp.c b/third_party/bearssl/src/rsa_i31_privexp.c
new file mode 100644
index 0000000..eee62a0
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_privexp.c
@@ -0,0 +1,318 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h */
+size_t
+br_rsa_i31_compute_privexp(void *d,
+	const br_rsa_private_key *sk, uint32_t e)
+{
+	/*
+	 * We want to invert e modulo phi = (p-1)(q-1). This first
+	 * requires computing phi, which is easy since we have the factors
+	 * p and q in the private key structure.
+	 *
+	 * Since p = 3 mod 4 and q = 3 mod 4, phi/4 is an odd integer.
+	 * We could invert e modulo phi/4 then patch the result to
+	 * modulo phi, but this would involve assembling three modulus-wide
+	 * values (phi/4, 1 and e) and calling moddiv, that requires
+	 * three more temporaries, for a total of six big integers, or
+	 * slightly more than 3 kB of stack space for RSA-4096. This
+	 * exceeds our stack requirements.
+	 *
+	 * Instead, we first use one step of the extended GCD:
+	 *
+	 *   - We compute phi = k*e + r  (Euclidean division of phi by e).
+	 *     If public exponent e is correct, then r != 0 (e must be
+	 *     invertible modulo phi). We also have k != 0 since we
+	 *     enforce non-ridiculously-small factors.
+	 *
+	 *   - We find small u, v such that u*e - v*r = 1  (using a
+	 *     binary GCD; we can arrange for u < r and v < e, i.e. all
+	 *     values fit on 32 bits).
+	 *
+	 *   - Solution is: d = u + v*k
+	 *     This last computation is exact: since u < r and v < e,
+	 *     the above implies d < r + e*((phi-r)/e) = phi
+	 */
+
+	uint32_t tmp[4 * ((BR_MAX_RSA_FACTOR + 30) / 31) + 12];
+	uint32_t *p, *q, *k, *m, *z, *phi;
+	const unsigned char *pbuf, *qbuf;
+	size_t plen, qlen, u, len, dlen;
+	uint32_t r, a, b, u0, v0, u1, v1, he, hr;
+	int i;
+
+	/*
+	 * Check that e is correct.
+	 */
+	if (e < 3 || (e & 1) == 0) {
+		return 0;
+	}
+
+	/*
+	 * Check lengths of p and q, and that they are both odd.
+	 */
+	pbuf = sk->p;
+	plen = sk->plen;
+	while (plen > 0 && *pbuf == 0) {
+		pbuf ++;
+		plen --;
+	}
+	if (plen < 5 || plen > (BR_MAX_RSA_FACTOR / 8)
+		|| (pbuf[plen - 1] & 1) != 1)
+	{
+		return 0;
+	}
+	qbuf = sk->q;
+	qlen = sk->qlen;
+	while (qlen > 0 && *qbuf == 0) {
+		qbuf ++;
+		qlen --;
+	}
+	if (qlen < 5 || qlen > (BR_MAX_RSA_FACTOR / 8)
+		|| (qbuf[qlen - 1] & 1) != 1)
+	{
+		return 0;
+	}
+
+	/*
+	 * Output length is that of the modulus.
+	 */
+	dlen = (sk->n_bitlen + 7) >> 3;
+	if (d == NULL) {
+		return dlen;
+	}
+
+	p = tmp;
+	br_i31_decode(p, pbuf, plen);
+	plen = (p[0] + 31) >> 5;
+	q = p + 1 + plen;
+	br_i31_decode(q, qbuf, qlen);
+	qlen = (q[0] + 31) >> 5;
+
+	/*
+	 * Compute phi = (p-1)*(q-1), then move it over p-1 and q-1 (that
+	 * we do not need anymore). The mulacc function sets the announced
+	 * bit length of t to be the sum of the announced bit lengths of
+	 * p-1 and q-1, which is usually exact but may overshoot by one
+	 * bit in some cases; we readjust it to its true length.
+	 */
+	p[1] --;
+	q[1] --;
+	phi = q + 1 + qlen;
+	br_i31_zero(phi, p[0]);
+	br_i31_mulacc(phi, p, q);
+	len = (phi[0] + 31) >> 5;
+	memmove(tmp, phi, (1 + len) * sizeof *phi);
+	phi = tmp;
+	phi[0] = br_i31_bit_length(phi + 1, len);
+	len = (phi[0] + 31) >> 5;
+
+	/*
+	 * Divide phi by public exponent e. The final remainder r must be
+	 * non-zero (otherwise, the key is invalid). The quotient is k,
+	 * which we write over phi, since we don't need phi after that.
+	 */
+	r = 0;
+	for (u = len; u >= 1; u --) {
+		/*
+		 * Upon entry, r < e, and phi[u] < 2^31; hence,
+		 * hi:lo < e*2^31. Thus, the produced word k[u]
+		 * must be lower than 2^31, and the new remainder r
+		 * is lower than e.
+		 */
+		uint32_t hi, lo;
+
+		hi = r >> 1;
+		lo = (r << 31) + phi[u];
+		phi[u] = br_divrem(hi, lo, e, &r);
+	}
+	if (r == 0) {
+		return 0;
+	}
+	k = phi;
+
+	/*
+	 * Compute u and v such that u*e - v*r = GCD(e,r). We use
+	 * a binary GCD algorithm, with 6 extra integers a, b,
+	 * u0, u1, v0 and v1. Initial values are:
+	 *   a = e    u0 = 1   v0 = 0
+	 *   b = r    u1 = r   v1 = e-1
+	 * The following invariants are maintained:
+	 *   a = u0*e - v0*r
+	 *   b = u1*e - v1*r
+	 *   0 < a <= e
+	 *   0 < b <= r
+	 *   0 <= u0 <= r
+	 *   0 <= v0 <= e
+	 *   0 <= u1 <= r
+	 *   0 <= v1 <= e
+	 *
+	 * At each iteration, we reduce either a or b by one bit, and
+	 * adjust u0, u1, v0 and v1 to maintain the invariants:
+	 *  - if a is even, then a <- a/2
+	 *  - otherwise, if b is even, then b <- b/2
+	 *  - otherwise, if a > b, then a <- (a-b)/2
+	 *  - otherwise, if b > a, then b <- (b-a)/2
+	 * Algorithm stops when a = b. At that point, the common value
+	 * is the GCD of e and r; it must be 1 (otherwise, the private
+	 * key or public exponent is not valid). The (u0,v0) or (u1,v1)
+	 * pairs are the solution we are looking for.
+	 *
+	 * Since either a or b is reduced by at least 1 bit at each
+	 * iteration, 62 iterations are enough to reach the end
+	 * condition.
+	 *
+	 * To maintain the invariants, we must compute the same operations
+	 * on the u* and v* values that we do on a and b:
+	 *  - When a is divided by 2, u0 and v0 must be divided by 2.
+	 *  - When b is divided by 2, u1 and v1 must be divided by 2.
+	 *  - When b is subtracted from a, u1 and v1 are subtracted from
+	 *    u0 and v0, respectively.
+	 *  - When a is subtracted from b, u0 and v0 are subtracted from
+	 *    u1 and v1, respectively.
+	 *
+	 * However, we want to keep the u* and v* values in their proper
+	 * ranges. The following remarks apply:
+	 *
+	 *  - When a is divided by 2, then a is even. Therefore:
+	 *
+	 *     * If r is odd, then u0 and v0 must have the same parity;
+	 *       if they are both odd, then adding r to u0 and e to v0
+	 *       makes them both even, and the division by 2 brings them
+	 *       back to the proper range.
+	 *
+	 *     * If r is even, then u0 must be even; if v0 is odd, then
+	 *       adding r to u0 and e to v0 makes them both even, and the
+	 *       division by 2 brings them back to the proper range.
+	 *
+	 *    Thus, all we need to do is to look at the parity of v0,
+	 *    and add (r,e) to (u0,v0) when v0 is odd. In order to avoid
+	 *    a 32-bit overflow, we can add ((r+1)/2,(e/2)+1) after the
+	 *    division (r+1 does not overflow since r < e; and (e/2)+1
+	 *    is equal to (e+1)/2 since e is odd).
+	 *
+	 *  - When we subtract b from a, three cases may occur:
+	 *
+	 *     * u1 <= u0 and v1 <= v0: just do the subtractions
+	 *
+	 *     * u1 > u0 and v1 > v0: compute:
+	 *         (u0, v0) <- (u0 + r - u1, v0 + e - v1)
+	 *
+	 *     * u1 <= u0 and v1 > v0: compute:
+	 *         (u0, v0) <- (u0 + r - u1, v0 + e - v1)
+	 *
+	 *    The fourth case (u1 > u0 and v1 <= v0) is not possible
+	 *    because it would contradict "b < a" (which is the reason
+	 *    why we subtract b from a).
+	 *
+	 *    The tricky case is the third one: from the equations, it
+	 *    seems that u0 may go out of range. However, the invariants
+	 *    and ranges of other values imply that, in that case, the
+	 *    new u0 does not actually exceed the range.
+	 *
+	 *    We can thus handle the subtraction by adding (r,e) based
+	 *    solely on the comparison between v0 and v1.
+	 */
+	a = e;
+	b = r;
+	u0 = 1;
+	v0 = 0;
+	u1 = r;
+	v1 = e - 1;
+	hr = (r + 1) >> 1;
+	he = (e >> 1) + 1;
+	for (i = 0; i < 62; i ++) {
+		uint32_t oa, ob, agtb, bgta;
+		uint32_t sab, sba, da, db;
+		uint32_t ctl;
+
+		oa = a & 1;                  /* 1 if a is odd */
+		ob = b & 1;                  /* 1 if b is odd */
+		agtb = GT(a, b);             /* 1 if a > b */
+		bgta = GT(b, a);             /* 1 if b > a */
+
+		sab = oa & ob & agtb;        /* 1 if a <- a-b */
+		sba = oa & ob & bgta;        /* 1 if b <- b-a */
+
+		/* a <- a-b, u0 <- u0-u1, v0 <- v0-v1 */
+		ctl = GT(v1, v0);
+		a -= b & -sab;
+		u0 -= (u1 - (r & -ctl)) & -sab;
+		v0 -= (v1 - (e & -ctl)) & -sab;
+
+		/* b <- b-a, u1 <- u1-u0 mod r, v1 <- v1-v0 mod e */
+		ctl = GT(v0, v1);
+		b -= a & -sba;
+		u1 -= (u0 - (r & -ctl)) & -sba;
+		v1 -= (v0 - (e & -ctl)) & -sba;
+
+		da = NOT(oa) | sab;          /* 1 if a <- a/2 */
+		db = (oa & NOT(ob)) | sba;   /* 1 if b <- b/2 */
+
+		/* a <- a/2, u0 <- u0/2, v0 <- v0/2 */
+		ctl = v0 & 1;
+		a ^= (a ^ (a >> 1)) & -da;
+		u0 ^= (u0 ^ ((u0 >> 1) + (hr & -ctl))) & -da;
+		v0 ^= (v0 ^ ((v0 >> 1) + (he & -ctl))) & -da;
+
+		/* b <- b/2, u1 <- u1/2 mod r, v1 <- v1/2 mod e */
+		ctl = v1 & 1;
+		b ^= (b ^ (b >> 1)) & -db;
+		u1 ^= (u1 ^ ((u1 >> 1) + (hr & -ctl))) & -db;
+		v1 ^= (v1 ^ ((v1 >> 1) + (he & -ctl))) & -db;
+	}
+
+	/*
+	 * Check that the GCD is indeed 1. If not, then the key is invalid
+	 * (and there's no harm in leaking that piece of information).
+	 */
+	if (a != 1) {
+		return 0;
+	}
+
+	/*
+	 * Now we have u0*e - v0*r = 1. Let's compute the result as:
+	 *   d = u0 + v0*k
+	 * We still have k in the tmp[] array, and its announced bit
+	 * length is that of phi.
+	 */
+	m = k + 1 + len;
+	m[0] = (1 << 5) + 1;  /* bit length is 32 bits, encoded */
+	m[1] = v0 & 0x7FFFFFFF;
+	m[2] = v0 >> 31;
+	z = m + 3;
+	br_i31_zero(z, k[0]);
+	z[1] = u0 & 0x7FFFFFFF;
+	z[2] = u0 >> 31;
+	br_i31_mulacc(z, k, m);
+
+	/*
+	 * Encode the result.
+	 */
+	br_i31_encode(d, dlen, z);
+	return dlen;
+}
diff --git a/third_party/bearssl/src/rsa_i31_pss_sign.c b/third_party/bearssl/src/rsa_i31_pss_sign.c
new file mode 100644
index 0000000..b06f3e2
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_pss_sign.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; returns 0 if br_rsa_pss_sig_pad() fails, else the br_rsa_i31_private() result */
+uint32_t
+br_rsa_i31_pss_sign(const br_prng_class **rng,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const unsigned char *hash, size_t salt_len,
+	const br_rsa_private_key *sk, unsigned char *x)
+{
+	if (!br_rsa_pss_sig_pad(rng, hf_data, hf_mgf1, hash,
+		salt_len, sk->n_bitlen, x))
+	{
+		return 0;  /* padding step returned 0 */
+	}
+	return br_rsa_i31_private(x, sk);  /* operates on the same x[] buffer */
+}
diff --git a/third_party/bearssl/src/rsa_i31_pss_vrfy.c b/third_party/bearssl/src/rsa_i31_pss_vrfy.c
new file mode 100644
index 0000000..77a9b28
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_pss_vrfy.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; works on a stack copy of x[], the caller's buffer is only read; returns 0 on failure */
+uint32_t
+br_rsa_i31_pss_vrfy(const unsigned char *x, size_t xlen,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const void *hash, size_t salt_len, const br_rsa_public_key *pk)
+{
+	unsigned char sig[BR_MAX_RSA_SIZE >> 3];
+
+	if (xlen > (sizeof sig)) {
+		return 0;  /* x[] would not fit in sig[] */
+	}
+	memcpy(sig, x, xlen);
+	if (!br_rsa_i31_public(sig, xlen, pk)) {
+		return 0;  /* br_rsa_i31_public() returned 0 */
+	}
+	return br_rsa_pss_sig_unpad(hf_data, hf_mgf1,
+		hash, salt_len, pk, sig);
+}
diff --git a/third_party/bearssl/src/rsa_i31_pub.c b/third_party/bearssl/src/rsa_i31_pub.c
new file mode 100644
index 0000000..d5f3fe2
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_pub.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * As a strict minimum, we need four buffers that can hold a
+ * modular integer (TLEN is a count of 32-bit words, see tmp[] below).
+ */
+#define TLEN   (4 * (2 + ((BR_MAX_RSA_SIZE + 30) / 31)))
+
+/* see bearssl_rsa.h; x[] is replaced in place by the result; returns 1 on success, 0 on error */
+uint32_t
+br_rsa_i31_public(unsigned char *x, size_t xlen,
+	const br_rsa_public_key *pk)
+{
+	const unsigned char *n;
+	size_t nlen;
+	uint32_t tmp[1 + TLEN];
+	uint32_t *m, *a, *t;
+	size_t fwlen;
+	long z;
+	uint32_t m0i, r;
+
+	/*
+	 * Get the actual length of the modulus (leading zero bytes skipped),
+	 * and see if it fits our stack buffer; xlen must equal that length.
+	 */
+	n = pk->n;
+	nlen = pk->nlen;
+	while (nlen > 0 && *n == 0) {
+		n ++;
+		nlen --;
+	}
+	if (nlen == 0 || nlen > (BR_MAX_RSA_SIZE >> 3) || xlen != nlen) {
+		return 0;
+	}
+	z = (long)nlen << 3;  /* modulus size in bits */
+	fwlen = 1;  /* start at one word; the loop adds one word per 31 bits */
+	while (z > 0) {
+		z -= 31;
+		fwlen ++;
+	}
+	/*
+	 * Round up length to an even number.
+	 */
+	fwlen += (fwlen & 1);
+
+	/*
+	 * The modulus gets decoded into m[].
+	 * The value to exponentiate goes into a[].
+	 * The temporaries for modular exponentiation are in t[].
+	 */
+	m = tmp;
+	a = m + fwlen;
+	t = m + 2 * fwlen;
+
+	/*
+	 * Decode the modulus.
+	 */
+	br_i31_decode(m, n, nlen);
+	m0i = br_i31_ninv31(m[1]);
+
+	/*
+	 * Note: if m[] is even, then m0i == 0. Otherwise, m0i must be
+	 * an odd integer.
+	 */
+	r = m0i & 1;
+
+	/*
+	 * Decode x[] into a[]; we also check that its value is proper.
+	 */
+	r &= br_i31_decode_mod(a, x, xlen, m);
+
+	/*
+	 * Compute the modular exponentiation; t[] gets the rest of tmp[].
+	 */
+	br_i31_modpow_opt(a, pk->e, pk->elen, m, m0i, t, TLEN - 2 * fwlen);
+
+	/*
+	 * Encode the result (this is done even when r is 0).
+	 */
+	br_i31_encode(x, xlen, a);
+	return r;
+}
diff --git a/third_party/bearssl/src/rsa_i31_pubexp.c b/third_party/bearssl/src/rsa_i31_pubexp.c
new file mode 100644
index 0000000..f26537d
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_pubexp.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Recompute public exponent, based on factor p and reduced private
+ * exponent dp (both unsigned big-endian). Returns 0 on any failure.
+ */
+static uint32_t
+get_pubexp(const unsigned char *pbuf, size_t plen,
+	const unsigned char *dpbuf, size_t dplen)
+{
+	/*
+	 * dp is the inverse of e modulo p-1. If p = 3 mod 4, then
+	 * p-1 = 2*((p-1)/2). Taken modulo 2, e is odd and has inverse 1;
+	 * thus, dp must be odd.
+	 *
+	 * We compute the inverse of dp modulo (p-1)/2. This requires
+	 * first reducing dp modulo (p-1)/2 (this can be done with a
+	 * conditional subtract, no need to use the generic modular
+	 * reduction function); then, we use moddiv.
+	 */
+
+	uint32_t tmp[6 * ((BR_MAX_RSA_FACTOR + 61) / 31)];
+	uint32_t *p, *dp, *x;
+	size_t len;
+	uint32_t e;
+
+	/*
+	 * Compute actual factor length (in bytes) and check that it fits
+	 * under our size constraints (5 to BR_MAX_RSA_FACTOR / 8 bytes).
+	 */
+	while (plen > 0 && *pbuf == 0) {
+		pbuf ++;
+		plen --;
+	}
+	if (plen == 0 || plen < 5 || plen > (BR_MAX_RSA_FACTOR / 8)) {
+		return 0;
+	}
+
+	/*
+	 * Compute actual reduced exponent length (in bytes) and check that
+	 * it is not longer than p (same length: compare the top bytes).
+	 */
+	while (dplen > 0 && *dpbuf == 0) {
+		dpbuf ++;
+		dplen --;
+	}
+	if (dplen > plen || dplen == 0
+		|| (dplen == plen && dpbuf[0] > pbuf[0]))
+	{
+		return 0;
+	}
+
+	/*
+	 * Verify that p = 3 mod 4 and that dp is odd (last byte = low bits).
+	 */
+	if ((pbuf[plen - 1] & 3) != 3 || (dpbuf[dplen - 1] & 1) != 1) {
+		return 0;
+	}
+
+	/*
+	 * Decode p and compute (p-1)/2.
+	 */
+	p = tmp;
+	br_i31_decode(p, pbuf, plen);
+	len = (p[0] + 63) >> 5;  /* words used by p, header word included */
+	br_i31_rshift(p, 1);
+
+	/*
+	 * Decode dp and make sure its announced bit length matches that of
+	 * p (we already know that the size of dp, in bits, does not exceed
+	 * the size of p, so we just have to copy the header word).
+	 */
+	dp = p + len;
+	memset(dp, 0, len * sizeof *dp);
+	br_i31_decode(dp, dpbuf, dplen);
+	dp[0] = p[0];
+
+	/*
+	 * Subtract (p-1)/2 from dp if necessary.
+	 */
+	br_i31_sub(dp, p, NOT(br_i31_sub(dp, p, 0)));
+
+	/*
+	 * If another subtraction is needed, then this means that the
+	 * value was invalid. We don't care to leak information about
+	 * invalid keys.
+	 */
+	if (br_i31_sub(dp, p, 0) == 0) {
+		return 0;
+	}
+
+	/*
+	 * Invert dp modulo (p-1)/2. If the inversion fails, then the
+	 * key value was invalid. x[] starts as 1, so x becomes 1/dp.
+	 */
+	x = dp + len;
+	br_i31_zero(x, p[0]);
+	x[1] = 1;
+	if (br_i31_moddiv(x, dp, p, br_i31_ninv31(p[1]), x + len) == 0) {
+		return 0;
+	}
+
+	/*
+	 * We now have an inverse. We must set it to zero (error) if its
+	 * length is greater than 32 bits and/or if it is an even integer.
+	 * Take care that the bit_length function returns an encoded
+	 * bit length.
+	 */
+	e = (uint32_t)x[1] | ((uint32_t)x[2] << 31);
+	e &= -LT(br_i31_bit_length(x + 1, len - 1), 34);  /* 33 encodes 32 bits */
+	e &= -(e & 1);
+	return e;
+}
+
+/* see bearssl_rsa.h; returns the exponent derived from (p,dp) masked by its equality with the one from (q,dq) */
+uint32_t
+br_rsa_i31_compute_pubexp(const br_rsa_private_key *sk)
+{
+	/*
+	 * Get the public exponent from both p and q. This is the right
+	 * exponent if we get twice the same value (get_pubexp() yields 0 on error).
+	 */
+	uint32_t ep, eq;
+
+	ep = get_pubexp(sk->p, sk->plen, sk->dp, sk->dplen);
+	eq = get_pubexp(sk->q, sk->qlen, sk->dq, sk->dqlen);
+	return ep & -EQ(ep, eq);
+}
diff --git a/third_party/bearssl/src/rsa_i32_oaep_decrypt.c b/third_party/bearssl/src/rsa_i32_oaep_decrypt.c
new file mode 100644
index 0000000..ecfd92b
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i32_oaep_decrypt.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; on entry *len must be (sk->n_bitlen + 7) >> 3; data[] is processed in place */
+uint32_t
+br_rsa_i32_oaep_decrypt(const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_private_key *sk, void *data, size_t *len)
+{
+	uint32_t r;
+
+	if (*len != ((sk->n_bitlen + 7) >> 3)) {
+		return 0;
+	}
+	r = br_rsa_i32_private(data, sk);
+	r &= br_rsa_oaep_unpad(dig, label, label_len, data, len);  /* called even if r is 0 */
+	return r;
+}
diff --git a/third_party/bearssl/src/rsa_i32_oaep_encrypt.c b/third_party/bearssl/src/rsa_i32_oaep_encrypt.c
new file mode 100644
index 0000000..dc17f3f
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i32_oaep_encrypt.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; returns the length produced by br_rsa_oaep_pad(), forced to 0 on failure */
+size_t
+br_rsa_i32_oaep_encrypt(
+	const br_prng_class **rnd, const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_public_key *pk,
+	void *dst, size_t dst_max_len,
+	const void *src, size_t src_len)
+{
+	size_t dlen;
+
+	dlen = br_rsa_oaep_pad(rnd, dig, label, label_len,
+		pk, dst, dst_max_len, src, src_len);
+	if (dlen == 0) {
+		return 0;  /* padding step returned 0 */
+	}
+	return dlen & -(size_t)br_rsa_i32_public(dst, dlen, pk);
+}
diff --git a/third_party/bearssl/src/rsa_i32_pkcs1_sign.c b/third_party/bearssl/src/rsa_i32_pkcs1_sign.c
new file mode 100644
index 0000000..44b6e6d
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i32_pkcs1_sign.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; returns 0 if br_rsa_pkcs1_sig_pad() fails, else the br_rsa_i32_private() result */
+uint32_t
+br_rsa_i32_pkcs1_sign(const unsigned char *hash_oid,
+	const unsigned char *hash, size_t hash_len,
+	const br_rsa_private_key *sk, unsigned char *x)
+{
+	if (!br_rsa_pkcs1_sig_pad(hash_oid, hash, hash_len, sk->n_bitlen, x)) {
+		return 0;  /* padding step returned 0 */
+	}
+	return br_rsa_i32_private(x, sk);  /* operates on the same x[] buffer */
+}
diff --git a/third_party/bearssl/src/rsa_i32_pkcs1_vrfy.c b/third_party/bearssl/src/rsa_i32_pkcs1_vrfy.c
new file mode 100644
index 0000000..6ee7a19
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i32_pkcs1_vrfy.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; works on a stack copy of x[], the caller's buffer is only read; returns 0 on failure */
+uint32_t
+br_rsa_i32_pkcs1_vrfy(const unsigned char *x, size_t xlen,
+	const unsigned char *hash_oid, size_t hash_len,
+	const br_rsa_public_key *pk, unsigned char *hash_out)
+{
+	unsigned char sig[BR_MAX_RSA_SIZE >> 3];
+
+	if (xlen > (sizeof sig)) {
+		return 0;  /* x[] would not fit in sig[] */
+	}
+	memcpy(sig, x, xlen);
+	if (!br_rsa_i32_public(sig, xlen, pk)) {
+		return 0;  /* br_rsa_i32_public() returned 0 */
+	}
+	return br_rsa_pkcs1_sig_unpad(sig, xlen, hash_oid, hash_len, hash_out);
+}
diff --git a/third_party/bearssl/src/rsa_i32_priv.c b/third_party/bearssl/src/rsa_i32_priv.c
new file mode 100644
index 0000000..05c22ec
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i32_priv.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define U   (1 + (BR_MAX_RSA_FACTOR >> 5))  /* 32-bit words per factor-sized slot of tmp[] */
+
+/* see bearssl_rsa.h; x[] ((sk->n_bitlen + 7) >> 3 bytes) is processed in place; returns 1 on success, 0 on error */
+uint32_t
+br_rsa_i32_private(unsigned char *x, const br_rsa_private_key *sk)
+{
+	const unsigned char *p, *q;
+	size_t plen, qlen;
+	uint32_t tmp[6 * U];
+	uint32_t *mp, *mq, *s1, *s2, *t1, *t2, *t3;
+	uint32_t p0i, q0i;
+	size_t xlen, u;
+	uint32_t r;
+
+	/*
+	 * All our temporary buffers are from the tmp[] array.
+	 *
+	 * The mp, mq, s1, s2, t1 and t2 buffers are large enough to
+	 * contain a RSA factor. The t3 buffer can contain a complete
+	 * RSA modulus. t3 shares its storage space with s2, s1 and t1,
+	 * in that order (this is important, see below).
+	 */
+	mq = tmp;
+	mp = tmp + U;
+	t2 = tmp + 2 * U;
+	s2 = tmp + 3 * U;
+	s1 = tmp + 4 * U;
+	t1 = tmp + 5 * U;
+	t3 = s2;
+
+	/*
+	 * Compute the actual lengths (in bytes) of p and q, and check
+	 * that they fit within our stack buffers.
+	 */
+	p = sk->p;
+	plen = sk->plen;
+	while (plen > 0 && *p == 0) {
+		p ++;
+		plen --;
+	}
+	q = sk->q;
+	qlen = sk->qlen;
+	while (qlen > 0 && *q == 0) {
+		q ++;
+		qlen --;
+	}
+	if (plen > (BR_MAX_RSA_FACTOR >> 3)
+		|| qlen > (BR_MAX_RSA_FACTOR >> 3))
+	{
+		return 0;
+	}
+
+	/*
+	 * Decode p and q.
+	 */
+	br_i32_decode(mp, p, plen);
+	br_i32_decode(mq, q, qlen);
+
+	/*
+	 * Recompute modulus, to compare with the source value.
+	 */
+	br_i32_zero(t2, mp[0]);
+	br_i32_mulacc(t2, mp, mq);
+	xlen = (sk->n_bitlen + 7) >> 3;
+	br_i32_encode(t2 + 2 * U, xlen, t2);
+	u = xlen;
+	r = 0;
+	while (u > 0) {
+		uint32_t wn, wx;
+
+		u --;
+		wn = ((unsigned char *)(t2 + 2 * U))[u];
+		wx = x[u];
+		r = ((wx - (wn + r)) >> 8) & 1;  /* borrow out of wx - wn - r */
+	}
+
+	/*
+	 * Compute s1 = x^dp mod p.
+	 */
+	p0i = br_i32_ninv32(mp[1]);
+	br_i32_decode_reduce(s1, x, xlen, mp);
+	br_i32_modpow(s1, sk->dp, sk->dplen, mp, p0i, t1, t2);
+
+	/*
+	 * Compute s2 = x^dq mod q.
+	 */
+	q0i = br_i32_ninv32(mq[1]);
+	br_i32_decode_reduce(s2, x, xlen, mq);
+	br_i32_modpow(s2, sk->dq, sk->dqlen, mq, q0i, t1, t2);
+
+	/*
+	 * Compute:
+	 *   h = (s1 - s2)*(1/q) mod p
+	 * s1 is an integer modulo p, but s2 is modulo q. PKCS#1 is
+	 * unclear about whether p may be lower than q (some existing,
+	 * widely deployed implementations of RSA don't tolerate p < q),
+	 * but we want to support that occurrence, so we need to use the
+	 * reduction function.
+	 *
+	 * Since we use br_i32_decode_reduce() for iq (purportedly, the
+	 * inverse of q modulo p), we also tolerate improperly large
+	 * values for this parameter.
+	 */
+	br_i32_reduce(t2, s2, mp);
+	br_i32_add(s1, mp, br_i32_sub(s1, t2, 1));
+	br_i32_to_monty(s1, mp);
+	br_i32_decode_reduce(t1, sk->iq, sk->iqlen, mp);
+	br_i32_montymul(t2, s1, t1, mp, p0i);
+
+	/*
+	 * h is now in t2. We compute the final result:
+	 *   s = s2 + q*h
+	 * All these operations are non-modular.
+	 *
+	 * We need mq, s2 and t2. We use the t3 buffer as destination.
+	 * The buffers mp, s1 and t1 are no longer needed. Moreover,
+	 * the first step is to copy s2 into the destination buffer t3.
+	 * We thus arranged for t3 to actually share space with s2, and
+	 * to be followed by the space formerly used by s1 and t1.
+	 */
+	br_i32_mulacc(t3, mq, t2);
+
+	/*
+	 * Encode the result. Since we already checked the value of xlen,
+	 * we can just use it right away.
+	 */
+	br_i32_encode(x, xlen, t3);
+
+	/*
+	 * The only error conditions remaining at that point are invalid
+	 * values for p and q (even integers), and x not below p*q (r == 0).
+	 */
+	return p0i & q0i & r;
+}
diff --git a/third_party/bearssl/src/rsa_i32_pss_sign.c b/third_party/bearssl/src/rsa_i32_pss_sign.c
new file mode 100644
index 0000000..0f72f92
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i32_pss_sign.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h */
+uint32_t
+br_rsa_i32_pss_sign(const br_prng_class **rng,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const unsigned char *hash, size_t salt_len,
+	const br_rsa_private_key *sk, unsigned char *x)
+{
+	if (!br_rsa_pss_sig_pad(rng, hf_data, hf_mgf1, hash,
+		salt_len, sk->n_bitlen, x))
+	{
+		return 0;  /* padding step returned 0 */
+	}
+	return br_rsa_i32_private(x, sk);  /* operates on the same x[] buffer */
+}
diff --git a/third_party/bearssl/src/rsa_i32_pss_vrfy.c b/third_party/bearssl/src/rsa_i32_pss_vrfy.c
new file mode 100644
index 0000000..2e70d23
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i32_pss_vrfy.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h */
+uint32_t
+br_rsa_i32_pss_vrfy(const unsigned char *x, size_t xlen,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const void *hash, size_t salt_len, const br_rsa_public_key *pk)
+{
+	unsigned char sig[BR_MAX_RSA_SIZE >> 3];  /* stack copy; x[] itself is only read */
+
+	if (xlen > (sizeof sig)) {
+		return 0;  /* x[] would not fit in sig[] */
+	}
+	memcpy(sig, x, xlen);
+	if (!br_rsa_i32_public(sig, xlen, pk)) {
+		return 0;  /* br_rsa_i32_public() returned 0 */
+	}
+	return br_rsa_pss_sig_unpad(hf_data, hf_mgf1,
+		hash, salt_len, pk, sig);
+}
diff --git a/third_party/bearssl/src/rsa_i32_pub.c b/third_party/bearssl/src/rsa_i32_pub.c
new file mode 100644
index 0000000..6e8d8e3
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i32_pub.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h -- in-place public-key operation: x[] <- x^e mod n; returns 0 on error */
+uint32_t
+br_rsa_i32_public(unsigned char *x, size_t xlen,
+	const br_rsa_public_key *pk)
+{
+	const unsigned char *n;
+	size_t nlen;
+	uint32_t m[1 + (BR_MAX_RSA_SIZE >> 5)];
+	uint32_t a[1 + (BR_MAX_RSA_SIZE >> 5)];
+	uint32_t t1[1 + (BR_MAX_RSA_SIZE >> 5)];
+	uint32_t t2[1 + (BR_MAX_RSA_SIZE >> 5)];
+	uint32_t m0i, r;
+
+	/*
+	 * Skip the leading zero bytes of the modulus, then check that it fits
+	 * our stack buffers and that xlen equals the stripped modulus length.
+	 */
+	n = pk->n;
+	nlen = pk->nlen;
+	while (nlen > 0 && *n == 0) {
+		n ++;
+		nlen --;
+	}
+	if (nlen == 0 || nlen > (BR_MAX_RSA_SIZE >> 3) || xlen != nlen) {
+		return 0;
+	}
+	br_i32_decode(m, n, nlen);
+	m0i = br_i32_ninv32(m[1]);
+
+	/*
+	 * Note: if m[] is even, then m0i == 0. Otherwise, m0i must be
+	 * an odd integer. Hence r starts at 1 only for an odd modulus.
+	 */
+	r = m0i & 1;
+
+	/*
+	 * Decode x[] into a[]; we also check that its value is proper.
+	 */
+	r &= br_i32_decode_mod(a, x, xlen, m);
+
+	/*
+	 * Compute the modular exponentiation (done even if r is already 0).
+	 */
+	br_i32_modpow(a, pk->e, pk->elen, m, m0i, t1, t2);
+
+	/*
+	 * Encode the result back into x[], overwriting the input.
+	 */
+	br_i32_encode(x, xlen, a);
+	return r;
+}
diff --git a/third_party/bearssl/src/rsa_i62_keygen.c b/third_party/bearssl/src/rsa_i62_keygen.c
new file mode 100644
index 0000000..992fe97
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i62_keygen.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+/* see bearssl_rsa.h -- key generation: delegates to br_rsa_i31_keygen_inner() with the i62 modpow */
+uint32_t
+br_rsa_i62_keygen(const br_prng_class **rng,
+	br_rsa_private_key *sk, void *kbuf_priv,
+	br_rsa_public_key *pk, void *kbuf_pub,
+	unsigned size, uint32_t pubexp)
+{
+	return br_rsa_i31_keygen_inner(rng,
+		sk, kbuf_priv, pk, kbuf_pub, size, pubexp,
+		&br_i62_modpow_opt_as_i31);
+}
+
+/* see bearssl_rsa.h -- BR_INT128 or BR_UMUL128 is set: return the i62 key generation function */
+br_rsa_keygen
+br_rsa_i62_keygen_get(void)
+{
+	return &br_rsa_i62_keygen;
+}
+
+#else
+
+/* see bearssl_rsa.h -- neither BR_INT128 nor BR_UMUL128 is set: no i62 code, return 0 */
+br_rsa_keygen
+br_rsa_i62_keygen_get(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/rsa_i62_oaep_decrypt.c b/third_party/bearssl/src/rsa_i62_oaep_decrypt.c
new file mode 100644
index 0000000..38470dd
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i62_oaep_decrypt.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+/* see bearssl_rsa.h -- OAEP decryption in place: private-key operation on data[], then unpad */
+uint32_t
+br_rsa_i62_oaep_decrypt(const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_private_key *sk, void *data, size_t *len)
+{
+	uint32_t r;
+
+	if (*len != ((sk->n_bitlen + 7) >> 3)) {  /* input must be exactly modulus-sized */
+		return 0;
+	}
+	r = br_rsa_i62_private(data, sk);
+	r &= br_rsa_oaep_unpad(dig, label, label_len, data, len);  /* called even if r == 0 */
+	return r;
+}
+
+/* see bearssl_rsa.h -- BR_INT128 or BR_UMUL128 is set: return the i62 OAEP decryption function */
+br_rsa_oaep_decrypt
+br_rsa_i62_oaep_decrypt_get(void)
+{
+	return &br_rsa_i62_oaep_decrypt;
+}
+
+#else
+
+/* see bearssl_rsa.h -- neither BR_INT128 nor BR_UMUL128 is set: no i62 code, return 0 */
+br_rsa_oaep_decrypt
+br_rsa_i62_oaep_decrypt_get(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/rsa_i62_oaep_encrypt.c b/third_party/bearssl/src/rsa_i62_oaep_encrypt.c
new file mode 100644
index 0000000..cf41ecb
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i62_oaep_encrypt.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+/* see bearssl_rsa.h -- OAEP encryption: pad src[] into dst[], then public-key operation in place */
+size_t
+br_rsa_i62_oaep_encrypt(
+	const br_prng_class **rnd, const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_public_key *pk,
+	void *dst, size_t dst_max_len,
+	const void *src, size_t src_len)
+{
+	size_t dlen;  /* padded length returned by br_rsa_oaep_pad(); 0 means padding failed */
+
+	dlen = br_rsa_oaep_pad(rnd, dig, label, label_len,
+		pk, dst, dst_max_len, src, src_len);
+	if (dlen == 0) {
+		return 0;
+	}
+	return dlen & -(size_t)br_rsa_i62_public(dst, dlen, pk);  /* mask: dlen if result is 1, 0 if 0 */
+}
+
+/* see bearssl_rsa.h -- BR_INT128 or BR_UMUL128 is set: return the i62 OAEP encryption function */
+br_rsa_oaep_encrypt
+br_rsa_i62_oaep_encrypt_get(void)
+{
+	return &br_rsa_i62_oaep_encrypt;
+}
+
+#else
+
+/* see bearssl_rsa.h -- neither BR_INT128 nor BR_UMUL128 is set: no i62 code, return 0 */
+br_rsa_oaep_encrypt
+br_rsa_i62_oaep_encrypt_get(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/rsa_i62_pkcs1_sign.c b/third_party/bearssl/src/rsa_i62_pkcs1_sign.c
new file mode 100644
index 0000000..a20a084
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i62_pkcs1_sign.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+/* see bearssl_rsa.h -- PKCS#1 signature: pad the hash into x[], then private-key operation in place */
+uint32_t
+br_rsa_i62_pkcs1_sign(const unsigned char *hash_oid,
+	const unsigned char *hash, size_t hash_len,
+	const br_rsa_private_key *sk, unsigned char *x)
+{
+	if (!br_rsa_pkcs1_sig_pad(hash_oid, hash, hash_len, sk->n_bitlen, x)) {
+		return 0;  /* padding failed: the private key is not used */
+	}
+	return br_rsa_i62_private(x, sk);
+}
+
+/* see bearssl_rsa.h -- BR_INT128 or BR_UMUL128 is set: return the i62 PKCS#1 signature function */
+br_rsa_pkcs1_sign
+br_rsa_i62_pkcs1_sign_get(void)
+{
+	return &br_rsa_i62_pkcs1_sign;
+}
+
+#else
+
+/* see bearssl_rsa.h -- neither BR_INT128 nor BR_UMUL128 is set: no i62 code, return 0 */
+br_rsa_pkcs1_sign
+br_rsa_i62_pkcs1_sign_get(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/rsa_i62_pkcs1_vrfy.c b/third_party/bearssl/src/rsa_i62_pkcs1_vrfy.c
new file mode 100644
index 0000000..6519161
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i62_pkcs1_vrfy.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+/* see bearssl_rsa.h -- PKCS#1 verification: copy x[] locally, apply br_rsa_i62_public(), then unpad */
+uint32_t
+br_rsa_i62_pkcs1_vrfy(const unsigned char *x, size_t xlen,
+	const unsigned char *hash_oid, size_t hash_len,
+	const br_rsa_public_key *pk, unsigned char *hash_out)
+{
+	unsigned char sig[BR_MAX_RSA_SIZE >> 3];  /* scratch copy; x[] itself is only read */
+
+	if (xlen > (sizeof sig)) {  /* input longer than the local buffer: reject */
+		return 0;
+	}
+	memcpy(sig, x, xlen);
+	if (!br_rsa_i62_public(sig, xlen, pk)) {  /* in-place public-key operation failed */
+		return 0;
+	}
+	return br_rsa_pkcs1_sig_unpad(sig, xlen, hash_oid, hash_len, hash_out);
+}
+
+/* see bearssl_rsa.h -- BR_INT128 or BR_UMUL128 is set: return the i62 PKCS#1 verification function */
+br_rsa_pkcs1_vrfy
+br_rsa_i62_pkcs1_vrfy_get(void)
+{
+	return &br_rsa_i62_pkcs1_vrfy;
+}
+
+#else
+
+/* see bearssl_rsa.h -- neither BR_INT128 nor BR_UMUL128 is set: no i62 code, return 0 */
+br_rsa_pkcs1_vrfy
+br_rsa_i62_pkcs1_vrfy_get(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/rsa_i62_priv.c b/third_party/bearssl/src/rsa_i62_priv.c
new file mode 100644
index 0000000..f0da600
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i62_priv.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+#define U      (2 + ((BR_MAX_RSA_FACTOR + 30) / 31))  /* presumably BR_MAX_RSA_FACTOR is in bits -- confirm */
+#define TLEN   (4 * U)  /* TLEN is counted in 64-bit words; it sizes tmp[] below */
+
+/* see bearssl_rsa.h -- in-place RSA private-key operation on x[], using the CRT elements of sk */
+uint32_t
+br_rsa_i62_private(unsigned char *x, const br_rsa_private_key *sk)
+{
+	const unsigned char *p, *q;
+	size_t plen, qlen;
+	size_t fwlen;
+	uint32_t p0i, q0i;
+	size_t xlen, u;
+	uint64_t tmp[TLEN];  /* all working values are carved out of this buffer, in fwlen-sized slots */
+	long z;
+	uint32_t *mp, *mq, *s1, *s2, *t1, *t2, *t3;
+	uint32_t r;
+
+	/*
+	 * Compute the actual lengths of p and q, in bytes.
+	 * These lengths are not considered secret (we cannot really hide
+	 * them anyway in constant-time code).
+	 */
+	p = sk->p;
+	plen = sk->plen;
+	while (plen > 0 && *p == 0) {
+		p ++;
+		plen --;
+	}
+	q = sk->q;
+	qlen = sk->qlen;
+	while (qlen > 0 && *q == 0) {
+		q ++;
+		qlen --;
+	}
+
+	/*
+	 * Compute the maximum factor length, in 31-bit words, plus one.
+	 */
+	z = (long)(plen > qlen ? plen : qlen) << 3;
+	fwlen = 1;
+	while (z > 0) {
+		z -= 31;
+		fwlen ++;
+	}
+
+	/*
+	 * Convert size to 62-bit words (rounding up).
+	 */
+	fwlen = (fwlen + 1) >> 1;
+
+	/*
+	 * We need to fit at least 6 values in the stack buffer.
+	 */
+	if (6 * fwlen > TLEN) {
+		return 0;
+	}
+
+	/*
+	 * Compute signature length (in bytes), from the modulus bit length.
+	 */
+	xlen = (sk->n_bitlen + 7) >> 3;
+
+	/*
+	 * Decode q (into slot 0).
+	 */
+	mq = (uint32_t *)tmp;
+	br_i31_decode(mq, q, qlen);
+
+	/*
+	 * Decode p (into slot 1).
+	 */
+	t1 = (uint32_t *)(tmp + fwlen);
+	br_i31_decode(t1, p, plen);
+
+	/*
+	 * Compute the modulus (product of the two factors), to compare
+	 * it with the source value. We use br_i31_mulacc(), since it's
+	 * already used later on.
+	 */
+	t2 = (uint32_t *)(tmp + 2 * fwlen);
+	br_i31_zero(t2, mq[0]);
+	br_i31_mulacc(t2, mq, t1);
+
+	/*
+	 * We encode the modulus into bytes, to perform the comparison
+	 * with bytes. We know that the product length, in bytes, is
+	 * exactly xlen.
+	 * The comparison actually computes the carry when subtracting
+	 * the modulus from the source value; that carry must be 1 for
+	 * a value in the correct range. We keep it in r, which is our
+	 * accumulator for the error code.
+	 */
+	t3 = (uint32_t *)(tmp + 4 * fwlen);
+	br_i31_encode(t3, xlen, t2);
+	u = xlen;
+	r = 0;
+	while (u > 0) {
+		uint32_t wn, wx;
+
+		u --;
+		wn = ((unsigned char *)t3)[u];
+		wx = x[u];
+		r = ((wx - (wn + r)) >> 8) & 1;  /* borrow out of this byte, fed into the next one */
+	}
+
+	/*
+	 * Move the decoded p to another temporary buffer (slot 1 to slot 2).
+	 */
+	mp = (uint32_t *)(tmp + 2 * fwlen);
+	memmove(mp, t1, 2 * fwlen * sizeof *t1);
+
+	/*
+	 * Compute s2 = x^dq mod q.
+	 */
+	q0i = br_i31_ninv31(mq[1]);
+	s2 = (uint32_t *)(tmp + fwlen);
+	br_i31_decode_reduce(s2, x, xlen, mq);
+	r &= br_i62_modpow_opt(s2, sk->dq, sk->dqlen, mq, q0i,
+		tmp + 3 * fwlen, TLEN - 3 * fwlen);
+
+	/*
+	 * Compute s1 = x^dp mod p.
+	 */
+	p0i = br_i31_ninv31(mp[1]);
+	s1 = (uint32_t *)(tmp + 3 * fwlen);
+	br_i31_decode_reduce(s1, x, xlen, mp);
+	r &= br_i62_modpow_opt(s1, sk->dp, sk->dplen, mp, p0i,
+		tmp + 4 * fwlen, TLEN - 4 * fwlen);
+
+	/*
+	 * Compute:
+	 *   h = (s1 - s2)*(1/q) mod p
+	 * s1 is an integer modulo p, but s2 is modulo q. PKCS#1 is
+	 * unclear about whether p may be lower than q (some existing,
+	 * widely deployed implementations of RSA don't tolerate p < q),
+	 * but we want to support that occurrence, so we need to use the
+	 * reduction function.
+	 *
+	 * Since we use br_i31_decode_reduce() for iq (purportedly, the
+	 * inverse of q modulo p), we also tolerate improperly large
+	 * values for this parameter.
+	 */
+	t1 = (uint32_t *)(tmp + 4 * fwlen);
+	t2 = (uint32_t *)(tmp + 5 * fwlen);
+	br_i31_reduce(t2, s2, mp);
+	br_i31_add(s1, mp, br_i31_sub(s1, t2, 1));
+	br_i31_to_monty(s1, mp);
+	br_i31_decode_reduce(t1, sk->iq, sk->iqlen, mp);
+	br_i31_montymul(t2, s1, t1, mp, p0i);
+
+	/*
+	 * h is now in t2. We compute the final result:
+	 *   s = s2 + q*h
+	 * All these operations are non-modular.
+	 *
+	 * We need mq, s2 and t2. We use the t3 buffer as destination.
+	 * The buffers mp, s1 and t1 are no longer needed, so we can
+	 * reuse them for t3. Moreover, the first step of the computation
+	 * is to copy s2 into t3, after which s2 is not needed. Right
+	 * now, mq is in slot 0, s2 is in slot 1, and t2 is in slot 5.
+	 * Therefore, we have ample room for t3 by simply using s2.
+	 */
+	t3 = s2;
+	br_i31_mulacc(t3, mq, t2);
+
+	/*
+	 * Encode the result into x[] (xlen bytes). NOTE(review): xlen is
+	 * derived from sk->n_bitlen; no explicit range check is visible here.
+	 */
+	br_i31_encode(x, xlen, t3);
+
+	/*
+	 * Combine the status values: r holds the range check and the modpow
+	 * results; p0i and q0i contribute their low bit (p and q must be odd).
+	 */
+	return p0i & q0i & r;
+}
+
+/* see bearssl_rsa.h -- BR_INT128 or BR_UMUL128 is set: return the i62 private-key function */
+br_rsa_private
+br_rsa_i62_private_get(void)
+{
+	return &br_rsa_i62_private;
+}
+
+#else
+
+/* see bearssl_rsa.h -- neither BR_INT128 nor BR_UMUL128 is set: no i62 code, return 0 */
+br_rsa_private
+br_rsa_i62_private_get(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/rsa_i62_pss_sign.c b/third_party/bearssl/src/rsa_i62_pss_sign.c
new file mode 100644
index 0000000..7232f6d
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i62_pss_sign.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+/* see bearssl_rsa.h -- PSS signature: pad the hash into x[], then private-key operation in place */
+uint32_t
+br_rsa_i62_pss_sign(const br_prng_class **rng,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const unsigned char *hash, size_t salt_len,
+	const br_rsa_private_key *sk, unsigned char *x)
+{
+	if (!br_rsa_pss_sig_pad(rng, hf_data, hf_mgf1, hash,
+		salt_len, sk->n_bitlen, x))
+	{
+		return 0;  /* padding failed: the private key is not used */
+	}
+	return br_rsa_i62_private(x, sk);
+}
+
+/* see bearssl_rsa.h -- BR_INT128 or BR_UMUL128 is set: return the i62 PSS signature function */
+br_rsa_pss_sign
+br_rsa_i62_pss_sign_get(void)
+{
+	return &br_rsa_i62_pss_sign;
+}
+
+#else
+
+/* see bearssl_rsa.h -- neither BR_INT128 nor BR_UMUL128 is set: no i62 code, return 0 */
+br_rsa_pss_sign
+br_rsa_i62_pss_sign_get(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/rsa_i62_pss_vrfy.c b/third_party/bearssl/src/rsa_i62_pss_vrfy.c
new file mode 100644
index 0000000..e726e82
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i62_pss_vrfy.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+/* see bearssl_rsa.h -- PSS verification: copy x[] locally, apply br_rsa_i62_public(), then unpad */
+uint32_t
+br_rsa_i62_pss_vrfy(const unsigned char *x, size_t xlen,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const void *hash, size_t salt_len, const br_rsa_public_key *pk)
+{
+	unsigned char sig[BR_MAX_RSA_SIZE >> 3];  /* scratch copy; x[] itself is only read */
+
+	if (xlen > (sizeof sig)) {  /* input longer than the local buffer: reject */
+		return 0;
+	}
+	memcpy(sig, x, xlen);
+	if (!br_rsa_i62_public(sig, xlen, pk)) {  /* in-place public-key operation failed */
+		return 0;
+	}
+	return br_rsa_pss_sig_unpad(hf_data, hf_mgf1,
+		hash, salt_len, pk, sig);
+}
+
+/* see bearssl_rsa.h -- BR_INT128 or BR_UMUL128 is set: return the i62 PSS verification function */
+br_rsa_pss_vrfy
+br_rsa_i62_pss_vrfy_get(void)
+{
+	return &br_rsa_i62_pss_vrfy;
+}
+
+#else
+
+/* see bearssl_rsa.h -- neither BR_INT128 nor BR_UMUL128 is set: no i62 code, return 0 */
+br_rsa_pss_vrfy
+br_rsa_i62_pss_vrfy_get(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/rsa_i62_pub.c b/third_party/bearssl/src/rsa_i62_pub.c
new file mode 100644
index 0000000..70cf61b
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i62_pub.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+/*
+ * Size of the stack buffer tmp[], in 64-bit words. As a strict minimum,
+ * it must hold four modular integers (each made of 32-bit words).
+ */
+#define TLEN   (2 * (2 + ((BR_MAX_RSA_SIZE + 30) / 31)))
+
+/* see bearssl_rsa.h -- in-place public-key operation: x[] <- x^e mod n; returns 0 on error */
+uint32_t
+br_rsa_i62_public(unsigned char *x, size_t xlen,
+	const br_rsa_public_key *pk)
+{
+	const unsigned char *n;
+	size_t nlen;
+	uint64_t tmp[TLEN];  /* holds m[], a[], then the scratch area given to the modpow */
+	uint32_t *m, *a;
+	size_t fwlen;
+	long z;
+	uint32_t m0i, r;
+
+	/*
+	 * Skip the leading zero bytes of the modulus, then check that it fits
+	 * our stack buffer and that xlen equals the stripped modulus length.
+	 */
+	n = pk->n;
+	nlen = pk->nlen;
+	while (nlen > 0 && *n == 0) {
+		n ++;
+		nlen --;
+	}
+	if (nlen == 0 || nlen > (BR_MAX_RSA_SIZE >> 3) || xlen != nlen) {
+		return 0;
+	}
+	z = (long)nlen << 3;  /* modulus length in bits */
+	fwlen = 1;
+	while (z > 0) {
+		z -= 31;
+		fwlen ++;
+	}
+	/*
+	 * Convert fwlen to a count in 62-bit words (rounding up).
+	 */
+	fwlen = (fwlen + 1) >> 1;
+
+	/*
+	 * The modulus gets decoded into m[].
+	 * The value to exponentiate goes into a[].
+	 */
+	m = (uint32_t *)tmp;
+	a = (uint32_t *)(tmp + fwlen);
+
+	/*
+	 * Decode the modulus.
+	 */
+	br_i31_decode(m, n, nlen);
+	m0i = br_i31_ninv31(m[1]);
+
+	/*
+	 * Note: if m[] is even, then m0i == 0. Otherwise, m0i must be
+	 * an odd integer. Hence r starts at 1 only for an odd modulus.
+	 */
+	r = m0i & 1;
+
+	/*
+	 * Decode x[] into a[]; we also check that its value is proper.
+	 */
+	r &= br_i31_decode_mod(a, x, xlen, m);
+
+	/*
+	 * Compute the modular exponentiation (its return value is not used).
+	 */
+	br_i62_modpow_opt(a, pk->e, pk->elen, m, m0i,
+		tmp + 2 * fwlen, TLEN - 2 * fwlen);
+
+	/*
+	 * Encode the result back into x[], overwriting the input.
+	 */
+	br_i31_encode(x, xlen, a);
+	return r;
+}
+
+/* see bearssl_rsa.h -- BR_INT128 or BR_UMUL128 is set: return the i62 public-key function */
+br_rsa_public
+br_rsa_i62_public_get(void)
+{
+	return &br_rsa_i62_public;
+}
+
+#else
+
+/* see bearssl_rsa.h -- neither BR_INT128 nor BR_UMUL128 is set: no i62 code, return 0 */
+br_rsa_public
+br_rsa_i62_public_get(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/rsa_oaep_pad.c b/third_party/bearssl/src/rsa_oaep_pad.c
new file mode 100644
index 0000000..5327dc2
--- /dev/null
+++ b/third_party/bearssl/src/rsa_oaep_pad.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Hash len bytes from src with the hash function dig, and write the digest
+ * to dst. This is a separate function so that stack allocation of the hash
+ * function context is done only for the duration of the hash.
+ */
+static void
+hash_data(const br_hash_class *dig, void *dst, const void *src, size_t len)
+{
+	br_hash_compat_context hc;
+
+	hc.vtable = dig;  /* the context starts with the vtable pointer used by the calls below */
+	dig->init(&hc.vtable);
+	dig->update(&hc.vtable, src, len);
+	dig->out(&hc.vtable, dst);
+}
+
+/* see inner.h -- OAEP-pad src[] into dst[]; returns the padded length (modulus length), 0 on error */
+size_t
+br_rsa_oaep_pad(const br_prng_class **rnd, const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_public_key *pk,
+	void *dst, size_t dst_max_len,
+	const void *src, size_t src_len)
+{
+	size_t k, hlen;
+	unsigned char *buf;
+
+	hlen = br_digest_size(dig);
+
+	/*
+	 * Actual modulus length in bytes; n[] is big-endian: skip leading zeros.
+	 */
+	k = pk->nlen;
+	while (k > 0 && pk->n[pk->nlen - k] == 0) {
+		k --;
+	}
+
+	/*
+	 * An error is reported if:
+	 *  - the modulus is too short;
+	 *  - the source message length is too long;
+	 *  - the destination buffer is too short.
+	 */
+	if (k < ((hlen << 1) + 2)
+		|| src_len > (k - (hlen << 1) - 2)
+		|| dst_max_len < k)
+	{
+		return 0;
+	}
+
+	/*
+	 * Apply padding. At this point, things cannot fail.
+	 */
+	buf = dst;
+
+	/*
+	 * Assemble: DB = lHash || PS || 0x01 || M
+	 * We first place the source message M with memmove(), so that
+	 * overlaps between source and destination buffers are supported.
+	 */
+	memmove(buf + k - src_len, src, src_len);
+	hash_data(dig, buf + 1 + hlen, label, label_len);
+	memset(buf + 1 + (hlen << 1), 0, k - src_len - (hlen << 1) - 2);
+	buf[k - src_len - 1] = 0x01;
+
+	/*
+	 * Make the random seed.
+	 */
+	(*rnd)->generate(rnd, buf + 1, hlen);
+
+	/*
+	 * Mask DB with the mask generated from the seed.
+	 */
+	br_mgf1_xor(buf + 1 + hlen, k - hlen - 1, dig, buf + 1, hlen);
+
+	/*
+	 * Mask the seed with the mask generated from the masked DB.
+	 */
+	br_mgf1_xor(buf + 1, hlen, dig, buf + 1 + hlen, k - hlen - 1);
+
+	/*
+	 * Padding result: EM = 0x00 || maskedSeed || maskedDB.
+	 */
+	buf[0] = 0x00;
+	return k;
+}
diff --git a/third_party/bearssl/src/rsa_oaep_unpad.c b/third_party/bearssl/src/rsa_oaep_unpad.c
new file mode 100644
index 0000000..7c4be6a
--- /dev/null
+++ b/third_party/bearssl/src/rsa_oaep_unpad.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Hash src[0..len-1] with 'dig' and XOR the digest into the first
+ * br_digest_size(dig) bytes of dst. This is a separate function so that
+ * the hash context lives on the stack only for the duration of the hash.
+ */
+static void
+xor_hash_data(const br_hash_class *dig, void *dst, const void *src, size_t len)
+{
+	br_hash_compat_context hc;
+	unsigned char tmp[64];
+	unsigned char *buf;
+	size_t u, hlen;
+	/* NOTE(review): tmp[] assumes a digest of at most 64 bytes -- confirm. */
+	hc.vtable = dig;
+	dig->init(&hc.vtable);
+	dig->update(&hc.vtable, src, len);
+	dig->out(&hc.vtable, tmp);
+	buf = dst;
+	hlen = br_digest_size(dig);
+	for (u = 0; u < hlen; u ++) {
+		buf[u] ^= tmp[u];
+	}
+}
+
+/* see inner.h: in-place OAEP unpadding; returns 1 and sets *len if valid */
+uint32_t
+br_rsa_oaep_unpad(const br_hash_class *dig,
+	const void *label, size_t label_len,
+	void *data, size_t *len)
+{
+	size_t u, k, hlen;
+	unsigned char *buf;
+	uint32_t r, s, zlen;
+
+	hlen = br_digest_size(dig);
+	k = *len;
+	buf = data;
+
+	/*
+	 * There must be room for the padding.
+	 */
+	if (k < ((hlen << 1) + 2)) {
+		return 0;
+	}
+
+	/*
+	 * Unmask the seed, then the DB value.
+	 */
+	br_mgf1_xor(buf + 1, hlen, dig, buf + 1 + hlen, k - hlen - 1);
+	br_mgf1_xor(buf + 1 + hlen, k - hlen - 1, dig, buf + 1, hlen);
+
+	/*
+	 * Hash the label and XOR it with the value in the array; if
+	 * they are equal then these should yield only zeros.
+	 */
+	xor_hash_data(dig, buf + 1 + hlen, label, label_len);
+
+	/*
+	 * At that point, if the padding was correct, then we should
+	 * have: 0x00 || seed || 0x00 ... 0x00 0x01 || M
+	 * Padding is valid as long as:
+	 *  - There is at least hlen+1 leading bytes of value 0x00.
+	 *  - There is at least one non-zero byte.
+	 *  - The first (leftmost) non-zero byte has value 0x01.
+	 *
+	 * Ultimately, we may leak the resulting message length, i.e.
+	 * the position of the byte of value 0x01, but we must take care
+	 * to do so only if the number of zero bytes has been verified
+	 * to be at least hlen+1.
+	 *
+	 * The loop below counts the number of bytes of value 0x00, and
+	 * checks that the next byte has value 0x01, in constant-time.
+	 *
+	 *  - If the initial byte (before the seed) is not 0x00, then
+	 *    r and s are set to 0, and stay there.
+	 *  - Value r is 1 until the first non-zero byte is reached
+	 *    (after the seed); it switches to 0 at that point.
+	 *  - Value s is set to 1 if and only if the data encountered
+	 *    at the time of the transition of r from 1 to 0 has value
+	 *    exactly 0x01.
+	 *  - Value zlen counts the number of leading bytes of value zero
+	 *    (after the seed).
+	 */
+	r = 1 - ((buf[0] + 0xFF) >> 8);
+	s = 0;
+	zlen = 0;
+	for (u = hlen + 1; u < k; u ++) {
+		uint32_t w, nz;
+
+		w = buf[u];
+
+		/*
+		 * nz == 1 only for the first non-zero byte.
+		 */
+		nz = r & ((w + 0xFF) >> 8);
+		s |= nz & EQ(w, 0x01);
+		r &= NOT(nz);
+		zlen += r;
+	}
+
+	/*
+	 * Padding is correct only if s == 1, _and_ zlen >= hlen.
+	 */
+	s &= GE(zlen, (uint32_t)hlen);
+
+	/*
+	 * At that point, padding was verified, and we are now allowed
+	 * to make conditional jumps.
+	 */
+	if (s) {
+		size_t plen;
+		/* plen = initial 0x00 + seed + zlen zero bytes + 0x01 marker */
+		plen = 2 + hlen + zlen;
+		k -= plen;
+		memmove(buf, buf + plen, k);
+		*len = k;
+	}
+	return s;
+}
diff --git a/third_party/bearssl/src/rsa_pkcs1_sig_pad.c b/third_party/bearssl/src/rsa_pkcs1_sig_pad.c
new file mode 100644
index 0000000..06c3bd7
--- /dev/null
+++ b/third_party/bearssl/src/rsa_pkcs1_sig_pad.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: PKCS#1 v1.5 signature padding into x[]; 0 if too short */
+uint32_t
+br_rsa_pkcs1_sig_pad(const unsigned char *hash_oid,
+	const unsigned char *hash, size_t hash_len,
+	uint32_t n_bitlen, unsigned char *x)
+{
+	size_t u, x3, xlen;
+
+	/*
+	 * Padded hash value has format:
+	 *  00 01 FF .. FF 00 30 x1 30 x2 06 x3 OID 05 00 04 x4 HASH
+	 *
+	 * with the following rules:
+	 *
+	 *  -- Total length is equal to the modulus length (unsigned
+	 *     encoding).
+	 *
+	 *  -- There must be at least eight bytes of value 0xFF.
+	 *
+	 *  -- x4 is equal to the hash length (hash_len).
+	 *
+	 *  -- x3 is equal to the encoded OID value length (hash_oid[0]).
+	 *
+	 *  -- x2 = x3 + 4.
+	 *
+	 *  -- x1 = x2 + x4 + 4 = x3 + x4 + 8.
+	 *
+	 * Note: the "05 00" is optional (signatures with and without
+	 * that sequence exist in practice), but notes in PKCS#1 seem to
+	 * indicate that the presence of that sequence (specifically,
+	 * an ASN.1 NULL value for the hash parameters) may be slightly
+	 * more "standard" than the opposite.
+	 */
+	xlen = (n_bitlen + 7) >> 3;
+	/* With no OID the layout is reduced to: 00 01 FF .. FF 00 HASH */
+	if (hash_oid == NULL) {
+		if (xlen < hash_len + 11) {
+			return 0;
+		}
+		x[0] = 0x00;
+		x[1] = 0x01;
+		u = xlen - hash_len;
+		memset(x + 2, 0xFF, u - 3);
+		x[u - 1] = 0x00;
+	} else {
+		x3 = hash_oid[0];
+
+		/*
+		 * Check that there is enough room for all the elements,
+		 * including at least eight bytes of value 0xFF.
+		 */
+		if (xlen < (x3 + hash_len + 21)) {
+			return 0;
+		}
+		x[0] = 0x00;
+		x[1] = 0x01;
+		u = xlen - x3 - hash_len - 11;
+		memset(x + 2, 0xFF, u - 2);
+		x[u] = 0x00;
+		x[u + 1] = 0x30;
+		x[u + 2] = x3 + hash_len + 8;
+		x[u + 3] = 0x30;
+		x[u + 4] = x3 + 4;
+		x[u + 5] = 0x06;
+		memcpy(x + u + 6, hash_oid, x3 + 1);
+		u += x3 + 7;
+		x[u ++] = 0x05;
+		x[u ++] = 0x00;
+		x[u ++] = 0x04;
+		x[u ++] = hash_len;
+	}
+	memcpy(x + u, hash, hash_len);
+	return 1;
+}
diff --git a/third_party/bearssl/src/rsa_pkcs1_sig_unpad.c b/third_party/bearssl/src/rsa_pkcs1_sig_unpad.c
new file mode 100644
index 0000000..c8ae08f
--- /dev/null
+++ b/third_party/bearssl/src/rsa_pkcs1_sig_unpad.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h: checks PKCS#1 v1.5 padding; copies hash, returns 1 */
+uint32_t
+br_rsa_pkcs1_sig_unpad(const unsigned char *sig, size_t sig_len,
+	const unsigned char *hash_oid, size_t hash_len,
+	unsigned char *hash_out)
+{
+	static const unsigned char pad1[] = {
+		0x00, 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+	};
+	/* NOTE(review): pad2[] holds up to x3 + 11 bytes, so hash_oid[0] <= 32 is assumed -- confirm. */
+	unsigned char pad2[43];
+	size_t u, x2, x3, pad_len, zlen;
+
+	if (sig_len < 11) {
+		return 0;
+	}
+
+	/*
+	 * Expected format:
+	 *  00 01 FF ... FF 00 30 x1 30 x2 06 x3 OID [ 05 00 ] 04 x4 HASH
+	 *
+	 * with the following rules:
+	 *
+	 *  -- Total length is that of the modulus and the signature
+	 *     (this was already verified by br_rsa_i31_public()).
+	 *
+	 *  -- There are at least eight bytes of value 0xFF.
+	 *
+	 *  -- x4 is equal to the hash length (hash_len).
+	 *
+	 *  -- x3 is equal to the encoded OID value length (so x3 is the
+	 *     first byte of hash_oid[]).
+	 *
+	 *  -- If the "05 00" is present, then x2 == x3 + 4; otherwise,
+	 *     x2 == x3 + 2.
+	 *
+	 *  -- x1 == x2 + x4 + 4.
+	 *
+	 * So the total length after the last "FF" is either x3 + x4 + 11
+	 * (with the "05 00") or x3 + x4 + 9 (without the "05 00").
+	 */
+
+	/*
+	 * Check the "00 01 FF .. FF 00" with at least eight 0xFF bytes.
+	 * The comparison is valid because we made sure that the signature
+	 * is at least 11 bytes long.
+	 */
+	if (memcmp(sig, pad1, sizeof pad1) != 0) {
+		return 0;
+	}
+	for (u = sizeof pad1; u < sig_len; u ++) {
+		if (sig[u] != 0xFF) {
+			break;
+		}
+	}
+
+	/*
+	 * Remaining length is sig_len - u bytes (including the 00 just
+	 * after the last FF). This must be equal to one of the two
+	 * possible values (depending on whether the "05 00" sequence is
+	 * present or not).
+	 */
+	if (hash_oid == NULL) {
+		if (sig_len - u != hash_len + 1 || sig[u] != 0x00) {
+			return 0;
+		}
+	} else {
+		x3 = hash_oid[0];
+		pad_len = x3 + 9;
+		memset(pad2, 0, pad_len);
+		zlen = sig_len - u - hash_len;
+		if (zlen == pad_len) {
+			x2 = x3 + 2;
+		} else if (zlen == pad_len + 2) {
+			x2 = x3 + 4;
+			pad_len = zlen;
+			pad2[pad_len - 4] = 0x05;
+		} else {
+			return 0;
+		}
+		pad2[1] = 0x30;
+		pad2[2] = x2 + hash_len + 4;
+		pad2[3] = 0x30;
+		pad2[4] = x2;
+		pad2[5] = 0x06;
+		memcpy(pad2 + 6, hash_oid, x3 + 1);
+		pad2[pad_len - 2] = 0x04;
+		pad2[pad_len - 1] = hash_len;
+		if (memcmp(pad2, sig + u, pad_len) != 0) {
+			return 0;
+		}
+	}
+	memcpy(hash_out, sig + sig_len - hash_len, hash_len);
+	return 1;
+}
diff --git a/third_party/bearssl/src/rsa_pss_sig_pad.c b/third_party/bearssl/src/rsa_pss_sig_pad.c
new file mode 100644
index 0000000..13e9027
--- /dev/null
+++ b/third_party/bearssl/src/rsa_pss_sig_pad.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: PSS signature padding into x[]; returns 1, or 0 if too short */
+uint32_t
+br_rsa_pss_sig_pad(const br_prng_class **rng,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const unsigned char *hash, size_t salt_len,
+	uint32_t n_bitlen, unsigned char *x)
+{
+	size_t xlen, hash_len;
+	br_hash_compat_context hc;
+	unsigned char *salt, *seed;
+
+	hash_len = br_digest_size(hf_data);
+
+	/*
+	 * The padded string is one bit smaller than the modulus;
+	 * notably, if the modulus length is equal to 1 modulo 8, then
+	 * the padded string will be one _byte_ smaller, and the first
+	 * byte will be set to 0. We apply these transformations here.
+	 */
+	n_bitlen --;
+	if ((n_bitlen & 7) == 0) {
+		*x ++ = 0;
+	}
+	xlen = (n_bitlen + 7) >> 3;
+
+	/*
+	 * Check that the modulus is large enough for the hash value
+	 * length combined with the intended salt length.
+	 */
+	if (hash_len > xlen || salt_len > xlen
+		|| (hash_len + salt_len + 2) > xlen)
+	{
+		return 0;
+	}
+
+	/*
+	 * Produce a random salt.
+	 */
+	salt = x + xlen - hash_len - salt_len - 1;
+	if (salt_len != 0) {
+		(*rng)->generate(rng, salt, salt_len);
+	}
+
+	/* Compute the seed for MGF1: H = hash(8 zero bytes || hash || salt).
+	 * The seed area first serves as scratch space for the eight zero
+	 * bytes, then is overwritten with H. */
+	seed = x + xlen - hash_len - 1;
+	hf_data->init(&hc.vtable);
+	memset(seed, 0, 8);
+	hf_data->update(&hc.vtable, seed, 8);
+	hf_data->update(&hc.vtable, hash, hash_len);
+	hf_data->update(&hc.vtable, salt, salt_len);
+	hf_data->out(&hc.vtable, seed);
+
+	/*
+	 * Prepare string PS (padded salt). The salt is already at the
+	 * right place.
+	 */
+	memset(x, 0, xlen - salt_len - hash_len - 2);
+	x[xlen - salt_len - hash_len - 2] = 0x01;
+
+	/*
+	 * Generate the mask and XOR it into PS.
+	 */
+	br_mgf1_xor(x, xlen - hash_len - 1, hf_mgf1, seed, hash_len);
+
+	/*
+	 * Clear the top bits to ensure the value is lower than the
+	 * modulus.
+	 */
+	x[0] &= 0xFF >> (((uint32_t)xlen << 3) - n_bitlen);
+
+	/*
+	 * The seed (H) is already in the right place. We just set the
+	 * last byte.
+	 */
+	x[xlen - 1] = 0xBC;
+
+	return 1;
+}
diff --git a/third_party/bearssl/src/rsa_pss_sig_unpad.c b/third_party/bearssl/src/rsa_pss_sig_unpad.c
new file mode 100644
index 0000000..0c6ae99
--- /dev/null
+++ b/third_party/bearssl/src/rsa_pss_sig_unpad.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: checks PSS padding in x[] (modified in place); returns EQ0(r) */
+uint32_t
+br_rsa_pss_sig_unpad(const br_hash_class *hf_data,
+	const br_hash_class *hf_mgf1,
+	const unsigned char *hash, size_t salt_len,
+	const br_rsa_public_key *pk, unsigned char *x)
+{
+	size_t u, xlen, hash_len;
+	br_hash_compat_context hc;
+	unsigned char *seed, *salt;
+	unsigned char tmp[64];
+	uint32_t r, n_bitlen;
+
+	hash_len = br_digest_size(hf_data);
+
+	/*
+	 * Value r will be set to a non-zero value if any test fails.
+	 */
+	r = 0;
+
+	/* The value bit length (as an integer) must be strictly less than
+	 * that of the modulus. The modulus bit length is computed from its
+	 * first non-zero byte; an all-zero modulus is rejected. */
+	for (u = 0; u < pk->nlen; u ++) {
+		if (pk->n[u] != 0) {
+			break;
+		}
+	}
+	if (u == pk->nlen) {
+		return 0;
+	}
+	n_bitlen = BIT_LENGTH(pk->n[u]) + ((uint32_t)(pk->nlen - u - 1) << 3);
+	n_bitlen --;
+	if ((n_bitlen & 7) == 0) {
+		r |= *x ++;
+	} else {
+		r |= x[0] & (0xFF << (n_bitlen & 7));
+	}
+	xlen = (n_bitlen + 7) >> 3;
+
+	/*
+	 * Check that the modulus is large enough for the hash value
+	 * length combined with the intended salt length.
+	 */
+	if (hash_len > xlen || salt_len > xlen
+		|| (hash_len + salt_len + 2) > xlen)
+	{
+		return 0;
+	}
+
+	/*
+	 * Check value of rightmost byte.
+	 */
+	r |= x[xlen - 1] ^ 0xBC;
+
+	/*
+	 * Generate the mask and XOR it into the first bytes to reveal PS;
+	 * we must also mask out the leading bits.
+	 */
+	seed = x + xlen - hash_len - 1;
+	br_mgf1_xor(x, xlen - hash_len - 1, hf_mgf1, seed, hash_len);
+	if ((n_bitlen & 7) != 0) {
+		x[0] &= 0xFF >> (8 - (n_bitlen & 7));
+	}
+
+	/*
+	 * Check that all padding bytes have the expected value.
+	 */
+	for (u = 0; u < (xlen - hash_len - salt_len - 2); u ++) {
+		r |= x[u];
+	}
+	r |= x[xlen - hash_len - salt_len - 2] ^ 0x01;
+
+	/*
+	 * Recompute H.
+	 */
+	salt = x + xlen - hash_len - salt_len - 1;
+	hf_data->init(&hc.vtable);
+	memset(tmp, 0, 8);
+	hf_data->update(&hc.vtable, tmp, 8);
+	hf_data->update(&hc.vtable, hash, hash_len);
+	hf_data->update(&hc.vtable, salt, salt_len);
+	hf_data->out(&hc.vtable, tmp);
+
+	/*
+	 * Check that the recomputed H value matches the one appearing
+	 * in the string.
+	 */
+	for (u = 0; u < hash_len; u ++) {
+		r |= tmp[u] ^ x[(xlen - hash_len - 1) + u];
+	}
+
+	return EQ0(r);
+}
diff --git a/third_party/bearssl/src/rsa_ssl_decrypt.c b/third_party/bearssl/src/rsa_ssl_decrypt.c
new file mode 100644
index 0000000..047eb18
--- /dev/null
+++ b/third_party/bearssl/src/rsa_ssl_decrypt.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h: decrypts data[] in place, moves last 48 bytes to front */
+uint32_t
+br_rsa_ssl_decrypt(br_rsa_private core, const br_rsa_private_key *sk,
+	unsigned char *data, size_t len)
+{
+	uint32_t x;
+	size_t u;
+
+	/* A first check on length (59 = 48 payload bytes + at least 11
+	 * padding bytes: 00 02, eight non-zero bytes, 00). Since this test
+	 * works only on the buffer length, it needs not (and cannot) be
+	 * constant-time. */
+	if (len < 59 || len != (sk->n_bitlen + 7) >> 3) {
+		return 0;
+	}
+	x = core(data, sk);
+	/* Expect 00 02 <non-zero bytes> 00 <48 bytes>; every check is folded into x. */
+	x &= EQ(data[0], 0x00);
+	x &= EQ(data[1], 0x02);
+	for (u = 2; u < (len - 49); u ++) {
+		x &= NEQ(data[u], 0);
+	}
+	x &= EQ(data[len - 49], 0x00);
+	memmove(data, data + len - 48, 48);
+	return x;
+}
diff --git a/third_party/bearssl/src/settings.c b/third_party/bearssl/src/settings.c
new file mode 100644
index 0000000..309271c
--- /dev/null
+++ b/third_party/bearssl/src/settings.c
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static const br_config_option config[] = { /* build options as (name, value) */
+	{ "BR_64",
+#if BR_64
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_AES_X86NI",
+#if BR_AES_X86NI
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_amd64",
+#if BR_amd64
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_ARMEL_CORTEXM_GCC",
+#if BR_ARMEL_CORTEXM_GCC
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_BE_UNALIGNED",
+#if BR_BE_UNALIGNED
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_CLANG",
+#if BR_CLANG
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_CLANG_3_7",
+#if BR_CLANG_3_7
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_CLANG_3_8",
+#if BR_CLANG_3_8
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_CT_MUL15",
+#if BR_CT_MUL15
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_CT_MUL31",
+#if BR_CT_MUL31
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_GCC",
+#if BR_GCC
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_GCC_4_4",
+#if BR_GCC_4_4
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_GCC_4_5",
+#if BR_GCC_4_5
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_GCC_4_6",
+#if BR_GCC_4_6
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_GCC_4_7",
+#if BR_GCC_4_7
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_GCC_4_8",
+#if BR_GCC_4_8
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_GCC_4_9",
+#if BR_GCC_4_9
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_GCC_5_0",
+#if BR_GCC_5_0
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_i386",
+#if BR_i386
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_INT128",
+#if BR_INT128
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_LE_UNALIGNED",
+#if BR_LE_UNALIGNED
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_LOMUL",
+#if BR_LOMUL
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_MAX_EC_SIZE", BR_MAX_EC_SIZE },
+	{ "BR_MAX_RSA_SIZE", BR_MAX_RSA_SIZE },
+	{ "BR_MAX_RSA_FACTOR", BR_MAX_RSA_FACTOR },
+	{ "BR_MSC",
+#if BR_MSC
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_MSC_2005",
+#if BR_MSC_2005
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_MSC_2008",
+#if BR_MSC_2008
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_MSC_2010",
+#if BR_MSC_2010
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_MSC_2012",
+#if BR_MSC_2012
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_MSC_2013",
+#if BR_MSC_2013
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_MSC_2015",
+#if BR_MSC_2015
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_POWER8",
+#if BR_POWER8
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_RDRAND",
+#if BR_RDRAND
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_SLOW_MUL",
+#if BR_SLOW_MUL
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_SLOW_MUL15",
+#if BR_SLOW_MUL15
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_SSE2",
+#if BR_SSE2
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_UMUL128",
+#if BR_UMUL128
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_USE_UNIX_TIME",
+#if BR_USE_UNIX_TIME
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_USE_WIN32_RAND",
+#if BR_USE_WIN32_RAND
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_USE_WIN32_TIME",
+#if BR_USE_WIN32_TIME
+	 1
+#else
+	 0
+#endif
+	},
+	/* Terminator: a NULL name marks the end of the table. */
+	{ NULL, 0 }
+};
+
+/* see bearssl.h: returns the static, { NULL, 0 }-terminated options table */
+const br_config_option *
+br_get_config(void)
+{
+	return config;
+}
diff --git a/third_party/bearssl/src/sha1.c b/third_party/bearssl/src/sha1.c
new file mode 100644
index 0000000..4f65d84
--- /dev/null
+++ b/third_party/bearssl/src/sha1.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define F(B, C, D)     ((((C) ^ (D)) & (B)) ^ (D)) /* bitwise B ? C : D */
+#define G(B, C, D)     ((B) ^ (C) ^ (D)) /* bitwise parity */
+#define H(B, C, D)     (((D) & (C)) | (((D) | (C)) & (B))) /* bitwise majority */
+#define I(B, C, D)     G(B, C, D)
+/* 32-bit left rotation; n must be in 1..31 so that both shifts are defined. */
+#define ROTL(x, n)    (((x) << (n)) | ((x) >> (32 - (n))))
+/* Additive constants, one per group of 20 steps in br_sha1_round(). */
+#define K1     ((uint32_t)0x5A827999)
+#define K2     ((uint32_t)0x6ED9EBA1)
+#define K3     ((uint32_t)0x8F1BBCDC)
+#define K4     ((uint32_t)0xCA62C1D6)
+
+/* see inner.h: initial state words, copied into cc->val by br_sha1_init() */
+const uint32_t br_sha1_IV[5] = {
+	0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0
+};
+
+/* see inner.h: processes one block read from buf, updating val[0..4] */
+void
+br_sha1_round(const unsigned char *buf, uint32_t *val)
+{
+	uint32_t m[80];
+	uint32_t a, b, c, d, e;
+	int i;
+	/* Load the state, decode 16 words from buf, then expand them to 80. */
+	a = val[0];
+	b = val[1];
+	c = val[2];
+	d = val[3];
+	e = val[4];
+	br_range_dec32be(m, 16, buf);
+	for (i = 16; i < 80; i ++) {
+		uint32_t x = m[i - 3] ^ m[i - 8] ^ m[i - 14] ^ m[i - 16];
+		m[i] = ROTL(x, 1);
+	}
+	/* 80 steps in four groups of 20; each pass runs five steps, rotating the roles of a..e. */
+	for (i = 0; i < 20; i += 5) {
+		e += ROTL(a, 5) + F(b, c, d) + K1 + m[i + 0]; b = ROTL(b, 30);
+		d += ROTL(e, 5) + F(a, b, c) + K1 + m[i + 1]; a = ROTL(a, 30);
+		c += ROTL(d, 5) + F(e, a, b) + K1 + m[i + 2]; e = ROTL(e, 30);
+		b += ROTL(c, 5) + F(d, e, a) + K1 + m[i + 3]; d = ROTL(d, 30);
+		a += ROTL(b, 5) + F(c, d, e) + K1 + m[i + 4]; c = ROTL(c, 30);
+	}
+	for (i = 20; i < 40; i += 5) {
+		e += ROTL(a, 5) + G(b, c, d) + K2 + m[i + 0]; b = ROTL(b, 30);
+		d += ROTL(e, 5) + G(a, b, c) + K2 + m[i + 1]; a = ROTL(a, 30);
+		c += ROTL(d, 5) + G(e, a, b) + K2 + m[i + 2]; e = ROTL(e, 30);
+		b += ROTL(c, 5) + G(d, e, a) + K2 + m[i + 3]; d = ROTL(d, 30);
+		a += ROTL(b, 5) + G(c, d, e) + K2 + m[i + 4]; c = ROTL(c, 30);
+	}
+	for (i = 40; i < 60; i += 5) {
+		e += ROTL(a, 5) + H(b, c, d) + K3 + m[i + 0]; b = ROTL(b, 30);
+		d += ROTL(e, 5) + H(a, b, c) + K3 + m[i + 1]; a = ROTL(a, 30);
+		c += ROTL(d, 5) + H(e, a, b) + K3 + m[i + 2]; e = ROTL(e, 30);
+		b += ROTL(c, 5) + H(d, e, a) + K3 + m[i + 3]; d = ROTL(d, 30);
+		a += ROTL(b, 5) + H(c, d, e) + K3 + m[i + 4]; c = ROTL(c, 30);
+	}
+	for (i = 60; i < 80; i += 5) {
+		e += ROTL(a, 5) + I(b, c, d) + K4 + m[i + 0]; b = ROTL(b, 30);
+		d += ROTL(e, 5) + I(a, b, c) + K4 + m[i + 1]; a = ROTL(a, 30);
+		c += ROTL(d, 5) + I(e, a, b) + K4 + m[i + 2]; e = ROTL(e, 30);
+		b += ROTL(c, 5) + I(d, e, a) + K4 + m[i + 3]; d = ROTL(d, 30);
+		a += ROTL(b, 5) + I(c, d, e) + K4 + m[i + 4]; c = ROTL(c, 30);
+	}
+	/* Add the working variables back into the running state. */
+	val[0] += a;
+	val[1] += b;
+	val[2] += c;
+	val[3] += d;
+	val[4] += e;
+}
+
+/* see bearssl.h: sets the vtable, loads br_sha1_IV and zeroes the byte count */
+void
+br_sha1_init(br_sha1_context *cc)
+{
+	cc->vtable = &br_sha1_vtable;
+	memcpy(cc->val, br_sha1_IV, sizeof cc->val);
+	cc->count = 0;
+}
+
+/* see bearssl.h: appends len bytes, running a round per full 64-byte buffer */
+void
+br_sha1_update(br_sha1_context *cc, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	size_t ptr;
+	/* ptr = number of bytes already pending in cc->buf (count modulo 64). */
+	buf = data;
+	ptr = (size_t)cc->count & 63;
+	while (len > 0) {
+		size_t clen;
+
+		clen = 64 - ptr;
+		if (clen > len) {
+			clen = len;
+		}
+		memcpy(cc->buf + ptr, buf, clen);
+		ptr += clen;
+		buf += clen;
+		len -= clen;
+		cc->count += (uint64_t)clen;
+		if (ptr == 64) {
+			br_sha1_round(cc->buf, cc->val);
+			ptr = 0;
+		}
+	}
+}
+
+/* see bearssl.h: pads and finishes the hash on copies, writes 5 words to dst */
+void
+br_sha1_out(const br_sha1_context *cc, void *dst)
+{
+	unsigned char buf[64];
+	uint32_t val[5];
+	size_t ptr;
+	/* Work on copies of the pending bytes and state; the context is not modified. */
+	ptr = (size_t)cc->count & 63;
+	memcpy(buf, cc->buf, ptr);
+	memcpy(val, cc->val, sizeof val);
+	buf[ptr ++] = 0x80;
+	if (ptr > 56) { /* no room left for the 8-byte length: use an extra block */
+		memset(buf + ptr, 0, 64 - ptr);
+		br_sha1_round(buf, val);
+		memset(buf, 0, 56);
+	} else {
+		memset(buf + ptr, 0, 56 - ptr);
+	}
+	br_enc64be(buf + 56, cc->count << 3); /* message length in bits */
+	br_sha1_round(buf, val);
+	br_range_enc32be(dst, val, 5);
+}
+
+/* see bearssl.h: encodes the 5 state words into dst, returns the byte count */
+uint64_t
+br_sha1_state(const br_sha1_context *cc, void *dst)
+{
+	br_range_enc32be(dst, cc->val, 5);
+	return cc->count;
+}
+
+/* see bearssl.h: reloads the 5 state words from stb and sets the byte count */
+void
+br_sha1_set_state(br_sha1_context *cc, const void *stb, uint64_t count)
+{
+	br_range_dec32be(cc->val, 5, stb);
+	cc->count = count;
+}
+
+/* see bearssl.h: br_hash_class descriptor wiring in the br_sha1_* functions */
+const br_hash_class br_sha1_vtable = {
+	sizeof(br_sha1_context),
+	BR_HASHDESC_ID(br_sha1_ID)
+		| BR_HASHDESC_OUT(20)
+		| BR_HASHDESC_STATE(20)
+		| BR_HASHDESC_LBLEN(6)
+		| BR_HASHDESC_MD_PADDING
+		| BR_HASHDESC_MD_PADDING_BE,
+	(void (*)(const br_hash_class **))&br_sha1_init,
+	(void (*)(const br_hash_class **, const void *, size_t))&br_sha1_update,
+	(void (*)(const br_hash_class *const *, void *))&br_sha1_out,
+	(uint64_t (*)(const br_hash_class *const *, void *))&br_sha1_state,
+	(void (*)(const br_hash_class **, const void *, uint64_t))
+		&br_sha1_set_state
+};
diff --git a/third_party/bearssl/src/sha2big.c b/third_party/bearssl/src/sha2big.c
new file mode 100644
index 0000000..5be92ed
--- /dev/null
+++ b/third_party/bearssl/src/sha2big.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define CH(X, Y, Z)    ((((Y) ^ (Z)) & (X)) ^ (Z))	/* bitwise select: X ? Y : Z */
+#define MAJ(X, Y, Z)   (((Y) & (Z)) | (((Y) | (Z)) & (X)))	/* bitwise majority of X, Y, Z */
+
+#define ROTR(x, n)    (((uint64_t)(x) << (64 - (n))) | ((uint64_t)(x) >> (n)))	/* 64-bit rotate right; n must be 1..63 (0 or 64 would shift by the full width) */
+
+#define BSG5_0(x)      (ROTR(x, 28) ^ ROTR(x, 34) ^ ROTR(x, 39))	/* FIPS 180-4 "big sigma 0", 64-bit variant */
+#define BSG5_1(x)      (ROTR(x, 14) ^ ROTR(x, 18) ^ ROTR(x, 41))	/* FIPS 180-4 "big sigma 1", 64-bit variant */
+#define SSG5_0(x)      (ROTR(x, 1) ^ ROTR(x, 8) ^ (uint64_t)((x) >> 7))	/* FIPS 180-4 "small sigma 0", 64-bit variant */
+#define SSG5_1(x)      (ROTR(x, 19) ^ ROTR(x, 61) ^ (uint64_t)((x) >> 6))	/* FIPS 180-4 "small sigma 1", 64-bit variant */
+
+static const uint64_t IV384[8] = {
+	0xCBBB9D5DC1059ED8, 0x629A292A367CD507,
+	0x9159015A3070DD17, 0x152FECD8F70E5939,
+	0x67332667FFC00B31, 0x8EB44A8768581511,
+	0xDB0C2E0D64F98FA7, 0x47B5481DBEFA4FA4
+};
+
+static const uint64_t IV512[8] = {
+	0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
+	0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
+	0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
+	0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
+};
+
+static const uint64_t K[80] = {
+	0x428A2F98D728AE22, 0x7137449123EF65CD,
+	0xB5C0FBCFEC4D3B2F, 0xE9B5DBA58189DBBC,
+	0x3956C25BF348B538, 0x59F111F1B605D019,
+	0x923F82A4AF194F9B, 0xAB1C5ED5DA6D8118,
+	0xD807AA98A3030242, 0x12835B0145706FBE,
+	0x243185BE4EE4B28C, 0x550C7DC3D5FFB4E2,
+	0x72BE5D74F27B896F, 0x80DEB1FE3B1696B1,
+	0x9BDC06A725C71235, 0xC19BF174CF692694,
+	0xE49B69C19EF14AD2, 0xEFBE4786384F25E3,
+	0x0FC19DC68B8CD5B5, 0x240CA1CC77AC9C65,
+	0x2DE92C6F592B0275, 0x4A7484AA6EA6E483,
+	0x5CB0A9DCBD41FBD4, 0x76F988DA831153B5,
+	0x983E5152EE66DFAB, 0xA831C66D2DB43210,
+	0xB00327C898FB213F, 0xBF597FC7BEEF0EE4,
+	0xC6E00BF33DA88FC2, 0xD5A79147930AA725,
+	0x06CA6351E003826F, 0x142929670A0E6E70,
+	0x27B70A8546D22FFC, 0x2E1B21385C26C926,
+	0x4D2C6DFC5AC42AED, 0x53380D139D95B3DF,
+	0x650A73548BAF63DE, 0x766A0ABB3C77B2A8,
+	0x81C2C92E47EDAEE6, 0x92722C851482353B,
+	0xA2BFE8A14CF10364, 0xA81A664BBC423001,
+	0xC24B8B70D0F89791, 0xC76C51A30654BE30,
+	0xD192E819D6EF5218, 0xD69906245565A910,
+	0xF40E35855771202A, 0x106AA07032BBD1B8,
+	0x19A4C116B8D2D0C8, 0x1E376C085141AB53,
+	0x2748774CDF8EEB99, 0x34B0BCB5E19B48A8,
+	0x391C0CB3C5C95A63, 0x4ED8AA4AE3418ACB,
+	0x5B9CCA4F7763E373, 0x682E6FF3D6B2B8A3,
+	0x748F82EE5DEFB2FC, 0x78A5636F43172F60,
+	0x84C87814A1F0AB72, 0x8CC702081A6439EC,
+	0x90BEFFFA23631E28, 0xA4506CEBDE82BDE9,
+	0xBEF9A3F7B2C67915, 0xC67178F2E372532B,
+	0xCA273ECEEA26619C, 0xD186B8C721C0C207,
+	0xEADA7DD6CDE0EB1E, 0xF57D4F7FEE6ED178,
+	0x06F067AA72176FBA, 0x0A637DC5A2C898A6,
+	0x113F9804BEF90DAE, 0x1B710B35131C471B,
+	0x28DB77F523047D84, 0x32CAAB7B40C72493,
+	0x3C9EBE0A15C9BEBC, 0x431D67C49C100D4C,
+	0x4CC5D4BECB3E42B6, 0x597F299CFC657E2A,
+	0x5FCB6FAB3AD6FAEC, 0x6C44198C4A475817
+};
+
+static void
+sha2big_round(const unsigned char *buf, uint64_t *val)
+{
+	/* Compress one block (16 x 64-bit words read from buf) into the 8 state words val[0..7]. */
+#define SHA2BIG_STEP(A, B, C, D, E, F, G, H, j)   do { \
+		uint64_t T1, T2; \
+		T1 = H + BSG5_1(E) + CH(E, F, G) + K[j] + w[j]; \
+		T2 = BSG5_0(A) + MAJ(A, B, C); \
+		D += T1; \
+		H = T1 + T2; \
+	} while (0)
+
+	int i;
+	uint64_t a, b, c, d, e, f, g, h;
+	uint64_t w[80];
+
+	br_range_dec64be(w, 16, buf);	/* w[0..15]: the input block */
+	for (i = 16; i < 80; i ++) {	/* w[16..79]: expanded message schedule */
+		w[i] = SSG5_1(w[i - 2]) + w[i - 7]
+			+ SSG5_0(w[i - 15]) + w[i - 16];
+	}
+	a = val[0];
+	b = val[1];
+	c = val[2];
+	d = val[3];
+	e = val[4];
+	f = val[5];
+	g = val[6];
+	h = val[7];
+	for (i = 0; i < 80; i += 8) {	/* 8 steps per pass; the argument order rotates instead of moving values */
+		SHA2BIG_STEP(a, b, c, d, e, f, g, h, i + 0);
+		SHA2BIG_STEP(h, a, b, c, d, e, f, g, i + 1);
+		SHA2BIG_STEP(g, h, a, b, c, d, e, f, i + 2);
+		SHA2BIG_STEP(f, g, h, a, b, c, d, e, i + 3);
+		SHA2BIG_STEP(e, f, g, h, a, b, c, d, i + 4);
+		SHA2BIG_STEP(d, e, f, g, h, a, b, c, i + 5);
+		SHA2BIG_STEP(c, d, e, f, g, h, a, b, i + 6);
+		SHA2BIG_STEP(b, c, d, e, f, g, h, a, i + 7);
+	}
+	val[0] += a;	/* add the working registers back into the state */
+	val[1] += b;
+	val[2] += c;
+	val[3] += d;
+	val[4] += e;
+	val[5] += f;
+	val[6] += g;
+	val[7] += h;
+}
+
+static void
+sha2big_update(br_sha384_context *cc, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	size_t ptr;
+	/* Append len bytes to cc->buf, compressing every completed 128-byte block. */
+	buf = data;
+	ptr = (size_t)cc->count & 127;	/* current fill level of cc->buf */
+	cc->count += (uint64_t)len;	/* running total of bytes fed in */
+	while (len > 0) {
+		size_t clen;
+
+		clen = 128 - ptr;	/* room left in the current block */
+		if (clen > len) {
+			clen = len;
+		}
+		memcpy(cc->buf + ptr, buf, clen);
+		ptr += clen;
+		buf += clen;
+		len -= clen;
+		if (ptr == 128) {	/* block full: compress it and start over */
+			sha2big_round(cc->buf, cc->val);
+			ptr = 0;
+		}
+	}
+}
+
+static void
+sha2big_out(const br_sha384_context *cc, void *dst, int num)
+{
+	unsigned char buf[128];
+	uint64_t val[8];
+	size_t ptr;
+	/* Finalize local copies (cc is not modified) and write num 64-bit words to dst. */
+	ptr = (size_t)cc->count & 127;	/* bytes pending in the partial block */
+	memcpy(buf, cc->buf, ptr);
+	memcpy(val, cc->val, sizeof val);
+	buf[ptr ++] = 0x80;	/* padding begins with a single 0x80 byte */
+	if (ptr > 112) {	/* no room for the 16-byte length: pad out and compress an extra block */
+		memset(buf + ptr, 0, 128 - ptr);
+		sha2big_round(buf, val);
+		memset(buf, 0, 112);
+	} else {
+		memset(buf + ptr, 0, 112 - ptr);
+	}
+	br_enc64be(buf + 112, cc->count >> 61);	/* high 64 bits of the bit length (byte count * 8) */
+	br_enc64be(buf + 120, cc->count << 3);	/* low 64 bits of the bit length */
+	sha2big_round(buf, val);
+	br_range_enc64be(dst, val, num);	/* callers in this file pass num = 6 or 8 */
+}
+
+/* see bearssl.h -- installs the vtable, loads IV384 into the state and zeroes the byte count */
+void
+br_sha384_init(br_sha384_context *cc)
+{
+	cc->vtable = &br_sha384_vtable;
+	memcpy(cc->val, IV384, sizeof IV384);
+	cc->count = 0;
+}
+
+/* see bearssl.h -- public entry point; simply forwards to sha2big_update() */
+void
+br_sha384_update(br_sha384_context *cc, const void *data, size_t len)
+{
+	sha2big_update(cc, data, len);
+}
+
+/* see bearssl.h -- outputs 6 of the 8 state words (6 * 8 = 48 bytes) */
+void
+br_sha384_out(const br_sha384_context *cc, void *dst)
+{
+	sha2big_out(cc, dst, 6);
+}
+
+/* see bearssl.h -- writes all 8 state words (64 bytes) to dst and returns the byte count */
+uint64_t
+br_sha384_state(const br_sha384_context *cc, void *dst)
+{
+	br_range_enc64be(dst, cc->val, 8);
+	return cc->count;
+}
+
+/* see bearssl.h -- loads 8 state words from stb and sets the count; cc->buf is not touched */
+void
+br_sha384_set_state(br_sha384_context *cc, const void *stb, uint64_t count)
+{
+	br_range_dec64be(cc->val, 8, stb);
+	cc->count = count;
+}
+
+/* see bearssl.h -- installs the vtable, loads IV512 into the state and zeroes the byte count */
+void
+br_sha512_init(br_sha512_context *cc)
+{
+	cc->vtable = &br_sha512_vtable;
+	memcpy(cc->val, IV512, sizeof IV512);
+	cc->count = 0;
+}
+
+/* see bearssl.h -- outputs all 8 state words (8 * 8 = 64 bytes) */
+void
+br_sha512_out(const br_sha512_context *cc, void *dst)
+{
+	sha2big_out(cc, dst, 8);
+}
+
+/* see bearssl.h -- each typed br_sha384_* function is cast to the generic br_hash_class slot type */
+const br_hash_class br_sha384_vtable = {
+	sizeof(br_sha384_context),
+	BR_HASHDESC_ID(br_sha384_ID)
+		| BR_HASHDESC_OUT(48)	/* 6 words of 8 bytes, as written by br_sha384_out() */
+		| BR_HASHDESC_STATE(64)	/* all 8 words, as written by br_sha384_state() */
+		| BR_HASHDESC_LBLEN(7)	/* presumably log2 of the 128-byte block size -- confirm in bearssl.h */
+		| BR_HASHDESC_MD_PADDING
+		| BR_HASHDESC_MD_PADDING_BE
+		| BR_HASHDESC_MD_PADDING_128,
+	(void (*)(const br_hash_class **))&br_sha384_init,
+	(void (*)(const br_hash_class **, const void *, size_t))
+		&br_sha384_update,
+	(void (*)(const br_hash_class *const *, void *))&br_sha384_out,
+	(uint64_t (*)(const br_hash_class *const *, void *))&br_sha384_state,
+	(void (*)(const br_hash_class **, const void *, uint64_t))
+		&br_sha384_set_state
+};
+
+/* see bearssl.h -- each typed br_sha512_* function is cast to the generic br_hash_class slot type */
+const br_hash_class br_sha512_vtable = {
+	sizeof(br_sha512_context),
+	BR_HASHDESC_ID(br_sha512_ID)
+		| BR_HASHDESC_OUT(64)	/* 8 words of 8 bytes, as written by br_sha512_out() */
+		| BR_HASHDESC_STATE(64)
+		| BR_HASHDESC_LBLEN(7)	/* presumably log2 of the 128-byte block size -- confirm in bearssl.h */
+		| BR_HASHDESC_MD_PADDING
+		| BR_HASHDESC_MD_PADDING_BE
+		| BR_HASHDESC_MD_PADDING_128,
+	(void (*)(const br_hash_class **))&br_sha512_init,
+	(void (*)(const br_hash_class **, const void *, size_t))
+		&br_sha512_update,	/* NOTE(review): not defined in this file; presumably an alias from the header -- confirm */
+	(void (*)(const br_hash_class *const *, void *))&br_sha512_out,
+	(uint64_t (*)(const br_hash_class *const *, void *))&br_sha512_state,	/* NOTE(review): likewise not defined in this file */
+	(void (*)(const br_hash_class **, const void *, uint64_t))
+		&br_sha512_set_state	/* NOTE(review): likewise not defined in this file */
+};
diff --git a/third_party/bearssl/src/sha2small.c b/third_party/bearssl/src/sha2small.c
new file mode 100644
index 0000000..ca19655
--- /dev/null
+++ b/third_party/bearssl/src/sha2small.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define CH(X, Y, Z)    ((((Y) ^ (Z)) & (X)) ^ (Z))	/* bitwise select: X ? Y : Z */
+#define MAJ(X, Y, Z)   (((Y) & (Z)) | (((Y) | (Z)) & (X)))	/* bitwise majority of X, Y, Z */
+
+#define ROTR(x, n)    (((uint32_t)(x) << (32 - (n))) | ((uint32_t)(x) >> (n)))	/* 32-bit rotate right; n must be 1..31 (0 or 32 would shift by the full width) */
+
+#define BSG2_0(x)      (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))	/* FIPS 180-4 "big sigma 0", 32-bit variant */
+#define BSG2_1(x)      (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))	/* FIPS 180-4 "big sigma 1", 32-bit variant */
+#define SSG2_0(x)      (ROTR(x, 7) ^ ROTR(x, 18) ^ (uint32_t)((x) >> 3))	/* FIPS 180-4 "small sigma 0", 32-bit variant */
+#define SSG2_1(x)      (ROTR(x, 17) ^ ROTR(x, 19) ^ (uint32_t)((x) >> 10))	/* FIPS 180-4 "small sigma 1", 32-bit variant */
+
+/* see inner.h */
+const uint32_t br_sha224_IV[8] = {
+	0xC1059ED8, 0x367CD507, 0x3070DD17, 0xF70E5939,
+	0xFFC00B31, 0x68581511, 0x64F98FA7, 0xBEFA4FA4
+};
+
+/* see inner.h */
+const uint32_t br_sha256_IV[8] = {
+	0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
+	0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
+};
+
+static const uint32_t K[64] = {
+	0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5,
+	0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
+	0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3,
+	0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
+	0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC,
+	0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
+	0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7,
+	0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
+	0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13,
+	0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
+	0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3,
+	0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
+	0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5,
+	0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
+	0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208,
+	0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2
+};
+
+/* see inner.h */
+void
+br_sha2small_round(const unsigned char *buf, uint32_t *val)
+{
+
+#define SHA2_STEP(A, B, C, D, E, F, G, H, j)   do { \
+		uint32_t T1, T2; \
+		T1 = H + BSG2_1(E) + CH(E, F, G) + K[j] + w[j]; \
+		T2 = BSG2_0(A) + MAJ(A, B, C); \
+		D += T1; \
+		H = T1 + T2; \
+	} while (0)
+
+	int i;
+	uint32_t a, b, c, d, e, f, g, h;
+	uint32_t w[64];
+
+	br_range_dec32be(w, 16, buf);
+	for (i = 16; i < 64; i ++) {
+		w[i] = SSG2_1(w[i - 2]) + w[i - 7]
+			+ SSG2_0(w[i - 15]) + w[i - 16];
+	}
+	a = val[0];
+	b = val[1];
+	c = val[2];
+	d = val[3];
+	e = val[4];
+	f = val[5];
+	g = val[6];
+	h = val[7];
+	for (i = 0; i < 64; i += 8) {
+		SHA2_STEP(a, b, c, d, e, f, g, h, i + 0);
+		SHA2_STEP(h, a, b, c, d, e, f, g, i + 1);
+		SHA2_STEP(g, h, a, b, c, d, e, f, i + 2);
+		SHA2_STEP(f, g, h, a, b, c, d, e, i + 3);
+		SHA2_STEP(e, f, g, h, a, b, c, d, i + 4);
+		SHA2_STEP(d, e, f, g, h, a, b, c, i + 5);
+		SHA2_STEP(c, d, e, f, g, h, a, b, i + 6);
+		SHA2_STEP(b, c, d, e, f, g, h, a, i + 7);
+	}
+	val[0] += a;
+	val[1] += b;
+	val[2] += c;
+	val[3] += d;
+	val[4] += e;
+	val[5] += f;
+	val[6] += g;
+	val[7] += h;
+
+#if 0
+/* obsolete */
+#define SHA2_MEXP1(pc)   do { \
+		W[pc] = br_dec32be(buf + ((pc) << 2)); \
+	} while (0)
+
+#define SHA2_MEXP2(pc)   do { \
+		W[(pc) & 0x0F] = SSG2_1(W[((pc) - 2) & 0x0F]) \
+			+ W[((pc) - 7) & 0x0F] \
+			+ SSG2_0(W[((pc) - 15) & 0x0F]) + W[(pc) & 0x0F]; \
+	} while (0)
+
+#define SHA2_STEPn(n, a, b, c, d, e, f, g, h, pc)   do { \
+		uint32_t t1, t2; \
+		SHA2_MEXP ## n(pc); \
+		t1 = h + BSG2_1(e) + CH(e, f, g) \
+			+ K[pcount + (pc)] + W[(pc) & 0x0F]; \
+		t2 = BSG2_0(a) + MAJ(a, b, c); \
+		d += t1; \
+		h = t1 + t2; \
+	} while (0)
+
+#define SHA2_STEP1(a, b, c, d, e, f, g, h, pc) \
+	SHA2_STEPn(1, a, b, c, d, e, f, g, h, pc)
+#define SHA2_STEP2(a, b, c, d, e, f, g, h, pc) \
+	SHA2_STEPn(2, a, b, c, d, e, f, g, h, pc)
+
+	uint32_t A, B, C, D, E, F, G, H;
+	uint32_t W[16];
+	unsigned pcount;
+
+	A = val[0];
+	B = val[1];
+	C = val[2];
+	D = val[3];
+	E = val[4];
+	F = val[5];
+	G = val[6];
+	H = val[7];
+	pcount = 0;
+	SHA2_STEP1(A, B, C, D, E, F, G, H,  0);
+	SHA2_STEP1(H, A, B, C, D, E, F, G,  1);
+	SHA2_STEP1(G, H, A, B, C, D, E, F,  2);
+	SHA2_STEP1(F, G, H, A, B, C, D, E,  3);
+	SHA2_STEP1(E, F, G, H, A, B, C, D,  4);
+	SHA2_STEP1(D, E, F, G, H, A, B, C,  5);
+	SHA2_STEP1(C, D, E, F, G, H, A, B,  6);
+	SHA2_STEP1(B, C, D, E, F, G, H, A,  7);
+	SHA2_STEP1(A, B, C, D, E, F, G, H,  8);
+	SHA2_STEP1(H, A, B, C, D, E, F, G,  9);
+	SHA2_STEP1(G, H, A, B, C, D, E, F, 10);
+	SHA2_STEP1(F, G, H, A, B, C, D, E, 11);
+	SHA2_STEP1(E, F, G, H, A, B, C, D, 12);
+	SHA2_STEP1(D, E, F, G, H, A, B, C, 13);
+	SHA2_STEP1(C, D, E, F, G, H, A, B, 14);
+	SHA2_STEP1(B, C, D, E, F, G, H, A, 15);
+	for (pcount = 16; pcount < 64; pcount += 16) {
+		SHA2_STEP2(A, B, C, D, E, F, G, H,  0);
+		SHA2_STEP2(H, A, B, C, D, E, F, G,  1);
+		SHA2_STEP2(G, H, A, B, C, D, E, F,  2);
+		SHA2_STEP2(F, G, H, A, B, C, D, E,  3);
+		SHA2_STEP2(E, F, G, H, A, B, C, D,  4);
+		SHA2_STEP2(D, E, F, G, H, A, B, C,  5);
+		SHA2_STEP2(C, D, E, F, G, H, A, B,  6);
+		SHA2_STEP2(B, C, D, E, F, G, H, A,  7);
+		SHA2_STEP2(A, B, C, D, E, F, G, H,  8);
+		SHA2_STEP2(H, A, B, C, D, E, F, G,  9);
+		SHA2_STEP2(G, H, A, B, C, D, E, F, 10);
+		SHA2_STEP2(F, G, H, A, B, C, D, E, 11);
+		SHA2_STEP2(E, F, G, H, A, B, C, D, 12);
+		SHA2_STEP2(D, E, F, G, H, A, B, C, 13);
+		SHA2_STEP2(C, D, E, F, G, H, A, B, 14);
+		SHA2_STEP2(B, C, D, E, F, G, H, A, 15);
+	}
+	val[0] += A;
+	val[1] += B;
+	val[2] += C;
+	val[3] += D;
+	val[4] += E;
+	val[5] += F;
+	val[6] += G;
+	val[7] += H;
+#endif
+}
+
+static void
+sha2small_update(br_sha224_context *cc, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	size_t ptr;
+	/* Append len bytes to cc->buf, compressing every completed 64-byte block. */
+	buf = data;
+	ptr = (size_t)cc->count & 63;	/* current fill level of cc->buf */
+	cc->count += (uint64_t)len;	/* running total of bytes fed in */
+	while (len > 0) {
+		size_t clen;
+
+		clen = 64 - ptr;	/* room left in the current block */
+		if (clen > len) {
+			clen = len;
+		}
+		memcpy(cc->buf + ptr, buf, clen);
+		ptr += clen;
+		buf += clen;
+		len -= clen;
+		if (ptr == 64) {	/* block full: compress it and start over */
+			br_sha2small_round(cc->buf, cc->val);
+			ptr = 0;
+		}
+	}
+}
+
+static void
+sha2small_out(const br_sha224_context *cc, void *dst, int num)
+{
+	unsigned char buf[64];
+	uint32_t val[8];
+	size_t ptr;
+	/* Finalize local copies (cc is not modified) and write num 32-bit words to dst. */
+	ptr = (size_t)cc->count & 63;	/* bytes pending in the partial block */
+	memcpy(buf, cc->buf, ptr);
+	memcpy(val, cc->val, sizeof val);
+	buf[ptr ++] = 0x80;	/* padding begins with a single 0x80 byte */
+	if (ptr > 56) {	/* no room for the 8-byte length: pad out and compress an extra block */
+		memset(buf + ptr, 0, 64 - ptr);
+		br_sha2small_round(buf, val);
+		memset(buf, 0, 56);
+	} else {
+		memset(buf + ptr, 0, 56 - ptr);
+	}
+	br_enc64be(buf + 56, cc->count << 3);	/* message length in bits (byte count * 8) */
+	br_sha2small_round(buf, val);
+	br_range_enc32be(dst, val, num);	/* callers in this file pass num = 7 or 8 */
+}
+
+/* see bearssl.h -- installs the vtable, loads br_sha224_IV into the state and zeroes the byte count */
+void
+br_sha224_init(br_sha224_context *cc)
+{
+	cc->vtable = &br_sha224_vtable;
+	memcpy(cc->val, br_sha224_IV, sizeof cc->val);
+	cc->count = 0;
+}
+
+/* see bearssl.h -- public entry point; simply forwards to sha2small_update() */
+void
+br_sha224_update(br_sha224_context *cc, const void *data, size_t len)
+{
+	sha2small_update(cc, data, len);
+}
+
+/* see bearssl.h -- outputs 7 of the 8 state words (7 * 4 = 28 bytes) */
+void
+br_sha224_out(const br_sha224_context *cc, void *dst)
+{
+	sha2small_out(cc, dst, 7);
+}
+
+/* see bearssl.h -- writes all 8 state words (32 bytes) to dst and returns the byte count */
+uint64_t
+br_sha224_state(const br_sha224_context *cc, void *dst)
+{
+	br_range_enc32be(dst, cc->val, 8);
+	return cc->count;
+}
+
+/* see bearssl.h -- loads 8 state words from stb and sets the count; cc->buf is not touched */
+void
+br_sha224_set_state(br_sha224_context *cc, const void *stb, uint64_t count)
+{
+	br_range_dec32be(cc->val, 8, stb);
+	cc->count = count;
+}
+
+/* see bearssl.h -- installs the vtable, loads br_sha256_IV into the state and zeroes the byte count */
+void
+br_sha256_init(br_sha256_context *cc)
+{
+	cc->vtable = &br_sha256_vtable;
+	memcpy(cc->val, br_sha256_IV, sizeof cc->val);
+	cc->count = 0;
+}
+
+/* see bearssl.h -- outputs all 8 state words (8 * 4 = 32 bytes) */
+void
+br_sha256_out(const br_sha256_context *cc, void *dst)
+{
+	sha2small_out(cc, dst, 8);
+}
+
+/* see bearssl.h -- each typed br_sha224_* function is cast to the generic br_hash_class slot type */
+const br_hash_class br_sha224_vtable = {
+	sizeof(br_sha224_context),
+	BR_HASHDESC_ID(br_sha224_ID)
+		| BR_HASHDESC_OUT(28)	/* 7 words of 4 bytes, as written by br_sha224_out() */
+		| BR_HASHDESC_STATE(32)	/* all 8 words, as written by br_sha224_state() */
+		| BR_HASHDESC_LBLEN(6)	/* presumably log2 of the 64-byte block size -- confirm in bearssl.h */
+		| BR_HASHDESC_MD_PADDING
+		| BR_HASHDESC_MD_PADDING_BE,
+	(void (*)(const br_hash_class **))&br_sha224_init,
+	(void (*)(const br_hash_class **,
+		const void *, size_t))&br_sha224_update,
+	(void (*)(const br_hash_class *const *, void *))&br_sha224_out,
+	(uint64_t (*)(const br_hash_class *const *, void *))&br_sha224_state,
+	(void (*)(const br_hash_class **, const void *, uint64_t))
+		&br_sha224_set_state
+};
+
+/* see bearssl.h -- each typed br_sha256_* function is cast to the generic br_hash_class slot type */
+const br_hash_class br_sha256_vtable = {
+	sizeof(br_sha256_context),
+	BR_HASHDESC_ID(br_sha256_ID)
+		| BR_HASHDESC_OUT(32)	/* 8 words of 4 bytes, as written by br_sha256_out() */
+		| BR_HASHDESC_STATE(32)
+		| BR_HASHDESC_LBLEN(6)	/* presumably log2 of the 64-byte block size -- confirm in bearssl.h */
+		| BR_HASHDESC_MD_PADDING
+		| BR_HASHDESC_MD_PADDING_BE,
+	(void (*)(const br_hash_class **))&br_sha256_init,
+	(void (*)(const br_hash_class **,
+		const void *, size_t))&br_sha256_update,	/* NOTE(review): not defined in this file; presumably an alias from the header -- confirm */
+	(void (*)(const br_hash_class *const *, void *))&br_sha256_out,
+	(uint64_t (*)(const br_hash_class *const *, void *))&br_sha256_state,	/* NOTE(review): likewise not defined in this file */
+	(void (*)(const br_hash_class **, const void *, uint64_t))
+		&br_sha256_set_state	/* NOTE(review): likewise not defined in this file */
+};
diff --git a/third_party/bearssl/src/shake.c b/third_party/bearssl/src/shake.c
new file mode 100644
index 0000000..80d7176
--- /dev/null
+++ b/third_party/bearssl/src/shake.c
@@ -0,0 +1,590 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Round constants.
+ */
+static const uint64_t RC[] = {
+	0x0000000000000001, 0x0000000000008082,
+	0x800000000000808A, 0x8000000080008000,
+	0x000000000000808B, 0x0000000080000001,
+	0x8000000080008081, 0x8000000000008009,
+	0x000000000000008A, 0x0000000000000088,
+	0x0000000080008009, 0x000000008000000A,
+	0x000000008000808B, 0x800000000000008B,
+	0x8000000000008089, 0x8000000000008003,
+	0x8000000000008002, 0x8000000000000080,
+	0x000000000000800A, 0x800000008000000A,
+	0x8000000080008081, 0x8000000000008080,
+	0x0000000080000001, 0x8000000080008008
+};
+
+/*
+ * XOR the first 'rate' bytes of data into the state A[], one 64-bit word
+ * at a time; 'rate' must therefore be a multiple of 8 bytes (64 bits).
+ */
+static void
+xor_block(uint64_t *A, const void *data, size_t rate)
+{
+	size_t u;
+
+	for (u = 0; u < rate; u += 8) {	/* u is a byte offset; u >> 3 is the matching word index */
+		A[u >> 3] ^= br_dec64le((const unsigned char *)data + u);
+	}
+}
+
+/*
+ * Apply the 24-round permutation to the 25-word state A[], in place.
+ * Input is not read here: callers XOR it into A[] first (xor_block()).
+ */
+static void
+process_block(uint64_t *A)
+{
+	uint64_t t0, t1, t2, t3, t4;
+	uint64_t tt0, tt1, tt2, tt3;
+	uint64_t t, kt;
+	uint64_t c0, c1, c2, c3, c4, bnn;
+	int j;
+
+	/*
+	 * Compute the 24 rounds. This loop is partially unrolled (each
+	 * iteration computes two rounds).
+	 */
+	for (j = 0; j < 24; j += 2) {
+
+		tt0 = A[ 1] ^ A[ 6];
+		tt1 = A[11] ^ A[16];
+		tt0 ^= A[21] ^ tt1;
+		tt0 = (tt0 << 1) | (tt0 >> 63);
+		tt2 = A[ 4] ^ A[ 9];
+		tt3 = A[14] ^ A[19];
+		tt0 ^= A[24];
+		tt2 ^= tt3;
+		t0 = tt0 ^ tt2;
+
+		tt0 = A[ 2] ^ A[ 7];
+		tt1 = A[12] ^ A[17];
+		tt0 ^= A[22] ^ tt1;
+		tt0 = (tt0 << 1) | (tt0 >> 63);
+		tt2 = A[ 0] ^ A[ 5];
+		tt3 = A[10] ^ A[15];
+		tt0 ^= A[20];
+		tt2 ^= tt3;
+		t1 = tt0 ^ tt2;
+
+		tt0 = A[ 3] ^ A[ 8];
+		tt1 = A[13] ^ A[18];
+		tt0 ^= A[23] ^ tt1;
+		tt0 = (tt0 << 1) | (tt0 >> 63);
+		tt2 = A[ 1] ^ A[ 6];
+		tt3 = A[11] ^ A[16];
+		tt0 ^= A[21];
+		tt2 ^= tt3;
+		t2 = tt0 ^ tt2;
+
+		tt0 = A[ 4] ^ A[ 9];
+		tt1 = A[14] ^ A[19];
+		tt0 ^= A[24] ^ tt1;
+		tt0 = (tt0 << 1) | (tt0 >> 63);
+		tt2 = A[ 2] ^ A[ 7];
+		tt3 = A[12] ^ A[17];
+		tt0 ^= A[22];
+		tt2 ^= tt3;
+		t3 = tt0 ^ tt2;
+
+		tt0 = A[ 0] ^ A[ 5];
+		tt1 = A[10] ^ A[15];
+		tt0 ^= A[20] ^ tt1;
+		tt0 = (tt0 << 1) | (tt0 >> 63);
+		tt2 = A[ 3] ^ A[ 8];
+		tt3 = A[13] ^ A[18];
+		tt0 ^= A[23];
+		tt2 ^= tt3;
+		t4 = tt0 ^ tt2;
+
+		A[ 0] = A[ 0] ^ t0;
+		A[ 5] = A[ 5] ^ t0;
+		A[10] = A[10] ^ t0;
+		A[15] = A[15] ^ t0;
+		A[20] = A[20] ^ t0;
+		A[ 1] = A[ 1] ^ t1;
+		A[ 6] = A[ 6] ^ t1;
+		A[11] = A[11] ^ t1;
+		A[16] = A[16] ^ t1;
+		A[21] = A[21] ^ t1;
+		A[ 2] = A[ 2] ^ t2;
+		A[ 7] = A[ 7] ^ t2;
+		A[12] = A[12] ^ t2;
+		A[17] = A[17] ^ t2;
+		A[22] = A[22] ^ t2;
+		A[ 3] = A[ 3] ^ t3;
+		A[ 8] = A[ 8] ^ t3;
+		A[13] = A[13] ^ t3;
+		A[18] = A[18] ^ t3;
+		A[23] = A[23] ^ t3;
+		A[ 4] = A[ 4] ^ t4;
+		A[ 9] = A[ 9] ^ t4;
+		A[14] = A[14] ^ t4;
+		A[19] = A[19] ^ t4;
+		A[24] = A[24] ^ t4;
+		A[ 5] = (A[ 5] << 36) | (A[ 5] >> (64 - 36));
+		A[10] = (A[10] <<  3) | (A[10] >> (64 -  3));
+		A[15] = (A[15] << 41) | (A[15] >> (64 - 41));
+		A[20] = (A[20] << 18) | (A[20] >> (64 - 18));
+		A[ 1] = (A[ 1] <<  1) | (A[ 1] >> (64 -  1));
+		A[ 6] = (A[ 6] << 44) | (A[ 6] >> (64 - 44));
+		A[11] = (A[11] << 10) | (A[11] >> (64 - 10));
+		A[16] = (A[16] << 45) | (A[16] >> (64 - 45));
+		A[21] = (A[21] <<  2) | (A[21] >> (64 - 2));
+		A[ 2] = (A[ 2] << 62) | (A[ 2] >> (64 - 62));
+		A[ 7] = (A[ 7] <<  6) | (A[ 7] >> (64 -  6));
+		A[12] = (A[12] << 43) | (A[12] >> (64 - 43));
+		A[17] = (A[17] << 15) | (A[17] >> (64 - 15));
+		A[22] = (A[22] << 61) | (A[22] >> (64 - 61));
+		A[ 3] = (A[ 3] << 28) | (A[ 3] >> (64 - 28));
+		A[ 8] = (A[ 8] << 55) | (A[ 8] >> (64 - 55));
+		A[13] = (A[13] << 25) | (A[13] >> (64 - 25));
+		A[18] = (A[18] << 21) | (A[18] >> (64 - 21));
+		A[23] = (A[23] << 56) | (A[23] >> (64 - 56));
+		A[ 4] = (A[ 4] << 27) | (A[ 4] >> (64 - 27));
+		A[ 9] = (A[ 9] << 20) | (A[ 9] >> (64 - 20));
+		A[14] = (A[14] << 39) | (A[14] >> (64 - 39));
+		A[19] = (A[19] <<  8) | (A[19] >> (64 -  8));
+		A[24] = (A[24] << 14) | (A[24] >> (64 - 14));
+		bnn = ~A[12];
+		kt = A[ 6] | A[12];
+		c0 = A[ 0] ^ kt;
+		kt = bnn | A[18];
+		c1 = A[ 6] ^ kt;
+		kt = A[18] & A[24];
+		c2 = A[12] ^ kt;
+		kt = A[24] | A[ 0];
+		c3 = A[18] ^ kt;
+		kt = A[ 0] & A[ 6];
+		c4 = A[24] ^ kt;
+		A[ 0] = c0;
+		A[ 6] = c1;
+		A[12] = c2;
+		A[18] = c3;
+		A[24] = c4;
+		bnn = ~A[22];
+		kt = A[ 9] | A[10];
+		c0 = A[ 3] ^ kt;
+		kt = A[10] & A[16];
+		c1 = A[ 9] ^ kt;
+		kt = A[16] | bnn;
+		c2 = A[10] ^ kt;
+		kt = A[22] | A[ 3];
+		c3 = A[16] ^ kt;
+		kt = A[ 3] & A[ 9];
+		c4 = A[22] ^ kt;
+		A[ 3] = c0;
+		A[ 9] = c1;
+		A[10] = c2;
+		A[16] = c3;
+		A[22] = c4;
+		bnn = ~A[19];
+		kt = A[ 7] | A[13];
+		c0 = A[ 1] ^ kt;
+		kt = A[13] & A[19];
+		c1 = A[ 7] ^ kt;
+		kt = bnn & A[20];
+		c2 = A[13] ^ kt;
+		kt = A[20] | A[ 1];
+		c3 = bnn ^ kt;
+		kt = A[ 1] & A[ 7];
+		c4 = A[20] ^ kt;
+		A[ 1] = c0;
+		A[ 7] = c1;
+		A[13] = c2;
+		A[19] = c3;
+		A[20] = c4;
+		bnn = ~A[17];
+		kt = A[ 5] & A[11];
+		c0 = A[ 4] ^ kt;
+		kt = A[11] | A[17];
+		c1 = A[ 5] ^ kt;
+		kt = bnn | A[23];
+		c2 = A[11] ^ kt;
+		kt = A[23] & A[ 4];
+		c3 = bnn ^ kt;
+		kt = A[ 4] | A[ 5];
+		c4 = A[23] ^ kt;
+		A[ 4] = c0;
+		A[ 5] = c1;
+		A[11] = c2;
+		A[17] = c3;
+		A[23] = c4;
+		bnn = ~A[ 8];
+		kt = bnn & A[14];
+		c0 = A[ 2] ^ kt;
+		kt = A[14] | A[15];
+		c1 = bnn ^ kt;
+		kt = A[15] & A[21];
+		c2 = A[14] ^ kt;
+		kt = A[21] | A[ 2];
+		c3 = A[15] ^ kt;
+		kt = A[ 2] & A[ 8];
+		c4 = A[21] ^ kt;
+		A[ 2] = c0;
+		A[ 8] = c1;
+		A[14] = c2;
+		A[15] = c3;
+		A[21] = c4;
+		A[ 0] = A[ 0] ^ RC[j + 0];
+
+		tt0 = A[ 6] ^ A[ 9];
+		tt1 = A[ 7] ^ A[ 5];
+		tt0 ^= A[ 8] ^ tt1;
+		tt0 = (tt0 << 1) | (tt0 >> 63);
+		tt2 = A[24] ^ A[22];
+		tt3 = A[20] ^ A[23];
+		tt0 ^= A[21];
+		tt2 ^= tt3;
+		t0 = tt0 ^ tt2;
+
+		tt0 = A[12] ^ A[10];
+		tt1 = A[13] ^ A[11];
+		tt0 ^= A[14] ^ tt1;
+		tt0 = (tt0 << 1) | (tt0 >> 63);
+		tt2 = A[ 0] ^ A[ 3];
+		tt3 = A[ 1] ^ A[ 4];
+		tt0 ^= A[ 2];
+		tt2 ^= tt3;
+		t1 = tt0 ^ tt2;
+
+		tt0 = A[18] ^ A[16];
+		tt1 = A[19] ^ A[17];
+		tt0 ^= A[15] ^ tt1;
+		tt0 = (tt0 << 1) | (tt0 >> 63);
+		tt2 = A[ 6] ^ A[ 9];
+		tt3 = A[ 7] ^ A[ 5];
+		tt0 ^= A[ 8];
+		tt2 ^= tt3;
+		t2 = tt0 ^ tt2;
+
+		tt0 = A[24] ^ A[22];
+		tt1 = A[20] ^ A[23];
+		tt0 ^= A[21] ^ tt1;
+		tt0 = (tt0 << 1) | (tt0 >> 63);
+		tt2 = A[12] ^ A[10];
+		tt3 = A[13] ^ A[11];
+		tt0 ^= A[14];
+		tt2 ^= tt3;
+		t3 = tt0 ^ tt2;
+
+		tt0 = A[ 0] ^ A[ 3];
+		tt1 = A[ 1] ^ A[ 4];
+		tt0 ^= A[ 2] ^ tt1;
+		tt0 = (tt0 << 1) | (tt0 >> 63);
+		tt2 = A[18] ^ A[16];
+		tt3 = A[19] ^ A[17];
+		tt0 ^= A[15];
+		tt2 ^= tt3;
+		t4 = tt0 ^ tt2;
+
+		A[ 0] = A[ 0] ^ t0;
+		A[ 3] = A[ 3] ^ t0;
+		A[ 1] = A[ 1] ^ t0;
+		A[ 4] = A[ 4] ^ t0;
+		A[ 2] = A[ 2] ^ t0;
+		A[ 6] = A[ 6] ^ t1;
+		A[ 9] = A[ 9] ^ t1;
+		A[ 7] = A[ 7] ^ t1;
+		A[ 5] = A[ 5] ^ t1;
+		A[ 8] = A[ 8] ^ t1;
+		A[12] = A[12] ^ t2;
+		A[10] = A[10] ^ t2;
+		A[13] = A[13] ^ t2;
+		A[11] = A[11] ^ t2;
+		A[14] = A[14] ^ t2;
+		A[18] = A[18] ^ t3;
+		A[16] = A[16] ^ t3;
+		A[19] = A[19] ^ t3;
+		A[17] = A[17] ^ t3;
+		A[15] = A[15] ^ t3;
+		A[24] = A[24] ^ t4;
+		A[22] = A[22] ^ t4;
+		A[20] = A[20] ^ t4;
+		A[23] = A[23] ^ t4;
+		A[21] = A[21] ^ t4;
+		A[ 3] = (A[ 3] << 36) | (A[ 3] >> (64 - 36));
+		A[ 1] = (A[ 1] <<  3) | (A[ 1] >> (64 -  3));
+		A[ 4] = (A[ 4] << 41) | (A[ 4] >> (64 - 41));
+		A[ 2] = (A[ 2] << 18) | (A[ 2] >> (64 - 18));
+		A[ 6] = (A[ 6] <<  1) | (A[ 6] >> (64 -  1));
+		A[ 9] = (A[ 9] << 44) | (A[ 9] >> (64 - 44));
+		A[ 7] = (A[ 7] << 10) | (A[ 7] >> (64 - 10));
+		A[ 5] = (A[ 5] << 45) | (A[ 5] >> (64 - 45));
+		A[ 8] = (A[ 8] <<  2) | (A[ 8] >> (64 - 2));
+		A[12] = (A[12] << 62) | (A[12] >> (64 - 62));
+		A[10] = (A[10] <<  6) | (A[10] >> (64 -  6));
+		A[13] = (A[13] << 43) | (A[13] >> (64 - 43));
+		A[11] = (A[11] << 15) | (A[11] >> (64 - 15));
+		A[14] = (A[14] << 61) | (A[14] >> (64 - 61));
+		A[18] = (A[18] << 28) | (A[18] >> (64 - 28));
+		A[16] = (A[16] << 55) | (A[16] >> (64 - 55));
+		A[19] = (A[19] << 25) | (A[19] >> (64 - 25));
+		A[17] = (A[17] << 21) | (A[17] >> (64 - 21));
+		A[15] = (A[15] << 56) | (A[15] >> (64 - 56));
+		A[24] = (A[24] << 27) | (A[24] >> (64 - 27));
+		A[22] = (A[22] << 20) | (A[22] >> (64 - 20));
+		A[20] = (A[20] << 39) | (A[20] >> (64 - 39));
+		A[23] = (A[23] <<  8) | (A[23] >> (64 -  8));
+		A[21] = (A[21] << 14) | (A[21] >> (64 - 14));
+		bnn = ~A[13];
+		kt = A[ 9] | A[13];
+		c0 = A[ 0] ^ kt;
+		kt = bnn | A[17];
+		c1 = A[ 9] ^ kt;
+		kt = A[17] & A[21];
+		c2 = A[13] ^ kt;
+		kt = A[21] | A[ 0];
+		c3 = A[17] ^ kt;
+		kt = A[ 0] & A[ 9];
+		c4 = A[21] ^ kt;
+		A[ 0] = c0;
+		A[ 9] = c1;
+		A[13] = c2;
+		A[17] = c3;
+		A[21] = c4;
+		bnn = ~A[14];
+		kt = A[22] | A[ 1];
+		c0 = A[18] ^ kt;
+		kt = A[ 1] & A[ 5];
+		c1 = A[22] ^ kt;
+		kt = A[ 5] | bnn;
+		c2 = A[ 1] ^ kt;
+		kt = A[14] | A[18];
+		c3 = A[ 5] ^ kt;
+		kt = A[18] & A[22];
+		c4 = A[14] ^ kt;
+		A[18] = c0;
+		A[22] = c1;
+		A[ 1] = c2;
+		A[ 5] = c3;
+		A[14] = c4;
+		bnn = ~A[23];
+		kt = A[10] | A[19];
+		c0 = A[ 6] ^ kt;
+		kt = A[19] & A[23];
+		c1 = A[10] ^ kt;
+		kt = bnn & A[ 2];
+		c2 = A[19] ^ kt;
+		kt = A[ 2] | A[ 6];
+		c3 = bnn ^ kt;
+		kt = A[ 6] & A[10];
+		c4 = A[ 2] ^ kt;
+		A[ 6] = c0;
+		A[10] = c1;
+		A[19] = c2;
+		A[23] = c3;
+		A[ 2] = c4;
+		bnn = ~A[11];
+		kt = A[ 3] & A[ 7];
+		c0 = A[24] ^ kt;
+		kt = A[ 7] | A[11];
+		c1 = A[ 3] ^ kt;
+		kt = bnn | A[15];
+		c2 = A[ 7] ^ kt;
+		kt = A[15] & A[24];
+		c3 = bnn ^ kt;
+		kt = A[24] | A[ 3];
+		c4 = A[15] ^ kt;
+		A[24] = c0;
+		A[ 3] = c1;
+		A[ 7] = c2;
+		A[11] = c3;
+		A[15] = c4;
+		bnn = ~A[16];
+		kt = bnn & A[20];
+		c0 = A[12] ^ kt;
+		kt = A[20] | A[ 4];
+		c1 = bnn ^ kt;
+		kt = A[ 4] & A[ 8];
+		c2 = A[20] ^ kt;
+		kt = A[ 8] | A[12];
+		c3 = A[ 4] ^ kt;
+		kt = A[12] & A[16];
+		c4 = A[ 8] ^ kt;
+		A[12] = c0;
+		A[16] = c1;
+		A[20] = c2;
+		A[ 4] = c3;
+		A[ 8] = c4;
+		A[ 0] = A[ 0] ^ RC[j + 1];
+		t = A[ 5];
+		A[ 5] = A[18];
+		A[18] = A[11];
+		A[11] = A[10];
+		A[10] = A[ 6];
+		A[ 6] = A[22];
+		A[22] = A[20];
+		A[20] = A[12];
+		A[12] = A[19];
+		A[19] = A[15];
+		A[15] = A[24];
+		A[24] = A[ 8];
+		A[ 8] = t;
+		t = A[ 1];
+		A[ 1] = A[ 9];
+		A[ 9] = A[14];
+		A[14] = A[ 2];
+		A[ 2] = A[13];
+		A[13] = A[23];
+		A[23] = A[ 4];
+		A[ 4] = A[21];
+		A[21] = A[16];
+		A[16] = A[ 3];
+		A[ 3] = A[17];
+		A[17] = A[ 7];
+		A[ 7] = t;
+	}
+}
+
+/* see bearssl_kdf.h -- rate = 200 - (security_level >> 2) bytes: 168 for level 128, 136 for level 256 */
+void
+br_shake_init(br_shake_context *sc, int security_level)
+{
+	sc->rate = 200 - (size_t)(security_level >> 2);
+	sc->dptr = 0;	/* nothing buffered yet */
+	memset(sc->A, 0, sizeof sc->A);
+	sc->A[ 1] = ~(uint64_t)0;	/* words 1, 2, 8, 12, 17 and 20 start as all-ones; */
+	sc->A[ 2] = ~(uint64_t)0;	/* br_shake_produce() complements the same six */
+	sc->A[ 8] = ~(uint64_t)0;	/* words when it serializes the state */
+	sc->A[12] = ~(uint64_t)0;
+	sc->A[17] = ~(uint64_t)0;
+	sc->A[20] = ~(uint64_t)0;
+}
+
+/* see bearssl_kdf.h -- buffers input in sc->dbuf and absorbs each completed block of 'rate' bytes */
+void
+br_shake_inject(br_shake_context *sc, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	size_t rate, dptr;
+
+	buf = data;
+	rate = sc->rate;
+	dptr = sc->dptr;	/* work on a local copy; written back at the end */
+	while (len > 0) {
+		size_t clen;
+
+		clen = rate - dptr;	/* room left in the current block */
+		if (clen > len) {
+			clen = len;
+		}
+		memcpy(sc->dbuf + dptr, buf, clen);
+		dptr += clen;
+		buf += clen;
+		len -= clen;
+		if (dptr == rate) {	/* block full: XOR it into the state, then permute */
+			xor_block(sc->A, sc->dbuf, rate);
+			process_block(sc->A);
+			dptr = 0;
+		}
+	}
+	sc->dptr = dptr;
+}
+
+/* see bearssl_kdf.h -- pads the pending block and XORs it into the state; no permutation is run here */
+void
+br_shake_flip(br_shake_context *sc)
+{
+	/*
+	 * We apply padding and pre-XOR the value into the state. We
+	 * set dptr to the end of the buffer, so that the first call to
+	 * br_shake_produce() will process the block.
+	 */
+	if ((sc->dptr + 1) == sc->rate) {
+		sc->dbuf[sc->dptr ++] = 0x9F;	/* 0x1F | 0x80: first and last padding byte coincide */
+	} else {
+		sc->dbuf[sc->dptr ++] = 0x1F;	/* first padding byte */
+		memset(sc->dbuf + sc->dptr, 0x00, sc->rate - sc->dptr - 1);
+		sc->dbuf[sc->rate - 1] = 0x80;	/* last padding byte, at the end of the block */
+		sc->dptr = sc->rate;
+	}
+	xor_block(sc->A, sc->dbuf, sc->rate);
+}
+
+/* see bearssl_kdf.h -- copies len output bytes to out, permuting the state each time dbuf is used up */
+void
+br_shake_produce(br_shake_context *sc, void *out, size_t len)
+{
+	unsigned char *buf;
+	size_t dptr, rate;
+
+	buf = out;
+	dptr = sc->dptr;
+	rate = sc->rate;
+	while (len > 0) {
+		size_t clen;
+
+		if (dptr == rate) {	/* dbuf exhausted: permute and serialize the state again */
+			unsigned char *dbuf;
+			uint64_t *A;
+
+			A = sc->A;
+			dbuf = sc->dbuf;
+			process_block(A);
+			br_enc64le(dbuf +   0,  A[ 0]);	/* all 25 words are written; only the first 'rate' bytes are read below */
+			br_enc64le(dbuf +   8, ~A[ 1]);	/* complemented: same six words br_shake_init() sets to all-ones */
+			br_enc64le(dbuf +  16, ~A[ 2]);
+			br_enc64le(dbuf +  24,  A[ 3]);
+			br_enc64le(dbuf +  32,  A[ 4]);
+			br_enc64le(dbuf +  40,  A[ 5]);
+			br_enc64le(dbuf +  48,  A[ 6]);
+			br_enc64le(dbuf +  56,  A[ 7]);
+			br_enc64le(dbuf +  64, ~A[ 8]);
+			br_enc64le(dbuf +  72,  A[ 9]);
+			br_enc64le(dbuf +  80,  A[10]);
+			br_enc64le(dbuf +  88,  A[11]);
+			br_enc64le(dbuf +  96, ~A[12]);
+			br_enc64le(dbuf + 104,  A[13]);
+			br_enc64le(dbuf + 112,  A[14]);
+			br_enc64le(dbuf + 120,  A[15]);
+			br_enc64le(dbuf + 128,  A[16]);
+			br_enc64le(dbuf + 136, ~A[17]);
+			br_enc64le(dbuf + 144,  A[18]);
+			br_enc64le(dbuf + 152,  A[19]);
+			br_enc64le(dbuf + 160, ~A[20]);
+			br_enc64le(dbuf + 168,  A[21]);
+			br_enc64le(dbuf + 176,  A[22]);
+			br_enc64le(dbuf + 184,  A[23]);
+			br_enc64le(dbuf + 192,  A[24]);
+			dptr = 0;
+		}
+		clen = rate - dptr;	/* output bytes still available in dbuf */
+		if (clen > len) {
+			clen = len;
+		}
+		memcpy(buf, sc->dbuf + dptr, clen);
+		dptr += clen;
+		buf += clen;
+		len -= clen;
+	}
+	sc->dptr = dptr;
+}
diff --git a/third_party/bearssl/src/skey_decoder.c b/third_party/bearssl/src/skey_decoder.c
new file mode 100644
index 0000000..9e285d7
--- /dev/null
+++ b/third_party/bearssl/src/skey_decoder.c
@@ -0,0 +1,650 @@
+/* Automatically generated code; do not modify directly. */
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct {
+	uint32_t *dp; /* data stack pointer: next free slot (see T0_PUSH/T0_POP) */
+	uint32_t *rp; /* return/locals stack pointer: next free slot (see T0_RPUSH/T0_RPOP) */
+	const unsigned char *ip; /* next bytecode byte to read in t0_codeblock[] */
+} t0_context;
+
+static uint32_t
+t0_parse7E_unsigned(const unsigned char **p)
+{
+	/*
+	 * Big-endian groups of 7 bits, one per byte; a byte below 0x80
+	 * is the last one. *p is left just past that last byte.
+	 */
+	uint32_t acc = 0;
+	unsigned cur;
+
+	do {
+		cur = *(*p) ++;
+		acc = (acc << 7) | (uint32_t)(cur & 0x7F);
+	} while (cur >= 0x80);
+	return acc;
+}
+
+static int32_t
+t0_parse7E_signed(const unsigned char **p)
+{
+	/*
+	 * Same byte format as t0_parse7E_unsigned(), except that bit 6
+	 * of the first byte is a sign bit: when it is set, the
+	 * accumulator starts with all bits set, so that the 7-bit
+	 * groups shifted in end up sign-extended. *p is left just past
+	 * the last byte of the encoding.
+	 */
+	int negative = ((**p) >> 6) & 1;
+	uint32_t acc = (uint32_t)-negative;
+	unsigned cur;
+
+	do {
+		cur = *(*p) ++;
+		acc = (acc << 7) | (uint32_t)(cur & 0x7F);
+	} while (cur >= 0x80);
+
+	/* Map the accumulated bit pattern to a signed value. */
+	return negative ? -(int32_t)~acc - 1 : (int32_t)acc;
+}
+
+#define T0_VBYTE(x, n)   (unsigned char)((((uint32_t)(x) >> (n)) & 0x7F) | 0x80) /* 7 bits of x taken from bit n, with the 0x80 "more bytes follow" flag set */
+#define T0_FBYTE(x, n)   (unsigned char)(((uint32_t)(x) >> (n)) & 0x7F) /* 7 bits of x taken from bit n, flag clear: final byte of an encoding */
+#define T0_SBYTE(x)      (unsigned char)((((uint32_t)(x) >> 28) + 0xF8) ^ 0xF8) /* leading byte of T0_INT5(), derived from the top 4 bits of x */
+#define T0_INT1(x)       T0_FBYTE(x, 0) /* T0_INTn(x): x as n comma-separated bytes, for use inside t0_codeblock[] */
+#define T0_INT2(x)       T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+#define T0_INT3(x)       T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+#define T0_INT4(x)       T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+#define T0_INT5(x)       T0_SBYTE(x), T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+
+/* static const unsigned char t0_datablock[]; */
+
+
+void br_skey_decoder_init_main(void *t0ctx); /* defined further down through T0_DEFENTRY() */
+
+void br_skey_decoder_run(void *t0ctx); /* bytecode interpreter, defined at the end of this file */
+
+
+
+#include "inner.h"
+
+
+
+
+
+#include "inner.h"
+
+#define CTX   ((br_skey_decoder_context *)(void *)((unsigned char *)t0ctx - offsetof(br_skey_decoder_context, cpu))) /* enclosing decoder context, recovered from the pointer to its 'cpu' member */
+#define CONTEXT_NAME   br_skey_decoder_context /* structure whose field offsets are embedded in t0_codeblock[] */
+
+/* see bearssl_x509.h: clears the context, then starts the decoding bytecode */
+void
+br_skey_decoder_init(br_skey_decoder_context *ctx)
+{
+	memset(ctx, 0, sizeof *ctx);
+	ctx->cpu.dp = &ctx->dp_stack[0]; /* both interpreter stacks start empty */
+	ctx->cpu.rp = &ctx->rp_stack[0];
+	br_skey_decoder_init_main(&ctx->cpu); /* points ip at the entry word; uses rp, so it must come after the line above */
+	br_skey_decoder_run(&ctx->cpu); /* runs until the interpreter first leaves its loop */
+}
+
+/* see bearssl_x509.h: hands 'len' bytes located at 'data' to the decoder */
+void
+br_skey_decoder_push(br_skey_decoder_context *ctx,
+	const void *data, size_t len)
+{
+	ctx->hbuf = data; /* the pointer is stored, not the bytes; the read8-low and read-blob-inner opcodes consume it */
+	ctx->hlen = len;
+	br_skey_decoder_run(&ctx->cpu); /* resumes from the dp/rp/ip saved by the previous run */
+}
+
+
+
+static const unsigned char t0_datablock[] = { /* length-prefixed OID values for the eqOID opcode; offsets 1, 11, 19, 28, 34 hold 1.2.840.113549.1.1.1, 1.2.840.10045.2.1, 1.2.840.10045.3.1.7, 1.3.132.0.34, 1.3.132.0.35 */
+	0x00, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x07,
+	0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01, 0x08, 0x2A, 0x86, 0x48, 0xCE,
+	0x3D, 0x03, 0x01, 0x07, 0x05, 0x2B, 0x81, 0x04, 0x00, 0x22, 0x05, 0x2B,
+	0x81, 0x04, 0x00, 0x23
+};
+
+static const unsigned char t0_codeblock[] = { /* generated T0 bytecode executed by br_skey_decoder_run(); do not edit by hand */
+	0x00, 0x01, 0x01, 0x07, 0x00, 0x00, 0x01, 0x01, 0x08, 0x00, 0x00, 0x13,
+	0x13, 0x00, 0x00, 0x01, T0_INT1(BR_ERR_X509_BAD_TAG_CLASS), 0x00, 0x00,
+	0x01, T0_INT1(BR_ERR_X509_BAD_TAG_VALUE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_EXTRA_ELEMENT), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_INDEFINITE_LENGTH), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_INNER_TRUNC), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_INVALID_VALUE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_LIMIT_EXCEEDED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_NOT_CONSTRUCTED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_NOT_PRIMITIVE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_OVERFLOW), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_UNEXPECTED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_UNSUPPORTED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_KEYTYPE_EC), 0x00, 0x00, 0x01, T0_INT1(BR_KEYTYPE_RSA),
+	0x00, 0x00, 0x01, T0_INT2(offsetof(CONTEXT_NAME, key_data)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(CONTEXT_NAME, key_type)), 0x00, 0x00,
+	0x33, 0x48, 0x00, 0x00, 0x01, T0_INT2(offsetof(CONTEXT_NAME, pad)),
+	0x00, 0x00, 0x01, 0x13, 0x00, 0x00, 0x01, 0x1C, 0x00, 0x00, 0x01, 0x22,
+	0x00, 0x00, 0x05, 0x02, 0x2C, 0x16, 0x00, 0x00, 0x06, 0x02, 0x2D, 0x16,
+	0x00, 0x00, 0x01, 0x10, 0x3D, 0x00, 0x00, 0x0D, 0x05, 0x02, 0x2F, 0x16,
+	0x3A, 0x00, 0x00, 0x0D, 0x05, 0x02, 0x2F, 0x16, 0x3B, 0x00, 0x00, 0x06,
+	0x02, 0x27, 0x16, 0x00, 0x01, 0x03, 0x00, 0x54, 0x57, 0x01, 0x02, 0x3E,
+	0x55, 0x23, 0x06, 0x02, 0x30, 0x16, 0x57, 0x01, 0x04, 0x3E, 0x02, 0x00,
+	0x41, 0x3F, 0x00, 0x02, 0x03, 0x00, 0x53, 0x14, 0x14, 0x03, 0x01, 0x48,
+	0x0E, 0x06, 0x02, 0x30, 0x16, 0x33, 0x4C, 0x58, 0x01, 0x7F, 0x19, 0x0D,
+	0x06, 0x04, 0x13, 0x13, 0x04, 0x29, 0x01, 0x20, 0x19, 0x0D, 0x06, 0x16,
+	0x13, 0x3A, 0x53, 0x4D, 0x02, 0x00, 0x06, 0x09, 0x02, 0x00, 0x0C, 0x06,
+	0x02, 0x2A, 0x16, 0x04, 0x02, 0x03, 0x00, 0x3F, 0x04, 0x0D, 0x01, 0x21,
+	0x19, 0x0D, 0x06, 0x04, 0x13, 0x3A, 0x04, 0x03, 0x30, 0x16, 0x13, 0x5D,
+	0x02, 0x00, 0x05, 0x02, 0x30, 0x16, 0x02, 0x00, 0x02, 0x01, 0x1D, 0x00,
+	0x02, 0x53, 0x4B, 0x05, 0x02, 0x30, 0x16, 0x5B, 0x15, 0x06, 0x07, 0x5D,
+	0x01, 0x7F, 0x03, 0x01, 0x04, 0x16, 0x46, 0x15, 0x06, 0x10, 0x01, 0x00,
+	0x03, 0x01, 0x14, 0x06, 0x03, 0x4D, 0x04, 0x02, 0x01, 0x00, 0x03, 0x00,
+	0x04, 0x02, 0x30, 0x16, 0x3F, 0x57, 0x01, 0x04, 0x3E, 0x53, 0x02, 0x01,
+	0x06, 0x03, 0x43, 0x04, 0x03, 0x02, 0x00, 0x40, 0x3F, 0x5D, 0x02, 0x01,
+	0x06, 0x03, 0x32, 0x04, 0x01, 0x31, 0x00, 0x00, 0x54, 0x57, 0x01, 0x02,
+	0x3E, 0x55, 0x06, 0x02, 0x30, 0x16, 0x57, 0x01, 0x02, 0x3E, 0x44, 0x3F,
+	0x00, 0x07, 0x35, 0x50, 0x14, 0x05, 0x02, 0x2F, 0x16, 0x23, 0x01, 0x03,
+	0x0B, 0x33, 0x17, 0x47, 0x07, 0x03, 0x00, 0x4F, 0x4F, 0x35, 0x4E, 0x14,
+	0x14, 0x03, 0x01, 0x03, 0x02, 0x51, 0x14, 0x03, 0x03, 0x02, 0x02, 0x07,
+	0x14, 0x03, 0x02, 0x51, 0x14, 0x03, 0x04, 0x02, 0x02, 0x07, 0x14, 0x03,
+	0x02, 0x51, 0x14, 0x03, 0x05, 0x02, 0x02, 0x07, 0x14, 0x03, 0x02, 0x51,
+	0x03, 0x06, 0x02, 0x00, 0x02, 0x01, 0x02, 0x03, 0x02, 0x04, 0x02, 0x05,
+	0x02, 0x06, 0x1E, 0x00, 0x00, 0x19, 0x19, 0x00, 0x00, 0x01, 0x0B, 0x00,
+	0x00, 0x01, 0x00, 0x20, 0x14, 0x06, 0x08, 0x01, 0x01, 0x21, 0x20, 0x22,
+	0x20, 0x04, 0x75, 0x13, 0x00, 0x00, 0x01,
+	T0_INT2(3 * BR_X509_BUFSIZE_SIG), 0x00, 0x01, 0x01, 0x87, 0xFF, 0xFF,
+	0x7F, 0x54, 0x57, 0x01, 0x02, 0x3E, 0x55, 0x01, 0x01, 0x0E, 0x06, 0x02,
+	0x30, 0x16, 0x57, 0x01, 0x02, 0x19, 0x0D, 0x06, 0x06, 0x13, 0x3B, 0x44,
+	0x32, 0x04, 0x1C, 0x01, 0x04, 0x19, 0x0D, 0x06, 0x08, 0x13, 0x3B, 0x01,
+	0x00, 0x41, 0x31, 0x04, 0x0E, 0x01, 0x10, 0x19, 0x0D, 0x06, 0x05, 0x13,
+	0x3A, 0x42, 0x04, 0x03, 0x30, 0x16, 0x13, 0x03, 0x00, 0x3F, 0x02, 0x00,
+	0x34, 0x1F, 0x5A, 0x27, 0x16, 0x00, 0x01, 0x45, 0x0A, 0x06, 0x02, 0x29,
+	0x16, 0x14, 0x03, 0x00, 0x08, 0x02, 0x00, 0x00, 0x00, 0x57, 0x01, 0x06,
+	0x3E, 0x56, 0x00, 0x00, 0x20, 0x14, 0x06, 0x07, 0x1A, 0x14, 0x06, 0x01,
+	0x12, 0x04, 0x76, 0x24, 0x00, 0x00, 0x4B, 0x05, 0x02, 0x30, 0x16, 0x37,
+	0x15, 0x06, 0x04, 0x01, 0x17, 0x04, 0x12, 0x38, 0x15, 0x06, 0x04, 0x01,
+	0x18, 0x04, 0x0A, 0x39, 0x15, 0x06, 0x04, 0x01, 0x19, 0x04, 0x02, 0x30,
+	0x16, 0x00, 0x00, 0x1C, 0x57, 0x01, 0x02, 0x3E, 0x09, 0x50, 0x00, 0x00,
+	0x35, 0x4E, 0x13, 0x00, 0x03, 0x14, 0x03, 0x00, 0x03, 0x01, 0x03, 0x02,
+	0x53, 0x59, 0x14, 0x01, 0x81, 0x00, 0x0F, 0x06, 0x02, 0x2E, 0x16, 0x14,
+	0x01, 0x00, 0x0D, 0x06, 0x0B, 0x13, 0x14, 0x05, 0x04, 0x13, 0x01, 0x00,
+	0x00, 0x59, 0x04, 0x6F, 0x02, 0x01, 0x14, 0x05, 0x02, 0x2B, 0x16, 0x23,
+	0x03, 0x01, 0x02, 0x02, 0x1F, 0x02, 0x02, 0x22, 0x03, 0x02, 0x14, 0x06,
+	0x03, 0x59, 0x04, 0x68, 0x13, 0x02, 0x00, 0x02, 0x01, 0x08, 0x00, 0x00,
+	0x14, 0x35, 0x1C, 0x08, 0x20, 0x1C, 0x07, 0x20, 0x4E, 0x00, 0x01, 0x59,
+	0x14, 0x01, 0x81, 0x00, 0x0A, 0x06, 0x01, 0x00, 0x01, 0x81, 0x00, 0x08,
+	0x14, 0x05, 0x02, 0x28, 0x16, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01,
+	0x00, 0x0E, 0x06, 0x19, 0x02, 0x00, 0x23, 0x03, 0x00, 0x14, 0x01, 0x83,
+	0xFF, 0xFF, 0x7F, 0x0E, 0x06, 0x02, 0x29, 0x16, 0x01, 0x08, 0x0B, 0x20,
+	0x59, 0x1C, 0x07, 0x04, 0x60, 0x00, 0x00, 0x52, 0x4A, 0x00, 0x00, 0x57,
+	0x3C, 0x53, 0x00, 0x01, 0x53, 0x14, 0x05, 0x02, 0x2E, 0x16, 0x59, 0x14,
+	0x01, 0x81, 0x00, 0x0F, 0x06, 0x02, 0x2E, 0x16, 0x03, 0x00, 0x14, 0x06,
+	0x16, 0x59, 0x02, 0x00, 0x14, 0x01, 0x87, 0xFF, 0xFF, 0x7F, 0x0F, 0x06,
+	0x02, 0x2E, 0x16, 0x01, 0x08, 0x0B, 0x07, 0x03, 0x00, 0x04, 0x67, 0x13,
+	0x02, 0x00, 0x00, 0x00, 0x53, 0x14, 0x01, 0x81, 0x7F, 0x0E, 0x06, 0x08,
+	0x5C, 0x01, 0x00, 0x36, 0x1F, 0x01, 0x00, 0x00, 0x14, 0x36, 0x1F, 0x36,
+	0x22, 0x4C, 0x01, 0x7F, 0x00, 0x01, 0x59, 0x03, 0x00, 0x02, 0x00, 0x01,
+	0x05, 0x10, 0x01, 0x01, 0x11, 0x18, 0x02, 0x00, 0x01, 0x06, 0x10, 0x14,
+	0x01, 0x01, 0x11, 0x06, 0x02, 0x25, 0x16, 0x01, 0x04, 0x0B, 0x02, 0x00,
+	0x01, 0x1F, 0x11, 0x14, 0x01, 0x1F, 0x0D, 0x06, 0x02, 0x26, 0x16, 0x07,
+	0x00, 0x00, 0x14, 0x05, 0x05, 0x01, 0x00, 0x01, 0x7F, 0x00, 0x57, 0x00,
+	0x00, 0x14, 0x05, 0x02, 0x29, 0x16, 0x23, 0x5A, 0x00, 0x00, 0x1B, 0x14,
+	0x01, 0x00, 0x0F, 0x06, 0x01, 0x00, 0x13, 0x12, 0x04, 0x74, 0x00, 0x01,
+	0x01, 0x00, 0x00, 0x5D, 0x13, 0x00, 0x00, 0x14, 0x06, 0x07, 0x5E, 0x14,
+	0x06, 0x01, 0x12, 0x04, 0x76, 0x00, 0x00, 0x01, 0x00, 0x19, 0x1A, 0x09,
+	0x24, 0x00
+};
+
+static const uint16_t t0_caddr[] = { /* start offsets in t0_codeblock[] of the interpreted words, indexed by (opcode - T0_INTERPRETED) */
+	0,
+	5,
+	10,
+	14,
+	18,
+	22,
+	26,
+	30,
+	34,
+	38,
+	42,
+	46,
+	50,
+	54,
+	58,
+	62,
+	66,
+	70,
+	75,
+	80,
+	84,
+	89,
+	93,
+	97,
+	101,
+	107,
+	113,
+	118,
+	126,
+	134,
+	140,
+	163,
+	244,
+	311,
+	329,
+	404,
+	408,
+	412,
+	429,
+	434,
+	505,
+	519,
+	526,
+	540,
+	573,
+	582,
+	587,
+	654,
+	665,
+	721,
+	725,
+	730,
+	778,
+	804,
+	848,
+	859,
+	868,
+	881,
+	885,
+	889,
+	901
+};
+
+#define T0_INTERPRETED   34 /* opcodes 0..33 are native (switch in br_skey_decoder_run()); 34 and above index t0_caddr[] */
+
+#define T0_ENTER(ip, rp, slot)   do { \
+		const unsigned char *t0_newip; \
+		uint32_t t0_lnum; \
+		t0_newip = &t0_codeblock[t0_caddr[(slot) - T0_INTERPRETED]]; \
+		t0_lnum = t0_parse7E_unsigned(&t0_newip); /* word header: number of local slots */ \
+		(rp) += t0_lnum; /* reserve the local slots */ \
+		*((rp) ++) = (uint32_t)((ip) - &t0_codeblock[0]) + (t0_lnum << 16); /* return offset in the low 16 bits, local count above */ \
+		(ip) = t0_newip; \
+	} while (0)
+
+#define T0_DEFENTRY(name, slot) \
+void \
+name(void *ctx) \
+{ \
+	t0_context *t0ctx = ctx; \
+	t0ctx->ip = &t0_codeblock[0]; /* return offset 0: the "ret" opcode stops the interpreter when it pops it */ \
+	T0_ENTER(t0ctx->ip, t0ctx->rp, slot); \
+}
+
+T0_DEFENTRY(br_skey_decoder_init_main, 73)
+
+#define T0_NEXT(t0ipp)   (*(*(t0ipp)) ++)
+
+void
+br_skey_decoder_run(void *t0ctx)
+{
+	uint32_t *dp, *rp;
+	const unsigned char *ip;
+
+#define T0_LOCAL(x)    (*(rp - 2 - (x))) /* local slot x of the current frame; rp - 1 is the return word written by T0_ENTER */
+#define T0_POP()       (*-- dp)
+#define T0_POPi()      (*(int32_t *)(-- dp))
+#define T0_PEEK(x)     (*(dp - 1 - (x)))
+#define T0_PEEKi(x)    (*(int32_t *)(dp - 1 - (x)))
+#define T0_PUSH(v)     do { *dp = (v); dp ++; } while (0)
+#define T0_PUSHi(v)    do { *(int32_t *)dp = (v); dp ++; } while (0)
+#define T0_RPOP()      (*-- rp)
+#define T0_RPOPi()     (*(int32_t *)(-- rp))
+#define T0_RPUSH(v)    do { *rp = (v); rp ++; } while (0)
+#define T0_RPUSHi(v)   do { *(int32_t *)rp = (v); rp ++; } while (0)
+#define T0_ROLL(x)     do { \
+	size_t t0len = (size_t)(x); \
+	uint32_t t0tmp = *(dp - 1 - t0len); \
+	memmove(dp - t0len - 1, dp - t0len, t0len * sizeof *dp); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_SWAP()      do { \
+	uint32_t t0tmp = *(dp - 2); \
+	*(dp - 2) = *(dp - 1); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_ROT()       do { \
+	uint32_t t0tmp = *(dp - 3); \
+	*(dp - 3) = *(dp - 2); \
+	*(dp - 2) = *(dp - 1); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_NROT()       do { \
+	uint32_t t0tmp = *(dp - 1); \
+	*(dp - 1) = *(dp - 2); \
+	*(dp - 2) = *(dp - 3); \
+	*(dp - 3) = t0tmp; \
+} while (0)
+#define T0_PICK(x)      do { \
+	uint32_t t0depth = (x); \
+	T0_PUSH(T0_PEEK(t0depth)); \
+} while (0)
+#define T0_CO()         do { \
+	goto t0_exit; \
+} while (0)
+#define T0_RET()        goto t0_next
+
+	dp = ((t0_context *)t0ctx)->dp; /* work on local copies; they are written back at t0_exit */
+	rp = ((t0_context *)t0ctx)->rp;
+	ip = ((t0_context *)t0ctx)->ip;
+	goto t0_next; /* enter the loop at the fetch step, resuming at the saved ip */
+	for (;;) {
+		uint32_t t0x;
+
+	t0_next:
+		t0x = T0_NEXT(&ip); /* fetch the next opcode byte */
+		if (t0x < T0_INTERPRETED) {
+			switch (t0x) {
+				int32_t t0off;
+
+			case 0: /* ret */
+				t0x = T0_RPOP();
+				rp -= (t0x >> 16); /* drop the local slots of the frame */
+				t0x &= 0xFFFF;
+				if (t0x == 0) { /* return offset 0: the entry word has returned */
+					ip = NULL;
+					goto t0_exit;
+				}
+				ip = &t0_codeblock[t0x];
+				break;
+			case 1: /* literal constant */
+				T0_PUSHi(t0_parse7E_signed(&ip));
+				break;
+			case 2: /* read local */
+				T0_PUSH(T0_LOCAL(t0_parse7E_unsigned(&ip)));
+				break;
+			case 3: /* write local */
+				T0_LOCAL(t0_parse7E_unsigned(&ip)) = T0_POP();
+				break;
+			case 4: /* jump */
+				t0off = t0_parse7E_signed(&ip);
+				ip += t0off; /* relative to the byte following the encoded offset */
+				break;
+			case 5: /* jump if */
+				t0off = t0_parse7E_signed(&ip);
+				if (T0_POP()) {
+					ip += t0off;
+				}
+				break;
+			case 6: /* jump if not */
+				t0off = t0_parse7E_signed(&ip);
+				if (!T0_POP()) {
+					ip += t0off;
+				}
+				break;
+			case 7: {
+				/* + */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a + b);
+
+				}
+				break;
+			case 8: {
+				/* - */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a - b);
+
+				}
+				break;
+			case 9: {
+				/* -rot */
+ T0_NROT(); 
+				}
+				break;
+			case 10: {
+				/* < */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a < b)); /* comparisons push all-ones for true, 0 for false */
+
+				}
+				break;
+			case 11: {
+				/* << */
+
+	int c = (int)T0_POPi();
+	uint32_t x = T0_POP();
+	T0_PUSH(x << c);
+
+				}
+				break;
+			case 12: {
+				/* <> */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(-(uint32_t)(a != b));
+
+				}
+				break;
+			case 13: {
+				/* = */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(-(uint32_t)(a == b));
+
+				}
+				break;
+			case 14: {
+				/* > */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a > b));
+
+				}
+				break;
+			case 15: {
+				/* >= */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a >= b));
+
+				}
+				break;
+			case 16: {
+				/* >> */
+
+	int c = (int)T0_POPi();
+	int32_t x = T0_POPi();
+	T0_PUSHi(x >> c); /* shift of a signed value, unlike the u>> opcode */
+
+				}
+				break;
+			case 17: {
+				/* and */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a & b);
+
+				}
+				break;
+			case 18: {
+				/* co */
+ T0_CO(); 
+				}
+				break;
+			case 19: {
+				/* drop */
+ (void)T0_POP(); 
+				}
+				break;
+			case 20: {
+				/* dup */
+ T0_PUSH(T0_PEEK(0)); 
+				}
+				break;
+			case 21: {
+				/* eqOID */
+
+	const unsigned char *a2 = &t0_datablock[T0_POP()];
+	const unsigned char *a1 = &CTX->pad[0];
+	size_t len = a1[0]; /* both values start with a length byte */
+	int x;
+	if (len == a2[0]) {
+		x = -(memcmp(a1 + 1, a2 + 1, len) == 0);
+	} else {
+		x = 0;
+	}
+	T0_PUSH((uint32_t)x);
+
+				}
+				break;
+			case 22: {
+				/* fail */
+
+	CTX->err = T0_POPi(); /* record the error code, then leave the interpreter */
+	T0_CO();
+
+				}
+				break;
+			case 23: {
+				/* get8 */
+
+	uint32_t addr = T0_POP(); /* byte offset from the start of the context structure */
+	T0_PUSH(*((unsigned char *)CTX + addr));
+
+				}
+				break;
+			case 24: {
+				/* neg */
+
+	uint32_t a = T0_POP();
+	T0_PUSH(-a);
+
+				}
+				break;
+			case 25: {
+				/* over */
+ T0_PUSH(T0_PEEK(1)); 
+				}
+				break;
+			case 26: {
+				/* read-blob-inner */
+
+	uint32_t len = T0_POP();
+	uint32_t addr = T0_POP();
+	size_t clen = CTX->hlen;
+	if (clen > len) {
+		clen = (size_t)len;
+	}
+	if (addr != 0) { /* with addr == 0 the bytes are consumed but not stored */
+		memcpy((unsigned char *)CTX + addr, CTX->hbuf, clen);
+	}
+	CTX->hbuf += clen;
+	CTX->hlen -= clen;
+	T0_PUSH(addr + clen); /* updated destination offset */
+	T0_PUSH(len - clen); /* number of bytes still to read */
+
+				}
+				break;
+			case 27: {
+				/* read8-low */
+
+	if (CTX->hlen == 0) {
+		T0_PUSHi(-1); /* no input byte available */
+	} else {
+		CTX->hlen --;
+		T0_PUSH(*CTX->hbuf ++);
+	}
+
+				}
+				break;
+			case 28: {
+				/* rot */
+ T0_ROT(); 
+				}
+				break;
+			case 29: {
+				/* set-ec-key */
+
+	size_t xlen = T0_POP();
+	uint32_t curve = T0_POP();
+	CTX->key.ec.curve = curve;
+	CTX->key.ec.x = CTX->key_data; /* the private value is stored at the start of key_data */
+	CTX->key.ec.xlen = xlen;
+
+				}
+				break;
+			case 30: {
+				/* set-rsa-key */
+
+	size_t iqlen = T0_POP();
+	size_t dqlen = T0_POP();
+	size_t dplen = T0_POP();
+	size_t qlen = T0_POP();
+	size_t plen = T0_POP();
+	uint32_t n_bitlen = T0_POP();
+	size_t off; /* running offset: p, q, dp, dq, iq are laid out back to back in key_data */
+
+	CTX->key.rsa.n_bitlen = n_bitlen;
+	CTX->key.rsa.p = CTX->key_data;
+	CTX->key.rsa.plen = plen;
+	off = plen;
+	CTX->key.rsa.q = CTX->key_data + off;
+	CTX->key.rsa.qlen = qlen;
+	off += qlen;
+	CTX->key.rsa.dp = CTX->key_data + off;
+	CTX->key.rsa.dplen = dplen;
+	off += dplen;
+	CTX->key.rsa.dq = CTX->key_data + off;
+	CTX->key.rsa.dqlen = dqlen;
+	off += dqlen;
+	CTX->key.rsa.iq = CTX->key_data + off;
+	CTX->key.rsa.iqlen = iqlen;
+
+				}
+				break;
+			case 31: {
+				/* set8 */
+
+	uint32_t addr = T0_POP(); /* byte offset from the start of the context structure */
+	*((unsigned char *)CTX + addr) = (unsigned char)T0_POP();
+
+				}
+				break;
+			case 32: {
+				/* swap */
+ T0_SWAP(); 
+				}
+				break;
+			case 33: {
+				/* u>> */
+
+	int c = (int)T0_POPi();
+	uint32_t x = T0_POP();
+	T0_PUSH(x >> c);
+
+				}
+				break;
+			}
+
+		} else {
+			T0_ENTER(ip, rp, t0x); /* call of an interpreted word */
+		}
+	}
+t0_exit:
+	((t0_context *)t0ctx)->dp = dp; /* save the machine state so that a later call can resume */
+	((t0_context *)t0ctx)->rp = rp;
+	((t0_context *)t0ctx)->ip = ip;
+}
diff --git a/third_party/bearssl/src/ssl_ccert_single_ec.c b/third_party/bearssl/src/ssl_ccert_single_ec.c
new file mode 100644
index 0000000..93ebcde
--- /dev/null
+++ b/third_party/bearssl/src/ssl_ccert_single_ec.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static void
+cc_none0(const br_ssl_client_certificate_class **pctx)
+{
+	(void)pctx; /* no-op: ccert_vtable uses it for start_name_list, end_name and end_name_list */
+}
+
+static void
+cc_none1(const br_ssl_client_certificate_class **pctx, size_t len)
+{
+	(void)pctx; /* no-op: ccert_vtable uses it for start_name */
+	(void)len;
+}
+
+static void
+cc_none2(const br_ssl_client_certificate_class **pctx,
+	const unsigned char *data, size_t len)
+{
+	(void)pctx; /* no-op: ccert_vtable uses it for append_name; the name bytes are ignored */
+	(void)data;
+	(void)len;
+}
+
+static void
+cc_choose(const br_ssl_client_certificate_class **pctx,
+	const br_ssl_client_context *cc, uint32_t auth_types,
+	br_ssl_client_certificate *choices)
+{
+	br_ssl_client_certificate_ec_context *zc;
+	int x;
+	int scurve;
+
+	zc = (br_ssl_client_certificate_ec_context *)pctx;
+	scurve = br_ssl_client_get_server_curve(cc);
+
+	/*
+	 * Static ECDH comes first. When it is selected we return at
+	 * once: falling through would replace or wipe that choice.
+	 */
+	if ((zc->allowed_usages & BR_KEYTYPE_KEYX) != 0
+		&& scurve == zc->sk->curve)
+	{
+		x = (zc->issuer_key_type == BR_KEYTYPE_RSA) ? 16 : 17; /* auth_types bit to test, by issuer key type */
+		if (((auth_types >> x) & 1) != 0) {
+			choices->auth_type = BR_AUTH_ECDH;
+			choices->hash_id = -1;
+			choices->chain = zc->chain;
+			choices->chain_len = zc->chain_len;
+			return;
+		}
+	}
+
+	/* Otherwise ECDSA, which needs a hash function (bits 8+ of auth_types). */
+	x = br_ssl_choose_hash((unsigned)(auth_types >> 8));
+	if (x == 0 || (zc->allowed_usages & BR_KEYTYPE_SIGN) == 0) {
+		memset(choices, 0, sizeof *choices); /* all-zero choices: no authentication method selected */
+		return;
+	}
+	choices->auth_type = BR_AUTH_ECDSA;
+	choices->hash_id = x;
+	choices->chain = zc->chain;
+	choices->chain_len = zc->chain_len;
+}
+
+static uint32_t
+cc_do_keyx(const br_ssl_client_certificate_class **pctx,
+	unsigned char *data, size_t *len)
+{
+	/* Run mul() on data[] with our key, then keep what xoff() designates. */
+	br_ssl_client_certificate_ec_context *zc = (void *)pctx;
+	size_t coord_off, coord_len;
+	uint32_t status;
+
+	status = zc->iec->mul(data, *len, zc->sk->x, zc->sk->xlen, zc->sk->curve);
+	coord_off = zc->iec->xoff(zc->sk->curve, &coord_len);
+	memmove(data, data + coord_off, coord_len);
+	*len = coord_len;
+	return status;
+}
+
+static size_t
+cc_do_sign(const br_ssl_client_certificate_class **pctx,
+	int hash_id, size_t hv_len, unsigned char *data, size_t len)
+{
+	br_ssl_client_certificate_ec_context *zc;
+	unsigned char hv[64];
+	const br_hash_class *hc;
+
+	/* Signs the hv_len-byte hash value held in data[]; 0 means "rejected". */
+	zc = (br_ssl_client_certificate_ec_context *)pctx;
+	hc = br_multihash_getimpl(zc->mhash, hash_id);
+	if (hv_len > sizeof hv	/* would overflow hv[] in the copy below */
+		|| hc == NULL || len < 139)
+	{
+		return 0;
+	}
+	memcpy(hv, data, hv_len); /* private copy: data[] is also handed to iecdsa() */
+	return zc->iecdsa(zc->iec, hc, hv, zc->sk, data);
+}
+
+static const br_ssl_client_certificate_class ccert_vtable = {
+	sizeof(br_ssl_client_certificate_ec_context), /* size of the context structure */
+	cc_none0, /* start_name_list */
+	cc_none1, /* start_name */
+	cc_none2, /* append_name */
+	cc_none0, /* end_name */
+	cc_none0, /* end_name_list */
+	cc_choose, /* selects BR_AUTH_ECDH or BR_AUTH_ECDSA */
+	cc_do_keyx, /* key exchange with the private key */
+	cc_do_sign /* signature through the iecdsa callback */
+};
+
+/* see bearssl_ssl.h: records one EC chain + key as the client identity */
+void
+br_ssl_client_set_single_ec(br_ssl_client_context *cc,
+	const br_x509_certificate *chain, size_t chain_len,
+	const br_ec_private_key *sk, unsigned allowed_usages,
+	unsigned cert_issuer_key_type,
+	const br_ec_impl *iec, br_ecdsa_sign iecdsa)
+{
+	cc->client_auth.single_ec.sk = sk; /* key and what it may be used for */
+	cc->client_auth.single_ec.allowed_usages = allowed_usages;
+	cc->client_auth.single_ec.issuer_key_type = cert_issuer_key_type;
+	cc->client_auth.single_ec.chain = chain; /* chain sent to the server */
+	cc->client_auth.single_ec.chain_len = chain_len;
+	cc->client_auth.single_ec.iec = iec; /* implementations used by the callbacks */
+	cc->client_auth.single_ec.iecdsa = iecdsa;
+	cc->client_auth.single_ec.mhash = &cc->eng.mhash;
+	cc->client_auth.single_ec.vtable = &ccert_vtable; /* callbacks defined in this file */
+	cc->client_auth_vtable = &cc->client_auth.single_ec.vtable;
+}
diff --git a/third_party/bearssl/src/ssl_ccert_single_rsa.c b/third_party/bearssl/src/ssl_ccert_single_rsa.c
new file mode 100644
index 0000000..690df20
--- /dev/null
+++ b/third_party/bearssl/src/ssl_ccert_single_rsa.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static void
+cc_none0(const br_ssl_client_certificate_class **pctx)
+{
+	(void)pctx; /* no-op: ccert_vtable uses it for start_name_list, end_name and end_name_list */
+}
+
+static void
+cc_none1(const br_ssl_client_certificate_class **pctx, size_t len)
+{
+	(void)pctx; /* no-op: ccert_vtable uses it for start_name */
+	(void)len;
+}
+
+static void
+cc_none2(const br_ssl_client_certificate_class **pctx,
+	const unsigned char *data, size_t len)
+{
+	(void)pctx; /* no-op: ccert_vtable uses it for append_name; the name bytes are ignored */
+	(void)data;
+	(void)len;
+}
+
+static void
+cc_choose(const br_ssl_client_certificate_class **pctx,
+	const br_ssl_client_context *cc, uint32_t auth_types,
+	br_ssl_client_certificate *choices)
+{
+	br_ssl_client_certificate_rsa_context *zc;
+	int x = br_ssl_choose_hash((unsigned)auth_types); /* hash identifier to sign with */
+
+	(void)cc;
+	zc = (br_ssl_client_certificate_rsa_context *)pctx;
+	if (x == 0 && (auth_types & 1) == 0) { /* x == 0 is acceptable only if bit 0 of auth_types is set */
+		memset(choices, 0, sizeof *choices); /* all-zero choices: no authentication method selected */
+		return; /* without this, the assignments below would overwrite the wipe */
+	}
+	choices->auth_type = BR_AUTH_RSA;
+	choices->hash_id = x;
+	choices->chain = zc->chain;
+	choices->chain_len = zc->chain_len;
+}
+
+/*
+ * OID for hash functions in RSA signatures (each value starts with its length byte).
+ */
+static const unsigned char HASH_OID_SHA1[] = { /* 1.3.14.3.2.26 */
+	0x05, 0x2B, 0x0E, 0x03, 0x02, 0x1A
+};
+
+static const unsigned char HASH_OID_SHA224[] = { /* 2.16.840.1.101.3.4.2.4 */
+	0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04
+};
+
+static const unsigned char HASH_OID_SHA256[] = { /* 2.16.840.1.101.3.4.2.1 */
+	0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01
+};
+
+static const unsigned char HASH_OID_SHA384[] = { /* 2.16.840.1.101.3.4.2.2 */
+	0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02
+};
+
+static const unsigned char HASH_OID_SHA512[] = { /* 2.16.840.1.101.3.4.2.3 */
+	0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03
+};
+
+static const unsigned char *HASH_OID[] = { /* indexed by (hash_id - 2), see cc_do_sign() */
+	HASH_OID_SHA1,
+	HASH_OID_SHA224,
+	HASH_OID_SHA256,
+	HASH_OID_SHA384,
+	HASH_OID_SHA512
+};
+
+static size_t
+cc_do_sign(const br_ssl_client_certificate_class **pctx,
+	int hash_id, size_t hv_len, unsigned char *data, size_t len)
+{
+	br_ssl_client_certificate_rsa_context *zc;
+	unsigned char hv[64];
+	const unsigned char *hash_oid;
+	size_t sig_len;
+
+	zc = (br_ssl_client_certificate_rsa_context *)pctx;
+	if (hash_id == 0) {
+		hash_oid = NULL; /* hash_id 0: sign without a hash OID */
+	} else if (hash_id >= 2 && hash_id <= 6) {
+		hash_oid = HASH_OID[hash_id - 2];
+	} else {
+		return 0; /* unsupported hash identifier */
+	}
+	sig_len = (zc->sk->n_bitlen + 7) >> 3; /* modulus length in bytes */
+	if (len < sig_len || hv_len > sizeof hv) { /* output would not fit in data[], or hash would overflow hv[] */
+		return 0;
+	}
+	memcpy(hv, data, hv_len); /* private copy: data[] is also handed to irsasign() */
+	return zc->irsasign(hash_oid, hv, hv_len, zc->sk, data) ? sig_len : 0;
+}
+
+static const br_ssl_client_certificate_class ccert_vtable = {
+	sizeof(br_ssl_client_certificate_rsa_context), /* size of the context structure */
+	cc_none0, /* start_name_list */
+	cc_none1, /* start_name */
+	cc_none2, /* append_name */
+	cc_none0, /* end_name */
+	cc_none0, /* end_name_list */
+	cc_choose, /* always selects BR_AUTH_RSA when a hash is usable */
+	0, /* null slot between choose and sign: presumably the key exchange callback, unused for RSA -- confirm against bearssl_ssl.h */
+	cc_do_sign /* signature through the irsasign callback */
+};
+
+/* see bearssl_ssl.h: records one RSA chain + key as the client identity */
+void
+br_ssl_client_set_single_rsa(br_ssl_client_context *cc,
+	const br_x509_certificate *chain, size_t chain_len,
+	const br_rsa_private_key *sk, br_rsa_pkcs1_sign irsasign)
+{
+	cc->client_auth.single_rsa.sk = sk; /* key and the implementation that signs with it */
+	cc->client_auth.single_rsa.irsasign = irsasign;
+	cc->client_auth.single_rsa.chain = chain; /* chain sent to the server */
+	cc->client_auth.single_rsa.chain_len = chain_len;
+	cc->client_auth.single_rsa.vtable = &ccert_vtable; /* callbacks defined in this file */
+	cc->client_auth_vtable = &cc->client_auth.single_rsa.vtable;
+}
diff --git a/third_party/bearssl/src/ssl_client.c b/third_party/bearssl/src/ssl_client.c
new file mode 100644
index 0000000..28c404b
--- /dev/null
+++ b/third_party/bearssl/src/ssl_client.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ssl.h: sets every byte of the client context to zero */
+void
+br_ssl_client_zero(br_ssl_client_context *cc)
+{
+	/*
+	 * Strictly conforming C would require setting each pointer to
+	 * NULL and each other field to 0 individually. On all our target
+	 * architectures a plain memset() has the same effect, and it is
+	 * faster and uses a lot less code.
+	 */
+	memset(cc, 0, sizeof *cc);
+}
+
+/* see bearssl_ssl.h */
+int
+br_ssl_client_reset(br_ssl_client_context *cc,
+	const char *server_name, int resume_session)
+{
+	size_t n;
+
+	br_ssl_engine_set_buffer(&cc->eng, NULL, 0, 0);
+	cc->eng.version_out = cc->eng.version_min;
+	if (!resume_session) {
+		br_ssl_client_forget_session(cc);
+	}
+	if (!br_ssl_engine_init_rand(&cc->eng)) {
+		return 0;
+	}
+
+	/*
+	 * We always set back the "reneg" flag to 0 because we use it
+	 * to distinguish between first handshake and renegotiation.
+	 * Note that "renegotiation" and "session resumption" are two
+	 * different things.
+	 */
+	cc->eng.reneg = 0;
+
+	if (server_name == NULL) {
+		cc->eng.server_name[0] = 0;
+	} else {
+		n = strlen(server_name) + 1;
+		if (n > sizeof cc->eng.server_name) {
+			br_ssl_engine_fail(&cc->eng, BR_ERR_BAD_PARAM);
+			return 0;
+		}
+		memcpy(cc->eng.server_name, server_name, n);
+	}
+
+	br_ssl_engine_hs_reset(&cc->eng,
+		br_ssl_hs_client_init_main, br_ssl_hs_client_run);
+	return br_ssl_engine_last_error(&cc->eng) == BR_ERR_OK;
+}
diff --git a/third_party/bearssl/src/ssl_client_default_rsapub.c b/third_party/bearssl/src/ssl_client_default_rsapub.c
new file mode 100644
index 0000000..2cdaab8
--- /dev/null
+++ b/third_party/bearssl/src/ssl_client_default_rsapub.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ssl.h: passes the result of br_rsa_public_get_default() to br_ssl_client_set_rsapub() */
+void
+br_ssl_client_set_default_rsapub(br_ssl_client_context *cc)
+{
+	br_ssl_client_set_rsapub(cc, br_rsa_public_get_default());
+}
diff --git a/third_party/bearssl/src/ssl_client_full.c b/third_party/bearssl/src/ssl_client_full.c
new file mode 100644
index 0000000..9814349
--- /dev/null
+++ b/third_party/bearssl/src/ssl_client_full.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ssl.h */
+void
+br_ssl_client_init_full(br_ssl_client_context *cc,
+	br_x509_minimal_context *xc,
+	const br_x509_trust_anchor *trust_anchors, size_t trust_anchors_num)
+{
+	/*
+	 * The "full" profile supports all implemented cipher suites.
+	 *
+	 * Rationale for suite order, from most important to least
+	 * important rule:
+	 *
+	 * -- Don't use 3DES if AES or ChaCha20 is available.
+	 * -- Try to have Forward Secrecy (ECDHE suite) if possible.
+	 * -- When not using Forward Secrecy, ECDH key exchange is
+	 *    better than RSA key exchange (slightly more expensive on the
+	 *    client, but much cheaper on the server, and it implies smaller
+	 *    messages).
+	 * -- ChaCha20+Poly1305 is better than AES/GCM (faster, smaller code).
+	 * -- GCM is better than CCM and CBC. CCM is better than CBC.
+	 * -- CCM is preferable over CCM_8 (with CCM_8, forgeries may succeed
+	 *    with probability 2^(-64)).
+	 * -- AES-128 is preferred over AES-256 (AES-128 is already
+	 *    strong enough, and AES-256 is 40% more expensive).
+	 */
+	static const uint16_t suites[] = {
+		BR_TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256,
+		BR_TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256,
+		BR_TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
+		BR_TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
+		BR_TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,
+		BR_TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,
+		BR_TLS_ECDHE_ECDSA_WITH_AES_128_CCM,
+		BR_TLS_ECDHE_ECDSA_WITH_AES_256_CCM,
+		BR_TLS_ECDHE_ECDSA_WITH_AES_128_CCM_8,
+		BR_TLS_ECDHE_ECDSA_WITH_AES_256_CCM_8,
+		BR_TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,
+		BR_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,
+		BR_TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384,
+		BR_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384,
+		BR_TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA,
+		BR_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,
+		BR_TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA,
+		BR_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,
+		BR_TLS_ECDH_ECDSA_WITH_AES_128_GCM_SHA256,
+		BR_TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256,
+		BR_TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384,
+		BR_TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384,
+		BR_TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA256,
+		BR_TLS_ECDH_RSA_WITH_AES_128_CBC_SHA256,
+		BR_TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384,
+		BR_TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384,
+		BR_TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA,
+		BR_TLS_ECDH_RSA_WITH_AES_128_CBC_SHA,
+		BR_TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA,
+		BR_TLS_ECDH_RSA_WITH_AES_256_CBC_SHA,
+		BR_TLS_RSA_WITH_AES_128_GCM_SHA256,
+		BR_TLS_RSA_WITH_AES_256_GCM_SHA384,
+		BR_TLS_RSA_WITH_AES_128_CCM,
+		BR_TLS_RSA_WITH_AES_256_CCM,
+		BR_TLS_RSA_WITH_AES_128_CCM_8,
+		BR_TLS_RSA_WITH_AES_256_CCM_8,
+		BR_TLS_RSA_WITH_AES_128_CBC_SHA256,
+		BR_TLS_RSA_WITH_AES_256_CBC_SHA256,
+		BR_TLS_RSA_WITH_AES_128_CBC_SHA,
+		BR_TLS_RSA_WITH_AES_256_CBC_SHA,
+		BR_TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA,
+		BR_TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA,
+		BR_TLS_ECDH_ECDSA_WITH_3DES_EDE_CBC_SHA,
+		BR_TLS_ECDH_RSA_WITH_3DES_EDE_CBC_SHA,
+		BR_TLS_RSA_WITH_3DES_EDE_CBC_SHA
+	};
+
+	/*
+	 * All hash functions are activated.
+	 * Note: the X.509 validation engine will nonetheless refuse to
+	 * validate signatures that use MD5 as hash function.
+	 */
+	static const br_hash_class *hashes[] = {
+		&br_md5_vtable,
+		&br_sha1_vtable,
+		&br_sha224_vtable,
+		&br_sha256_vtable,
+		&br_sha384_vtable,
+		&br_sha512_vtable
+	};
+
+	int id;
+
+	/*
+	 * Reset client context and set supported versions from TLS-1.0
+	 * to TLS-1.2 (inclusive).
+	 */
+	br_ssl_client_zero(cc);
+	br_ssl_engine_set_versions(&cc->eng, BR_TLS10, BR_TLS12);
+
+	/*
+	 * X.509 engine uses SHA-256 to hash certificate DN (for
+	 * comparisons).
+	 */
+	br_x509_minimal_init(xc, &br_sha256_vtable,
+		trust_anchors, trust_anchors_num);
+
+	/*
+	 * Set suites and asymmetric crypto implementations. RSA and ECDSA
+	 * use the implementations installed by the *_set_default_*()
+	 * calls below; the X.509 engine is then handed the same RSA
+	 * verification, EC and ECDSA implementations as the SSL engine.
+	 */
+	br_ssl_engine_set_suites(&cc->eng, suites,
+		(sizeof suites) / (sizeof suites[0]));
+	br_ssl_client_set_default_rsapub(cc);
+	br_ssl_engine_set_default_rsavrfy(&cc->eng);
+	br_ssl_engine_set_default_ecdsa(&cc->eng);
+	br_x509_minimal_set_rsa(xc, br_ssl_engine_get_rsavrfy(&cc->eng));
+	br_x509_minimal_set_ecdsa(xc,
+		br_ssl_engine_get_ec(&cc->eng),
+		br_ssl_engine_get_ecdsa(&cc->eng));
+
+	/*
+	 * Set supported hash functions, for both the SSL and X.509 engines.
+	 * NOTE(review): assumes IDs run 1..6 in hashes[] order -- confirm.
+	 */
+	for (id = br_md5_ID; id <= br_sha512_ID; id ++) {
+		const br_hash_class *hc;
+
+		hc = hashes[id - 1];
+		br_ssl_engine_set_hash(&cc->eng, id, hc);
+		br_x509_minimal_set_hash(xc, id, hc);
+	}
+
+	/*
+	 * Link the X.509 engine in the SSL engine.
+	 */
+	br_ssl_engine_set_x509(&cc->eng, &xc->vtable);
+
+	/*
+	 * Set the PRF implementations.
+	 */
+	br_ssl_engine_set_prf10(&cc->eng, &br_tls10_prf);
+	br_ssl_engine_set_prf_sha256(&cc->eng, &br_tls12_sha256_prf);
+	br_ssl_engine_set_prf_sha384(&cc->eng, &br_tls12_sha384_prf);
+
+	/*
+	 * Symmetric encryption. We use the "default" implementations
+	 * (fastest among constant-time implementations).
+	 */
+	br_ssl_engine_set_default_aes_cbc(&cc->eng);
+	br_ssl_engine_set_default_aes_ccm(&cc->eng);
+	br_ssl_engine_set_default_aes_gcm(&cc->eng);
+	br_ssl_engine_set_default_des_cbc(&cc->eng);
+	br_ssl_engine_set_default_chapol(&cc->eng);
+}
diff --git a/third_party/bearssl/src/ssl_engine.c b/third_party/bearssl/src/ssl_engine.c
new file mode 100644
index 0000000..f59fe1a
--- /dev/null
+++ b/third_party/bearssl/src/ssl_engine.c
@@ -0,0 +1,1584 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if 0
+/* obsolete */
+
+/*
+ * If BR_USE_URANDOM is not defined, then try to autodetect its presence
+ * through compiler macros.
+ */
+#ifndef BR_USE_URANDOM
+
+/*
+ * Macro values documented on:
+ *    https://sourceforge.net/p/predef/wiki/OperatingSystems/
+ *
+ * Only the most common systems have been included here for now. This
+ * should be enriched later on.
+ */
+#if defined _AIX \
+	|| defined __ANDROID__ \
+	|| defined __FreeBSD__ \
+	|| defined __NetBSD__ \
+	|| defined __OpenBSD__ \
+	|| defined __DragonFly__ \
+	|| defined __linux__ \
+	|| (defined __sun && (defined __SVR4 || defined __svr4__)) \
+	|| (defined __APPLE__ && defined __MACH__)
+#define BR_USE_URANDOM   1
+#endif
+
+#endif
+
+/*
+ * If BR_USE_WIN32_RAND is not defined, perform autodetection here.
+ */
+#ifndef BR_USE_WIN32_RAND
+
+#if defined _WIN32 || defined _WIN64
+#define BR_USE_WIN32_RAND   1
+#endif
+
+#endif
+
+#if BR_USE_URANDOM
+#include <sys/types.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#endif
+
+#if BR_USE_WIN32_RAND
+#include <windows.h>
+#include <wincrypt.h>
+#pragma comment(lib, "advapi32")
+#endif
+
+#endif
+
+/* ==================================================================== */
+/*
+ * This part of the file does the low-level record management.
+ */
+
+/*
+ * IMPLEMENTATION NOTES
+ * ====================
+ *
+ * In this file, we designate by "input" (and the "i" letter) the "recv"
+ * operations: incoming records from the peer, from which payload data
+ * is obtained, and must be extracted by the application (or the SSL
+ * handshake engine). Similarly, "output" (and the "o" letter) is for
+ * "send": payload data injected by the application (and SSL handshake
+ * engine), to be wrapped into records, that are then conveyed to the
+ * peer over the transport medium.
+ *
+ * The input and output buffers may be distinct or shared. When
+ * shared, input and output cannot occur concurrently; the caller
+ * must make sure that it never needs to output data while input
+ * data has been received. In practice, a shared buffer prevents
+ * pipelining of HTTP requests, or similar protocols; however, a
+ * shared buffer saves RAM.
+ *
+ * The input buffer is pointed to by 'ibuf' and has size 'ibuf_len';
+ * the output buffer is pointed to by 'obuf' and has size 'obuf_len'.
+ * From the size of these buffers is derived the maximum fragment
+ * length, which will be honoured upon sending records; regardless of
+ * that length, incoming records will be processed as long as they
+ * fit in the input buffer, and their length still complies with the
+ * protocol specification (maximum plaintext payload length is 16384
+ * bytes).
+ *
+ * Three registers are used to manage buffering in ibuf, called ixa,
+ * ixb and ixc. Similarly, three registers are used to manage buffering
+ * in obuf, called oxa, oxb and oxc.
+ *
+ *
+ * At any time, the engine is in one of the following modes:
+ * -- Failed mode: an error occurs, no I/O can happen.
+ * -- Input mode: the engine can either receive record bytes from the
+ * transport layer, or it has some buffered payload bytes to yield.
+ * -- Output mode: the engine can either receive payload bytes, or it
+ * has some record bytes to send to the transport layer.
+ * -- Input/Output mode: both input and output modes are active. When
+ * the buffer is shared, this can happen only when the buffer is empty
+ * (no buffered payload bytes or record bytes in either direction).
+ *
+ *
+ * Failed mode:
+ * ------------
+ *
+ * I/O failed for some reason (invalid received data, not enough room
+ * for the next record...). No I/O may ever occur again for this context,
+ * until an explicit reset is performed. This mode, and the error code,
+ * are also used for protocol errors, especially handshake errors.
+ *
+ *
+ * Input mode:
+ * -----------
+ *
+ *  ixa   index within ibuf[] for the currently read data
+ *  ixb   maximum index within ibuf[] for the currently read data
+ *  ixc   number of bytes not yet received for the current record
+ * 
+ * -- When ixa == ixb, there is no available data for readers. When
+ * ixa != ixb, there is available data and it starts at offset ixa.
+ *
+ * -- When waiting for the next record header, ixa and ixb are equal
+ * and contain a value ranging from 0 to 4; ixc is equal to 5-ixa.
+ *
+ * -- When the header has been received, record data is obtained. The
+ * ixc field records how many bytes are still needed to reach the
+ * end of the current record.
+ *
+ *    ** If encryption is active, then ixa and ixb are kept equal, and
+ *    point to the end of the currently received record bytes. When
+ *    ixc reaches 0, decryption/MAC is applied, and ixa and ixb are
+ *    adjusted.
+ *
+ *    ** If encryption is not active, then ixa and ixb are distinct
+ *    and data can be read right away. Additional record data is
+ *    obtained only when ixa == ixb.
+ *
+ * Note: in input mode and no encryption, records larger than the buffer
+ * size are allowed. When encryption is active, the complete record must
+ * fit within the buffer, since it cannot be decrypted/MACed until it
+ * has been completely received.
+ *
+ * -- When receiving the next record header, 'version_in' contains the
+ * expected input version (0 if not expecting a specific version); on
+ * mismatch, the mode switches to 'failed'.
+ *
+ * -- When the header has been received, 'version_in' contains the received
+ * version. It is up to the caller to check and adjust the 'version_in' field
+ * to implement the required semantics.
+ *
+ * -- The 'record_type_in' field is updated with the incoming record type
+ * when the next record header has been received.
+ *
+ *
+ * Output mode:
+ * ------------
+ *
+ *  oxa   index within obuf[] for the currently accumulated data
+ *  oxb   maximum index within obuf[] for record data
+ *  oxc   pointer for start of record data, and for record sending
+ *
+ * -- When oxa != oxb, more data can be accumulated into the current
+ * record; when oxa == oxb, a closed record is being sent.
+ *
+ * -- When accumulating data, oxc points to the start of the data.
+ *
+ * -- During record sending, oxa (and oxb) point to the next record byte
+ * to send, and oxc indicates the end of the current record.
+ *
+ * Note: sent records must fit within the buffer, since the header is
+ * adjusted only when the complete record has been assembled.
+ *
+ * -- The 'version_out' and 'record_type_out' fields are used to build the
+ * record header when the mode is switched to 'sending'.
+ *
+ *
+ * Modes:
+ * ------
+ *
+ * The state register iomode contains one of the following values:
+ *
+ *  BR_IO_FAILED   I/O failed
+ *  BR_IO_IN       input mode
+ *  BR_IO_OUT      output mode
+ *  BR_IO_INOUT    input/output mode
+ *
+ * Whether encryption is active on incoming records is indicated by the
+ * incrypt flag. For outgoing records, there is no such flag; "encryption"
+ * is always considered active, but initially uses functions that do not
+ * encrypt anything. The 'incrypt' flag is needed because when there is
+ * no active encryption, records larger than the I/O buffer are accepted.
+ *
+ * Note: we do not support no-encryption modes (MAC only).
+ *
+ * TODO: implement GCM support
+ *
+ *
+ * Misc:
+ * -----
+ *
+ * 'max_frag_len' is the maximum plaintext size for an outgoing record.
+ * By default, it is set to the maximum value that fits in the provided
+ * buffers, in the following list: 512, 1024, 2048, 4096, 16384. The
+ * caller may change it if needed, but the new value MUST still fit in
+ * the buffers, and it MUST be one of the list above for compatibility
+ * with the Maximum Fragment Length extension.
+ *
+ * For incoming records, only the total buffer length and current
+ * encryption mode impact the maximum length for incoming records. The
+ * 'max_frag_len' value is still adjusted so that records up to that
+ * length can be both received and sent.
+ *
+ *
+ * Offsets and lengths:
+ * --------------------
+ *
+ * When sending fragments with TLS-1.1+, the maximum overhead is:
+ *   5 bytes for the record header
+ *   16 bytes for the explicit IV
+ *   48 bytes for the MAC (HMAC/SHA-384)
+ *   16 bytes for the padding (AES)
+ * so a total of 85 extra bytes. Note that we support block cipher sizes
+ * up to 16 bytes (AES) and HMAC output sizes up to 48 bytes (SHA-384).
+ *
+ * With TLS-1.0 and CBC mode, we apply a 1/n-1 split, for a maximum
+ * overhead of:
+ *   5 bytes for the first record header
+ *   32 bytes for the first record payload (AES-CBC + HMAC/SHA-1)
+ *   5 bytes for the second record header
+ *   20 bytes for the MAC (HMAC/SHA-1)
+ *   16 bytes for the padding (AES)
+ *   -1 byte to account for the payload byte in the first record
+ * so a total of 77 extra bytes at most, less than the 85 bytes above.
+ * Note that with TLS-1.0, the MAC is HMAC with either MD5 or SHA-1, but
+ * no other hash function.
+ *
+ * The implementation does not try to send larger records when the current
+ * encryption mode has less overhead.
+ *
+ * Maximum input record overhead is:
+ *   5 bytes for the record header
+ *   16 bytes for the explicit IV (TLS-1.1+)
+ *   48 bytes for the MAC (HMAC/SHA-384)
+ *   256 bytes for the padding
+ * so a total of 325 extra bytes.
+ *
+ * When receiving the next record header, it is written into the buffer
+ * bytes 0 to 4 (inclusive). Record data is always written into buf[]
+ * starting at offset 5. When encryption is active, the plaintext data
+ * may start at a larger offset (e.g. because of an explicit IV).
+ */
+
+#define MAX_OUT_OVERHEAD    85
+#define MAX_IN_OVERHEAD    325
+
+/* see inner.h -- does nothing if the engine is already in failed mode */
+void
+br_ssl_engine_fail(br_ssl_engine_context *rc, int err)
+{
+	if (rc->iomode != BR_IO_FAILED) {
+		rc->iomode = BR_IO_FAILED;
+		rc->err = err;
+	}
+}
+
+/*
+ * Prepare the input registers for a new record header (5 bytes expected).
+ */
+static void
+make_ready_in(br_ssl_engine_context *rc)
+{
+	if (rc->iomode == BR_IO_IN) {
+		rc->iomode = BR_IO_INOUT;
+	}
+	rc->ixc = 5;
+	rc->ixa = rc->ixb = 0;
+}
+
+/*
+ * Prepare the output registers for accumulating a new outgoing record.
+ */
+static void
+make_ready_out(br_ssl_engine_context *rc)
+{
+	size_t start, end;
+
+	/* Bounds handed to the record layer, which may adjust them. */
+	start = 5;
+	end = rc->obuf_len - start;
+	rc->out.vtable->max_plaintext(&rc->out.vtable, &start, &end);
+	if (rc->max_frag_len < (end - start)) {
+		end = start + rc->max_frag_len;
+	}
+	rc->oxa = rc->oxc = start;
+	rc->oxb = end;
+	if (rc->iomode == BR_IO_OUT) {
+		rc->iomode = BR_IO_INOUT;
+	}
+}
+
+/* see inner.h */
+void
+br_ssl_engine_new_max_frag_len(br_ssl_engine_context *rc, unsigned max_frag_len)
+{
+	size_t limit;
+
+	rc->max_frag_len = max_frag_len;
+	limit = rc->oxc + max_frag_len;
+	if (rc->oxa < limit && limit < rc->oxb && rc->oxa < rc->oxb) {
+		rc->oxb = limit;
+	}
+}
+
+/* see bearssl_ssl.h -- bidi != 0 splits buf into separate in/out areas */
+void
+br_ssl_engine_set_buffer(br_ssl_engine_context *rc,
+	void *buf, size_t buf_len, int bidi)
+{
+	if (buf == NULL) {
+		br_ssl_engine_set_buffers_bidi(rc, NULL, 0, NULL, 0);
+	} else {
+		/*
+		 * In bidirectional mode, we want to maximise input
+		 * buffer size, since we support arbitrary fragmentation
+		 * when sending, but the peer will not necessarily
+		 * comply with any low fragment length (in particular if
+		 * we are the server, because the maximum fragment
+		 * length extension is under client control).
+		 *
+		 * We keep a minimum size of 512 bytes for the plaintext
+		 * of our outgoing records. The area is split in two
+		 * disjoint parts: the first (buf_len - w) bytes are the
+		 * input buffer and the last w bytes are the output
+		 * buffer. br_ssl_engine_set_buffers_bidi() then derives
+		 * the maximum fragment length from the smaller of the
+		 * two parts, rounded down to a standard length.
+		 */
+		if (bidi) {
+			size_t w;
+
+			if (buf_len < (512 + MAX_IN_OVERHEAD
+				+ 512 + MAX_OUT_OVERHEAD))
+			{
+				rc->iomode = BR_IO_FAILED;
+				rc->err = BR_ERR_BAD_PARAM;
+				return;
+			} else if (buf_len < (16384 + MAX_IN_OVERHEAD
+				+ 512 + MAX_OUT_OVERHEAD))
+			{
+				w = 512 + MAX_OUT_OVERHEAD;
+			} else {
+				w = buf_len - (16384 + MAX_IN_OVERHEAD);
+			}
+			br_ssl_engine_set_buffers_bidi(rc,
+				buf, buf_len - w,
+				(unsigned char *)buf + (buf_len - w), w);
+		} else {
+			br_ssl_engine_set_buffers_bidi(rc,
+				buf, buf_len, NULL, 0);
+		}
+	}
+}
+
+/* see bearssl_ssl.h */
+void
+br_ssl_engine_set_buffers_bidi(br_ssl_engine_context *rc,
+	void *ibuf, size_t ibuf_len, void *obuf, size_t obuf_len)
+{
+	rc->iomode = BR_IO_INOUT;
+	rc->incrypt = 0;
+	rc->err = BR_ERR_OK;
+	rc->version_in = 0;
+	rc->record_type_in = 0;
+	rc->version_out = 0;
+	rc->record_type_out = 0;
+	if (ibuf == NULL) { /* keep the previously configured buffers */
+		if (rc->ibuf == NULL) {
+			br_ssl_engine_fail(rc, BR_ERR_BAD_PARAM);
+		}
+	} else {
+		unsigned u;
+
+		rc->ibuf = ibuf;
+		rc->ibuf_len = ibuf_len;
+		if (obuf == NULL) { /* no output buffer: share the input one */
+			obuf = ibuf;
+			obuf_len = ibuf_len;
+		}
+		rc->obuf = obuf;
+		rc->obuf_len = obuf_len;
+
+		/*
+		 * Compute the maximum fragment length, that fits for
+		 * both incoming and outgoing records. This length will
+		 * be used in fragment length negotiation, so we must
+		 * honour it both ways. Regardless, larger incoming
+		 * records will be accepted, as long as they fit in the
+		 * actual buffer size.
+		 */
+		for (u = 14; u >= 9; u --) {
+			size_t flen;
+
+			flen = (size_t)1 << u;
+			if (obuf_len >= flen + MAX_OUT_OVERHEAD
+				&& ibuf_len >= flen + MAX_IN_OVERHEAD)
+			{
+				break;
+			}
+		}
+		if (u == 8) { /* loop exhausted: not even 512 bytes fit */
+			br_ssl_engine_fail(rc, BR_ERR_BAD_PARAM);
+			return;
+		} else if (u == 13) { /* 8192 is demoted to 4096 */
+			u = 12;
+		}
+		rc->max_frag_len = (size_t)1 << u;
+		rc->log_max_frag_len = u;
+		rc->peer_log_max_frag_len = 0;
+	}
+	rc->out.vtable = &br_sslrec_out_clear_vtable;
+	make_ready_in(rc);
+	make_ready_out(rc);
+}
+
+/*
+ * Reset both the input and the output registers for a new record.
+ */
+static void
+engine_clearbuf(br_ssl_engine_context *rc)
+{
+	make_ready_in(rc);
+	make_ready_out(rc);
+}
+
+/*
+ * Initialise the internal HMAC_DRBG if that was not done already. This
+ * does not seed it. Returned value is 1 on success, 0 on error.
+ */
+static int
+rng_init(br_ssl_engine_context *cc)
+{
+	const br_hash_class *dig;
+
+	if (cc->rng_init_done != 0) {
+		return 1;
+	}
+
+	/*
+	 * Pick the hash function that backs HMAC_DRBG, trying SHA-256
+	 * first, then SHA-384, then SHA-1; the first one configured in
+	 * the engine is used.
+	 *
+	 * TLS-1.2 requires SHA-256 or SHA-384 (SHA-256 being faster on
+	 * 32-bit systems), and TLS-1.0/1.1 require SHA-1, so one of them
+	 * should be present. SHA-1 comes last: HMAC_DRBG/SHA-1 is not
+	 * known to be weak, and SHA-1 collision issues are mostly
+	 * irrelevant here, but the SHA-2 functions make people less
+	 * nervous.
+	 */
+	dig = br_multihash_getimpl(&cc->mhash, br_sha256_ID);
+	if (dig == NULL) {
+		dig = br_multihash_getimpl(&cc->mhash, br_sha384_ID);
+	}
+	if (dig == NULL) {
+		dig = br_multihash_getimpl(&cc->mhash, br_sha1_ID);
+	}
+	if (dig == NULL) {
+		br_ssl_engine_fail(cc, BR_ERR_BAD_STATE);
+		return 0;
+	}
+
+	br_hmac_drbg_init(&cc->rng, dig, NULL, 0);
+	cc->rng_init_done = 1;
+	return 1;
+}
+
+/* see inner.h */
+int
+br_ssl_engine_init_rand(br_ssl_engine_context *cc)
+{
+	if (!rng_init(cc)) {
+		return 0;
+	}
+
+	/*
+	 * The OS/hardware seeder is tried only if that was not done yet
+	 * (rng_os_rand_done). Success marks the PRNG as properly seeded;
+	 * otherwise we rely on injected entropy, and fail without it.
+	 */
+	if (!cc->rng_os_rand_done) {
+		br_prng_seeder seeder;
+
+		seeder = br_prng_seeder_system(NULL);
+		if (seeder != 0 && seeder(&cc->rng.vtable)) {
+			cc->rng_init_done = 2;
+		}
+		cc->rng_os_rand_done = 1;
+	}
+	if (cc->rng_init_done >= 2) {
+		return 1;
+	}
+	br_ssl_engine_fail(cc, BR_ERR_NO_RANDOM);
+	return 0;
+}
+
+/* see bearssl_ssl.h */
+void
+br_ssl_engine_inject_entropy(br_ssl_engine_context *cc,
+	const void *data, size_t len)
+{
+	/*
+	 * Externally provided entropy is assumed to be "good enough"
+	 * (we cannot really test its quality) so if the RNG structure
+	 * could be initialised at all, then we mark the RNG as
+	 * "properly seeded".
+	 */
+	if (!rng_init(cc)) {
+		return;
+	}
+	br_hmac_drbg_update(&cc->rng, data, len);
+	cc->rng_init_done = 2;
+}
+
+/*
+ * We define a few internal functions that implement the low-level engine
+ * API for I/O; the external API (br_ssl_engine_sendapp_buf() and similar
+ * functions) is built upon these functions, with special processing for
+ * records which are not of type "application data".
+ *
+ *   recvrec_buf, recvrec_ack     receives bytes from transport medium
+ *   sendrec_buf, sendrec_ack     send bytes to transport medium
+ *   recvpld_buf, recvpld_ack     receives payload data from engine
+ *   sendpld_buf, sendpld_ack     send payload data to engine
+ */
+
+static unsigned char *
+recvrec_buf(const br_ssl_engine_context *rc, size_t *len)
+{
+	size_t room, want;
+
+	/*
+	 * Record bytes are accepted from the transport only when all
+	 * of the following hold:
+	 *  -- reception has not been shut down;
+	 *  -- the mode is input-capable (in or in/out);
+	 *  -- ixa == ixb.
+	 */
+	if (rc->shutdown_recv
+		|| (rc->iomode != BR_IO_IN && rc->iomode != BR_IO_INOUT)
+		|| rc->ixa != rc->ixb)
+	{
+		*len = 0;
+		return NULL;
+	}
+
+	/*
+	 * ixc is the number of bytes still expected; it may exceed the
+	 * room left in the buffer, so the returned length is clamped.
+	 *
+	 * We cannot get "stuck" here (buffer is full, but still more
+	 * data is expected) because oversized records are detected when
+	 * their header is processed.
+	 */
+	room = rc->ibuf_len - rc->ixa;
+	want = rc->ixc;
+	if (want > room) {
+		want = room;
+	}
+	*len = want;
+	return rc->ibuf + rc->ixa;
+}
+
+static void
+recvrec_ack(br_ssl_engine_context *rc, size_t len)
+{
+	unsigned char *pbuf;
+	size_t pbuf_len;
+
+	/*
+	 * Adjust state if necessary (for a shared input/output buffer):
+	 * we got some incoming bytes, so we cannot (temporarily) handle
+	 * outgoing data.
+	 */
+	if (rc->iomode == BR_IO_INOUT && rc->ibuf == rc->obuf) {
+		rc->iomode = BR_IO_IN;
+	}
+
+	/*
+	 * Adjust data pointers.
+	 */
+	rc->ixb = (rc->ixa += len);
+	rc->ixc -= len;
+
+	/*
+	 * If we are receiving a header and have not fully obtained it
+	 * yet, then just wait for the next bytes.
+	 */
+	if (rc->ixa < 5) {
+		return;
+	}
+
+	/*
+	 * If we just obtained a full header, process it.
+	 */
+	if (rc->ixa == 5) {
+		unsigned version;
+		unsigned rlen;
+
+		/*
+		 * Get record type and version. We support only versions
+		 * 3.x (if the version major number does not match, then
+		 * we suppose that the record format is too alien for us
+		 * to process it).
+		 *
+		 * Note: right now, we reject clients that try to send
+		 * a ClientHello in a format compatible with SSL-2.0. It
+		 * is unclear whether this will ever be supported; and
+		 * if we want to support it, then this might be done
+		 * in the server-specific code, not here.
+		 */
+		rc->record_type_in = rc->ibuf[0];
+		version = br_dec16be(rc->ibuf + 1);
+		if ((version >> 8) != 3) {
+			br_ssl_engine_fail(rc, BR_ERR_UNSUPPORTED_VERSION);
+			return;
+		}
+
+		/*
+		 * We ensure that successive records have the same
+		 * version. The handshake code must check and adjust the
+		 * variables when necessary to accommodate the protocol
+		 * negotiation details.
+		 */
+		if (rc->version_in != 0 && rc->version_in != version) {
+			br_ssl_engine_fail(rc, BR_ERR_BAD_VERSION);
+			return;
+		}
+		rc->version_in = version;
+
+		/*
+		 * Decode record length. We must check that the length
+		 * is valid (relatively to the current encryption mode)
+		 * and also (if encryption is active) that the record
+		 * will fit in our buffer.
+		 *
+		 * When no encryption is active, we can process records
+		 * by chunks, and thus accept any record up to the
+		 * maximum allowed plaintext length (16384 bytes).
+		 */
+		rlen = br_dec16be(rc->ibuf + 3);
+		if (rc->incrypt) {
+			if (!rc->in.vtable->check_length(
+				&rc->in.vtable, rlen))
+			{
+				br_ssl_engine_fail(rc, BR_ERR_BAD_LENGTH);
+				return;
+			}
+			if (rlen > (rc->ibuf_len - 5)) {
+				br_ssl_engine_fail(rc, BR_ERR_TOO_LARGE);
+				return;
+			}
+		} else {
+			if (rlen > 16384) {
+				br_ssl_engine_fail(rc, BR_ERR_BAD_LENGTH);
+				return;
+			}
+		}
+
+		/*
+		 * If the record is completely empty then we must switch
+		 * to a new record. Note that, in that case, we
+		 * completely ignore the record type, which is fitting
+		 * since we received no actual data of that type.
+		 *
+		 * A completely empty record is technically allowed as
+		 * long as encryption/MAC is not active, i.e. before
+		 * completion of the first handshake. It is still weird;
+		 * it might conceptually be useful as a heartbeat or
+		 * keep-alive mechanism while some lengthy operation is
+		 * going on, e.g. interaction with a human user.
+		 */
+		if (rlen == 0) {
+			make_ready_in(rc);
+		} else {
+			rc->ixa = rc->ixb = 5;
+			rc->ixc = rlen;
+		}
+		return;
+	}
+
+	/*
+	 * If there is no active encryption, then the data can be read
+	 * right away. Note that we do not receive bytes from the
+	 * transport medium when we still have payload bytes to be
+	 * acknowledged.
+	 */
+	if (!rc->incrypt) {
+		rc->ixa = 5;
+		return;
+	}
+
+	/*
+	 * Since encryption is active, we must wait for a full record
+	 * before processing it.
+	 */
+	if (rc->ixc != 0) {
+		return;
+	}
+
+	/*
+	 * We got the full record. Decrypt it.
+	 */
+	pbuf_len = rc->ixa - 5;
+	pbuf = rc->in.vtable->decrypt(&rc->in.vtable,
+		rc->record_type_in, rc->version_in, rc->ibuf + 5, &pbuf_len);
+	if (pbuf == 0) {
+		br_ssl_engine_fail(rc, BR_ERR_BAD_MAC);
+		return;
+	}
+	rc->ixa = (size_t)(pbuf - rc->ibuf);
+	rc->ixb = rc->ixa + pbuf_len;
+
+	/*
+	 * Decryption may have yielded an empty record, in which case
+	 * we get back to "ready" state immediately.
+	 */
+	if (rc->ixa == rc->ixb) {
+		make_ready_in(rc);
+	}
+}
+
+/* see inner.h */
+int
+br_ssl_engine_recvrec_finished(const br_ssl_engine_context *rc)
+{
+	/*
+	 * Modes that are not input-capable always report "finished".
+	 */
+	if (rc->iomode != BR_IO_IN && rc->iomode != BR_IO_INOUT) {
+		return 1;
+	}
+	return rc->ixa < 5 || rc->ixc == 0;
+}
+
+static unsigned char *
+recvpld_buf(const br_ssl_engine_context *rc, size_t *len)
+{
+	size_t avail;
+
+	/*
+	 * Payload bytes can be read only in an input-capable mode
+	 * (in or in/out), and only when ixa and ixb differ; the
+	 * readable payload is then the ixb - ixa bytes at offset ixa.
+	 */
+	avail = 0;
+	if (rc->iomode == BR_IO_IN || rc->iomode == BR_IO_INOUT) {
+		avail = rc->ixb - rc->ixa;
+	}
+	*len = avail;
+	return (avail == 0) ? NULL : (rc->ibuf + rc->ixa);
+}
+
+static void
+recvpld_ack(br_ssl_engine_context *rc, size_t len)
+{
+	rc->ixa += len;
+	if (rc->ixa != rc->ixb) {
+		return;
+	}
+	/*
+	 * All available payload was consumed. If the current record
+	 * still has pending bytes (possible only when encryption is
+	 * not active), get ready to receive them right after the
+	 * header; otherwise, get ready for the next record.
+	 */
+	if (rc->ixc != 0) {
+		rc->ixa = rc->ixb = 5;
+	} else {
+		make_ready_in(rc);
+	}
+}
+
+static unsigned char *
+sendpld_buf(const br_ssl_engine_context *rc, size_t *len)
+{
+	size_t room;
+
+	/*
+	 * Payload bytes may be injected only in an output-capable mode
+	 * (out or in/out), and only as long as oxa and oxb differ;
+	 * the room left for payload is then oxb - oxa bytes.
+	 */
+	room = 0;
+	if (rc->iomode == BR_IO_OUT || rc->iomode == BR_IO_INOUT) {
+		room = rc->oxb - rc->oxa;
+	}
+	*len = room;
+	return (room == 0) ? NULL : (rc->obuf + rc->oxa);
+}
+
+/*
+ * Wrap the payload bytes accumulated so far into an outgoing record.
+ * Nothing is done if oxa == oxb, or if no payload byte is pending and
+ * 'force' is zero; a non-zero 'force' yields an empty record then.
+ *
+ * The caller must take care not to invoke this function if the engine
+ * is not currently ready to receive payload bytes to send.
+ */
+static void
+sendpld_flush(br_ssl_engine_context *rc, int force)
+{
+	unsigned char *rec;
+	size_t plen;
+
+	if (rc->oxb == rc->oxa) {
+		return;
+	}
+	plen = rc->oxa - rc->oxc;
+	if (!force && plen == 0) {
+		return;
+	}
+	rec = rc->out.vtable->encrypt(&rc->out.vtable,
+		rc->record_type_out, rc->version_out,
+		rc->obuf + rc->oxc, &plen);
+	rc->oxb = rc->oxa = (size_t)(rec - rc->obuf);
+	rc->oxc = rc->oxa + plen;
+}
+
+static void
+sendpld_ack(br_ssl_engine_context *rc, size_t len)
+{
+	/*
+	 * If using a shared buffer, then we may have to modify the
+	 * current mode.
+	 */
+	if (rc->iomode == BR_IO_INOUT && rc->ibuf == rc->obuf) {
+		rc->iomode = BR_IO_OUT;
+	}
+	rc->oxa += len;
+	if (rc->oxa >= rc->oxb) {
+		/*
+		 * Set oxb to one more than oxa so that sendpld_flush()
+		 * does not mistakenly believe that a record is
+		 * already prepared and being sent.
+		 */
+		rc->oxb = rc->oxa + 1;
+		sendpld_flush(rc, 0);
+	}
+}
+
+static unsigned char *
+sendrec_buf(const br_ssl_engine_context *rc, size_t *len)
+{
+	int out_ok;
+
+	/*
+	 * oxc has two roles. While payload bytes are still being
+	 * gathered it marks the start of the record data (oxc <= oxa);
+	 * once a full record has been assembled it marks the end of
+	 * that record (oxc > oxa). Record bytes are therefore pending
+	 * exactly when oxc > oxa, and they are handed out only in the
+	 * BR_IO_OUT and BR_IO_INOUT modes.
+	 */
+	out_ok = rc->iomode == BR_IO_OUT || rc->iomode == BR_IO_INOUT;
+	if (!out_ok || rc->oxc <= rc->oxa) {
+		*len = 0;
+		return NULL;
+	}
+	*len = rc->oxc - rc->oxa;
+	return rc->obuf + rc->oxa;
+}
+
+static void
+sendrec_ack(br_ssl_engine_context *rc, size_t len)
+{
+	rc->oxb = (rc->oxa += len);	/* skip 'len' sent bytes and keep oxb equal to oxa */
+	if (rc->oxa == rc->oxc) {	/* oxa reached the record end: everything was sent */
+		make_ready_out(rc);
+	}
+}
+
+/*
+ * Test whether there is some buffered outgoing record that must still
+ * be sent: oxa == oxb marks a prepared record, oxa != oxc means bytes remain.
+ */
+static inline int
+has_rec_tosend(const br_ssl_engine_context *rc)
+{
+	return rc->oxa == rc->oxb && rc->oxa != rc->oxc;
+}
+
+/*
+ * Plaintext bounds for the "no encryption" mode. There is no overhead,
+ * so *start is left untouched and *end is pulled back only when the
+ * window exceeds 16384 bytes; smaller fragment limits are the caller's job.
+ */
+static void
+clear_max_plaintext(const br_sslrec_out_clear_context *cc,
+	size_t *start, size_t *end)
+{
+	size_t first;
+
+	(void)cc;
+	first = *start;
+	if (*end - first > 16384) {
+		*end = first + 16384;
+	}
+}
+
+/*
+ * In "no encryption" mode the payload is left as is; only the 5-byte header
+ * is written just before it. Returns the header start; *data_len grows by 5.
+ */
+static unsigned char *
+clear_encrypt(br_sslrec_out_clear_context *cc,
+	int record_type, unsigned version, void *data, size_t *data_len)
+{
+	unsigned char *buf;
+
+	(void)cc;
+	buf = (unsigned char *)data - 5;	/* header occupies the 5 bytes preceding 'data' */
+	buf[0] = record_type;	/* byte 0: record type */
+	br_enc16be(buf + 1, version);	/* bytes 1-2: version, via br_enc16be() */
+	br_enc16be(buf + 3, *data_len);	/* bytes 3-4: payload length, via br_enc16be() */
+	*data_len += 5;	/* report header + payload length */
+	return buf;
+}
+
+/* see bearssl_ssl.h -- vtable for outgoing records in "no encryption" mode */
+const br_sslrec_out_class br_sslrec_out_clear_vtable = {
+	sizeof(br_sslrec_out_clear_context),	/* size of the matching context structure */
+	(void (*)(const br_sslrec_out_class *const *, size_t *, size_t *))
+		&clear_max_plaintext,	/* cast: the callback takes the concrete context type */
+	(unsigned char *(*)(const br_sslrec_out_class **,
+		int, unsigned, void *, size_t *))
+		&clear_encrypt	/* cast for the same reason */
+};
+
+/* ==================================================================== */
+/*
+ * In this part of the file, we handle the various record types, and
+ * communications with the handshake processor.
+ */
+
+/*
+ * IMPLEMENTATION NOTES
+ * ====================
+ *
+ * The handshake processor is written in T0 and runs as a coroutine.
+ * It receives the contents of all records except application data, and
+ * is responsible for producing the contents of all records except
+ * application data.
+ *
+ * A state flag is maintained, which specifies whether application data
+ * is acceptable or not. When it is set:
+ *
+ * -- Application data can be injected as payload data (provided that
+ *    the output buffer is ready for that).
+ *
+ * -- Incoming application data records are accepted, and yield data
+ *    that the caller may retrieve.
+ *
+ * When the flag is cleared, application data is not accepted from the
+ * application, and incoming application data records trigger an error.
+ *
+ *
+ * Records of type handshake, alert or change-cipher-spec are handled
+ * by the handshake processor. The handshake processor is written in T0
+ * and runs as a coroutine; it gets invoked whenever one of the following
+ * situations is reached:
+ *
+ * -- An incoming record has type handshake, alert or change-cipher-spec,
+ *    and yields data that can be read (zero-length records are thus
+ *    ignored).
+ *
+ * -- An outgoing record has just finished being sent, and the "application
+ *    data" flag is cleared.
+ *
+ * -- The caller wishes to perform a close (call to br_ssl_engine_close()).
+ *
+ * -- The caller wishes to perform a renegotiation (call to
+ *    br_ssl_engine_renegotiate()).
+ *
+ * Whenever the handshake processor is entered, access to the payload
+ * buffers is provided, along with some information about explicit
+ * closures or renegotiations.
+ */
+
+/* see bearssl_ssl.h -- copy 'suites_num' suite identifiers into cc->suites_buf; fails with BR_ERR_BAD_PARAM if they do not fit */
+void
+br_ssl_engine_set_suites(br_ssl_engine_context *cc,
+	const uint16_t *suites, size_t suites_num)
+{
+	if (suites_num > (sizeof cc->suites_buf) / (sizeof *suites)) {	/* compare by division: suites_num * size could wrap around */
+		br_ssl_engine_fail(cc, BR_ERR_BAD_PARAM);
+		return;
+	}
+	memcpy(cc->suites_buf, suites, suites_num * sizeof *suites);	/* cannot overflow: suites_num was bounded above */
+	cc->suites_num = suites_num;
+}
+
+/*
+ * Give control to handshake processor. 'action' is 1 for a close,
+ * 2 for a renegotiation, or 0 for a jump due to I/O completion.
+ */
+static void
+jump_handshake(br_ssl_engine_context *cc, int action)
+{
+	/*
+	 * We use a loop because the handshake processor actions may
+	 * allow for more actions; namely, if the processor reads all
+	 * input data, then it may allow for output data to be produced,
+	 * in case of a shared in/out buffer.
+	 */
+	for (;;) {
+		size_t hlen_in, hlen_out;
+
+		/*
+		 * Get input buffer. We do not want to provide
+		 * application data to the handshake processor (we could
+		 * get called with an explicit close or renegotiation
+		 * while there is application data ready to be read).
+		 */
+		cc->hbuf_in = recvpld_buf(cc, &hlen_in);
+		if (cc->hbuf_in != NULL
+			&& cc->record_type_in == BR_SSL_APPLICATION_DATA)
+		{
+			hlen_in = 0;	/* hide application data: offer zero input bytes */
+		}
+
+		/*
+		 * Get output buffer. The handshake processor never
+		 * leaves an unfinished outgoing record, so if there is
+		 * buffered output, then it MUST be some application
+		 * data, so the processor cannot write to it.
+		 */
+		cc->saved_hbuf_out = cc->hbuf_out = sendpld_buf(cc, &hlen_out);
+		if (cc->hbuf_out != NULL && br_ssl_engine_has_pld_to_send(cc)) {
+			hlen_out = 0;	/* pending application payload: offer zero output room */
+		}
+
+		/*
+		 * Note: hlen_in and hlen_out can be both non-zero only if
+		 * the input and output buffers are disjoint. Thus, we can
+		 * offer both buffers to the handshake code.
+		 */
+
+		cc->hlen_in = hlen_in;
+		cc->hlen_out = hlen_out;
+		cc->action = action;
+		cc->hsrun(&cc->cpu);	/* run the handshake processor */
+		if (br_ssl_engine_closed(cc)) {	/* engine is closed after the run: nothing to acknowledge */
+			return;
+		}
+		if (cc->hbuf_out != cc->saved_hbuf_out) {
+			sendpld_ack(cc, cc->hbuf_out - cc->saved_hbuf_out);	/* bytes produced = distance hbuf_out moved */
+		}
+		if (hlen_in != cc->hlen_in) {
+			recvpld_ack(cc, hlen_in - cc->hlen_in);	/* bytes consumed = decrease of hlen_in */
+			if (cc->hlen_in == 0) {
+				/*
+				 * We read all data bytes, which may have
+				 * released the output buffer in case it
+				 * is shared with the input buffer, and
+				 * the handshake code might be waiting for
+				 * that.
+				 */
+				action = 0;	/* the re-run is an I/O completion jump, not a close/renegotiation */
+				continue;
+			}
+		}
+		break;
+	}
+}
+
+/* see inner.h -- acknowledge handshake output written so far, wrap it into a record, then refresh the handshake output buffer */
+void
+br_ssl_engine_flush_record(br_ssl_engine_context *cc)
+{
+	if (cc->hbuf_out != cc->saved_hbuf_out) {
+		sendpld_ack(cc, cc->hbuf_out - cc->saved_hbuf_out);	/* bytes written = distance hbuf_out moved */
+	}
+	if (br_ssl_engine_has_pld_to_send(cc)) {
+		sendpld_flush(cc, 0);	/* force == 0: no empty record is produced */
+	}
+	cc->saved_hbuf_out = cc->hbuf_out = sendpld_buf(cc, &cc->hlen_out);	/* new baseline for the next acknowledgement */
+}
+
+/* see bearssl_ssl.h -- buffer where outgoing application data may be written, or NULL with *len == 0 */
+unsigned char *
+br_ssl_engine_sendapp_buf(const br_ssl_engine_context *cc, size_t *len)
+{
+	if (!(cc->application_data & 1)) {	/* low bit clear: application data is not accepted */
+		*len = 0;
+		return NULL;
+	}
+	return sendpld_buf(cc, len);
+}
+
+/* see bearssl_ssl.h -- report that 'len' bytes of application data were written; forwards to sendpld_ack() */
+void
+br_ssl_engine_sendapp_ack(br_ssl_engine_context *cc, size_t len)
+{
+	sendpld_ack(cc, len);
+}
+
+/* see bearssl_ssl.h -- buffer holding received application data, or NULL with *len == 0 */
+unsigned char *
+br_ssl_engine_recvapp_buf(const br_ssl_engine_context *cc, size_t *len)
+{
+	if (!(cc->application_data & 1)	/* application data not accepted ... */
+		|| cc->record_type_in != BR_SSL_APPLICATION_DATA)	/* ... or the incoming record is of another type */
+	{
+		*len = 0;
+		return NULL;
+	}
+	return recvpld_buf(cc, len);
+}
+
+/* see bearssl_ssl.h -- report that 'len' bytes of application data were read; forwards to recvpld_ack() */
+void
+br_ssl_engine_recvapp_ack(br_ssl_engine_context *cc, size_t len)
+{
+	recvpld_ack(cc, len);
+}
+
+/* see bearssl_ssl.h -- pending outgoing record bytes, or NULL with *len == 0; forwards to sendrec_buf() */
+unsigned char *
+br_ssl_engine_sendrec_buf(const br_ssl_engine_context *cc, size_t *len)
+{
+	return sendrec_buf(cc, len);
+}
+
+/* see bearssl_ssl.h -- report that 'len' record bytes were sent; may hand control to the handshake processor */
+void
+br_ssl_engine_sendrec_ack(br_ssl_engine_context *cc, size_t len)
+{
+	sendrec_ack(cc, len);
+	if (len != 0 && !has_rec_tosend(cc)	/* something was sent and no record bytes remain ... */
+		&& (cc->record_type_out != BR_SSL_APPLICATION_DATA	/* ... and it was not application data ... */
+		|| (cc->application_data & 1) == 0))	/* ... or application data is currently not accepted */
+	{
+		jump_handshake(cc, 0);	/* action 0: jump due to I/O completion */
+	}
+}
+
+/* see bearssl_ssl.h -- buffer for incoming record bytes; forwards to recvrec_buf() */
+unsigned char *
+br_ssl_engine_recvrec_buf(const br_ssl_engine_context *cc, size_t *len)
+{
+	return recvrec_buf(cc, len);
+}
+
+/* see bearssl_ssl.h -- report that 'len' record bytes were received, then dispatch any resulting payload by record type */
+void
+br_ssl_engine_recvrec_ack(br_ssl_engine_context *cc, size_t len)
+{
+	unsigned char *buf;
+
+	recvrec_ack(cc, len);
+	if (br_ssl_engine_closed(cc)) {	/* engine is closed after processing the bytes: stop here */
+		return;
+	}
+
+	/*
+	 * We just received some bytes from the peer. This may have
+	 * yielded some payload bytes, in which case we must process
+	 * them according to the record type.
+	 */
+	buf = recvpld_buf(cc, &len);	/* 'len' is reused: it now holds the payload length */
+	if (buf != NULL) {
+		switch (cc->record_type_in) {
+		case BR_SSL_CHANGE_CIPHER_SPEC:
+		case BR_SSL_ALERT:
+		case BR_SSL_HANDSHAKE:
+			jump_handshake(cc, 0);	/* these types are consumed by the handshake processor */
+			break;
+		case BR_SSL_APPLICATION_DATA:
+			if (cc->application_data == 1) {	/* accepted: leave the data in place for the application */
+				break;
+			}
+
+			/*
+			 * If we are currently closing, and waiting for
+			 * a close_notify from the peer, then incoming
+			 * application data should be discarded.
+			 */
+			if (cc->application_data == 2) {
+				recvpld_ack(cc, len);	/* discard by acknowledging the whole payload */
+				break;
+			}
+
+			/* Fall through */
+		default:
+			br_ssl_engine_fail(cc, BR_ERR_UNEXPECTED);	/* unexpected record type or unexpected application data */
+			break;
+		}
+	}
+}
+
+/* see bearssl_ssl.h -- initiate closure: drop unread application data, then enter the handshake processor with action 1 */
+void
+br_ssl_engine_close(br_ssl_engine_context *cc)
+{
+	if (!br_ssl_engine_closed(cc)) {
+		/*
+		 * If we are not already closed, then we need to
+		 * initiate the closure. Once closing, any incoming
+		 * application data is discarded; we should also discard
+		 * application data which is already there but has not
+		 * been acknowledged by the application yet (this mimics
+		 * usual semantics on BSD sockets: you cannot read()
+		 * once you called close(), even if there was some
+		 * unread data already buffered).
+		 */
+		size_t len;
+
+		if (br_ssl_engine_recvapp_buf(cc, &len) != NULL && len != 0) {
+			br_ssl_engine_recvapp_ack(cc, len);	/* acknowledge all of it, i.e. discard */
+		}
+		jump_handshake(cc, 1);	/* action 1: close */
+	}
+}
+
+/* see bearssl_ssl.h -- request a renegotiation; returns 1 if the handshake processor was entered, 0 if refused */
+int
+br_ssl_engine_renegotiate(br_ssl_engine_context *cc)
+{
+	size_t len;
+
+	if (br_ssl_engine_closed(cc) || cc->reneg == 1	/* refused: engine closed, or reneg == 1 */
+		|| (cc->flags & BR_OPT_NO_RENEGOTIATION) != 0	/* refused: BR_OPT_NO_RENEGOTIATION flag is set */
+		|| br_ssl_engine_recvapp_buf(cc, &len) != NULL)	/* refused: unread application data is pending */
+	{
+		return 0;
+	}
+	jump_handshake(cc, 2);	/* action 2: renegotiation */
+	return 1;
+}
+
+/* see bearssl.h -- returns BR_SSL_CLOSED, or an OR of the BR_SSL_* flags whose buffer is currently available */
+unsigned
+br_ssl_engine_current_state(const br_ssl_engine_context *cc)
+{
+	unsigned s;
+	size_t len;
+
+	if (br_ssl_engine_closed(cc)) {	/* closed: BR_SSL_CLOSED is returned alone */
+		return BR_SSL_CLOSED;
+	}
+
+	s = 0;
+	if (br_ssl_engine_sendrec_buf(cc, &len) != NULL) {	/* record bytes are waiting to be sent */
+		s |= BR_SSL_SENDREC;
+	}
+	if (br_ssl_engine_recvrec_buf(cc, &len) != NULL) {	/* record bytes can be received */
+		s |= BR_SSL_RECVREC;
+	}
+	if (br_ssl_engine_sendapp_buf(cc, &len) != NULL) {	/* application data can be written */
+		s |= BR_SSL_SENDAPP;
+	}
+	if (br_ssl_engine_recvapp_buf(cc, &len) != NULL) {	/* application data can be read */
+		s |= BR_SSL_RECVAPP;
+	}
+	return s;	/* 'len' values are ignored; only buffer availability matters */
+}
+
+/* see bearssl_ssl.h -- wrap buffered application data into a record; non-zero 'force' is passed on to sendpld_flush() */
+void
+br_ssl_engine_flush(br_ssl_engine_context *cc, int force)
+{
+	if (!br_ssl_engine_closed(cc) && (cc->application_data & 1) != 0) {	/* only while open and accepting application data */
+		sendpld_flush(cc, force);
+	}
+}
+
+/* see inner.h -- reset the engine state for a new handshake and start the handshake processor */
+void
+br_ssl_engine_hs_reset(br_ssl_engine_context *cc,
+	void (*hsinit)(void *), void (*hsrun)(void *))
+{
+	engine_clearbuf(cc);
+	cc->cpu.dp = cc->dp_stack;	/* point cpu.dp at the start of dp_stack */
+	cc->cpu.rp = cc->rp_stack;	/* point cpu.rp at the start of rp_stack */
+	hsinit(&cc->cpu);	/* caller-provided initialiser for the handshake CPU state */
+	cc->hsrun = hsrun;	/* stored for later calls from jump_handshake() */
+	cc->shutdown_recv = 0;
+	cc->application_data = 0;	/* flag cleared: application data is not accepted */
+	cc->alert = 0;
+	jump_handshake(cc, 0);	/* first run of the handshake processor, action 0 */
+}
+
+/* see inner.h */
+br_tls_prf_impl
+br_ssl_engine_get_PRF(br_ssl_engine_context *cc, int prf_id)
+{
+	/* Protocol versions below TLS 1.2 all use the prf10 function. */
+	if (cc->session.version < BR_TLS12) {
+		return cc->prf10;
+	}
+	/*
+	 * From TLS 1.2 on, br_sha384_ID selects prf_sha384 and every
+	 * other identifier selects prf_sha256.
+	 */
+	return (prf_id == br_sha384_ID) ? cc->prf_sha384 : cc->prf_sha256;
+}
+
+/* see inner.h -- derive cc->session.master_secret from the pre-master secret 'pms' using the selected PRF */
+void
+br_ssl_engine_compute_master(br_ssl_engine_context *cc,
+	int prf_id, const void *pms, size_t pms_len)
+{
+	br_tls_prf_impl iprf;
+	br_tls_prf_seed_chunk seed[2] = {	/* seed order here: client random, then server random */
+		{ cc->client_random, sizeof cc->client_random },
+		{ cc->server_random, sizeof cc->server_random }
+	};
+
+	iprf = br_ssl_engine_get_PRF(cc, prf_id);
+	iprf(cc->session.master_secret, sizeof cc->session.master_secret,	/* output fills the whole master_secret field */
+		pms, pms_len, "master secret", 2, seed);	/* secret = pms; 2 = number of seed chunks */
+}
+
+/*
+ * Compute key block: write 2 * half_len bytes, derived from the master secret, into kb.
+ */
+static void
+compute_key_block(br_ssl_engine_context *cc, int prf_id,
+	size_t half_len, unsigned char *kb)
+{
+	br_tls_prf_impl iprf;
+	br_tls_prf_seed_chunk seed[2] = {	/* seed order here: server random, then client random */
+		{ cc->server_random, sizeof cc->server_random },
+		{ cc->client_random, sizeof cc->client_random }
+	};
+
+	iprf = br_ssl_engine_get_PRF(cc, prf_id);
+	iprf(kb, half_len << 1,	/* kb must have room for twice half_len bytes */
+		cc->session.master_secret, sizeof cc->session.master_secret,
+		"key expansion", 2, seed);	/* 2 = number of seed chunks */
+}
+
+/* see inner.h -- derive the key block and initialise the incoming CBC record context (cc->in.cbc) */
+void
+br_ssl_engine_switch_cbc_in(br_ssl_engine_context *cc,
+	int is_client, int prf_id, int mac_id,
+	const br_block_cbcdec_class *bc_impl, size_t cipher_key_len)
+{
+	unsigned char kb[192];	/* receives the key block */
+	unsigned char *cipher_key, *mac_key, *iv;
+	const br_hash_class *imh;
+	size_t mac_key_len, mac_out_len, iv_len;
+
+	imh = br_ssl_engine_get_hash(cc, mac_id);
+	mac_out_len = (imh->desc >> BR_HASHDESC_OUT_OFF) & BR_HASHDESC_OUT_MASK;	/* hash output size, decoded from the descriptor */
+	mac_key_len = mac_out_len;	/* the MAC key is as long as the hash output */
+
+	/*
+	 * TLS 1.1+ uses per-record explicit IV, so no IV to generate here.
+	 */
+	if (cc->session.version >= BR_TLS11) {
+		iv_len = 0;
+	} else {
+		iv_len = bc_impl->block_size;
+	}
+	compute_key_block(cc, prf_id,
+		mac_key_len + cipher_key_len + iv_len, kb);	/* kb layout: two MAC keys, two cipher keys, two IVs */
+	if (is_client) {	/* incoming side of a client: second element of each pair */
+		mac_key = &kb[mac_key_len];
+		cipher_key = &kb[(mac_key_len << 1) + cipher_key_len];
+		iv = &kb[((mac_key_len + cipher_key_len) << 1) + iv_len];
+	} else {	/* incoming side of a server: first element of each pair */
+		mac_key = &kb[0];
+		cipher_key = &kb[mac_key_len << 1];
+		iv = &kb[(mac_key_len + cipher_key_len) << 1];
+	}
+	if (iv_len == 0) {
+		iv = NULL;	/* no IV was derived: pass NULL to init() */
+	}
+	cc->icbc_in->init(&cc->in.cbc.vtable,
+		bc_impl, cipher_key, cipher_key_len,
+		imh, mac_key, mac_key_len, mac_out_len, iv);
+	cc->incrypt = 1;	/* NOTE(review): presumably marks incoming records as encrypted -- confirm */
+}
+
+/* see inner.h -- derive the key block and initialise the outgoing CBC record context (cc->out.cbc) */
+void
+br_ssl_engine_switch_cbc_out(br_ssl_engine_context *cc,
+	int is_client, int prf_id, int mac_id,
+	const br_block_cbcenc_class *bc_impl, size_t cipher_key_len)
+{
+	unsigned char kb[192];	/* receives the key block */
+	unsigned char *cipher_key, *mac_key, *iv;
+	const br_hash_class *imh;
+	size_t mac_key_len, mac_out_len, iv_len;
+
+	imh = br_ssl_engine_get_hash(cc, mac_id);
+	mac_out_len = (imh->desc >> BR_HASHDESC_OUT_OFF) & BR_HASHDESC_OUT_MASK;	/* hash output size, decoded from the descriptor */
+	mac_key_len = mac_out_len;	/* the MAC key is as long as the hash output */
+
+	/*
+	 * TLS 1.1+ uses per-record explicit IV, so no IV to generate here.
+	 */
+	if (cc->session.version >= BR_TLS11) {
+		iv_len = 0;
+	} else {
+		iv_len = bc_impl->block_size;
+	}
+	compute_key_block(cc, prf_id,
+		mac_key_len + cipher_key_len + iv_len, kb);	/* kb layout: two MAC keys, two cipher keys, two IVs */
+	if (is_client) {	/* outgoing side of a client: first element of each pair */
+		mac_key = &kb[0];
+		cipher_key = &kb[mac_key_len << 1];
+		iv = &kb[(mac_key_len + cipher_key_len) << 1];
+	} else {	/* outgoing side of a server: second element of each pair */
+		mac_key = &kb[mac_key_len];
+		cipher_key = &kb[(mac_key_len << 1) + cipher_key_len];
+		iv = &kb[((mac_key_len + cipher_key_len) << 1) + iv_len];
+	}
+	if (iv_len == 0) {
+		iv = NULL;	/* no IV was derived: pass NULL to init() */
+	}
+	cc->icbc_out->init(&cc->out.cbc.vtable,
+		bc_impl, cipher_key, cipher_key_len,
+		imh, mac_key, mac_key_len, mac_out_len, iv);
+}
+
+/* see inner.h -- derive the key block and initialise the incoming GCM record context (cc->in.gcm) */
+void
+br_ssl_engine_switch_gcm_in(br_ssl_engine_context *cc,
+	int is_client, int prf_id,
+	const br_block_ctr_class *bc_impl, size_t cipher_key_len)
+{
+	unsigned char kb[72];	/* receives the key block */
+	unsigned char *cipher_key, *iv;
+
+	compute_key_block(cc, prf_id, cipher_key_len + 4, kb);	/* per direction: cipher key plus 4 IV bytes */
+	if (is_client) {	/* incoming side of a client: second key, second IV */
+		cipher_key = &kb[cipher_key_len];
+		iv = &kb[(cipher_key_len << 1) + 4];
+	} else {	/* incoming side of a server: first key, first IV */
+		cipher_key = &kb[0];
+		iv = &kb[cipher_key_len << 1];
+	}
+	cc->igcm_in->init(&cc->in.gcm.vtable.in,
+		bc_impl, cipher_key, cipher_key_len, cc->ighash, iv);
+	cc->incrypt = 1;	/* NOTE(review): presumably marks incoming records as encrypted -- confirm */
+}
+
+/* see inner.h -- derive the key block and initialise the outgoing GCM record context (cc->out.gcm) */
+void
+br_ssl_engine_switch_gcm_out(br_ssl_engine_context *cc,
+	int is_client, int prf_id,
+	const br_block_ctr_class *bc_impl, size_t cipher_key_len)
+{
+	unsigned char kb[72];	/* receives the key block */
+	unsigned char *cipher_key, *iv;
+
+	compute_key_block(cc, prf_id, cipher_key_len + 4, kb);	/* per direction: cipher key plus 4 IV bytes */
+	if (is_client) {	/* outgoing side of a client: first key, first IV */
+		cipher_key = &kb[0];
+		iv = &kb[cipher_key_len << 1];
+	} else {	/* outgoing side of a server: second key, second IV */
+		cipher_key = &kb[cipher_key_len];
+		iv = &kb[(cipher_key_len << 1) + 4];
+	}
+	cc->igcm_out->init(&cc->out.gcm.vtable.out,
+		bc_impl, cipher_key, cipher_key_len, cc->ighash, iv);
+}
+
+/* see inner.h -- derive the key block and initialise the incoming ChaCha20+Poly1305 record context (cc->in.chapol) */
+void
+br_ssl_engine_switch_chapol_in(br_ssl_engine_context *cc,
+	int is_client, int prf_id)
+{
+	unsigned char kb[88];	/* 2 * 44 bytes of key block */
+	unsigned char *cipher_key, *iv;
+
+	compute_key_block(cc, prf_id, 44, kb);	/* 44 per direction: keys at 0 and 32, IVs at 64 and 76 */
+	if (is_client) {	/* incoming side of a client: second key, second IV */
+		cipher_key = &kb[32];
+		iv = &kb[76];
+	} else {	/* incoming side of a server: first key, first IV */
+		cipher_key = &kb[0];
+		iv = &kb[64];
+	}
+	cc->ichapol_in->init(&cc->in.chapol.vtable.in,
+		cc->ichacha, cc->ipoly, cipher_key, iv);
+	cc->incrypt = 1;	/* NOTE(review): presumably marks incoming records as encrypted -- confirm */
+}
+
+/* see inner.h -- derive the key block and initialise the outgoing ChaCha20+Poly1305 record context (cc->out.chapol) */
+void
+br_ssl_engine_switch_chapol_out(br_ssl_engine_context *cc,
+	int is_client, int prf_id)
+{
+	unsigned char kb[88];	/* 2 * 44 bytes of key block */
+	unsigned char *cipher_key, *iv;
+
+	compute_key_block(cc, prf_id, 44, kb);	/* 44 per direction: keys at 0 and 32, IVs at 64 and 76 */
+	if (is_client) {	/* outgoing side of a client: first key, first IV */
+		cipher_key = &kb[0];
+		iv = &kb[64];
+	} else {	/* outgoing side of a server: second key, second IV */
+		cipher_key = &kb[32];
+		iv = &kb[76];
+	}
+	cc->ichapol_out->init(&cc->out.chapol.vtable.out,
+		cc->ichacha, cc->ipoly, cipher_key, iv);
+}
+
+/* see inner.h -- derive the key block and initialise the incoming CCM record context (cc->in.ccm) */
+void
+br_ssl_engine_switch_ccm_in(br_ssl_engine_context *cc,
+	int is_client, int prf_id,
+	const br_block_ctrcbc_class *bc_impl,
+	size_t cipher_key_len, size_t tag_len)
+{
+	unsigned char kb[72];	/* receives the key block */
+	unsigned char *cipher_key, *iv;
+
+	compute_key_block(cc, prf_id, cipher_key_len + 4, kb);	/* per direction: cipher key plus 4 IV bytes */
+	if (is_client) {	/* incoming side of a client: second key, second IV */
+		cipher_key = &kb[cipher_key_len];
+		iv = &kb[(cipher_key_len << 1) + 4];
+	} else {	/* incoming side of a server: first key, first IV */
+		cipher_key = &kb[0];
+		iv = &kb[cipher_key_len << 1];
+	}
+	cc->iccm_in->init(&cc->in.ccm.vtable.in,
+		bc_impl, cipher_key, cipher_key_len, iv, tag_len);
+	cc->incrypt = 1;	/* NOTE(review): presumably marks incoming records as encrypted -- confirm */
+}
+
+/* see inner.h -- derive the key block and initialise the outgoing CCM record context (cc->out.ccm) */
+void
+br_ssl_engine_switch_ccm_out(br_ssl_engine_context *cc,
+	int is_client, int prf_id,
+	const br_block_ctrcbc_class *bc_impl,
+	size_t cipher_key_len, size_t tag_len)
+{
+	unsigned char kb[72];	/* receives the key block */
+	unsigned char *cipher_key, *iv;
+
+	compute_key_block(cc, prf_id, cipher_key_len + 4, kb);	/* per direction: cipher key plus 4 IV bytes */
+	if (is_client) {	/* outgoing side of a client: first key, first IV */
+		cipher_key = &kb[0];
+		iv = &kb[cipher_key_len << 1];
+	} else {	/* outgoing side of a server: second key, second IV */
+		cipher_key = &kb[cipher_key_len];
+		iv = &kb[(cipher_key_len << 1) + 4];
+	}
+	cc->iccm_out->init(&cc->out.ccm.vtable.out,
+		bc_impl, cipher_key, cipher_key_len, iv, tag_len);
+}
diff --git a/third_party/bearssl/src/ssl_engine_default_aescbc.c b/third_party/bearssl/src/ssl_engine_default_aescbc.c
new file mode 100644
index 0000000..8c5cdb5
--- /dev/null
+++ b/third_party/bearssl/src/ssl_engine_default_aescbc.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ssl.h -- install the CBC record vtables and pick an AES/CBC implementation for this build */
+void
+br_ssl_engine_set_default_aes_cbc(br_ssl_engine_context *cc)
+{
+#if BR_AES_X86NI || BR_POWER8
+	const br_block_cbcenc_class *ienc;
+	const br_block_cbcdec_class *idec;
+#endif	/* BR_AES_X86NI || BR_POWER8 */
+
+	br_ssl_engine_set_cbc(cc,
+		&br_sslrec_in_cbc_vtable,
+		&br_sslrec_out_cbc_vtable);
+#if BR_AES_X86NI
+	ienc = br_aes_x86ni_cbcenc_get_vtable();
+	idec = br_aes_x86ni_cbcdec_get_vtable();
+	if (ienc != NULL && idec != NULL) {	/* use it only if both directions are available */
+		br_ssl_engine_set_aes_cbc(cc, ienc, idec);
+		return;
+	}
+#endif	/* BR_AES_X86NI */
+#if BR_POWER8
+	ienc = br_aes_pwr8_cbcenc_get_vtable();
+	idec = br_aes_pwr8_cbcdec_get_vtable();
+	if (ienc != NULL && idec != NULL) {	/* use it only if both directions are available */
+		br_ssl_engine_set_aes_cbc(cc, ienc, idec);
+		return;
+	}
+#endif	/* BR_POWER8 */
+#if BR_64
+	br_ssl_engine_set_aes_cbc(cc,	/* fallback when BR_64 is set: the "ct64" vtables */
+		&br_aes_ct64_cbcenc_vtable,
+		&br_aes_ct64_cbcdec_vtable);
+#else
+	br_ssl_engine_set_aes_cbc(cc,	/* fallback otherwise: the "ct" vtables */
+		&br_aes_ct_cbcenc_vtable,
+		&br_aes_ct_cbcdec_vtable);
+#endif	/* BR_64 */
+}
diff --git a/third_party/bearssl/src/ssl_engine_default_aesccm.c b/third_party/bearssl/src/ssl_engine_default_aesccm.c
new file mode 100644
index 0000000..15c0a78
--- /dev/null
+++ b/third_party/bearssl/src/ssl_engine_default_aesccm.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ssl.h -- install the CCM record vtables and pick an AES CTR+CBC-MAC implementation for this build */
+void
+br_ssl_engine_set_default_aes_ccm(br_ssl_engine_context *cc)
+{
+#if BR_AES_X86NI || BR_POWER8
+	const br_block_ctrcbc_class *ictrcbc;
+#endif	/* BR_AES_X86NI || BR_POWER8 */
+
+	br_ssl_engine_set_ccm(cc,
+		&br_sslrec_in_ccm_vtable,
+		&br_sslrec_out_ccm_vtable);
+#if BR_AES_X86NI
+	ictrcbc = br_aes_x86ni_ctrcbc_get_vtable();
+	if (ictrcbc != NULL) {
+		br_ssl_engine_set_aes_ctrcbc(cc, ictrcbc);
+	} else {	/* getter returned NULL: use the "ct64"/"ct" fallback */
+#if BR_64
+		br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct64_ctrcbc_vtable);
+#else
+		br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct_ctrcbc_vtable);
+#endif	/* BR_64 */
+	}
+#elif BR_POWER8
+	ictrcbc = br_aes_pwr8_ctrcbc_get_vtable();
+	if (ictrcbc != NULL) {
+		br_ssl_engine_set_aes_ctrcbc(cc, ictrcbc);
+	} else {	/* getter returned NULL: use the "ct64"/"ct" fallback */
+#if BR_64
+		br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct64_ctrcbc_vtable);
+#else
+		br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct_ctrcbc_vtable);
+#endif	/* BR_64 */
+	}
+#else
+#if BR_64
+	br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct64_ctrcbc_vtable);
+#else
+	br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct_ctrcbc_vtable);
+#endif	/* BR_64 */
+#endif	/* BR_AES_X86NI / BR_POWER8 */
+}
diff --git a/third_party/bearssl/src/ssl_engine_default_aesgcm.c b/third_party/bearssl/src/ssl_engine_default_aesgcm.c
new file mode 100644
index 0000000..c44a707
--- /dev/null
+++ b/third_party/bearssl/src/ssl_engine_default_aesgcm.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ssl.h -- install the GCM record vtables, then pick AES/CTR and GHASH implementations for this build */
+void
+br_ssl_engine_set_default_aes_gcm(br_ssl_engine_context *cc)
+{
+#if BR_AES_X86NI || BR_POWER8
+	const br_block_ctr_class *ictr;
+	br_ghash ighash;
+#endif	/* BR_AES_X86NI || BR_POWER8 */
+
+	br_ssl_engine_set_gcm(cc,
+		&br_sslrec_in_gcm_vtable,
+		&br_sslrec_out_gcm_vtable);
+#if BR_AES_X86NI
+	ictr = br_aes_x86ni_ctr_get_vtable();
+	if (ictr != NULL) {
+		br_ssl_engine_set_aes_ctr(cc, ictr);
+	} else {	/* getter returned NULL: use the "ct64"/"ct" fallback */
+#if BR_64
+		br_ssl_engine_set_aes_ctr(cc, &br_aes_ct64_ctr_vtable);
+#else
+		br_ssl_engine_set_aes_ctr(cc, &br_aes_ct_ctr_vtable);
+#endif	/* BR_64 */
+	}
+#elif BR_POWER8
+	ictr = br_aes_pwr8_ctr_get_vtable();
+	if (ictr != NULL) {
+		br_ssl_engine_set_aes_ctr(cc, ictr);
+	} else {	/* getter returned NULL: use the "ct64"/"ct" fallback */
+#if BR_64
+		br_ssl_engine_set_aes_ctr(cc, &br_aes_ct64_ctr_vtable);
+#else
+		br_ssl_engine_set_aes_ctr(cc, &br_aes_ct_ctr_vtable);
+#endif	/* BR_64 */
+	}
+#else
+#if BR_64
+	br_ssl_engine_set_aes_ctr(cc, &br_aes_ct64_ctr_vtable);
+#else
+	br_ssl_engine_set_aes_ctr(cc, &br_aes_ct_ctr_vtable);
+#endif	/* BR_64 */
+#endif	/* BR_AES_X86NI / BR_POWER8 */
+#if BR_AES_X86NI
+	ighash = br_ghash_pclmul_get();
+	if (ighash != 0) {	/* non-zero: this GHASH implementation is available */
+		br_ssl_engine_set_ghash(cc, ighash);
+		return;
+	}
+#endif	/* BR_AES_X86NI */
+#if BR_POWER8
+	ighash = br_ghash_pwr8_get();
+	if (ighash != 0) {	/* non-zero: this GHASH implementation is available */
+		br_ssl_engine_set_ghash(cc, ighash);
+		return;
+	}
+#endif	/* BR_POWER8 */
+#if BR_LOMUL
+	br_ssl_engine_set_ghash(cc, &br_ghash_ctmul32);
+#elif BR_64
+	br_ssl_engine_set_ghash(cc, &br_ghash_ctmul64);
+#else
+	br_ssl_engine_set_ghash(cc, &br_ghash_ctmul);
+#endif	/* BR_LOMUL / BR_64 */
+}
diff --git a/third_party/bearssl/src/ssl_engine_default_chapol.c b/third_party/bearssl/src/ssl_engine_default_chapol.c
new file mode 100644
index 0000000..47a0c98
--- /dev/null
+++ b/third_party/bearssl/src/ssl_engine_default_chapol.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ssl.h -- install the ChaCha20+Poly1305 record vtables and pick ChaCha20 and Poly1305 implementations */
+void
+br_ssl_engine_set_default_chapol(br_ssl_engine_context *cc)
+{
+#if BR_INT128 || BR_UMUL128
+	br_poly1305_run bp;
+#endif	/* BR_INT128 || BR_UMUL128 */
+#if BR_SSE2
+	br_chacha20_run bc;
+#endif	/* BR_SSE2 */
+
+	br_ssl_engine_set_chapol(cc,
+		&br_sslrec_in_chapol_vtable,
+		&br_sslrec_out_chapol_vtable);
+#if BR_SSE2
+	bc = br_chacha20_sse2_get();
+	if (bc) {
+		br_ssl_engine_set_chacha20(cc, bc);
+	} else {
+#endif	/* BR_SSE2 */
+		br_ssl_engine_set_chacha20(cc, &br_chacha20_ct_run);	/* used without BR_SSE2, or when the getter returns 0 */
+#if BR_SSE2
+	}
+#endif	/* BR_SSE2 */
+#if BR_INT128 || BR_UMUL128
+	bp = br_poly1305_ctmulq_get();
+	if (bp) {
+		br_ssl_engine_set_poly1305(cc, bp);
+	} else {
+#endif	/* BR_INT128 || BR_UMUL128 */
+#if BR_LOMUL
+		br_ssl_engine_set_poly1305(cc, &br_poly1305_ctmul32_run);
+#else
+		br_ssl_engine_set_poly1305(cc, &br_poly1305_ctmul_run);
+#endif	/* BR_LOMUL */
+#if BR_INT128 || BR_UMUL128
+	}
+#endif	/* BR_INT128 || BR_UMUL128 */
+}
diff --git a/third_party/bearssl/src/ssl_engine_default_descbc.c b/third_party/bearssl/src/ssl_engine_default_descbc.c
new file mode 100644
index 0000000..0253cb2
--- /dev/null
+++ b/third_party/bearssl/src/ssl_engine_default_descbc.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ssl.h -- install the CBC record vtables and the "ct" DES/CBC encryption and decryption vtables */
+void
+br_ssl_engine_set_default_des_cbc(br_ssl_engine_context *cc)
+{
+	br_ssl_engine_set_cbc(cc,	/* record layer: same CBC vtables as the AES/CBC default */
+		&br_sslrec_in_cbc_vtable,
+		&br_sslrec_out_cbc_vtable);
+	br_ssl_engine_set_des_cbc(cc,	/* block cipher: no build-time alternatives are selected here */
+		&br_des_ct_cbcenc_vtable,
+		&br_des_ct_cbcdec_vtable);
+}
diff --git a/third_party/bearssl/src/ssl_engine_default_ec.c b/third_party/bearssl/src/ssl_engine_default_ec.c
new file mode 100644
index 0000000..0213ae6
--- /dev/null
+++ b/third_party/bearssl/src/ssl_engine_default_ec.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ssl.h -- install the elliptic-curve implementation: br_ec_all_m15 if BR_LOMUL is set, else br_ec_all_m31 */
+void
+br_ssl_engine_set_default_ec(br_ssl_engine_context *cc)
+{
+#if BR_LOMUL
+	br_ssl_engine_set_ec(cc, &br_ec_all_m15);
+#else
+	br_ssl_engine_set_ec(cc, &br_ec_all_m31);
+#endif	/* BR_LOMUL */
+}
diff --git a/third_party/bearssl/src/ssl_engine_default_ecdsa.c b/third_party/bearssl/src/ssl_engine_default_ecdsa.c
new file mode 100644
index 0000000..1304002
--- /dev/null
+++ b/third_party/bearssl/src/ssl_engine_default_ecdsa.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Select the default EC and ECDSA-verify implementations (see bearssl_ssl.h). */
+void
+br_ssl_engine_set_default_ecdsa(br_ssl_engine_context *cc)
+{
+#if BR_LOMUL
+	br_ssl_engine_set_ec(cc, &br_ec_all_m15);	/* the EC set is installed here as well */
+	br_ssl_engine_set_ecdsa(cc, &br_ecdsa_i15_vrfy_asn1);	/* verifier paired with the m15 set */
+#else
+	br_ssl_engine_set_ec(cc, &br_ec_all_m31);	/* the EC set is installed here as well */
+	br_ssl_engine_set_ecdsa(cc, &br_ecdsa_i31_vrfy_asn1);	/* verifier paired with the m31 set */
+#endif
+}
diff --git a/third_party/bearssl/src/ssl_engine_default_rsavrfy.c b/third_party/bearssl/src/ssl_engine_default_rsavrfy.c
new file mode 100644
index 0000000..ad0628a
--- /dev/null
+++ b/third_party/bearssl/src/ssl_engine_default_rsavrfy.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Install the verifier from br_rsa_pkcs1_vrfy_get_default() (see bearssl_ssl.h). */
+void
+br_ssl_engine_set_default_rsavrfy(br_ssl_engine_context *cc)
+{
+	br_ssl_engine_set_rsavrfy(cc, br_rsa_pkcs1_vrfy_get_default());
+}
diff --git a/third_party/bearssl/src/ssl_hashes.c b/third_party/bearssl/src/ssl_hashes.c
new file mode 100644
index 0000000..e10a980
--- /dev/null
+++ b/third_party/bearssl/src/ssl_hashes.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Choose a hash ID from the bit field bf, by fixed preference (see inner.h). */
+int
+br_ssl_choose_hash(unsigned bf)
+{
+	static const unsigned char pref[] = {	/* hash IDs, most preferred first */
+		br_sha256_ID, br_sha384_ID, br_sha512_ID,
+		br_sha224_ID, br_sha1_ID
+	};
+	size_t u;
+
+	for (u = 0; u < sizeof pref; u ++) {	/* byte array: sizeof is the entry count */
+		int x;
+
+		x = pref[u];
+		if ((bf >> x) & 1) {	/* bit number x of bf flags hash ID x */
+			return x;
+		}
+	}
+	return 0;	/* none of the listed hashes is flagged in bf */
+}
diff --git a/third_party/bearssl/src/ssl_hs_client.c b/third_party/bearssl/src/ssl_hs_client.c
new file mode 100644
index 0000000..de36165
--- /dev/null
+++ b/third_party/bearssl/src/ssl_hs_client.c
@@ -0,0 +1,1915 @@
+/* Automatically generated code; do not modify directly. */
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct {	/* NOTE(review): presumably the state behind the 't0ctx' pointers -- confirm */
+	uint32_t *dp;	/* NOTE(review): presumably the data stack pointer -- confirm */
+	uint32_t *rp;	/* NOTE(review): presumably the return stack pointer -- confirm */
+	const unsigned char *ip;	/* NOTE(review): presumably the position in the byte code -- confirm */
+} t0_context;
+
+static uint32_t	/* Decode one unsigned base-128 integer at *p; *p ends up just past it. */
+t0_parse7E_unsigned(const unsigned char **p)
+{
+	uint32_t x;
+
+	x = 0;
+	for (;;) {
+		unsigned y;
+
+		y = *(*p) ++;	/* read one byte and step the caller's pointer */
+		x = (x << 7) | (uint32_t)(y & 0x7F);	/* 7 payload bits per byte, most significant group first */
+		if (y < 0x80) {	/* top bit clear marks the last byte */
+			return x;
+		}
+	}
+}
+
+static int32_t	/* Decode one signed base-128 integer at *p; *p ends up just past it. */
+t0_parse7E_signed(const unsigned char **p)
+{
+	int neg;
+	uint32_t x;
+
+	neg = ((**p) >> 6) & 1;	/* sign is bit 6 of the first byte (its top payload bit) */
+	x = (uint32_t)-neg;	/* all-ones when negative, so the shifts below sign-extend */
+	for (;;) {
+		unsigned y;
+
+		y = *(*p) ++;	/* read one byte and step the caller's pointer */
+		x = (x << 7) | (uint32_t)(y & 0x7F);	/* 7 payload bits per byte, most significant group first */
+		if (y < 0x80) {	/* top bit clear marks the last byte */
+			if (neg) {
+				return -(int32_t)~x - 1;	/* x as two's complement, without casting an out-of-range value */
+			} else {
+				return (int32_t)x;
+			}
+		}
+	}
+}
+
+#define T0_VBYTE(x, n)   (unsigned char)((((uint32_t)(x) >> (n)) & 0x7F) | 0x80)	/* 7 bits of x from bit n, top bit set (more bytes follow) */
+#define T0_FBYTE(x, n)   (unsigned char)(((uint32_t)(x) >> (n)) & 0x7F)	/* 7 bits of x from bit n, top bit clear (last byte) */
+#define T0_SBYTE(x)      (unsigned char)((((uint32_t)(x) >> 28) + 0xF8) ^ 0xF8)	/* bits 28..31 of x, sign-extended from 4 to 8 bits */
+#define T0_INT1(x)       T0_FBYTE(x, 0)	/* T0_INTn: x as n comma-separated bytes, high groups first */
+#define T0_INT2(x)       T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+#define T0_INT3(x)       T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+#define T0_INT4(x)       T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+#define T0_INT5(x)       T0_SBYTE(x), T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+
+/* static const unsigned char t0_datablock[]; */
+
+
+void br_ssl_hs_client_init_main(void *t0ctx);
+
+void br_ssl_hs_client_run(void *t0ctx);
+
+
+
+#include <stddef.h>
+#include <string.h>
+
+#include "inner.h"
+
+/*
+ * Pointer to the current engine context: t0ctx minus the offset of 'cpu'.
+ */
+#define ENG  ((br_ssl_engine_context *)(void *)((unsigned char *)t0ctx - offsetof(br_ssl_engine_context, cpu)))
+
+
+
+
+
+/*
+ * This macro evaluates to a pointer to the client context, under that
+ * specific name. It must be noted that since the engine context is the
+ * first field of the br_ssl_client_context structure ('eng'), then
+ * pointer values of both types are interchangeable, modulo an
+ * appropriate cast. This also means that "addresses" computed as offsets
+ * within the structure work for both kinds of context.
+ */
+#define CTX  ((br_ssl_client_context *)ENG)
+
+/*
+ * Generate the pre-master secret for RSA key exchange, and encrypt it
+ * with the server's public key. Returned value is either the encrypted
+ * data length (in bytes), or -x on error, with 'x' being an error code.
+ *
+ * This code assumes that the public key has been already verified (it
+ * was properly obtained by the X.509 engine, and it has the right type,
+ * i.e. it is of type RSA and suitable for encryption).
+ */
+static int
+make_pms_rsa(br_ssl_client_context *ctx, int prf_id)
+{
+	const br_x509_class **xc;
+	const br_x509_pkey *pk;
+	const unsigned char *n;
+	unsigned char *pms;
+	size_t nlen, u;
+
+	xc = ctx->eng.x509ctx;
+	pk = (*xc)->get_pkey(xc, NULL);	/* public key as reported by the X.509 engine */
+
+	/*
+	 * Compute actual RSA key length, in case there are leading zeros.
+	 */
+	n = pk->key.rsa.n;
+	nlen = pk->key.rsa.nlen;
+	while (nlen > 0 && *n == 0) {
+		n ++;
+		nlen --;
+	}
+
+	/*
+	 * We need at least 59 bytes (48 bytes for pre-master secret, and
+	 * 11 bytes for the PKCS#1 type 2 padding). Note that the X.509
+	 * minimal engine normally blocks RSA keys shorter than 128 bytes,
+	 * so this is mostly for public keys provided explicitly by the
+	 * caller.
+	 */
+	if (nlen < 59) {
+		return -BR_ERR_X509_WEAK_PUBLIC_KEY;
+	}
+	if (nlen > sizeof ctx->eng.pad) {	/* the nlen-byte block is built inside the pad */
+		return -BR_ERR_LIMIT_EXCEEDED;
+	}
+
+	/*
+	 * Make PMS: version_max, then 46 random bytes, in pad[nlen-48..nlen-1].
+	 */
+	pms = ctx->eng.pad + nlen - 48;
+	br_enc16be(pms, ctx->eng.version_max);
+	br_hmac_drbg_generate(&ctx->eng.rng, pms + 2, 46);
+	br_ssl_engine_compute_master(&ctx->eng, prf_id, pms, 48);	/* master secret derived from the 48-byte PMS */
+
+	/*
+	 * Apply PKCS#1 type 2 padding: 00 02 <nlen-51 non-zero bytes> 00 <PMS>.
+	 */
+	ctx->eng.pad[0] = 0x00;
+	ctx->eng.pad[1] = 0x02;
+	ctx->eng.pad[nlen - 49] = 0x00;
+	br_hmac_drbg_generate(&ctx->eng.rng, ctx->eng.pad + 2, nlen - 51);
+	for (u = 2; u < nlen - 49; u ++) {
+		while (ctx->eng.pad[u] == 0) {	/* redraw until this padding byte is non-zero */
+			br_hmac_drbg_generate(&ctx->eng.rng,
+				&ctx->eng.pad[u], 1);
+		}
+	}
+
+	/*
+	 * Compute RSA encryption.
+	 */
+	if (!ctx->irsapub(ctx->eng.pad, nlen, &pk->key.rsa)) {	/* applied to the nlen-byte block in the pad */
+		return -BR_ERR_LIMIT_EXCEEDED;
+	}
+	return (int)nlen;
+}
+
+/*
+ * OID for hash functions in RSA signatures.
+ */
+static const unsigned char *HASH_OID[] = {	/* verify_SKE_sig() indexes this with (hash ID - 2) */
+	BR_HASH_OID_SHA1,	/* [0], reached with hash ID 2 */
+	BR_HASH_OID_SHA224,	/* [1], reached with hash ID 3 */
+	BR_HASH_OID_SHA256,	/* [2], reached with hash ID 4 */
+	BR_HASH_OID_SHA384,	/* [3], reached with hash ID 5 */
+	BR_HASH_OID_SHA512	/* [4], reached with hash ID 6 */
+};
+
+/*
+ * Check the signature (RSA or ECDSA) on the ServerKeyExchange message.
+ *
+ *   hash      hash function ID (2 to 6), or 0 for MD5+SHA-1 (with RSA only)
+ *   use_rsa   non-zero for RSA signature, zero for ECDSA
+ *   sig_len   signature length (in bytes); signature value is in the pad
+ *
+ * Returned value is 0 on success, or an error code.
+ */
+static int
+verify_SKE_sig(br_ssl_client_context *ctx,
+	int hash, int use_rsa, size_t sig_len)
+{
+	const br_x509_class **xc;
+	const br_x509_pkey *pk;
+	br_multihash_context mhc;
+	unsigned char hv[64], head[4];
+	size_t hv_len;
+
+	xc = ctx->eng.x509ctx;
+	pk = (*xc)->get_pkey(xc, NULL);	/* public key as reported by the X.509 engine */
+	br_multihash_zero(&mhc);	/* local hasher; the handshake hasher (eng.mhash) is not updated here */
+	br_multihash_copyimpl(&mhc, &ctx->eng.mhash);
+	br_multihash_init(&mhc);
+	br_multihash_update(&mhc,	/* hashed data: client_random, server_random, head[], ECDHE point */
+		ctx->eng.client_random, sizeof ctx->eng.client_random);
+	br_multihash_update(&mhc,
+		ctx->eng.server_random, sizeof ctx->eng.server_random);
+	head[0] = 3;	/* NOTE(review): looks like the TLS "named_curve" curve type -- confirm */
+	head[1] = 0;	/* NOTE(review): presumably the high byte of a 16-bit curve ID -- confirm */
+	head[2] = ctx->eng.ecdhe_curve;
+	head[3] = ctx->eng.ecdhe_point_len;
+	br_multihash_update(&mhc, head, sizeof head);
+	br_multihash_update(&mhc,
+		ctx->eng.ecdhe_point, ctx->eng.ecdhe_point_len);
+	if (hash) {
+		hv_len = br_multihash_out(&mhc, hash, hv);
+		if (hv_len == 0) {	/* a zero output length is reported as an algorithm error */
+			return BR_ERR_INVALID_ALGORITHM;
+		}
+	} else {
+		if (!br_multihash_out(&mhc, br_md5_ID, hv)	/* hash == 0: MD5 output first, */
+			|| !br_multihash_out(&mhc, br_sha1_ID, hv + 16))	/* SHA-1 output right after, at offset 16 */
+		{
+			return BR_ERR_INVALID_ALGORITHM;
+		}
+		hv_len = 36;	/* combined length of the two outputs */
+	}
+	if (use_rsa) {
+		unsigned char tmp[64];
+		const unsigned char *hash_oid;
+
+		if (hash) {
+			hash_oid = HASH_OID[hash - 2];	/* hash IDs 2..6 map to table entries 0..4 */
+		} else {
+			hash_oid = NULL;	/* MD5+SHA-1 case: no OID is passed */
+		}
+		if (!ctx->eng.irsavrfy(ctx->eng.pad, sig_len,	/* signature bytes are read from the pad */
+			hash_oid, hv_len, &pk->key.rsa, tmp)
+			|| memcmp(tmp, hv, hv_len) != 0)	/* tmp, filled by the verifier, must equal our hash */
+		{
+			return BR_ERR_BAD_SIGNATURE;
+		}
+	} else {
+		if (!ctx->eng.iecdsa(ctx->eng.iec, hv, hv_len, &pk->key.ec,
+			ctx->eng.pad, sig_len))	/* signature bytes are read from the pad */
+		{
+			return BR_ERR_BAD_SIGNATURE;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Perform client-side ECDH (or ECDHE). The point that should be sent to
+ * the server is written in the pad; returned value is either the point
+ * length (in bytes), or -x on error, with 'x' being an error code.
+ *
+ * The point _from_ the server is taken from ecdhe_point[] if 'ecdhe'
+ * is non-zero, or from the X.509 engine context if 'ecdhe' is zero
+ * (for static ECDH).
+ */
+static int
+make_pms_ecdh(br_ssl_client_context *ctx, unsigned ecdhe, int prf_id)
+{
+	int curve;
+	unsigned char key[66], point[133];	/* NOTE(review): presumably sized for the largest supported curve -- confirm */
+	const unsigned char *order, *point_src;
+	size_t glen, olen, point_len, xoff, xlen;
+	unsigned char mask;
+
+	if (ecdhe) {
+		curve = ctx->eng.ecdhe_curve;
+		point_src = ctx->eng.ecdhe_point;
+		point_len = ctx->eng.ecdhe_point_len;
+	} else {
+		const br_x509_class **xc;
+		const br_x509_pkey *pk;
+
+		xc = ctx->eng.x509ctx;
+		pk = (*xc)->get_pkey(xc, NULL);
+		curve = pk->key.ec.curve;
+		point_src = pk->key.ec.q;
+		point_len = pk->key.ec.qlen;
+	}
+	if ((ctx->eng.iec->supported_curves & ((uint32_t)1 << curve)) == 0) {	/* bit mask indexed by curve ID */
+		return -BR_ERR_INVALID_ALGORITHM;
+	}
+
+	/*
+	 * We need to generate our key, as a non-zero random value which
+	 * is lower than the curve order, in a "large enough" range. We
+	 * force top bit to 0 and bottom bit to 1, which guarantees that
+	 * the value is in the proper range.
+	 */
+	order = ctx->eng.iec->order(curve, &olen);
+	mask = 0xFF;
+	while (mask >= order[0]) {	/* shrink to the largest 2^k-1 that is below order[0] */
+		mask >>= 1;
+	}
+	br_hmac_drbg_generate(&ctx->eng.rng, key, olen);
+	key[0] &= mask;	/* first byte now below order[0], so key < order */
+	key[olen - 1] |= 0x01;	/* lowest bit set, so key is non-zero */
+
+	/*
+	 * Compute the common ECDH point, whose X coordinate is the
+	 * pre-master secret.
+	 */
+	ctx->eng.iec->generator(curve, &glen);	/* called only for glen; the returned pointer is unused */
+	if (glen != point_len) {	/* source point must have the same length as the generator */
+		return -BR_ERR_INVALID_ALGORITHM;
+	}
+
+	memcpy(point, point_src, glen);	/* mul() works on this local copy */
+	if (!ctx->eng.iec->mul(point, glen, key, olen, curve)) {
+		return -BR_ERR_INVALID_ALGORITHM;
+	}
+
+	/*
+	 * The pre-master secret is the X coordinate.
+	 */
+	xoff = ctx->eng.iec->xoff(curve, &xlen);
+	br_ssl_engine_compute_master(&ctx->eng, prf_id, point + xoff, xlen);
+
+	ctx->eng.iec->mulgen(point, key, olen, curve);	/* point[] is reused; mulgen's return value is ignored */
+	memcpy(ctx->eng.pad, point, glen);	/* glen bytes are handed back through the pad */
+	return (int)glen;
+}
+
+/*
+ * Perform full static ECDH. This occurs only in the context of client
+ * authentication with certificates: the server uses an EC public key,
+ * the cipher suite is of type ECDH (not ECDHE), the server requested a
+ * client certificate and accepts static ECDH, the client has a
+ * certificate with an EC public key in the same curve, and accepts
+ * static ECDH as well.
+ *
+ * Returned value is 0 on success, -1 on error.
+ */
+static int
+make_pms_static_ecdh(br_ssl_client_context *ctx, int prf_id)
+{
+	unsigned char point[133];
+	size_t point_len;
+	const br_x509_class **xc;
+	const br_x509_pkey *pk;
+
+	xc = ctx->eng.x509ctx;
+	pk = (*xc)->get_pkey(xc, NULL);	/* public key as reported by the X.509 engine */
+	point_len = pk->key.ec.qlen;
+	if (point_len > sizeof point) {	/* the public point must fit the local buffer */
+		return -1;
+	}
+	memcpy(point, pk->key.ec.q, point_len);
+	if (!(*ctx->client_auth_vtable)->do_keyx(	/* callback gets point[] and &point_len to work on */
+		ctx->client_auth_vtable, point, &point_len))
+	{
+		return -1;
+	}
+	br_ssl_engine_compute_master(&ctx->eng,	/* point[0..point_len-1] is then used as the pre-master secret */
+		prf_id, point, point_len);
+	return 0;
+}
+
+/*
+ * Compute the client-side signature. This is invoked only when a
+ * signature-based client authentication was selected. The computed
+ * signature is in the pad; its length (in bytes) is returned. On
+ * error, 0 is returned.
+ */
+static size_t
+make_client_sign(br_ssl_client_context *ctx)
+{
+	size_t hv_len;
+
+	/*
+	 * Compute hash of handshake messages so far. This "cannot" fail
+	 * because the list of supported hash functions provided to the
+	 * client certificate handler was trimmed to include only the
+	 * hash functions that the multi-hasher supports.
+	 */
+	if (ctx->hash_id) {	/* non-zero hash_id: one hash output, written at the start of the pad */
+		hv_len = br_multihash_out(&ctx->eng.mhash,
+			ctx->hash_id, ctx->eng.pad);
+	} else {	/* hash_id 0: MD5 output, then SHA-1 output at offset 16 */
+		br_multihash_out(&ctx->eng.mhash,
+			br_md5_ID, ctx->eng.pad);
+		br_multihash_out(&ctx->eng.mhash,
+			br_sha1_ID, ctx->eng.pad + 16);
+		hv_len = 36;	/* combined length of the two outputs */
+	}
+	return (*ctx->client_auth_vtable)->do_sign(	/* callback gets the hash in the pad, plus the pad capacity */
+		ctx->client_auth_vtable, ctx->hash_id, hv_len,
+		ctx->eng.pad, sizeof ctx->eng.pad);
+}
+
+
+
+static const unsigned char t0_datablock[] = {	/* generated data (file header: do not modify directly) */
+	0x00, 0x00, 0x0A, 0x00, 0x24, 0x00, 0x2F, 0x01, 0x24, 0x00, 0x35, 0x02,	/* NOTE(review): after the first byte, looks like 4-byte records (16-bit cipher suite ID + 2 attribute bytes) -- confirm */
+	0x24, 0x00, 0x3C, 0x01, 0x44, 0x00, 0x3D, 0x02, 0x44, 0x00, 0x9C, 0x03,
+	0x04, 0x00, 0x9D, 0x04, 0x05, 0xC0, 0x03, 0x40, 0x24, 0xC0, 0x04, 0x41,
+	0x24, 0xC0, 0x05, 0x42, 0x24, 0xC0, 0x08, 0x20, 0x24, 0xC0, 0x09, 0x21,
+	0x24, 0xC0, 0x0A, 0x22, 0x24, 0xC0, 0x0D, 0x30, 0x24, 0xC0, 0x0E, 0x31,
+	0x24, 0xC0, 0x0F, 0x32, 0x24, 0xC0, 0x12, 0x10, 0x24, 0xC0, 0x13, 0x11,
+	0x24, 0xC0, 0x14, 0x12, 0x24, 0xC0, 0x23, 0x21, 0x44, 0xC0, 0x24, 0x22,
+	0x55, 0xC0, 0x25, 0x41, 0x44, 0xC0, 0x26, 0x42, 0x55, 0xC0, 0x27, 0x11,
+	0x44, 0xC0, 0x28, 0x12, 0x55, 0xC0, 0x29, 0x31, 0x44, 0xC0, 0x2A, 0x32,
+	0x55, 0xC0, 0x2B, 0x23, 0x04, 0xC0, 0x2C, 0x24, 0x05, 0xC0, 0x2D, 0x43,
+	0x04, 0xC0, 0x2E, 0x44, 0x05, 0xC0, 0x2F, 0x13, 0x04, 0xC0, 0x30, 0x14,
+	0x05, 0xC0, 0x31, 0x33, 0x04, 0xC0, 0x32, 0x34, 0x05, 0xC0, 0x9C, 0x06,
+	0x04, 0xC0, 0x9D, 0x07, 0x04, 0xC0, 0xA0, 0x08, 0x04, 0xC0, 0xA1, 0x09,
+	0x04, 0xC0, 0xAC, 0x26, 0x04, 0xC0, 0xAD, 0x27, 0x04, 0xC0, 0xAE, 0x28,
+	0x04, 0xC0, 0xAF, 0x29, 0x04, 0xCC, 0xA8, 0x15, 0x04, 0xCC, 0xA9, 0x25,
+	0x04, 0x00, 0x00	/* NOTE(review): the final 0x00, 0x00 presumably ends the list -- confirm */
+};
+
+static const unsigned char t0_codeblock[] = {
+	0x00, 0x01, 0x00, 0x0A, 0x00, 0x00, 0x01, 0x00, 0x0D, 0x00, 0x00, 0x01,
+	0x00, 0x0E, 0x00, 0x00, 0x01, 0x00, 0x0F, 0x00, 0x00, 0x01, 0x01, 0x08,
+	0x00, 0x00, 0x01, 0x01, 0x09, 0x00, 0x00, 0x01, 0x02, 0x08, 0x00, 0x00,
+	0x01, 0x02, 0x09, 0x00, 0x00, 0x25, 0x25, 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_BAD_CCS), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_BAD_CIPHER_SUITE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_BAD_COMPRESSION), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_BAD_FINISHED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_BAD_FRAGLEN), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_BAD_HANDSHAKE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_BAD_HELLO_DONE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_BAD_PARAM), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_BAD_SECRENEG), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_BAD_SNI), 0x00, 0x00, 0x01, T0_INT1(BR_ERR_BAD_VERSION),
+	0x00, 0x00, 0x01, T0_INT1(BR_ERR_EXTRA_EXTENSION), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_INVALID_ALGORITHM), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_LIMIT_EXCEEDED), 0x00, 0x00, 0x01, T0_INT1(BR_ERR_OK),
+	0x00, 0x00, 0x01, T0_INT1(BR_ERR_OVERSIZED_ID), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_RESUME_MISMATCH), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_UNEXPECTED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_UNSUPPORTED_VERSION), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_WRONG_KEY_USAGE), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, action)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, alert)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, application_data)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(br_ssl_client_context, auth_type)), 0x00, 0x00,
+	0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, session) + offsetof(br_ssl_session_parameters, cipher_suite)),
+	0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, client_random)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(br_ssl_engine_context, close_received)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, ecdhe_curve)),
+	0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, ecdhe_point)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(br_ssl_engine_context, ecdhe_point_len)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, flags)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(br_ssl_client_context, hash_id)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(br_ssl_client_context, hashes)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, log_max_frag_len)),
+	0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_client_context, min_clienthello_len)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, pad)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(br_ssl_engine_context, protocol_names_num)),
+	0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, record_type_in)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(br_ssl_engine_context, record_type_out)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, reneg)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, saved_finished)),
+	0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, selected_protocol)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, server_name)),
+	0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, server_random)), 0x00, 0x00,
+	0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, session) + offsetof(br_ssl_session_parameters, session_id)),
+	0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, session) + offsetof(br_ssl_session_parameters, session_id_len)),
+	0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, shutdown_recv)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(br_ssl_engine_context, suites_buf)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(br_ssl_engine_context, suites_num)), 0x00, 0x00,
+	0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, session) + offsetof(br_ssl_session_parameters, version)),
+	0x00, 0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, version_in)),
+	0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, version_max)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(br_ssl_engine_context, version_min)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, version_out)),
+	0x00, 0x00, 0x09, 0x26, 0x58, 0x06, 0x02, 0x68, 0x28, 0x00, 0x00, 0x06,
+	0x08, 0x2C, 0x0E, 0x05, 0x02, 0x71, 0x28, 0x04, 0x01, 0x3C, 0x00, 0x00,
+	0x01, 0x01, 0x00, 0x01, 0x03, 0x00, 0x99, 0x26, 0x5E, 0x44, 0x9D, 0x26,
+	0x05, 0x04, 0x60, 0x01, 0x00, 0x00, 0x02, 0x00, 0x0E, 0x06, 0x02, 0x9D,
+	0x00, 0x5E, 0x04, 0x6B, 0x00, 0x06, 0x02, 0x68, 0x28, 0x00, 0x00, 0x26,
+	0x89, 0x44, 0x05, 0x03, 0x01, 0x0C, 0x08, 0x44, 0x79, 0x2C, 0xAB, 0x1C,
+	0x84, 0x01, 0x0C, 0x31, 0x00, 0x00, 0x26, 0x1F, 0x01, 0x08, 0x0B, 0x44,
+	0x5C, 0x1F, 0x08, 0x00, 0x01, 0x03, 0x00, 0x77, 0x2E, 0x02, 0x00, 0x36,
+	0x17, 0x01, 0x01, 0x0B, 0x77, 0x3E, 0x29, 0x1A, 0x36, 0x06, 0x07, 0x02,
+	0x00, 0xCF, 0x03, 0x00, 0x04, 0x75, 0x01, 0x00, 0xC5, 0x02, 0x00, 0x26,
+	0x1A, 0x17, 0x06, 0x02, 0x6F, 0x28, 0xCF, 0x04, 0x76, 0x01, 0x01, 0x00,
+	0x77, 0x3E, 0x01, 0x16, 0x87, 0x3E, 0x01, 0x00, 0x8A, 0x3C, 0x34, 0xD5,
+	0x29, 0xB4, 0x06, 0x09, 0x01, 0x7F, 0xAF, 0x01, 0x7F, 0xD2, 0x04, 0x80,
+	0x53, 0xB1, 0x79, 0x2C, 0xA1, 0x01, T0_INT1(BR_KEYTYPE_SIGN), 0x17,
+	0x06, 0x01, 0xB5, 0xB8, 0x26, 0x01, 0x0D, 0x0E, 0x06, 0x07, 0x25, 0xB7,
+	0xB8, 0x01, 0x7F, 0x04, 0x02, 0x01, 0x00, 0x03, 0x00, 0x01, 0x0E, 0x0E,
+	0x05, 0x02, 0x72, 0x28, 0x06, 0x02, 0x67, 0x28, 0x33, 0x06, 0x02, 0x72,
+	0x28, 0x02, 0x00, 0x06, 0x1C, 0xD3, 0x80, 0x2E, 0x01, 0x81, 0x7F, 0x0E,
+	0x06, 0x0D, 0x25, 0x01, 0x10, 0xDE, 0x01, 0x00, 0xDD, 0x79, 0x2C, 0xAB,
+	0x24, 0x04, 0x04, 0xD6, 0x06, 0x01, 0xD4, 0x04, 0x01, 0xD6, 0x01, 0x7F,
+	0xD2, 0x01, 0x7F, 0xAF, 0x01, 0x01, 0x77, 0x3E, 0x01, 0x17, 0x87, 0x3E,
+	0x00, 0x00, 0x38, 0x38, 0x00, 0x00, 0x9A, 0x01, 0x0C, 0x11, 0x01, 0x00,
+	0x38, 0x0E, 0x06, 0x05, 0x25, 0x01,
+	T0_INT1(BR_KEYTYPE_RSA | BR_KEYTYPE_KEYX), 0x04, 0x30, 0x01, 0x01,
+	0x38, 0x0E, 0x06, 0x05, 0x25, 0x01,
+	T0_INT1(BR_KEYTYPE_RSA | BR_KEYTYPE_SIGN), 0x04, 0x25, 0x01, 0x02,
+	0x38, 0x0E, 0x06, 0x05, 0x25, 0x01,
+	T0_INT1(BR_KEYTYPE_EC  | BR_KEYTYPE_SIGN), 0x04, 0x1A, 0x01, 0x03,
+	0x38, 0x0E, 0x06, 0x05, 0x25, 0x01,
+	T0_INT1(BR_KEYTYPE_EC  | BR_KEYTYPE_KEYX), 0x04, 0x0F, 0x01, 0x04,
+	0x38, 0x0E, 0x06, 0x05, 0x25, 0x01,
+	T0_INT1(BR_KEYTYPE_EC  | BR_KEYTYPE_KEYX), 0x04, 0x04, 0x01, 0x00,
+	0x44, 0x25, 0x00, 0x00, 0x82, 0x2E, 0x01, 0x0E, 0x0E, 0x06, 0x04, 0x01,
+	0x00, 0x04, 0x02, 0x01, 0x05, 0x00, 0x00, 0x40, 0x06, 0x04, 0x01, 0x06,
+	0x04, 0x02, 0x01, 0x00, 0x00, 0x00, 0x88, 0x2E, 0x26, 0x06, 0x08, 0x01,
+	0x01, 0x09, 0x01, 0x11, 0x07, 0x04, 0x03, 0x25, 0x01, 0x05, 0x00, 0x01,
+	0x41, 0x03, 0x00, 0x25, 0x01, 0x00, 0x43, 0x06, 0x03, 0x02, 0x00, 0x08,
+	0x42, 0x06, 0x03, 0x02, 0x00, 0x08, 0x26, 0x06, 0x06, 0x01, 0x01, 0x0B,
+	0x01, 0x06, 0x08, 0x00, 0x00, 0x8B, 0x3F, 0x26, 0x06, 0x03, 0x01, 0x09,
+	0x08, 0x00, 0x01, 0x40, 0x26, 0x06, 0x1E, 0x01, 0x00, 0x03, 0x00, 0x26,
+	0x06, 0x0E, 0x26, 0x01, 0x01, 0x17, 0x02, 0x00, 0x08, 0x03, 0x00, 0x01,
+	0x01, 0x11, 0x04, 0x6F, 0x25, 0x02, 0x00, 0x01, 0x01, 0x0B, 0x01, 0x06,
+	0x08, 0x00, 0x00, 0x7F, 0x2D, 0x44, 0x11, 0x01, 0x01, 0x17, 0x35, 0x00,
+	0x00, 0x9F, 0xCE, 0x26, 0x01, 0x07, 0x17, 0x01, 0x00, 0x38, 0x0E, 0x06,
+	0x09, 0x25, 0x01, 0x10, 0x17, 0x06, 0x01, 0x9F, 0x04, 0x35, 0x01, 0x01,
+	0x38, 0x0E, 0x06, 0x2C, 0x25, 0x25, 0x01, 0x00, 0x77, 0x3E, 0xB3, 0x88,
+	0x2E, 0x01, 0x01, 0x0E, 0x01, 0x01, 0xA8, 0x37, 0x06, 0x17, 0x29, 0x1A,
+	0x36, 0x06, 0x04, 0xCE, 0x25, 0x04, 0x78, 0x01, 0x80, 0x64, 0xC5, 0x01,
+	0x01, 0x77, 0x3E, 0x01, 0x17, 0x87, 0x3E, 0x04, 0x01, 0x9F, 0x04, 0x03,
+	0x72, 0x28, 0x25, 0x04, 0xFF, 0x34, 0x01, 0x26, 0x03, 0x00, 0x09, 0x26,
+	0x58, 0x06, 0x02, 0x68, 0x28, 0x02, 0x00, 0x00, 0x00, 0x9A, 0x01, 0x0F,
+	0x17, 0x00, 0x00, 0x76, 0x2E, 0x01, 0x00, 0x38, 0x0E, 0x06, 0x10, 0x25,
+	0x26, 0x01, 0x01, 0x0D, 0x06, 0x03, 0x25, 0x01, 0x02, 0x76, 0x3E, 0x01,
+	0x00, 0x04, 0x21, 0x01, 0x01, 0x38, 0x0E, 0x06, 0x14, 0x25, 0x01, 0x00,
+	0x76, 0x3E, 0x26, 0x01, 0x80, 0x64, 0x0E, 0x06, 0x05, 0x01, 0x82, 0x00,
+	0x08, 0x28, 0x5A, 0x04, 0x07, 0x25, 0x01, 0x82, 0x00, 0x08, 0x28, 0x25,
+	0x00, 0x00, 0x01, 0x00, 0x2F, 0x06, 0x05, 0x3A, 0xAC, 0x37, 0x04, 0x78,
+	0x26, 0x06, 0x04, 0x01, 0x01, 0x8F, 0x3E, 0x00, 0x01, 0xBF, 0xAA, 0xBF,
+	0xAA, 0xC1, 0x84, 0x44, 0x26, 0x03, 0x00, 0xB6, 0x9B, 0x9B, 0x02, 0x00,
+	0x4D, 0x26, 0x58, 0x06, 0x0A, 0x01, 0x03, 0xA8, 0x06, 0x02, 0x72, 0x28,
+	0x25, 0x04, 0x03, 0x5C, 0x8A, 0x3C, 0x00, 0x00, 0x2F, 0x06, 0x0B, 0x86,
+	0x2E, 0x01, 0x14, 0x0D, 0x06, 0x02, 0x72, 0x28, 0x04, 0x11, 0xCE, 0x01,
+	0x07, 0x17, 0x26, 0x01, 0x02, 0x0D, 0x06, 0x06, 0x06, 0x02, 0x72, 0x28,
+	0x04, 0x70, 0x25, 0xC2, 0x01, 0x01, 0x0D, 0x33, 0x37, 0x06, 0x02, 0x61,
+	0x28, 0x26, 0x01, 0x01, 0xC8, 0x36, 0xB2, 0x00, 0x01, 0xB8, 0x01, 0x0B,
+	0x0E, 0x05, 0x02, 0x72, 0x28, 0x26, 0x01, 0x03, 0x0E, 0x06, 0x08, 0xC0,
+	0x06, 0x02, 0x68, 0x28, 0x44, 0x25, 0x00, 0x44, 0x57, 0xC0, 0xAA, 0x26,
+	0x06, 0x23, 0xC0, 0xAA, 0x26, 0x56, 0x26, 0x06, 0x18, 0x26, 0x01, 0x82,
+	0x00, 0x0F, 0x06, 0x05, 0x01, 0x82, 0x00, 0x04, 0x01, 0x26, 0x03, 0x00,
+	0x84, 0x02, 0x00, 0xB6, 0x02, 0x00, 0x53, 0x04, 0x65, 0x9B, 0x54, 0x04,
+	0x5A, 0x9B, 0x9B, 0x55, 0x26, 0x06, 0x02, 0x35, 0x00, 0x25, 0x2B, 0x00,
+	0x00, 0x79, 0x2C, 0xA1, 0x01, 0x7F, 0xB0, 0x26, 0x58, 0x06, 0x02, 0x35,
+	0x28, 0x26, 0x05, 0x02, 0x72, 0x28, 0x38, 0x17, 0x0D, 0x06, 0x02, 0x74,
+	0x28, 0x3B, 0x00, 0x00, 0x9C, 0xB8, 0x01, 0x14, 0x0D, 0x06, 0x02, 0x72,
+	0x28, 0x84, 0x01, 0x0C, 0x08, 0x01, 0x0C, 0xB6, 0x9B, 0x84, 0x26, 0x01,
+	0x0C, 0x08, 0x01, 0x0C, 0x30, 0x05, 0x02, 0x64, 0x28, 0x00, 0x00, 0xB9,
+	0x06, 0x02, 0x72, 0x28, 0x06, 0x02, 0x66, 0x28, 0x00, 0x0A, 0xB8, 0x01,
+	0x02, 0x0E, 0x05, 0x02, 0x72, 0x28, 0xBF, 0x03, 0x00, 0x02, 0x00, 0x95,
+	0x2C, 0x0A, 0x02, 0x00, 0x94, 0x2C, 0x0F, 0x37, 0x06, 0x02, 0x73, 0x28,
+	0x02, 0x00, 0x93, 0x2C, 0x0D, 0x06, 0x02, 0x6B, 0x28, 0x02, 0x00, 0x96,
+	0x3C, 0x8C, 0x01, 0x20, 0xB6, 0x01, 0x00, 0x03, 0x01, 0xC1, 0x03, 0x02,
+	0x02, 0x02, 0x01, 0x20, 0x0F, 0x06, 0x02, 0x70, 0x28, 0x84, 0x02, 0x02,
+	0xB6, 0x02, 0x02, 0x8E, 0x2E, 0x0E, 0x02, 0x02, 0x01, 0x00, 0x0F, 0x17,
+	0x06, 0x0B, 0x8D, 0x84, 0x02, 0x02, 0x30, 0x06, 0x04, 0x01, 0x7F, 0x03,
+	0x01, 0x8D, 0x84, 0x02, 0x02, 0x31, 0x02, 0x02, 0x8E, 0x3E, 0x02, 0x00,
+	0x92, 0x02, 0x01, 0x98, 0xBF, 0x26, 0xC3, 0x58, 0x06, 0x02, 0x62, 0x28,
+	0x26, 0xCD, 0x02, 0x00, 0x01, 0x86, 0x03, 0x0A, 0x17, 0x06, 0x02, 0x62,
+	0x28, 0x79, 0x02, 0x01, 0x98, 0xC1, 0x06, 0x02, 0x63, 0x28, 0x26, 0x06,
+	0x81, 0x47, 0xBF, 0xAA, 0xA6, 0x03, 0x03, 0xA4, 0x03, 0x04, 0xA2, 0x03,
+	0x05, 0xA5, 0x03, 0x06, 0xA7, 0x03, 0x07, 0xA3, 0x03, 0x08, 0x27, 0x03,
+	0x09, 0x26, 0x06, 0x81, 0x18, 0xBF, 0x01, 0x00, 0x38, 0x0E, 0x06, 0x0F,
+	0x25, 0x02, 0x03, 0x05, 0x02, 0x6C, 0x28, 0x01, 0x00, 0x03, 0x03, 0xBE,
+	0x04, 0x80, 0x7F, 0x01, 0x01, 0x38, 0x0E, 0x06, 0x0F, 0x25, 0x02, 0x05,
+	0x05, 0x02, 0x6C, 0x28, 0x01, 0x00, 0x03, 0x05, 0xBC, 0x04, 0x80, 0x6A,
+	0x01, 0x83, 0xFE, 0x01, 0x38, 0x0E, 0x06, 0x0F, 0x25, 0x02, 0x04, 0x05,
+	0x02, 0x6C, 0x28, 0x01, 0x00, 0x03, 0x04, 0xBD, 0x04, 0x80, 0x53, 0x01,
+	0x0D, 0x38, 0x0E, 0x06, 0x0E, 0x25, 0x02, 0x06, 0x05, 0x02, 0x6C, 0x28,
+	0x01, 0x00, 0x03, 0x06, 0xBA, 0x04, 0x3F, 0x01, 0x0A, 0x38, 0x0E, 0x06,
+	0x0E, 0x25, 0x02, 0x07, 0x05, 0x02, 0x6C, 0x28, 0x01, 0x00, 0x03, 0x07,
+	0xBA, 0x04, 0x2B, 0x01, 0x0B, 0x38, 0x0E, 0x06, 0x0E, 0x25, 0x02, 0x08,
+	0x05, 0x02, 0x6C, 0x28, 0x01, 0x00, 0x03, 0x08, 0xBA, 0x04, 0x17, 0x01,
+	0x10, 0x38, 0x0E, 0x06, 0x0E, 0x25, 0x02, 0x09, 0x05, 0x02, 0x6C, 0x28,
+	0x01, 0x00, 0x03, 0x09, 0xAE, 0x04, 0x03, 0x6C, 0x28, 0x25, 0x04, 0xFE,
+	0x64, 0x02, 0x04, 0x06, 0x0D, 0x02, 0x04, 0x01, 0x05, 0x0F, 0x06, 0x02,
+	0x69, 0x28, 0x01, 0x01, 0x88, 0x3E, 0x9B, 0x04, 0x0C, 0xA4, 0x01, 0x05,
+	0x0F, 0x06, 0x02, 0x69, 0x28, 0x01, 0x01, 0x88, 0x3E, 0x9B, 0x02, 0x01,
+	0x00, 0x04, 0xB8, 0x01, 0x0C, 0x0E, 0x05, 0x02, 0x72, 0x28, 0xC1, 0x01,
+	0x03, 0x0E, 0x05, 0x02, 0x6D, 0x28, 0xBF, 0x26, 0x7C, 0x3E, 0x26, 0x01,
+	0x20, 0x10, 0x06, 0x02, 0x6D, 0x28, 0x40, 0x44, 0x11, 0x01, 0x01, 0x17,
+	0x05, 0x02, 0x6D, 0x28, 0xC1, 0x26, 0x01, 0x81, 0x05, 0x0F, 0x06, 0x02,
+	0x6D, 0x28, 0x26, 0x7E, 0x3E, 0x7D, 0x44, 0xB6, 0x92, 0x2C, 0x01, 0x86,
+	0x03, 0x10, 0x03, 0x00, 0x79, 0x2C, 0xCB, 0x03, 0x01, 0x01, 0x02, 0x03,
+	0x02, 0x02, 0x00, 0x06, 0x21, 0xC1, 0x26, 0x26, 0x01, 0x02, 0x0A, 0x44,
+	0x01, 0x06, 0x0F, 0x37, 0x06, 0x02, 0x6D, 0x28, 0x03, 0x02, 0xC1, 0x02,
+	0x01, 0x01, 0x01, 0x0B, 0x01, 0x03, 0x08, 0x0E, 0x05, 0x02, 0x6D, 0x28,
+	0x04, 0x08, 0x02, 0x01, 0x06, 0x04, 0x01, 0x00, 0x03, 0x02, 0xBF, 0x26,
+	0x03, 0x03, 0x26, 0x01, 0x84, 0x00, 0x0F, 0x06, 0x02, 0x6E, 0x28, 0x84,
+	0x44, 0xB6, 0x02, 0x02, 0x02, 0x01, 0x02, 0x03, 0x50, 0x26, 0x06, 0x01,
+	0x28, 0x25, 0x9B, 0x00, 0x02, 0x03, 0x00, 0x03, 0x01, 0x02, 0x00, 0x97,
+	0x02, 0x01, 0x02, 0x00, 0x39, 0x26, 0x01, 0x00, 0x0E, 0x06, 0x02, 0x60,
+	0x00, 0xD0, 0x04, 0x74, 0x02, 0x01, 0x00, 0x03, 0x00, 0xC1, 0xAA, 0x26,
+	0x06, 0x80, 0x43, 0xC1, 0x01, 0x01, 0x38, 0x0E, 0x06, 0x06, 0x25, 0x01,
+	0x81, 0x7F, 0x04, 0x2E, 0x01, 0x80, 0x40, 0x38, 0x0E, 0x06, 0x07, 0x25,
+	0x01, 0x83, 0xFE, 0x00, 0x04, 0x20, 0x01, 0x80, 0x41, 0x38, 0x0E, 0x06,
+	0x07, 0x25, 0x01, 0x84, 0x80, 0x00, 0x04, 0x12, 0x01, 0x80, 0x42, 0x38,
+	0x0E, 0x06, 0x07, 0x25, 0x01, 0x88, 0x80, 0x00, 0x04, 0x04, 0x01, 0x00,
+	0x44, 0x25, 0x02, 0x00, 0x37, 0x03, 0x00, 0x04, 0xFF, 0x39, 0x9B, 0x79,
+	0x2C, 0xC9, 0x05, 0x09, 0x02, 0x00, 0x01, 0x83, 0xFF, 0x7F, 0x17, 0x03,
+	0x00, 0x92, 0x2C, 0x01, 0x86, 0x03, 0x10, 0x06, 0x3A, 0xBB, 0x26, 0x81,
+	0x3D, 0x41, 0x25, 0x26, 0x01, 0x08, 0x0B, 0x37, 0x01, 0x8C, 0x80, 0x00,
+	0x37, 0x17, 0x02, 0x00, 0x17, 0x02, 0x00, 0x01, 0x8C, 0x80, 0x00, 0x17,
+	0x06, 0x19, 0x26, 0x01, 0x81, 0x7F, 0x17, 0x06, 0x05, 0x01, 0x84, 0x80,
+	0x00, 0x37, 0x26, 0x01, 0x83, 0xFE, 0x00, 0x17, 0x06, 0x05, 0x01, 0x88,
+	0x80, 0x00, 0x37, 0x03, 0x00, 0x04, 0x09, 0x02, 0x00, 0x01, 0x8C, 0x88,
+	0x01, 0x17, 0x03, 0x00, 0x16, 0xBF, 0xAA, 0x26, 0x06, 0x23, 0xBF, 0xAA,
+	0x26, 0x15, 0x26, 0x06, 0x18, 0x26, 0x01, 0x82, 0x00, 0x0F, 0x06, 0x05,
+	0x01, 0x82, 0x00, 0x04, 0x01, 0x26, 0x03, 0x01, 0x84, 0x02, 0x01, 0xB6,
+	0x02, 0x01, 0x12, 0x04, 0x65, 0x9B, 0x13, 0x04, 0x5A, 0x9B, 0x14, 0x9B,
+	0x02, 0x00, 0x2A, 0x00, 0x00, 0xB9, 0x26, 0x5A, 0x06, 0x07, 0x25, 0x06,
+	0x02, 0x66, 0x28, 0x04, 0x74, 0x00, 0x00, 0xC2, 0x01, 0x03, 0xC0, 0x44,
+	0x25, 0x44, 0x00, 0x00, 0xBF, 0xC6, 0x00, 0x03, 0x01, 0x00, 0x03, 0x00,
+	0xBF, 0xAA, 0x26, 0x06, 0x80, 0x50, 0xC1, 0x03, 0x01, 0xC1, 0x03, 0x02,
+	0x02, 0x01, 0x01, 0x08, 0x0E, 0x06, 0x16, 0x02, 0x02, 0x01, 0x0F, 0x0C,
+	0x06, 0x0D, 0x01, 0x01, 0x02, 0x02, 0x01, 0x10, 0x08, 0x0B, 0x02, 0x00,
+	0x37, 0x03, 0x00, 0x04, 0x2A, 0x02, 0x01, 0x01, 0x02, 0x10, 0x02, 0x01,
+	0x01, 0x06, 0x0C, 0x17, 0x02, 0x02, 0x01, 0x01, 0x0E, 0x02, 0x02, 0x01,
+	0x03, 0x0E, 0x37, 0x17, 0x06, 0x11, 0x02, 0x00, 0x01, 0x01, 0x02, 0x02,
+	0x5D, 0x01, 0x02, 0x0B, 0x02, 0x01, 0x08, 0x0B, 0x37, 0x03, 0x00, 0x04,
+	0xFF, 0x2C, 0x9B, 0x02, 0x00, 0x00, 0x00, 0xBF, 0x01, 0x01, 0x0E, 0x05,
+	0x02, 0x65, 0x28, 0xC1, 0x01, 0x08, 0x08, 0x82, 0x2E, 0x0E, 0x05, 0x02,
+	0x65, 0x28, 0x00, 0x00, 0xBF, 0x88, 0x2E, 0x05, 0x15, 0x01, 0x01, 0x0E,
+	0x05, 0x02, 0x69, 0x28, 0xC1, 0x01, 0x00, 0x0E, 0x05, 0x02, 0x69, 0x28,
+	0x01, 0x02, 0x88, 0x3E, 0x04, 0x1C, 0x01, 0x19, 0x0E, 0x05, 0x02, 0x69,
+	0x28, 0xC1, 0x01, 0x18, 0x0E, 0x05, 0x02, 0x69, 0x28, 0x84, 0x01, 0x18,
+	0xB6, 0x89, 0x84, 0x01, 0x18, 0x30, 0x05, 0x02, 0x69, 0x28, 0x00, 0x00,
+	0xBF, 0x06, 0x02, 0x6A, 0x28, 0x00, 0x00, 0x01, 0x02, 0x97, 0xC2, 0x01,
+	0x08, 0x0B, 0xC2, 0x08, 0x00, 0x00, 0x01, 0x03, 0x97, 0xC2, 0x01, 0x08,
+	0x0B, 0xC2, 0x08, 0x01, 0x08, 0x0B, 0xC2, 0x08, 0x00, 0x00, 0x01, 0x01,
+	0x97, 0xC2, 0x00, 0x00, 0x3A, 0x26, 0x58, 0x05, 0x01, 0x00, 0x25, 0xD0,
+	0x04, 0x76, 0x02, 0x03, 0x00, 0x91, 0x2E, 0x03, 0x01, 0x01, 0x00, 0x26,
+	0x02, 0x01, 0x0A, 0x06, 0x10, 0x26, 0x01, 0x01, 0x0B, 0x90, 0x08, 0x2C,
+	0x02, 0x00, 0x0E, 0x06, 0x01, 0x00, 0x5C, 0x04, 0x6A, 0x25, 0x01, 0x7F,
+	0x00, 0x00, 0x01, 0x15, 0x87, 0x3E, 0x44, 0x52, 0x25, 0x52, 0x25, 0x29,
+	0x00, 0x00, 0x01, 0x01, 0x44, 0xC4, 0x00, 0x00, 0x44, 0x38, 0x97, 0x44,
+	0x26, 0x06, 0x05, 0xC2, 0x25, 0x5D, 0x04, 0x78, 0x25, 0x00, 0x00, 0x26,
+	0x01, 0x81, 0xAC, 0x00, 0x0E, 0x06, 0x04, 0x25, 0x01, 0x7F, 0x00, 0x9A,
+	0x59, 0x00, 0x02, 0x03, 0x00, 0x79, 0x2C, 0x9A, 0x03, 0x01, 0x02, 0x01,
+	0x01, 0x0F, 0x17, 0x02, 0x01, 0x01, 0x04, 0x11, 0x01, 0x0F, 0x17, 0x02,
+	0x01, 0x01, 0x08, 0x11, 0x01, 0x0F, 0x17, 0x01, 0x00, 0x38, 0x0E, 0x06,
+	0x10, 0x25, 0x01, 0x00, 0x01, 0x18, 0x02, 0x00, 0x06, 0x03, 0x49, 0x04,
+	0x01, 0x4A, 0x04, 0x81, 0x0D, 0x01, 0x01, 0x38, 0x0E, 0x06, 0x10, 0x25,
+	0x01, 0x01, 0x01, 0x10, 0x02, 0x00, 0x06, 0x03, 0x49, 0x04, 0x01, 0x4A,
+	0x04, 0x80, 0x77, 0x01, 0x02, 0x38, 0x0E, 0x06, 0x10, 0x25, 0x01, 0x01,
+	0x01, 0x20, 0x02, 0x00, 0x06, 0x03, 0x49, 0x04, 0x01, 0x4A, 0x04, 0x80,
+	0x61, 0x01, 0x03, 0x38, 0x0E, 0x06, 0x0F, 0x25, 0x25, 0x01, 0x10, 0x02,
+	0x00, 0x06, 0x03, 0x47, 0x04, 0x01, 0x48, 0x04, 0x80, 0x4C, 0x01, 0x04,
+	0x38, 0x0E, 0x06, 0x0E, 0x25, 0x25, 0x01, 0x20, 0x02, 0x00, 0x06, 0x03,
+	0x47, 0x04, 0x01, 0x48, 0x04, 0x38, 0x01, 0x05, 0x38, 0x0E, 0x06, 0x0C,
+	0x25, 0x25, 0x02, 0x00, 0x06, 0x03, 0x4B, 0x04, 0x01, 0x4C, 0x04, 0x26,
+	0x26, 0x01, 0x09, 0x0F, 0x06, 0x02, 0x68, 0x28, 0x44, 0x25, 0x26, 0x01,
+	0x01, 0x17, 0x01, 0x04, 0x0B, 0x01, 0x10, 0x08, 0x44, 0x01, 0x08, 0x17,
+	0x01, 0x10, 0x44, 0x09, 0x02, 0x00, 0x06, 0x03, 0x45, 0x04, 0x01, 0x46,
+	0x00, 0x25, 0x00, 0x00, 0x9A, 0x01, 0x0C, 0x11, 0x01, 0x02, 0x0F, 0x00,
+	0x00, 0x9A, 0x01, 0x0C, 0x11, 0x26, 0x5B, 0x44, 0x01, 0x03, 0x0A, 0x17,
+	0x00, 0x00, 0x9A, 0x01, 0x0C, 0x11, 0x01, 0x01, 0x0E, 0x00, 0x00, 0x9A,
+	0x01, 0x0C, 0x11, 0x5A, 0x00, 0x00, 0x9A, 0x01, 0x81, 0x70, 0x17, 0x01,
+	0x20, 0x0D, 0x00, 0x00, 0x1B, 0x01, 0x00, 0x75, 0x2E, 0x26, 0x06, 0x22,
+	0x01, 0x01, 0x38, 0x0E, 0x06, 0x06, 0x25, 0x01, 0x00, 0x9E, 0x04, 0x14,
+	0x01, 0x02, 0x38, 0x0E, 0x06, 0x0D, 0x25, 0x77, 0x2E, 0x01, 0x01, 0x0E,
+	0x06, 0x03, 0x01, 0x10, 0x37, 0x04, 0x01, 0x25, 0x04, 0x01, 0x25, 0x7B,
+	0x2E, 0x05, 0x33, 0x2F, 0x06, 0x30, 0x86, 0x2E, 0x01, 0x14, 0x38, 0x0E,
+	0x06, 0x06, 0x25, 0x01, 0x02, 0x37, 0x04, 0x22, 0x01, 0x15, 0x38, 0x0E,
+	0x06, 0x09, 0x25, 0xAD, 0x06, 0x03, 0x01, 0x7F, 0x9E, 0x04, 0x13, 0x01,
+	0x16, 0x38, 0x0E, 0x06, 0x06, 0x25, 0x01, 0x01, 0x37, 0x04, 0x07, 0x25,
+	0x01, 0x04, 0x37, 0x01, 0x00, 0x25, 0x1A, 0x06, 0x03, 0x01, 0x08, 0x37,
+	0x00, 0x00, 0x1B, 0x26, 0x05, 0x13, 0x2F, 0x06, 0x10, 0x86, 0x2E, 0x01,
+	0x15, 0x0E, 0x06, 0x08, 0x25, 0xAD, 0x01, 0x00, 0x77, 0x3E, 0x04, 0x01,
+	0x20, 0x00, 0x00, 0xCE, 0x01, 0x07, 0x17, 0x01, 0x01, 0x0F, 0x06, 0x02,
+	0x72, 0x28, 0x00, 0x01, 0x03, 0x00, 0x29, 0x1A, 0x06, 0x05, 0x02, 0x00,
+	0x87, 0x3E, 0x00, 0xCE, 0x25, 0x04, 0x74, 0x00, 0x01, 0x14, 0xD1, 0x01,
+	0x01, 0xDE, 0x29, 0x26, 0x01, 0x00, 0xC8, 0x01, 0x16, 0xD1, 0xD7, 0x29,
+	0x00, 0x00, 0x01, 0x0B, 0xDE, 0x4E, 0x26, 0x26, 0x01, 0x03, 0x08, 0xDD,
+	0xDD, 0x18, 0x26, 0x58, 0x06, 0x02, 0x25, 0x00, 0xDD, 0x1D, 0x26, 0x06,
+	0x05, 0x84, 0x44, 0xD8, 0x04, 0x77, 0x25, 0x04, 0x6C, 0x00, 0x21, 0x01,
+	0x0F, 0xDE, 0x26, 0x92, 0x2C, 0x01, 0x86, 0x03, 0x10, 0x06, 0x0C, 0x01,
+	0x04, 0x08, 0xDD, 0x80, 0x2E, 0xDE, 0x78, 0x2E, 0xDE, 0x04, 0x02, 0x5E,
+	0xDD, 0x26, 0xDC, 0x84, 0x44, 0xD8, 0x00, 0x02, 0xA4, 0xA6, 0x08, 0xA2,
+	0x08, 0xA5, 0x08, 0xA7, 0x08, 0xA3, 0x08, 0x27, 0x08, 0x03, 0x00, 0x01,
+	0x01, 0xDE, 0x01, 0x27, 0x8E, 0x2E, 0x08, 0x91, 0x2E, 0x01, 0x01, 0x0B,
+	0x08, 0x02, 0x00, 0x06, 0x04, 0x5E, 0x02, 0x00, 0x08, 0x83, 0x2C, 0x38,
+	0x09, 0x26, 0x5B, 0x06, 0x24, 0x02, 0x00, 0x05, 0x04, 0x44, 0x5E, 0x44,
+	0x5F, 0x01, 0x04, 0x09, 0x26, 0x58, 0x06, 0x03, 0x25, 0x01, 0x00, 0x26,
+	0x01, 0x04, 0x08, 0x02, 0x00, 0x08, 0x03, 0x00, 0x44, 0x01, 0x04, 0x08,
+	0x38, 0x08, 0x44, 0x04, 0x03, 0x25, 0x01, 0x7F, 0x03, 0x01, 0xDD, 0x94,
+	0x2C, 0xDC, 0x7A, 0x01, 0x04, 0x19, 0x7A, 0x01, 0x04, 0x08, 0x01, 0x1C,
+	0x32, 0x7A, 0x01, 0x20, 0xD8, 0x8D, 0x8E, 0x2E, 0xDA, 0x91, 0x2E, 0x26,
+	0x01, 0x01, 0x0B, 0xDC, 0x90, 0x44, 0x26, 0x06, 0x0F, 0x5D, 0x38, 0x2C,
+	0x26, 0xC7, 0x05, 0x02, 0x62, 0x28, 0xDC, 0x44, 0x5E, 0x44, 0x04, 0x6E,
+	0x60, 0x01, 0x01, 0xDE, 0x01, 0x00, 0xDE, 0x02, 0x00, 0x06, 0x81, 0x5A,
+	0x02, 0x00, 0xDC, 0xA4, 0x06, 0x0E, 0x01, 0x83, 0xFE, 0x01, 0xDC, 0x89,
+	0xA4, 0x01, 0x04, 0x09, 0x26, 0xDC, 0x5D, 0xDA, 0xA6, 0x06, 0x16, 0x01,
+	0x00, 0xDC, 0x8B, 0xA6, 0x01, 0x04, 0x09, 0x26, 0xDC, 0x01, 0x02, 0x09,
+	0x26, 0xDC, 0x01, 0x00, 0xDE, 0x01, 0x03, 0x09, 0xD9, 0xA2, 0x06, 0x0C,
+	0x01, 0x01, 0xDC, 0x01, 0x01, 0xDC, 0x82, 0x2E, 0x01, 0x08, 0x09, 0xDE,
+	0xA5, 0x06, 0x19, 0x01, 0x0D, 0xDC, 0xA5, 0x01, 0x04, 0x09, 0x26, 0xDC,
+	0x01, 0x02, 0x09, 0xDC, 0x42, 0x06, 0x03, 0x01, 0x03, 0xDB, 0x43, 0x06,
+	0x03, 0x01, 0x01, 0xDB, 0xA7, 0x26, 0x06, 0x36, 0x01, 0x0A, 0xDC, 0x01,
+	0x04, 0x09, 0x26, 0xDC, 0x5F, 0xDC, 0x40, 0x01, 0x00, 0x26, 0x01, 0x82,
+	0x80, 0x80, 0x80, 0x00, 0x17, 0x06, 0x0A, 0x01, 0xFD, 0xFF, 0xFF, 0xFF,
+	0x7F, 0x17, 0x01, 0x1D, 0xDC, 0x26, 0x01, 0x20, 0x0A, 0x06, 0x0C, 0xA0,
+	0x11, 0x01, 0x01, 0x17, 0x06, 0x02, 0x26, 0xDC, 0x5C, 0x04, 0x6E, 0x60,
+	0x04, 0x01, 0x25, 0xA3, 0x06, 0x0A, 0x01, 0x0B, 0xDC, 0x01, 0x02, 0xDC,
+	0x01, 0x82, 0x00, 0xDC, 0x27, 0x26, 0x06, 0x1F, 0x01, 0x10, 0xDC, 0x01,
+	0x04, 0x09, 0x26, 0xDC, 0x5F, 0xDC, 0x85, 0x2C, 0x01, 0x00, 0xA0, 0x0F,
+	0x06, 0x0A, 0x26, 0x1E, 0x26, 0xDE, 0x84, 0x44, 0xD8, 0x5C, 0x04, 0x72,
+	0x60, 0x04, 0x01, 0x25, 0x02, 0x01, 0x58, 0x05, 0x11, 0x01, 0x15, 0xDC,
+	0x02, 0x01, 0x26, 0xDC, 0x26, 0x06, 0x06, 0x5D, 0x01, 0x00, 0xDE, 0x04,
+	0x77, 0x25, 0x00, 0x00, 0x01, 0x10, 0xDE, 0x79, 0x2C, 0x26, 0xCC, 0x06,
+	0x0C, 0xAB, 0x23, 0x26, 0x5E, 0xDD, 0x26, 0xDC, 0x84, 0x44, 0xD8, 0x04,
+	0x0D, 0x26, 0xCA, 0x44, 0xAB, 0x22, 0x26, 0x5C, 0xDD, 0x26, 0xDE, 0x84,
+	0x44, 0xD8, 0x00, 0x00, 0x9C, 0x01, 0x14, 0xDE, 0x01, 0x0C, 0xDD, 0x84,
+	0x01, 0x0C, 0xD8, 0x00, 0x00, 0x51, 0x26, 0x01, 0x00, 0x0E, 0x06, 0x02,
+	0x60, 0x00, 0xCE, 0x25, 0x04, 0x73, 0x00, 0x26, 0xDC, 0xD8, 0x00, 0x00,
+	0x26, 0xDE, 0xD8, 0x00, 0x01, 0x03, 0x00, 0x41, 0x25, 0x26, 0x01, 0x10,
+	0x17, 0x06, 0x06, 0x01, 0x04, 0xDE, 0x02, 0x00, 0xDE, 0x26, 0x01, 0x08,
+	0x17, 0x06, 0x06, 0x01, 0x03, 0xDE, 0x02, 0x00, 0xDE, 0x26, 0x01, 0x20,
+	0x17, 0x06, 0x06, 0x01, 0x05, 0xDE, 0x02, 0x00, 0xDE, 0x26, 0x01, 0x80,
+	0x40, 0x17, 0x06, 0x06, 0x01, 0x06, 0xDE, 0x02, 0x00, 0xDE, 0x01, 0x04,
+	0x17, 0x06, 0x06, 0x01, 0x02, 0xDE, 0x02, 0x00, 0xDE, 0x00, 0x00, 0x26,
+	0x01, 0x08, 0x4F, 0xDE, 0xDE, 0x00, 0x00, 0x26, 0x01, 0x10, 0x4F, 0xDE,
+	0xDC, 0x00, 0x00, 0x26, 0x52, 0x06, 0x02, 0x25, 0x00, 0xCE, 0x25, 0x04,
+	0x76
+};
+
+static const uint16_t t0_caddr[] = { /* start offsets into t0_codeblock, one per interpreted word; T0_ENTER indexes this table with (slot - T0_INTERPRETED) */
+	0,
+	5,
+	10,
+	15,
+	20,
+	25,
+	30,
+	35,
+	40,
+	44,
+	48,
+	52,
+	56,
+	60,
+	64,
+	68,
+	72,
+	76,
+	80,
+	84,
+	88,
+	92,
+	96,
+	100,
+	104,
+	108,
+	112,
+	116,
+	120,
+	124,
+	129,
+	134,
+	139,
+	144,
+	149,
+	154,
+	159,
+	164,
+	169,
+	174,
+	179,
+	184,
+	189,
+	194,
+	199,
+	204,
+	209,
+	214,
+	219,
+	224,
+	229,
+	234,
+	239,
+	244,
+	249,
+	254,
+	259,
+	264,
+	269,
+	274,
+	279,
+	284,
+	289,
+	294,
+	303,
+	316,
+	320,
+	345,
+	351,
+	370,
+	381,
+	422,
+	542,
+	546,
+	611,
+	626,
+	637,
+	655,
+	684,
+	694,
+	730,
+	740,
+	818,
+	832,
+	838,
+	897,
+	916,
+	951,
+	1000,
+	1076,
+	1103,
+	1134,
+	1145,
+	1497,
+	1644,
+	1668,
+	1884,
+	1898,
+	1907,
+	1911,
+	2006,
+	2027,
+	2083,
+	2090,
+	2101,
+	2117,
+	2123,
+	2134,
+	2169,
+	2181,
+	2187,
+	2202,
+	2218,
+	2411,
+	2420,
+	2433,
+	2442,
+	2449,
+	2459,
+	2565,
+	2590,
+	2603,
+	2619,
+	2637,
+	2669,
+	2703,
+	3071,
+	3107,
+	3120,
+	3134,
+	3139,
+	3144,
+	3210,
+	3218,
+	3226
+};
+
+#define T0_INTERPRETED   88   /* opcode bytes below this value are native primitives (switch cases 0..87 in br_ssl_hs_client_run); values at or above it are interpreted words entered through T0_ENTER */
+
+#define T0_ENTER(ip, rp, slot)   do { /* enter interpreted word `slot`: reserve its local slots on the return stack, push a return frame, then jump to the word's code */ \
+		const unsigned char *t0_newip; \
+		uint32_t t0_lnum; \
+		t0_newip = &t0_codeblock[t0_caddr[(slot) - T0_INTERPRETED]]; /* start of the word's bytecode */ \
+		t0_lnum = t0_parse7E_unsigned(&t0_newip); /* leading value of the word: number of return-stack slots to reserve (accessed later through T0_LOCAL) */ \
+		(rp) += t0_lnum; \
+		*((rp) ++) = (uint32_t)((ip) - &t0_codeblock[0]) + (t0_lnum << 16); /* frame word: caller's code offset in the low 16 bits, slot count above; opcode 0 decodes it */ \
+		(ip) = t0_newip; \
+	} while (0)
+
+#define T0_DEFENTRY(name, slot) /* defines `void name(void *ctx)`: points ip at the start of t0_codeblock, then enters interpreted word `slot` */ \
+void \
+name(void *ctx) \
+{ \
+	t0_context *t0ctx = ctx; \
+	t0ctx->ip = &t0_codeblock[0]; /* offset 0, so the frame pushed by T0_ENTER records return offset 0, which opcode 0 treats as "leave the interpreter" */ \
+	T0_ENTER(t0ctx->ip, t0ctx->rp, slot); \
+}
+
+T0_DEFENTRY(br_ssl_hs_client_init_main, 169) /* slot 169 selects t0_caddr[169 - T0_INTERPRETED], i.e. t0_caddr[81] */
+
+#define T0_NEXT(t0ipp)   (*(*(t0ipp)) ++)   /* read the byte at *t0ipp, then advance *t0ipp by one */
+
+void
+br_ssl_hs_client_run(void *t0ctx) /* resume the bytecode interpreter from the dp/rp/ip saved in t0ctx; returns once an opcode yields through T0_CO() or the outermost word returns, after storing dp/rp/ip back */
+{
+	uint32_t *dp, *rp; /* dp: top of the operand stack (T0_PUSH/T0_POP); rp: top of the return/locals stack (T0_RPUSH/T0_RPOP, T0_ENTER frames) */
+	const unsigned char *ip; /* next bytecode byte to execute */
+
+#define T0_LOCAL(x)    (*(rp - 2 - (x))) /* local slot x of the current word: rp[-1] is the frame word written by T0_ENTER, the reserved slots sit just below it */
+#define T0_POP()       (*-- dp)
+#define T0_POPi()      (*(int32_t *)(-- dp)) /* same as T0_POP() but reads the cell as signed */
+#define T0_PEEK(x)     (*(dp - 1 - (x))) /* cell at depth x, 0 being the top */
+#define T0_PEEKi(x)    (*(int32_t *)(dp - 1 - (x)))
+#define T0_PUSH(v)     do { *dp = (v); dp ++; } while (0)
+#define T0_PUSHi(v)    do { *(int32_t *)dp = (v); dp ++; } while (0)
+#define T0_RPOP()      (*-- rp)
+#define T0_RPOPi()     (*(int32_t *)(-- rp))
+#define T0_RPUSH(v)    do { *rp = (v); rp ++; } while (0)
+#define T0_RPUSHi(v)   do { *(int32_t *)rp = (v); rp ++; } while (0)
+#define T0_ROLL(x)     do { /* move the cell at depth x to the top, shifting the x cells above it down by one */ \
+	size_t t0len = (size_t)(x); \
+	uint32_t t0tmp = *(dp - 1 - t0len); \
+	memmove(dp - t0len - 1, dp - t0len, t0len * sizeof *dp); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_SWAP()      do { /* exchange the two top cells */ \
+	uint32_t t0tmp = *(dp - 2); \
+	*(dp - 2) = *(dp - 1); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_ROT()       do { /* ( a b c -- b c a ): third cell moves to the top */ \
+	uint32_t t0tmp = *(dp - 3); \
+	*(dp - 3) = *(dp - 2); \
+	*(dp - 2) = *(dp - 1); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_NROT()       do { /* ( a b c -- c a b ): top cell moves to third position */ \
+	uint32_t t0tmp = *(dp - 1); \
+	*(dp - 1) = *(dp - 2); \
+	*(dp - 2) = *(dp - 3); \
+	*(dp - 3) = t0tmp; \
+} while (0)
+#define T0_PICK(x)      do { /* push a copy of the cell at depth x */ \
+	uint32_t t0depth = (x); \
+	T0_PUSH(T0_PEEK(t0depth)); \
+} while (0)
+#define T0_CO()         do { /* yield: leave the loop; t0_exit stores dp/rp/ip, so the next call resumes right after the yielding opcode */ \
+	goto t0_exit; \
+} while (0)
+#define T0_RET()        goto t0_next /* end the current native opcode early and fetch the next opcode byte */
+
+	dp = ((t0_context *)t0ctx)->dp;
+	rp = ((t0_context *)t0ctx)->rp;
+	ip = ((t0_context *)t0ctx)->ip;
+	goto t0_next; /* jump straight into the loop body */
+	for (;;) {
+		uint32_t t0x;
+
+	t0_next:
+		t0x = T0_NEXT(&ip); /* fetch one opcode byte */
+		if (t0x < T0_INTERPRETED) {
+			switch (t0x) { /* native primitives, one case per opcode */
+				int32_t t0off; /* scratch offset shared by the jump opcodes (4, 5, 6) */
+
+			case 0: /* ret: pop the frame word pushed by T0_ENTER, release the word's local slots, resume the caller */
+				t0x = T0_RPOP();
+				rp -= (t0x >> 16);
+				t0x &= 0xFFFF;
+				if (t0x == 0) { /* return offset 0 marks the outermost frame (see T0_DEFENTRY) */
+					ip = NULL; /* NOTE(review): a later call with this NULL ip would dereference it in T0_NEXT; presumably callers re-initialise first -- confirm */
+					goto t0_exit;
+				}
+				ip = &t0_codeblock[t0x];
+				break;
+			case 1: /* literal constant: push the signed value encoded after the opcode */
+				T0_PUSHi(t0_parse7E_signed(&ip));
+				break;
+			case 2: /* read local: operand is the local slot index */
+				T0_PUSH(T0_LOCAL(t0_parse7E_unsigned(&ip)));
+				break;
+			case 3: /* write local: operand is the local slot index */
+				T0_LOCAL(t0_parse7E_unsigned(&ip)) = T0_POP();
+				break;
+			case 4: /* jump: offset is relative to ip after the operand has been read */
+				t0off = t0_parse7E_signed(&ip);
+				ip += t0off;
+				break;
+			case 5: /* jump if: taken when the popped cell is non-zero */
+				t0off = t0_parse7E_signed(&ip);
+				if (T0_POP()) {
+					ip += t0off;
+				}
+				break;
+			case 6: /* jump if not: taken when the popped cell is zero */
+				t0off = t0_parse7E_signed(&ip);
+				if (!T0_POP()) {
+					ip += t0off;
+				}
+				break;
+			case 7: {
+				/* * */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a * b);
+
+				}
+				break;
+			case 8: {
+				/* + */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a + b);
+
+				}
+				break;
+			case 9: {
+				/* - */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a - b);
+
+				}
+				break;
+			case 10: {
+				/* < (signed compare; true is pushed as all-ones, false as 0) */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a < b));
+
+				}
+				break;
+			case 11: {
+				/* << */
+
+	int c = (int)T0_POPi(); /* NOTE(review): shift count is not range-checked here; presumably the bytecode only supplies 0..31 -- confirm */
+	uint32_t x = T0_POP();
+	T0_PUSH(x << c);
+
+				}
+				break;
+			case 12: {
+				/* <= (signed compare) */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a <= b));
+
+				}
+				break;
+			case 13: {
+				/* <> */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(-(uint32_t)(a != b));
+
+				}
+				break;
+			case 14: {
+				/* = */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(-(uint32_t)(a == b));
+
+				}
+				break;
+			case 15: {
+				/* > (signed compare) */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a > b));
+
+				}
+				break;
+			case 16: {
+				/* >= (signed compare) */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a >= b));
+
+				}
+				break;
+			case 17: {
+				/* >> (shift of a signed value; see opcode 79 for the unsigned form) */
+
+	int c = (int)T0_POPi();
+	int32_t x = T0_POPi();
+	T0_PUSHi(x >> c);
+
+				}
+				break;
+			case 18: {
+				/* anchor-dn-append-name */
+
+	size_t len;
+
+	len = T0_POP(); /* popped even when no client-auth handler is installed */
+	if (CTX->client_auth_vtable != NULL) {
+		(*CTX->client_auth_vtable)->append_name(
+			CTX->client_auth_vtable, ENG->pad, len);
+	}
+
+				}
+				break;
+			case 19: {
+				/* anchor-dn-end-name */
+
+	if (CTX->client_auth_vtable != NULL) {
+		(*CTX->client_auth_vtable)->end_name(
+			CTX->client_auth_vtable);
+	}
+
+				}
+				break;
+			case 20: {
+				/* anchor-dn-end-name-list */
+
+	if (CTX->client_auth_vtable != NULL) {
+		(*CTX->client_auth_vtable)->end_name_list(
+			CTX->client_auth_vtable);
+	}
+
+				}
+				break;
+			case 21: {
+				/* anchor-dn-start-name */
+
+	size_t len;
+
+	len = T0_POP();
+	if (CTX->client_auth_vtable != NULL) {
+		(*CTX->client_auth_vtable)->start_name(
+			CTX->client_auth_vtable, len);
+	}
+
+				}
+				break;
+			case 22: {
+				/* anchor-dn-start-name-list */
+
+	if (CTX->client_auth_vtable != NULL) {
+		(*CTX->client_auth_vtable)->start_name_list(
+			CTX->client_auth_vtable);
+	}
+
+				}
+				break;
+			case 23: {
+				/* and */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a & b);
+
+				}
+				break;
+			case 24: {
+				/* begin-cert: push -1 when the chain is exhausted, else select the next certificate and push its length */
+
+	if (ENG->chain_len == 0) {
+		T0_PUSHi(-1);
+	} else {
+		ENG->cert_cur = ENG->chain->data;
+		ENG->cert_len = ENG->chain->data_len;
+		ENG->chain ++;
+		ENG->chain_len --;
+		T0_PUSH(ENG->cert_len);
+	}
+
+				}
+				break;
+			case 25: {
+				/* bzero */
+
+	size_t len = (size_t)T0_POP();
+	void *addr = (unsigned char *)ENG + (size_t)T0_POP(); /* popped value is a byte offset from ENG */
+	memset(addr, 0, len);
+
+				}
+				break;
+			case 26: {
+				/* can-output? */
+
+	T0_PUSHi(-(ENG->hlen_out > 0));
+
+				}
+				break;
+			case 27: {
+				/* co */
+ T0_CO(); 
+				}
+				break;
+			case 28: {
+				/* compute-Finished-inner */
+
+	int prf_id = T0_POP();
+	int from_client = T0_POPi();
+	unsigned char tmp[48];
+	br_tls_prf_seed_chunk seed;
+
+	br_tls_prf_impl prf = br_ssl_engine_get_PRF(ENG, prf_id);
+	seed.data = tmp;
+	if (ENG->session.version >= BR_TLS12) {
+		seed.len = br_multihash_out(&ENG->mhash, prf_id, tmp);
+	} else { /* before TLS 1.2: MD5 output at tmp, SHA-1 output at tmp + 16, 36 seed bytes in total */
+		br_multihash_out(&ENG->mhash, br_md5_ID, tmp);
+		br_multihash_out(&ENG->mhash, br_sha1_ID, tmp + 16);
+		seed.len = 36;
+	}
+	prf(ENG->pad, 12, ENG->session.master_secret, /* 12 output bytes are written to ENG->pad */
+		sizeof ENG->session.master_secret,
+		from_client ? "client finished" : "server finished",
+		1, &seed);
+
+				}
+				break;
+			case 29: {
+				/* copy-cert-chunk: copy at most sizeof ENG->pad bytes of the current certificate into ENG->pad and push the count */
+
+	size_t clen;
+
+	clen = ENG->cert_len;
+	if (clen > sizeof ENG->pad) {
+		clen = sizeof ENG->pad;
+	}
+	memcpy(ENG->pad, ENG->cert_cur, clen);
+	ENG->cert_cur += clen;
+	ENG->cert_len -= clen;
+	T0_PUSH(clen);
+
+				}
+				break;
+			case 30: {
+				/* copy-protocol-name */
+
+	size_t idx = T0_POP();
+	size_t len = strlen(ENG->protocol_names[idx]);
+	memcpy(ENG->pad, ENG->protocol_names[idx], len); /* NOTE(review): len is not checked against sizeof ENG->pad here; presumably name lengths are bounded elsewhere -- confirm */
+	T0_PUSH(len);
+
+				}
+				break;
+			case 31: {
+				/* data-get8 */
+
+	size_t addr = T0_POP();
+	T0_PUSH(t0_datablock[addr]);
+
+				}
+				break;
+			case 32: {
+				/* discard-input */
+
+	ENG->hlen_in = 0;
+
+				}
+				break;
+			case 33: {
+				/* do-client-sign */
+
+	size_t sig_len;
+
+	sig_len = make_client_sign(CTX);
+	if (sig_len == 0) { /* a zero length is treated as failure */
+		br_ssl_engine_fail(ENG, BR_ERR_INVALID_ALGORITHM);
+		T0_CO();
+	}
+	T0_PUSH(sig_len);
+
+				}
+				break;
+			case 34: {
+				/* do-ecdh */
+
+	unsigned prf_id = T0_POP();
+	unsigned ecdhe = T0_POP();
+	int x;
+
+	x = make_pms_ecdh(CTX, ecdhe, prf_id);
+	if (x < 0) { /* negative result is a negated error code */
+		br_ssl_engine_fail(ENG, -x);
+		T0_CO();
+	} else {
+		T0_PUSH(x);
+	}
+
+				}
+				break;
+			case 35: {
+				/* do-rsa-encrypt */
+
+	int x;
+
+	x = make_pms_rsa(CTX, T0_POP());
+	if (x < 0) { /* negative result is a negated error code */
+		br_ssl_engine_fail(ENG, -x);
+		T0_CO();
+	} else {
+		T0_PUSH(x);
+	}
+
+				}
+				break;
+			case 36: {
+				/* do-static-ecdh */
+
+	unsigned prf_id = T0_POP();
+
+	if (make_pms_static_ecdh(CTX, prf_id) < 0) {
+		br_ssl_engine_fail(ENG, BR_ERR_INVALID_ALGORITHM);
+		T0_CO();
+	}
+
+				}
+				break;
+			case 37: {
+				/* drop */
+ (void)T0_POP(); 
+				}
+				break;
+			case 38: {
+				/* dup */
+ T0_PUSH(T0_PEEK(0)); 
+				}
+				break;
+			case 39: {
+				/* ext-ALPN-length: push 0 when no protocol names are configured, else 6 plus (1 + strlen) for each name */
+
+	size_t u, len;
+
+	if (ENG->protocol_names_num == 0) {
+		T0_PUSH(0);
+		T0_RET();
+	}
+	len = 6; /* presumably the fixed extension/list header bytes -- TODO confirm */
+	for (u = 0; u < ENG->protocol_names_num; u ++) {
+		len += 1 + strlen(ENG->protocol_names[u]);
+	}
+	T0_PUSH(len);
+
+				}
+				break;
+			case 40: {
+				/* fail: report the popped error code to the engine, then yield */
+
+	br_ssl_engine_fail(ENG, (int)T0_POPi());
+	T0_CO();
+
+				}
+				break;
+			case 41: {
+				/* flush-record */
+
+	br_ssl_engine_flush_record(ENG);
+
+				}
+				break;
+			case 42: {
+				/* get-client-chain */
+
+	uint32_t auth_types;
+
+	auth_types = T0_POP();
+	if (CTX->client_auth_vtable != NULL) {
+		br_ssl_client_certificate ux;
+
+		(*CTX->client_auth_vtable)->choose(CTX->client_auth_vtable,
+			CTX, auth_types, &ux);
+		CTX->auth_type = (unsigned char)ux.auth_type;
+		CTX->hash_id = (unsigned char)ux.hash_id;
+		ENG->chain = ux.chain;
+		ENG->chain_len = ux.chain_len;
+	} else { /* no handler installed: record an empty chain */
+		CTX->hash_id = 0;
+		ENG->chain_len = 0;
+	}
+
+				}
+				break;
+			case 43: {
+				/* get-key-type-usages: push 0 when no public key is available, else key_type OR-ed with the usages */
+
+	const br_x509_class *xc;
+	const br_x509_pkey *pk;
+	unsigned usages;
+
+	xc = *(ENG->x509ctx);
+	pk = xc->get_pkey(ENG->x509ctx, &usages);
+	if (pk == NULL) {
+		T0_PUSH(0);
+	} else {
+		T0_PUSH(pk->key_type | usages);
+	}
+
+				}
+				break;
+			case 44: {
+				/* get16 */
+
+	size_t addr = (size_t)T0_POP(); /* byte offset from ENG */
+	T0_PUSH(*(uint16_t *)(void *)((unsigned char *)ENG + addr));
+
+				}
+				break;
+			case 45: {
+				/* get32 */
+
+	size_t addr = (size_t)T0_POP(); /* byte offset from ENG */
+	T0_PUSH(*(uint32_t *)(void *)((unsigned char *)ENG + addr));
+
+				}
+				break;
+			case 46: {
+				/* get8 */
+
+	size_t addr = (size_t)T0_POP(); /* byte offset from ENG */
+	T0_PUSH(*((unsigned char *)ENG + addr));
+
+				}
+				break;
+			case 47: {
+				/* has-input? */
+
+	T0_PUSHi(-(ENG->hlen_in != 0));
+
+				}
+				break;
+			case 48: {
+				/* memcmp: push all-ones when the two regions are equal, 0 otherwise */
+
+	size_t len = (size_t)T0_POP();
+	void *addr2 = (unsigned char *)ENG + (size_t)T0_POP();
+	void *addr1 = (unsigned char *)ENG + (size_t)T0_POP();
+	int x = memcmp(addr1, addr2, len);
+	T0_PUSH((uint32_t)-(x == 0));
+
+				}
+				break;
+			case 49: {
+				/* memcpy: stack order is ( dst src len ), all addresses being byte offsets from ENG */
+
+	size_t len = (size_t)T0_POP();
+	void *src = (unsigned char *)ENG + (size_t)T0_POP();
+	void *dst = (unsigned char *)ENG + (size_t)T0_POP();
+	memcpy(dst, src, len);
+
+				}
+				break;
+			case 50: {
+				/* mkrand */
+
+	size_t len = (size_t)T0_POP();
+	void *addr = (unsigned char *)ENG + (size_t)T0_POP();
+	br_hmac_drbg_generate(&ENG->rng, addr, len);
+
+				}
+				break;
+			case 51: {
+				/* more-incoming-bytes? (pushes the plain C truth value 1 or 0, unlike the -1/0 flags of the other predicates) */
+
+	T0_PUSHi(ENG->hlen_in != 0 || !br_ssl_engine_recvrec_finished(ENG));
+
+				}
+				break;
+			case 52: {
+				/* multihash-init */
+
+	br_multihash_init(&ENG->mhash);
+
+				}
+				break;
+			case 53: {
+				/* neg */
+
+	uint32_t a = T0_POP();
+	T0_PUSH(-a);
+
+				}
+				break;
+			case 54: {
+				/* not (bitwise complement) */
+
+	uint32_t a = T0_POP();
+	T0_PUSH(~a);
+
+				}
+				break;
+			case 55: {
+				/* or */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a | b);
+
+				}
+				break;
+			case 56: {
+				/* over */
+ T0_PUSH(T0_PEEK(1)); 
+				}
+				break;
+			case 57: {
+				/* read-chunk-native: ( addr len -- addr' len' ); the stack is left untouched when no input is available */
+
+	size_t clen = ENG->hlen_in;
+	if (clen > 0) {
+		uint32_t addr, len;
+
+		len = T0_POP();
+		addr = T0_POP();
+		if ((size_t)len < clen) {
+			clen = (size_t)len;
+		}
+		memcpy((unsigned char *)ENG + addr, ENG->hbuf_in, clen);
+		if (ENG->record_type_in == BR_SSL_HANDSHAKE) { /* only handshake records are fed to the running hash */
+			br_multihash_update(&ENG->mhash, ENG->hbuf_in, clen);
+		}
+		T0_PUSH(addr + (uint32_t)clen);
+		T0_PUSH(len - (uint32_t)clen);
+		ENG->hbuf_in += clen;
+		ENG->hlen_in -= clen;
+	}
+
+				}
+				break;
+			case 58: {
+				/* read8-native: push the next input byte, or -1 when no input is available */
+
+	if (ENG->hlen_in > 0) {
+		unsigned char x;
+
+		x = *ENG->hbuf_in ++;
+		if (ENG->record_type_in == BR_SSL_HANDSHAKE) {
+			br_multihash_update(&ENG->mhash, &x, 1);
+		}
+		T0_PUSH(x);
+		ENG->hlen_in --;
+	} else {
+		T0_PUSHi(-1);
+	}
+
+				}
+				break;
+			case 59: {
+				/* set-server-curve */
+
+	const br_x509_class *xc;
+	const br_x509_pkey *pk;
+
+	xc = *(ENG->x509ctx);
+	pk = xc->get_pkey(ENG->x509ctx, NULL);
+	CTX->server_curve = /* NOTE(review): pk is dereferenced without the NULL check that opcode 43 performs; presumably only reached after a key is known -- confirm */
+		(pk->key_type == BR_KEYTYPE_EC) ? pk->key.ec.curve : 0;
+
+				}
+				break;
+			case 60: {
+				/* set16: ( value addr -- ) */
+
+	size_t addr = (size_t)T0_POP();
+	*(uint16_t *)(void *)((unsigned char *)ENG + addr) = (uint16_t)T0_POP();
+
+				}
+				break;
+			case 61: {
+				/* set32: ( value addr -- ) */
+
+	size_t addr = (size_t)T0_POP();
+	*(uint32_t *)(void *)((unsigned char *)ENG + addr) = (uint32_t)T0_POP();
+
+				}
+				break;
+			case 62: {
+				/* set8: ( value addr -- ) */
+
+	size_t addr = (size_t)T0_POP();
+	*((unsigned char *)ENG + addr) = (unsigned char)T0_POP();
+
+				}
+				break;
+			case 63: {
+				/* strlen */
+
+	void *str = (unsigned char *)ENG + (size_t)T0_POP();
+	T0_PUSH((uint32_t)strlen(str));
+
+				}
+				break;
+			case 64: {
+				/* supported-curves: push 0 when no EC implementation is configured */
+
+	uint32_t x = ENG->iec == NULL ? 0 : ENG->iec->supported_curves;
+	T0_PUSH(x);
+
+				}
+				break;
+			case 65: {
+				/* supported-hash-functions: push a bit mask (bit i set for hash id i), then the number of set bits */
+
+	int i;
+	unsigned x, num;
+
+	x = 0;
+	num = 0;
+	for (i = br_sha1_ID; i <= br_sha512_ID; i ++) {
+		if (br_multihash_getimpl(&ENG->mhash, i)) {
+			x |= 1U << i;
+			num ++;
+		}
+	}
+	T0_PUSH(x);
+	T0_PUSH(num);
+
+				}
+				break;
+			case 66: {
+				/* supports-ecdsa? */
+
+	T0_PUSHi(-(ENG->iecdsa != 0));
+
+				}
+				break;
+			case 67: {
+				/* supports-rsa-sign? */
+
+	T0_PUSHi(-(ENG->irsavrfy != 0));
+
+				}
+				break;
+			case 68: {
+				/* swap */
+ T0_SWAP(); 
+				}
+				break;
+			case 69: {
+				/* switch-aesccm-in: ( is_client prf_id cipher_key_len tag_len -- ) */
+
+	int is_client, prf_id;
+	unsigned cipher_key_len, tag_len;
+
+	tag_len = T0_POP();
+	cipher_key_len = T0_POP();
+	prf_id = T0_POP();
+	is_client = T0_POP();
+	br_ssl_engine_switch_ccm_in(ENG, is_client, prf_id,
+		ENG->iaes_ctrcbc, cipher_key_len, tag_len);
+
+				}
+				break;
+			case 70: {
+				/* switch-aesccm-out: ( is_client prf_id cipher_key_len tag_len -- ) */
+
+	int is_client, prf_id;
+	unsigned cipher_key_len, tag_len;
+
+	tag_len = T0_POP();
+	cipher_key_len = T0_POP();
+	prf_id = T0_POP();
+	is_client = T0_POP();
+	br_ssl_engine_switch_ccm_out(ENG, is_client, prf_id,
+		ENG->iaes_ctrcbc, cipher_key_len, tag_len);
+
+				}
+				break;
+			case 71: {
+				/* switch-aesgcm-in: ( is_client prf_id cipher_key_len -- ) */
+
+	int is_client, prf_id;
+	unsigned cipher_key_len;
+
+	cipher_key_len = T0_POP();
+	prf_id = T0_POP();
+	is_client = T0_POP();
+	br_ssl_engine_switch_gcm_in(ENG, is_client, prf_id,
+		ENG->iaes_ctr, cipher_key_len);
+
+				}
+				break;
+			case 72: {
+				/* switch-aesgcm-out: ( is_client prf_id cipher_key_len -- ) */
+
+	int is_client, prf_id;
+	unsigned cipher_key_len;
+
+	cipher_key_len = T0_POP();
+	prf_id = T0_POP();
+	is_client = T0_POP();
+	br_ssl_engine_switch_gcm_out(ENG, is_client, prf_id,
+		ENG->iaes_ctr, cipher_key_len);
+
+				}
+				break;
+			case 73: {
+				/* switch-cbc-in: ( is_client prf_id mac_id aes cipher_key_len -- ); a non-zero aes selects the AES implementation, zero the DES one */
+
+	int is_client, prf_id, mac_id, aes;
+	unsigned cipher_key_len;
+
+	cipher_key_len = T0_POP();
+	aes = T0_POP();
+	mac_id = T0_POP();
+	prf_id = T0_POP();
+	is_client = T0_POP();
+	br_ssl_engine_switch_cbc_in(ENG, is_client, prf_id, mac_id,
+		aes ? ENG->iaes_cbcdec : ENG->ides_cbcdec, cipher_key_len);
+
+				}
+				break;
+			case 74: {
+				/* switch-cbc-out: ( is_client prf_id mac_id aes cipher_key_len -- ); a non-zero aes selects the AES implementation, zero the DES one */
+
+	int is_client, prf_id, mac_id, aes;
+	unsigned cipher_key_len;
+
+	cipher_key_len = T0_POP();
+	aes = T0_POP();
+	mac_id = T0_POP();
+	prf_id = T0_POP();
+	is_client = T0_POP();
+	br_ssl_engine_switch_cbc_out(ENG, is_client, prf_id, mac_id,
+		aes ? ENG->iaes_cbcenc : ENG->ides_cbcenc, cipher_key_len);
+
+				}
+				break;
+			case 75: {
+				/* switch-chapol-in: ( is_client prf_id -- ) */
+
+	int is_client, prf_id;
+
+	prf_id = T0_POP();
+	is_client = T0_POP();
+	br_ssl_engine_switch_chapol_in(ENG, is_client, prf_id);
+
+				}
+				break;
+			case 76: {
+				/* switch-chapol-out: ( is_client prf_id -- ) */
+
+	int is_client, prf_id;
+
+	prf_id = T0_POP();
+	is_client = T0_POP();
+	br_ssl_engine_switch_chapol_out(ENG, is_client, prf_id);
+
+				}
+				break;
+			case 77: {
+				/* test-protocol-name: push the index of the configured name equal to the len bytes in ENG->pad, or -1 when none matches */
+
+	size_t len = T0_POP();
+	size_t u;
+
+	for (u = 0; u < ENG->protocol_names_num; u ++) {
+		const char *name;
+
+		name = ENG->protocol_names[u];
+		if (len == strlen(name) && memcmp(ENG->pad, name, len) == 0) {
+			T0_PUSH(u);
+			T0_RET();
+		}
+	}
+	T0_PUSHi(-1);
+
+				}
+				break;
+			case 78: {
+				/* total-chain-length: sum of (3 + data_len) over the remaining chain entries */
+
+	size_t u;
+	uint32_t total;
+
+	total = 0;
+	for (u = 0; u < ENG->chain_len; u ++) {
+		total += 3 + (uint32_t)ENG->chain[u].data_len;
+	}
+	T0_PUSH(total);
+
+				}
+				break;
+			case 79: {
+				/* u>> (shift of an unsigned value) */
+
+	int c = (int)T0_POPi();
+	uint32_t x = T0_POP();
+	T0_PUSH(x >> c);
+
+				}
+				break;
+			case 80: {
+				/* verify-SKE-sig: ( hash use_rsa sig_len -- result ) */
+
+	size_t sig_len = T0_POP();
+	int use_rsa = T0_POPi();
+	int hash = T0_POPi();
+
+	T0_PUSH(verify_SKE_sig(CTX, hash, use_rsa, sig_len));
+
+				}
+				break;
+			case 81: {
+				/* write-blob-chunk: ( addr len -- addr' len' ); the stack is left untouched when there is no output room */
+
+	size_t clen = ENG->hlen_out;
+	if (clen > 0) {
+		uint32_t addr, len;
+
+		len = T0_POP();
+		addr = T0_POP();
+		if ((size_t)len < clen) {
+			clen = (size_t)len;
+		}
+		memcpy(ENG->hbuf_out, (unsigned char *)ENG + addr, clen);
+		if (ENG->record_type_out == BR_SSL_HANDSHAKE) { /* only handshake records are fed to the running hash */
+			br_multihash_update(&ENG->mhash, ENG->hbuf_out, clen);
+		}
+		T0_PUSH(addr + (uint32_t)clen);
+		T0_PUSH(len - (uint32_t)clen);
+		ENG->hbuf_out += clen;
+		ENG->hlen_out -= clen;
+	}
+
+				}
+				break;
+			case 82: {
+				/* write8-native: pop one byte; push -1 if it was written, 0 if there was no output room */
+
+	unsigned char x;
+
+	x = (unsigned char)T0_POP();
+	if (ENG->hlen_out > 0) {
+		if (ENG->record_type_out == BR_SSL_HANDSHAKE) {
+			br_multihash_update(&ENG->mhash, &x, 1);
+		}
+		*ENG->hbuf_out ++ = x;
+		ENG->hlen_out --;
+		T0_PUSHi(-1);
+	} else {
+		T0_PUSHi(0);
+	}
+
+				}
+				break;
+			case 83: {
+				/* x509-append: hand the first len bytes of ENG->pad to the X.509 engine */
+
+	const br_x509_class *xc;
+	size_t len;
+
+	xc = *(ENG->x509ctx);
+	len = T0_POP();
+	xc->append(ENG->x509ctx, ENG->pad, len);
+
+				}
+				break;
+			case 84: {
+				/* x509-end-cert */
+
+	const br_x509_class *xc;
+
+	xc = *(ENG->x509ctx);
+	xc->end_cert(ENG->x509ctx);
+
+				}
+				break;
+			case 85: {
+				/* x509-end-chain: push the value returned by end_chain */
+
+	const br_x509_class *xc;
+
+	xc = *(ENG->x509ctx);
+	T0_PUSH(xc->end_chain(ENG->x509ctx));
+
+				}
+				break;
+			case 86: {
+				/* x509-start-cert */
+
+	const br_x509_class *xc;
+
+	xc = *(ENG->x509ctx);
+	xc->start_cert(ENG->x509ctx, T0_POP());
+
+				}
+				break;
+			case 87: {
+				/* x509-start-chain: a non-zero popped flag passes ENG->server_name, zero passes NULL */
+
+	const br_x509_class *xc;
+	uint32_t bc;
+
+	bc = T0_POP();
+	xc = *(ENG->x509ctx);
+	xc->start_chain(ENG->x509ctx, bc ? ENG->server_name : NULL);
+
+				}
+				break;
+			}
+
+		} else {
+			T0_ENTER(ip, rp, t0x); /* opcode >= T0_INTERPRETED: call the interpreted word */
+		}
+	}
+t0_exit: /* store the interpreter state back so a later call can resume */
+	((t0_context *)t0ctx)->dp = dp;
+	((t0_context *)t0ctx)->rp = rp;
+	((t0_context *)t0ctx)->ip = ip;
+}
diff --git a/third_party/bearssl/src/ssl_io.c b/third_party/bearssl/src/ssl_io.c
new file mode 100644
index 0000000..1952615
--- /dev/null
+++ b/third_party/bearssl/src/ssl_io.c
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ssl.h: bind an engine and two I/O callbacks to 'ctx' */
+void
+br_sslio_init(br_sslio_context *ctx,
+	br_ssl_engine_context *engine,
+	int (*low_read)(void *read_context,
+		unsigned char *data, size_t len),
+	void *read_context,
+	int (*low_write)(void *write_context,
+		const unsigned char *data, size_t len),
+	void *write_context)
+{
+	ctx->engine = engine; /* engine that run_until() drives */
+	ctx->low_read = low_read; /* fills the engine's recvrec buffer */
+	ctx->read_context = read_context; /* passed back to low_read */
+	ctx->low_write = low_write; /* drains the engine's sendrec buffer */
+	ctx->write_context = write_context; /* passed back to low_write */
+}
+
+/*
+ * Drive the engine of 'ctx' with the low_write/low_read callbacks until
+ * one of the state flags in 'target' is set, or an error occurs. 'target'
+ * is SENDAPP, RECVAPP, or both (the combination matches either). Returns
+ * 0 on a match, -1 on error (engine closed, I/O failure, unread app data).
+ */
+static int
+run_until(br_sslio_context *ctx, unsigned target)
+{
+	for (;;) {
+		unsigned state;
+
+		state = br_ssl_engine_current_state(ctx->engine);
+		if (state & BR_SSL_CLOSED) {
+			return -1; /* closed engine: the target cannot be reached */
+		}
+
+		/*
+		 * If there is some record data to send, do it. This takes
+		 * precedence over everything else.
+		 */
+		if (state & BR_SSL_SENDREC) {
+			unsigned char *buf;
+			size_t len;
+			int wlen;
+
+			buf = br_ssl_engine_sendrec_buf(ctx->engine, &len);
+			wlen = ctx->low_write(ctx->write_context, buf, len);
+			if (wlen < 0) {
+				/*
+				 * If we received a close_notify and we
+				 * still send something, then we have our
+				 * own response close_notify to send, and
+				 * the peer is allowed by RFC 5246 not to
+				 * wait for it.
+				 */
+				if (!ctx->engine->shutdown_recv) {
+					br_ssl_engine_fail(
+						ctx->engine, BR_ERR_IO);
+				}
+				return -1; /* reported even when the engine was not failed */
+			}
+			if (wlen > 0) { /* short writes are fine: ack what went out */
+				br_ssl_engine_sendrec_ack(ctx->engine, wlen);
+			}
+			continue; /* re-read the state; a 0-byte write simply retries */
+		}
+
+		/*
+		 * If we reached our target, then we are finished.
+		 */
+		if (state & target) {
+			return 0;
+		}
+
+		/*
+		 * If some application data must be read, and we did not
+		 * exit, then this means that we are trying to write data,
+		 * and that's not possible until the application data is
+		 * read. This may happen if using a shared in/out buffer,
+		 * and the underlying protocol is not strictly half-duplex.
+		 * This is unrecoverable here, so we report an error.
+		 */
+		if (state & BR_SSL_RECVAPP) {
+			return -1; /* note: the engine itself is not marked failed */
+		}
+
+		/*
+		 * If we reached that point, then either we are trying
+		 * to read data and there is some, or the engine is stuck
+		 * until a new record is obtained.
+		 */
+		if (state & BR_SSL_RECVREC) {
+			unsigned char *buf;
+			size_t len;
+			int rlen;
+
+			buf = br_ssl_engine_recvrec_buf(ctx->engine, &len);
+			rlen = ctx->low_read(ctx->read_context, buf, len);
+			if (rlen < 0) {
+				br_ssl_engine_fail(ctx->engine, BR_ERR_IO);
+				return -1;
+			}
+			if (rlen > 0) { /* hand the received bytes to the engine */
+				br_ssl_engine_recvrec_ack(ctx->engine, rlen);
+			}
+			continue; /* re-read the state; a 0-byte read simply retries */
+		}
+
+		/*
+		 * We can reach that point if the target is RECVAPP, and
+		 * the state contains SENDAPP only. This may happen with
+		 * a shared in/out buffer. In that case, we must flush
+		 * the buffered data to "make room" for a new incoming
+		 * record.
+		 */
+		br_ssl_engine_flush(ctx->engine, 0);
+	}
+}
+
+/* see bearssl_ssl.h: read at most 'len' bytes into 'dst'; count or -1 */
+int
+br_sslio_read(br_sslio_context *ctx, void *dst, size_t len)
+{
+	unsigned char *buf;
+	size_t alen;
+
+	if (len == 0) {
+		return 0; /* nothing requested: the engine is not touched */
+	}
+	if (run_until(ctx, BR_SSL_RECVAPP) < 0) {
+		return -1;
+	}
+	buf = br_ssl_engine_recvapp_buf(ctx->engine, &alen);
+	if (alen > len) {
+		alen = len; /* hand out no more than the caller asked for */
+	}
+	memcpy(dst, buf, alen);
+	br_ssl_engine_recvapp_ack(ctx->engine, alen); /* ack only what was copied */
+	return (int)alen;
+}
+
+/* see bearssl_ssl.h: read exactly 'len' bytes into 'dst'; 0 or -1 */
+int
+br_sslio_read_all(br_sslio_context *ctx, void *dst, size_t len)
+{
+	unsigned char *buf;
+
+	buf = dst; /* byte cursor into the caller's buffer */
+	while (len > 0) {
+		int rlen;
+
+		rlen = br_sslio_read(ctx, buf, len);
+		if (rlen < 0) {
+			return -1; /* bytes already copied to 'dst' are not reported */
+		}
+		buf += rlen;
+		len -= (size_t)rlen;
+	}
+	return 0;
+}
+
+/* see bearssl_ssl.h: queue at most 'len' bytes from 'src'; count or -1 */
+int
+br_sslio_write(br_sslio_context *ctx, const void *src, size_t len)
+{
+	unsigned char *buf;
+	size_t alen;
+
+	if (len == 0) {
+		return 0; /* nothing to write: the engine is not touched */
+	}
+	if (run_until(ctx, BR_SSL_SENDAPP) < 0) {
+		return -1;
+	}
+	buf = br_ssl_engine_sendapp_buf(ctx->engine, &alen);
+	if (alen > len) {
+		alen = len; /* copy no more than the caller provided */
+	}
+	memcpy(buf, src, alen);
+	br_ssl_engine_sendapp_ack(ctx->engine, alen); /* no flush is requested here */
+	return (int)alen;
+}
+
+/* see bearssl_ssl.h: queue exactly 'len' bytes from 'src'; 0 or -1 */
+int
+br_sslio_write_all(br_sslio_context *ctx, const void *src, size_t len)
+{
+	const unsigned char *buf;
+
+	buf = src; /* byte cursor into the caller's data */
+	while (len > 0) {
+		int wlen;
+
+		wlen = br_sslio_write(ctx, buf, len);
+		if (wlen < 0) {
+			return -1; /* bytes already accepted are not reported */
+		}
+		buf += wlen;
+		len -= (size_t)wlen;
+	}
+	return 0;
+}
+
+/* see bearssl_ssl.h: push buffered data down the wire; 0 or -1 */
+int
+br_sslio_flush(br_sslio_context *ctx)
+{
+	/*
+	 * We trigger a flush. We know the data is gone when there is
+	 * no longer any record data to send, and we can either read
+	 * or write application data. The call to run_until() does the
+	 * job because it ensures that any assembled record data is
+	 * first sent down the wire before considering anything else.
+	 */
+	br_ssl_engine_flush(ctx->engine, 0);
+	return run_until(ctx, BR_SSL_SENDAPP | BR_SSL_RECVAPP); /* either flag matches */
+}
+
+/* see bearssl_ssl.h: close the engine, then run it until it is CLOSED */
+int
+br_sslio_close(br_sslio_context *ctx)
+{
+	br_ssl_engine_close(ctx->engine);
+	while (br_ssl_engine_current_state(ctx->engine) != BR_SSL_CLOSED) {
+		/* Discard any incoming application data. NOTE(review):
+		 * run_until() may return -1 without failing the engine
+		 * (write error after close_notify); confirm we still exit. */
+		size_t len;
+
+		run_until(ctx, BR_SSL_RECVAPP); /* result ignored; state is re-tested */
+		if (br_ssl_engine_recvapp_buf(ctx->engine, &len) != NULL) {
+			br_ssl_engine_recvapp_ack(ctx->engine, len);
+		}
+	}
+	return br_ssl_engine_last_error(ctx->engine) == BR_ERR_OK; /* 1 if no error, else 0 */
+}
diff --git a/third_party/bearssl/src/ssl_keyexport.c b/third_party/bearssl/src/ssl_keyexport.c
new file mode 100644
index 0000000..58e6dc3
--- /dev/null
+++ b/third_party/bearssl/src/ssl_keyexport.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Supported cipher suites that use SHA-384 for the PRF when selected
+ * for TLS 1.2; br_ssl_key_export() uses SHA-256 for all other suites.
+ */
+static const uint16_t suites_sha384[] = {
+	BR_TLS_RSA_WITH_AES_256_GCM_SHA384,
+	BR_TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384,
+	BR_TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384,
+	BR_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384,
+	BR_TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384,
+	BR_TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,
+	BR_TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384,
+	BR_TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,
+	BR_TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384
+};
+
+/* see bearssl_ssl.h: run the PRF over the master secret into 'dst'; 1 or 0 */
+int
+br_ssl_key_export(br_ssl_engine_context *cc,
+	void *dst, size_t len, const char *label,
+	const void *context, size_t context_len)
+{
+	br_tls_prf_seed_chunk chunks[4];
+	br_tls_prf_impl iprf;
+	size_t num_chunks, u;
+	unsigned char tmp[2];
+	int prf_id;
+
+	if (cc->application_data != 1) {
+		return 0; /* refused: 'dst' is left untouched */
+	}
+	chunks[0].data = cc->client_random; /* seed: client random, then server random */
+	chunks[0].len = sizeof cc->client_random;
+	chunks[1].data = cc->server_random;
+	chunks[1].len = sizeof cc->server_random;
+	if (context != NULL) {
+		br_enc16be(tmp, (unsigned)context_len); /* NOTE(review): presumably context_len must fit in 16 bits */
+		chunks[2].data = tmp; /* 2-byte length prefix, then the context itself */
+		chunks[2].len = 2;
+		chunks[3].data = context;
+		chunks[3].len = context_len;
+		num_chunks = 4;
+	} else {
+		num_chunks = 2; /* no context: neither length prefix nor context is seeded */
+	}
+	prf_id = BR_SSLPRF_SHA256; /* default when the suite is not in suites_sha384 */
+	for (u = 0; u < (sizeof suites_sha384) / sizeof(uint16_t); u ++) {
+		if (suites_sha384[u] == cc->session.cipher_suite) {
+			prf_id = BR_SSLPRF_SHA384; /* no break: the whole table is always scanned */
+		}
+	}
+	iprf = br_ssl_engine_get_PRF(cc, prf_id);
+	iprf(dst, len,
+		cc->session.master_secret, sizeof cc->session.master_secret,
+		label, num_chunks, chunks);
+	return 1;
+}
diff --git a/third_party/bearssl/src/ssl_lru.c b/third_party/bearssl/src/ssl_lru.c
new file mode 100644
index 0000000..4c71011
--- /dev/null
+++ b/third_party/bearssl/src/ssl_lru.c
@@ -0,0 +1,537 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Each entry consists of a fixed number of bytes. Entries are concatenated
+ * in the store block. "Addresses" are really offsets in the block,
+ * expressed over 32 bits (so the cache may have size at most 4 GB, which
+ * "ought to be enough for everyone"). The "null address" is 0xFFFFFFFF.
+ * Note that since the storage block alignment is in no way guaranteed, we
+ * perform only accesses that can handle unaligned data.
+ *
+ * Two concurrent data structures are maintained:
+ *
+ * -- Entries are organised in a doubly-linked list; saved entries are added
+ * at the head, and loaded entries are moved to the head. Eviction uses
+ * the list tail (this is the LRU algorithm).
+ *
+ * -- Entries are indexed with a binary tree: all left descendants of a
+ * node have a lower session ID (in lexicographic order), while all
+ * right descendants have a higher session ID. The tree is heuristically
+ * balanced.
+ *
+ * Entry format:
+ *
+ *   session ID          32 bytes
+ *   master secret       48 bytes
+ *   protocol version    2 bytes (big endian)
+ *   cipher suite        2 bytes (big endian)
+ *   list prev           4 bytes (big endian)
+ *   list next           4 bytes (big endian)
+ *   tree left child     4 bytes (big endian)
+ *   tree right child    4 bytes (big endian)
+ *
+ * If an entry has a protocol version set to 0, then it is "disabled":
+ * it was a session pushed to the cache at some point, but it has
+ * been explicitly removed.
+ *
+ * We need to keep the tree balanced because an attacker could make
+ * handshakes, selecting some specific sessions (by reusing them) to
+ * try to make us make an imbalanced tree that makes lookups expensive
+ * (a denial-of-service attack that would persist as long as the cache
+ * remains, i.e. even after the attacker made all his connections).
+ * To do that, we replace the session ID (or the start of the session ID)
+ * with a HMAC value computed over the replaced part; the hash function
+ * implementation and the key are obtained from the server context upon
+ * first save() call.
+ *
+ * Theoretically, an attacker could use the exact timing of the lookup
+ * to infer the current tree topology, and try to revive entries to make
+ * it as unbalanced as possible. However, since the session ID are
+ * chosen randomly by the server, and the attacker cannot see the
+ * indexing values and must thus rely on blind selection, it should be
+ * exponentially difficult for the attacker to maintain a large
+ * imbalance.
+ */
+#define SESSION_ID_LEN       32
+#define MASTER_SECRET_LEN    48
+
+#define SESSION_ID_OFF        0
+#define MASTER_SECRET_OFF    32   /* SESSION_ID_OFF + SESSION_ID_LEN */
+#define VERSION_OFF          80   /* MASTER_SECRET_OFF + MASTER_SECRET_LEN */
+#define CIPHER_SUITE_OFF     82   /* VERSION_OFF + 2 */
+#define LIST_PREV_OFF        84   /* first of four 4-byte link fields */
+#define LIST_NEXT_OFF        88
+#define TREE_LEFT_OFF        92
+#define TREE_RIGHT_OFF       96
+
+#define LRU_ENTRY_LEN       100   /* TREE_RIGHT_OFF + 4: total entry size */
+
+#define ADDR_NULL   ((uint32_t)-1)   /* the "null address", 0xFFFFFFFF */
+
+#define GETSET(name, off) /* emits get_<name>() and set_<name>() */ \
+static inline uint32_t get_ ## name(br_ssl_session_cache_lru *cc, uint32_t x) \
+{ \
+	return br_dec32be(cc->store + x + (off)); \
+} \
+static inline void set_ ## name(br_ssl_session_cache_lru *cc, \
+	uint32_t x, uint32_t val) \
+{ \
+	br_enc32be(cc->store + x + (off), val); \
+}
+
+GETSET(prev, LIST_PREV_OFF) /* get_prev / set_prev: LRU list link towards the head */
+GETSET(next, LIST_NEXT_OFF) /* get_next / set_next: LRU list link towards the tail */
+GETSET(left, TREE_LEFT_OFF) /* get_left / set_left: subtree with lower IDs */
+GETSET(right, TREE_RIGHT_OFF) /* get_right / set_right: subtree with higher IDs */
+
+/*
+ * Transform the session ID by replacing the first N bytes with a HMAC
+ * value computed over these bytes, using the random key K (the HMAC
+ * value is truncated if needed). HMAC will use the same hash function
+ * as the DRBG in the SSL server context, so with SHA-256, SHA-384,
+ * or SHA-1, depending on what is available.
+ *
+ * The risk of collision is considered too small to be a concern; and
+ * the impact of a collision is low (the handshake won't succeed). This
+ * risk is much lower than any transmission error, which would lead to
+ * the same consequences.
+ *
+ * Source and destination arrays must be disjoint.
+ */
+static void
+mask_id(br_ssl_session_cache_lru *cc,
+	const unsigned char *src, unsigned char *dst)
+{
+	br_hmac_key_context hkc;
+	br_hmac_context hc;
+
+	memcpy(dst, src, SESSION_ID_LEN); /* start from a plain copy of the ID */
+	br_hmac_key_init(&hkc, cc->hash, cc->index_key, sizeof cc->index_key);
+	br_hmac_init(&hc, &hkc, SESSION_ID_LEN); /* presumably caps the output length */
+	br_hmac_update(&hc, src, SESSION_ID_LEN);
+	br_hmac_out(&hc, dst); /* HMAC value goes over the start of the copy */
+}
+
+/*
+ * Find a node by ID ('id' is compared with the stored, masked IDs over
+ * SESSION_ID_LEN bytes). Returns the node address, or ADDR_NULL if absent.
+ *
+ * If addr_link is not NULL, '*addr_link' receives the address of the last
+ * followed link field (on a miss, that is where a new node would hang).
+ * It is ADDR_NULL if the found node is the root, or if the tree is empty.
+ */
+static uint32_t
+find_node(br_ssl_session_cache_lru *cc, const unsigned char *id,
+	uint32_t *addr_link)
+{
+	uint32_t x, y;
+
+	x = cc->root; /* x: current node */
+	y = ADDR_NULL; /* y: address of the link field that led to x */
+	while (x != ADDR_NULL) {
+		int r;
+
+		r = memcmp(id, cc->store + x + SESSION_ID_OFF, SESSION_ID_LEN);
+		if (r < 0) {
+			y = x + TREE_LEFT_OFF; /* lower ID: descend to the left */
+			x = get_left(cc, x);
+		} else if (r == 0) {
+			if (addr_link != NULL) {
+				*addr_link = y;
+			}
+			return x;
+		} else {
+			y = x + TREE_RIGHT_OFF; /* higher ID: descend to the right */
+			x = get_right(cc, x);
+		}
+	}
+	if (addr_link != NULL) {
+		*addr_link = y; /* miss: the empty link we fell off from */
+	}
+	return ADDR_NULL;
+}
+
+/*
+ * For node x, find its replacement upon removal.
+ *
+ *  -- If node x has no child, then this returns ADDR_NULL.
+ *  -- Otherwise, if node x has a left child, then the replacement is the
+ *     rightmost left-descendent.
+ *  -- Otherwise, the replacement is the leftmost right-descendent.
+ *
+ * If a node is returned, then '*al' is set to the address of the field
+ * that points to that node. Otherwise (node x has no child), '*al' is
+ * set to ADDR_NULL.
+ *
+ * Note that the replacement node, when found, is always a descendent
+ * of node 'x', so it cannot be the tree root. Thus, '*al' can be set
+ * to ADDR_NULL only when no node is found and ADDR_NULL is returned.
+ */
+static uint32_t
+find_replacement_node(br_ssl_session_cache_lru *cc, uint32_t x, uint32_t *al)
+{
+	uint32_t y1, y2; /* y1: candidate node; y2: address of the link to y1 */
+
+	y1 = get_left(cc, x);
+	if (y1 != ADDR_NULL) {
+		y2 = x + TREE_LEFT_OFF;
+		for (;;) { /* walk right from the left child until no right child */
+			uint32_t z;
+
+			z = get_right(cc, y1);
+			if (z == ADDR_NULL) {
+				*al = y2;
+				return y1;
+			}
+			y2 = y1 + TREE_RIGHT_OFF;
+			y1 = z;
+		}
+	}
+	y1 = get_right(cc, x);
+	if (y1 != ADDR_NULL) {
+		y2 = x + TREE_RIGHT_OFF;
+		for (;;) { /* walk left from the right child until no left child */
+			uint32_t z;
+
+			z = get_left(cc, y1);
+			if (z == ADDR_NULL) {
+				*al = y2;
+				return y1;
+			}
+			y2 = y1 + TREE_LEFT_OFF;
+			y1 = z;
+		}
+	}
+	*al = ADDR_NULL; /* x has no child at all */
+	return ADDR_NULL;
+}
+
+/*
+ * Set the link at address 'alx' to point to node 'x'. If 'alx' is
+ * ADDR_NULL, then this sets the tree root to 'x'.
+ */
+static inline void
+set_link(br_ssl_session_cache_lru *cc, uint32_t alx, uint32_t x)
+{
+	if (alx == ADDR_NULL) {
+		cc->root = x; /* the root link lives in the context, not in the store */
+	} else {
+		br_enc32be(cc->store + alx, x); /* 'alx' is an offset of a link field in the store */
+	}
+}
+
+/*
+ * Remove node 'x' from the tree. This function shall not be called if
+ * node 'x' is not part of the tree.
+ */
+static void
+remove_node(br_ssl_session_cache_lru *cc, uint32_t x)
+{
+	uint32_t alx, y, aly;
+
+	/*
+	 * Removal algorithm:
+	 * ------------------
+	 *
+	 * - If the root is removed and has no child, the tree becomes empty.
+	 *
+	 * - If the removed node has no child, then we can simply remove
+	 *   it, with nothing else to do.
+	 *
+	 * - Otherwise, the removed node must be replaced by either its
+	 *   rightmost left-descendent, or its leftmost right-descendent.
+	 *   The replacement node itself must be removed from its current
+	 *   place. By definition, that replacement node has either no
+	 *   child, or at most a single child that will replace it in the
+	 *   tree.
+	 */
+
+	/*
+	 * Find node back and its ancestor link. If the node was the
+	 * root, then alx is set to ADDR_NULL.
+	 */
+	find_node(cc, cc->store + x + SESSION_ID_OFF, &alx); /* only alx is needed */
+
+	/*
+	 * Find replacement node 'y', and 'aly' is set to the address of
+	 * the link to that replacement node. If the removed node has no
+	 * child, then both 'y' and 'aly' are set to ADDR_NULL.
+	 */
+	y = find_replacement_node(cc, x, &aly);
+
+	if (y != ADDR_NULL) {
+		uint32_t z;
+
+		/*
+		 * The unlinked replacement node may have one child (but
+		 * not two) that takes its place.
+		 */
+		z = get_left(cc, y);
+		if (z == ADDR_NULL) {
+			z = get_right(cc, y); /* may still be ADDR_NULL: y was a leaf */
+		}
+		set_link(cc, aly, z);
+
+		/*
+		 * Link the replacement node in its new place, overwriting
+		 * the current link to the node 'x' (which removes 'x').
+		 */
+		set_link(cc, alx, y);
+
+		/*
+		 * The replacement node adopts the left and right children
+		 * of the removed node. Note that this also works even if
+		 * the replacement node was a direct descendent of the
+		 * removed node, since we unlinked it previously.
+		 */
+		set_left(cc, y, get_left(cc, x));
+		set_right(cc, y, get_right(cc, x));
+	} else {
+		/*
+		 * No replacement, we simply unlink the node 'x'.
+		 */
+		set_link(cc, alx, ADDR_NULL);
+	}
+}
+
+static void
+lru_save(const br_ssl_session_cache_class **ctx, /* really the LRU cache */
+	br_ssl_server_context *server_ctx, /* RNG source, used on first save */
+	const br_ssl_session_parameters *params) /* session to record */
+{
+	br_ssl_session_cache_lru *cc;
+	unsigned char id[SESSION_ID_LEN]; /* masked session ID: the tree key */
+	uint32_t x, alx;
+
+	cc = (br_ssl_session_cache_lru *)ctx;
+
+	/*
+	 * If the buffer is too small, we don't record anything. This
+	 * test avoids problems in subsequent code.
+	 */
+	if (cc->store_len < LRU_ENTRY_LEN) {
+		return;
+	}
+
+	/*
+	 * Upon the first save in a session cache instance, we obtain
+	 * a random key for our indexing.
+	 */
+	if (!cc->init_done) {
+		br_hmac_drbg_generate(&server_ctx->eng.rng,
+			cc->index_key, sizeof cc->index_key);
+		cc->hash = br_hmac_drbg_get_hash(&server_ctx->eng.rng);
+		cc->init_done = 1;
+	}
+	mask_id(cc, params->session_id, id);
+
+	/*
+	 * Look for the node in the tree. If the same ID is already used,
+	 * then reject it. This is a collision event, which should be
+	 * exceedingly rare.
+	 * Note: we do NOT record the insertion point here, because the
+	 * removal of an entry may change the tree topology.
+	 */
+	if (find_node(cc, id, NULL) != ADDR_NULL) {
+		return;
+	}
+
+	/*
+	 * Find some room for the new parameters. If the cache is not
+	 * full yet, add it to the end of the area and bump the pointer up.
+	 * Otherwise, evict the list tail entry. Note that we already
+	 * filtered out the case of a ridiculously small buffer that
+	 * cannot hold any entry at all; thus, if there is no room for an
+	 * extra entry, then the cache cannot be empty.
+	 */
+	if (cc->store_ptr > (cc->store_len - LRU_ENTRY_LEN)) { /* no wrap: store_len was checked above */
+		/*
+		 * Evict tail. If the buffer has room for a single entry,
+		 * then this may also be the head.
+		 */
+		x = cc->tail;
+		cc->tail = get_prev(cc, x);
+		if (cc->tail == ADDR_NULL) {
+			cc->head = ADDR_NULL;
+		} else {
+			set_next(cc, cc->tail, ADDR_NULL);
+		}
+
+		/*
+		 * Remove the node from the tree.
+		 */
+		remove_node(cc, x);
+	} else {
+		/*
+		 * Allocate room for new node.
+		 */
+		x = cc->store_ptr;
+		cc->store_ptr += LRU_ENTRY_LEN;
+	}
+
+	/*
+	 * Find the insertion point for the new node, and link it.
+	 */
+	find_node(cc, id, &alx); /* a miss by construction: alx is the free link */
+	set_link(cc, alx, x);
+	set_left(cc, x, ADDR_NULL);
+	set_right(cc, x, ADDR_NULL);
+
+	/*
+	 * New entry becomes new list head. It may also become the list
+	 * tail if the cache was empty at that point.
+	 */
+	if (cc->head == ADDR_NULL) {
+		cc->tail = x;
+	} else {
+		set_prev(cc, cc->head, x);
+	}
+	set_prev(cc, x, ADDR_NULL);
+	set_next(cc, x, cc->head);
+	cc->head = x;
+
+	/*
+	 * Fill data in the entry.
+	 */
+	memcpy(cc->store + x + SESSION_ID_OFF, id, SESSION_ID_LEN); /* masked ID, not the original */
+	memcpy(cc->store + x + MASTER_SECRET_OFF,
+		params->master_secret, MASTER_SECRET_LEN);
+	br_enc16be(cc->store + x + VERSION_OFF, params->version);
+	br_enc16be(cc->store + x + CIPHER_SUITE_OFF, params->cipher_suite);
+}
+
+static int
+lru_load(const br_ssl_session_cache_class **ctx, /* really the LRU cache */
+	br_ssl_server_context *server_ctx, /* unused here */
+	br_ssl_session_parameters *params) /* in: session_id; out: the rest */
+{
+	br_ssl_session_cache_lru *cc;
+	unsigned char id[SESSION_ID_LEN]; /* masked session ID: the tree key */
+	uint32_t x;
+
+	(void)server_ctx;
+	cc = (br_ssl_session_cache_lru *)ctx;
+	if (!cc->init_done) {
+		return 0; /* nothing was ever saved, and no index key exists yet */
+	}
+	mask_id(cc, params->session_id, id);
+	x = find_node(cc, id, NULL);
+	if (x != ADDR_NULL) {
+		unsigned version;
+
+		version = br_dec16be(cc->store + x + VERSION_OFF);
+		if (version == 0) {
+			/*
+			 * Entry is disabled, we pretend we did not find it.
+			 * Notably, we don't move it to the front of the
+			 * LRU list.
+			 */
+			return 0;
+		}
+		params->version = version;
+		params->cipher_suite = br_dec16be(
+			cc->store + x + CIPHER_SUITE_OFF);
+		memcpy(params->master_secret,
+			cc->store + x + MASTER_SECRET_OFF,
+			MASTER_SECRET_LEN);
+		if (x != cc->head) {
+			/*
+			 * Found node is not at list head, so move
+			 * it to the head.
+			 */
+			uint32_t p, n;
+
+			p = get_prev(cc, x); /* not ADDR_NULL: x is not the head */
+			n = get_next(cc, x);
+			set_next(cc, p, n);
+			if (n == ADDR_NULL) {
+				cc->tail = p; /* x was the tail */
+			} else {
+				set_prev(cc, n, p);
+			}
+			set_prev(cc, cc->head, x);
+			set_next(cc, x, cc->head);
+			set_prev(cc, x, ADDR_NULL);
+			cc->head = x;
+		}
+		return 1; /* found: 'params' now holds the cached session */
+	}
+	return 0;
+}
+
+static const br_ssl_session_cache_class lru_class = {
+	sizeof(br_ssl_session_cache_lru), /* first member; presumably the context size */
+	&lru_save, /* records session parameters in the cache */
+	&lru_load /* looks a session up by ID: 1 if found, 0 otherwise */
+};
+
+/* see inner.h: set up an empty LRU cache over the caller's 'store' */
+void
+br_ssl_session_cache_lru_init(br_ssl_session_cache_lru *cc,
+	unsigned char *store, size_t store_len)
+{
+	cc->vtable = &lru_class;
+	cc->store = store; /* the pointer is kept: the caller's buffer is used in place */
+	cc->store_len = store_len;
+	cc->store_ptr = 0; /* offset at which lru_save() places the next new entry */
+	cc->init_done = 0; /* index key and hash are obtained on the first save */
+	cc->head = ADDR_NULL; /* empty LRU list */
+	cc->tail = ADDR_NULL;
+	cc->root = ADDR_NULL; /* empty index tree */
+}
+
+/* see bearssl_ssl.h: mark the cached session with this ID as disabled */
+void br_ssl_session_cache_lru_forget(
+	br_ssl_session_cache_lru *cc, const unsigned char *id)
+{
+	unsigned char mid[SESSION_ID_LEN]; /* masked form of 'id': the tree key */
+	uint32_t addr;
+
+	/*
+	 * If the cache is not initialised yet, then it is empty, and
+	 * there is nothing to forget.
+	 */
+	if (!cc->init_done) {
+		return;
+	}
+
+	/*
+	 * Look for the node in the tree. If found, the entry is marked
+	 * as "disabled"; it will be reused in due course, as it ages
+	 * through the list.
+	 *
+	 * We do not go through the complex moves of actually releasing
+	 * the entry right away because explicitly forgetting sessions
+	 * should be a rare event, meant mostly for testing purposes,
+	 * so this is not worth the extra code size.
+	 */
+	mask_id(cc, id, mid);
+	addr = find_node(cc, mid, NULL);
+	if (addr != ADDR_NULL) {
+		br_enc16be(cc->store + addr + VERSION_OFF, 0); /* version 0: lru_load() then reports a miss */
+	}
+}
diff --git a/third_party/bearssl/src/ssl_rec_cbc.c b/third_party/bearssl/src/ssl_rec_cbc.c
new file mode 100644
index 0000000..c38cbfd
--- /dev/null
+++ b/third_party/bearssl/src/ssl_rec_cbc.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static void
+in_cbc_init(br_sslrec_in_cbc_context *cc,
+	const br_block_cbcdec_class *bc_impl,
+	const void *bc_key, size_t bc_key_len,
+	const br_hash_class *dig_impl,
+	const void *mac_key, size_t mac_key_len, size_t mac_out_len,
+	const void *iv)
+{
+	cc->vtable = &br_sslrec_in_cbc_vtable;
+	cc->mac_len = mac_out_len;
+	cc->seq = 0;	/* first record is number 0 */
+	br_hmac_key_init(&cc->mac, dig_impl, mac_key, mac_key_len);
+	bc_impl->init(&cc->bc.vtable, bc_key, bc_key_len);
+	if (iv != NULL) {	/* implicit IV supplied by the caller */
+		cc->explicit_IV = 0;
+		memcpy(cc->iv, iv, bc_impl->block_size);
+	} else {		/* each record carries its own IV */
+		cc->explicit_IV = 1;
+		memset(cc->iv, 0, sizeof cc->iv);
+	}
+}
+
+static int
+cbc_check_length(const br_sslrec_in_cbc_context *cc, size_t rlen)
+{
+	/*
+	 * Returns 1 if rlen is an acceptable encrypted record length,
+	 * 0 otherwise. A record contains:
+	 *  -- plaintext: at most 16384 bytes
+	 *  -- padding: at least 1 byte, at most 256 bytes
+	 *  -- MAC: mac_len extra bytes
+	 *  -- TLS 1.1+: one extra block (explicit IV)
+	 *
+	 * The length must also be a multiple of the block size (assumed
+	 * to be a power of 2): cbc_decrypt() decrypts all rlen bytes.
+	 */
+	size_t blen;
+	size_t min_len, max_len;
+
+	blen = cc->bc.vtable->block_size;
+	min_len = (blen + cc->mac_len) & ~(blen - 1);
+	max_len = (16384 + 256 + cc->mac_len) & ~(blen - 1);
+	if (cc->explicit_IV) {
+		min_len += blen;
+		max_len += blen;
+	}
+	return min_len <= rlen && rlen <= max_len && (rlen & (blen - 1)) == 0;
+}
+
+/*
+ * Constant-time: if ctl is 1, rotate buf[0..len-1] left (towards low
+ * indices) by 'num' bytes; if ctl is 0, leave it unchanged. 'len' MUST
+ * be at most 64. 'num' MUST be lower than 'len' when ctl is 1; if it is
+ * not, bytes up to buf[num+len-1] are read (unused when ctl is 0).
+ */
+static void
+cond_rotate(uint32_t ctl, unsigned char *buf, size_t len, size_t num)
+{
+	unsigned char tmp[64];
+	size_t u, v;
+
+	for (u = 0, v = num; u < len; u ++) {
+		tmp[u] = MUX(ctl, buf[v], buf[u]);	/* rotated or same byte */
+		if (++ v == len) {
+			v = 0;
+		}
+	}
+	memcpy(buf, tmp, len);
+}
+
+static unsigned char *
+cbc_decrypt(br_sslrec_in_cbc_context *cc,
+	int record_type, unsigned version, void *data, size_t *data_len)
+{
+	/*
+	 * Decrypts in place, then checks padding and MAC in constant time;
+	 * returns the plaintext start (*data_len set), or 0 on failure.
+	 * Lengths use 32 bits, as the constant-time primitives do.
+	 */
+	unsigned char *buf;
+	uint32_t u, v, len, blen, min_len, max_len;
+	uint32_t good, pad_len, rot_count, len_withmac, len_nomac;
+	unsigned char tmp1[64], tmp2[64];
+	int i;
+	br_hmac_context hc;
+
+	buf = data;
+	len = *data_len;
+	blen = cc->bc.vtable->block_size;
+
+	/*
+	 * Decrypt data, and skip the explicit IV (if applicable). Note
+	 * that the total length is supposed to have been verified by
+	 * the caller. If there is an explicit IV, then we actually
+	 * "decrypt" it using the implicit IV (from previous record),
+	 * which is useless but harmless.
+	 */
+	cc->bc.vtable->run(&cc->bc.vtable, cc->iv, data, len);
+	if (cc->explicit_IV) {
+		buf += blen;
+		len -= blen;
+	}
+
+	/*
+	 * Compute minimum and maximum length of plaintext + MAC. These
+	 * lengths can be inferred from the outside: they are not secret.
+	 */
+	min_len = (cc->mac_len + 256 < len) ? len - 256 : cc->mac_len;
+	max_len = len - 1;
+
+	/*
+	 * Use the last decrypted byte to compute the actual payload
+	 * length. Take care not to overflow (we use unsigned types).
+	 */
+	pad_len = buf[max_len];
+	good = LE(pad_len, (uint32_t)(max_len - min_len));
+	len = MUX(good, (uint32_t)(max_len - pad_len), min_len);
+
+	/*
+	 * Check padding contents: all padding bytes must be equal to
+	 * the value of pad_len.
+	 */
+	for (u = min_len; u < max_len; u ++) {
+		good &= LT(u, len) | EQ(buf[u], pad_len);
+	}
+
+	/*
+	 * Extract the MAC value. This is done in one pass, but results
+	 * in a "rotated" MAC value depending on where it actually
+	 * occurs. The 'rot_count' value is set to the offset of the
+	 * first MAC byte within tmp1[].
+	 *
+	 * min_len and max_len are also adjusted to the minimum and
+	 * maximum lengths of the plaintext alone (without the MAC).
+	 */
+	len_withmac = (uint32_t)len;
+	len_nomac = len_withmac - cc->mac_len;
+	min_len -= cc->mac_len;
+	rot_count = 0;
+	memset(tmp1, 0, cc->mac_len);
+	v = 0;
+	for (u = min_len; u < max_len; u ++) {
+		tmp1[v] |= MUX(GE(u, len_nomac) & LT(u, len_withmac),
+			buf[u], 0x00);
+		rot_count = MUX(EQ(u, len_nomac), v, rot_count);
+		if (++ v == cc->mac_len) {
+			v = 0;
+		}
+	}
+	max_len -= cc->mac_len;
+
+	/*
+	 * Rotate back the MAC value. The loop below does the constant-time
+	 * rotation in time n*log n for a MAC output of length n. We assume
+	 * that the MAC output length is no more than 64 bytes, so the
+	 * rotation count fits on 6 bits.
+	 */
+	for (i = 5; i >= 0; i --) {
+		uint32_t rc;
+
+		rc = (uint32_t)1 << i;
+		cond_rotate(rot_count >> i, tmp1, cc->mac_len, rc);
+		rot_count &= ~rc;
+	}
+
+	/*
+	 * Recompute the HMAC value. The input is the concatenation of
+	 * the sequence number (8 bytes), the record header (5 bytes),
+	 * and the payload.
+	 *
+	 * At that point, min_len and max_len are the minimum and maximum
+	 * plaintext lengths: the MAC length was subtracted from both.
+	 */
+	br_enc64be(tmp2, cc->seq ++);
+	tmp2[8] = (unsigned char)record_type;
+	br_enc16be(tmp2 + 9, version);
+	br_enc16be(tmp2 + 11, len_nomac);
+	br_hmac_init(&hc, &cc->mac, cc->mac_len);
+	br_hmac_update(&hc, tmp2, 13);
+	br_hmac_outCT(&hc, buf, len_nomac, min_len, max_len, tmp2);
+
+	/*
+	 * Compare the extracted and recomputed MAC values.
+	 */
+	for (u = 0; u < cc->mac_len; u ++) {
+		good &= EQ0(tmp1[u] ^ tmp2[u]);
+	}
+
+	/*
+	 * Check that the plaintext length is valid. The previous
+	 * check was on the encrypted length, but the padding may have
+	 * turned shorter than expected.
+	 *
+	 * Once this final test is done, the critical "constant-time"
+	 * section ends and we can make conditional jumps again.
+	 */
+	good &= LE(len_nomac, 16384);
+
+	if (!good) {
+		return 0;
+	}
+	*data_len = len_nomac;
+	return buf;
+}
+
+/* see bearssl_ssl.h (methods for incoming CBC records) */
+const br_sslrec_in_cbc_class br_sslrec_in_cbc_vtable = {
+	{
+		sizeof(br_sslrec_in_cbc_context),
+		(int (*)(const br_sslrec_in_class *const *, size_t))
+			cbc_check_length,	/* takes the concrete context */
+		(unsigned char *(*)(const br_sslrec_in_class **,
+			int, unsigned, void *, size_t *))
+			cbc_decrypt
+	},
+	(void (*)(const br_sslrec_in_cbc_class **,
+		const br_block_cbcdec_class *, const void *, size_t,
+		const br_hash_class *, const void *, size_t, size_t,
+		const void *))
+		in_cbc_init
+};
+
+/*
+ * For CBC output:
+ *
+ * -- With TLS 1.1+, there is an explicit IV. Generation method uses
+ * HMAC, computed over the current sequence number, and the current MAC
+ * key. The resulting value is truncated to the size of a block, and
+ * added at the head of the plaintext; it will get encrypted along with
+ * the data. This custom generation mechanism is "safe" under the
+ * assumption that HMAC behaves like a random oracle; since the MAC for
+ * a record is computed over the concatenation of the sequence number,
+ * the record header and the plaintext, the HMAC-for-IV will not collide
+ * with the normal HMAC.
+ *
+ * -- With TLS 1.0, for application data, we want to enforce a 1/n-1
+ * split, as a countermeasure against chosen-plaintext attacks. We thus
+ * need to leave some room in the buffer for that extra record.
+ */
+
+static void
+out_cbc_init(br_sslrec_out_cbc_context *cc,
+	const br_block_cbcenc_class *bc_impl,
+	const void *bc_key, size_t bc_key_len,
+	const br_hash_class *dig_impl,
+	const void *mac_key, size_t mac_key_len, size_t mac_out_len,
+	const void *iv)
+{
+	cc->vtable = &br_sslrec_out_cbc_vtable;
+	cc->mac_len = mac_out_len;
+	cc->seq = 0;	/* first record is number 0 */
+	br_hmac_key_init(&cc->mac, dig_impl, mac_key, mac_key_len);
+	bc_impl->init(&cc->bc.vtable, bc_key, bc_key_len);
+	if (iv != NULL) {	/* implicit IV supplied by the caller */
+		cc->explicit_IV = 0;
+		memcpy(cc->iv, iv, bc_impl->block_size);
+	} else {		/* an IV block is generated per record */
+		cc->explicit_IV = 1;
+		memset(cc->iv, 0, sizeof cc->iv);
+	}
+}
+
+static void
+cbc_max_plaintext(const br_sslrec_out_cbc_context *cc,
+	size_t *start, size_t *end)
+{
+	size_t bs, first, room;
+
+	/*
+	 * Room is kept before the data: one block for the explicit IV,
+	 * or else enough room for the extra one-byte record that
+	 * cbc_encrypt() may emit first (1/n-1 split).
+	 */
+	bs = cc->bc.vtable->block_size;
+	first = *start + (cc->explicit_IV
+		? bs : 4 + ((cc->mac_len + bs + 1) & ~(bs - 1)));
+	*start = first;
+	/* Whole blocks only; MAC and one padding byte are deducted. */
+	room = ((*end - first) & ~(bs - 1)) - (1 + cc->mac_len);
+	*end = first + (room > 16384 ? 16384 : room);
+}
+
+static unsigned char *
+cbc_encrypt(br_sslrec_out_cbc_context *cc,
+	int record_type, unsigned version, void *data, size_t *data_len)
+{
+	unsigned char *buf, *rbuf;
+	size_t len, blen, plen;
+	unsigned char tmp[13];
+	br_hmac_context hc;
+
+	buf = data;
+	len = *data_len;
+	blen = cc->bc.vtable->block_size;
+
+	/*
+	 * If using TLS 1.0, with more than one byte of plaintext, and
+	 * the record is application data, then we need to compute
+	 * a "split". We do not perform the split on other record types
+	 * because it turned out that some existing, deployed
+	 * implementations of SSL/TLS do not tolerate the splitting of
+	 * some message types (in particular the Finished message).
+	 *
+	 * If using TLS 1.1+, then there is an explicit IV. We produce
+	 * that IV by adding an extra initial plaintext block, whose
+	 * value is computed with HMAC over the record sequence number.
+	 */
+	if (cc->explicit_IV) {
+		/*
+		 * We use here the fact that all the HMAC variants we
+		 * support can produce at least 16 bytes, while all the
+		 * block ciphers we support have blocks of no more than
+		 * 16 bytes. Thus, we can always truncate the HMAC output
+		 * down to the block size.
+		 */
+		br_enc64be(tmp, cc->seq);
+		br_hmac_init(&hc, &cc->mac, blen);
+		br_hmac_update(&hc, tmp, 8);
+		br_hmac_out(&hc, buf - blen);
+		rbuf = buf - blen - 5;
+	} else {
+		if (len > 1 && record_type == BR_SSL_APPLICATION_DATA) {
+			/*
+			 * To do the split, we use a recursive invocation;
+			 * since we only give one byte to the inner call,
+			 * the recursion stops there.
+			 *
+			 * We need to compute the exact size of the extra
+			 * record, so that the two resulting records end up
+			 * being sequential in RAM.
+			 *
+			 * We use here the fact that cbc_max_plaintext()
+			 * adjusted the start offset to leave room for the
+			 * initial fragment.
+			 */
+			size_t xlen;
+
+			rbuf = buf - 4
+				- ((cc->mac_len + blen + 1) & ~(blen - 1));
+			rbuf[0] = buf[0];
+			xlen = 1;
+			rbuf = cbc_encrypt(cc, record_type,
+				version, rbuf, &xlen);
+			buf ++;
+			len --;
+		} else {
+			rbuf = buf - 5;
+		}
+	}
+
+	/*
+	 * Compute MAC over sequence number, header and data; append it.
+	 */
+	br_enc64be(tmp, cc->seq ++);
+	tmp[8] = record_type;
+	br_enc16be(tmp + 9, version);
+	br_enc16be(tmp + 11, len);
+	br_hmac_init(&hc, &cc->mac, cc->mac_len);
+	br_hmac_update(&hc, tmp, 13);
+	br_hmac_update(&hc, buf, len);
+	br_hmac_out(&hc, buf + len);
+	len += cc->mac_len;
+
+	/*
+	 * Add padding: plen bytes (1 to blen), each of value plen - 1.
+	 */
+	plen = blen - (len & (blen - 1));
+	memset(buf + len, (unsigned)plen - 1, plen);
+	len += plen;
+
+	/*
+	 * If an explicit IV is used, the corresponding extra block was
+	 * already put in place earlier; we just have to account for it
+	 * here.
+	 */
+	if (cc->explicit_IV) {
+		buf -= blen;
+		len += blen;
+	}
+
+	/*
+	 * Encrypt the whole thing. If there is an explicit IV, we also
+	 * encrypt it, which is fine (encryption of a uniformly random
+	 * block is still a uniformly random block).
+	 */
+	cc->bc.vtable->run(&cc->bc.vtable, cc->iv, buf, len);
+
+	/*
+	 * Write the header. Output starts at rbuf (split record included).
+	 */
+	buf[-5] = record_type;
+	br_enc16be(buf - 4, version);
+	br_enc16be(buf - 2, len);
+	*data_len = (size_t)((buf + len) - rbuf);
+	return rbuf;
+}
+
+/* see bearssl_ssl.h (methods for outgoing CBC records) */
+const br_sslrec_out_cbc_class br_sslrec_out_cbc_vtable = {
+	{
+		sizeof(br_sslrec_out_cbc_context),
+		(void (*)(const br_sslrec_out_class *const *,
+			size_t *, size_t *))
+			cbc_max_plaintext,	/* takes the concrete context */
+		(unsigned char *(*)(const br_sslrec_out_class **,
+			int, unsigned, void *, size_t *))
+			cbc_encrypt
+	},
+	(void (*)(const br_sslrec_out_cbc_class **,
+		const br_block_cbcenc_class *, const void *, size_t,
+		const br_hash_class *, const void *, size_t, size_t,
+		const void *))
+		out_cbc_init
+};
diff --git a/third_party/bearssl/src/ssl_rec_ccm.c b/third_party/bearssl/src/ssl_rec_ccm.c
new file mode 100644
index 0000000..92c3295
--- /dev/null
+++ b/third_party/bearssl/src/ssl_rec_ccm.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Common part of CCM set-up for both directions: block cipher keying,
+ * copy of the IV bytes, tag length, sequence number reset to 0. The
+ * vtable is not set here: in_ccm_init() and out_ccm_init() do it.
+ */
+static void
+gen_ccm_init(br_sslrec_ccm_context *cc,
+	const br_block_ctrcbc_class *bc_impl,
+	const void *key, size_t key_len,
+	const void *iv, size_t tag_len)
+{
+	cc->tag_len = tag_len;
+	cc->seq = 0;
+	memcpy(cc->iv, iv, sizeof cc->iv);
+	bc_impl->init(&cc->bc.vtable, key, key_len);
+}
+
+static void
+in_ccm_init(br_sslrec_ccm_context *cc,
+	const br_block_ctrcbc_class *bc_impl,
+	const void *key, size_t key_len,
+	const void *iv, size_t tag_len)
+{
+	gen_ccm_init(cc, bc_impl, key, key_len, iv, tag_len);
+	cc->vtable.in = &br_sslrec_in_ccm_vtable;	/* incoming records */
+}
+
+static int
+ccm_check_length(const br_sslrec_ccm_context *cc, size_t rlen)
+{
+	/*
+	 * A record holds the 8-byte nonce_explicit, up to 16384 bytes
+	 * of ciphertext, and the tag (tag_len bytes, set at init time).
+	 */
+	size_t overhead;
+
+	overhead = cc->tag_len + 8;
+	return !(rlen < overhead || rlen > overhead + 16384);
+}
+
+static unsigned char *
+ccm_decrypt(br_sslrec_ccm_context *cc,
+	int record_type, unsigned version, void *data, size_t *data_len)
+{
+	br_ccm_context ccm;
+	unsigned char *ct;
+	unsigned char aad[13], nonce[12];
+	size_t ptlen;
+
+	ct = (unsigned char *)data + 8;
+	ptlen = *data_len - 8 - cc->tag_len;
+
+	/*
+	 * AAD: sequence number, record type, version, plaintext length.
+	 */
+	br_enc64be(aad, cc->seq ++);
+	aad[8] = (unsigned char)record_type;
+	br_enc16be(aad + 9, version);
+	br_enc16be(aad + 11, ptlen);
+
+	/*
+	 * Nonce: IV bytes from the context, then the explicit part.
+	 */
+	memcpy(nonce, cc->iv, sizeof cc->iv);
+	memcpy(nonce + 4, data, 8);
+
+	/*
+	 * Decrypt in place, then check the tag that follows the data.
+	 */
+	br_ccm_init(&ccm, &cc->bc.vtable);
+	br_ccm_reset(&ccm, nonce, sizeof nonce, sizeof aad, ptlen, cc->tag_len);
+	br_ccm_aad_inject(&ccm, aad, sizeof aad);
+	br_ccm_flip(&ccm);
+	br_ccm_run(&ccm, 0, ct, ptlen);
+	if (br_ccm_check_tag(&ccm, ct + ptlen)) {
+		*data_len = ptlen;
+		return ct;
+	}
+	return NULL;
+}
+
+/* see bearssl_ssl.h (methods for incoming CCM records) */
+const br_sslrec_in_ccm_class br_sslrec_in_ccm_vtable = {
+	{
+		sizeof(br_sslrec_ccm_context),
+		(int (*)(const br_sslrec_in_class *const *, size_t))
+			ccm_check_length,	/* takes the concrete context */
+		(unsigned char *(*)(const br_sslrec_in_class **,
+			int, unsigned, void *, size_t *))
+			ccm_decrypt
+	},
+	(void (*)(const br_sslrec_in_ccm_class **,
+		const br_block_ctrcbc_class *, const void *, size_t,
+		const void *, size_t))
+		in_ccm_init
+};
+
+static void
+out_ccm_init(br_sslrec_ccm_context *cc,
+	const br_block_ctrcbc_class *bc_impl,
+	const void *key, size_t key_len,
+	const void *iv, size_t tag_len)
+{
+	gen_ccm_init(cc, bc_impl, key, key_len, iv, tag_len);
+	cc->vtable.out = &br_sslrec_out_ccm_vtable;	/* outgoing records */
+}
+
+static void
+ccm_max_plaintext(const br_sslrec_ccm_context *cc,
+	size_t *start, size_t *end)
+{
+	size_t first, room;
+
+	/* 8 bytes are kept before the data (explicit nonce), tag_len after. */
+	first = *start + 8;
+	*start = first;
+	room = *end - first - cc->tag_len;
+	/* No more than 16384 bytes of plaintext in a record. */
+	*end = first + (room > 16384 ? 16384 : room);
+}
+
+static unsigned char *
+ccm_encrypt(br_sslrec_ccm_context *cc,
+	int record_type, unsigned version, void *data, size_t *data_len)
+{
+	br_ccm_context ccm;
+	unsigned char *pt, *rec;
+	unsigned char aad[13], nonce[12];
+	size_t ptlen, reclen;
+
+	pt = (unsigned char *)data;
+	ptlen = *data_len;
+
+	/*
+	 * AAD: sequence number, record type, version, plaintext length.
+	 * The sequence number is incremented once encoded; its encoding
+	 * (first 8 bytes of the AAD) is reused below for the nonce.
+	 */
+	br_enc64be(aad, cc->seq ++);
+	aad[8] = (unsigned char)record_type;
+	br_enc16be(aad + 9, version);
+	br_enc16be(aad + 11, ptlen);
+
+	/* Nonce: IV bytes from the context, then the sequence number. */
+	memcpy(nonce, cc->iv, sizeof cc->iv);
+	memcpy(nonce + 4, aad, 8);
+
+	/*
+	 * Encrypt in place; the tag is written right after the data.
+	 */
+	br_ccm_init(&ccm, &cc->bc.vtable);
+	br_ccm_reset(&ccm, nonce, sizeof nonce, sizeof aad, ptlen, cc->tag_len);
+	br_ccm_aad_inject(&ccm, aad, sizeof aad);
+	br_ccm_flip(&ccm);
+	br_ccm_run(&ccm, 1, pt, ptlen);
+	br_ccm_get_tag(&ccm, pt + ptlen);
+
+	/*
+	 * Prepend the explicit nonce (8 bytes) and the record header
+	 * (5 bytes: type, version, length of what follows).
+	 */
+	reclen = ptlen + 8 + cc->tag_len;
+	rec = pt - 13;
+	rec[0] = (unsigned char)record_type;
+	br_enc16be(rec + 1, version);
+	br_enc16be(rec + 3, reclen);
+	memcpy(rec + 5, nonce + 4, 8);
+	*data_len = reclen + 5;
+	return rec;
+}
+
+/* see bearssl_ssl.h (methods for outgoing CCM records) */
+const br_sslrec_out_ccm_class br_sslrec_out_ccm_vtable = {
+	{
+		sizeof(br_sslrec_ccm_context),
+		(void (*)(const br_sslrec_out_class *const *,
+			size_t *, size_t *))
+			ccm_max_plaintext,	/* takes the concrete context */
+		(unsigned char *(*)(const br_sslrec_out_class **,
+			int, unsigned, void *, size_t *))
+			ccm_encrypt
+	},
+	(void (*)(const br_sslrec_out_ccm_class **,
+		const br_block_ctrcbc_class *, const void *, size_t,
+		const void *, size_t))
+		out_ccm_init
+};
diff --git a/third_party/bearssl/src/ssl_rec_chapol.c b/third_party/bearssl/src/ssl_rec_chapol.c
new file mode 100644
index 0000000..73b3c78
--- /dev/null
+++ b/third_party/bearssl/src/ssl_rec_chapol.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static void
+gen_chapol_init(br_sslrec_chapol_context *cc,
+	br_chacha20_run ichacha, br_poly1305_run ipoly,
+	const void *key, const void *iv)
+{
+	memcpy(cc->iv, iv, sizeof cc->iv);
+	memcpy(cc->key, key, sizeof cc->key);
+	cc->ipoly = ipoly;
+	cc->ichacha = ichacha;
+	cc->seq = 0;	/* vtable is set by the in/out init wrappers */
+}
+
+static void
+gen_chapol_process(br_sslrec_chapol_context *cc,
+	int record_type, unsigned version, void *data, size_t len,
+	void *tag, int encrypt)
+{
+	unsigned char aad[13], nonce[12];
+	size_t k;
+
+	/*
+	 * AAD: sequence number, record type, version, data length.
+	 * Nonce: IV with the encoded sequence number XORed at the end.
+	 */
+	br_enc64be(aad, cc->seq ++);
+	aad[8] = (unsigned char)record_type;
+	br_enc16be(aad + 9, version);
+	br_enc16be(aad + 11, len);
+	memcpy(nonce, cc->iv, 12);
+	for (k = 0; k < 8; k ++) {
+		nonce[4 + k] ^= aad[k];
+	}
+	cc->ipoly(cc->key, nonce, data, len, aad, sizeof aad,
+		tag, cc->ichacha, encrypt);
+}
+
+static void
+in_chapol_init(br_sslrec_chapol_context *cc,
+	br_chacha20_run ichacha, br_poly1305_run ipoly,
+	const void *key, const void *iv)
+{
+	gen_chapol_init(cc, ichacha, ipoly, key, iv);
+	cc->vtable.in = &br_sslrec_in_chapol_vtable;	/* incoming records */
+}
+
+static int
+chapol_check_length(const br_sslrec_chapol_context *cc, size_t rlen)
+{
+	/*
+	 * A record is up to 16384 bytes of data plus the 16-byte tag.
+	 */
+	(void)cc;
+	return !(rlen < 16 || rlen > 16384 + 16);
+}
+
+static unsigned char *
+chapol_decrypt(br_sslrec_chapol_context *cc,
+	int record_type, unsigned version, void *data, size_t *data_len)
+{
+	unsigned char *ct;
+	unsigned char tag[16];
+	size_t k, ptlen;
+	unsigned diff;
+
+	ct = data;
+	ptlen = *data_len - 16;
+	gen_chapol_process(cc, record_type, version, ct, ptlen, tag, 0);
+	diff = 0;	/* accumulates differences: no early exit */
+	for (k = 0; k < 16; k ++) {
+		diff |= tag[k] ^ ct[ptlen + k];
+	}
+	if (diff != 0) {
+		return NULL;
+	}
+	*data_len = ptlen;
+	return ct;
+}
+
+/* see bearssl_ssl.h (methods for incoming ChaCha20+Poly1305 records) */
+const br_sslrec_in_chapol_class br_sslrec_in_chapol_vtable = {
+	{
+		sizeof(br_sslrec_chapol_context),
+		(int (*)(const br_sslrec_in_class *const *, size_t))
+			chapol_check_length,	/* takes the concrete context */
+		(unsigned char *(*)(const br_sslrec_in_class **,
+			int, unsigned, void *, size_t *))
+			chapol_decrypt
+	},
+	(void (*)(const br_sslrec_in_chapol_class **,
+		br_chacha20_run, br_poly1305_run,
+		const void *, const void *))
+		in_chapol_init
+};
+
+static void
+out_chapol_init(br_sslrec_chapol_context *cc,
+	br_chacha20_run ichacha, br_poly1305_run ipoly,
+	const void *key, const void *iv)
+{
+	gen_chapol_init(cc, ichacha, ipoly, key, iv);
+	cc->vtable.out = &br_sslrec_out_chapol_vtable;	/* outgoing records */
+}
+
+static void
+chapol_max_plaintext(const br_sslrec_chapol_context *cc,
+	size_t *start, size_t *end)
+{
+	size_t room;
+
+	(void)cc;
+	/*
+	 * The 16-byte tag follows the data; plaintext is capped at 16384.
+	 */
+	room = *end - *start - 16;
+	*end = *start + (room > 16384 ? 16384 : room);
+}
+
+static unsigned char *
+chapol_encrypt(br_sslrec_chapol_context *cc,
+	int record_type, unsigned version, void *data, size_t *data_len)
+{
+	unsigned char *pt, *rec;
+	size_t len;
+
+	pt = data;
+	len = *data_len;
+	rec = pt - 5;	/* the 5-byte header goes right before the data */
+	gen_chapol_process(cc, record_type, version, pt, len, pt + len, 1);
+	rec[0] = (unsigned char)record_type;
+	br_enc16be(rec + 1, version);
+	br_enc16be(rec + 3, len + 16);
+	*data_len = len + 21;
+	return rec;
+}
+
+/* see bearssl_ssl.h (methods for outgoing ChaCha20+Poly1305 records) */
+const br_sslrec_out_chapol_class br_sslrec_out_chapol_vtable = {
+	{
+		sizeof(br_sslrec_chapol_context),
+		(void (*)(const br_sslrec_out_class *const *,
+			size_t *, size_t *))
+			chapol_max_plaintext,	/* takes the concrete context */
+		(unsigned char *(*)(const br_sslrec_out_class **,
+			int, unsigned, void *, size_t *))
+			chapol_encrypt
+	},
+	(void (*)(const br_sslrec_out_chapol_class **,
+		br_chacha20_run, br_poly1305_run,
+		const void *, const void *))
+		out_chapol_init
+};
diff --git a/third_party/bearssl/src/ssl_rec_gcm.c b/third_party/bearssl/src/ssl_rec_gcm.c
new file mode 100644
index 0000000..70df277
--- /dev/null
+++ b/third_party/bearssl/src/ssl_rec_gcm.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Common part of GCM set-up for both directions (the vtable is set by
+ * in_gcm_init() / out_gcm_init()): cipher keying, GHASH function, IV,
+ * sequence number, and h = CTR run (zero IV, counter 0) over zeros.
+ */
+static void
+gen_gcm_init(br_sslrec_gcm_context *cc,
+	const br_block_ctr_class *bc_impl,
+	const void *key, size_t key_len,
+	br_ghash gh_impl,
+	const void *iv)
+{
+	unsigned char zero_iv[12];
+
+	cc->gh = gh_impl;
+	cc->seq = 0;
+	memcpy(cc->iv, iv, sizeof cc->iv);
+	bc_impl->init(&cc->bc.vtable, key, key_len);
+	memset(zero_iv, 0, sizeof zero_iv);
+	memset(cc->h, 0, sizeof cc->h);
+	bc_impl->run(&cc->bc.vtable, zero_iv, 0, cc->h, sizeof cc->h);
+}
+
+static void
+in_gcm_init(br_sslrec_gcm_context *cc,
+	const br_block_ctr_class *bc_impl,
+	const void *key, size_t key_len,
+	br_ghash gh_impl,
+	const void *iv)
+{
+	gen_gcm_init(cc, bc_impl, key, key_len, gh_impl, iv);
+	cc->vtable.in = &br_sslrec_in_gcm_vtable;	/* incoming records */
+}
+
+static int
+gcm_check_length(const br_sslrec_gcm_context *cc, size_t rlen)
+{
+	/*
+	 * Fixed overhead of 24 bytes per record:
+	 *   8 bytes of nonce_explicit, ahead of the ciphertext
+	 *  16 bytes of authentication tag, behind the ciphertext
+	 */
+	(void)cc;
+	return !(rlen < 24 || rlen > 16384 + 24);
+}
+
+/*
+ * GHASH part of the authentication tag. The value written in 'tag' is
+ * not final: it must still be CTR-encrypted.
+ */
+static void
+do_tag(br_sslrec_gcm_context *cc,
+	int record_type, unsigned version,
+	void *data, size_t len, void *tag)
+{
+	unsigned char aad[13];
+	unsigned char lengths[16];
+
+	/*
+	 * GHASH input, in that order (each piece possibly 0-padded to
+	 * a multiple of the block size):
+	 *  -- the 13-byte header: sequence number, record type,
+	 *     protocol version, record length;
+	 *  -- the cipher text;
+	 *  -- the bit lengths of both, as two 64-bit big-endian words.
+	 */
+	br_enc64be(lengths, (uint64_t)(sizeof aad) << 3);
+	br_enc64be(lengths + 8, (uint64_t)len << 3);
+	br_enc64be(aad, cc->seq ++);
+	aad[8] = (unsigned char)record_type;
+	br_enc16be(aad + 9, version);
+	br_enc16be(aad + 11, len);
+	memset(tag, 0, 16);
+	cc->gh(tag, cc->h, aad, sizeof aad);
+	cc->gh(tag, cc->h, data, len);
+	cc->gh(tag, cc->h, lengths, sizeof lengths);
+}
+
+/*
+ * CTR pass over a record, with IV = cc->iv[0..3] || nonce[0..7]. The
+ * 'len' bytes at 'data' use counter values from 2 onwards; the 16 bytes
+ * at 'xortag' use counter value 1 (final step of the tag computation).
+ */
+static void
+do_ctr(br_sslrec_gcm_context *cc, const void *nonce, void *data, size_t len,
+	void *xortag)
+{
+	unsigned char full_iv[12];
+
+	memcpy(full_iv + 4, nonce, 8);
+	memcpy(full_iv, cc->iv, 4);
+	cc->bc.vtable->run(&cc->bc.vtable, full_iv, 2, data, len);
+	cc->bc.vtable->run(&cc->bc.vtable, full_iv, 1, xortag, 16);
+}
+
+static unsigned char *
+gcm_decrypt(br_sslrec_gcm_context *cc,
+	int record_type, unsigned version, void *data, size_t *data_len)
+{
+	unsigned char *ct;
+	unsigned char tag[16];
+	size_t k, ptlen;
+	uint32_t diff;
+
+	/*
+	 * Layout: nonce_explicit (8 bytes), ciphertext, tag (16 bytes).
+	 * The tag covers the ciphertext, so it is computed first.
+	 */
+	ct = (unsigned char *)data + 8;
+	ptlen = *data_len - 24;
+	do_tag(cc, record_type, version, ct, ptlen, tag);
+	do_ctr(cc, data, ct, ptlen, tag);
+
+	/* Compare without early exit (constant-time, possibly overkill). */
+	diff = 0;
+	for (k = 0; k < 16; k ++) {
+		diff |= tag[k] ^ ct[ptlen + k];
+	}
+	if (diff != 0) {
+		return NULL;
+	}
+	*data_len = ptlen;
+	return ct;
+}
+
+/* see bearssl_ssl.h (methods for incoming GCM records) */
+const br_sslrec_in_gcm_class br_sslrec_in_gcm_vtable = {
+	{
+		sizeof(br_sslrec_gcm_context),
+		(int (*)(const br_sslrec_in_class *const *, size_t))
+			gcm_check_length,	/* takes the concrete context */
+		(unsigned char *(*)(const br_sslrec_in_class **,
+			int, unsigned, void *, size_t *))
+			gcm_decrypt
+	},
+	(void (*)(const br_sslrec_in_gcm_class **,
+		const br_block_ctr_class *, const void *, size_t,
+		br_ghash, const void *))
+		in_gcm_init
+};
+
+static void
+out_gcm_init(br_sslrec_gcm_context *cc,
+	const br_block_ctr_class *bc_impl,
+	const void *key, size_t key_len,
+	br_ghash gh_impl,
+	const void *iv)
+{
+	gen_gcm_init(cc, bc_impl, key, key_len, gh_impl, iv);
+	cc->vtable.out = &br_sslrec_out_gcm_vtable;	/* outgoing records */
+}
+
+static void
+gcm_max_plaintext(const br_sslrec_gcm_context *cc,
+	size_t *start, size_t *end)
+{
+	size_t first, room;
+
+	(void)cc;
+	/* 8 bytes (nonce_explicit) go before the data, 16 (tag) after. */
+	first = *start + 8;
+	*start = first;
+	room = *end - first - 16;
+	/* No more than 16384 bytes of plaintext in a record. */
+	*end = first + (room > 16384 ? 16384 : room);
+}
+
+static unsigned char *
+gcm_encrypt(br_sslrec_gcm_context *cc,
+	int record_type, unsigned version, void *data, size_t *data_len)
+{
+	unsigned char *pt, *rec;
+	unsigned char mask[16];
+	size_t k, len;
+
+	pt = (unsigned char *)data;
+	len = *data_len;
+	rec = pt - 13;	/* 5-byte header, then 8-byte nonce_explicit */
+	br_enc64be(rec + 5, cc->seq);	/* nonce_explicit = sequence number */
+	memset(mask, 0, sizeof mask);
+	do_ctr(cc, rec + 5, pt, len, mask);
+	do_tag(cc, record_type, version, pt, len, pt + len);
+	/* mask is the CTR output for counter 1: it finalises the tag. */
+	for (k = 0; k < 16; k ++) {
+		pt[len + k] ^= mask[k];
+	}
+	rec[0] = (unsigned char)record_type;
+	br_enc16be(rec + 1, version);
+	br_enc16be(rec + 3, len + 24);
+	*data_len = len + 29;
+	return rec;
+}
+
+/* see bearssl_ssl.h (methods for outgoing GCM records) */
+const br_sslrec_out_gcm_class br_sslrec_out_gcm_vtable = {
+	{
+		sizeof(br_sslrec_gcm_context),
+		(void (*)(const br_sslrec_out_class *const *,
+			size_t *, size_t *))
+			gcm_max_plaintext,	/* takes the concrete context */
+		(unsigned char *(*)(const br_sslrec_out_class **,
+			int, unsigned, void *, size_t *))
+			gcm_encrypt
+	},
+	(void (*)(const br_sslrec_out_gcm_class **,
+		const br_block_ctr_class *, const void *, size_t,
+		br_ghash, const void *))
+		out_gcm_init
+};
diff --git a/third_party/bearssl/src/ssl_scert_single_ec.c b/third_party/bearssl/src/ssl_scert_single_ec.c
new file mode 100644
index 0000000..ce8d753
--- /dev/null
+++ b/third_party/bearssl/src/ssl_scert_single_ec.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Select the first client suite usable with the EC key; returns 1 and */
+/* fills choices on success, 0 if no offered suite is acceptable. */
+static int
+se_choose(const br_ssl_server_policy_class **pctx,
+	const br_ssl_server_context *cc,
+	br_ssl_server_choices *choices)
+{
+	br_ssl_server_policy_ec_context *ec;
+	const br_suite_translated *suites;
+	size_t i, count;
+	unsigned hid;
+	int may_keyx, may_sign;
+
+	ec = (br_ssl_server_policy_ec_context *)pctx;
+	suites = br_ssl_server_get_client_suites(cc, &count);
+	hid = br_ssl_choose_hash(br_ssl_server_get_client_hashes(cc) >> 8);
+	if (cc->eng.session.version < BR_TLS12) {
+		/* below TLS 1.2 the hash is forced to SHA-1 */
+		hid = br_sha1_ID;
+	}
+	may_keyx = (ec->allowed_usages & BR_KEYTYPE_KEYX) != 0;
+	may_sign = (ec->allowed_usages & BR_KEYTYPE_SIGN) != 0;
+	/* the chain is reported even when no suite ends up selected */
+	choices->chain = ec->chain;
+	choices->chain_len = ec->chain_len;
+	for (i = 0; i < count; i ++) {
+		unsigned kx;
+		int ok;
+
+		kx = (unsigned)suites[i][1] >> 12;
+		if (kx == BR_SSLKEYX_ECDH_RSA) {
+			ok = may_keyx
+				&& ec->cert_issuer_key_type == BR_KEYTYPE_RSA;
+		} else if (kx == BR_SSLKEYX_ECDH_ECDSA) {
+			ok = may_keyx
+				&& ec->cert_issuer_key_type == BR_KEYTYPE_EC;
+		} else if (kx == BR_SSLKEYX_ECDHE_ECDSA) {
+			ok = may_sign && hid != 0;
+			if (ok) {
+				/* only this key exchange sets algo_id */
+				choices->algo_id = hid + 0xFF00;
+			}
+		} else {
+			ok = 0;
+		}
+		if (ok) {
+			choices->cipher_suite = suites[i][0];
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static uint32_t
+se_do_keyx(const br_ssl_server_policy_class **pctx,
+	unsigned char *data, size_t *len)
+{
+	br_ssl_server_policy_ec_context *ec;
+	size_t off, olen;
+	uint32_t rc;
+
+	ec = (br_ssl_server_policy_ec_context *)pctx;
+	rc = ec->iec->mul(data, *len, ec->sk->x, ec->sk->xlen, ec->sk->curve);
+	off = ec->iec->xoff(ec->sk->curve, &olen);	/* offset and length to keep */
+	memmove(data, data + off, olen);	/* slide the kept bytes to the front */
+	*len = olen;
+	return rc;	/* status of the multiplication */
+}
+
+/* Sign the hv_len-byte hash value held in data[] with the EC private key; */
+/* returns what the ECDSA implementation returns, or 0 if rejected here. */
+static size_t
+se_do_sign(const br_ssl_server_policy_class **pctx,
+	unsigned algo_id, unsigned char *data, size_t hv_len, size_t len)
+{
+	br_ssl_server_policy_ec_context *pc;
+	unsigned char hv[64];
+	const br_hash_class *hc;
+
+	algo_id &= 0xFF;
+	pc = (br_ssl_server_policy_ec_context *)pctx;
+	hc = br_multihash_getimpl(pc->mhash, algo_id);
+	/* Reject: unknown hash, hash too large for hv[], or len below 139. */
+	if (hc == NULL || hv_len > sizeof hv || len < 139) {
+		return 0;
+	}
+	memcpy(hv, data, hv_len);	/* data[] is also passed to iecdsa(), so sign from a copy */
+	return pc->iecdsa(pc->iec, hc, hv, pc->sk, data);
+}
+
+static const br_ssl_server_policy_class se_policy_vtable = {
+	sizeof(br_ssl_server_policy_ec_context),	/* size of the EC policy context */
+	se_choose,	/* suite and chain selection */
+	se_do_keyx,	/* key exchange through iec->mul() */
+	se_do_sign	/* signature through iecdsa() */
+};
+
+/* see bearssl_ssl.h; installs the single-EC-key policy handler in cc */
+void
+br_ssl_server_set_single_ec(br_ssl_server_context *cc,
+	const br_x509_certificate *chain, size_t chain_len,
+	const br_ec_private_key *sk, unsigned allowed_usages,
+	unsigned cert_issuer_key_type,
+	const br_ec_impl *iec, br_ecdsa_sign iecdsa)
+{
+	cc->chain_handler.single_ec.vtable = &se_policy_vtable;
+	cc->chain_handler.single_ec.chain = chain;	/* pointer is stored, not copied */
+	cc->chain_handler.single_ec.chain_len = chain_len;
+	cc->chain_handler.single_ec.sk = sk;	/* pointer is stored, not copied */
+	cc->chain_handler.single_ec.allowed_usages = allowed_usages;
+	cc->chain_handler.single_ec.cert_issuer_key_type = cert_issuer_key_type;
+	cc->chain_handler.single_ec.mhash = &cc->eng.mhash;	/* points at the engine's own multihash */
+	cc->chain_handler.single_ec.iec = iec;
+	cc->chain_handler.single_ec.iecdsa = iecdsa;
+	cc->policy_vtable = &cc->chain_handler.single_ec.vtable;	/* make this handler the active policy */
+}
diff --git a/third_party/bearssl/src/ssl_scert_single_rsa.c b/third_party/bearssl/src/ssl_scert_single_rsa.c
new file mode 100644
index 0000000..b2c7767
--- /dev/null
+++ b/third_party/bearssl/src/ssl_scert_single_rsa.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Select the first client suite usable with the RSA key; returns 1 and */
+/* fills choices on success, 0 if no offered suite is acceptable. */
+static int
+sr_choose(const br_ssl_server_policy_class **pctx,
+	const br_ssl_server_context *cc,
+	br_ssl_server_choices *choices)
+{
+	br_ssl_server_policy_rsa_context *rsa;
+	const br_suite_translated *suites;
+	size_t i, count;
+	unsigned hid;
+	int can_sign;
+
+	rsa = (br_ssl_server_policy_rsa_context *)pctx;
+	suites = br_ssl_server_get_client_suites(cc, &count);
+	hid = 0;
+	if (cc->eng.session.version >= BR_TLS12) {
+		hid = br_ssl_choose_hash(br_ssl_server_get_client_hashes(cc));
+	}
+	/* TLS 1.2+ needs a usable hash; earlier versions keep hid at 0 */
+	can_sign = (hid != 0 || cc->eng.session.version < BR_TLS12)
+		&& (rsa->allowed_usages & BR_KEYTYPE_SIGN) != 0;
+	choices->chain = rsa->chain;
+	choices->chain_len = rsa->chain_len;
+	for (i = 0; i < count; i ++) {
+		unsigned kx;
+
+		kx = (unsigned)suites[i][1] >> 12;
+		if (kx == BR_SSLKEYX_RSA) {
+			if ((rsa->allowed_usages & BR_KEYTYPE_KEYX) == 0) {
+				continue;
+			}
+		} else if (kx == BR_SSLKEYX_ECDHE_RSA) {
+			if (!can_sign) {
+				continue;
+			}
+			choices->algo_id = hid + 0xFF00;
+		} else {
+			continue;
+		}
+		choices->cipher_suite = suites[i][0];
+		return 1;
+	}
+	return 0;
+}
+
+static uint32_t
+sr_do_keyx(const br_ssl_server_policy_class **pctx,
+	unsigned char *data, size_t *len)
+{
+	br_ssl_server_policy_rsa_context *pc;
+
+	pc = (br_ssl_server_policy_rsa_context *)pctx;	/* pctx is reinterpreted as the RSA policy context */
+	return br_rsa_ssl_decrypt(pc->irsacore, pc->sk, data, *len);	/* *len is read but not modified here */
+}
+
+/*
+ * OID for hash functions in RSA signatures (first byte is the length).
+ */
+static const unsigned char HASH_OID_SHA1[] = {
+	0x05, 0x2B, 0x0E, 0x03, 0x02, 0x1A	/* 1.3.14.3.2.26 */
+};
+
+static const unsigned char HASH_OID_SHA224[] = {
+	0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04	/* 2.16.840.1.101.3.4.2.4 */
+};
+
+static const unsigned char HASH_OID_SHA256[] = {
+	0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01	/* 2.16.840.1.101.3.4.2.1 */
+};
+
+static const unsigned char HASH_OID_SHA384[] = {
+	0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02	/* 2.16.840.1.101.3.4.2.2 */
+};
+
+static const unsigned char HASH_OID_SHA512[] = {
+	0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03	/* 2.16.840.1.101.3.4.2.3 */
+};
+
+static const unsigned char *HASH_OID[] = {	/* indexed by (algo_id - 2), for algo_id 2 to 6 */
+	HASH_OID_SHA1,
+	HASH_OID_SHA224,
+	HASH_OID_SHA256,
+	HASH_OID_SHA384,
+	HASH_OID_SHA512
+};
+
+/*
+ * Sign the hv_len-byte hash value held in data[] with the RSA private key.
+ * Returns the signature length in bytes, or 0 on failure.
+ */
+static size_t
+sr_do_sign(const br_ssl_server_policy_class **pctx,
+	unsigned algo_id, unsigned char *data, size_t hv_len, size_t len)
+{
+	br_ssl_server_policy_rsa_context *pc;
+	unsigned char hv[64];
+	size_t sig_len;
+	const unsigned char *hash_oid;
+
+	pc = (br_ssl_server_policy_rsa_context *)pctx;
+	algo_id &= 0xFF;
+	if (algo_id != 0 && (algo_id < 2 || algo_id > 6)) {
+		return 0;	/* identifier has no entry in HASH_OID[] */
+	}
+	hash_oid = (algo_id == 0) ? NULL : HASH_OID[algo_id - 2];
+	sig_len = (pc->sk->n_bitlen + 7) >> 3;	/* bytes needed for n_bitlen bits */
+	if (len < sig_len || hv_len > sizeof hv) {
+		return 0;	/* output room too small, or hash would overflow hv[] */
+	}
+	memcpy(hv, data, hv_len);	/* data[] is also passed to irsasign(), so sign from a copy */
+	return pc->irsasign(hash_oid, hv, hv_len, pc->sk, data) ? sig_len : 0;
+}
+
+static const br_ssl_server_policy_class sr_policy_vtable = {
+	sizeof(br_ssl_server_policy_rsa_context),	/* size of the RSA policy context */
+	sr_choose,	/* suite and chain selection */
+	sr_do_keyx,	/* key exchange through br_rsa_ssl_decrypt() */
+	sr_do_sign	/* signature through irsasign() */
+};
+
+/* see bearssl_ssl.h; installs the single-RSA-key policy handler in cc */
+void
+br_ssl_server_set_single_rsa(br_ssl_server_context *cc,
+	const br_x509_certificate *chain, size_t chain_len,
+	const br_rsa_private_key *sk, unsigned allowed_usages,
+	br_rsa_private irsacore, br_rsa_pkcs1_sign irsasign)
+{
+	cc->chain_handler.single_rsa.vtable = &sr_policy_vtable;
+	cc->chain_handler.single_rsa.chain = chain;	/* pointer is stored, not copied */
+	cc->chain_handler.single_rsa.chain_len = chain_len;
+	cc->chain_handler.single_rsa.sk = sk;	/* pointer is stored, not copied */
+	cc->chain_handler.single_rsa.allowed_usages = allowed_usages;
+	cc->chain_handler.single_rsa.irsacore = irsacore;	/* used by sr_do_keyx() */
+	cc->chain_handler.single_rsa.irsasign = irsasign;	/* used by sr_do_sign() */
+	cc->policy_vtable = &cc->chain_handler.single_rsa.vtable;	/* make this handler the active policy */
+}
diff --git a/third_party/bearssl/src/sysrng.c b/third_party/bearssl/src/sysrng.c
new file mode 100644
index 0000000..5a92114
--- /dev/null
+++ b/third_party/bearssl/src/sysrng.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+#if BR_USE_GETENTROPY
+#include <unistd.h>
+#endif
+
+#if BR_USE_URANDOM
+#include <sys/types.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#endif
+
+#if BR_USE_WIN32_RAND
+#include <windows.h>
+#include <wincrypt.h>
+#pragma comment(lib, "advapi32")
+#endif
+
+/*
+ * Seeder that uses the RDRAND opcodes (on x86 CPU).
+ */
+#if BR_RDRAND
+BR_TARGETS_X86_UP
+BR_TARGET("rdrnd")
+static int
+seeder_rdrand(const br_prng_class **ctx)
+{
+	unsigned char seed[32];
+	size_t off;
+
+	for (off = 0; off < sizeof seed; off += sizeof(uint32_t)) {
+		int tries;
+		uint32_t word;
+
+		/*
+		 * The 32-bit intrinsic is used so that the same code
+		 * works on both 32-bit and 64-bit architectures.
+		 *
+		 * Intel recommends trying at least 10 times in case of
+		 * failure, hence the retry limit below.
+		 *
+		 * AMD bug: some AMD processors are reported to fail
+		 * silently after a suspend/resume cycle; RDRAND then
+		 * reports success but always returns 0xFFFFFFFF.
+		 * see: https://bugzilla.kernel.org/show_bug.cgi?id=85911
+		 *
+		 * As a mitigation, a word equal to 0 or -1 is treated
+		 * as a failure and the instruction is tried again; this
+		 * should reliably detect the buggy case, at least. A
+		 * side effect is that seed words can never be
+		 * 0x00000000 or 0xFFFFFFFF, which is harmless: the
+		 * output only seeds a PRNG, and 32 bytes (256 bits of
+		 * entropy) are more than enough for that purpose.
+		 */
+		tries = 0;
+		while (!_rdrand32_step(&word)
+			|| word == 0 || word == (uint32_t)-1)
+		{
+			if (++ tries >= 10) {
+				return 0;
+			}
+		}
+		br_enc32le(seed + off, word);
+	}
+	(*ctx)->update(ctx, seed, sizeof seed);
+	return 1;
+}
+BR_TARGETS_X86_DOWN
+
+static int
+rdrand_supported(void)
+{
+	/*
+	 * RDRAND support is bit 30 (mask 0x40000000) of ECX, as returned by CPUID.
+	 */
+	return br_cpuid(0, 0, 0x40000000, 0);
+}
+#endif
+
+/*
+ * Seeder that uses /dev/urandom (on Unix-like systems).
+ */
+#if BR_USE_URANDOM
+static int
+seeder_urandom(const br_prng_class **ctx)
+{
+	int f;
+
+	f = open("/dev/urandom", O_RDONLY);
+	if (f >= 0) {
+		unsigned char tmp[32];
+		size_t u;
+
+		for (u = 0; u < sizeof tmp;) {	/* loop until tmp[] is full: read() may be short */
+			ssize_t len;
+
+			len = read(f, tmp + u, (sizeof tmp) - u);
+			if (len <= 0) {	/* error, or 0 = end of file (would otherwise spin forever) */
+				if (len < 0 && errno == EINTR) {
+					continue;	/* interrupted by a signal: retry */
+				}
+				break;
+			}
+			u += (size_t)len;
+		}
+		close(f);
+		if (u == sizeof tmp) {	/* inject only a completely filled buffer */
+			(*ctx)->update(ctx, tmp, sizeof tmp);
+			return 1;
+		}
+	}
+	return 0;
+}
+#endif
+
+/*
+ * Seeder that uses getentropy() (backed by getrandom() on some systems,
+ * e.g. Linux). On failure, it will use the /dev/urandom seeder (if
+ * enabled).
+ */
+#if BR_USE_GETENTROPY
+static int
+seeder_getentropy(const br_prng_class **ctx)
+{
+	unsigned char seed[32];
+
+	if (getentropy(seed, sizeof seed) != 0) {
+#if BR_USE_URANDOM
+		return seeder_urandom(ctx);	/* getentropy() failed: try /dev/urandom */
+#else
+		return 0;
+#endif
+	}
+	(*ctx)->update(ctx, seed, sizeof seed);
+	return 1;
+}
+#endif
+
+/*
+ * Seeder that uses CryptGenRandom() (on Windows).
+ */
+#if BR_USE_WIN32_RAND
+static int
+seeder_win32(const br_prng_class **ctx)
+{
+	HCRYPTPROV prov;
+	BYTE rnd[32];
+	BOOL ok;
+
+	if (!CryptAcquireContext(&prov, 0, 0, PROV_RSA_FULL,
+		CRYPT_VERIFYCONTEXT | CRYPT_SILENT))
+	{
+		return 0;
+	}
+	ok = CryptGenRandom(prov, sizeof rnd, rnd);
+	CryptReleaseContext(prov, 0);	/* released whether or not generation worked */
+	if (!ok) {
+		return 0;
+	}
+	(*ctx)->update(ctx, rnd, sizeof rnd);
+	return 1;
+}
+#endif
+
+/*
+ * An aggregate seeder that uses RDRAND, and falls back to an OS-provided
+ * source if RDRAND fails.
+ */
+#if BR_RDRAND && (BR_USE_GETENTROPY || BR_USE_URANDOM || BR_USE_WIN32_RAND)
+static int
+seeder_rdrand_with_fallback(const br_prng_class **ctx)
+{
+	if (seeder_rdrand(ctx)) {
+		return 1;
+	}
+#if BR_USE_GETENTROPY
+	return seeder_getentropy(ctx);
+#elif BR_USE_URANDOM
+	return seeder_urandom(ctx);
+#elif BR_USE_WIN32_RAND
+	return seeder_win32(ctx);
+#else
+#error "macro selection has gone wrong"
+#endif
+}
+#endif
+
+/* see bearssl_rand.h; *name (if name is not NULL) receives a static string naming the source */
+br_prng_seeder
+br_prng_seeder_system(const char **name)
+{
+#if BR_RDRAND
+	if (rdrand_supported()) {	/* runtime check; otherwise fall through to the OS sources below */
+		if (name != NULL) {
+			*name = "rdrand";	/* also reported when the fallback wrapper is returned */
+		}
+#if BR_USE_GETENTROPY || BR_USE_URANDOM || BR_USE_WIN32_RAND
+		return &seeder_rdrand_with_fallback;
+#else
+		return &seeder_rdrand;
+#endif
+	}
+#endif
+#if BR_USE_GETENTROPY
+	if (name != NULL) {
+		*name = "getentropy";
+	}
+	return &seeder_getentropy;
+#elif BR_USE_URANDOM
+	if (name != NULL) {
+		*name = "urandom";
+	}
+	return &seeder_urandom;
+#elif BR_USE_WIN32_RAND
+	if (name != NULL) {
+		*name = "win32";
+	}
+	return &seeder_win32;
+#else
+	if (name != NULL) {
+		*name = "none";
+	}
+	return 0;	/* no OS seeder was compiled in */
+#endif
+}
diff --git a/third_party/bearssl/src/x509_decoder.c b/third_party/bearssl/src/x509_decoder.c
new file mode 100644
index 0000000..8dd970f
--- /dev/null
+++ b/third_party/bearssl/src/x509_decoder.c
@@ -0,0 +1,773 @@
+/* Automatically generated code; do not modify directly. */
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct {
+	uint32_t *dp;
+	uint32_t *rp;
+	const unsigned char *ip;
+} t0_context;
+
+/* Decode an unsigned integer stored 7 bits per byte, most significant */
+/* group first; a byte below 0x80 terminates the value. */
+/* *p is advanced past the bytes that were consumed. */
+static uint32_t
+t0_parse7E_unsigned(const unsigned char **p)
+{
+	uint32_t acc;
+	unsigned b;
+
+	acc = 0;
+	do {
+		b = *(*p) ++;
+		acc = (acc << 7) | (uint32_t)(b & 0x7F);
+	} while (b >= 0x80);
+	return acc;
+}
+
+/*
+ * Decode a signed integer stored 7 bits per byte, most significant group
+ * first; bit 6 of the first byte is the sign. *p is advanced past it.
+ */
+static int32_t
+t0_parse7E_signed(const unsigned char **p)
+{
+	uint32_t acc;
+	unsigned b;
+	int negative;
+
+	negative = ((**p) >> 6) & 1;
+	acc = negative ? (uint32_t)0xFFFFFFFF : (uint32_t)0;
+	do {
+		b = *(*p) ++;
+		acc = (acc << 7) | (uint32_t)(b & 0x7F);
+	} while (b >= 0x80);
+	if (negative) {
+		return -(int32_t)~acc - 1;
+	}
+	return (int32_t)acc;
+}
+
+#define T0_VBYTE(x, n)   (unsigned char)((((uint32_t)(x) >> (n)) & 0x7F) | 0x80)
+#define T0_FBYTE(x, n)   (unsigned char)(((uint32_t)(x) >> (n)) & 0x7F)
+#define T0_SBYTE(x)      (unsigned char)((((uint32_t)(x) >> 28) + 0xF8) ^ 0xF8)
+#define T0_INT1(x)       T0_FBYTE(x, 0)
+#define T0_INT2(x)       T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+#define T0_INT3(x)       T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+#define T0_INT4(x)       T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+#define T0_INT5(x)       T0_SBYTE(x), T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+
+/* static const unsigned char t0_datablock[]; */
+
+
+void br_x509_decoder_init_main(void *t0ctx);
+
+void br_x509_decoder_run(void *t0ctx);
+
+
+
+#include "inner.h"
+
+
+
+
+
+#include "inner.h"
+
+#define CTX   ((br_x509_decoder_context *)(void *)((unsigned char *)t0ctx - offsetof(br_x509_decoder_context, cpu)))
+#define CONTEXT_NAME   br_x509_decoder_context
+
+/* see bearssl_x509.h */
+void
+br_x509_decoder_init(br_x509_decoder_context *ctx,
+	void (*append_dn)(void *ctx, const void *buf, size_t len),
+	void *append_dn_ctx)
+{
+	memset(ctx, 0, sizeof *ctx);
+	/*
+	 * The memset() above already cleared every field of the
+	 * context (including err, hbuf and hlen), so only the fields
+	 * that need a non-zero value are assigned below.
+	 */
+	ctx->append_dn = append_dn;
+	ctx->append_dn_ctx = append_dn_ctx;
+	ctx->cpu.dp = &ctx->dp_stack[0];	/* data stack starts empty */
+	ctx->cpu.rp = &ctx->rp_stack[0];	/* return stack starts empty */
+	br_x509_decoder_init_main(&ctx->cpu);	/* point the interpreter at its entry word */
+	br_x509_decoder_run(&ctx->cpu);	/* first run happens before any input has been pushed */
+}
+
+/* see bearssl_x509.h; hands one chunk of input to the decoder and runs it */
+void
+br_x509_decoder_push(br_x509_decoder_context *ctx,
+	const void *data, size_t len)
+{
+	ctx->hbuf = data;	/* pointer is stored, the bytes are not copied */
+	ctx->hlen = len;
+	br_x509_decoder_run(&ctx->cpu);	/* run the interpreter with the new input in place */
+}
+
+
+
+static const unsigned char t0_datablock[] = {
+	0x00, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x09,
+	0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x05, 0x09, 0x2A, 0x86,
+	0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0E, 0x09, 0x2A, 0x86, 0x48, 0x86,
+	0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+	0x01, 0x01, 0x0C, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01,
+	0x0D, 0x07, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01, 0x08, 0x2A, 0x86,
+	0x48, 0xCE, 0x3D, 0x03, 0x01, 0x07, 0x05, 0x2B, 0x81, 0x04, 0x00, 0x22,
+	0x05, 0x2B, 0x81, 0x04, 0x00, 0x23, 0x07, 0x2A, 0x86, 0x48, 0xCE, 0x3D,
+	0x04, 0x01, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x01, 0x08,
+	0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02, 0x08, 0x2A, 0x86, 0x48,
+	0xCE, 0x3D, 0x04, 0x03, 0x03, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04,
+	0x03, 0x04, 0x00, 0x1F, 0x03, 0xFC, 0x07, 0x7F, 0x0B, 0x5E, 0x0F, 0x1F,
+	0x12, 0xFE, 0x16, 0xBF, 0x1A, 0x9F, 0x1E, 0x7E, 0x22, 0x3F, 0x26, 0x1E,
+	0x29, 0xDF, 0x00, 0x1F, 0x03, 0xFD, 0x07, 0x9F, 0x0B, 0x7E, 0x0F, 0x3F,
+	0x13, 0x1E, 0x16, 0xDF, 0x1A, 0xBF, 0x1E, 0x9E, 0x22, 0x5F, 0x26, 0x3E,
+	0x29, 0xFF, 0x03, 0x55, 0x1D, 0x13
+};
+
+static const unsigned char t0_codeblock[] = {
+	0x00, 0x01, 0x00, 0x10, 0x00, 0x00, 0x01, 0x00, 0x11, 0x00, 0x00, 0x01,
+	0x01, 0x09, 0x00, 0x00, 0x01, 0x01, 0x0A, 0x00, 0x00, 0x1A, 0x1A, 0x00,
+	0x00, 0x01, T0_INT1(BR_ERR_X509_BAD_BOOLEAN), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_BAD_TAG_CLASS), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_BAD_TAG_VALUE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_BAD_TIME), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_EXTRA_ELEMENT), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_INDEFINITE_LENGTH), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_INNER_TRUNC), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_LIMIT_EXCEEDED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_NOT_CONSTRUCTED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_NOT_PRIMITIVE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_OVERFLOW), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_PARTIAL_BYTE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_UNEXPECTED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_UNSUPPORTED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_KEYTYPE_EC), 0x00, 0x00, 0x01, T0_INT1(BR_KEYTYPE_RSA),
+	0x00, 0x00, 0x01, T0_INT2(offsetof(CONTEXT_NAME, copy_dn)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(CONTEXT_NAME, decoded)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, isCA)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_x509_decoder_context, pkey_data)), 0x01,
+	T0_INT2(BR_X509_BUFSIZE_KEY), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, notafter_days)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, notafter_seconds)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, notbefore_days)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, notbefore_seconds)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, pad)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, signer_hash_id)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, signer_key_type)), 0x00, 0x00, 0x01,
+	0x80, 0x45, 0x00, 0x00, 0x01, 0x80, 0x4E, 0x00, 0x00, 0x01, 0x80, 0x54,
+	0x00, 0x00, 0x01, 0x81, 0x36, 0x00, 0x02, 0x03, 0x00, 0x03, 0x01, 0x1B,
+	0x02, 0x01, 0x13, 0x26, 0x02, 0x00, 0x0F, 0x15, 0x00, 0x00, 0x05, 0x02,
+	0x34, 0x1D, 0x00, 0x00, 0x06, 0x02, 0x35, 0x1D, 0x00, 0x00, 0x01, 0x10,
+	0x4F, 0x00, 0x00, 0x11, 0x05, 0x02, 0x38, 0x1D, 0x4C, 0x00, 0x00, 0x11,
+	0x05, 0x02, 0x38, 0x1D, 0x4D, 0x00, 0x00, 0x06, 0x02, 0x30, 0x1D, 0x00,
+	0x00, 0x1B, 0x19, 0x01, 0x08, 0x0E, 0x26, 0x29, 0x19, 0x09, 0x00, 0x00,
+	0x01, 0x30, 0x0A, 0x1B, 0x01, 0x00, 0x01, 0x09, 0x4B, 0x05, 0x02, 0x2F,
+	0x1D, 0x00, 0x00, 0x20, 0x20, 0x00, 0x00, 0x01, 0x80, 0x5A, 0x00, 0x00,
+	0x01, 0x80, 0x62, 0x00, 0x00, 0x01, 0x80, 0x6B, 0x00, 0x00, 0x01, 0x80,
+	0x74, 0x00, 0x00, 0x01, 0x80, 0x7D, 0x00, 0x00, 0x01, 0x3D, 0x00, 0x00,
+	0x20, 0x11, 0x06, 0x04, 0x2B, 0x6B, 0x7A, 0x71, 0x00, 0x04, 0x01, 0x00,
+	0x3D, 0x25, 0x01, 0x00, 0x3C, 0x25, 0x01, 0x87, 0xFF, 0xFF, 0x7F, 0x6D,
+	0x6D, 0x70, 0x1B, 0x01, 0x20, 0x11, 0x06, 0x11, 0x1A, 0x4C, 0x6B, 0x70,
+	0x01, 0x02, 0x50, 0x6E, 0x01, 0x02, 0x12, 0x06, 0x02, 0x39, 0x1D, 0x51,
+	0x70, 0x01, 0x02, 0x50, 0x6C, 0x6D, 0x7A, 0x6D, 0x7A, 0x6D, 0x65, 0x43,
+	0x24, 0x42, 0x24, 0x65, 0x41, 0x24, 0x40, 0x24, 0x51, 0x01, 0x01, 0x3C,
+	0x25, 0x6D, 0x7A, 0x01, 0x00, 0x3C, 0x25, 0x6D, 0x6D, 0x60, 0x05, 0x02,
+	0x39, 0x1D, 0x74, 0x1C, 0x06, 0x1C, 0x7A, 0x61, 0x6D, 0x3F, 0x68, 0x03,
+	0x00, 0x3F, 0x26, 0x02, 0x00, 0x09, 0x26, 0x02, 0x00, 0x0A, 0x68, 0x03,
+	0x01, 0x51, 0x51, 0x02, 0x00, 0x02, 0x01, 0x18, 0x04, 0x1E, 0x5A, 0x1C,
+	0x06, 0x18, 0x64, 0x03, 0x02, 0x51, 0x61, 0x1B, 0x03, 0x03, 0x1B, 0x3F,
+	0x23, 0x0D, 0x06, 0x02, 0x33, 0x1D, 0x62, 0x02, 0x02, 0x02, 0x03, 0x17,
+	0x04, 0x02, 0x39, 0x1D, 0x51, 0x01, 0x00, 0x3E, 0x25, 0x71, 0x01, 0x21,
+	0x5B, 0x01, 0x22, 0x5B, 0x1B, 0x01, 0x23, 0x11, 0x06, 0x28, 0x1A, 0x4C,
+	0x6B, 0x6D, 0x1B, 0x06, 0x1D, 0x6D, 0x60, 0x1A, 0x70, 0x1B, 0x01, 0x01,
+	0x11, 0x06, 0x03, 0x63, 0x1A, 0x70, 0x01, 0x04, 0x50, 0x6B, 0x4A, 0x1C,
+	0x06, 0x03, 0x5F, 0x04, 0x01, 0x7B, 0x51, 0x51, 0x04, 0x60, 0x51, 0x51,
+	0x04, 0x08, 0x01, 0x7F, 0x11, 0x05, 0x02, 0x38, 0x1D, 0x1A, 0x51, 0x6D,
+	0x60, 0x06, 0x80, 0x63, 0x75, 0x1C, 0x06, 0x06, 0x01, 0x02, 0x3B, 0x04,
+	0x80, 0x57, 0x76, 0x1C, 0x06, 0x06, 0x01, 0x03, 0x3B, 0x04, 0x80, 0x4D,
+	0x77, 0x1C, 0x06, 0x06, 0x01, 0x04, 0x3B, 0x04, 0x80, 0x43, 0x78, 0x1C,
+	0x06, 0x05, 0x01, 0x05, 0x3B, 0x04, 0x3A, 0x79, 0x1C, 0x06, 0x05, 0x01,
+	0x06, 0x3B, 0x04, 0x31, 0x55, 0x1C, 0x06, 0x05, 0x01, 0x02, 0x3A, 0x04,
+	0x28, 0x56, 0x1C, 0x06, 0x05, 0x01, 0x03, 0x3A, 0x04, 0x1F, 0x57, 0x1C,
+	0x06, 0x05, 0x01, 0x04, 0x3A, 0x04, 0x16, 0x58, 0x1C, 0x06, 0x05, 0x01,
+	0x05, 0x3A, 0x04, 0x0D, 0x59, 0x1C, 0x06, 0x05, 0x01, 0x06, 0x3A, 0x04,
+	0x04, 0x01, 0x00, 0x01, 0x00, 0x04, 0x04, 0x01, 0x00, 0x01, 0x00, 0x46,
+	0x25, 0x45, 0x25, 0x7A, 0x61, 0x7A, 0x51, 0x1A, 0x01, 0x01, 0x3D, 0x25,
+	0x73, 0x30, 0x1D, 0x00, 0x00, 0x01, 0x81, 0x06, 0x00, 0x01, 0x54, 0x0D,
+	0x06, 0x02, 0x32, 0x1D, 0x1B, 0x03, 0x00, 0x0A, 0x02, 0x00, 0x00, 0x00,
+	0x6D, 0x71, 0x1B, 0x01, 0x01, 0x11, 0x06, 0x08, 0x63, 0x01, 0x01, 0x15,
+	0x3E, 0x25, 0x04, 0x01, 0x2B, 0x7A, 0x00, 0x00, 0x70, 0x01, 0x06, 0x50,
+	0x6F, 0x00, 0x00, 0x70, 0x01, 0x03, 0x50, 0x6B, 0x72, 0x06, 0x02, 0x37,
+	0x1D, 0x00, 0x00, 0x26, 0x1B, 0x06, 0x07, 0x21, 0x1B, 0x06, 0x01, 0x16,
+	0x04, 0x76, 0x2B, 0x00, 0x00, 0x01, 0x01, 0x50, 0x6A, 0x01, 0x01, 0x10,
+	0x06, 0x02, 0x2C, 0x1D, 0x72, 0x27, 0x00, 0x00, 0x60, 0x05, 0x02, 0x39,
+	0x1D, 0x47, 0x1C, 0x06, 0x04, 0x01, 0x17, 0x04, 0x12, 0x48, 0x1C, 0x06,
+	0x04, 0x01, 0x18, 0x04, 0x0A, 0x49, 0x1C, 0x06, 0x04, 0x01, 0x19, 0x04,
+	0x02, 0x39, 0x1D, 0x00, 0x04, 0x70, 0x1B, 0x01, 0x17, 0x01, 0x18, 0x4B,
+	0x05, 0x02, 0x2F, 0x1D, 0x01, 0x18, 0x11, 0x03, 0x00, 0x4D, 0x6B, 0x66,
+	0x02, 0x00, 0x06, 0x0C, 0x01, 0x80, 0x64, 0x08, 0x03, 0x01, 0x66, 0x02,
+	0x01, 0x09, 0x04, 0x0E, 0x1B, 0x01, 0x32, 0x0D, 0x06, 0x04, 0x01, 0x80,
+	0x64, 0x09, 0x01, 0x8E, 0x6C, 0x09, 0x03, 0x01, 0x02, 0x01, 0x01, 0x82,
+	0x6D, 0x08, 0x02, 0x01, 0x01, 0x03, 0x09, 0x01, 0x04, 0x0C, 0x09, 0x02,
+	0x01, 0x01, 0x80, 0x63, 0x09, 0x01, 0x80, 0x64, 0x0C, 0x0A, 0x02, 0x01,
+	0x01, 0x83, 0x0F, 0x09, 0x01, 0x83, 0x10, 0x0C, 0x09, 0x03, 0x03, 0x01,
+	0x01, 0x01, 0x0C, 0x67, 0x2A, 0x01, 0x01, 0x0E, 0x02, 0x01, 0x01, 0x04,
+	0x07, 0x28, 0x02, 0x01, 0x01, 0x80, 0x64, 0x07, 0x27, 0x02, 0x01, 0x01,
+	0x83, 0x10, 0x07, 0x28, 0x1F, 0x15, 0x06, 0x03, 0x01, 0x18, 0x09, 0x5D,
+	0x09, 0x52, 0x1B, 0x01, 0x05, 0x14, 0x02, 0x03, 0x09, 0x03, 0x03, 0x01,
+	0x1F, 0x15, 0x01, 0x01, 0x26, 0x67, 0x02, 0x03, 0x09, 0x2A, 0x03, 0x03,
+	0x01, 0x00, 0x01, 0x17, 0x67, 0x01, 0x9C, 0x10, 0x08, 0x03, 0x02, 0x01,
+	0x00, 0x01, 0x3B, 0x67, 0x01, 0x3C, 0x08, 0x02, 0x02, 0x09, 0x03, 0x02,
+	0x01, 0x00, 0x01, 0x3C, 0x67, 0x02, 0x02, 0x09, 0x03, 0x02, 0x72, 0x1B,
+	0x01, 0x2E, 0x11, 0x06, 0x0D, 0x1A, 0x72, 0x1B, 0x01, 0x30, 0x01, 0x39,
+	0x4B, 0x06, 0x03, 0x1A, 0x04, 0x74, 0x01, 0x80, 0x5A, 0x10, 0x06, 0x02,
+	0x2F, 0x1D, 0x51, 0x02, 0x03, 0x02, 0x02, 0x00, 0x01, 0x72, 0x53, 0x01,
+	0x0A, 0x08, 0x03, 0x00, 0x72, 0x53, 0x02, 0x00, 0x09, 0x00, 0x02, 0x03,
+	0x00, 0x03, 0x01, 0x66, 0x1B, 0x02, 0x01, 0x02, 0x00, 0x4B, 0x05, 0x02,
+	0x2F, 0x1D, 0x00, 0x00, 0x23, 0x70, 0x01, 0x02, 0x50, 0x0B, 0x69, 0x00,
+	0x03, 0x1B, 0x03, 0x00, 0x03, 0x01, 0x03, 0x02, 0x6B, 0x72, 0x1B, 0x01,
+	0x81, 0x00, 0x13, 0x06, 0x02, 0x36, 0x1D, 0x1B, 0x01, 0x00, 0x11, 0x06,
+	0x0B, 0x1A, 0x1B, 0x05, 0x04, 0x1A, 0x01, 0x00, 0x00, 0x72, 0x04, 0x6F,
+	0x02, 0x01, 0x1B, 0x05, 0x02, 0x33, 0x1D, 0x2A, 0x03, 0x01, 0x02, 0x02,
+	0x25, 0x02, 0x02, 0x29, 0x03, 0x02, 0x1B, 0x06, 0x03, 0x72, 0x04, 0x68,
+	0x1A, 0x02, 0x00, 0x02, 0x01, 0x0A, 0x00, 0x01, 0x72, 0x1B, 0x01, 0x81,
+	0x00, 0x0D, 0x06, 0x01, 0x00, 0x01, 0x81, 0x00, 0x0A, 0x1B, 0x05, 0x02,
+	0x31, 0x1D, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x12, 0x06,
+	0x19, 0x02, 0x00, 0x2A, 0x03, 0x00, 0x1B, 0x01, 0x83, 0xFF, 0xFF, 0x7F,
+	0x12, 0x06, 0x02, 0x32, 0x1D, 0x01, 0x08, 0x0E, 0x26, 0x72, 0x23, 0x09,
+	0x04, 0x60, 0x00, 0x00, 0x6A, 0x5E, 0x00, 0x00, 0x6B, 0x7A, 0x00, 0x00,
+	0x70, 0x4E, 0x6B, 0x00, 0x01, 0x6B, 0x1B, 0x05, 0x02, 0x36, 0x1D, 0x72,
+	0x1B, 0x01, 0x81, 0x00, 0x13, 0x06, 0x02, 0x36, 0x1D, 0x03, 0x00, 0x1B,
+	0x06, 0x16, 0x72, 0x02, 0x00, 0x1B, 0x01, 0x87, 0xFF, 0xFF, 0x7F, 0x13,
+	0x06, 0x02, 0x36, 0x1D, 0x01, 0x08, 0x0E, 0x09, 0x03, 0x00, 0x04, 0x67,
+	0x1A, 0x02, 0x00, 0x00, 0x00, 0x6B, 0x1B, 0x01, 0x81, 0x7F, 0x12, 0x06,
+	0x08, 0x7A, 0x01, 0x00, 0x44, 0x25, 0x01, 0x00, 0x00, 0x1B, 0x44, 0x25,
+	0x44, 0x29, 0x62, 0x01, 0x7F, 0x00, 0x01, 0x72, 0x03, 0x00, 0x02, 0x00,
+	0x01, 0x05, 0x14, 0x01, 0x01, 0x15, 0x1E, 0x02, 0x00, 0x01, 0x06, 0x14,
+	0x1B, 0x01, 0x01, 0x15, 0x06, 0x02, 0x2D, 0x1D, 0x01, 0x04, 0x0E, 0x02,
+	0x00, 0x01, 0x1F, 0x15, 0x1B, 0x01, 0x1F, 0x11, 0x06, 0x02, 0x2E, 0x1D,
+	0x09, 0x00, 0x00, 0x1B, 0x05, 0x05, 0x01, 0x00, 0x01, 0x7F, 0x00, 0x70,
+	0x00, 0x00, 0x1B, 0x05, 0x02, 0x32, 0x1D, 0x2A, 0x73, 0x00, 0x00, 0x22,
+	0x1B, 0x01, 0x00, 0x13, 0x06, 0x01, 0x00, 0x1A, 0x16, 0x04, 0x74, 0x00,
+	0x01, 0x01, 0x00, 0x00, 0x01, 0x0B, 0x00, 0x00, 0x01, 0x15, 0x00, 0x00,
+	0x01, 0x1F, 0x00, 0x00, 0x01, 0x29, 0x00, 0x00, 0x01, 0x33, 0x00, 0x00,
+	0x7B, 0x1A, 0x00, 0x00, 0x1B, 0x06, 0x07, 0x7C, 0x1B, 0x06, 0x01, 0x16,
+	0x04, 0x76, 0x00, 0x00, 0x01, 0x00, 0x20, 0x21, 0x0B, 0x2B, 0x00
+};
+
+static const uint16_t t0_caddr[] = {
+	0,
+	5,
+	10,
+	15,
+	20,
+	24,
+	28,
+	32,
+	36,
+	40,
+	44,
+	48,
+	52,
+	56,
+	60,
+	64,
+	68,
+	72,
+	76,
+	80,
+	84,
+	88,
+	93,
+	98,
+	103,
+	111,
+	116,
+	121,
+	126,
+	131,
+	136,
+	141,
+	146,
+	151,
+	156,
+	161,
+	166,
+	181,
+	187,
+	193,
+	198,
+	206,
+	214,
+	220,
+	231,
+	246,
+	250,
+	255,
+	260,
+	265,
+	270,
+	275,
+	279,
+	289,
+	620,
+	625,
+	639,
+	659,
+	666,
+	678,
+	692,
+	707,
+	740,
+	960,
+	974,
+	991,
+	1000,
+	1067,
+	1123,
+	1127,
+	1131,
+	1136,
+	1184,
+	1210,
+	1254,
+	1265,
+	1274,
+	1287,
+	1291,
+	1295,
+	1299,
+	1303,
+	1307,
+	1311,
+	1315,
+	1327
+};
+
+#define T0_INTERPRETED   39
+
+#define T0_ENTER(ip, rp, slot)   do { \
+		const unsigned char *t0_newip; \
+		uint32_t t0_lnum; \
+		t0_newip = &t0_codeblock[t0_caddr[(slot) - T0_INTERPRETED]]; \
+		t0_lnum = t0_parse7E_unsigned(&t0_newip); \
+		(rp) += t0_lnum; \
+		*((rp) ++) = (uint32_t)((ip) - &t0_codeblock[0]) + (t0_lnum << 16); \
+		(ip) = t0_newip; \
+	} while (0)
+
+#define T0_DEFENTRY(name, slot) \
+void \
+name(void *ctx) \
+{ \
+	t0_context *t0ctx = ctx; \
+	t0ctx->ip = &t0_codeblock[0]; \
+	T0_ENTER(t0ctx->ip, t0ctx->rp, slot); \
+}
+
+T0_DEFENTRY(br_x509_decoder_init_main, 92)
+
+#define T0_NEXT(t0ipp)   (*(*(t0ipp)) ++)
+
+void
+br_x509_decoder_run(void *t0ctx)
+{
+	uint32_t *dp, *rp;
+	const unsigned char *ip;
+
+#define T0_LOCAL(x)    (*(rp - 2 - (x)))
+#define T0_POP()       (*-- dp)
+#define T0_POPi()      (*(int32_t *)(-- dp))
+#define T0_PEEK(x)     (*(dp - 1 - (x)))
+#define T0_PEEKi(x)    (*(int32_t *)(dp - 1 - (x)))
+#define T0_PUSH(v)     do { *dp = (v); dp ++; } while (0)
+#define T0_PUSHi(v)    do { *(int32_t *)dp = (v); dp ++; } while (0)
+#define T0_RPOP()      (*-- rp)
+#define T0_RPOPi()     (*(int32_t *)(-- rp))
+#define T0_RPUSH(v)    do { *rp = (v); rp ++; } while (0)
+#define T0_RPUSHi(v)   do { *(int32_t *)rp = (v); rp ++; } while (0)
+#define T0_ROLL(x)     do { \
+	size_t t0len = (size_t)(x); \
+	uint32_t t0tmp = *(dp - 1 - t0len); \
+	memmove(dp - t0len - 1, dp - t0len, t0len * sizeof *dp); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_SWAP()      do { \
+	uint32_t t0tmp = *(dp - 2); \
+	*(dp - 2) = *(dp - 1); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_ROT()       do { \
+	uint32_t t0tmp = *(dp - 3); \
+	*(dp - 3) = *(dp - 2); \
+	*(dp - 2) = *(dp - 1); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_NROT()       do { \
+	uint32_t t0tmp = *(dp - 1); \
+	*(dp - 1) = *(dp - 2); \
+	*(dp - 2) = *(dp - 3); \
+	*(dp - 3) = t0tmp; \
+} while (0)
+#define T0_PICK(x)      do { \
+	uint32_t t0depth = (x); \
+	T0_PUSH(T0_PEEK(t0depth)); \
+} while (0)
+#define T0_CO()         do { \
+	goto t0_exit; \
+} while (0)
+#define T0_RET()        goto t0_next
+
+	dp = ((t0_context *)t0ctx)->dp;
+	rp = ((t0_context *)t0ctx)->rp;
+	ip = ((t0_context *)t0ctx)->ip;
+	goto t0_next;
+	for (;;) {
+		uint32_t t0x;
+
+	t0_next:
+		t0x = T0_NEXT(&ip);
+		if (t0x < T0_INTERPRETED) {
+			switch (t0x) {
+				int32_t t0off;
+
+			case 0: /* ret */
+				t0x = T0_RPOP();
+				rp -= (t0x >> 16);
+				t0x &= 0xFFFF;
+				if (t0x == 0) {
+					ip = NULL;
+					goto t0_exit;
+				}
+				ip = &t0_codeblock[t0x];
+				break;
+			case 1: /* literal constant */
+				T0_PUSHi(t0_parse7E_signed(&ip));
+				break;
+			case 2: /* read local */
+				T0_PUSH(T0_LOCAL(t0_parse7E_unsigned(&ip)));
+				break;
+			case 3: /* write local */
+				T0_LOCAL(t0_parse7E_unsigned(&ip)) = T0_POP();
+				break;
+			case 4: /* jump */
+				t0off = t0_parse7E_signed(&ip);
+				ip += t0off;
+				break;
+			case 5: /* jump if */
+				t0off = t0_parse7E_signed(&ip);
+				if (T0_POP()) {
+					ip += t0off;
+				}
+				break;
+			case 6: /* jump if not */
+				t0off = t0_parse7E_signed(&ip);
+				if (!T0_POP()) {
+					ip += t0off;
+				}
+				break;
+			case 7: {
+				/* %25 */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSHi(a % b);
+
+				}
+				break;
+			case 8: {
+				/* * */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a * b);
+
+				}
+				break;
+			case 9: {
+				/* + */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a + b);
+
+				}
+				break;
+			case 10: {
+				/* - */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a - b);
+
+				}
+				break;
+			case 11: {
+				/* -rot */
+ T0_NROT(); 
+				}
+				break;
+			case 12: {
+				/* / */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSHi(a / b);
+
+				}
+				break;
+			case 13: {
+				/* < */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a < b));
+
+				}
+				break;
+			case 14: {
+				/* << */
+
+	int c = (int)T0_POPi();
+	uint32_t x = T0_POP();
+	T0_PUSH(x << c);
+
+				}
+				break;
+			case 15: {
+				/* <= */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a <= b));
+
+				}
+				break;
+			case 16: {
+				/* <> */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(-(uint32_t)(a != b));
+
+				}
+				break;
+			case 17: {
+				/* = */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(-(uint32_t)(a == b));
+
+				}
+				break;
+			case 18: {
+				/* > */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a > b));
+
+				}
+				break;
+			case 19: {
+				/* >= */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a >= b));
+
+				}
+				break;
+			case 20: {
+				/* >> */
+
+	int c = (int)T0_POPi();
+	int32_t x = T0_POPi();
+	T0_PUSHi(x >> c);
+
+				}
+				break;
+			case 21: {
+				/* and */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a & b);
+
+				}
+				break;
+			case 22: {
+				/* co */
+ T0_CO(); 
+				}
+				break;
+			case 23: {
+				/* copy-ec-pkey */
+
+	size_t qlen = T0_POP();
+	uint32_t curve = T0_POP();
+	CTX->pkey.key_type = BR_KEYTYPE_EC;
+	CTX->pkey.key.ec.curve = curve;
+	CTX->pkey.key.ec.q = CTX->pkey_data;
+	CTX->pkey.key.ec.qlen = qlen;
+
+				}
+				break;
+			case 24: {
+				/* copy-rsa-pkey */
+
+	size_t elen = T0_POP();
+	size_t nlen = T0_POP();
+	CTX->pkey.key_type = BR_KEYTYPE_RSA;
+	CTX->pkey.key.rsa.n = CTX->pkey_data;
+	CTX->pkey.key.rsa.nlen = nlen;
+	CTX->pkey.key.rsa.e = CTX->pkey_data + nlen;
+	CTX->pkey.key.rsa.elen = elen;
+
+				}
+				break;
+			case 25: {
+				/* data-get8 */
+
+	size_t addr = T0_POP();
+	T0_PUSH(t0_datablock[addr]);
+
+				}
+				break;
+			case 26: {
+				/* drop */
+ (void)T0_POP(); 
+				}
+				break;
+			case 27: {
+				/* dup */
+ T0_PUSH(T0_PEEK(0)); 
+				}
+				break;
+			case 28: {
+				/* eqOID */
+
+	const unsigned char *a2 = &t0_datablock[T0_POP()];
+	const unsigned char *a1 = &CTX->pad[0];
+	size_t len = a1[0];
+	int x;
+	if (len == a2[0]) {
+		x = -(memcmp(a1 + 1, a2 + 1, len) == 0);
+	} else {
+		x = 0;
+	}
+	T0_PUSH((uint32_t)x);
+
+				}
+				break;
+			case 29: {
+				/* fail */
+
+	CTX->err = T0_POPi();
+	T0_CO();
+
+				}
+				break;
+			case 30: {
+				/* neg */
+
+	uint32_t a = T0_POP();
+	T0_PUSH(-a);
+
+				}
+				break;
+			case 31: {
+				/* or */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a | b);
+
+				}
+				break;
+			case 32: {
+				/* over */
+ T0_PUSH(T0_PEEK(1)); 
+				}
+				break;
+			case 33: {
+				/* read-blob-inner */
+
+	uint32_t len = T0_POP();
+	uint32_t addr = T0_POP();
+	size_t clen = CTX->hlen;
+	if (clen > len) {
+		clen = (size_t)len;
+	}
+	if (addr != 0) {
+		memcpy((unsigned char *)CTX + addr, CTX->hbuf, clen);
+	}
+	if (CTX->copy_dn && CTX->append_dn) {
+		CTX->append_dn(CTX->append_dn_ctx, CTX->hbuf, clen);
+	}
+	CTX->hbuf += clen;
+	CTX->hlen -= clen;
+	T0_PUSH(addr + clen);
+	T0_PUSH(len - clen);
+
+				}
+				break;
+			case 34: {
+				/* read8-low */
+
+	if (CTX->hlen == 0) {
+		T0_PUSHi(-1);
+	} else {
+		unsigned char x = *CTX->hbuf ++;
+		if (CTX->copy_dn && CTX->append_dn) {
+			CTX->append_dn(CTX->append_dn_ctx, &x, 1);
+		}
+		CTX->hlen --;
+		T0_PUSH(x);
+	}
+
+				}
+				break;
+			case 35: {
+				/* rot */
+ T0_ROT(); 
+				}
+				break;
+			case 36: {
+				/* set32 */
+
+	uint32_t addr = T0_POP();
+	*(uint32_t *)(void *)((unsigned char *)CTX + addr) = T0_POP();
+
+				}
+				break;
+			case 37: {
+				/* set8 */
+
+	uint32_t addr = T0_POP();
+	*((unsigned char *)CTX + addr) = (unsigned char)T0_POP();
+
+				}
+				break;
+			case 38: {
+				/* swap */
+ T0_SWAP(); 
+				}
+				break;
+			}
+
+		} else {
+			T0_ENTER(ip, rp, t0x);
+		}
+	}
+t0_exit:
+	((t0_context *)t0ctx)->dp = dp;
+	((t0_context *)t0ctx)->rp = rp;
+	((t0_context *)t0ctx)->ip = ip;
+}
diff --git a/third_party/bearssl/src/x509_knownkey.c b/third_party/bearssl/src/x509_knownkey.c
new file mode 100644
index 0000000..7674f3f
--- /dev/null
+++ b/third_party/bearssl/src/x509_knownkey.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_x509.h: installs the vtable and stores a copy of *pk */
+void
+br_x509_knownkey_init_rsa(br_x509_knownkey_context *ctx,
+	const br_rsa_public_key *pk, unsigned usages)
+{
+	ctx->usages = usages;
+	ctx->pkey.key.rsa = *pk;
+	ctx->pkey.key_type = BR_KEYTYPE_RSA;
+	ctx->vtable = &br_x509_knownkey_vtable;
+}
+
+/* see bearssl_x509.h: installs the vtable and stores a copy of *pk */
+void
+br_x509_knownkey_init_ec(br_x509_knownkey_context *ctx,
+	const br_ec_public_key *pk, unsigned usages)
+{
+	ctx->usages = usages;
+	ctx->pkey.key.ec = *pk;
+	ctx->pkey.key_type = BR_KEYTYPE_EC;
+	ctx->vtable = &br_x509_knownkey_vtable;
+}
+
+/* Chain-start hook; deliberately empty, both arguments are ignored. */
+static void kk_start_chain(const br_x509_class **ctx, const char *server_name)
+{
+	(void)server_name;
+	(void)ctx;
+}
+
+/* Certificate-start hook; deliberately empty, the length is ignored. */
+static void kk_start_cert(const br_x509_class **ctx, uint32_t length)
+{
+	(void)length;
+	(void)ctx;
+}
+
+/* Data hook; deliberately empty: the supplied bytes are never examined. */
+static void kk_append(const br_x509_class **ctx, const unsigned char *buf, size_t len)
+{
+	(void)len;
+	(void)buf;
+	(void)ctx;
+}
+
+/* Certificate-end hook; deliberately empty. */
+static void kk_end_cert(const br_x509_class **ctx)
+{
+	(void)ctx;
+}
+
+/* Chain-end hook: nothing is validated, so the result is always 0. */
+static unsigned kk_end_chain(const br_x509_class **ctx)
+{
+	(void)ctx;
+	return 0;
+}
+
+/* Return the stored key; a non-NULL usages receives the stored usages. */
+static const br_x509_pkey *
+kk_get_pkey(const br_x509_class *const *ctx, unsigned *usages)
+{
+	const br_x509_knownkey_context *kc = (const void *)ctx;
+
+	if (usages) {
+		*usages = kc->usages;
+	}
+	return &kc->pkey;
+}
+
+/* see bearssl_x509.h: method table of the known-key engine (kk_* callbacks) */
+const br_x509_class br_x509_knownkey_vtable = {
+	sizeof(br_x509_knownkey_context),
+	kk_start_chain,
+	kk_start_cert,
+	kk_append,
+	kk_end_cert,
+	kk_end_chain,
+	kk_get_pkey
+};
diff --git a/third_party/bearssl/src/x509_minimal.c b/third_party/bearssl/src/x509_minimal.c
new file mode 100644
index 0000000..b3079de
--- /dev/null
+++ b/third_party/bearssl/src/x509_minimal.c
@@ -0,0 +1,1697 @@
+/* Automatically generated code; do not modify directly. */
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct {
+	uint32_t *dp;	/* data stack pointer (xm_start_chain points it at dp_stack) */
+	uint32_t *rp;	/* return stack pointer (xm_start_chain points it at rp_stack) */
+	const unsigned char *ip;	/* presumably the current T0 bytecode position */
+} t0_context;
+
+/*
+ * Decode a big-endian base-128 unsigned value: each byte supplies 7 low
+ * bits; the first byte below 0x80 ends it. *p is advanced past the value.
+ */
+static uint32_t
+t0_parse7E_unsigned(const unsigned char **p)
+{
+	uint32_t acc = 0;
+	unsigned b;
+
+	do {
+		b = *(*p) ++;
+		acc = (acc << 7) | (uint32_t)(b & 0x7F);
+	} while (b >= 0x80);
+	return acc;
+}
+
+/*
+ * Decode a signed base-128 value (same byte layout as the unsigned form).
+ * Bit 6 of the first byte is the sign: when set, the accumulator starts
+ * as all-ones so the result is sign-extended. *p is advanced past it.
+ */
+static int32_t
+t0_parse7E_signed(const unsigned char **p)
+{
+	int negative = ((**p) >> 6) & 1;
+	uint32_t acc = (uint32_t)-negative;
+	unsigned b;
+
+	do {
+		b = *(*p) ++;
+		acc = (acc << 7) | (uint32_t)(b & 0x7F);
+	} while (b >= 0x80);
+	if (!negative) {
+		return (int32_t)acc;
+	}
+	/* ~acc (not acc) is what gets cast, before negating and adjusting. */
+	return -(int32_t)~acc - 1;
+}
+
+#define T0_VBYTE(x, n)   (unsigned char)((((uint32_t)(x) >> (n)) & 0x7F) | 0x80)
+#define T0_FBYTE(x, n)   (unsigned char)(((uint32_t)(x) >> (n)) & 0x7F)
+#define T0_SBYTE(x)      (unsigned char)((((uint32_t)(x) >> 28) + 0xF8) ^ 0xF8)
+#define T0_INT1(x)       T0_FBYTE(x, 0)	/* T0_INTn(x): x as n bytes of 7 bits, 0x80 set on all but the last */
+#define T0_INT2(x)       T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+#define T0_INT3(x)       T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+#define T0_INT4(x)       T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+#define T0_INT5(x)       T0_SBYTE(x), T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+
+/* static const unsigned char t0_datablock[]; */
+
+
+void br_x509_minimal_init_main(void *t0ctx);
+
+void br_x509_minimal_run(void *t0ctx);
+
+
+
+#include "inner.h"
+
+
+
+
+
+#include "inner.h"
+
+/*
+ * Implementation Notes
+ * --------------------
+ *
+ * The C code pushes the data by chunks; all decoding is done in the
+ * T0 code. The cert_length value is set to the certificate length when
+ * a new certificate is started; the T0 code picks it up as outer limit,
+ * and decoding functions use it to ensure that no attempt is made at
+ * reading past it. The T0 code also checks that once the certificate is
+ * decoded, there are no trailing bytes.
+ *
+ * The T0 code sets cert_length to 0 when the certificate is fully
+ * decoded.
+ *
+ * The C code must still perform two checks:
+ *
+ *  -- If the certificate length is 0, then the T0 code will not be
+ *  invoked at all. This invalid condition must thus be reported by the
+ *  C code.
+ *
+ *  -- When reaching the end of certificate, the C code must verify that
+ *  the certificate length has been set to 0, thereby signaling that
+ *  the T0 code properly decoded a certificate.
+ *
+ * Processing of a chain works in the following way:
+ *
+ *  -- The error flag is set to a non-zero value when validation is
+ *  finished. The value is either BR_ERR_X509_OK (validation is
+ *  successful) or another non-zero error code. When a non-zero error
+ *  code is obtained, the remaining bytes in the current certificate and
+ *  the subsequent certificates (if any) are completely ignored.
+ *
+ *  -- Each certificate is decoded in due course, with the following
+ *  "interesting points":
+ *
+ *     -- Start of the TBS: the multihash engine is reset and activated.
+ *
+ *     -- Start of the issuer DN: the secondary hash engine is started,
+ *     to process the encoded issuer DN.
+ *
+ *     -- End of the issuer DN: the secondary hash engine is stopped. The
+ *     resulting hash value is computed and then copied into the
+ *     next_dn_hash[] buffer.
+ *
+ *     -- Start of the subject DN: the secondary hash engine is started,
+ *     to process the encoded subject DN.
+ *
+ *     -- For the EE certificate only: the Common Name, if any, is matched
+ *     against the expected server name.
+ *
+ *     -- End of the subject DN: the secondary hash engine is stopped. The
+ *     resulting hash value is computed into the pad. It is then processed:
+ *
+ *        -- If this is the EE certificate, then the hash is ignored
+ *        (except for direct trust processing, see later; the hash is
+ *        simply left in current_dn_hash[]).
+ *
+ *        -- Otherwise, the hashed subject DN is compared with the saved
+ *        hash value (in saved_dn_hash[]). They must match.
+ *
+ *     Either way, the next_dn_hash[] value is then copied into the
+ *     saved_dn_hash[] value. Thus, at that point, saved_dn_hash[]
+ *     contains the hash of the issuer DN for the current certificate,
+ *     and current_dn_hash[] contains the hash of the subject DN for the
+ *     current certificate.
+ *
+ *     -- Public key: it is decoded into the cert_pkey[] buffer. Unknown
+ *     key types are reported at that point.
+ *
+ *        -- If this is the EE certificate, then the key type is compared
+ *        with the expected key type (initialization parameter). The public
+ *        key data is copied to ee_pkey_data[]. The key and hashed subject
+ *        DN are also compared with the "direct trust" keys; if the key
+ *        and DN are matched, then validation ends with a success.
+ *
+ *        -- Otherwise, the saved signature (cert_sig[]) is verified
+ *        against the saved TBS hash (tbs_hash[]) and that freshly
+ *        decoded public key. Failure here ends validation with an error.
+ *
+ *     -- Extensions: extension values are processed in due order.
+ *
+ *        -- Basic Constraints: for all certificates except EE, must be
+ *        present, indicate a CA, and have a path length compatible with
+ *        the chain length so far.
+ *
+ *        -- Key Usage: for the EE, if present, must allow signatures
+ *        or encryption/key exchange, as required for the cipher suite.
+ *        For non-EE, if present, must have the "certificate sign" bit.
+ *
+ *        -- Subject Alt Name: for the EE, dNSName names are matched
+ *        against the server name. Ignored for non-EE.
+ *
+ *        -- Authority Key Identifier, Subject Key Identifier, Issuer
+ *        Alt Name, Subject Directory Attributes, CRL Distribution Points,
+ *        Freshest CRL, Authority Info Access and Subject Info Access
+ *        extensions are always ignored: they either contain only
+ *        informative data, or they relate to revocation processing, which
+ *        we explicitly do not support.
+ *
+ *        -- All other extensions are ignored if non-critical. If a
+ *        critical extension other than the ones above is encountered,
+ *        then a failure is reported.
+ *
+ *     -- End of the TBS: the multihash engine is stopped.
+ *
+ *     -- Signature algorithm: the signature algorithm on the
+ *     certificate is decoded. A failure is reported if that algorithm
+ *     is unknown. The hashed TBS corresponding to the signature hash
+ *     function is computed and stored in tbs_hash[] (if not supported,
+ *     then a failure is reported). The hash OID and length are stored
+ *     in cert_sig_hash_oid and cert_sig_hash_len.
+ *
+ *     -- Signature value: the signature value is copied into the
+ *     cert_sig[] array.
+ *
+ *     -- Certificate end: the hashed issuer DN (saved_dn_hash[]) is
+ *     looked up in the trust store (CA trust anchors only); for all
+ *     that match, the signature (cert_sig[]) is verified against the
+ *     anchor public key (hashed TBS is in tbs_hash[]). If one of these
+ *     signatures is valid, then validation ends with a success.
+ *
+ *  -- If the chain end is reached without obtaining a validation success,
+ *  then validation is reported as failed.
+ */
+
+/*
+ * The T0 compiler will produce these prototype declarations in the
+ * header.
+ *
+void br_x509_minimal_init_main(void *ctx);
+void br_x509_minimal_run(void *ctx);
+ */
+
+/* see bearssl_x509.h: zeroes *ctx, then stores vtable, hash and anchors */
+void
+br_x509_minimal_init(br_x509_minimal_context *ctx,
+	const br_hash_class *dn_hash_impl,
+	const br_x509_trust_anchor *trust_anchors, size_t trust_anchors_num)
+{
+	memset(ctx, 0, sizeof *ctx);
+	ctx->trust_anchors_num = trust_anchors_num;
+	ctx->trust_anchors = trust_anchors;
+	ctx->dn_hash_impl = dn_hash_impl;
+	ctx->vtable = &br_x509_minimal_vtable;
+}
+
+/*
+ * Begin a new chain: clear the per-chain results (name elements, public
+ * key, certificate count, error), point the T0 stack pointers at their
+ * stacks and call br_x509_minimal_init_main(). An empty or NULL
+ * server_name is stored as NULL.
+ */
+static void
+xm_start_chain(const br_x509_class **ctx, const char *server_name)
+{
+	br_x509_minimal_context *xc = (br_x509_minimal_context *)(void *)ctx;
+	size_t i;
+	for (i = 0; i < xc->num_name_elts; i ++) {
+		xc->name_elts[i].status = 0;
+		xc->name_elts[i].buf[0] = 0;
+	}
+	xc->err = 0;
+	xc->num_certs = 0;
+	memset(&xc->pkey, 0, sizeof xc->pkey);
+	xc->cpu.rp = xc->rp_stack;
+	xc->cpu.dp = xc->dp_stack;
+	br_x509_minimal_init_main(&xc->cpu);
+	xc->server_name = (server_name && *server_name) ? server_name : NULL;
+}
+
+/*
+ * Record the announced length of the next certificate. Does nothing once
+ * err is set; a zero length is rejected with BR_ERR_X509_TRUNCATED.
+ */
+static void
+xm_start_cert(const br_x509_class **ctx, uint32_t length)
+{
+	br_x509_minimal_context *xc = (br_x509_minimal_context *)(void *)ctx;
+
+	if (xc->err == 0 && length != 0) {
+		xc->cert_length = length;
+	} else if (xc->err == 0) {
+		xc->err = BR_ERR_X509_TRUNCATED;
+	}
+}
+
+/* Hand one chunk of certificate data to the T0 code; no-op once err is set. */
+static void
+xm_append(const br_x509_class **ctx, const unsigned char *buf, size_t len)
+{
+	br_x509_minimal_context *xc = (br_x509_minimal_context *)(void *)ctx;
+
+	if (xc->err == 0) {
+		/* Publish the chunk in hbuf/hlen before running the decoder. */
+		xc->hlen = len;
+		xc->hbuf = buf;
+		br_x509_minimal_run(&xc->cpu);
+	}
+}
+
+/* Count the certificate; flag TRUNCATED if cert_length was not brought to 0. */
+static void
+xm_end_cert(const br_x509_class **ctx)
+{
+	br_x509_minimal_context *xc = (br_x509_minimal_context *)(void *)ctx;
+
+	xc->num_certs ++;
+	if (xc->cert_length != 0 && xc->err == 0) {
+		xc->err = BR_ERR_X509_TRUNCATED;
+	}
+}
+
+/*
+ * Chain verdict: 0 when err is BR_ERR_X509_OK, otherwise the error code.
+ * A still-zero err is first replaced by EMPTY_CHAIN or NOT_TRUSTED.
+ */
+static unsigned
+xm_end_chain(const br_x509_class **ctx)
+{
+	br_x509_minimal_context *xc = (br_x509_minimal_context *)(void *)ctx;
+
+	if (xc->err == 0) {
+		xc->err = (xc->num_certs == 0)
+			? BR_ERR_X509_EMPTY_CHAIN : BR_ERR_X509_NOT_TRUSTED;
+	} else if (xc->err == BR_ERR_X509_OK) {
+		return 0;
+	}
+	return (unsigned)xc->err;
+}
+
+/*
+ * Return the context's public key, or NULL unless err is BR_ERR_X509_OK
+ * or BR_ERR_X509_NOT_TRUSTED. A non-NULL usages receives key_usages.
+ */
+static const br_x509_pkey *
+xm_get_pkey(const br_x509_class *const *ctx, unsigned *usages)
+{
+	br_x509_minimal_context *xc = (br_x509_minimal_context *)(void *)ctx;
+
+	if (xc->err != BR_ERR_X509_OK && xc->err != BR_ERR_X509_NOT_TRUSTED) {
+		return NULL;
+	}
+	if (usages != NULL) {
+		*usages = xc->key_usages;
+	}
+	return &xc->pkey;
+}
+
+/* see bearssl_x509.h: method table of the minimal engine (xm_* callbacks) */
+const br_x509_class br_x509_minimal_vtable = {
+	sizeof(br_x509_minimal_context),
+	xm_start_chain,
+	xm_start_cert,
+	xm_append,
+	xm_end_cert,
+	xm_end_chain,
+	xm_get_pkey
+};
+
+#define CTX   ((br_x509_minimal_context *)(void *)((unsigned char *)t0ctx - offsetof(br_x509_minimal_context, cpu)))
+#define CONTEXT_NAME   br_x509_minimal_context
+/* CTX: context enclosing the cpu state t0ctx. DNHASH_LEN: OUT field of the DN hash descriptor. */
+#define DNHASH_LEN   ((CTX->dn_hash_impl->desc >> BR_HASHDESC_OUT_OFF) & BR_HASHDESC_OUT_MASK)
+
+/*
+ * Hash the len bytes at dn (a trust anchor DN) into out, running the
+ * engine's dn_hash_impl over the engine's own dn_hash context.
+ */
+static void
+hash_dn(br_x509_minimal_context *ctx, const void *dn, size_t len,
+	unsigned char *out)
+{
+	const br_hash_class *impl = ctx->dn_hash_impl;
+	impl->init(&ctx->dn_hash.vtable);
+	impl->update(&ctx->dn_hash.vtable, dn, len);
+	impl->out(&ctx->dn_hash.vtable, out);
+}
+
+/*
+ * Test whether two unsigned big-endian integers have the same value.
+ * Leading zero bytes on either side are skipped before comparing, so
+ * encodings of different lengths may still match. Returns 1 if equal,
+ * 0 otherwise.
+ */
+static int
+eqbigint(const unsigned char *b1, size_t len1,
+	const unsigned char *b2, size_t len2)
+{
+	/* Drop leading zero bytes from both operands. */
+	for (; len1 > 0 && *b1 == 0; len1 --) {
+		b1 ++;
+	}
+	for (; len2 > 0 && *b2 == 0; len2 --) {
+		b2 ++;
+	}
+
+	/* Values can only match if the significant parts have equal length. */
+	return len1 == len2 && memcmp(b1, b2, len1) == 0;
+}
+
+/*
+ * Case-insensitive equality test over two buffers of len bytes each.
+ * Only the ASCII letters 'A'..'Z' are folded (to lower case); all other
+ * byte values, including non-ASCII ones, must match exactly.
+ * Returns 1 if the buffers match (trivially so when len is 0), else 0.
+ */
+static int
+eqnocase(const void *s1, const void *s2, size_t len)
+{
+	const unsigned char *lhs = s1;
+	const unsigned char *rhs = s2;
+	size_t i;
+
+	for (i = 0; i < len; i ++) {
+		int a = lhs[i];
+		int b = rhs[i];
+
+		/*
+		 * Map upper-case ASCII letters onto their lower-case form.
+		 */
+		a += (a >= 'A' && a <= 'Z') ? 'a' - 'A' : 0;
+		b += (b >= 'A' && b <= 'Z') ? 'a' - 'A' : 0;
+		if (a != b) {
+			return 0;
+		}
+	}
+	return 1;
+}
+
+static int verify_signature(br_x509_minimal_context *ctx,
+	const br_x509_pkey *pk);
+
+
+
+static const unsigned char t0_datablock[] = {	/* constant data; includes length-prefixed OID encodings (e.g. 09 2A 86 48 86 F7 0D 01 01 01) */
+	0x00, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x09,
+	0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x05, 0x09, 0x2A, 0x86,
+	0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0E, 0x09, 0x2A, 0x86, 0x48, 0x86,
+	0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+	0x01, 0x01, 0x0C, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01,
+	0x0D, 0x05, 0x2B, 0x0E, 0x03, 0x02, 0x1A, 0x09, 0x60, 0x86, 0x48, 0x01,
+	0x65, 0x03, 0x04, 0x02, 0x04, 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03,
+	0x04, 0x02, 0x01, 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02,
+	0x02, 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03, 0x07,
+	0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01, 0x08, 0x2A, 0x86, 0x48, 0xCE,
+	0x3D, 0x03, 0x01, 0x07, 0x05, 0x2B, 0x81, 0x04, 0x00, 0x22, 0x05, 0x2B,
+	0x81, 0x04, 0x00, 0x23, 0x07, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x01,
+	0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x01, 0x08, 0x2A, 0x86,
+	0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D,
+	0x04, 0x03, 0x03, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x04,
+	0x03, 0x55, 0x04, 0x03, 0x00, 0x1F, 0x03, 0xFC, 0x07, 0x7F, 0x0B, 0x5E,
+	0x0F, 0x1F, 0x12, 0xFE, 0x16, 0xBF, 0x1A, 0x9F, 0x1E, 0x7E, 0x22, 0x3F,
+	0x26, 0x1E, 0x29, 0xDF, 0x00, 0x1F, 0x03, 0xFD, 0x07, 0x9F, 0x0B, 0x7E,
+	0x0F, 0x3F, 0x13, 0x1E, 0x16, 0xDF, 0x1A, 0xBF, 0x1E, 0x9E, 0x22, 0x5F,
+	0x26, 0x3E, 0x29, 0xFF, 0x03, 0x55, 0x1D, 0x13, 0x03, 0x55, 0x1D, 0x0F,
+	0x03, 0x55, 0x1D, 0x11, 0x03, 0x55, 0x1D, 0x20, 0x08, 0x2B, 0x06, 0x01,
+	0x05, 0x05, 0x07, 0x02, 0x01, 0x03, 0x55, 0x1D, 0x23, 0x03, 0x55, 0x1D,
+	0x0E, 0x03, 0x55, 0x1D, 0x12, 0x03, 0x55, 0x1D, 0x09, 0x03, 0x55, 0x1D,
+	0x1F, 0x03, 0x55, 0x1D, 0x2E, 0x08, 0x2B, 0x06, 0x01, 0x05, 0x05, 0x07,
+	0x01, 0x01, 0x08, 0x2B, 0x06, 0x01, 0x05, 0x05, 0x07, 0x01, 0x0B
+};
+
+static const unsigned char t0_codeblock[] = {
+	0x00, 0x01, 0x00, 0x0D, 0x00, 0x00, 0x01, 0x00, 0x10, 0x00, 0x00, 0x01,
+	0x00, 0x11, 0x00, 0x00, 0x01, 0x01, 0x09, 0x00, 0x00, 0x01, 0x01, 0x0A,
+	0x00, 0x00, 0x25, 0x25, 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_BAD_BOOLEAN), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_BAD_DN), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_BAD_SERVER_NAME), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_BAD_TAG_CLASS), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_BAD_TAG_VALUE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_BAD_TIME), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_CRITICAL_EXTENSION), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_DN_MISMATCH), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_EXPIRED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_EXTRA_ELEMENT), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_FORBIDDEN_KEY_USAGE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_INDEFINITE_LENGTH), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_INNER_TRUNC), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_LIMIT_EXCEEDED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_NOT_CA), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_NOT_CONSTRUCTED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_NOT_PRIMITIVE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_OVERFLOW), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_PARTIAL_BYTE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_UNEXPECTED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_UNSUPPORTED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_WEAK_PUBLIC_KEY), 0x00, 0x00, 0x01,
+	T0_INT1(BR_KEYTYPE_EC), 0x00, 0x00, 0x01, T0_INT1(BR_KEYTYPE_RSA),
+	0x00, 0x00, 0x01, T0_INT2(offsetof(CONTEXT_NAME, cert_length)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(CONTEXT_NAME, cert_sig)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(CONTEXT_NAME, cert_sig_hash_len)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(CONTEXT_NAME, cert_sig_hash_oid)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(CONTEXT_NAME, cert_sig_len)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, cert_signer_key_type)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(CONTEXT_NAME, current_dn_hash)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(CONTEXT_NAME, key_usages)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_x509_minimal_context, pkey_data)), 0x01,
+	T0_INT2(BR_X509_BUFSIZE_KEY), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, min_rsa_size)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, next_dn_hash)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, num_certs)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, pad)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, saved_dn_hash)), 0x00, 0x00, 0x01, 0x80,
+	0x73, 0x00, 0x00, 0x01, 0x80, 0x7C, 0x00, 0x00, 0x01, 0x81, 0x02, 0x00,
+	0x00, 0x8F, 0x05, 0x05, 0x33, 0x41, 0x01, 0x00, 0x00, 0x33, 0x01, 0x0A,
+	0x0E, 0x09, 0x01, 0x9A, 0xFF, 0xB8, 0x00, 0x0A, 0x00, 0x00, 0x01, 0x82,
+	0x19, 0x00, 0x00, 0x01, 0x82, 0x01, 0x00, 0x00, 0x01, 0x81, 0x68, 0x00,
+	0x02, 0x03, 0x00, 0x03, 0x01, 0x26, 0x02, 0x01, 0x13, 0x3A, 0x02, 0x00,
+	0x0F, 0x15, 0x00, 0x00, 0x01, 0x81, 0x74, 0x00, 0x00, 0x05, 0x02, 0x51,
+	0x29, 0x00, 0x00, 0x06, 0x02, 0x52, 0x29, 0x00, 0x00, 0x01, 0x10, 0x74,
+	0x00, 0x00, 0x11, 0x05, 0x02, 0x55, 0x29, 0x71, 0x00, 0x00, 0x11, 0x05,
+	0x02, 0x55, 0x29, 0x72, 0x00, 0x00, 0x06, 0x02, 0x4B, 0x29, 0x00, 0x00,
+	0x01, 0x82, 0x11, 0x00, 0x00, 0x26, 0x21, 0x01, 0x08, 0x0E, 0x3A, 0x3F,
+	0x21, 0x09, 0x00, 0x0B, 0x03, 0x00, 0x5A, 0x2B, 0xAC, 0x38, 0xAC, 0xB0,
+	0x26, 0x01, 0x20, 0x11, 0x06, 0x11, 0x25, 0x71, 0xAA, 0xB0, 0x01, 0x02,
+	0x75, 0xAD, 0x01, 0x02, 0x12, 0x06, 0x02, 0x56, 0x29, 0x76, 0xB0, 0x01,
+	0x02, 0x75, 0xAB, 0xAC, 0xBF, 0x99, 0x64, 0x60, 0x22, 0x16, 0xAC, 0xA4,
+	0x03, 0x01, 0x03, 0x02, 0xA4, 0x02, 0x02, 0x02, 0x01, 0x19, 0x06, 0x02,
+	0x4A, 0x29, 0x76, 0x02, 0x00, 0x06, 0x05, 0x9A, 0x03, 0x03, 0x04, 0x09,
+	0x99, 0x60, 0x67, 0x22, 0x28, 0x05, 0x02, 0x49, 0x29, 0x67, 0x64, 0x22,
+	0x16, 0xAC, 0xAC, 0x9B, 0x05, 0x02, 0x56, 0x29, 0xB9, 0x27, 0x06, 0x27,
+	0xBF, 0xA1, 0xAC, 0x62, 0xA7, 0x03, 0x05, 0x62, 0x3A, 0x02, 0x05, 0x09,
+	0x3A, 0x02, 0x05, 0x0A, 0xA7, 0x03, 0x06, 0x76, 0x63, 0x2A, 0x01, 0x81,
+	0x00, 0x09, 0x02, 0x05, 0x12, 0x06, 0x02, 0x57, 0x29, 0x76, 0x59, 0x03,
+	0x04, 0x04, 0x3A, 0x85, 0x27, 0x06, 0x34, 0x9B, 0x05, 0x02, 0x56, 0x29,
+	0x68, 0x27, 0x06, 0x04, 0x01, 0x17, 0x04, 0x12, 0x69, 0x27, 0x06, 0x04,
+	0x01, 0x18, 0x04, 0x0A, 0x6A, 0x27, 0x06, 0x04, 0x01, 0x19, 0x04, 0x02,
+	0x56, 0x29, 0x03, 0x07, 0x76, 0xA1, 0x26, 0x03, 0x08, 0x26, 0x62, 0x33,
+	0x0D, 0x06, 0x02, 0x4F, 0x29, 0xA2, 0x58, 0x03, 0x04, 0x04, 0x02, 0x56,
+	0x29, 0x76, 0x02, 0x00, 0x06, 0x21, 0x02, 0x04, 0x59, 0x30, 0x11, 0x06,
+	0x08, 0x25, 0x02, 0x05, 0x02, 0x06, 0x1E, 0x04, 0x10, 0x58, 0x30, 0x11,
+	0x06, 0x08, 0x25, 0x02, 0x07, 0x02, 0x08, 0x1D, 0x04, 0x03, 0x56, 0x29,
+	0x25, 0x04, 0x24, 0x02, 0x04, 0x59, 0x30, 0x11, 0x06, 0x08, 0x25, 0x02,
+	0x05, 0x02, 0x06, 0x24, 0x04, 0x10, 0x58, 0x30, 0x11, 0x06, 0x08, 0x25,
+	0x02, 0x07, 0x02, 0x08, 0x23, 0x04, 0x03, 0x56, 0x29, 0x25, 0x26, 0x06,
+	0x01, 0x29, 0x25, 0x01, 0x00, 0x03, 0x09, 0xB1, 0x01, 0x21, 0x8C, 0x01,
+	0x22, 0x8C, 0x26, 0x01, 0x23, 0x11, 0x06, 0x81, 0x26, 0x25, 0x71, 0xAA,
+	0xAC, 0x26, 0x06, 0x81, 0x1A, 0x01, 0x00, 0x03, 0x0A, 0xAC, 0x9B, 0x25,
+	0xB0, 0x26, 0x01, 0x01, 0x11, 0x06, 0x04, 0xA3, 0x03, 0x0A, 0xB0, 0x01,
+	0x04, 0x75, 0xAA, 0x6E, 0x27, 0x06, 0x0F, 0x02, 0x00, 0x06, 0x03, 0xC0,
+	0x04, 0x05, 0x96, 0x01, 0x7F, 0x03, 0x09, 0x04, 0x80, 0x6C, 0x8E, 0x27,
+	0x06, 0x06, 0x02, 0x00, 0x98, 0x04, 0x80, 0x62, 0xC2, 0x27, 0x06, 0x11,
+	0x02, 0x00, 0x06, 0x09, 0x01, 0x00, 0x03, 0x03, 0x95, 0x03, 0x03, 0x04,
+	0x01, 0xC0, 0x04, 0x80, 0x4D, 0x70, 0x27, 0x06, 0x0A, 0x02, 0x0A, 0x06,
+	0x03, 0x97, 0x04, 0x01, 0xC0, 0x04, 0x3F, 0x6D, 0x27, 0x06, 0x03, 0xC0,
+	0x04, 0x38, 0xC5, 0x27, 0x06, 0x03, 0xC0, 0x04, 0x31, 0x8D, 0x27, 0x06,
+	0x03, 0xC0, 0x04, 0x2A, 0xC3, 0x27, 0x06, 0x03, 0xC0, 0x04, 0x23, 0x77,
+	0x27, 0x06, 0x03, 0xC0, 0x04, 0x1C, 0x82, 0x27, 0x06, 0x03, 0xC0, 0x04,
+	0x15, 0x6C, 0x27, 0x06, 0x03, 0xC0, 0x04, 0x0E, 0xC4, 0x27, 0x06, 0x03,
+	0xC0, 0x04, 0x07, 0x02, 0x0A, 0x06, 0x02, 0x48, 0x29, 0xC0, 0x76, 0x76,
+	0x04, 0xFE, 0x62, 0x76, 0x76, 0x04, 0x08, 0x01, 0x7F, 0x11, 0x05, 0x02,
+	0x55, 0x29, 0x25, 0x76, 0x39, 0x02, 0x00, 0x06, 0x08, 0x02, 0x03, 0x3B,
+	0x2F, 0x05, 0x02, 0x44, 0x29, 0x02, 0x00, 0x06, 0x01, 0x17, 0x02, 0x00,
+	0x02, 0x09, 0x2F, 0x05, 0x02, 0x50, 0x29, 0xB0, 0x73, 0xAA, 0x9B, 0x06,
+	0x80, 0x77, 0xBA, 0x27, 0x06, 0x07, 0x01, 0x02, 0x59, 0x87, 0x04, 0x80,
+	0x5E, 0xBB, 0x27, 0x06, 0x07, 0x01, 0x03, 0x59, 0x88, 0x04, 0x80, 0x53,
+	0xBC, 0x27, 0x06, 0x07, 0x01, 0x04, 0x59, 0x89, 0x04, 0x80, 0x48, 0xBD,
+	0x27, 0x06, 0x06, 0x01, 0x05, 0x59, 0x8A, 0x04, 0x3E, 0xBE, 0x27, 0x06,
+	0x06, 0x01, 0x06, 0x59, 0x8B, 0x04, 0x34, 0x7C, 0x27, 0x06, 0x06, 0x01,
+	0x02, 0x58, 0x87, 0x04, 0x2A, 0x7D, 0x27, 0x06, 0x06, 0x01, 0x03, 0x58,
+	0x88, 0x04, 0x20, 0x7E, 0x27, 0x06, 0x06, 0x01, 0x04, 0x58, 0x89, 0x04,
+	0x16, 0x7F, 0x27, 0x06, 0x06, 0x01, 0x05, 0x58, 0x8A, 0x04, 0x0C, 0x80,
+	0x27, 0x06, 0x06, 0x01, 0x06, 0x58, 0x8B, 0x04, 0x02, 0x56, 0x29, 0x5D,
+	0x34, 0x5F, 0x36, 0x1C, 0x26, 0x05, 0x02, 0x56, 0x29, 0x5C, 0x36, 0x04,
+	0x02, 0x56, 0x29, 0xBF, 0xA1, 0x26, 0x01, T0_INT2(BR_X509_BUFSIZE_SIG),
+	0x12, 0x06, 0x02, 0x4F, 0x29, 0x26, 0x5E, 0x34, 0x5B, 0xA2, 0x76, 0x76,
+	0x01, 0x00, 0x5A, 0x35, 0x18, 0x00, 0x00, 0x01, 0x30, 0x0A, 0x26, 0x01,
+	0x00, 0x01, 0x09, 0x6F, 0x05, 0x02, 0x47, 0x29, 0x00, 0x00, 0x30, 0x30,
+	0x00, 0x00, 0x01, 0x81, 0x08, 0x00, 0x00, 0x01, 0x81, 0x10, 0x00, 0x00,
+	0x01, 0x81, 0x19, 0x00, 0x00, 0x01, 0x81, 0x22, 0x00, 0x00, 0x01, 0x81,
+	0x2B, 0x00, 0x01, 0x7B, 0x01, 0x01, 0x11, 0x3A, 0x01, 0x83, 0xFD, 0x7F,
+	0x11, 0x15, 0x06, 0x03, 0x3A, 0x25, 0x00, 0x3A, 0x26, 0x03, 0x00, 0x26,
+	0xC6, 0x05, 0x04, 0x41, 0x01, 0x00, 0x00, 0x26, 0x01, 0x81, 0x00, 0x0D,
+	0x06, 0x04, 0x93, 0x04, 0x80, 0x49, 0x26, 0x01, 0x90, 0x00, 0x0D, 0x06,
+	0x0F, 0x01, 0x06, 0x14, 0x01, 0x81, 0x40, 0x2F, 0x93, 0x02, 0x00, 0x01,
+	0x00, 0x94, 0x04, 0x33, 0x26, 0x01, 0x83, 0xFF, 0x7F, 0x0D, 0x06, 0x14,
+	0x01, 0x0C, 0x14, 0x01, 0x81, 0x60, 0x2F, 0x93, 0x02, 0x00, 0x01, 0x06,
+	0x94, 0x02, 0x00, 0x01, 0x00, 0x94, 0x04, 0x17, 0x01, 0x12, 0x14, 0x01,
+	0x81, 0x70, 0x2F, 0x93, 0x02, 0x00, 0x01, 0x0C, 0x94, 0x02, 0x00, 0x01,
+	0x06, 0x94, 0x02, 0x00, 0x01, 0x00, 0x94, 0x00, 0x00, 0x01, 0x82, 0x15,
+	0x00, 0x00, 0x26, 0x01, 0x83, 0xB0, 0x00, 0x01, 0x83, 0xB7, 0x7F, 0x6F,
+	0x00, 0x00, 0x01, 0x81, 0x34, 0x00, 0x00, 0x01, 0x80, 0x6B, 0x00, 0x00,
+	0x01, 0x81, 0x78, 0x00, 0x00, 0x01, 0x3D, 0x00, 0x00, 0x01, 0x80, 0x43,
+	0x00, 0x00, 0x01, 0x80, 0x4D, 0x00, 0x00, 0x01, 0x80, 0x57, 0x00, 0x00,
+	0x01, 0x80, 0x61, 0x00, 0x00, 0x30, 0x11, 0x06, 0x04, 0x41, 0xAA, 0xBF,
+	0xB1, 0x00, 0x00, 0x01, 0x82, 0x09, 0x00, 0x00, 0x01, 0x81, 0x6C, 0x00,
+	0x00, 0x26, 0x01, 0x83, 0xB8, 0x00, 0x01, 0x83, 0xBF, 0x7F, 0x6F, 0x00,
+	0x00, 0x01, 0x30, 0x61, 0x36, 0x01, 0x7F, 0x79, 0x1A, 0x01, 0x00, 0x79,
+	0x1A, 0x04, 0x7A, 0x00, 0x01, 0x81, 0x38, 0x00, 0x01, 0x7B, 0x0D, 0x06,
+	0x02, 0x4E, 0x29, 0x26, 0x03, 0x00, 0x0A, 0x02, 0x00, 0x00, 0x00, 0x30,
+	0x26, 0x3E, 0x3A, 0x01, 0x82, 0x00, 0x13, 0x2F, 0x06, 0x04, 0x41, 0x01,
+	0x00, 0x00, 0x30, 0x66, 0x09, 0x36, 0x3F, 0x00, 0x00, 0x14, 0x01, 0x3F,
+	0x15, 0x01, 0x81, 0x00, 0x2F, 0x93, 0x00, 0x02, 0x01, 0x00, 0x03, 0x00,
+	0xAC, 0x26, 0x06, 0x80, 0x59, 0xB0, 0x01, 0x20, 0x30, 0x11, 0x06, 0x17,
+	0x25, 0x71, 0xAA, 0x9B, 0x25, 0x01, 0x7F, 0x2E, 0x03, 0x01, 0xB0, 0x01,
+	0x20, 0x74, 0xAA, 0xAF, 0x02, 0x01, 0x20, 0x76, 0x76, 0x04, 0x38, 0x01,
+	0x21, 0x30, 0x11, 0x06, 0x08, 0x25, 0x72, 0xB3, 0x01, 0x01, 0x1F, 0x04,
+	0x2A, 0x01, 0x22, 0x30, 0x11, 0x06, 0x11, 0x25, 0x72, 0xB3, 0x26, 0x06,
+	0x06, 0x2C, 0x02, 0x00, 0x2F, 0x03, 0x00, 0x01, 0x02, 0x1F, 0x04, 0x13,
+	0x01, 0x26, 0x30, 0x11, 0x06, 0x08, 0x25, 0x72, 0xB3, 0x01, 0x06, 0x1F,
+	0x04, 0x05, 0x41, 0xAB, 0x01, 0x00, 0x25, 0x04, 0xFF, 0x23, 0x76, 0x02,
+	0x00, 0x00, 0x00, 0xAC, 0xB1, 0x26, 0x01, 0x01, 0x11, 0x06, 0x08, 0xA3,
+	0x05, 0x02, 0x50, 0x29, 0xB1, 0x04, 0x02, 0x50, 0x29, 0x26, 0x01, 0x02,
+	0x11, 0x06, 0x0C, 0x25, 0x72, 0xAD, 0x65, 0x2B, 0x40, 0x0D, 0x06, 0x02,
+	0x50, 0x29, 0xB1, 0x01, 0x7F, 0x10, 0x06, 0x02, 0x55, 0x29, 0x25, 0x76,
+	0x00, 0x00, 0xAC, 0x26, 0x06, 0x1A, 0xAC, 0x9B, 0x25, 0x26, 0x06, 0x11,
+	0xAC, 0x26, 0x06, 0x0C, 0xAC, 0x9B, 0x25, 0x86, 0x27, 0x05, 0x02, 0x48,
+	0x29, 0xBF, 0x04, 0x71, 0x76, 0x76, 0x04, 0x63, 0x76, 0x00, 0x02, 0x03,
+	0x00, 0xB0, 0x01, 0x03, 0x75, 0xAA, 0xB7, 0x03, 0x01, 0x02, 0x01, 0x01,
+	0x07, 0x12, 0x06, 0x02, 0x55, 0x29, 0x26, 0x01, 0x00, 0x30, 0x11, 0x06,
+	0x05, 0x25, 0x4C, 0x29, 0x04, 0x15, 0x01, 0x01, 0x30, 0x11, 0x06, 0x0A,
+	0x25, 0xB7, 0x02, 0x01, 0x14, 0x02, 0x01, 0x0E, 0x04, 0x05, 0x25, 0xB7,
+	0x01, 0x00, 0x25, 0x02, 0x00, 0x06, 0x19, 0x01, 0x00, 0x30, 0x01, 0x38,
+	0x15, 0x06, 0x03, 0x01, 0x10, 0x2F, 0x3A, 0x01, 0x81, 0x40, 0x15, 0x06,
+	0x03, 0x01, 0x20, 0x2F, 0x61, 0x36, 0x04, 0x07, 0x01, 0x04, 0x15, 0x05,
+	0x02, 0x4C, 0x29, 0xBF, 0x00, 0x00, 0x37, 0xAC, 0xBF, 0x1B, 0x00, 0x03,
+	0x01, 0x00, 0x03, 0x00, 0x37, 0xAC, 0x26, 0x06, 0x30, 0xB0, 0x01, 0x11,
+	0x74, 0xAA, 0x26, 0x05, 0x02, 0x43, 0x29, 0x26, 0x06, 0x20, 0xAC, 0x9B,
+	0x25, 0x84, 0x27, 0x03, 0x01, 0x01, 0x00, 0x2E, 0x03, 0x02, 0xAF, 0x26,
+	0x02, 0x01, 0x15, 0x06, 0x07, 0x2C, 0x06, 0x04, 0x01, 0x7F, 0x03, 0x00,
+	0x02, 0x02, 0x20, 0x76, 0x04, 0x5D, 0x76, 0x04, 0x4D, 0x76, 0x1B, 0x02,
+	0x00, 0x00, 0x00, 0xB0, 0x01, 0x06, 0x75, 0xAE, 0x00, 0x00, 0xB5, 0x83,
+	0x06, 0x0E, 0x3A, 0x26, 0x05, 0x06, 0x41, 0x01, 0x00, 0x01, 0x00, 0x00,
+	0xB5, 0x6B, 0x04, 0x08, 0x8F, 0x06, 0x05, 0x25, 0x01, 0x00, 0x04, 0x00,
+	0x00, 0x00, 0xB6, 0x83, 0x06, 0x0E, 0x3A, 0x26, 0x05, 0x06, 0x41, 0x01,
+	0x00, 0x01, 0x00, 0x00, 0xB6, 0x6B, 0x04, 0x08, 0x8F, 0x06, 0x05, 0x25,
+	0x01, 0x00, 0x04, 0x00, 0x00, 0x00, 0xB7, 0x26, 0x01, 0x81, 0x00, 0x0D,
+	0x06, 0x04, 0x00, 0x04, 0x80, 0x55, 0x26, 0x01, 0x81, 0x40, 0x0D, 0x06,
+	0x07, 0x25, 0x01, 0x00, 0x00, 0x04, 0x80, 0x47, 0x26, 0x01, 0x81, 0x60,
+	0x0D, 0x06, 0x0E, 0x01, 0x1F, 0x15, 0x01, 0x01, 0xA0, 0x01, 0x81, 0x00,
+	0x01, 0x8F, 0x7F, 0x04, 0x32, 0x26, 0x01, 0x81, 0x70, 0x0D, 0x06, 0x0F,
+	0x01, 0x0F, 0x15, 0x01, 0x02, 0xA0, 0x01, 0x90, 0x00, 0x01, 0x83, 0xFF,
+	0x7F, 0x04, 0x1C, 0x26, 0x01, 0x81, 0x78, 0x0D, 0x06, 0x11, 0x01, 0x07,
+	0x15, 0x01, 0x03, 0xA0, 0x01, 0x84, 0x80, 0x00, 0x01, 0x80, 0xC3, 0xFF,
+	0x7F, 0x04, 0x04, 0x25, 0x01, 0x00, 0x00, 0x6F, 0x05, 0x03, 0x25, 0x01,
+	0x00, 0x00, 0x00, 0x3A, 0x26, 0x05, 0x06, 0x41, 0x01, 0x00, 0x01, 0x7F,
+	0x00, 0xB7, 0x33, 0x26, 0x3C, 0x06, 0x03, 0x3A, 0x25, 0x00, 0x01, 0x06,
+	0x0E, 0x3A, 0x26, 0x01, 0x06, 0x14, 0x01, 0x02, 0x10, 0x06, 0x04, 0x41,
+	0x01, 0x7F, 0x00, 0x01, 0x3F, 0x15, 0x09, 0x00, 0x00, 0x26, 0x06, 0x06,
+	0x0B, 0x9F, 0x33, 0x40, 0x04, 0x77, 0x25, 0x26, 0x00, 0x00, 0xB0, 0x01,
+	0x03, 0x75, 0xAA, 0xB7, 0x06, 0x02, 0x54, 0x29, 0x00, 0x00, 0x3A, 0x26,
+	0x06, 0x07, 0x31, 0x26, 0x06, 0x01, 0x1A, 0x04, 0x76, 0x41, 0x00, 0x00,
+	0x01, 0x01, 0x75, 0xA9, 0x01, 0x01, 0x10, 0x06, 0x02, 0x42, 0x29, 0xB7,
+	0x3D, 0x00, 0x04, 0xB0, 0x26, 0x01, 0x17, 0x01, 0x18, 0x6F, 0x05, 0x02,
+	0x47, 0x29, 0x01, 0x18, 0x11, 0x03, 0x00, 0x72, 0xAA, 0xA5, 0x02, 0x00,
+	0x06, 0x0C, 0x01, 0x80, 0x64, 0x08, 0x03, 0x01, 0xA5, 0x02, 0x01, 0x09,
+	0x04, 0x0E, 0x26, 0x01, 0x32, 0x0D, 0x06, 0x04, 0x01, 0x80, 0x64, 0x09,
+	0x01, 0x8E, 0x6C, 0x09, 0x03, 0x01, 0x02, 0x01, 0x01, 0x82, 0x6D, 0x08,
+	0x02, 0x01, 0x01, 0x03, 0x09, 0x01, 0x04, 0x0C, 0x09, 0x02, 0x01, 0x01,
+	0x80, 0x63, 0x09, 0x01, 0x80, 0x64, 0x0C, 0x0A, 0x02, 0x01, 0x01, 0x83,
+	0x0F, 0x09, 0x01, 0x83, 0x10, 0x0C, 0x09, 0x03, 0x03, 0x01, 0x01, 0x01,
+	0x0C, 0xA6, 0x40, 0x01, 0x01, 0x0E, 0x02, 0x01, 0x01, 0x04, 0x07, 0x3E,
+	0x02, 0x01, 0x01, 0x80, 0x64, 0x07, 0x3D, 0x02, 0x01, 0x01, 0x83, 0x10,
+	0x07, 0x3E, 0x2F, 0x15, 0x06, 0x03, 0x01, 0x18, 0x09, 0x91, 0x09, 0x78,
+	0x26, 0x01, 0x05, 0x14, 0x02, 0x03, 0x09, 0x03, 0x03, 0x01, 0x1F, 0x15,
+	0x01, 0x01, 0x3A, 0xA6, 0x02, 0x03, 0x09, 0x40, 0x03, 0x03, 0x01, 0x00,
+	0x01, 0x17, 0xA6, 0x01, 0x9C, 0x10, 0x08, 0x03, 0x02, 0x01, 0x00, 0x01,
+	0x3B, 0xA6, 0x01, 0x3C, 0x08, 0x02, 0x02, 0x09, 0x03, 0x02, 0x01, 0x00,
+	0x01, 0x3C, 0xA6, 0x02, 0x02, 0x09, 0x03, 0x02, 0xB7, 0x26, 0x01, 0x2E,
+	0x11, 0x06, 0x0D, 0x25, 0xB7, 0x26, 0x01, 0x30, 0x01, 0x39, 0x6F, 0x06,
+	0x03, 0x25, 0x04, 0x74, 0x01, 0x80, 0x5A, 0x10, 0x06, 0x02, 0x47, 0x29,
+	0x76, 0x02, 0x03, 0x02, 0x02, 0x00, 0x01, 0xB7, 0x7A, 0x01, 0x0A, 0x08,
+	0x03, 0x00, 0xB7, 0x7A, 0x02, 0x00, 0x09, 0x00, 0x02, 0x03, 0x00, 0x03,
+	0x01, 0xA5, 0x26, 0x02, 0x01, 0x02, 0x00, 0x6F, 0x05, 0x02, 0x47, 0x29,
+	0x00, 0x00, 0x33, 0xB0, 0x01, 0x02, 0x75, 0x0B, 0xA8, 0x00, 0x03, 0x26,
+	0x03, 0x00, 0x03, 0x01, 0x03, 0x02, 0xAA, 0xB7, 0x26, 0x01, 0x81, 0x00,
+	0x13, 0x06, 0x02, 0x53, 0x29, 0x26, 0x01, 0x00, 0x11, 0x06, 0x0B, 0x25,
+	0x26, 0x05, 0x04, 0x25, 0x01, 0x00, 0x00, 0xB7, 0x04, 0x6F, 0x02, 0x01,
+	0x26, 0x05, 0x02, 0x4F, 0x29, 0x40, 0x03, 0x01, 0x02, 0x02, 0x36, 0x02,
+	0x02, 0x3F, 0x03, 0x02, 0x26, 0x06, 0x03, 0xB7, 0x04, 0x68, 0x25, 0x02,
+	0x00, 0x02, 0x01, 0x0A, 0x00, 0x01, 0xB7, 0x26, 0x01, 0x81, 0x00, 0x0D,
+	0x06, 0x01, 0x00, 0x01, 0x81, 0x00, 0x0A, 0x26, 0x05, 0x02, 0x4D, 0x29,
+	0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x12, 0x06, 0x19, 0x02,
+	0x00, 0x40, 0x03, 0x00, 0x26, 0x01, 0x83, 0xFF, 0xFF, 0x7F, 0x12, 0x06,
+	0x02, 0x4E, 0x29, 0x01, 0x08, 0x0E, 0x3A, 0xB7, 0x33, 0x09, 0x04, 0x60,
+	0x00, 0x00, 0xA9, 0x92, 0x00, 0x00, 0xAA, 0xBF, 0x00, 0x00, 0xB0, 0x73,
+	0xAA, 0x00, 0x01, 0xAA, 0x26, 0x05, 0x02, 0x53, 0x29, 0xB7, 0x26, 0x01,
+	0x81, 0x00, 0x13, 0x06, 0x02, 0x53, 0x29, 0x03, 0x00, 0x26, 0x06, 0x16,
+	0xB7, 0x02, 0x00, 0x26, 0x01, 0x87, 0xFF, 0xFF, 0x7F, 0x13, 0x06, 0x02,
+	0x53, 0x29, 0x01, 0x08, 0x0E, 0x09, 0x03, 0x00, 0x04, 0x67, 0x25, 0x02,
+	0x00, 0x00, 0x00, 0xAA, 0x26, 0x01, 0x81, 0x7F, 0x12, 0x06, 0x08, 0xBF,
+	0x01, 0x00, 0x66, 0x36, 0x01, 0x00, 0x00, 0x26, 0x66, 0x36, 0x66, 0x3F,
+	0xA2, 0x01, 0x7F, 0x00, 0x00, 0xB0, 0x01, 0x0C, 0x30, 0x11, 0x06, 0x05,
+	0x25, 0x72, 0xB3, 0x04, 0x3E, 0x01, 0x12, 0x30, 0x11, 0x06, 0x05, 0x25,
+	0x72, 0xB4, 0x04, 0x33, 0x01, 0x13, 0x30, 0x11, 0x06, 0x05, 0x25, 0x72,
+	0xB4, 0x04, 0x28, 0x01, 0x14, 0x30, 0x11, 0x06, 0x05, 0x25, 0x72, 0xB4,
+	0x04, 0x1D, 0x01, 0x16, 0x30, 0x11, 0x06, 0x05, 0x25, 0x72, 0xB4, 0x04,
+	0x12, 0x01, 0x1E, 0x30, 0x11, 0x06, 0x05, 0x25, 0x72, 0xB2, 0x04, 0x07,
+	0x41, 0xAB, 0x01, 0x00, 0x01, 0x00, 0x25, 0x00, 0x01, 0xB7, 0x03, 0x00,
+	0x02, 0x00, 0x01, 0x05, 0x14, 0x01, 0x01, 0x15, 0x2D, 0x02, 0x00, 0x01,
+	0x06, 0x14, 0x26, 0x01, 0x01, 0x15, 0x06, 0x02, 0x45, 0x29, 0x01, 0x04,
+	0x0E, 0x02, 0x00, 0x01, 0x1F, 0x15, 0x26, 0x01, 0x1F, 0x11, 0x06, 0x02,
+	0x46, 0x29, 0x09, 0x00, 0x00, 0x26, 0x05, 0x05, 0x01, 0x00, 0x01, 0x7F,
+	0x00, 0xB0, 0x00, 0x01, 0xAA, 0x26, 0x05, 0x05, 0x66, 0x36, 0x01, 0x7F,
+	0x00, 0x01, 0x01, 0x03, 0x00, 0x9C, 0x26, 0x01, 0x83, 0xFF, 0x7E, 0x11,
+	0x06, 0x16, 0x25, 0x26, 0x06, 0x10, 0x9D, 0x26, 0x05, 0x05, 0x25, 0xBF,
+	0x01, 0x00, 0x00, 0x02, 0x00, 0x81, 0x03, 0x00, 0x04, 0x6D, 0x04, 0x1B,
+	0x26, 0x05, 0x05, 0x25, 0xBF, 0x01, 0x00, 0x00, 0x02, 0x00, 0x81, 0x03,
+	0x00, 0x26, 0x06, 0x0B, 0x9C, 0x26, 0x05, 0x05, 0x25, 0xBF, 0x01, 0x00,
+	0x00, 0x04, 0x6D, 0x25, 0x02, 0x00, 0x26, 0x05, 0x01, 0x00, 0x40, 0x66,
+	0x36, 0x01, 0x7F, 0x00, 0x01, 0xAA, 0x01, 0x01, 0x03, 0x00, 0x26, 0x06,
+	0x10, 0x9E, 0x26, 0x05, 0x05, 0x25, 0xBF, 0x01, 0x00, 0x00, 0x02, 0x00,
+	0x81, 0x03, 0x00, 0x04, 0x6D, 0x25, 0x02, 0x00, 0x26, 0x05, 0x01, 0x00,
+	0x40, 0x66, 0x36, 0x01, 0x7F, 0x00, 0x01, 0xAA, 0x01, 0x01, 0x03, 0x00,
+	0x26, 0x06, 0x10, 0xB7, 0x26, 0x05, 0x05, 0x25, 0xBF, 0x01, 0x00, 0x00,
+	0x02, 0x00, 0x81, 0x03, 0x00, 0x04, 0x6D, 0x25, 0x02, 0x00, 0x26, 0x05,
+	0x01, 0x00, 0x40, 0x66, 0x36, 0x01, 0x7F, 0x00, 0x00, 0xB7, 0x01, 0x08,
+	0x0E, 0x3A, 0xB7, 0x33, 0x09, 0x00, 0x00, 0xB7, 0x3A, 0xB7, 0x01, 0x08,
+	0x0E, 0x33, 0x09, 0x00, 0x00, 0x26, 0x05, 0x02, 0x4E, 0x29, 0x40, 0xB8,
+	0x00, 0x00, 0x32, 0x26, 0x01, 0x00, 0x13, 0x06, 0x01, 0x00, 0x25, 0x1A,
+	0x04, 0x74, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x0B, 0x00, 0x00, 0x01,
+	0x15, 0x00, 0x00, 0x01, 0x1F, 0x00, 0x00, 0x01, 0x29, 0x00, 0x00, 0x01,
+	0x33, 0x00, 0x00, 0xC0, 0x25, 0x00, 0x00, 0x26, 0x06, 0x07, 0xC1, 0x26,
+	0x06, 0x01, 0x1A, 0x04, 0x76, 0x00, 0x00, 0x01, 0x00, 0x30, 0x31, 0x0B,
+	0x41, 0x00, 0x00, 0x01, 0x81, 0x70, 0x00, 0x00, 0x01, 0x82, 0x0D, 0x00,
+	0x00, 0x01, 0x82, 0x22, 0x00, 0x00, 0x01, 0x82, 0x05, 0x00, 0x00, 0x26,
+	0x01, 0x83, 0xFB, 0x50, 0x01, 0x83, 0xFB, 0x6F, 0x6F, 0x06, 0x04, 0x25,
+	0x01, 0x00, 0x00, 0x26, 0x01, 0x83, 0xB0, 0x00, 0x01, 0x83, 0xBF, 0x7F,
+	0x6F, 0x06, 0x04, 0x25, 0x01, 0x00, 0x00, 0x01, 0x83, 0xFF, 0x7F, 0x15,
+	0x01, 0x83, 0xFF, 0x7E, 0x0D, 0x00
+};
+
+static const uint16_t t0_caddr[] = { /* t0_codeblock offsets, indexed by (slot - T0_INTERPRETED) */
+	0,
+	5,
+	10,
+	15,
+	20,
+	25,
+	29,
+	33,
+	37,
+	41,
+	45,
+	49,
+	53,
+	57,
+	61,
+	65,
+	69,
+	73,
+	77,
+	81,
+	85,
+	89,
+	93,
+	97,
+	101,
+	105,
+	109,
+	113,
+	117,
+	121,
+	125,
+	130,
+	135,
+	140,
+	145,
+	150,
+	155,
+	160,
+	165,
+	173,
+	178,
+	183,
+	188,
+	193,
+	198,
+	203,
+	208,
+	213,
+	234,
+	239,
+	244,
+	249,
+	264,
+	269,
+	275,
+	281,
+	286,
+	294,
+	302,
+	308,
+	313,
+	324,
+	960,
+	975,
+	979,
+	984,
+	989,
+	994,
+	999,
+	1004,
+	1118,
+	1123,
+	1135,
+	1140,
+	1145,
+	1150,
+	1154,
+	1159,
+	1164,
+	1169,
+	1174,
+	1184,
+	1189,
+	1194,
+	1206,
+	1221,
+	1226,
+	1240,
+	1262,
+	1273,
+	1376,
+	1423,
+	1456,
+	1547,
+	1553,
+	1616,
+	1623,
+	1651,
+	1679,
+	1784,
+	1826,
+	1839,
+	1851,
+	1865,
+	1880,
+	2100,
+	2114,
+	2131,
+	2140,
+	2207,
+	2263,
+	2267,
+	2271,
+	2276,
+	2324,
+	2350,
+	2426,
+	2470,
+	2481,
+	2566,
+	2604,
+	2642,
+	2652,
+	2662,
+	2671,
+	2684,
+	2688,
+	2692,
+	2696,
+	2700,
+	2704,
+	2708,
+	2712,
+	2724,
+	2732,
+	2737,
+	2742,
+	2747,
+	2752
+};
+
+#define T0_INTERPRETED   60 /* first slot number that t0_caddr[] describes */
+/* Call word `slot`: reserve its locals on rp, push the return word, jump to its code. */
+#define T0_ENTER(ip, rp, slot)   do { \
+		const unsigned char *t0_newip; \
+		uint32_t t0_lnum; \
+		t0_newip = &t0_codeblock[t0_caddr[(slot) - T0_INTERPRETED]]; \
+		t0_lnum = t0_parse7E_unsigned(&t0_newip); \
+		(rp) += t0_lnum; \
+		*((rp) ++) = (uint32_t)((ip) - &t0_codeblock[0]) + (t0_lnum << 16); \
+		(ip) = t0_newip; \
+	} while (0)
+/* Define function `name`: point ip at t0_codeblock[0] (return offset 0), then enter `slot`. */
+#define T0_DEFENTRY(name, slot) \
+void \
+name(void *ctx) \
+{ \
+	t0_context *t0ctx = ctx; \
+	t0ctx->ip = &t0_codeblock[0]; \
+	T0_ENTER(t0ctx->ip, t0ctx->rp, slot); \
+}
+/* Entry point for slot 144; it only sets up ip/rp in the context and executes nothing. */
+T0_DEFENTRY(br_x509_minimal_init_main, 144)
+/* Fetch the byte at *t0ipp and advance that pointer by one. */
+#define T0_NEXT(t0ipp)   (*(*(t0ipp)) ++)
+
+void
+br_x509_minimal_run(void *t0ctx)
+{
+	uint32_t *dp, *rp;	/* tops of the data stack and of the return stack */
+	const unsigned char *ip;	/* next bytecode byte to execute */
+	/* Executes bytecode until a primitive jumps to t0_exit or the outermost `ret` runs. */
+#define T0_LOCAL(x)    (*(rp - 2 - (x)))
+#define T0_POP()       (*-- dp)
+#define T0_POPi()      (*(int32_t *)(-- dp))
+#define T0_PEEK(x)     (*(dp - 1 - (x)))
+#define T0_PEEKi(x)    (*(int32_t *)(dp - 1 - (x)))
+#define T0_PUSH(v)     do { *dp = (v); dp ++; } while (0)
+#define T0_PUSHi(v)    do { *(int32_t *)dp = (v); dp ++; } while (0)
+#define T0_RPOP()      (*-- rp)
+#define T0_RPOPi()     (*(int32_t *)(-- rp))
+#define T0_RPUSH(v)    do { *rp = (v); rp ++; } while (0)
+#define T0_RPUSHi(v)   do { *(int32_t *)rp = (v); rp ++; } while (0)
+#define T0_ROLL(x)     do { \
+	size_t t0len = (size_t)(x); \
+	uint32_t t0tmp = *(dp - 1 - t0len); \
+	memmove(dp - t0len - 1, dp - t0len, t0len * sizeof *dp); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_SWAP()      do { \
+	uint32_t t0tmp = *(dp - 2); \
+	*(dp - 2) = *(dp - 1); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_ROT()       do { \
+	uint32_t t0tmp = *(dp - 3); \
+	*(dp - 3) = *(dp - 2); \
+	*(dp - 2) = *(dp - 1); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_NROT()       do { \
+	uint32_t t0tmp = *(dp - 1); \
+	*(dp - 1) = *(dp - 2); \
+	*(dp - 2) = *(dp - 3); \
+	*(dp - 3) = t0tmp; \
+} while (0)
+#define T0_PICK(x)      do { \
+	uint32_t t0depth = (x); \
+	T0_PUSH(T0_PEEK(t0depth)); \
+} while (0)
+#define T0_CO()         do { \
+	goto t0_exit; \
+} while (0)
+#define T0_RET()        goto t0_next
+	/* Reload the interpreter registers that were saved in the context. */
+	dp = ((t0_context *)t0ctx)->dp;
+	rp = ((t0_context *)t0ctx)->rp;
+	ip = ((t0_context *)t0ctx)->ip;
+	goto t0_next;
+	for (;;) {
+		uint32_t t0x;
+		/* Opcodes below T0_INTERPRETED are the primitives in the switch; others are called. */
+	t0_next:
+		t0x = T0_NEXT(&ip);
+		if (t0x < T0_INTERPRETED) {
+			switch (t0x) {
+				int32_t t0off;
+
+			case 0: /* ret: pop the return word (locals count << 16 | return offset) */
+				t0x = T0_RPOP();
+				rp -= (t0x >> 16);
+				t0x &= 0xFFFF;
+				if (t0x == 0) { /* offset 0: frame pushed by an entry point, so stop */
+					ip = NULL;
+					goto t0_exit;
+				}
+				ip = &t0_codeblock[t0x];
+				break;
+			case 1: /* literal constant: push the value decoded by t0_parse7E_signed */
+				T0_PUSHi(t0_parse7E_signed(&ip));
+				break;
+			case 2: /* read local: push the local whose index follows the opcode */
+				T0_PUSH(T0_LOCAL(t0_parse7E_unsigned(&ip)));
+				break;
+			case 3: /* write local: pop into the local whose index follows the opcode */
+				T0_LOCAL(t0_parse7E_unsigned(&ip)) = T0_POP();
+				break;
+			case 4: /* jump: add the decoded signed offset to ip */
+				t0off = t0_parse7E_signed(&ip);
+				ip += t0off;
+				break;
+			case 5: /* jump if: pop a flag, jump when it is non-zero */
+				t0off = t0_parse7E_signed(&ip);
+				if (T0_POP()) {
+					ip += t0off;
+				}
+				break;
+			case 6: /* jump if not: pop a flag, jump when it is zero */
+				t0off = t0_parse7E_signed(&ip);
+				if (!T0_POP()) {
+					ip += t0off;
+				}
+				break;
+			case 7: {
+				/* %25 (signed remainder a % b; b is popped first) */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSHi(a % b);
+
+				}
+				break;
+			case 8: {
+				/* * */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a * b);
+
+				}
+				break;
+			case 9: {
+				/* + */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a + b);
+
+				}
+				break;
+			case 10: {
+				/* - */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a - b);
+
+				}
+				break;
+			case 11: {
+				/* -rot */
+ T0_NROT(); 
+				}
+				break;
+			case 12: {
+				/* / (signed division a / b; b is popped first) */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSHi(a / b);
+
+				}
+				break;
+			case 13: {
+				/* < (signed compare; pushes all-ones when true, 0 otherwise) */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a < b));
+
+				}
+				break;
+			case 14: {
+				/* << */
+
+	int c = (int)T0_POPi();
+	uint32_t x = T0_POP();
+	T0_PUSH(x << c);
+
+				}
+				break;
+			case 15: {
+				/* <= (signed compare; pushes all-ones when true, 0 otherwise) */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a <= b));
+
+				}
+				break;
+			case 16: {
+				/* <> (pushes all-ones when the two words differ, 0 otherwise) */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(-(uint32_t)(a != b));
+
+				}
+				break;
+			case 17: {
+				/* = (pushes all-ones when the two words are equal, 0 otherwise) */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(-(uint32_t)(a == b));
+
+				}
+				break;
+			case 18: {
+				/* > (signed compare; pushes all-ones when true, 0 otherwise) */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a > b));
+
+				}
+				break;
+			case 19: {
+				/* >= (signed compare; pushes all-ones when true, 0 otherwise) */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a >= b));
+
+				}
+				break;
+			case 20: {
+				/* >> (the shifted operand is read as int32_t) */
+
+	int c = (int)T0_POPi();
+	int32_t x = T0_POPi();
+	T0_PUSHi(x >> c);
+
+				}
+				break;
+			case 21: {
+				/* and */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a & b);
+
+				}
+				break;
+			case 22: {
+				/* blobcopy: ( dst src len -- ), dst and src are byte offsets from CTX */
+
+	size_t len = T0_POP();
+	unsigned char *src = (unsigned char *)CTX + T0_POP();
+	unsigned char *dst = (unsigned char *)CTX + T0_POP();
+	memcpy(dst, src, len);
+
+				}
+				break;
+			case 23: {
+				/* check-direct-trust */
+	/* Stops with BR_ERR_X509_OK if a non-CA anchor has the current DN hash and the same key. */
+	size_t u;
+
+	for (u = 0; u < CTX->trust_anchors_num; u ++) {
+		const br_x509_trust_anchor *ta;
+		unsigned char hashed_DN[64];
+		int kt;
+
+		ta = &CTX->trust_anchors[u];
+		if (ta->flags & BR_X509_TA_CA) {
+			continue;
+		}
+		hash_dn(CTX, ta->dn.data, ta->dn.len, hashed_DN);
+		if (memcmp(hashed_DN, CTX->current_dn_hash, DNHASH_LEN)) {
+			continue;
+		}
+		kt = CTX->pkey.key_type;
+		if ((ta->pkey.key_type & 0x0F) != kt) {
+			continue;
+		}
+		switch (kt) {
+
+		case BR_KEYTYPE_RSA:
+			if (!eqbigint(CTX->pkey.key.rsa.n,
+				CTX->pkey.key.rsa.nlen,
+				ta->pkey.key.rsa.n,
+				ta->pkey.key.rsa.nlen)
+				|| !eqbigint(CTX->pkey.key.rsa.e,
+				CTX->pkey.key.rsa.elen,
+				ta->pkey.key.rsa.e,
+				ta->pkey.key.rsa.elen))
+			{
+				continue;
+			}
+			break;
+
+		case BR_KEYTYPE_EC:
+			if (CTX->pkey.key.ec.curve != ta->pkey.key.ec.curve
+				|| CTX->pkey.key.ec.qlen != ta->pkey.key.ec.qlen
+				|| memcmp(CTX->pkey.key.ec.q,
+					ta->pkey.key.ec.q,
+					ta->pkey.key.ec.qlen) != 0)
+			{
+				continue;
+			}
+			break;
+
+		default:
+			continue;
+		}
+
+		/*
+		 * Direct trust match!
+		 */
+		CTX->err = BR_ERR_X509_OK;
+		T0_CO();
+	}
+
+				}
+				break;
+			case 24: {
+				/* check-trust-anchor-CA */
+	/* Stops with BR_ERR_X509_OK if a CA anchor matches saved_dn_hash and verifies the signature. */
+	size_t u;
+
+	for (u = 0; u < CTX->trust_anchors_num; u ++) {
+		const br_x509_trust_anchor *ta;
+		unsigned char hashed_DN[64];
+
+		ta = &CTX->trust_anchors[u];
+		if (!(ta->flags & BR_X509_TA_CA)) {
+			continue;
+		}
+		hash_dn(CTX, ta->dn.data, ta->dn.len, hashed_DN);
+		if (memcmp(hashed_DN, CTX->saved_dn_hash, DNHASH_LEN)) {
+			continue;
+		}
+		if (verify_signature(CTX, &ta->pkey) == 0) {
+			CTX->err = BR_ERR_X509_OK;
+			T0_CO();
+		}
+	}
+
+				}
+				break;
+			case 25: {
+				/* check-validity-range */
+	/* Pushes r: -1 before the range, 0 inside it, 1 after it (or the itime callback's answer). */
+	uint32_t nbs = T0_POP();
+	uint32_t nbd = T0_POP();
+	uint32_t nas = T0_POP();
+	uint32_t nad = T0_POP();
+	int r;
+	if (CTX->itime != 0) {
+		r = CTX->itime(CTX->itime_ctx, nbd, nbs, nad, nas);
+		if (r < -1 || r > 1) {
+			CTX->err = BR_ERR_X509_TIME_UNKNOWN;
+			T0_CO();
+		}
+	} else {
+		uint32_t vd = CTX->days;
+		uint32_t vs = CTX->seconds;
+		if (vd == 0 && vs == 0) {
+			CTX->err = BR_ERR_X509_TIME_UNKNOWN;
+			T0_CO();
+		}
+		if (vd < nbd || (vd == nbd && vs < nbs)) {
+			r = -1;
+		} else if (vd > nad || (vd == nad && vs > nas)) {
+			r = 1;
+		} else {
+			r = 0;
+		}
+	}
+	T0_PUSHi(r);
+
+				}
+				break;
+			case 26: {
+				/* co: leave the interpreter through t0_exit, where the state is saved */
+ T0_CO(); 
+				}
+				break;
+			case 27: {
+				/* compute-dn-hash: write the DN hash to current_dn_hash, stop DN hashing */
+
+	CTX->dn_hash_impl->out(&CTX->dn_hash.vtable, CTX->current_dn_hash);
+	CTX->do_dn_hash = 0;
+
+				}
+				break;
+			case 28: {
+				/* compute-tbs-hash: ( id -- len ), output goes to CTX->tbs_hash */
+
+	int id = T0_POPi();
+	size_t len;
+	len = br_multihash_out(&CTX->mhash, id, CTX->tbs_hash);
+	T0_PUSH(len);
+
+				}
+				break;
+			case 29: {
+				/* copy-ee-ec-pkey: ( curve qlen -- ), copies pkey_data into ee_pkey_data */
+
+	size_t qlen = T0_POP();
+	uint32_t curve = T0_POP();
+	memcpy(CTX->ee_pkey_data, CTX->pkey_data, qlen);
+	CTX->pkey.key_type = BR_KEYTYPE_EC;
+	CTX->pkey.key.ec.curve = curve;
+	CTX->pkey.key.ec.q = CTX->ee_pkey_data;
+	CTX->pkey.key.ec.qlen = qlen;
+
+				}
+				break;
+			case 30: {
+				/* copy-ee-rsa-pkey: ( nlen elen -- ), n is followed by e in the buffer */
+
+	size_t elen = T0_POP();
+	size_t nlen = T0_POP();
+	memcpy(CTX->ee_pkey_data, CTX->pkey_data, nlen + elen);
+	CTX->pkey.key_type = BR_KEYTYPE_RSA;
+	CTX->pkey.key.rsa.n = CTX->ee_pkey_data;
+	CTX->pkey.key.rsa.nlen = nlen;
+	CTX->pkey.key.rsa.e = CTX->ee_pkey_data + nlen;
+	CTX->pkey.key.rsa.elen = elen;
+
+				}
+				break;
+			case 31: {
+				/* copy-name-SAN */
+	/* ( ok tag -- ): pad[0] is the value length; only the first pending element for tag is set. */
+	unsigned tag = T0_POP();
+	unsigned ok = T0_POP();
+	size_t u, len;
+
+	len = CTX->pad[0];
+	for (u = 0; u < CTX->num_name_elts; u ++) {
+		br_name_element *ne;
+
+		ne = &CTX->name_elts[u];
+		if (ne->status == 0 && ne->oid[0] == 0 && ne->oid[1] == tag) {
+			if (ok && ne->len > len) {
+				memcpy(ne->buf, CTX->pad + 1, len);
+				ne->buf[len] = 0;
+				ne->status = 1;
+			} else {
+				ne->status = -1;
+			}
+			break;
+		}
+	}
+
+				}
+				break;
+			case 32: {
+				/* copy-name-element */
+	/* ( ok off -- ): off < 0 does nothing; status becomes 1 on copy, -1 on !ok or no room. */
+	size_t len;
+	int32_t off = T0_POPi();
+	int ok = T0_POPi();
+
+	if (off >= 0) {
+		br_name_element *ne = &CTX->name_elts[off];
+
+		if (ok) {
+			len = CTX->pad[0];
+			if (len < ne->len) {
+				memcpy(ne->buf, CTX->pad + 1, len);
+				ne->buf[len] = 0;
+				ne->status = 1;
+			} else {
+				ne->status = -1;
+			}
+		} else {
+			ne->status = -1;
+		}
+	}
+
+				}
+				break;
+			case 33: {
+				/* data-get8: ( addr -- byte ), reads t0_datablock[addr] */
+
+	size_t addr = T0_POP();
+	T0_PUSH(t0_datablock[addr]);
+
+				}
+				break;
+			case 34: {
+				/* dn-hash-length */
+
+	T0_PUSH(DNHASH_LEN);
+
+				}
+				break;
+			case 35: {
+				/* do-ecdsa-vrfy: ( curve qlen -- err ), the point is read from pkey_data */
+
+	size_t qlen = T0_POP();
+	int curve = T0_POP();
+	br_x509_pkey pk;
+
+	pk.key_type = BR_KEYTYPE_EC;
+	pk.key.ec.curve = curve;
+	pk.key.ec.q = CTX->pkey_data;
+	pk.key.ec.qlen = qlen;
+	T0_PUSH(verify_signature(CTX, &pk));
+
+				}
+				break;
+			case 36: {
+				/* do-rsa-vrfy: ( nlen elen -- err ), n then e are read from pkey_data */
+
+	size_t elen = T0_POP();
+	size_t nlen = T0_POP();
+	br_x509_pkey pk;
+
+	pk.key_type = BR_KEYTYPE_RSA;
+	pk.key.rsa.n = CTX->pkey_data;
+	pk.key.rsa.nlen = nlen;
+	pk.key.rsa.e = CTX->pkey_data + nlen;
+	pk.key.rsa.elen = elen;
+	T0_PUSH(verify_signature(CTX, &pk));
+
+				}
+				break;
+			case 37: {
+				/* drop */
+ (void)T0_POP(); 
+				}
+				break;
+			case 38: {
+				/* dup */
+ T0_PUSH(T0_PEEK(0)); 
+				}
+				break;
+			case 39: {
+				/* eqOID: compare pad with the t0_datablock entry; byte 0 of each is the length */
+
+	const unsigned char *a2 = &t0_datablock[T0_POP()];
+	const unsigned char *a1 = &CTX->pad[0];
+	size_t len = a1[0];
+	int x;
+	if (len == a2[0]) {
+		x = -(memcmp(a1 + 1, a2 + 1, len) == 0);
+	} else {
+		x = 0;
+	}
+	T0_PUSH((uint32_t)x);
+
+				}
+				break;
+			case 40: {
+				/* eqblob: ( a1 a2 len -- flag ), a1 and a2 are byte offsets from CTX */
+
+	size_t len = T0_POP();
+	const unsigned char *a2 = (const unsigned char *)CTX + T0_POP();
+	const unsigned char *a1 = (const unsigned char *)CTX + T0_POP();
+	T0_PUSHi(-(memcmp(a1, a2, len) == 0));
+
+				}
+				break;
+			case 41: {
+				/* fail: store the popped error code in CTX->err, then leave via t0_exit */
+
+	CTX->err = T0_POPi();
+	T0_CO();
+
+				}
+				break;
+			case 42: {
+				/* get16: ( addr -- value ), reads a uint16_t at byte offset addr of CTX */
+
+	uint32_t addr = T0_POP();
+	T0_PUSH(*(uint16_t *)(void *)((unsigned char *)CTX + addr));
+
+				}
+				break;
+			case 43: {
+				/* get32: ( addr -- value ), reads a uint32_t at byte offset addr of CTX */
+
+	uint32_t addr = T0_POP();
+	T0_PUSH(*(uint32_t *)(void *)((unsigned char *)CTX + addr));
+
+				}
+				break;
+			case 44: {
+				/* match-server-name */
+	/* Pushes -1 if pad equals server_name per eqnocase, or "*.rest" equals what follows its first '.'. */
+	size_t n1, n2;
+
+	if (CTX->server_name == NULL) {
+		T0_PUSH(0);
+		T0_RET();
+	}
+	n1 = strlen(CTX->server_name);
+	n2 = CTX->pad[0];
+	if (n1 == n2 && eqnocase(&CTX->pad[1], CTX->server_name, n1)) {
+		T0_PUSHi(-1);
+		T0_RET();
+	}
+	if (n2 >= 2 && CTX->pad[1] == '*' && CTX->pad[2] == '.') {
+		size_t u;
+
+		u = 0;
+		while (u < n1 && CTX->server_name[u] != '.') {
+			u ++;
+		}
+		u ++;
+		n1 -= u;
+		if ((n2 - 2) == n1
+			&& eqnocase(&CTX->pad[3], CTX->server_name + u, n1))
+		{
+			T0_PUSHi(-1);
+			T0_RET();
+		}
+	}
+	T0_PUSH(0);
+
+				}
+				break;
+			case 45: {
+				/* neg */
+
+	uint32_t a = T0_POP();
+	T0_PUSH(-a);
+
+				}
+				break;
+			case 46: {
+				/* offset-name-element */
+	/* ( san -- index ): index of the first status-0 element whose OID equals pad, else -1. */
+	unsigned san = T0_POP();
+	size_t u;
+
+	for (u = 0; u < CTX->num_name_elts; u ++) {
+		if (CTX->name_elts[u].status == 0) {
+			const unsigned char *oid;
+			size_t len, off;
+
+			oid = CTX->name_elts[u].oid;
+			if (san) {
+				if (oid[0] != 0 || oid[1] != 0) {
+					continue;
+				}
+				off = 2;
+			} else {
+				off = 0;
+			}
+			len = oid[off];
+			if (len != 0 && len == CTX->pad[0]
+				&& memcmp(oid + off + 1,
+					CTX->pad + 1, len) == 0)
+			{
+				T0_PUSH(u);
+				T0_RET();
+			}
+		}
+	}
+	T0_PUSHi(-1);
+
+				}
+				break;
+			case 47: {
+				/* or */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a | b);
+
+				}
+				break;
+			case 48: {
+				/* over */
+ T0_PUSH(T0_PEEK(1)); 
+				}
+				break;
+			case 49: {
+				/* read-blob-inner */
+	/* ( addr len -- addr+n len-n ), n = min(len, hlen) input bytes consumed; addr 0 skips the copy. */
+	uint32_t len = T0_POP();
+	uint32_t addr = T0_POP();
+	size_t clen = CTX->hlen;
+	if (clen > len) {
+		clen = (size_t)len;
+	}
+	if (addr != 0) {
+		memcpy((unsigned char *)CTX + addr, CTX->hbuf, clen);
+	}
+	if (CTX->do_mhash) {
+		br_multihash_update(&CTX->mhash, CTX->hbuf, clen);
+	}
+	if (CTX->do_dn_hash) {
+		CTX->dn_hash_impl->update(
+			&CTX->dn_hash.vtable, CTX->hbuf, clen);
+	}
+	CTX->hbuf += clen;
+	CTX->hlen -= clen;
+	T0_PUSH(addr + clen);
+	T0_PUSH(len - clen);
+
+				}
+				break;
+			case 50: {
+				/* read8-low: push the next input byte, or -1 when no input is left */
+
+	if (CTX->hlen == 0) {
+		T0_PUSHi(-1);
+	} else {
+		unsigned char x = *CTX->hbuf ++;
+		if (CTX->do_mhash) {
+			br_multihash_update(&CTX->mhash, &x, 1);
+		}
+		if (CTX->do_dn_hash) {
+			CTX->dn_hash_impl->update(&CTX->dn_hash.vtable, &x, 1);
+		}
+		CTX->hlen --;
+		T0_PUSH(x);
+	}
+
+				}
+				break;
+			case 51: {
+				/* rot */
+ T0_ROT(); 
+				}
+				break;
+			case 52: {
+				/* set16: ( value addr -- ), writes a uint16_t at byte offset addr of CTX */
+
+	uint32_t addr = T0_POP();
+	*(uint16_t *)(void *)((unsigned char *)CTX + addr) = T0_POP();
+
+				}
+				break;
+			case 53: {
+				/* set32: ( value addr -- ), writes a uint32_t at byte offset addr of CTX */
+
+	uint32_t addr = T0_POP();
+	*(uint32_t *)(void *)((unsigned char *)CTX + addr) = T0_POP();
+
+				}
+				break;
+			case 54: {
+				/* set8: ( value addr -- ), writes one byte at byte offset addr of CTX */
+
+	uint32_t addr = T0_POP();
+	*((unsigned char *)CTX + addr) = (unsigned char)T0_POP();
+
+				}
+				break;
+			case 55: {
+				/* start-dn-hash: reset the DN hash; input read from now on is fed to it */
+
+	CTX->dn_hash_impl->init(&CTX->dn_hash.vtable);
+	CTX->do_dn_hash = 1;
+
+				}
+				break;
+			case 56: {
+				/* start-tbs-hash: reset the multihash; input read from now on is fed to it */
+
+	br_multihash_init(&CTX->mhash);
+	CTX->do_mhash = 1;
+
+				}
+				break;
+			case 57: {
+				/* stop-tbs-hash */
+
+	CTX->do_mhash = 0;
+
+				}
+				break;
+			case 58: {
+				/* swap */
+ T0_SWAP(); 
+				}
+				break;
+			case 59: {
+				/* zero-server-name: push -1 when server_name is NULL, 0 otherwise */
+
+	T0_PUSHi(-(CTX->server_name == NULL));
+
+				}
+				break;
+			}
+
+		} else {
+			T0_ENTER(ip, rp, t0x);	/* call the interpreted word t0x */
+		}
+	}
+t0_exit:	/* write the registers back so that a later call can resume */
+	((t0_context *)t0ctx)->dp = dp;
+	((t0_context *)t0ctx)->rp = rp;
+	((t0_context *)t0ctx)->ip = ip;
+}
+
+
+
+/*
+ * Verify the signature on the certificate with the provided public key.
+ * This function checks the public key type with regards to the expected
+ * type. Returned value is either 0 on success, or a non-zero error code.
+ */
+static int
+verify_signature(br_x509_minimal_context *ctx, const br_x509_pkey *pk)
+{
+	unsigned char recovered[64];	/* hash value written by the RSA engine */
+	int expected = ctx->cert_signer_key_type;
+
+	/* The supplied key must have the type expected for the signer. */
+	if ((pk->key_type & 0x0F) != expected) {
+		return BR_ERR_X509_WRONG_KEY_TYPE;
+	}
+	if (expected == BR_KEYTYPE_RSA) {
+		if (ctx->irsa == 0) {
+			return BR_ERR_X509_UNSUPPORTED;
+		}
+		/*
+		 * A failed irsa() call, or a recovered value that differs
+		 * from tbs_hash, both mean that the signature is rejected.
+		 */
+		if (!ctx->irsa(ctx->cert_sig, ctx->cert_sig_len,
+			&t0_datablock[ctx->cert_sig_hash_oid],
+			ctx->cert_sig_hash_len, &pk->key.rsa, recovered)
+			|| memcmp(ctx->tbs_hash, recovered,
+				ctx->cert_sig_hash_len) != 0)
+		{
+			return BR_ERR_X509_BAD_SIGNATURE;
+		}
+		return 0;
+	}
+	if (expected == BR_KEYTYPE_EC) {
+		if (ctx->iecdsa == 0) {
+			return BR_ERR_X509_UNSUPPORTED;
+		}
+		if (!ctx->iecdsa(ctx->iec, ctx->tbs_hash,
+			ctx->cert_sig_hash_len, &pk->key.ec,
+			ctx->cert_sig, ctx->cert_sig_len))
+		{
+			return BR_ERR_X509_BAD_SIGNATURE;
+		}
+		return 0;
+	}
+	/* Any other key type has no verification engine here. */
+	return BR_ERR_X509_UNSUPPORTED;
+}
+
+
diff --git a/third_party/bearssl/src/x509_minimal_full.c b/third_party/bearssl/src/x509_minimal_full.c
new file mode 100644
index 0000000..2b54426
--- /dev/null
+++ b/third_party/bearssl/src/x509_minimal_full.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_x509.h */
+void
+br_x509_minimal_init_full(br_x509_minimal_context *xc,
+	const br_x509_trust_anchor *trust_anchors, size_t trust_anchors_num)
+{
+	/*
+	 * Every hash implementation, stored at index (identifier - 1).
+	 * Note: the X.509 validation engine will nonetheless refuse to
+	 * validate signatures that use MD5 as hash function.
+	 */
+	static const br_hash_class *all_hashes[] = {
+		&br_md5_vtable,
+		&br_sha1_vtable,
+		&br_sha224_vtable,
+		&br_sha256_vtable,
+		&br_sha384_vtable,
+		&br_sha512_vtable
+	};
+	int hash_id = br_md5_ID;
+
+	/* Base setup; SHA-256 is the hash class handed to the initialiser. */
+	br_x509_minimal_init(xc, &br_sha256_vtable,
+		trust_anchors, trust_anchors_num);
+	/* Signature verification: the "i31" RSA and ECDSA implementations. */
+	br_x509_minimal_set_rsa(xc, &br_rsa_i31_pkcs1_vrfy);
+	br_x509_minimal_set_ecdsa(xc,
+		&br_ec_prime_i31, &br_ecdsa_i31_vrfy_asn1);
+	/* Register each hash implementation under its identifier. */
+	while (hash_id <= br_sha512_ID) {
+		br_x509_minimal_set_hash(xc, hash_id, all_hashes[hash_id - 1]);
+		hash_id ++;
+	}
+}
diff --git a/third_party/citro3d.c b/third_party/citro3d.c
new file mode 100644
index 0000000..7aa0bad
--- /dev/null
+++ b/third_party/citro3d.c
@@ -0,0 +1,1720 @@
+/*
+Copyright (C) 2014-2018 fincs
+
+This software is provided 'as-is', without any express or implied
+warranty.  In no event will the authors be held liable for any
+damages arising from the use of this software.
+
+Permission is granted to anyone to use this software for any
+purpose, including commercial applications, and to alter it and
+redistribute it freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you
+   must not claim that you wrote the original software. If you use
+   this software in a product, an acknowledgment in the product
+   documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and
+   must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source
+   distribution.
+*/
+#include <stdbool.h>
+#include <stdint.h>
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+// Integer-vector value, held in a single u32.
+typedef u32 C3D_IVec;
+// Four-component float vector. Fields are declared w, z, y, x, so c[0] overlays w and c[3] overlays x.
+typedef union
+{
+	struct
+	{
+		float w; ///< W-component
+		float z; ///< Z-component
+		float y; ///< Y-component
+		float x; ///< X-component
+	};
+	float c[4]; ///< The same four floats as an array, in declaration order (w, z, y, x)
+} C3D_FVec;
+// 4x4 float matrix, viewable either as four row vectors or as sixteen raw floats.
+typedef union
+{
+	C3D_FVec r[4]; ///< Rows are vectors
+	float m[4*4]; ///< Raw access
+} C3D_Mtx;
+
+// Vertex attribute layout; C3Di_AttrInfoBind writes its fields to the GPU as raw words.
+typedef struct
+{
+	u32 flags[2]; // attribute format words, filled in by AttrInfo_AddLoader
+	u64 permutation; // one 4-bit register id per attribute slot (slot n at bits 4n..4n+3)
+	int attrCount; // attributes added so far; AttrInfo_AddLoader refuses to go past 12
+} C3D_AttrInfo;
+// Attribute-layout helpers, declared ahead of their use.
+static void AttrInfo_Init(C3D_AttrInfo* info);
+static int  AttrInfo_AddLoader(C3D_AttrInfo* info, int regId, GPU_FORMATS format, int count);
+// Returns the context's attribute layout and sets the C3DiF_AttrInfo flag.
+static C3D_AttrInfo* C3D_GetAttrInfo(void);
+
+
+
+// Buffer configuration record: an offset plus two flag words (no user of this type is visible in this part of the file).
+typedef struct
+{
+	u32 offset;
+	u32 flags[2];
+} C3D_BufCfg;
+
+// Texture descriptor. C3Di_SetTex sends border, dim, param, lodParam and the data address to the GPU.
+typedef struct
+{
+	void* data; // texel storage; freed with vramFree or linearFree depending on its address (C3D_TexDelete)
+	GPU_TEXCOLOR fmt : 4; // written to GPUREG_TEXUNITn_TYPE
+	size_t size : 28; // size of mipmap level 0 (each further level is size >> 2*level)
+
+	union
+	{
+		u32 dim; // height and width viewed as one word
+		struct
+		{
+			u16 height;
+			u16 width;
+		};
+	};
+
+	u32 param;
+	u32 border; // first word sent by C3Di_SetTex (GPUREG_TEXUNITn_BORDER_COLOR)
+	union
+	{
+		u32 lodParam; // the three fields below viewed as one word
+		struct
+		{
+			u16 lodBias;
+			u8 maxLevel; // last mipmap level counted by C3D_TexCalcTotalSize users
+			u8 minLevel;
+		};
+	};
+} C3D_Tex;
+// Texture management entry points, declared ahead of their use.
+static void C3D_TexLoadImage(C3D_Tex* tex, const void* data, GPU_TEXFACE face, int level);
+static void C3D_TexGenerateMipmap(C3D_Tex* tex, GPU_TEXFACE face);
+static void C3D_TexBind(int unitId, C3D_Tex* tex);
+static void C3D_TexFlush(C3D_Tex* tex);
+static void C3D_TexDelete(C3D_Tex* tex);
+// Highest mipmap level: floor(log2(min(width, height))) - 3, which keeps every level at 8 texels or more per side.
+static inline int C3D_TexCalcMaxLevel(u32 width, u32 height)
+{
+	return 28 - __builtin_clz(height > width ? width : height); // 28 == 31 - 3
+}
+// Size of mipmap `level` given the level-0 size: every level is a quarter of the one before it.
+static inline u32 C3D_TexCalcLevelSize(u32 size, int level)
+{
+	return size >> (level + level);
+}
+// Combined size of mipmap levels 0..maxLevel.
+static inline u32 C3D_TexCalcTotalSize(u32 size, int maxLevel)
+{
+	/*
+	Geometric series with first term s = size and ratio r = 1/4:
+	S  = s + sr + sr^2 + ... + sr^n
+	Sr =     sr + sr^2 + ... + sr^n + sr^(n+1)
+	S - Sr = s - sr^(n+1)
+	S = s (1 - r^(n+1)) / (1 - r)
+
+	With r = 1/4 (so 1 - r = 3/4):
+	S = 4s (1 - 1/4^(n+1)) / 3
+	S = (4/3) (s - s/4^(n+1))
+	S = (4/3) (s - s/(1<<(2n+2)))
+	S = (4/3) (s - (s >> (2n+2)))
+
+	and s >> (2n+2) is exactly the size of level n+1.
+	*/
+	u32 beyond = C3D_TexCalcLevelSize(size, maxLevel+1);
+	return (size - beyond) * 4 / 3;
+}
+// Locates mipmap `level` inside `data`. A negative level selects the whole chain: pointer == data, *size == total size.
+static inline void* C3D_TexGetImagePtr(C3D_Tex* tex, void* data, int level, u32* size)
+{
+	u32 skip = 0; // bytes taken by the levels stored in front of `level`
+	if (size) *size = level < 0 ? C3D_TexCalcTotalSize(tex->size, tex->maxLevel) : C3D_TexCalcLevelSize(tex->size, level);
+	if (level > 0) skip = C3D_TexCalcTotalSize(tex->size, level-1);
+	return (u8*)data + skip;
+}
+// Convenience form of C3D_TexGetImagePtr that uses the texture's own data pointer.
+static inline void* C3D_Tex2DGetImagePtr(C3D_Tex* tex, int level, u32* size)
+{
+	return C3D_TexGetImagePtr(tex, tex->data, level, size);
+}
+// Loads `data` into mipmap level 0 of `tex` (face GPU_TEXFACE_2D).
+static inline void C3D_TexUpload(C3D_Tex* tex, const void* data)
+{
+	C3D_TexLoadImage(tex, data, GPU_TEXFACE_2D, 0);
+}
+
+
+
+
+
+
+
+
+// Effect-state setters: each one edits the context's C3D_Effect obtained through getEffect().
+static void C3D_DepthMap(bool bIsZBuffer, float zScale, float zOffset);
+static void C3D_CullFace(GPU_CULLMODE mode);
+static void C3D_StencilTest(void);
+static void C3D_StencilOp(void);
+static void C3D_EarlyDepthTest(bool enable, GPU_EARLYDEPTHFUNC function, u32 ref);
+static void C3D_DepthTest(bool enable, GPU_TESTFUNC function, GPU_WRITEMASK writemask);
+static void C3D_AlphaTest(bool enable, GPU_TESTFUNC function, int ref);
+static void C3D_AlphaBlend(GPU_BLENDEQUATION colorEq, GPU_BLENDEQUATION alphaEq, GPU_BLENDFACTOR srcClr, GPU_BLENDFACTOR dstClr, GPU_BLENDFACTOR srcAlpha, GPU_BLENDFACTOR dstAlpha);
+static void C3D_ColorLogicOp(GPU_LOGICOP op);
+static void C3D_FragOpMode(GPU_FRAGOPMODE mode);
+static void C3D_FragOpShadow(float scale, float bias);
+
+
+
+
+
+// Presumably the value callers pass as C3D_Init's cmdBufSize -- TODO confirm units against C3D_Init.
+#define C3D_DEFAULT_CMDBUF_SIZE 0x40000
+// Index element type selectors (not referenced in the visible part of this file).
+enum
+{
+	C3D_UNSIGNED_BYTE = 0,
+	C3D_UNSIGNED_SHORT = 1,
+};
+// Core entry points: initialisation, program binding, viewport/scissor and drawing.
+static bool C3D_Init(size_t cmdBufSize);
+static void C3D_Fini(void);
+
+static void C3D_BindProgram(shaderProgram_s* program);
+
+static void C3D_SetViewport(u32 x, u32 y, u32 w, u32 h);
+static void C3D_SetScissor(GPU_SCISSORMODE mode, u32 left, u32 top, u32 right, u32 bottom);
+
+static void C3D_DrawElements(GPU_Primitive_t primitive, int count);
+
+// Immediate-mode vertex submission
+static void C3D_ImmDrawBegin(GPU_Primitive_t primitive);
+static void C3D_ImmSendAttrib(float x, float y, float z, float w);
+static void C3D_ImmDrawEnd(void);
+// Writes 1 to GPUREG_RESTART_PRIMITIVE (the same write C3D_ImmDrawBegin uses to break off a strip/fan).
+static inline void C3D_ImmDrawRestartPrim(void)
+{
+	GPUCMD_AddWrite(GPUREG_RESTART_PRIMITIVE, 1);
+}
+
+
+
+
+// Fog lookup table: 128 packed words, each built by FogLut_FromArray as (value << 13) | 13-bit difference.
+typedef struct
+{
+	u32 data[128];
+} C3D_FogLut;
+// Evaluates far*near / (depth*(far-near) + near) for the given depth value.
+static inline float FogLut_CalcZ(float depth, float near, float far)
+{
+	return far*near/(depth*(far-near)+near);
+}
+// Fog configuration entry points; `data` supplies 128 values followed by 128 differences.
+static void FogLut_FromArray(C3D_FogLut* lut, const float data[256]);
+
+static void C3D_FogGasMode(GPU_FOGMODE fogMode, GPU_GASMODE gasMode, bool zFlip);
+static void C3D_FogColor(u32 color);
+static void C3D_FogLutBind(C3D_FogLut* lut);
+
+
+
+
+
+
+
+
+
+
+// Framebuffer description: colour/depth storage, dimensions, formats and the masks C3Di_FrameBufBind writes out.
+typedef struct
+{
+	void* colorBuf; // NULL when no colour buffer is attached (see C3D_FrameBufColor)
+	void* depthBuf; // NULL when no depth buffer is attached (see C3D_FrameBufDepth)
+	u16 width;
+	u16 height;
+	GPU_COLORBUF colorFmt;
+	GPU_DEPTHBUF depthFmt;
+	bool block32;
+	u8 colorMask : 4; // 0xF when attached, 0 when detached
+	u8 depthMask : 4; // 0x3 for GPU_RB_DEPTH24_STENCIL8, 0x2 for other formats, 0 when detached
+} C3D_FrameBuf;
+// Bit set selecting which buffers a clear touches.
+// Flags for C3D_FrameBufClear
+typedef enum
+{
+	C3D_CLEAR_COLOR = BIT(0),
+	C3D_CLEAR_DEPTH = BIT(1),
+	C3D_CLEAR_ALL   = C3D_CLEAR_COLOR | C3D_CLEAR_DEPTH,
+} C3D_ClearBits;
+// Framebuffer helpers, declared ahead of their use.
+static u32 C3D_CalcColorBufSize(u32 width, u32 height, GPU_COLORBUF fmt);
+static u32 C3D_CalcDepthBufSize(u32 width, u32 height, GPU_DEPTHBUF fmt);
+
+static C3D_FrameBuf* C3D_GetFrameBuf(void);
+static void C3D_SetFrameBuf(C3D_FrameBuf* fb);
+static void C3D_FrameBufClear(C3D_FrameBuf* fb, C3D_ClearBits clearBits, u32 clearColor, u32 clearDepth);
+static void C3D_FrameBufTransfer(C3D_FrameBuf* fb, gfxScreen_t screen, gfx3dSide_t side, u32 transferFlags);
+// Records the dimensions and the block32 setting on `fb`; buffers, formats and masks are left untouched.
+static inline void C3D_FrameBufAttrib(C3D_FrameBuf* fb, u16 width, u16 height, bool block32)
+{
+	fb->block32 = block32;
+	fb->height  = height;
+	fb->width   = width;
+}
+// Attaches the colour buffer `buf` with format `fmt`, or detaches it when `buf` is NULL.
+static inline void C3D_FrameBufColor(C3D_FrameBuf* fb, void* buf, GPU_COLORBUF fmt)
+{
+	if (!buf)
+	{
+		// Detached: `fmt` is ignored, the format falls back to GPU_RB_RGBA8 and the mask is cleared
+		fb->colorBuf  = NULL;
+		fb->colorFmt  = GPU_RB_RGBA8;
+		fb->colorMask = 0;
+		return;
+	}
+	fb->colorBuf  = buf;
+	fb->colorFmt  = fmt;
+	fb->colorMask = 0xF;
+}
+// Attaches the depth buffer `buf` with format `fmt`, or detaches it when `buf` is NULL.
+static inline void C3D_FrameBufDepth(C3D_FrameBuf* fb, void* buf, GPU_DEPTHBUF fmt)
+{
+	if (!buf)
+	{
+		// Detached: `fmt` is ignored, the format falls back to GPU_RB_DEPTH24 and the mask is cleared
+		fb->depthBuf  = NULL;
+		fb->depthFmt  = GPU_RB_DEPTH24;
+		fb->depthMask = 0;
+		return;
+	}
+	fb->depthBuf  = buf;
+	fb->depthFmt  = fmt;
+	fb->depthMask = (fmt != GPU_RB_DEPTH24_STENCIL8) ? 0x2 : 0x3; // 0x3 only for the depth+stencil format
+}
+
+
+
+
+
+
+// A framebuffer together with the screen output it may be linked to.
+typedef struct C3D_RenderTarget_tag C3D_RenderTarget;
+
+struct C3D_RenderTarget_tag
+{
+	C3D_FrameBuf frameBuf;
+
+	bool used, linked; // used: set by C3D_FrameDrawOn, cleared by C3D_FrameEnd; linked: registered in linkedTarget[]
+	gfxScreen_t screen;
+	gfx3dSide_t side;
+	u32 transferFlags; // handed to C3D_FrameBufTransfer by C3D_FrameEnd
+};
+// NOTE(review): the C3D_FrameBegin in this file only tests C3D_FRAME_NONBLOCK; C3D_FRAME_SYNCDRAW is not consulted there.
+// Flags for C3D_FrameBegin
+enum
+{
+	C3D_FRAME_SYNCDRAW = BIT(0), // Perform C3D_FrameSync before checking the GPU status
+	C3D_FRAME_NONBLOCK = BIT(1), // Return false instead of waiting if the GPU is busy
+};
+// Frame lifecycle and render-target management, declared ahead of their use.
+static void C3D_FrameSync(void);
+
+static bool C3D_FrameBegin(u8 flags);
+static bool C3D_FrameDrawOn(C3D_RenderTarget* target);
+static void C3D_FrameSplit(u8 flags);
+static void C3D_FrameEnd(u8 flags);
+
+static void C3D_RenderTargetCreate(C3D_RenderTarget* target, int width, int height, GPU_COLORBUF colorFmt, GPU_DEPTHBUF depthFmt);
+static void C3D_RenderTargetDelete(C3D_RenderTarget* target);
+static void C3D_RenderTargetSetOutput(C3D_RenderTarget* target, gfxScreen_t screen, gfx3dSide_t side, u32 transferFlags);
+// Unlinks whatever target occupies the output slot that `target`'s screen/side select, by linking NULL there.
+static inline void C3D_RenderTargetDetachOutput(C3D_RenderTarget* target)
+{
+	C3D_RenderTargetSetOutput(NULL, target->screen, target->side, 0);
+}
+// Clears the framebuffer owned by `target`; arguments are forwarded unchanged to C3D_FrameBufClear.
+static inline void C3D_RenderTargetClear(C3D_RenderTarget* target, C3D_ClearBits clearBits, u32 clearColor, u32 clearDepth)
+{
+	C3D_FrameBufClear(&target->frameBuf, clearBits, clearColor, clearDepth);
+}
+// Texture copy that is safe to call both inside and outside a frame (see the definition).
+static void C3D_SyncTextureCopy(u32* inadr, u32 indim, u32* outadr, u32 outdim, u32 size, u32 flags);
+
+
+
+// One texture-combiner stage; C3Di_TexEnvBind sends the whole struct to the GPU as consecutive u32 words.
+typedef struct
+{
+	u16 srcRgb, srcAlpha;
+	union
+	{
+		u32 opAll; // both operand fields viewed as one word
+		struct { u32 opRgb:12, opAlpha:12; };
+	};
+	u16 funcRgb, funcAlpha;
+	u32 color;
+	u16 scaleRgb, scaleAlpha;
+} C3D_TexEnv;
+// Resets `env` to defaults: GPU_PREVIOUS sources, GPU_REPLACE functions, scale 1, colour 0xFFFFFFFF.
+static inline void C3D_TexEnvInit(C3D_TexEnv* env)
+{
+	// The RGB and alpha channels receive identical sources, functions and scales
+	env->srcRgb     = env->srcAlpha   = GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0);
+	env->funcRgb    = env->funcAlpha  = GPU_REPLACE;
+	env->scaleRgb   = env->scaleAlpha = GPU_TEVSCALE_1;
+	// Operand word cleared (covers both opRgb and opAlpha)
+	env->opAll      = 0;
+	// Constant colour: every bit set
+	env->color      = 0xFFFFFFFF;
+}
+// Computes out = a * b for 4x4 matrices.
+// The product is built in a local temporary, so `out` may safely be the same object as `a` and/or `b`.
+void Mtx_Multiply(C3D_Mtx* out, const C3D_Mtx* a, const C3D_Mtx* b)
+{
+	// http://www.wolframalpha.com/input/?i={{a,b,c,d},{e,f,g,h},{i,j,k,l},{m,n,o,p}}{{α,β,γ,δ},{ε,θ,ι,κ},{λ,μ,ν,ξ},{ο,π,ρ,σ}}
+	C3D_Mtx tmp;
+	int i, j;
+	for (j = 0; j < 4; ++j)
+		for (i = 0; i < 4; ++i)
+			tmp.r[j].c[i] = a->r[j].x*b->r[0].c[i] + a->r[j].y*b->r[1].c[i] + a->r[j].z*b->r[2].c[i] + a->r[j].w*b->r[3].c[i];
+	*out = tmp; // publish only after every input element has been read
+}
+
+
+#define C3D_FVUNIF_COUNT 96 // number of float-vector uniform slots
+#define C3D_IVUNIF_COUNT 4 // number of integer-vector uniform slots
+// CPU-side copies of the uniform values
+static C3D_FVec C3D_FVUnif[C3D_FVUNIF_COUNT];
+static C3D_IVec C3D_IVUnif[C3D_IVUNIF_COUNT];
+static u16      C3D_BoolUnifs;
+// Dirty markers: set by the writers, cleared by C3D_UpdateUniforms once the value has been sent
+static bool C3D_FVUnifDirty[C3D_FVUNIF_COUNT];
+static bool C3D_IVUnifDirty[C3D_IVUNIF_COUNT];
+static bool C3D_BoolUnifsDirty;
+// Marks `size` consecutive float-vector uniforms starting at `id` as dirty and returns a pointer to the first one.
+static inline C3D_FVec* C3D_FVUnifWritePtr(int id, int size)
+{
+	bool* dirty = &C3D_FVUnifDirty[id];
+	while (size-- > 0)
+		*dirty++ = true;
+	return &C3D_FVUnif[id];
+}
+// Copies the four rows of `mtx` into float-vector uniforms id..id+3 and marks them dirty.
+static inline void C3D_FVUnifMtx4x4(int id, const C3D_Mtx* mtx)
+{
+	C3D_FVec* dst = C3D_FVUnifWritePtr(id, 4);
+	const C3D_FVec* src = mtx->r;
+	int row;
+	for (row = 0; row < 4; row ++) dst[row] = src[row]; // whole-vector struct copy
+}
+// Stores (x, y, z, w) into float-vector uniform `id` and marks it dirty.
+static inline void C3D_FVUnifSet(int id, float x, float y, float z, float w)
+{
+	C3D_FVec* slot = C3D_FVUnifWritePtr(id, 1);
+	slot->w = w;
+	slot->z = z;
+	slot->y = y;
+	slot->x = x;
+}
+// Sends every dirty uniform to the GPU and clears the dirty markers.
+static void C3D_UpdateUniforms(void);
+
+
+
+
+
+// Fixed-function state. Field order matters: C3Di_EffectBind sends some neighbouring fields as one run of words.
+typedef struct
+{
+	u32 fragOpMode;
+	u32 fragOpShadow;
+	u32 zScale, zOffset; // sent together as 2 words starting at GPUREG_DEPTHMAP_SCALE
+	GPU_CULLMODE cullMode;
+	bool zBuffer, earlyDepth;
+	GPU_EARLYDEPTHFUNC earlyDepthFunc;
+	u32 earlyDepthRef;
+	// The next four words are sent as one 4-word run starting at GPUREG_FRAGOP_ALPHA_TEST
+	u32 alphaTest;
+	u32 stencilMode, stencilOp;
+	u32 depthTest;
+
+	u32 alphaBlend;
+	GPU_LOGICOP clrLogicOp;
+} C3D_Effect;
+// Library-wide state; the single instance is returned by C3Di_GetContext.
+typedef struct
+{
+	gxCmdQueue_s gxQueue; // GX command queue bound in C3Di_RenderQueueInit
+	u32* cmdBuf; // buffer handed to GPUCMD_SetBuffer by C3D_FrameBegin
+	size_t cmdBufSize; // size argument passed along with cmdBuf
+
+	u32 flags; // C3DiF_* bits
+	shaderProgram_s* program;
+
+	C3D_AttrInfo attrInfo;
+	C3D_Effect effect;
+
+	u32 texConfig;
+	C3D_Tex* tex[3]; // one slot per texture unit, filled by C3D_TexBind
+
+	u32 texEnvBuf, texEnvBufClr;
+	u32 fogClr; // set by C3D_FogColor
+	C3D_FogLut* fogLut; // set by C3D_FogLutBind
+
+	C3D_FrameBuf fb; // copy of the framebuffer last passed to C3D_SetFrameBuf
+	u32 viewport[5];
+	u32 scissor[3];
+} C3D_Context;
+// Bits of C3D_Context.flags. Setters in this file raise the bit that matches the state they changed.
+enum
+{
+	C3DiF_Active = BIT(0), // checked by the fog and framebuffer setters before they touch the context
+	C3DiF_DrawUsed = BIT(1), // raised at the end of C3D_DrawElements and C3D_ImmDrawEnd
+	C3DiF_AttrInfo = BIT(2),
+	C3DiF_Effect = BIT(4),
+	C3DiF_FrameBuf = BIT(5),
+	C3DiF_Viewport = BIT(6),
+	C3DiF_Scissor = BIT(7),
+	C3DiF_Program = BIT(8),
+	C3DiF_TexEnvBuf = BIT(9),
+	C3DiF_VshCode = BIT(11),
+	C3DiF_GshCode = BIT(12),
+	C3DiF_TexStatus = BIT(14),
+	C3DiF_FogLut = BIT(17),
+	C3DiF_Gas = BIT(18),
+
+	C3DiF_Reset = BIT(19),
+	// Per-texture-unit bits occupy 23..25; C3DiF_TexAll is the mask covering all three
+#define C3DiF_Tex(n) BIT(23+(n))
+	C3DiF_TexAll = 7 << 23,
+};
+// The one context instance used by the whole library, plus its accessor.
+static C3D_Context __C3D_Context;
+static inline C3D_Context* C3Di_GetContext(void)
+{
+	C3D_Context* ctx = &__C3D_Context; // the file-scope object declared just above
+	return ctx;
+}
+// True when `addr` lies in [OS_VRAM_VADDR, OS_VRAM_VADDR + OS_VRAM_SIZE).
+static inline bool addrIsVRAM(const void* addr)
+{
+	u32 vaddr = (u32)addr;
+	return !(vaddr < OS_VRAM_VADDR || vaddr >= OS_VRAM_VADDR + OS_VRAM_SIZE);
+}
+// VRAM_ALLOC_A for addresses below the midpoint of the VRAM window, VRAM_ALLOC_B for everything else.
+static inline vramAllocPos addrGetVRAMBank(const void* addr)
+{
+	if ((u32)addr >= OS_VRAM_VADDR + OS_VRAM_SIZE/2) return VRAM_ALLOC_B;
+	return VRAM_ALLOC_A;
+}
+// Internal (C3Di_*) helpers, declared ahead of their use.
+static void C3Di_UpdateContext(void);
+static void C3Di_AttrInfoBind(C3D_AttrInfo* info);
+static void C3Di_FrameBufBind(C3D_FrameBuf* fb);
+static void C3Di_TexEnvBind(int id, C3D_TexEnv* env);
+static void C3Di_SetTex(int unit, C3D_Tex* tex);
+static void C3Di_EffectBind(C3D_Effect* effect);
+
+static void C3Di_DirtyUniforms(void);
+static void C3Di_LoadShaderUniforms(shaderInstance_s* si);
+static void C3Di_ClearShaderUniforms(GPU_SHADER_TYPE type);
+
+static bool C3Di_SplitFrame(u32** pBuf, u32* pSize);
+
+static void C3Di_RenderQueueInit(void);
+static void C3Di_RenderQueueExit(void);
+static void C3Di_RenderQueueWaitDone(void);
+static void C3Di_RenderQueueEnableVBlank(void);
+static void C3Di_RenderQueueDisableVBlank(void);
+
+
+
+
+
+
+
+
+// Resets `info` to an empty layout: all zero except bits 16..27 of flags[1], which start out set.
+static void AttrInfo_Init(C3D_AttrInfo* info)
+{
+	memset(info, 0, sizeof(C3D_AttrInfo));
+	info->flags[1] = 0x0FFF0000; // 0xFFF << 16
+}
+// Appends one attribute to `info`. Returns the slot index used, or -1 when all 12 slots are taken.
+static int AttrInfo_AddLoader(C3D_AttrInfo* info, int regId, GPU_FORMATS format, int count)
+{
+	if (info->attrCount == 12) return -1;
+	int id = info->attrCount++;
+	if (regId < 0) regId = id; // negative regId: use the register with the same number as the slot
+	if (id < 8)
+		info->flags[0] |= GPU_ATTRIBFMT(id, count, format);
+	else
+		info->flags[1] |= GPU_ATTRIBFMT(id-8, count, format);
+	info->flags[1] = (info->flags[1] &~ (0xF0000000 | BIT(id+16))) | ((u32)id << 28); // top nibble = newest slot index
+	// Widen before shifting: slots 8..11 land at bits 32..47, which an int shift cannot reach (it was undefined)
+	info->permutation |= (u64)regId << (id*4);
+	return id;
+}
+// Hands out the context's attribute layout for editing and raises C3DiF_AttrInfo.
+static C3D_AttrInfo* C3D_GetAttrInfo(void)
+{
+	C3D_Context* ctx = C3Di_GetContext();
+	C3D_AttrInfo* info = &ctx->attrInfo;
+	ctx->flags |= C3DiF_AttrInfo;
+	return info;
+}
+// Emits the GPU commands that describe `info`: format words, attribute count (as count - 1) and permutation.
+static void C3Di_AttrInfoBind(C3D_AttrInfo* info)
+{
+	GPUCMD_AddIncrementalWrites(GPUREG_ATTRIBBUFFERS_FORMAT_LOW, (u32*)info->flags, sizeof(info->flags)/sizeof(u32));
+	GPUCMD_AddMaskedWrite(GPUREG_VSH_INPUTBUFFER_CONFIG, 0xB, 0xA0000000 | (info->attrCount - 1));
+	GPUCMD_AddWrite(GPUREG_VSH_NUM_ATTR, info->attrCount - 1);
+	GPUCMD_AddIncrementalWrites(GPUREG_VSH_ATTRIBUTES_PERMUTATION_LOW, (u32*)&info->permutation, 2); // the u64 goes out as two 32-bit words
+}
+
+
+
+
+
+
+
+
+
+
+// Base physical address constant (not referenced in the visible part of this file).
+#define BUFFER_BASE_PADDR 0x18000000
+
+
+// Flushes pending context state, then emits the command sequence drawing `count` vertices of `primitive`.
+static void C3D_DrawElements(GPU_Primitive_t primitive, int count)
+{
+	C3Di_UpdateContext();
+
+	// Set primitive type
+	GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 2, primitive != GPU_TRIANGLES ? primitive : GPU_GEOMETRY_PRIM); // GPU_TRIANGLES is replaced by GPU_GEOMETRY_PRIM
+	// Start a new primitive (breaks off a triangle strip/fan)
+	GPUCMD_AddWrite(GPUREG_RESTART_PRIMITIVE, 1);
+	// Number of vertices
+	GPUCMD_AddWrite(GPUREG_NUMVERTICES, count);
+	// First vertex
+	GPUCMD_AddWrite(GPUREG_VERTEX_OFFSET, 0);
+	// Enable triangle element drawing mode if necessary
+	GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 2, 0x100);
+	GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG2, 2, 0x100);
+	// Enable drawing mode
+	GPUCMD_AddMaskedWrite(GPUREG_START_DRAW_FUNC0, 1, 0);
+	// Trigger element drawing
+	GPUCMD_AddWrite(GPUREG_DRAWELEMENTS, 1);
+	// Go back to configuration mode
+	GPUCMD_AddMaskedWrite(GPUREG_START_DRAW_FUNC0, 1, 1);
+	// Disable triangle element drawing mode if necessary
+	GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 2, 0);
+	GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG2, 2, 0);
+	// Clear the post-vertex cache
+	GPUCMD_AddWrite(GPUREG_VTX_FUNC, 1);
+	GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x8, 0);
+	GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x8, 0); // NOTE(review): same write issued twice; presumably deliberate -- confirm before removing
+
+	C3Di_GetContext()->flags |= C3DiF_DrawUsed;
+}
+
+
+
+
+
+
+// Returns the context's effect state for modification and raises C3DiF_Effect.
+static inline C3D_Effect* getEffect()
+{
+	C3D_Context* ctx = C3Di_GetContext();
+	C3D_Effect* effect = &ctx->effect;
+	ctx->flags |= C3DiF_Effect;
+	return effect;
+}
+// Stores the depth-map mode and the scale/offset pair, the latter two converted with f32tof24.
+static void C3D_DepthMap(bool bIsZBuffer, float zScale, float zOffset)
+{
+	C3D_Effect* fx = getEffect();
+	fx->zScale  = f32tof24(zScale);
+	fx->zOffset = f32tof24(zOffset);
+	fx->zBuffer = bIsZBuffer;
+}
+
+static void C3D_CullFace(GPU_CULLMODE mode)
+{
+	// Stored as given; C3Di_EffectBind keeps only the low two bits when writing the register
+	getEffect()->cullMode = mode;
+}
+// Installs the fixed stencil-test word: bit 0 clear, GPU_ALWAYS at bits 4 and up, 0xFF in bits 24..31.
+static void C3D_StencilTest(void)
+{
+	// Unsigned literal: 0xFF << 24 does not fit in int, so the signed shift was undefined behaviour
+	getEffect()->stencilMode = false | (GPU_ALWAYS << 4) | (0xFFu << 24);
+}
+// Sets all three stencil operations (fields at bits 0, 4 and 8) to GPU_STENCIL_KEEP.
+static void C3D_StencilOp(void)
+{
+	const u32 keep = GPU_STENCIL_KEEP;
+	getEffect()->stencilOp = keep | (keep << 4) | (keep << 8);
+}
+// Records the early-depth-test enable flag, comparison function and reference value.
+static void C3D_EarlyDepthTest(bool enable, GPU_EARLYDEPTHFUNC function, u32 ref)
+{
+	C3D_Effect* fx = getEffect();
+	fx->earlyDepthRef = ref;
+	fx->earlyDepthFunc = function;
+	fx->earlyDepth = enable;
+}
+// Packs the depth-test word: bit 0 = enable, bits 4..6 = function, bits 8 and up = write mask.
+static void C3D_DepthTest(bool enable, GPU_TESTFUNC function, GPU_WRITEMASK writemask)
+{
+	u32 packed = (writemask << 8) | ((function & 7) << 4) | (enable ? 1 : 0);
+	getEffect()->depthTest = packed;
+}
+// Packs the alpha-test word: bit 0 = enable, bits 4..6 = function, bits 8 and up = reference value.
+static void C3D_AlphaTest(bool enable, GPU_TESTFUNC function, int ref)
+{
+	u32 packed = (ref << 8) | ((function & 7) << 4) | (enable ? 1 : 0);
+	getEffect()->alphaTest = packed;
+}
+// Stores the packed blend equations and factors, and sets bits 8..15 of fragOpMode to 0x01.
+static void C3D_AlphaBlend(GPU_BLENDEQUATION colorEq, GPU_BLENDEQUATION alphaEq, GPU_BLENDFACTOR srcClr, GPU_BLENDFACTOR dstClr, GPU_BLENDFACTOR srcAlpha, GPU_BLENDFACTOR dstAlpha)
+{
+	C3D_Effect* fx = getEffect();
+	u32 mode = fx->fragOpMode;
+	fx->alphaBlend = colorEq | (alphaEq << 8) | (srcClr << 16) | (dstClr << 20) | (srcAlpha << 24) | (dstAlpha << 28);
+	fx->fragOpMode = (mode & ~0xFF00) | 0x0100;
+}
+// Stores the logic operation and clears bits 8..15 of fragOpMode (the field C3D_AlphaBlend sets to 0x01).
+static void C3D_ColorLogicOp(GPU_LOGICOP op)
+{
+	C3D_Effect* fx = getEffect();
+	fx->clrLogicOp = op;
+	fx->fragOpMode = fx->fragOpMode & ~0xFF00;
+}
+// Clears bits 0..7 and 16..23 of fragOpMode, then ORs in `mode` together with 0xE4 at bits 16..23.
+static void C3D_FragOpMode(GPU_FRAGOPMODE mode)
+{
+	C3D_Effect* fx = getEffect();
+	u32 kept = fx->fragOpMode & ~0xFF00FF;
+	fx->fragOpMode = kept | (0xE40000 | mode);
+}
+// Packs f32tof16(scale+bias) into the low half-word and f32tof16(-scale) into the high half-word.
+static void C3D_FragOpShadow(float scale, float bias)
+{
+	C3D_Effect* fx = getEffect();
+	fx->fragOpShadow = (f32tof16(-scale)<<16) | f32tof16(scale+bias);
+}
+// Emits the GPU register writes for every field of the effect state `e`.
+static void C3Di_EffectBind(C3D_Effect* e)
+{
+	GPUCMD_AddWrite(GPUREG_DEPTHMAP_ENABLE, e->zBuffer ? 1 : 0);
+	GPUCMD_AddWrite(GPUREG_FACECULLING_CONFIG, e->cullMode & 0x3);
+	GPUCMD_AddIncrementalWrites(GPUREG_DEPTHMAP_SCALE, (u32*)&e->zScale, 2); // zScale and zOffset, relying on their adjacency in C3D_Effect
+	GPUCMD_AddIncrementalWrites(GPUREG_FRAGOP_ALPHA_TEST, (u32*)&e->alphaTest, 4); // alphaTest, stencilMode, stencilOp, depthTest as one run
+	GPUCMD_AddMaskedWrite(GPUREG_GAS_DELTAZ_DEPTH, 0x8, (u32)GPU_MAKEGASDEPTHFUNC((e->depthTest>>4)&7) << 24); // derived from the depth-test function field
+	GPUCMD_AddWrite(GPUREG_BLEND_COLOR, 0);
+	GPUCMD_AddWrite(GPUREG_BLEND_FUNC, e->alphaBlend);
+	GPUCMD_AddWrite(GPUREG_LOGIC_OP, e->clrLogicOp);
+	GPUCMD_AddMaskedWrite(GPUREG_COLOR_OPERATION, 7, e->fragOpMode);
+	GPUCMD_AddWrite(GPUREG_FRAGOP_SHADOW, e->fragOpShadow);
+	GPUCMD_AddMaskedWrite(GPUREG_EARLYDEPTH_TEST1, 1, e->earlyDepth ? 1 : 0);
+	GPUCMD_AddWrite(GPUREG_EARLYDEPTH_TEST2, e->earlyDepth ? 1 : 0);
+	GPUCMD_AddMaskedWrite(GPUREG_EARLYDEPTH_FUNC, 1, e->earlyDepthFunc);
+	GPUCMD_AddMaskedWrite(GPUREG_EARLYDEPTH_DATA, 0x7, e->earlyDepthRef);
+}
+
+
+
+
+
+
+// Builds the 128 packed LUT words from data[0..127] (values) and data[128..255] (differences).
+static void FogLut_FromArray(C3D_FogLut* lut, const float data[256])
+{
+	int i;
+	for (i = 0; i < 128; i ++)
+	{
+		float in = data[i], diff = data[i+128];
+		// Value: scaled by 0x800 and clamped to 0..0x7FF (non-positive inputs give 0)
+		u32 val = 0;
+		if (in > 0.0f)
+		{
+			in *= 0x800;
+			val = (in < 0x800) ? (u32)in : 0x7FF;
+		}
+		// Difference: scaled by 0x800, clamped to -0x1000..0xFFF, kept as a 13-bit two's-complement field
+		u32 val2 = 0;
+		if (diff != 0.0f)
+		{
+			diff *= 0x800;
+			if (diff < -0x1000) diff = -0x1000;
+			else if (diff > 0xFFF) diff = 0xFFF;
+			val2 = (s32)diff & 0x1FFF;
+		}
+		// Low 13 bits: difference; bits 13 and up: value
+		lut->data[i] = val2 | (val << 13);
+	}
+}
+// Rewrites the fog-mode, gas-mode and z-flip bits of texEnvBuf; does nothing unless the context is active.
+static void C3D_FogGasMode(GPU_FOGMODE fogMode, GPU_GASMODE gasMode, bool zFlip)
+{
+	C3D_Context* ctx = C3Di_GetContext();
+	u32 bits = (fogMode&7) | ((gasMode&1)<<3);
+	if (ctx->flags & C3DiF_Active)
+	{
+		if (zFlip) bits |= BIT(16);
+		ctx->texEnvBuf = (ctx->texEnvBuf & ~0x100FF) | bits;
+		ctx->flags |= C3DiF_TexEnvBuf;
+	}
+}
+// Stores the fog colour and raises C3DiF_TexEnvBuf; does nothing unless the context is active.
+static void C3D_FogColor(u32 color)
+{
+	C3D_Context* ctx = C3Di_GetContext();
+
+	if (ctx->flags & C3DiF_Active)
+	{
+		ctx->fogClr = color;
+		ctx->flags |= C3DiF_TexEnvBuf;
+	}
+}
+// Binds `lut` and raises C3DiF_FogLut; a NULL `lut` only clears that flag. Does nothing on an inactive context.
+static void C3D_FogLutBind(C3D_FogLut* lut)
+{
+	C3D_Context* ctx = C3Di_GetContext();
+	if (!(ctx->flags & C3DiF_Active))
+		return;
+
+	if (!lut)
+	{
+		ctx->flags &= ~C3DiF_FogLut;
+		return;
+	}
+	ctx->fogLut = lut;
+	ctx->flags |= C3DiF_FogLut;
+}
+
+
+
+
+
+
+
+
+
+// Per-format size codes, indexed by format enum value; bytes per pixel = 2 + code (see C3D_CalcColorBufSize / C3D_CalcDepthBufSize).
+static const u8 colorFmtSizes[] = {2,1,0,0,0};
+static const u8 depthFmtSizes[] = {0,0,1,2};
+// Byte size of a width x height colour buffer: (2 + colorFmtSizes[fmt]) bytes per pixel.
+static u32 C3D_CalcColorBufSize(u32 width, u32 height, GPU_COLORBUF fmt)
+{
+	u32 bytesPerPixel = 2+colorFmtSizes[fmt];
+	return width*height*bytesPerPixel;
+}
+// Byte size of a width x height depth buffer: (2 + depthFmtSizes[fmt]) bytes per pixel.
+static u32 C3D_CalcDepthBufSize(u32 width, u32 height, GPU_DEPTHBUF fmt)
+{
+	u32 bytesPerPixel = 2+depthFmtSizes[fmt];
+	return width*height*bytesPerPixel;
+}
+// Hands out the context's framebuffer state for editing and raises C3DiF_FrameBuf.
+static C3D_FrameBuf* C3D_GetFrameBuf(void)
+{
+	C3D_Context* ctx = C3Di_GetContext();
+	C3D_FrameBuf* fb = &ctx->fb;
+	ctx->flags |= C3DiF_FrameBuf;
+	return fb;
+}
+// Copies `fb` into the context (unless it already is the context's copy) and raises C3DiF_FrameBuf. No-op when inactive.
+static void C3D_SetFrameBuf(C3D_FrameBuf* fb)
+{
+	C3D_Context* ctx = C3Di_GetContext();
+	C3D_FrameBuf* dst = &ctx->fb;
+	if (ctx->flags & C3DiF_Active)
+	{
+		if (dst != fb)
+			memcpy(dst, fb, sizeof(C3D_FrameBuf));
+		ctx->flags |= C3DiF_FrameBuf;
+	}
+}
+// Emits the GPU register writes describing framebuffer `fb`: locations, dimensions, formats and enable masks.
+static void C3Di_FrameBufBind(C3D_FrameBuf* fb)
+{
+	u32 param[4] = { 0, 0, 0, 0 };
+
+	GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_INVALIDATE, 1);
+	// Buffer locations are sent as physical address >> 3, followed by the packed dimension word
+	param[0] = osConvertVirtToPhys(fb->depthBuf) >> 3;
+	param[1] = osConvertVirtToPhys(fb->colorBuf) >> 3;
+	param[2] = 0x01000000 | (((u32)(fb->height-1) & 0xFFF) << 12) | (fb->width & 0xFFF);
+	GPUCMD_AddIncrementalWrites(GPUREG_DEPTHBUFFER_LOC, param, 3);
+
+	GPUCMD_AddWrite(GPUREG_RENDERBUF_DIM,       param[2]);
+	GPUCMD_AddWrite(GPUREG_DEPTHBUFFER_FORMAT,  fb->depthFmt);
+	GPUCMD_AddWrite(GPUREG_COLORBUFFER_FORMAT,  colorFmtSizes[fb->colorFmt] | ((u32)fb->colorFmt << 16));
+	GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_BLOCK32, fb->block32 ? 1 : 0);
+
+	// Enable or disable color/depth buffers
+	param[0] = param[1] = fb->colorBuf ? fb->colorMask : 0;
+	param[2] = param[3] = fb->depthBuf ? fb->depthMask : 0;
+	GPUCMD_AddIncrementalWrites(GPUREG_COLORBUFFER_READ, param, 4);
+}
+// Fills the colour and/or depth buffer of `frameBuf` with a constant, as selected by `clearBits`; no bits set means no fill.
+static void C3D_FrameBufClear(C3D_FrameBuf* frameBuf, C3D_ClearBits clearBits, u32 clearColor, u32 clearDepth)
+{
+	u32 size = (u32)frameBuf->width * frameBuf->height;
+	u32 cfs = colorFmtSizes[frameBuf->colorFmt];
+	u32 dfs = depthFmtSizes[frameBuf->depthFmt];
+	void* colorBufEnd = (u8*)frameBuf->colorBuf + size*(2+cfs);
+	void* depthBufEnd = (u8*)frameBuf->depthBuf + size*(2+dfs);
+	// Each fill gets start, value, end and a control word made of BIT(0) plus the format size code at bit 8
+	if (clearBits & C3D_CLEAR_COLOR)
+	{
+		if (clearBits & C3D_CLEAR_DEPTH)
+			GX_MemoryFill(
+				(u32*)frameBuf->colorBuf, clearColor, (u32*)colorBufEnd, BIT(0) | (cfs << 8),
+				(u32*)frameBuf->depthBuf, clearDepth, (u32*)depthBufEnd, BIT(0) | (dfs << 8));
+		else
+			GX_MemoryFill(
+				(u32*)frameBuf->colorBuf, clearColor, (u32*)colorBufEnd, BIT(0) | (cfs << 8),
+				NULL, 0, NULL, 0);
+	} else if (clearBits & C3D_CLEAR_DEPTH) // was a bare else: the depth buffer got filled even when not requested
+		GX_MemoryFill(
+			(u32*)frameBuf->depthBuf, clearDepth, (u32*)depthBufEnd, BIT(0) | (dfs << 8),
+			NULL, 0, NULL, 0);
+}
+// Issues a display transfer from the colour buffer of `frameBuf` to the framebuffer of `screen`/`side`.
+static void C3D_FrameBufTransfer(C3D_FrameBuf* frameBuf, gfxScreen_t screen, gfx3dSide_t side, u32 transferFlags)
+{
+	u32* outputFrameBuf = (u32*)gfxGetFramebuffer(screen, side, NULL, NULL);
+	u32 dim = GX_BUFFER_DIM((u32)frameBuf->width, (u32)frameBuf->height); // the same dimensions are used for input and output
+	GX_DisplayTransfer((u32*)frameBuf->colorBuf, dim, outputFrameBuf, dim, transferFlags);
+}
+
+
+
+
+
+
+// Flushes pending context state and switches the GPU into immediate-mode vertex submission for `primitive`.
+static void C3D_ImmDrawBegin(GPU_Primitive_t primitive)
+{
+	C3Di_UpdateContext();
+
+	// Set primitive type
+	GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 2, primitive);
+	// Start a new primitive (breaks off a triangle strip/fan)
+	GPUCMD_AddWrite(GPUREG_RESTART_PRIMITIVE, 1);
+	// Enable vertex submission mode
+	GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG2, 1, 1);
+	// Enable drawing mode
+	GPUCMD_AddMaskedWrite(GPUREG_START_DRAW_FUNC0, 1, 0);
+	// Begin immediate-mode vertex submission
+	GPUCMD_AddWrite(GPUREG_FIXEDATTRIB_INDEX, 0xF);
+}
+// Stores the low 24 bits of `val` at p[0..2], least significant byte first.
+static inline void write24(u8* p, u32 val)
+{
+	int n;
+	for (n = 0; n < 3; n ++, val >>= 8)
+		p[n] = val;
+}
+// Sends one four-component attribute in immediate mode: four float24 values packed into three 32-bit words.
+static void C3D_ImmSendAttrib(float x, float y, float z, float w)
+{
+	union
+	{
+		u32 packed[3];
+		struct
+		{
+			u8 x[3];
+			u8 y[3];
+			u8 z[3];
+			u8 w[3];
+		};
+	} param; // twelve bytes, viewed either as 4 x 24-bit fields or as 3 words
+
+	// Convert the values to float24
+	write24(param.x, f32tof24(x));
+	write24(param.y, f32tof24(y));
+	write24(param.z, f32tof24(z));
+	write24(param.w, f32tof24(w));
+
+	// Reverse the packed words
+	u32 p = param.packed[0];
+	param.packed[0] = param.packed[2];
+	param.packed[2] = p;
+
+	// Send the attribute
+	GPUCMD_AddIncrementalWrites(GPUREG_FIXEDATTRIB_DATA0, param.packed, 3);
+}
+// Leaves immediate-mode submission (undoing the mode writes of C3D_ImmDrawBegin) and raises C3DiF_DrawUsed.
+static void C3D_ImmDrawEnd(void)
+{
+	// Go back to configuration mode
+	GPUCMD_AddMaskedWrite(GPUREG_START_DRAW_FUNC0, 1, 1);
+	// Disable vertex submission mode
+	GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG2, 1, 0);
+	// Clear the post-vertex cache
+	GPUCMD_AddWrite(GPUREG_VTX_FUNC, 1);
+
+	C3Di_GetContext()->flags |= C3DiF_DrawUsed;
+}
+
+
+// Output slots: [0] top screen, [1] top screen with side GFX_RIGHT, [2] bottom screen (see C3D_RenderTargetSetOutput).
+static C3D_RenderTarget *linkedTarget[3];
+// Frame state shared between the frame functions and the queue-finish callback
+static bool inFrame, inSafeTransfer;
+static bool needSwapTop, needSwapBot, isTopStereo;
+static u32 vblankCounter[2]; // incremented by onVBlank0 / onVBlank1
+
+static void C3Di_RenderTargetDestroy(C3D_RenderTarget* target);
+// GSPGPU_EVENT_VBlank0 callback: counts the event; the argument is ignored.
+static void onVBlank0(void* unused)
+{
+	vblankCounter[0]++;
+}
+// GSPGPU_EVENT_VBlank1 callback: counts the event; the argument is ignored.
+static void onVBlank1(void* unused)
+{
+	vblankCounter[1]++;
+}
+// Queue-completion callback: finishes a pending safe transfer, otherwise performs the buffer swaps requested by C3D_FrameEnd.
+static void onQueueFinish(gxCmdQueue_s* queue)
+{
+	if (inSafeTransfer)
+	{
+		// A transfer started by C3Di_SafeTextureCopy has completed; no buffer swap on this path
+		inSafeTransfer = false;
+		if (inFrame)
+		{
+			gxCmdQueueStop(queue);
+			gxCmdQueueClear(queue);
+		}
+		return;
+	}
+
+	if (needSwapTop)
+	{
+		gfxScreenSwapBuffers(GFX_TOP, isTopStereo);
+		needSwapTop = false;
+	}
+	if (needSwapBot)
+	{
+		gfxScreenSwapBuffers(GFX_BOTTOM, false);
+		needSwapBot = false;
+	}
+}
+// Blocks until both VBlank counters differ from the values they had on entry.
+static void C3D_FrameSync(void)
+{
+	u32 first0 = vblankCounter[0];
+	u32 first1 = vblankCounter[1];
+	for (;;)
+	{
+		gspWaitForAnyEvent();
+		if (vblankCounter[0] != first0 && vblankCounter[1] != first1) // leave once neither counter is unchanged
+			break;
+	}
+}
+// Waits up to `timeout` for the GX queue; on success stops and empties the queue and returns true, otherwise returns false.
+static bool C3Di_WaitAndClearQueue(s64 timeout)
+{
+	gxCmdQueue_s* q = &C3Di_GetContext()->gxQueue;
+	bool finished = gxCmdQueueWait(q, timeout);
+	if (!finished) return false;
+	gxCmdQueueStop(q);
+	gxCmdQueueClear(q);
+	return true;
+}
+// Registers the two VBlank counting callbacks with GSP.
+static void C3Di_RenderQueueEnableVBlank(void)
+{
+	gspSetEventCallback(GSPGPU_EVENT_VBlank0, onVBlank0, NULL, false);
+	gspSetEventCallback(GSPGPU_EVENT_VBlank1, onVBlank1, NULL, false);
+}
+// Removes the two VBlank callbacks by registering NULL in their place.
+static void C3Di_RenderQueueDisableVBlank(void)
+{
+	gspSetEventCallback(GSPGPU_EVENT_VBlank0, NULL, NULL, false);
+	gspSetEventCallback(GSPGPU_EVENT_VBlank1, NULL, NULL, false);
+}
+// Installs the VBlank callbacks, binds the context's GX queue, hooks onQueueFinish and starts the queue.
+static void C3Di_RenderQueueInit(void)
+{
+	C3D_Context* ctx = C3Di_GetContext();
+
+	C3Di_RenderQueueEnableVBlank();
+
+	GX_BindQueue(&ctx->gxQueue);
+	gxCmdQueueSetCallback(&ctx->gxQueue, onQueueFinish, NULL);
+	gxCmdQueueRun(&ctx->gxQueue);
+}
+// Reverses C3Di_RenderQueueInit: drains the queue, removes its callback, unbinds it and drops the VBlank callbacks.
+static void C3Di_RenderQueueExit(void)
+{
+	C3Di_WaitAndClearQueue(-1);
+	gxCmdQueueSetCallback(&C3Di_GetContext()->gxQueue, NULL, NULL);
+	GX_BindQueue(NULL);
+
+	C3Di_RenderQueueDisableVBlank();
+}
+// Waits (timeout -1) for the GX queue to finish, then stops and clears it.
+static void C3Di_RenderQueueWaitDone(void)
+{
+	C3Di_WaitAndClearQueue(-1);
+}
+// Opens a frame. Returns false if one is already open, or if the queue wait fails (timeout 0 with C3D_FRAME_NONBLOCK, else -1).
+static bool C3D_FrameBegin(u8 flags)
+{
+	C3D_Context* ctx = C3Di_GetContext();
+	s64 timeout = (flags & C3D_FRAME_NONBLOCK) ? 0 : -1;
+	// The queue is only waited on when no frame is open (short-circuit)
+	if (inFrame || !C3Di_WaitAndClearQueue(timeout))
+		return false;
+
+	inFrame = true;
+	GPUCMD_SetBuffer(ctx->cmdBuf, ctx->cmdBufSize, 0);
+	return true;
+}
+// Directs drawing at `target`: marks it used, installs its framebuffer and a full-size viewport. Returns false outside a frame.
+static bool C3D_FrameDrawOn(C3D_RenderTarget* target)
+{
+	if (!inFrame) return false;
+	C3D_FrameBuf* fb = &target->frameBuf;
+	target->used = true;
+	C3D_SetFrameBuf(fb);
+	C3D_SetViewport(0, 0, fb->width, fb->height);
+	return true;
+}
+// Inside a frame: asks C3Di_SplitFrame for the finished command list and, if it returns true, submits it with size*4.
+static void C3D_FrameSplit(u8 flags)
+{
+	u32* list;
+	u32 words;
+	if (inFrame && C3Di_SplitFrame(&list, &words))
+		GX_ProcessCommandList(list, words*4, flags);
+}
+// Closes the frame: submits the command list, queues display transfers for used linked targets, restarts the queue.
+static void C3D_FrameEnd(u8 flags)
+{
+	C3D_Context* ctx = C3Di_GetContext();
+	if (!inFrame) return;
+
+	C3D_FrameSplit(flags);
+	GPUCMD_SetBuffer(NULL, 0, 0);
+	inFrame = false;
+
+	// Flush the entire linear memory if the user did not explicitly mandate to flush the command list
+	if (!(flags & GX_CMDLIST_FLUSH))
+	{
+		extern u32 __ctru_linear_heap;
+		extern u32 __ctru_linear_heap_size;
+		GSPGPU_FlushDataCache((void*)__ctru_linear_heap, __ctru_linear_heap_size);
+	}
+	// Both swaps are requested unconditionally; onQueueFinish performs them when the queue completes
+	C3D_RenderTarget* target;
+	isTopStereo = false;
+	needSwapTop = true;
+	needSwapBot = true;
+	// Slots are visited from 2 down to 0; only targets drawn on this frame are transferred
+	for (int i = 2; i >= 0; i --)
+	{
+		target = linkedTarget[i];
+		if (!target || !target->used)
+			continue;
+
+		target->used = false;
+		C3D_FrameBufTransfer(&target->frameBuf, target->screen, target->side, target->transferFlags);
+
+		if (target->screen == GFX_TOP && target->side == GFX_RIGHT) isTopStereo = true;
+	}
+
+	gxCmdQueueRun(&ctx->gxQueue);
+}
+// Allocates VRAM colour and depth buffers and initialises `target`. On allocation failure `target` stays all-zero (NULL buffers).
+void C3D_RenderTargetCreate(C3D_RenderTarget* target, int width, int height, GPU_COLORBUF colorFmt, GPU_DEPTHBUF depthFmt)
+{
+	size_t colorSize = C3D_CalcColorBufSize(width,height,colorFmt);
+	size_t depthSize = C3D_CalcDepthBufSize(width,height,depthFmt);
+	memset(target, 0, sizeof(C3D_RenderTarget));
+
+	void* depthBuf = NULL;
+	void* colorBuf = vramAlloc(colorSize);
+	if (!colorBuf) goto _fail;
+
+	vramAllocPos vramBank = addrGetVRAMBank(colorBuf);
+	depthBuf = vramAllocAt(depthSize, vramBank ^ VRAM_ALLOC_ANY); // Attempt opposite bank first...
+	if (!depthBuf) depthBuf = vramAllocAt(depthSize, vramBank); // ... if that fails, attempt same bank
+	if (!depthBuf) goto _fail;
+
+	C3D_FrameBuf* fb = &target->frameBuf;
+	C3D_FrameBufAttrib(fb, width, height, false);
+	C3D_FrameBufColor(fb, colorBuf, colorFmt);
+	C3D_FrameBufDepth(fb, depthBuf, depthFmt);
+	return;
+
+_fail:
+	if (depthBuf) vramFree(depthBuf); // depthBuf is NULL on every path that reaches _fail
+	if (colorBuf) vramFree(colorBuf);
+}
+// Releases both VRAM buffers of `target`; the struct itself is not cleared.
+static void C3Di_RenderTargetDestroy(C3D_RenderTarget* target)
+{
+	vramFree(target->frameBuf.colorBuf);
+	vramFree(target->frameBuf.depthBuf);
+}
+// Destroys `target`. Must not be called inside a frame (panics); a linked target is detached from its output first.
+static void C3D_RenderTargetDelete(C3D_RenderTarget* target)
+{
+	if (inFrame)
+		svcBreak(USERBREAK_PANIC); // Shouldn't happen.
+	if (target->linked)
+		C3D_RenderTargetDetachOutput(target); // detaching also waits for the queue (we are outside a frame here)
+	else
+		C3Di_WaitAndClearQueue(-1);
+	C3Di_RenderTargetDestroy(target);
+}
+// Links `target` (or nothing, when NULL) to the output slot chosen by `screen`/`side`, unlinking the previous occupant.
+static void C3D_RenderTargetSetOutput(C3D_RenderTarget* target, gfxScreen_t screen, gfx3dSide_t side, u32 transferFlags)
+{
+	// Slot 2: bottom screen; slot 1: other screen with side GFX_RIGHT; slot 0: everything else
+	int id = (screen==GFX_BOTTOM) ? 2 : (side==GFX_RIGHT) ? 1 : 0;
+	C3D_RenderTarget* previous = linkedTarget[id];
+	if (previous)
+	{
+		previous->linked = false;
+		if (!inFrame)
+			C3Di_WaitAndClearQueue(-1);
+	}
+	linkedTarget[id] = target;
+	if (!target)
+		return;
+
+	target->screen = screen;
+	target->side = side;
+	target->transferFlags = transferFlags;
+	target->linked = true;
+}
+// Drains the queue, sets inSafeTransfer (so onQueueFinish skips the buffer swaps), issues the copy and restarts the queue.
+static void C3Di_SafeTextureCopy(u32* inadr, u32 indim, u32* outadr, u32 outdim, u32 size, u32 flags)
+{
+	C3Di_WaitAndClearQueue(-1);
+	inSafeTransfer = true;
+	GX_TextureCopy(inadr, indim, outadr, outdim, size, flags);
+	gxCmdQueueRun(&C3Di_GetContext()->gxQueue);
+}
+// Texture copy usable in either state: inside a frame it is queued after a frame split; outside, it runs and is waited for.
+static void C3D_SyncTextureCopy(u32* inadr, u32 indim, u32* outadr, u32 outdim, u32 size, u32 flags)
+{
+	if (inFrame)
+	{
+		C3D_FrameSplit(0);
+		GX_TextureCopy(inadr, indim, outadr, outdim, size, flags);
+	} else
+	{
+		C3Di_SafeTextureCopy(inadr, indim, outadr, outdim, size, flags);
+		gspWaitForPPF(); // block until the copy has completed
+	}
+}
+
+
+
+
+
+
+// Sends the whole C3D_TexEnv as consecutive words to stage `id`'s registers (GPUREG_TEXENV0_SOURCE + 8 per stage).
+static void C3Di_TexEnvBind(int id, C3D_TexEnv* env)
+{
+	if (id >= 4) id += 2; // stages 4 and up sit two register blocks further on
+	GPUCMD_AddIncrementalWrites(GPUREG_TEXENV0_SOURCE + id*8, (u32*)env, sizeof(C3D_TexEnv)/sizeof(u32));
+}
+
+
+
+
+// Copies `data` into mipmap `level` of `tex`: memcpy for non-VRAM textures, C3D_SyncTextureCopy for VRAM. `face` is unused.
+static void C3D_TexLoadImage(C3D_Tex* tex, const void* data, GPU_TEXFACE face, int level)
+{
+	u32 size = 0;
+	void* out = C3D_TexGetImagePtr(tex, tex->data, level, &size); // destination pointer and byte count for this level
+
+	if (!addrIsVRAM(out))
+		memcpy(out, data, size);
+	else
+		C3D_SyncTextureCopy((u32*)data, 0, (u32*)out, 0, size, 8);
+}
+// Stores `tex` in texture-unit slot `unitId` and raises that unit's C3DiF_Tex bit. `unitId` indexes ctx->tex[3] unchecked.
+static void C3D_TexBind(int unitId, C3D_Tex* tex)
+{
+	C3D_Context* ctx = C3Di_GetContext();
+
+	ctx->flags |= C3DiF_Tex(unitId);
+	ctx->tex[unitId] = tex;
+}
+// Flushes the data cache over all mipmap levels of a texture that is not in VRAM; VRAM textures are left alone.
+static void C3D_TexFlush(C3D_Tex* tex)
+{
+	if (!addrIsVRAM(tex->data))
+		GSPGPU_FlushDataCache(tex->data, C3D_TexCalcTotalSize(tex->size, tex->maxLevel));
+}
+// Frees the texel storage with the allocator that matches its address: vramFree for VRAM, linearFree otherwise.
+static void C3D_TexDelete(C3D_Tex* tex)
+{
+	void* addr = tex->data;
+	if (!addrIsVRAM(addr))
+		linearFree(addr);
+	else
+		vramFree(addr);
+}
+// Sends `tex` to texture unit `unit` (0..2): five parameter words, then the format. Other unit values emit nothing.
+static void C3Di_SetTex(int unit, C3D_Tex* tex)
+{
+	u32 reg[10]; // only the first five entries are filled and sent
+	u32 regcount = 5;
+	reg[0] = tex->border;
+	reg[1] = tex->dim;
+	reg[2] = tex->param;
+	reg[3] = tex->lodParam;
+	reg[4] = osConvertVirtToPhys(tex->data) >> 3; // physical address >> 3
+
+	switch (unit)
+	{
+		case 0:
+			GPUCMD_AddIncrementalWrites(GPUREG_TEXUNIT0_BORDER_COLOR, reg, regcount);
+			GPUCMD_AddWrite(GPUREG_TEXUNIT0_TYPE, tex->fmt);
+			break;
+		case 1:
+			GPUCMD_AddIncrementalWrites(GPUREG_TEXUNIT1_BORDER_COLOR, reg, 5);
+			GPUCMD_AddWrite(GPUREG_TEXUNIT1_TYPE, tex->fmt);
+			break;
+		case 2:
+			GPUCMD_AddIncrementalWrites(GPUREG_TEXUNIT2_BORDER_COLOR, reg, 5);
+			GPUCMD_AddWrite(GPUREG_TEXUNIT2_TYPE, tex->fmt);
+			break;
+	}
+}
+
+
+
+
+
+
+
+// Float constants taken from the bound shader instance, pending upload by C3D_UpdateUniforms.
+static struct
+{
+	bool dirty; // true while the table below still has to be written out
+	int count; // number of entries in data
+	float24Uniform_s* data; // points at the shader instance's own table (not a copy)
+} C3Di_ShaderFVecData;
+
+static bool C3Di_FVUnifEverDirty[C3D_FVUNIF_COUNT]; // read by C3Di_DirtyUniforms
+static bool C3Di_IVUnifEverDirty[C3D_IVUNIF_COUNT]; // read by C3Di_DirtyUniforms
+
+static void C3D_UpdateUniforms(void)
+{
+	int i = 0;
+	// Queues GPU writes for every dirty vertex-shader uniform, then clears the dirty flags.
+	// Update FVec uniforms that come from shader constants
+	if (C3Di_ShaderFVecData.dirty)
+	{
+		while (i < C3Di_ShaderFVecData.count)
+		{
+			float24Uniform_s* u = &C3Di_ShaderFVecData.data[i++];
+			GPUCMD_AddIncrementalWrites(GPUREG_VSH_FLOATUNIFORM_CONFIG, (u32*)u, 4);
+			C3D_FVUnifDirty[u->id] = false;
+		}
+		C3Di_ShaderFVecData.dirty = false;
+		i = 0;
+	}
+
+	// Update FVec uniforms
+	while (i < C3D_FVUNIF_COUNT)
+	{
+		if (!C3D_FVUnifDirty[i])
+		{
+			i ++;
+			continue;
+		}
+
+		// Find the number of consecutive dirty uniforms
+		int j;
+		for (j = i; j < C3D_FVUNIF_COUNT && C3D_FVUnifDirty[j]; j ++);
+
+		// Upload the uniforms
+		GPUCMD_AddWrite(GPUREG_VSH_FLOATUNIFORM_CONFIG, 0x80000000|i);
+		GPUCMD_AddWrites(GPUREG_VSH_FLOATUNIFORM_DATA, (u32*)&C3D_FVUnif[i], (j-i)*4);
+
+		// Clear the dirty flag and remember that the uniform has been uploaded
+		int k;
+		for (k = i; k < j; k ++)
+		{
+			C3D_FVUnifDirty[k] = false;
+			C3Di_FVUnifEverDirty[k] = true;
+		}
+
+		// Advance
+		i = j;
+	}
+
+	// Update IVec uniforms
+	for (i = 0; i < C3D_IVUNIF_COUNT; i ++)
+	{
+		if (!C3D_IVUnifDirty[i]) continue;
+
+		GPUCMD_AddWrite(GPUREG_VSH_INTUNIFORM_I0+i, C3D_IVUnif[i]);
+		C3D_IVUnifDirty[i] = false;
+		C3Di_IVUnifEverDirty[i] = true; // as for FVec: lets C3Di_DirtyUniforms re-dirty it later
+	}
+
+	// Update bool uniforms
+	if (C3D_BoolUnifsDirty)
+	{
+		GPUCMD_AddWrite(GPUREG_VSH_BOOLUNIFORM, 0x7FFF0000 | C3D_BoolUnifs);
+		C3D_BoolUnifsDirty = false;
+	}
+}
+
+static void C3Di_DirtyUniforms(void)
+{
+	int n;
+	// Re-dirty every uniform whose "ever dirty" flag is set; already-dirty ones stay dirty.
+	for (n = 0; n < C3D_FVUNIF_COUNT; n ++)
+		C3D_FVUnifDirty[n] = C3Di_FVUnifEverDirty[n] || C3D_FVUnifDirty[n];
+	for (n = 0; n < C3D_IVUNIF_COUNT; n ++)
+		C3D_IVUnifDirty[n] = C3Di_IVUnifEverDirty[n] || C3D_IVUnifDirty[n];
+	if (C3Di_ShaderFVecData.count) C3Di_ShaderFVecData.dirty = true;
+	C3D_BoolUnifsDirty = true;
+}
+
+static void C3Di_LoadShaderUniforms(shaderInstance_s* si)
+{
+	int slot;
+
+	// Bool uniforms: overwrite only the bits covered by the shader's mask.
+	// The dirty flag is raised whether or not the mask is empty.
+	if (si->boolUniformMask)
+		C3D_BoolUnifs = (C3D_BoolUnifs & ~si->boolUniformMask) | si->boolUniforms;
+	C3D_BoolUnifsDirty = true;
+
+	// Int uniforms: copy and dirty each of the four slots named in the mask.
+	// An empty mask selects no slot, so no separate guard is needed.
+	for (slot = 0; slot < 4; slot ++)
+	{
+		if (!(si->intUniformMask & BIT(slot)))
+			continue;
+		C3D_IVUnif[slot] = si->intUniforms[slot];
+		C3D_IVUnifDirty[slot] = true;
+	}
+
+	// Float constants are not copied; the shader's own table is referenced
+	// and flagged for upload.
+	C3Di_ShaderFVecData.data  = si->float24Uniforms;
+	C3Di_ShaderFVecData.count = si->numFloat24Uniforms;
+	C3Di_ShaderFVecData.dirty = true;
+}
+
+
+
+
+
+
+
+
+static aptHookCookie hookCookie;
+
+static void C3Di_AptEventHook(APT_HookType hookType, void* param)
+{
+	C3D_Context* ctx = C3Di_GetContext();
+
+	if (hookType == APTHOOK_ONSUSPEND)
+	{
+		// Wait on the render queue first, then switch its VBlank handling off.
+		C3Di_RenderQueueWaitDone();
+		C3Di_RenderQueueDisableVBlank();
+		return;
+	}
+
+	// Every hook type other than suspend and restore is ignored.
+	if (hookType != APTHOOK_ONRESTORE)
+		return;
+
+	C3Di_RenderQueueEnableVBlank();
+
+	// Flag the state groups below as dirty so C3Di_UpdateContext sends
+	// them again.
+	ctx->flags |= C3DiF_AttrInfo | C3DiF_Effect | C3DiF_FrameBuf
+		| C3DiF_Viewport | C3DiF_Scissor | C3DiF_Program | C3DiF_VshCode | C3DiF_GshCode
+		| C3DiF_TexAll | C3DiF_TexEnvBuf | C3DiF_Gas | C3DiF_Reset;
+	C3Di_DirtyUniforms();
+
+	// The fog LUT is only flagged when one has been set.
+	if (ctx->fogLut)
+		ctx->flags |= C3DiF_FogLut;
+}
+
+static bool C3D_Init(size_t cmdBufSize)
+{
+	int i;
+	C3D_Context* ctx = C3Di_GetContext();
+	// Returns false if the context is already active or if either allocation fails; true otherwise.
+	if (ctx->flags & C3DiF_Active)
+		return false;
+
+	cmdBufSize = (cmdBufSize + 0xF) &~ 0xF; // 0x10-byte align
+	ctx->cmdBufSize = cmdBufSize/4; // stored as a count of 32-bit words
+	ctx->cmdBuf = (u32*)linearAlloc(cmdBufSize);
+	if (!ctx->cmdBuf)
+		return false;
+
+	ctx->gxQueue.maxEntries = 32;
+	ctx->gxQueue.entries = (gxCmdEntry_s*)malloc(ctx->gxQueue.maxEntries*sizeof(gxCmdEntry_s));
+	if (!ctx->gxQueue.entries)
+	{
+		linearFree(ctx->cmdBuf); // undo the first allocation before failing
+		return false;
+	}
+
+	ctx->flags = C3DiF_Active | C3DiF_TexEnvBuf | C3DiF_Effect | C3DiF_TexStatus | C3DiF_TexAll | C3DiF_Reset;
+
+	// TODO: replace with direct struct access
+	C3D_DepthMap(true, -1.0f, 0.0f);
+	C3D_CullFace(GPU_CULL_BACK_CCW);
+	C3D_StencilTest();
+	C3D_StencilOp();
+	C3D_EarlyDepthTest(false, GPU_EARLYDEPTH_GREATER, 0);
+	C3D_DepthTest(true, GPU_GREATER, GPU_WRITE_ALL);
+	C3D_AlphaTest(false, GPU_ALWAYS, 0x00);
+	C3D_AlphaBlend(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA);
+	C3D_FragOpMode(GPU_FRAGOPMODE_GL);
+	C3D_FragOpShadow(0.0, 1.0);
+
+	ctx->texConfig = BIT(12);
+	ctx->texEnvBuf = 0;
+	ctx->texEnvBufClr = 0xFFFFFFFF;
+	ctx->fogClr = 0;
+	ctx->fogLut = NULL;
+
+	for (i = 0; i < 3; i ++)
+		ctx->tex[i] = NULL; // no texture bound on any of the three units
+
+	C3Di_RenderQueueInit();
+	aptHook(&hookCookie, C3Di_AptEventHook, NULL); // removed again by C3D_Fini
+
+	return true;
+}
+
+static void C3D_SetViewport(u32 x, u32 y, u32 w, u32 h)
+{
+	C3D_Context* ctx = C3Di_GetContext();
+	ctx->flags |= C3DiF_Viewport | C3DiF_Scissor; // both groups are re-sent by C3Di_UpdateContext
+	ctx->viewport[0] = f32tof24(w / 2.0f); // half width
+	ctx->viewport[1] = f32tof31(2.0f / w) << 1; // reciprocal of the half width
+	ctx->viewport[2] = f32tof24(h / 2.0f); // half height
+	ctx->viewport[3] = f32tof31(2.0f / h) << 1; // reciprocal of the half height
+	ctx->viewport[4] = (y << 16) | (x & 0xFFFF); // origin packed as y (high half) and x (low half)
+	ctx->scissor[0] = GPU_SCISSOR_DISABLE; // setting a viewport also switches the scissor mode off
+}
+
+static void C3D_SetScissor(GPU_SCISSORMODE mode, u32 left, u32 top, u32 right, u32 bottom)
+{
+	C3D_Context* c = C3Di_GetContext();
+	c->scissor[0] = mode;
+	c->flags |= C3DiF_Scissor;
+	if (mode == GPU_SCISSOR_DISABLE) return; // the stored rectangle is left as it was
+	c->scissor[1] = (left & 0xFFFF) | (top << 16);
+	c->scissor[2] = ((right-1) & 0xFFFF) | ((bottom-1) << 16);
+}
+
+static void C3Di_UpdateContext(void)
+{
+	int i;
+	C3D_Context* ctx = C3Di_GetContext();
+	// For each state group whose dirty flag is set: clear the flag and queue the matching GPU commands.
+	if (ctx->flags & C3DiF_FrameBuf)
+	{
+		ctx->flags &= ~C3DiF_FrameBuf;
+		if (ctx->flags & C3DiF_DrawUsed)
+		{
+			ctx->flags &= ~C3DiF_DrawUsed;
+			GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_FLUSH, 1);
+			GPUCMD_AddWrite(GPUREG_EARLYDEPTH_CLEAR, 1);
+		}
+		C3Di_FrameBufBind(&ctx->fb);
+	}
+
+	if (ctx->flags & C3DiF_Viewport)
+	{
+		ctx->flags &= ~C3DiF_Viewport;
+		GPUCMD_AddIncrementalWrites(GPUREG_VIEWPORT_WIDTH, ctx->viewport, 4);
+		GPUCMD_AddWrite(GPUREG_VIEWPORT_XY, ctx->viewport[4]);
+	}
+
+	if (ctx->flags & C3DiF_Scissor)
+	{
+		ctx->flags &= ~C3DiF_Scissor;
+		GPUCMD_AddIncrementalWrites(GPUREG_SCISSORTEST_MODE, ctx->scissor, 3);
+	}
+
+	if (ctx->flags & C3DiF_Program)
+	{
+		shaderProgramConfigure(ctx->program, (ctx->flags & C3DiF_VshCode) != 0, (ctx->flags & C3DiF_GshCode) != 0);
+		ctx->flags &= ~(C3DiF_Program | C3DiF_VshCode | C3DiF_GshCode);
+	}
+
+	if (ctx->flags & C3DiF_AttrInfo)
+	{
+		ctx->flags &= ~C3DiF_AttrInfo;
+		C3Di_AttrInfoBind(&ctx->attrInfo);
+	}
+
+	if (ctx->flags & C3DiF_Effect)
+	{
+		ctx->flags &= ~C3DiF_Effect;
+		C3Di_EffectBind(&ctx->effect);
+	}
+
+	if (ctx->flags & C3DiF_TexAll)
+	{
+		u32 units = 0;
+		for (i = 0; i < 3; i ++)
+		{
+			if (ctx->tex[i])
+			{
+				units |= BIT(i); // every bound unit is enabled, dirty or not
+				if (ctx->flags & C3DiF_Tex(i))
+					C3Di_SetTex(i, ctx->tex[i]);
+			}
+		}
+
+		// Enable texture units and clear texture cache
+		ctx->texConfig &= ~7;
+		ctx->texConfig |= units | BIT(16);
+		ctx->flags &= ~C3DiF_TexAll;
+		ctx->flags |= C3DiF_TexStatus; // forces the block below to run in this same call
+	}
+
+	if (ctx->flags & C3DiF_TexStatus)
+	{
+		ctx->flags &= ~C3DiF_TexStatus;
+		GPUCMD_AddMaskedWrite(GPUREG_TEXUNIT_CONFIG, 0xB, ctx->texConfig);
+		// Clear texture cache if requested *after* configuring texture units
+		if (ctx->texConfig & BIT(16))
+		{
+			ctx->texConfig &= ~BIT(16);
+			GPUCMD_AddMaskedWrite(GPUREG_TEXUNIT_CONFIG, 0x4, BIT(16));
+		}
+		GPUCMD_AddWrite(GPUREG_TEXUNIT0_SHADOW, BIT(0));
+	}
+
+	if (ctx->flags & C3DiF_TexEnvBuf)
+	{
+		ctx->flags &= ~C3DiF_TexEnvBuf;
+		GPUCMD_AddMaskedWrite(GPUREG_TEXENV_UPDATE_BUFFER, 0x7, ctx->texEnvBuf);
+		GPUCMD_AddWrite(GPUREG_TEXENV_BUFFER_COLOR, ctx->texEnvBufClr);
+		GPUCMD_AddWrite(GPUREG_FOG_COLOR, ctx->fogClr);
+	}
+
+	if ((ctx->flags & C3DiF_FogLut) && (ctx->texEnvBuf&7) != GPU_NO_FOG)
+	{
+		ctx->flags &= ~C3DiF_FogLut;
+		if (ctx->fogLut)
+		{
+			GPUCMD_AddWrite(GPUREG_FOG_LUT_INDEX, 0);
+			GPUCMD_AddWrites(GPUREG_FOG_LUT_DATA0, ctx->fogLut->data, 128);
+		}
+	}
+
+	if (ctx->flags & C3DiF_Reset)
+	{
+		// Reset texture environment
+		C3D_TexEnv texEnv;
+		C3D_TexEnvInit(&texEnv);
+		for (i = 0; i < 6; i++)
+		{
+			C3Di_TexEnvBind(i, &texEnv);
+		}
+
+		// Reset lighting
+		GPUCMD_AddWrite(GPUREG_LIGHTING_ENABLE0, false);
+		GPUCMD_AddWrite(GPUREG_LIGHTING_ENABLE1,  true);
+
+		// Reset attribute buffer info
+		C3D_BufCfg buffers[12] = { 0 };
+		GPUCMD_AddWrite(GPUREG_ATTRIBBUFFERS_LOC, BUFFER_BASE_PADDR >> 3);
+		GPUCMD_AddIncrementalWrites(GPUREG_ATTRIBBUFFER0_OFFSET, (u32*)buffers, 12 * 3);
+
+		ctx->flags &= ~C3DiF_Reset;
+	}
+
+	C3D_UpdateUniforms(); // uniforms are handled on every call; they track their own dirty flags
+}
+
+static bool C3Di_SplitFrame(u32** pBuf, u32* pSize)
+{
+	C3D_Context* ctx = C3Di_GetContext();
+	// Hands the recorded commands to GPUCMD_Split; returns false when the buffer offset is zero.
+	if (!gpuCmdBufOffset)
+		return false; // Nothing was drawn
+
+	if (ctx->flags & C3DiF_DrawUsed)
+	{
+		ctx->flags &= ~C3DiF_DrawUsed;
+		GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_FLUSH, 1);
+		GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_INVALIDATE, 1);
+		GPUCMD_AddWrite(GPUREG_EARLYDEPTH_CLEAR, 1);
+	}
+
+	GPUCMD_Split(pBuf, pSize); // pBuf and pSize are passed straight through
+	return true;
+}
+
+static void C3D_Fini(void)
+{
+	C3D_Context* ctx = C3Di_GetContext();
+
+	if (ctx->flags & C3DiF_Active) // does nothing unless the context is active
+	{
+		aptUnhook(&hookCookie);
+		C3Di_RenderQueueExit();
+		free(ctx->gxQueue.entries);
+		linearFree(ctx->cmdBuf);
+		ctx->flags = 0;
+	}
+}
+
+static void C3D_BindProgram(shaderProgram_s* program)
+{
+	// Makes 'program' the current one and raises the flags needed to re-send
+	// it. 'program' is dereferenced unconditionally, so it must not be NULL.
+	C3D_Context* ctx = C3Di_GetContext();
+	shaderProgram_s* prev = ctx->program;
+
+	if (prev != program)
+	{
+		// The code flags are raised when there was no previous program, or
+		// when the vertex shader comes from a different DVLP.
+		bool newCode = !prev
+			|| prev->vertexShader->dvle->dvlp != program->vertexShader->dvle->dvlp;
+
+		ctx->program = program;
+		ctx->flags |= C3DiF_Program | C3DiF_AttrInfo;
+		if (newCode)
+			ctx->flags |= C3DiF_VshCode | C3DiF_GshCode;
+	}
+
+	// Shader constants are reloaded on every call, whether or not the
+	// program changed.
+	C3Di_LoadShaderUniforms(program->vertexShader);
+}
diff --git a/third_party/gldc/LICENSE b/third_party/gldc/LICENSE
new file mode 100644
index 0000000..e5f645c
--- /dev/null
+++ b/third_party/gldc/LICENSE
@@ -0,0 +1,25 @@
+BSD 2-Clause License
+
+Copyright (c) 2018, Luke Benstead
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/third_party/gldc/Makefile b/third_party/gldc/Makefile
new file mode 100644
index 0000000..09f3181
--- /dev/null
+++ b/third_party/gldc/Makefile
@@ -0,0 +1,26 @@
+SOURCE_DIRS		:= src src/yalloc
+
+C_FILES := $(foreach dir,$(SOURCE_DIRS),$(wildcard $(dir)/*.c))
+OBJS 	:= $(notdir $(C_FILES:%.c=%.o))
+
+C_FLAGS = -O3 -DNDEBUG -mfsrra -mfsca -fno-math-errno -ffp-contract=fast -ffast-math -O3 -mpretend-cmove -fexpensive-optimizations -fomit-frame-pointer -finline-functions -ml -m4-single-only -ffunction-sections -fdata-sections -std=gnu99
+
+C_DEFINES = -DDREAMCAST -DNDEBUG -D__DREAMCAST__ -D__arch_dreamcast -D_arch_dreamcast -D_arch_sub_pristine
+
+TARGET := libGLdc.a
+
+ifeq ($(strip $(KOS_BASE)),)
+$(error "Please set KOS variables in your environment.")
+endif
+
+default: $(TARGET)
+
+%.o: src/%.c
+	kos-cc $(C_DEFINES) $(C_FLAGS)  -c $< -o $@
+
+%.o: src/yalloc/%.c
+	kos-cc $(C_DEFINES) $(C_FLAGS) -c $< -o $@
+
+$(TARGET): $(OBJS)
+	kos-ar cr $@ $^
+	kos-ranlib $@
\ No newline at end of file
diff --git a/third_party/gldc/README.md b/third_party/gldc/README.md
new file mode 100644
index 0000000..aedd739
--- /dev/null
+++ b/third_party/gldc/README.md
@@ -0,0 +1,66 @@
+This is a fork of GLdc optimised for the Dreamcast port of ClassiCube, and unfortunately is essentially useless for any other project
+
+---
+
+# GLdc
+
+**Development of GLdc has moved to [Gitlab](https://gitlab.com/simulant/GLdc)**
+
+This is a partial implementation of OpenGL 1.2 for the SEGA Dreamcast for use
+with the KallistiOS SDK.
+
+It began as a fork of libGL by Josh Pearson but has undergone a large refactor
+which is essentially a rewrite.
+
+The aim is to implement as much of OpenGL 1.2 as possible, and to add additional
+features via extensions.
+
+Things left to (re)implement:
+
+ - Spotlights (Trivial)
+ - Framebuffer extension (Trivial)
+ - Texture Matrix (Trivial)
+ 
+Things I'd like to do:
+
+ - Use a clean "gl.h"
+ - Define an extension for modifier volumes
+ - Add support for point sprites
+ - Optimise, add unit tests for correctness
+
+# Compiling
+
+GLdc uses CMake for its build system; it currently ships with two "backends":
+
+ - kospvr - This is the hardware-accelerated Dreamcast backend
+ - software - This is a stub software rasterizer used for testing and debugging
+ 
+To compile a Dreamcast debug build, you'll want to do something like the following:
+
+```
+mkdir dcbuild
+cd dcbuild
+cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/Dreamcast.cmake -G "Unix Makefiles" ..
+make
+```
+
+For a release build, replace the cmake line with the following:
+```
+cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/Dreamcast.cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release ..
+```
+
+You will need KallistiOS compiled and configured (e.g. the KOS_BASE environment
+variable must be set)
+
+To compile for PC:
+
+```
+mkdir pcbuild
+cd pcbuild
+cmake -G "Unix Makefiles" ..
+make
+```
+ 
+# Special Thanks!
+
+ - Massive shout out to Hayden Kowalchuk for diagnosing and fixing a large number of bugs while porting GL Quake to the Dreamcast. Absolute hero!  
diff --git a/third_party/gldc/src/aligned_vector.h b/third_party/gldc/src/aligned_vector.h
new file mode 100644
index 0000000..7b152db
--- /dev/null
+++ b/third_party/gldc/src/aligned_vector.h
@@ -0,0 +1,207 @@
+#pragma once
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <malloc.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef __cplusplus
+#define AV_FORCE_INLINE static inline
+#else
+#define AV_NO_INSTRUMENT inline __attribute__((no_instrument_function))
+#define AV_INLINE_DEBUG AV_NO_INSTRUMENT __attribute__((always_inline))
+#define AV_FORCE_INLINE static AV_INLINE_DEBUG
+#endif
+
+
+#ifdef __DREAMCAST__
+#include <kos/string.h>
+
+AV_FORCE_INLINE void *AV_MEMCPY4(void *dest, const void *src, size_t len)
+{
+  if(!len)
+  {
+    return dest;
+  }
+  // Dreamcast-only copy loop: one mov.b load and one mov.b store per iteration, len iterations; returns dest
+  const uint8_t *s = (uint8_t *)src;
+  uint8_t *d = (uint8_t *)dest;
+
+  uint32_t diff = (uint32_t)d - (uint32_t)(s + 1); // extra offset because input gets incremented before output is calculated
+  // Underflow would be like adding a negative offset
+
+  // Can use 'd' as a scratch reg now
+  asm volatile (
+    "clrs\n" // Align for parallelism (CO) - SH4a use "stc SR, Rn" instead with a dummy Rn
+  ".align 2\n"
+  "0:\n\t"
+    "dt %[size]\n\t" // (--len) ? 0 -> T : 1 -> T (EX 1)
+    "mov.b @%[in]+, %[scratch]\n\t" // scratch = *(s++) (LS 1/2)
+    "bf.s 0b\n\t" // while(s != nexts) aka while(!T) (BR 1/2)
+    " mov.b %[scratch], @(%[offset], %[in])\n" // *(datatype_of_s*) ((char*)s + diff) = scratch, where src + diff = dest (LS 1)
+    : [in] "+&r" ((uint32_t)s), [scratch] "=&r" ((uint32_t)d), [size] "+&r" (len) // outputs
+    : [offset] "z" (diff) // inputs
+    : "t", "memory" // clobbers
+  );
+
+  return dest;
+}
+
+#else
+#define AV_MEMCPY4 memcpy
+#endif
+#define AV_ELEMENT_SIZE 32 /* bytes per element; all index/size arithmetic below uses it */
+
+typedef struct {
+    uint32_t size; /* elements currently in use */
+    uint32_t capacity; /* elements the current allocation can hold */
+} __attribute__((aligned(32))) AlignedVectorHeader;
+
+typedef struct {
+    AlignedVectorHeader hdr;
+    uint8_t* data; /* element storage; NULL right after aligned_vector_init */
+} AlignedVector;
+
+#define ALIGNED_VECTOR_CHUNK_SIZE 256u
+
+/* Rounds v up to the next multiple of ALIGNED_VECTOR_CHUNK_SIZE */
+#define ROUND_TO_CHUNK_SIZE(v) \
+    ((((v) + ALIGNED_VECTOR_CHUNK_SIZE - 1) / ALIGNED_VECTOR_CHUNK_SIZE) * ALIGNED_VECTOR_CHUNK_SIZE)
+
+
+AV_FORCE_INLINE void* aligned_vector_at(const AlignedVector* vector, const uint32_t index) {
+    /* The bounds check only exists in builds where assert() is active. */
+    assert(index < vector->hdr.size);
+    return &vector->data[index * AV_ELEMENT_SIZE];
+}
+
+AV_FORCE_INLINE void* aligned_vector_reserve(AlignedVector* vector, uint32_t element_count) {
+    AlignedVectorHeader* hdr = &vector->hdr;
+    uint32_t original_byte_size = (hdr->size * AV_ELEMENT_SIZE);
+    /* Ensures room for element_count elements in total; returns the address just past the last used one */
+    /* Already large enough (an exact fit included): nothing to reallocate */
+    if(element_count <= hdr->capacity) {
+        return vector->data + original_byte_size;
+    }
+
+    /* We overallocate so that we don't make small allocations during push backs */
+    element_count = ROUND_TO_CHUNK_SIZE(element_count);
+    uint32_t new_byte_size = (element_count * AV_ELEMENT_SIZE);
+    uint8_t* original_data = vector->data;
+    vector->data = (uint8_t*) memalign(0x20, new_byte_size);
+    if(!vector->data) { /* checked even with NDEBUG: callers never test the result */
+        fprintf(stderr, "aligned_vector_reserve: out of memory\n");
+        exit(1);
+    }
+    AV_MEMCPY4(vector->data, original_data, original_byte_size);
+    free(original_data);
+    hdr->capacity = element_count;
+    return vector->data + original_byte_size;
+}
+
+AV_FORCE_INLINE AlignedVectorHeader* aligned_vector_header(const AlignedVector* vector) {
+    return (AlignedVectorHeader*) &vector->hdr; /* note: the cast drops the const qualifier */
+}
+
+AV_FORCE_INLINE uint32_t aligned_vector_size(const AlignedVector* vector) {
+    /* Number of elements currently in use. */
+    return vector->hdr.size;
+}
+
+AV_FORCE_INLINE uint32_t aligned_vector_capacity(const AlignedVector* vector) {
+    /* Number of elements the current allocation can hold. */
+    return vector->hdr.capacity;
+}
+
+AV_FORCE_INLINE void* aligned_vector_front(const AlignedVector* vector) {
+    return vector->data; /* start of the storage; NULL right after aligned_vector_init */
+}
+
+#define av_assert(x) /* always-on check: unlike assert(), not removed by NDEBUG; exits with status 1 */ \
+    do {\
+        if(!(x)) {\
+            fprintf(stderr, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\
+            exit(1);\
+        }\
+    } while(0)
+
+/* Resizes the array and returns a pointer to the first new element (if upsizing) or NULL (if downsizing or unchanged) */
+AV_FORCE_INLINE void* aligned_vector_resize(AlignedVector* vector, const uint32_t element_count) {
+    void* ret = NULL;
+
+    AlignedVectorHeader* hdr = &vector->hdr;
+    uint32_t previous_count = hdr->size;
+    if(hdr->capacity <= element_count) {
+        /* No spare capacity for the request (capacity <= element_count): go through reserve (slow) */
+
+        aligned_vector_reserve(vector, element_count);
+        hdr->size = element_count;
+
+        ret = aligned_vector_at(vector, previous_count);
+
+        av_assert(hdr->size == element_count);
+        av_assert(hdr->size <= hdr->capacity);
+    } else if(previous_count < element_count) {
+        /* So we grew, but had the capacity, just get a pointer to
+         * where we were */
+        hdr->size = element_count;
+        av_assert(hdr->size < hdr->capacity);
+        ret = aligned_vector_at(vector, previous_count);
+    } else if(hdr->size != element_count) {
+        hdr->size = element_count; /* shrinking: only the count drops, storage and capacity stay */
+        av_assert(hdr->size < hdr->capacity);
+    }
+
+    return ret;
+}
+
+AV_FORCE_INLINE void* aligned_vector_push_back(AlignedVector* vector, const void* objs, uint32_t count) {
+    /* Appends count elements copied from objs; returns a pointer to the first appended element */
+    AlignedVectorHeader* hdr = &vector->hdr;
+
+    assert(count);
+#ifndef NDEBUG
+    uint32_t initial_size = hdr->size;
+#endif
+
+    uint8_t* dest = (uint8_t*) aligned_vector_resize(vector, hdr->size + count);
+    assert(dest);
+
+    /* Copy the objects in */
+    AV_MEMCPY4(dest, objs, count * AV_ELEMENT_SIZE);
+
+    assert(hdr->size == initial_size + count); /* initial_size only exists when asserts are compiled in */
+    return dest;
+}
+
+
+AV_FORCE_INLINE void* aligned_vector_extend(AlignedVector* vector, const uint32_t additional_count) {
+    /* Grow by additional_count elements and hand back the first new slot. */
+    void* first_new = aligned_vector_resize(vector, vector->hdr.size + additional_count);
+    assert(first_new);  // Should always return something
+    return first_new;
+}
+
+AV_FORCE_INLINE void aligned_vector_clear(AlignedVector* vector){
+    /* Only the element count is reset; capacity and storage are untouched. */
+    vector->hdr.size = 0;
+}
+
+AV_FORCE_INLINE void aligned_vector_init(AlignedVector* vector) {
+    /* Start empty: no elements, no capacity, and no storage until the
+     * first reserve/resize. */
+    vector->data = NULL;
+    vector->hdr.capacity = 0;
+    vector->hdr.size = 0;
+}
+
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/third_party/gldc/src/flush.c b/third_party/gldc/src/flush.c
new file mode 100644
index 0000000..f7328bd
--- /dev/null
+++ b/third_party/gldc/src/flush.c
@@ -0,0 +1,66 @@
+#include <stdbool.h>
+#include "aligned_vector.h"
+#include "private.h"
+
+PolyList OP_LIST;
+PolyList PT_LIST;
+PolyList TR_LIST;
+
+/**
+ *  FAST_MODE will use invW for all Z coordinates sent to the
+ *  GPU.
+ *
+ *  This will break orthographic mode so default is FALSE
+ **/
+
+#define FAST_MODE GL_FALSE
+
+void glKosInit() {
+    TRACE();
+
+    _glInitContext();
+    _glInitTextures();
+
+    /* One vector per PVR polygon list type; each is tagged, emptied and then
+     * pre-sized (reserve counts are in AV_ELEMENT_SIZE-byte elements). */
+    OP_LIST.list_type = PVR_LIST_OP_POLY;
+    PT_LIST.list_type = PVR_LIST_PT_POLY;
+    TR_LIST.list_type = PVR_LIST_TR_POLY;
+    aligned_vector_init(&OP_LIST.vector);
+    aligned_vector_init(&PT_LIST.vector);
+    aligned_vector_init(&TR_LIST.vector);
+    aligned_vector_reserve(&OP_LIST.vector, 3 * 1024);
+    aligned_vector_reserve(&PT_LIST.vector, 3 * 512);
+    aligned_vector_reserve(&TR_LIST.vector, 3 * 1024);
+}
+
+
+void glKosSwapBuffers() {
+    TRACE();
+    /* Submits every list holding more than two elements inside one PVR scene, then empties all three */
+    pvr_scene_begin();
+        if(aligned_vector_size(&OP_LIST.vector) > 2) {
+            pvr_list_begin(PVR_LIST_OP_POLY);
+            SceneListSubmit((Vertex*) aligned_vector_front(&OP_LIST.vector), aligned_vector_size(&OP_LIST.vector));
+            pvr_list_finish();
+        }
+
+        if(aligned_vector_size(&PT_LIST.vector) > 2) {
+            pvr_list_begin(PVR_LIST_PT_POLY);
+            SceneListSubmit((Vertex*) aligned_vector_front(&PT_LIST.vector), aligned_vector_size(&PT_LIST.vector));
+            pvr_list_finish();
+        }
+
+        if(aligned_vector_size(&TR_LIST.vector) > 2) {
+            pvr_list_begin(PVR_LIST_TR_POLY);
+            SceneListSubmit((Vertex*) aligned_vector_front(&TR_LIST.vector), aligned_vector_size(&TR_LIST.vector));
+            pvr_list_finish();
+        }
+    pvr_scene_finish();
+    /* clear only resets the element counts; the allocations are reused next frame */
+    aligned_vector_clear(&OP_LIST.vector);
+    aligned_vector_clear(&PT_LIST.vector);
+    aligned_vector_clear(&TR_LIST.vector);
+
+    _glApplyScissor(true);
+}
diff --git a/third_party/gldc/src/private.h b/third_party/gldc/src/private.h
new file mode 100644
index 0000000..ed80647
--- /dev/null
+++ b/third_party/gldc/src/private.h
@@ -0,0 +1,187 @@
+#ifndef PRIVATE_H
+#define PRIVATE_H
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include "sh4.h"
+#include "types.h"
+#include "aligned_vector.h"
+
+#define MAX_TEXTURE_COUNT 768
+
+
+#define GL_SCISSOR_TEST     0x0008
+#define GL_NEAREST          0x2600
+#define GL_LINEAR           0x2601
+#define GL_OUT_OF_MEMORY    0x0505
+
+#define GLushort   unsigned short
+#define GLuint     unsigned int
+#define GLenum     unsigned int
+#define GLubyte    unsigned char
+#define GLboolean  unsigned char
+
+#define GL_FALSE   0
+#define GL_TRUE    1
+
+
+void glClearDepth(float depth);
+
+GLuint gldcGenTexture(void);
+void   gldcDeleteTexture(GLuint texture);
+void   gldcBindTexture(GLuint texture);
+
+/* Loads texture from SH4 RAM into PVR VRAM */
+int  gldcAllocTexture(int w, int h, int format);
+void gldcGetTexture(void** data, int* width, int* height);
+
+void glViewport(int x, int y, int width, int height);
+void glScissor( int x, int y, int width, int height);
+
+void glKosInit();
+void glKosSwapBuffers();
+
+
+extern void* memcpy4 (void *dest, const void *src, size_t count);
+
+#define GL_NO_INSTRUMENT inline __attribute__((no_instrument_function))
+#define GL_INLINE_DEBUG GL_NO_INSTRUMENT __attribute__((always_inline))
+#define GL_FORCE_INLINE static GL_INLINE_DEBUG
+#define _GL_UNUSED(x) (void)(x)
+
+#define TRACE_ENABLED 0
+#define TRACE() if(TRACE_ENABLED) {fprintf(stderr, "%s\n", __func__);} (void) 0
+
+typedef struct {
+    unsigned int flags;      /* Constant PVR_CMD_USERCLIP */
+    unsigned int d1, d2, d3; /* Ignored for this type */
+    unsigned int sx,         /* Start x */
+             sy,         /* Start y */
+             ex,         /* End x */
+             ey;         /* End y */
+} PVRTileClipCommand; /* Tile Clip command for the pvr */
+
+typedef struct {
+    unsigned int list_type; /* set to one of the PVR_LIST_*_POLY constants in glKosInit */
+    AlignedVector vector; /* handed to SceneListSubmit as an array of Vertex in glKosSwapBuffers */
+} PolyList;
+
+typedef struct {
+    float x_plus_hwidth;
+    float y_plus_hheight;
+    float hwidth;  /* width * 0.5f */
+    float hheight; /* height * 0.5f */
+} Viewport;
+
+extern Viewport VIEWPORT;
+
+typedef struct {
+    //0
+    GLuint   index;
+    GLuint   color; /* This is the PVR texture format */
+    //8
+    GLenum minFilter;
+    GLenum magFilter;
+    //16
+    void *data;
+    //20
+    GLushort width;
+    GLushort height;
+    // 24
+    GLushort  mipmap;  /* Bitmask of supplied mipmap levels */
+    // 26
+    GLubyte mipmap_bias;
+    GLubyte _pad3;
+    // 28
+    GLushort _pad0;
+    // 30
+    GLubyte _pad1;
+    GLubyte _pad2;
+} __attribute__((aligned(32))) TextureObject;
+
+
+GL_FORCE_INLINE void memcpy_vertex(Vertex *dest, const Vertex *src) {
+#ifdef __DREAMCAST__
+    _Complex float double_scratch;
+    /* Copies one Vertex as four fmov.d load/store pairs (destination stepped by 8 each time), between two fschg toggles */
+    asm volatile (
+        "fschg\n\t"
+        "clrs\n\t"
+        ".align 2\n\t"
+        "fmov.d @%[in]+, %[scratch]\n\t"
+        "fmov.d %[scratch], @%[out]\n\t"
+        "fmov.d @%[in]+, %[scratch]\n\t"
+        "add #8, %[out]\n\t"
+        "fmov.d %[scratch], @%[out]\n\t"
+        "fmov.d @%[in]+, %[scratch]\n\t"
+        "add #8, %[out]\n\t"
+        "fmov.d %[scratch], @%[out]\n\t"
+        "fmov.d @%[in], %[scratch]\n\t"
+        "add #8, %[out]\n\t"
+        "fmov.d %[scratch], @%[out]\n\t"
+        "fschg\n"
+        : [in] "+&r" ((uint32_t) src), [scratch] "=&d" (double_scratch), [out] "+&r" ((uint32_t) dest)
+        :
+        : "t", "memory" // clobbers
+    );
+#else
+    *dest = *src; /* portable fallback: plain struct assignment */
+#endif
+}
+
+void _glInitContext();
+void _glInitSubmissionTarget();
+void _glInitTextures();
+
+extern TextureObject* TEXTURE_ACTIVE;
+extern GLboolean TEXTURES_ENABLED;
+
+extern GLboolean DEPTH_TEST_ENABLED;
+extern GLboolean DEPTH_MASK_ENABLED;
+
+extern GLboolean CULLING_ENABLED;
+
+extern GLboolean FOG_ENABLED;
+extern GLboolean ALPHA_TEST_ENABLED;
+extern GLboolean BLEND_ENABLED;
+
+extern GLboolean SCISSOR_TEST_ENABLED;
+extern GLenum SHADE_MODEL;
+extern GLboolean AUTOSORT_ENABLED;
+
+
+extern PolyList OP_LIST;
+extern PolyList PT_LIST;
+extern PolyList TR_LIST;
+
+GL_FORCE_INLINE PolyList* _glActivePolyList() {
+    /* Blending wins over alpha testing; with neither enabled the opaque
+     * list is used. */
+    if(BLEND_ENABLED) {
+        return &TR_LIST;
+    }
+
+    return ALPHA_TEST_ENABLED ? &PT_LIST : &OP_LIST;
+}
+
+/* Memory allocation extension (GL_KOS_texture_memory_management) */
+void glDefragmentTextureMemory_KOS(void);
+
+GLuint _glFreeTextureMemory(void);
+GLuint _glUsedTextureMemory(void);
+GLuint _glFreeContiguousTextureMemory(void);
+
+void _glApplyScissor(int force);
+
+extern GLboolean STATE_DIRTY;
+
+
+/* This is from KOS pvr_buffers.c */
+#define PVR_MIN_Z 0.0001f
+
+#define MIN(a,b) (((a)<(b))?(a):(b))
+#define MAX(a,b) (((a)>(b))?(a):(b))
+#define CLAMP( X, _MIN, _MAX )  ( (X)<(_MIN) ? (_MIN) : ((X)>(_MAX) ? (_MAX) : (X)) )
+
+#endif // PRIVATE_H
diff --git a/third_party/gldc/src/sh4.c b/third_party/gldc/src/sh4.c
new file mode 100644
index 0000000..0dff66f
--- /dev/null
+++ b/third_party/gldc/src/sh4.c
@@ -0,0 +1,494 @@
+#include <math.h>
+#include "sh4.h"
+#include "sh4_math.h"
+
+#define CLIP_DEBUG 0
+
+#define likely(x)      __builtin_expect(!!(x), 1)
+#define unlikely(x)    __builtin_expect(!!(x), 0)
+
+#define SQ_BASE_ADDRESS (void*) 0xe0000000
+
+GL_FORCE_INLINE float _glFastInvert(float x) {
+    return MATH_fsrra(x * x); /* 1/sqrt(x*x), i.e. 1/|x| -- the sign of x is dropped */
+}
+
+GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex) {
+    TRACE();
+    /* In place: x/y become viewport coordinates and z becomes an inverse depth value. */
+    const float f = _glFastInvert(vertex->w); /* 1/|w| */
+
+    /* Convert to NDC (multiply by 1/w), then scale and offset by the viewport */
+    vertex->xyz[0] = (vertex->xyz[0] * f * VIEWPORT.hwidth)  + VIEWPORT.x_plus_hwidth;
+    vertex->xyz[1] = (vertex->xyz[1] * f * VIEWPORT.hheight) + VIEWPORT.y_plus_hheight;
+
+    /* Orthographic projections (detected below by w being exactly 1.0) need to
+    use invZ, otherwise we lose the depth information. As w == 1, and the
+    clip-space range is -w to +w, we add 1.0 to the Z to bring it into range,
+    plus a little extra (0.0001) to avoid a divide by zero.
+    */
+    if(vertex->w == 1.0f) {
+        vertex->xyz[2] = _glFastInvert(1.0001f + vertex->xyz[2]);
+    } else {
+        vertex->xyz[2] = f; /* perspective case: depth is stored as 1/|w| */
+    }
+}
+
+
+volatile uint32_t *sq = SQ_BASE_ADDRESS;
+
+static inline void _glFlushBuffer() {
+    TRACE();
+    /* Wait for both store queues to complete: rewind sq to 0xe0000000 (the same address as
+       SQ_BASE_ADDRESS), then write zero to word 0 and word 8 (NOTE(review): presumably one word per queue -- confirm) */
+    sq = (uint32_t*) 0xe0000000;
+    sq[0] = sq[8] = 0;
+}
+
+static inline void _glPushHeaderOrVertex(Vertex* v)  {
+    TRACE();
+    /* Copies eight 32-bit words (32 bytes) starting at v to the current sq position. */
+    uint32_t* s = (uint32_t*) v;
+    sq[0] = *(s++);
+    sq[1] = *(s++);
+    sq[2] = *(s++);
+    sq[3] = *(s++);
+    sq[4] = *(s++);
+    sq[5] = *(s++);
+    sq[6] = *(s++);
+    sq[7] = *(s++);
+    __asm__("pref @%0" : : "r"(sq)); /* NOTE(review): presumably pref on a store queue address starts the write-out -- confirm */
+    sq += 8; /* advance by eight words to the next 32-byte slot */
+}
+
+static void _glClipEdge(const Vertex* const v1, const Vertex* const v2, Vertex* vout) {
+    /* Writes to vout the attributes of v1 and v2 linearly interpolated at t (computed below).
+       vout->flags is not written here; callers assign it afterwards. */
+    const float d0 = v1->w + v1->xyz[2];
+    const float d1 = v2->w + v2->xyz[2];
+    /* t = |d0| / |d1 - d0| plus a small bias; fabsf keeps the arithmetic in single precision */
+    const float t = (fabsf(d0) * MATH_fsrra((d1 - d0) * (d1 - d0))) + 0.000001f;
+    const float invt = 1.0f - t;
+
+    vout->xyz[0] = invt * v1->xyz[0] + t * v2->xyz[0];
+    vout->xyz[1] = invt * v1->xyz[1] + t * v2->xyz[1];
+    vout->xyz[2] = invt * v1->xyz[2] + t * v2->xyz[2];
+    vout->uv[0] = invt * v1->uv[0] + t * v2->uv[0];
+    vout->uv[1] = invt * v1->uv[1] + t * v2->uv[1];
+    vout->w = invt * v1->w + t * v2->w;
+    vout->bgra[0] = invt * v1->bgra[0] + t * v2->bgra[0];
+    vout->bgra[1] = invt * v1->bgra[1] + t * v2->bgra[1];
+    vout->bgra[2] = invt * v1->bgra[2] + t * v2->bgra[2];
+    vout->bgra[3] = invt * v1->bgra[3] + t * v2->bgra[3];
+}
+
+#define SPAN_SORT_CFG 0x005F8030
+static volatile uint32_t* PVR_LMMODE0 = (uint32_t*) 0xA05F6884;
+static volatile uint32_t* PVR_LMMODE1 = (uint32_t*) 0xA05F6888;
+static volatile uint32_t* QACR = (uint32_t*) 0xFF000038;
+
+#define V0_VIS (1 << 0)
+#define V1_VIS (1 << 1)
+#define V2_VIS (1 << 2)
+#define V3_VIS (1 << 3)
+
+
+// Pushes the visible part of a quad (visible_mask: one Vn_VIS bit per visible vertex). See https://casual-effects.com/research/McGuire2011Clipping/clip.glsl
+static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_t visible_mask) {
+    Vertex __attribute__((aligned(32))) scratch[4]; // only scratch[0] and scratch[1] are used below
+    Vertex* a = &scratch[0]; // a and b receive the two new vertices produced by _glClipEdge
+    Vertex* b = &scratch[1];
+
+    switch(visible_mask) {
+    case V0_VIS:
+    {
+        //          v0
+        //         / |
+        //       /   |
+        // .....A....B...
+        //    /      |
+        //  v3--v2---v1
+        _glClipEdge(v3, v0, a);
+        a->flags = PVR_CMD_VERTEX_EOL;
+        _glClipEdge(v0, v1, b);
+        b->flags = PVR_CMD_VERTEX;
+
+        _glPerspectiveDivideVertex(v0);
+        _glPushHeaderOrVertex(v0);
+
+        _glPerspectiveDivideVertex(b);
+        _glPushHeaderOrVertex(b);
+
+        _glPerspectiveDivideVertex(a);
+        _glPushHeaderOrVertex(a);
+    }
+    break;
+    case V1_VIS:
+    {
+        //          v1
+        //         / |
+        //       /   |
+        // ....A.....B...
+        //    /      |
+        //  v0--v3---v2
+        _glClipEdge(v0, v1, a);
+        a->flags = PVR_CMD_VERTEX;
+        _glClipEdge(v1, v2, b);
+        b->flags = PVR_CMD_VERTEX_EOL;
+
+        _glPerspectiveDivideVertex(a);
+        _glPushHeaderOrVertex(a);
+
+        _glPerspectiveDivideVertex(v1);
+        _glPushHeaderOrVertex(v1);
+
+        _glPerspectiveDivideVertex(b);
+        _glPushHeaderOrVertex(b);
+    } break;
+    case V2_VIS:
+    {
+        //          v2
+        //         / |
+        //       /   |
+        // ....A.....B...
+        //    /      |
+        //  v1--v0---v3
+
+        _glClipEdge(v1, v2, a);
+        a->flags = PVR_CMD_VERTEX;
+        _glClipEdge(v2, v3, b);
+        b->flags = PVR_CMD_VERTEX_EOL;
+
+        _glPerspectiveDivideVertex(a);
+        _glPushHeaderOrVertex(a);
+
+        _glPerspectiveDivideVertex(v2);
+        _glPushHeaderOrVertex(v2);
+
+        _glPerspectiveDivideVertex(b);
+        _glPushHeaderOrVertex(b);
+    } break;
+    case V3_VIS:
+    {
+        //          v3
+        //         / |
+        //       /   |
+        // ....A.....B...
+        //    /      |
+        //  v2--v1---v0
+        _glClipEdge(v2, v3, a);
+        a->flags = PVR_CMD_VERTEX;
+        _glClipEdge(v3, v0, b);
+        b->flags = PVR_CMD_VERTEX;
+
+        _glPerspectiveDivideVertex(b);
+        _glPushHeaderOrVertex(b);
+
+        _glPerspectiveDivideVertex(a);
+        _glPushHeaderOrVertex(a);
+
+        _glPerspectiveDivideVertex(v3);
+        _glPushHeaderOrVertex(v3);
+    }
+    break;
+    case V0_VIS | V1_VIS:
+    {
+        //    v0-----------v1
+        //      \           |
+        //   ....B..........A...
+        //         \        |
+        //          v3-----v2
+        _glClipEdge(v1, v2, a);
+        a->flags = PVR_CMD_VERTEX;
+        _glClipEdge(v3, v0, b);
+        b->flags = PVR_CMD_VERTEX_EOL;
+
+        _glPerspectiveDivideVertex(v1);
+        _glPushHeaderOrVertex(v1);
+
+        _glPerspectiveDivideVertex(a);
+        _glPushHeaderOrVertex(a);
+
+        _glPerspectiveDivideVertex(v0);
+        _glPushHeaderOrVertex(v0);
+
+        _glPerspectiveDivideVertex(b);
+        _glPushHeaderOrVertex(b);
+    } break;
+    // case V0_VIS | V2_VIS: degenerate case that should never happen
+    case V0_VIS | V3_VIS:
+    {
+        //    v3-----------v0
+        //      \           |
+        //   ....B..........A...
+        //         \        |
+        //          v2-----v1
+        _glClipEdge(v0, v1, a);
+        a->flags = PVR_CMD_VERTEX;
+        _glClipEdge(v2, v3, b);
+        b->flags = PVR_CMD_VERTEX;
+
+        _glPerspectiveDivideVertex(a);
+        _glPushHeaderOrVertex(a);
+
+        _glPerspectiveDivideVertex(b);
+        _glPushHeaderOrVertex(b);
+
+        _glPerspectiveDivideVertex(v0);
+        _glPushHeaderOrVertex(v0);
+
+        _glPerspectiveDivideVertex(v3);
+        _glPushHeaderOrVertex(v3);
+    } break;
+    case V1_VIS | V2_VIS:
+    {
+        //    v1-----------v2
+        //      \           |
+        //   ....B..........A...
+        //         \        |
+        //          v0-----v3
+        _glClipEdge(v2, v3, a);
+        a->flags = PVR_CMD_VERTEX_EOL;
+        _glClipEdge(v0, v1, b);
+        b->flags = PVR_CMD_VERTEX;
+
+        _glPerspectiveDivideVertex(v1);
+        _glPushHeaderOrVertex(v1);
+
+        _glPerspectiveDivideVertex(v2);
+        _glPushHeaderOrVertex(v2);
+
+        _glPerspectiveDivideVertex(b);
+        _glPushHeaderOrVertex(b);
+
+        _glPerspectiveDivideVertex(a);
+        _glPushHeaderOrVertex(a);
+    } break;
+    // case V1_VIS | V3_VIS: degenerate case that should never happen
+    case V2_VIS | V3_VIS:
+    {
+        //    v2-----------v3
+        //      \           |
+        //   ....B..........A...
+        //         \        |
+        //          v1-----v0
+        _glClipEdge(v3, v0, a);
+        a->flags = PVR_CMD_VERTEX;
+        _glClipEdge(v1, v2, b);
+        b->flags = PVR_CMD_VERTEX;
+
+        _glPerspectiveDivideVertex(b);
+        _glPushHeaderOrVertex(b);
+
+        _glPerspectiveDivideVertex(v2);
+        _glPushHeaderOrVertex(v2);
+
+        _glPerspectiveDivideVertex(a);
+        _glPushHeaderOrVertex(a);
+
+        _glPerspectiveDivideVertex(v3);
+        _glPushHeaderOrVertex(v3);
+    } break;
+    case V0_VIS | V1_VIS | V2_VIS:
+    {
+        //        --v1--
+        //    v0--      --v2
+        //      \        |
+        //   .....B.....A...
+        //          \   |
+        //            v3
+        // v1,v2,v0  v2,v0,A  v0,A,B
+        _glClipEdge(v2, v3, a);
+        a->flags = PVR_CMD_VERTEX;
+        _glClipEdge(v3, v0, b);
+        b->flags = PVR_CMD_VERTEX_EOL;
+
+        _glPerspectiveDivideVertex(v1);
+        _glPushHeaderOrVertex(v1);
+
+        _glPerspectiveDivideVertex(v2);
+        _glPushHeaderOrVertex(v2);
+
+        _glPerspectiveDivideVertex(v0);
+        _glPushHeaderOrVertex(v0);
+
+        _glPerspectiveDivideVertex(a);
+        _glPushHeaderOrVertex(a);
+
+        _glPerspectiveDivideVertex(b);
+        _glPushHeaderOrVertex(b);
+    } break;
+    case V0_VIS | V1_VIS | V3_VIS:
+    {
+        //        --v0--
+        //    v3--      --v1
+        //      \        |
+        //   .....B.....A...
+        //          \   |
+        //            v2
+        // v0,v1,v3  v1,v3,A  v3,A,B
+        _glClipEdge(v1, v2, a);
+        a->flags  = PVR_CMD_VERTEX;
+        _glClipEdge(v2, v3, b);
+        b->flags  = PVR_CMD_VERTEX_EOL;
+        v3->flags = PVR_CMD_VERTEX; // v3 arrives flagged PVR_CMD_VERTEX_EOL (see SceneListSubmit) but is not pushed last here
+
+        _glPerspectiveDivideVertex(v0);
+        _glPushHeaderOrVertex(v0);
+
+        _glPerspectiveDivideVertex(v1);
+        _glPushHeaderOrVertex(v1);
+
+        _glPerspectiveDivideVertex(v3);
+        _glPushHeaderOrVertex(v3);
+
+        _glPerspectiveDivideVertex(a);
+        _glPushHeaderOrVertex(a);
+
+        _glPerspectiveDivideVertex(b);
+        _glPushHeaderOrVertex(b);
+    } break;
+    case V0_VIS | V2_VIS | V3_VIS:
+    {
+        //        --v3--
+        //    v2--      --v0
+        //      \        |
+        //   .....B.....A...
+        //          \   |
+        //            v1
+        // v3,v0,v2  v0,v2,A  v2,A,B
+        _glClipEdge(v0, v1, a);
+        a->flags  = PVR_CMD_VERTEX;
+        _glClipEdge(v1, v2, b);
+        b->flags  = PVR_CMD_VERTEX_EOL;
+        v3->flags = PVR_CMD_VERTEX; // v3 is pushed first here, so b carries the EOL flag instead
+
+        _glPerspectiveDivideVertex(v3);
+        _glPushHeaderOrVertex(v3);
+
+        _glPerspectiveDivideVertex(v0);
+        _glPushHeaderOrVertex(v0);
+
+        _glPerspectiveDivideVertex(v2);
+        _glPushHeaderOrVertex(v2);
+
+        _glPerspectiveDivideVertex(a);
+        _glPushHeaderOrVertex(a);
+
+        _glPerspectiveDivideVertex(b);
+        _glPushHeaderOrVertex(b);
+    } break;
+    case V1_VIS | V2_VIS | V3_VIS:
+    {
+        //        --v2--
+        //    v1--      --v3
+        //      \        |
+        //   .....B.....A...
+        //          \   |
+        //            v0
+        // v2,v3,v1  v3,v1,A  v1,A,B
+        _glClipEdge(v3, v0, a);
+        a->flags  = PVR_CMD_VERTEX;
+        _glClipEdge(v0, v1, b);
+        b->flags  = PVR_CMD_VERTEX_EOL;
+        v3->flags = PVR_CMD_VERTEX; // v3 is pushed second here, so b carries the EOL flag instead
+
+        _glPerspectiveDivideVertex(v2);
+        _glPushHeaderOrVertex(v2);
+
+        _glPerspectiveDivideVertex(v3);
+        _glPushHeaderOrVertex(v3);
+
+        _glPerspectiveDivideVertex(v1);
+        _glPushHeaderOrVertex(v1);
+
+        _glPerspectiveDivideVertex(a);
+        _glPushHeaderOrVertex(a);
+
+        _glPerspectiveDivideVertex(b);
+        _glPushHeaderOrVertex(b);
+    } break;
+    } // no default: any other mask (0, or the two diagonal pairs noted above) pushes nothing
+}
+
+void SceneListSubmit(Vertex* v3, int n) {
+    TRACE();
+    /* Pushes the n entries at v3 (headers and quads) to the store queues. You need at least a header, and 3 vertices to render anything */
+    if(n < 4) return;
+
+    PVR_SET(SPAN_SORT_CFG, 0x0);
+
+    //Set PVR DMA registers
+    *PVR_LMMODE0 = 0;
+    *PVR_LMMODE1 = 0;
+
+    //Set QACR registers
+    QACR[1] = QACR[0] = 0x11;
+
+#if CLIP_DEBUG
+    Vertex* vertex = v3; // dump the incoming list before anything is modified
+    for(int i = 0; i < n; ++i) {
+        fprintf(stderr, "{%f, %f, %f, %f}, // %x (%p)\n", vertex[i].xyz[0], vertex[i].xyz[1], vertex[i].xyz[2], vertex[i].w, (unsigned) vertex[i].flags, (void*) &vertex[i]);
+    }
+
+    fprintf(stderr, "----\n");
+#endif
+    uint8_t visible_mask = 0;
+
+    sq = SQ_BASE_ADDRESS;
+
+    for(int i = 0; i < n; ++i, ++v3) {
+        PREFETCH(v3 + 1);
+        switch(v3->flags & 0xFF000000) {
+        case PVR_CMD_VERTEX_EOL:
+            break; // last vertex of a quad: leave the switch and submit the quad below
+        case PVR_CMD_VERTEX:
+            continue; // not the last vertex yet; it is reached later through v3 - 1 .. v3 - 3
+        default:
+            _glPushHeaderOrVertex(v3); // anything else is pushed through unchanged
+            continue;
+        }
+
+        // Quads [0, 1, 2, 3] -> Triangles [{0, 1, 2}  {2, 3, 0}]; assumes the 3 entries before v3 are the quad's other vertices
+        Vertex* const v0 = v3 - 3;
+        Vertex* const v1 = v3 - 2;
+        Vertex* const v2 = v3 - 1;
+
+        visible_mask = v3->flags & 0xFF; // the low byte of the last vertex holds the Vn_VIS bits
+        v3->flags &= ~0xFF;              // clear them before the flags word is pushed
+
+        // Stats gathering found that when testing a 64x64x64 sized world, at most
+        //   ~400-500 triangles needed clipping
+        //   ~13% of the triangles in a frame needed clipping (percentage increased when fewer triangles overall)
+        // Based on this, the decision was made to optimise for rendering quads that
+        //  were either entirely visible or entirely culled, at the expense of making
+        //  partially visible quads a bit slower due to needing to be split into two triangles first
+        // Performance measuring indicated that overall FPS improved from this change
+        //  to switching to try to process 1 quad instead of 2 triangles though
+
+        switch(visible_mask) {
+        case V0_VIS | V1_VIS | V2_VIS | V3_VIS: // All vertices visible
+        {
+            // Triangle strip: {1,2,0} {2,0,3}
+            _glPerspectiveDivideVertex(v1);
+            _glPushHeaderOrVertex(v1);
+
+            _glPerspectiveDivideVertex(v2);
+            _glPushHeaderOrVertex(v2);
+
+            _glPerspectiveDivideVertex(v0);
+            _glPushHeaderOrVertex(v0);
+
+            _glPerspectiveDivideVertex(v3);
+            _glPushHeaderOrVertex(v3);
+        }
+        break;
+
+        default: // Some vertices visible (a mask of 0 also lands here and pushes nothing)
+            SubmitClipped(v0, v1, v2, v3, visible_mask);
+            break;
+        }
+    }
+
+    _glFlushBuffer();
+}
diff --git a/third_party/gldc/src/sh4.h b/third_party/gldc/src/sh4.h
new file mode 100644
index 0000000..fddb790
--- /dev/null
+++ b/third_party/gldc/src/sh4.h
@@ -0,0 +1,77 @@
+#pragma once
+
+#include <kos.h>
+#include <dc/pvr.h>
+
+#include "private.h"
+#include "types.h"
+
+#ifndef GL_FORCE_INLINE
+#define GL_NO_INSTRUMENT inline __attribute__((no_instrument_function))
+#define GL_INLINE_DEBUG GL_NO_INSTRUMENT __attribute__((always_inline))
+#define GL_FORCE_INLINE static GL_INLINE_DEBUG
+#endif
+
+#define PREFETCH(addr) __builtin_prefetch((addr))
+
+GL_FORCE_INLINE void* memcpy_fast(void *dest, const void *src, size_t len) { // copies len bytes one at a time; returns dest
+  if(!len) { // the loop below decrements len before testing it, so a zero length must be handled here
+    return dest;
+  }
+
+  const uint8_t *s = (uint8_t *)src;
+  uint8_t *d = (uint8_t *)dest;
+
+  uint32_t diff = (uint32_t)d - (uint32_t)(s + 1); // extra offset because input gets incremented before output is calculated
+  // Underflow would be like adding a negative offset
+
+  // Can use 'd' as a scratch reg now
+  asm volatile (
+    "clrs\n" // Align for parallelism (CO) - SH4a use "stc SR, Rn" instead with a dummy Rn
+  ".align 2\n"
+  "0:\n\t"
+    "dt %[size]\n\t" // (--len) ? 0 -> T : 1 -> T (EX 1)
+    "mov.b @%[in]+, %[scratch]\n\t" // scratch = *(s++) (LS 1/2)
+    "bf.s 0b\n\t" // loop again while T is clear, i.e. while len has not reached 0 (BR 1/2)
+    " mov.b %[scratch], @(%[offset], %[in])\n" // *(datatype_of_s*) ((char*)s + diff) = scratch, where src + diff = dest (LS 1)
+    : [in] "+&r" ((uint32_t)s), [scratch] "=&r" ((uint32_t)d), [size] "+&r" (len) // outputs
+    : [offset] "z" (diff) // inputs (NOTE(review): "z" presumably pins diff to r0 for the @(R0, Rn) form -- confirm)
+    : "t", "memory" // clobbers
+  );
+
+  return dest;
+}
+
+#define PT_ALPHA_REF 0x011c
+
+static inline void GPUSetAlphaCutOff(uint8_t val) {
+    PVR_SET(PT_ALPHA_REF, val); /* PT_ALPHA_REF is 0x011c; NOTE(review): presumably a PVR register offset -- confirm against PVR_SET */
+}
+
+typedef struct { /* eight consecutive 32-bit words */
+    uint32_t cmd;
+    uint32_t mode1;
+    uint32_t mode2;
+    uint32_t mode3;
+    uint32_t d1;
+    uint32_t d2;
+    uint32_t d3;
+    uint32_t d4;
+} PolyHeader; /* same word count that _glPushHeaderOrVertex (sh4.c) copies per push */
+
+void SceneListSubmit(Vertex* v2, int n);
+
+static inline int DimensionFlag(const int w) {
+    /* Maps a dimension to its code: 16 -> 1, 32 -> 2, ... 1024 -> 7.
+       8 and every value that is not one of those powers of two yield 0. */
+    int flag = 7;
+    int size = 1024;
+
+    /* Walk down from the largest size until it matches w or the codes run out. */
+    while(flag > 0 && size != w) {
+        size >>= 1;
+        --flag;
+    }
+
+    return flag;
+}
diff --git a/third_party/gldc/src/sh4_math.h b/third_party/gldc/src/sh4_math.h
new file mode 100644
index 0000000..c12c30d
--- /dev/null
+++ b/third_party/gldc/src/sh4_math.h
@@ -0,0 +1,1820 @@
+// ---- sh4_math.h - SH7091 Math Module ----
+//
+// Version 1.1.3
+//
+// This file is part of the DreamHAL project, a hardware abstraction library
+// primarily intended for use on the SH7091 found in hardware such as the SEGA
+// Dreamcast game console.
+//
+// This math module is hereby released into the public domain in the hope that it
+// may prove useful. Now go hit 60 fps! :)
+//
+// --Moopthehedgehog
+//
+
+// Notes:
+// - GCC 4 users have a different return type for the fsca functions due to an
+//  internal compiler error regarding complex numbers; no issue under GCC 9.2.0
+// - Using -m4 instead of -m4-single-only completely breaks the matrix and
+//  vector operations
+// - Function inlining must be enabled and not blocked by compiler options such
+//  as -ffunction-sections, as blocking inlining will result in significant
+//  performance degradation for the vector and matrix functions employing a
+//  RETURN_VECTOR_STRUCT return type. I have added compiler hints and attributes
+//  "static inline __attribute__((always_inline))" to mitigate this, so in most
+//  cases the functions should be inlined regardless. If in doubt, check the
+//  compiler asm output!
+//
+
+#ifndef __SH4_MATH_H_
+#define __SH4_MATH_H_
+
+#define GNUC_FSCA_ERROR_VERSION 4
+
+//
+// Fast SH4 hardware math functions
+//
+//
+// High-accuracy users beware, the fsrra functions have an error of +/- 2^-21
+// per http://www.shared-ptr.com/sh_insns.html
+//
+
+//==============================================================================
+// Definitions
+//==============================================================================
+//
+// Structures, useful definitions, and reference comments
+//
+
+// Front matrix format:
+//
+//    FV0 FV4 FV8  FV12
+//    --- --- ---  ----
+//  [ fr0 fr4 fr8  fr12 ]
+//  [ fr1 fr5 fr9  fr13 ]
+//  [ fr2 fr6 fr10 fr14 ]
+//  [ fr3 fr7 fr11 fr15 ]
+//
+// Back matrix, XMTRX, is similar, although it has no FVn vector groups:
+//
+//  [ xf0 xf4 xf8  xf12 ]
+//  [ xf1 xf5 xf9  xf13 ]
+//  [ xf2 xf6 xf10 xf14 ]
+//  [ xf3 xf7 xf11 xf15 ]
+//
+
+typedef struct __attribute__((aligned(32))) {
+  float fr0;
+  float fr1;
+  float fr2;
+  float fr3;
+  float fr4;
+  float fr5;
+  float fr6;
+  float fr7;
+  float fr8;
+  float fr9;
+  float fr10;
+  float fr11;
+  float fr12;
+  float fr13;
+  float fr14;
+  float fr15;
+} ALL_FLOATS_STRUCT;
+
+// Return structs should be defined locally so that GCC optimizes them into
+// register usage instead of memory accesses.
+typedef struct {
+  float z1;
+  float z2;
+  float z3;
+  float z4;
+} RETURN_VECTOR_STRUCT;
+
+#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION
+typedef struct {
+  float sine;
+  float cosine;
+} RETURN_FSCA_STRUCT;
+#endif
+
+// Identity Matrix
+//
+//    FV0 FV4 FV8 FV12
+//    --- --- --- ----
+//  [  1   0   0   0  ]
+//  [  0   1   0   0  ]
+//  [  0   0   1   0  ]
+//  [  0   0   0   1  ]
+//
+
+static const ALL_FLOATS_STRUCT MATH_identity_matrix = {1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f};
+
+// Constants
+#define MATH_pi 3.14159265358979323846264338327950288419716939937510f
+#define MATH_e 2.71828182845904523536028747135266249775724709369995f
+#define MATH_phi 1.61803398874989484820458683436563811772030917980576f
+
+//==============================================================================
+// Basic math functions
+//==============================================================================
+//
+// The following functions are available.
+// Please see their definitions for other usage info, otherwise they may not
+// work for you.
+//
+/*
+  // |x|
+  float MATH_fabs(float x)
+
+  // sqrt(x)
+  float MATH_fsqrt(float x)
+
+  // a*b+c
+  float MATH_fmac(float a, float b, float c)
+
+  // a*b-c
+  float MATH_fmac_Dec(float a, float b, float c)
+
+  // fminf() - return the min of two floats
+  // This doesn't check for NaN
+  float MATH_Fast_Fminf(float a, float b)
+
+  // fmaxf() - return the max of two floats
+  // This doesn't check for NaN
+  float MATH_Fast_Fmaxf(float a, float b)
+
+  // Fast floorf() - return the nearest integer <= x as a float
+  // This doesn't check for NaN
+  float MATH_Fast_Floorf(float x)
+
+  // Fast ceilf() - return the nearest integer >= x as a float
+  // This doesn't check for NaN
+  float MATH_Fast_Ceilf(float x)
+
+  // Very fast floorf() - return the nearest integer <= x as a float
+  // Inspired by a cool trick I came across here:
+  // https://www.codeproject.com/Tips/700780/Fast-floor-ceiling-functions
+  // This doesn't check for NaN
+  float MATH_Very_Fast_Floorf(float x)
+
+  // Very fast ceilf() - return the nearest integer >= x as a float
+  // Inspired by a cool trick I came across here:
+  // https://www.codeproject.com/Tips/700780/Fast-floor-ceiling-functions
+  // This doesn't check for NaN
+  float MATH_Very_Fast_Ceilf(float x)
+*/
+
+// |x| -- applies the 'fabs' instruction to x in place and returns the result
+// NOTE(review): upstream says this one works on ARM and x86, too -- not verified here
+static inline __attribute__((always_inline)) float MATH_fabs(float x)
+{
+  asm volatile ("fabs %[floatx]\n"
+    : [floatx] "+f" (x) // outputs, "+" means r/w
+    : // no inputs
+    : // no clobbers
+  );
+
+  return x;
+}
+
+// sqrt(x) -- applies the 'fsqrt' instruction to x in place and returns the result
+// NOTE(review): upstream says this one works on ARM and x86, too -- not verified here
+// NOTE: There is a much faster version (MATH_Fast_Sqrt()) in the fsrra section of
+// this file. Chances are you probably want that one.
+static inline __attribute__((always_inline)) float MATH_fsqrt(float x)
+{
+  asm volatile ("fsqrt %[floatx]\n"
+    : [floatx] "+f" (x) // outputs, "+" means r/w
+    : // no inputs
+    : // no clobbers
+  );
+
+  return x;
+}
+
+// a*b+c -- the sum is accumulated into c, which is then returned
+static inline __attribute__((always_inline)) float MATH_fmac(float a, float b, float c)
+{
+  asm volatile ("fmac fr0, %[floatb], %[floatc]\n"
+    : [floatc] "+f" (c) // outputs, "+" means r/w
+    : "w" (a), [floatb] "f" (b) // inputs; a is unnamed because the template uses fr0 directly (NOTE(review): confirm "w" means fr0)
+    : // no clobbers
+  );
+
+  return c;
+}
+
+// a*b-c -- c is negated first, then a*b is accumulated into it and the result returned
+static inline __attribute__((always_inline)) float MATH_fmac_Dec(float a, float b, float c)
+{
+  asm volatile ("fneg %[floatc]\n\t"
+    "fmac fr0, %[floatb], %[floatc]\n"
+    : [floatc] "+&f" (c) // outputs, "+" means r/w, "&" means it's written to before all inputs are consumed
+    : "w" (a), [floatb] "f" (b) // inputs; a is unnamed because the template uses fr0 directly (NOTE(review): confirm "w" means fr0)
+    : // no clobbers
+  );
+
+  return c;
+}
+
+// Fast fminf() - return the min of two floats
+// This doesn't check for NaN: the comparison fails whenever a NaN is involved, so b is returned
+static inline __attribute__((always_inline)) float MATH_Fast_Fminf(float a, float b)
+{
+  float output_float;
+
+  asm volatile (
+    "fcmp/gt %[floata], %[floatb]\n\t" // b > a (NaN evaluates to !GT; 0 -> T)
+    "bt.s 1f\n\t" // yes, a is smaller
+    " fmov %[floata], %[float_out]\n\t" // so return a (delay slot: also runs when the branch is not taken)
+    "fmov %[floatb], %[float_out]\n" // no, either b is smaller or they're equal and it doesn't matter
+  "1:\n"
+    : [float_out] "=&f" (output_float) // outputs
+    : [floata] "f" (a), [floatb] "f" (b) // inputs
+    : "t" // clobbers
+  );
+
+  return output_float;
+}
+
+// Fast fmaxf() - return the max of two floats
+// This doesn't check for NaN: the comparison fails whenever a NaN is involved, so a is returned
+static inline __attribute__((always_inline)) float MATH_Fast_Fmaxf(float a, float b)
+{
+  float output_float;
+
+  asm volatile (
+    "fcmp/gt %[floata], %[floatb]\n\t" // b > a (NaN evaluates to !GT; 0 -> T)
+    "bt.s 1f\n\t" // yes, a is smaller
+    " fmov %[floatb], %[float_out]\n\t" // so return b (delay slot: also runs when the branch is not taken)
+    "fmov %[floata], %[float_out]\n" // no, either a is bigger or they're equal and it doesn't matter
+  "1:\n"
+    : [float_out] "=&f" (output_float) // outputs
+    : [floata] "f" (a), [floatb] "f" (b) // inputs
+    : "t" // clobbers
+  );
+
+  return output_float;
+}
+
+// Fast floorf() - return the nearest integer <= x as a float
+// This doesn't check for NaN
+static inline __attribute__((always_inline)) float MATH_Fast_Floorf(float x)
+{
+  float output_float;
+  // NOTE(review): the test below is x > -1.0, so x in (-1, 0) is not decremented and whole x <= -1 is -- confirm intended
+  // To hold -1.0f
+  float minus_one;
+
+  asm volatile (
+    "fldi1 %[minus_1]\n\t" // load 1.0f
+    "fneg %[minus_1]\n\t" // negate it to get -1.0f
+    "fcmp/gt %[minus_1], %[floatx]\n\t" // T = (x > -1.0)
+    "ftrc %[floatx], fpul\n\t" // convert float to int
+    "bt.s 1f\n\t" // T set: skip the fadd below
+    " float fpul, %[float_out]\n\t" // convert int to float
+    "fadd %[minus_1], %[float_out]\n" // otherwise (x <= -1.0) subtract 1.0
+  "1:\n"
+    : [minus_1] "=&f" (minus_one), [float_out] "=f" (output_float)
+    : [floatx] "f" (x)
+    : "fpul", "t"
+  );
+
+  return output_float;
+}
+
+// Fast ceilf() - return the nearest integer >= x as a float
+// This doesn't check for NaN
+static inline __attribute__((always_inline)) float MATH_Fast_Ceilf(float x)
+{
+  float output_float;
+  // NOTE(review): every x > 0 gets 1.0 added after truncation, including x that is already whole -- confirm intended
+  // To hold 0.0f and 1.0f
+  float zero_one;
+
+  asm volatile (
+    "fldi0 %[zero_1]\n\t" // load 0.0f
+    "fcmp/gt %[zero_1], %[floatx]\n\t" // x > 0
+    "ftrc %[floatx], fpul\n\t" // convert float to int
+    "bf.s 1f\n\t" // T clear (x <= 0): skip the add below
+    " float fpul, %[float_out]\n\t" // convert int to float
+    "fldi1 %[zero_1]\n\t" // reuse the register to hold 1.0f
+    "fadd %[zero_1], %[float_out]\n" // if input x > 0, add 1.0
+  "1:\n"
+    : [zero_1] "=&f" (zero_one), [float_out] "=f" (output_float)
+    : [floatx] "f" (x)
+    : "fpul", "t"
+  );
+
+  return output_float;
+}
+
+// Very fast floorf() - return the nearest integer <= x as a float
+// Inspired by a cool trick I came across here:
+// https://www.codeproject.com/Tips/700780/Fast-floor-ceiling-functions
+// This doesn't check for NaN
+static inline __attribute__((always_inline)) float MATH_Very_Fast_Floorf(float x)
+{
+  float output_float;
+  unsigned int scratch_reg;
+  unsigned int scratch_reg2;
+
+  // 0x4f000000 == 2^31 in float -- 0x4f << 24 is INT_MAX + 1.0f
+  // 0x80000000 == -2^31 == INT_MIN == -(INT_MAX + 1.0f)
+  // NOTE(review): confirm that the single-precision sum x + 2^31 keeps enough precision for the intended input range
+  // floor = (float)( (int)(x + (float)2^31) - 2^31)
+
+  asm volatile (
+    "mov #0x4f, %[scratch]\n\t" // Build float INT_MAX + 1 as a float using only regs (EX)
+    "shll16 %[scratch]\n\t" // (EX)
+    "shll8 %[scratch]\n\t" // (EX)
+    "lds %[scratch], fpul\n\t" // move float INT_MAX + 1 to float regs (LS)
+    "mov #1, %[scratch2]\n\t" // Build INT_MIN from scratch in parallel (EX)
+    "fsts fpul, %[float_out]\n\t" // (LS)
+    "fadd %[floatx], %[float_out]\n\t" // float-add float INT_MAX + 1 to x (FE)
+    "rotr %[scratch2]\n\t" // rotate the 1 in bit 0 from LSB to MSB for INT_MIN, clobber T (EX)
+    "ftrc %[float_out], fpul\n\t" // convert float to int (FE) -- ftrc -> sts is special combo
+    "sts fpul, %[scratch]\n\t" // move back to int regs (LS)
+    "add %[scratch2], %[scratch]\n\t" // Add INT_MIN to int (EX)
+    "lds %[scratch], fpul\n\t" // (LS) -- lds -> float is a special combo
+    "float fpul, %[float_out]\n" // convert back to float (FE)
+    : [scratch] "=&r" (scratch_reg), [scratch2] "=&r" (scratch_reg2), [float_out] "=&f" (output_float)
+    : [floatx] "f" (x)
+    : "fpul", "t"
+  );
+
+  return output_float;
+}
+
+// Very fast ceilf() - return the nearest integer >= x as a float
+// Inspired by a cool trick I came across here:
+// https://www.codeproject.com/Tips/700780/Fast-floor-ceiling-functions
+// This doesn't check for NaN
+static inline __attribute__((always_inline)) float MATH_Very_Fast_Ceilf(float x)
+{
+  float output_float;
+  unsigned int scratch_reg;
+  unsigned int scratch_reg2;
+
+  // 0x4f000000 == 2^31 in float -- 0x4f << 24 is INT_MAX + 1.0f
+  // 0x80000000 == -2^31 == INT_MIN == -(INT_MAX + 1.0f)
+
+  // Ceiling and floor are related by ceil(x) = -floor(-x).
+  // To make very fast ceiling have as wide a range as very fast floor,
+  // use this property to subtract x from INT_MAX + 1 and get the negative of the
+  // ceiling, and then negate the final output. This allows ceiling to use
+  // -2^31 and have the same range as very fast floor.
+
+  // Given:
+  // floor = (float)( (int)(x + (float)2^31) - 2^31 )
+  // We can do:
+  // ceiling = -( (float)( (int)((float)2^31 - x) - 2^31 ) )
+  // or (slower on SH4 since 'fneg' is faster than 'neg'):
+  // ceiling = (float) -( (int)((float)2^31 - x) - 2^31 )
+  // since ceil(x) = -floor(-x).
+  // NOTE(review): confirm that the single-precision difference 2^31 - x keeps enough precision for the intended input range
+  asm volatile (
+    "mov #0x4f, %[scratch]\n\t" // Build float INT_MAX + 1 as a float using only regs (EX)
+    "shll16 %[scratch]\n\t" // (EX)
+    "shll8 %[scratch]\n\t" // (EX)
+    "lds %[scratch], fpul\n\t" // move float INT_MAX + 1 to float regs (LS)
+    "mov #1, %[scratch2]\n\t" // Build INT_MIN from scratch in parallel (EX)
+    "fsts fpul, %[float_out]\n\t" // (LS)
+    "fsub %[floatx], %[float_out]\n\t" // float-sub x from float INT_MAX + 1 (FE)
+    "rotr %[scratch2]\n\t" // rotate the 1 in bit 0 from LSB to MSB for INT_MIN, clobber T (EX)
+    "ftrc %[float_out], fpul\n\t" // convert float to int (FE) -- ftrc -> sts is special combo
+    "sts fpul, %[scratch]\n\t" // move back to int regs (LS)
+    "add %[scratch2], %[scratch]\n\t" // Add INT_MIN to int (EX)
+    "lds %[scratch], fpul\n\t" // (LS) -- lds -> float is a special combo
+    "float fpul, %[float_out]\n\t" // convert back to float (FE)
+    "fneg %[float_out]\n" // negate: the value so far is -ceiling
+    : [scratch] "=&r" (scratch_reg), [scratch2] "=&r" (scratch_reg2), [float_out] "=&f" (output_float)
+    : [floatx] "f" (x)
+    : "fpul", "t"
+  );
+
+  return output_float;
+}
+
+//==============================================================================
+// Fun with fsrra, which does 1/sqrt(x) in one cycle
+//==============================================================================
+//
+// Error of 'fsrra' is +/- 2^-21 per http://www.shared-ptr.com/sh_insns.html
+//
+// The following functions are available.
+// Please see their definitions for other usage info, otherwise they may not
+// work for you.
+//
+/*
+  // 1/sqrt(x)
+  float MATH_fsrra(float x)
+
+  // 1/x
+  float MATH_Fast_Invert(float x)
+
+  // A faster divide than the 'fdiv' instruction
+  float MATH_Fast_Divide(float numerator, float denominator)
+
+  // A faster square root than the 'fsqrt' instruction
+  float MATH_Fast_Sqrt(float x)
+
+  // Standard, accurate, and slow float divide. Use this if MATH_Fast_Divide() gives you issues.
+  float MATH_Slow_Divide(float numerator, float denominator)
+*/
+
+// 1/sqrt(x) -- applies the 'fsrra' instruction to x in place and returns the result
+static inline __attribute__((always_inline)) float MATH_fsrra(float x)
+{
+  asm volatile ("fsrra %[one_div_sqrt]\n"
+  : [one_div_sqrt] "+f" (x) // outputs, "+" means r/w: x is both the operand and the result
+  : // no inputs
+  : // no clobbers
+  );
+
+  return x;
+}
+
+// 1/x
+// Computed as 1.0f / sqrt(x^2), with the sign of x restored afterwards.
+//
+// Accuracy is limited by fsrra (see the note at the top of this section).
+// The sign test is an ordinary '<' comparison, so -0.0f and NaN both take
+// the non-negated path.
+static inline __attribute__((always_inline)) float MATH_Fast_Invert(float x)
+{
+  // Record the sign before squaring, since x*x discards it.
+  const int negative = (x < 0.0f);
+
+  // 1.0f / sqrt(x^2), i.e. the magnitude 1/|x|
+  const float magnitude = MATH_fsrra(x*x);
+
+  // Put the sign back for negative inputs.
+  if(negative)
+  {
+    return -magnitude;
+  }
+
+  return magnitude;
+}
+
+// Approximate numerator / denominator: multiplies by the fast reciprocal, which
+// is faster than using 'fdiv'. Only fdiv can do doubles, however.
+// Of course, not having to divide at all is generally the best way to go. :P
+static inline __attribute__((always_inline)) float MATH_Fast_Divide(float numerator, float denominator)
+{
+  const float reciprocal = MATH_Fast_Invert(denominator); // 1/denominator
+  return numerator * reciprocal;
+}
+
+// fast sqrt(x), computed as 1 / (1/sqrt(x)); upstream notes invert(fsrra(x)) is about 3x faster than fsqrt.
+static inline __attribute__((always_inline)) float MATH_Fast_Sqrt(float x)
+{
+  const float inverse_root = MATH_fsrra(x); // 1/sqrt(x)
+  return MATH_Fast_Invert(inverse_root);
+}
+
+// Standard, accurate, and slow float divide. Use this if MATH_Fast_Divide() gives you issues.
+// This DOES work on negatives. The quotient overwrites numerator, which is then returned.
+static inline __attribute__((always_inline)) float MATH_Slow_Divide(float numerator, float denominator)
+{
+  asm volatile ("fdiv %[div_denom], %[div_numer]\n"
+  : [div_numer] "+f" (numerator) // outputs, "+" means r/w
+  : [div_denom] "f" (denominator) // inputs
+  : // no clobbers
+  );
+
+  return numerator;
+}
+
+//==============================================================================
+// Fun with fsca, which does simultaneous sine and cosine in 3 cycles
+//==============================================================================
+//
+// NOTE: GCC 4.7 has a bug that prevents it from working with fsca and complex
+// numbers in m4-single-only mode, so GCC 4 users will get a RETURN_FSCA_STRUCT
+// instead of a _Complex float. This may be much slower in some instances.
+//
+// VERY IMPORTANT USAGE INFORMATION (sine and cosine functions):
+//
+// Due to the nature in which the fsca instruction behaves, you MUST do the
+// following in your code to get sine and cosine from these functions:
+//
+//  _Complex float sine_cosine = [Call the fsca function here]
+//  float sine_value = __real__ sine_cosine;
+//  float cosine_value = __imag__ sine_cosine;
+//  Your output is now in sine_value and cosine_value.
+//
+// This is necessary because fsca outputs both sine and cosine simultaneously
+// and uses a double register to do so. The fsca functions do not actually
+// return a double--they return two floats--and using a complex float here is
+// just a bit of hacking the C language to make GCC do something that's legal in
+// assembly according to the SH4 calling convention (i.e. multiple return values
+// stored in floating point registers FR0-FR3). This is better than using a
+// struct of floats for optimization purposes--this will operate at peak
+// performance even at -O0, whereas a struct will not be fast at low
+// optimization levels due to memory accesses.
+//
+// Technically you may be able to use the complex return values as a complex
+// number if you wanted to, but that's probably not what you're after and they'd
+// be flipped anyways (in mathematical convention, sine is the imaginary part).
+//
+
+// Notes:
+// - From http://www.shared-ptr.com/sh_insns.html:
+//      The input angle is specified as a signed fraction in twos complement.
+//      The result of sin and cos is a single-precision floating-point number.
+//      0x7FFFFFFF to 0x00000001: 360×2^15−360/2^16 to 360/2^16 degrees
+//      0x00000000: 0 degree
+//      0xFFFFFFFF to 0x80000000: −360/2^16 to −360×2^15 degrees
+// - In fsca format, 2^16 units make up 360 degrees, so a value of 1 is actually
+//  1/182.044444444 of a degree or 1/10430.3783505 of a radian
+// - fsca does a %360 automatically for values over 360 degrees
+//
+// Also:
+// In order to make the best use of fsca units, a program must expect them from
+// the outset and not "make them" by dividing radians or degrees to get them,
+// otherwise it's just giving the 'fsca' instruction radians or degrees!
+//
+
+// The following functions are available.
+// Please see their definitions for other usage info, otherwise they may not
+// work for you.
+//
+/*
+  // For integer input in native fsca units (fastest)
+  _Complex float MATH_fsca_Int(unsigned int input_int)
+
+  // For integer input in degrees
+  _Complex float MATH_fsca_Int_Deg(unsigned int input_int)
+
+  // For integer input in radians
+  _Complex float MATH_fsca_Int_Rad(unsigned int input_int)
+
+  // For float input in native fsca units
+  _Complex float MATH_fsca_Float(float input_float)
+
+  // For float input in degrees
+  _Complex float MATH_fsca_Float_Deg(float input_float)
+
+  // For float input in radians
+  _Complex float MATH_fsca_Float_Rad(float input_float)
+*/
+
+//------------------------------------------------------------------------------
+#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION
+//------------------------------------------------------------------------------
+//
+// This set of fsca functions is specifically for old versions of GCC.
+// See later for functions for newer versions of GCC.
+//
+
+//
+// Integer input (faster)
+//
+
+// For int input, input_int is in native fsca units (fastest). Old-GCC variant: returns the pair as a RETURN_FSCA_STRUCT.
+static inline __attribute__((always_inline)) RETURN_FSCA_STRUCT MATH_fsca_Int(unsigned int input_int)
+{
+  register float __sine __asm__("fr0"); // bound to FR0; FR0 and FR1 together form DR0, the fsca destination
+  register float __cosine __asm__("fr1"); // bound to FR1
+
+  asm volatile ("lds %[input_number], FPUL\n\t" // load int from register (1 cycle)
+    "fsca FPUL, DR0\n" // 3 cycle simultaneous sine/cosine
+    : "=w" (__sine), "=f" (__cosine) // outputs (write-only)
+    : [input_number] "r" (input_int)  // inputs
+    : "fpul" // clobbers: FPUL is overwritten by the lds above
+  );
+
+  RETURN_FSCA_STRUCT output = {__sine, __cosine}; // first member = sine, second member = cosine
+  return output;
+}
+
+// For int input, input_int is in degrees. Old-GCC variant: returns the pair as a RETURN_FSCA_STRUCT.
+static inline __attribute__((always_inline)) RETURN_FSCA_STRUCT MATH_fsca_Int_Deg(unsigned int input_int)
+{
+  // normalize whole number input degrees to fsca format: 1527099483 / 2^23 ~= 182.0444 (= 65536 / 360), done as a 64-bit fixed-point multiply
+  input_int = ((1527099483ULL * input_int) >> 23);
+
+  register float __sine __asm__("fr0"); // bound to FR0; FR0 and FR1 together form DR0, the fsca destination
+  register float __cosine __asm__("fr1"); // bound to FR1
+
+  asm volatile ("lds %[input_number], FPUL\n\t" // load int from register (1 cycle)
+    "fsca FPUL, DR0\n" // 3 cycle simultaneous sine/cosine
+    : "=w" (__sine), "=f" (__cosine) // outputs (write-only)
+    : [input_number] "r" (input_int)  // inputs
+    : "fpul" // clobbers: FPUL is overwritten by the lds above
+  );
+
+  RETURN_FSCA_STRUCT output = {__sine, __cosine}; // first member = sine, second member = cosine
+  return output;
+}
+
+// For int input, input_int is in radians. Old-GCC variant: returns the pair as a RETURN_FSCA_STRUCT.
+static inline __attribute__((always_inline)) RETURN_FSCA_STRUCT MATH_fsca_Int_Rad(unsigned int input_int)
+{
+  // normalize whole number input rads to fsca format: 2734261102 / 2^18 ~= 10430.378 (= 65536 / (2*pi)), done as a 64-bit fixed-point multiply
+  input_int = ((2734261102ULL * input_int) >> 18);
+
+  register float __sine __asm__("fr0"); // bound to FR0; FR0 and FR1 together form DR0, the fsca destination
+  register float __cosine __asm__("fr1"); // bound to FR1
+
+  asm volatile ("lds %[input_number], FPUL\n\t" // load int from register (1 cycle)
+    "fsca FPUL, DR0\n" // 3 cycle simultaneous sine/cosine
+    : "=w" (__sine), "=f" (__cosine) // outputs (write-only)
+    : [input_number] "r" (input_int)  // inputs
+    : "fpul" // clobbers: FPUL is overwritten by the lds above
+  );
+
+  RETURN_FSCA_STRUCT output = {__sine, __cosine}; // first member = sine, second member = cosine
+  return output;
+}
+
+//
+// Float input (slower)
+//
+
+// For float input, input_float is in native fsca units. Old-GCC variant: returns the pair as a RETURN_FSCA_STRUCT.
+static inline __attribute__((always_inline)) RETURN_FSCA_STRUCT MATH_fsca_Float(float input_float)
+{
+  register float __sine __asm__("fr0"); // bound to FR0; FR0 and FR1 together form DR0, the fsca destination
+  register float __cosine __asm__("fr1"); // bound to FR1
+
+  asm volatile ("ftrc %[input_number], FPUL\n\t" // convert float to int. takes 3 cycles
+    "fsca FPUL, DR0\n" // 3 cycle simultaneous sine/cosine
+    : "=w" (__sine), "=f" (__cosine) // outputs (write-only)
+    : [input_number] "f" (input_float)  // inputs
+    : "fpul" // clobbers: FPUL receives the converted integer
+  );
+
+  RETURN_FSCA_STRUCT output = {__sine, __cosine}; // first member = sine, second member = cosine
+  return output;
+}
+
+// For float input, input_float is in degrees. Old-GCC variant: returns the pair as a RETURN_FSCA_STRUCT.
+static inline __attribute__((always_inline)) RETURN_FSCA_STRUCT MATH_fsca_Float_Deg(float input_float)
+{
+  input_float *= 182.044444444f; // degrees -> fsca units (65536 / 360 units per degree)
+
+  register float __sine __asm__("fr0"); // bound to FR0; FR0 and FR1 together form DR0, the fsca destination
+  register float __cosine __asm__("fr1"); // bound to FR1
+
+  asm volatile ("ftrc %[input_number], FPUL\n\t" // convert float to int. takes 3 cycles
+    "fsca FPUL, DR0\n" // 3 cycle simultaneous sine/cosine
+    : "=w" (__sine), "=f" (__cosine) // outputs (write-only)
+    : [input_number] "f" (input_float)  // inputs
+    : "fpul" // clobbers: FPUL receives the converted integer
+  );
+
+  RETURN_FSCA_STRUCT output = {__sine, __cosine}; // first member = sine, second member = cosine
+  return output;
+}
+
+// For float input, input_float is in radians. Old-GCC variant: returns the pair as a RETURN_FSCA_STRUCT.
+static inline __attribute__((always_inline)) RETURN_FSCA_STRUCT MATH_fsca_Float_Rad(float input_float)
+{
+  input_float *= 10430.3783505f; // radians -> fsca units (65536 / (2*pi) units per radian)
+
+  register float __sine __asm__("fr0"); // bound to FR0; FR0 and FR1 together form DR0, the fsca destination
+  register float __cosine __asm__("fr1"); // bound to FR1
+
+  asm volatile ("ftrc %[input_number], FPUL\n\t" // convert float to int. takes 3 cycles
+    "fsca FPUL, DR0\n" // 3 cycle simultaneous sine/cosine
+    : "=w" (__sine), "=f" (__cosine) // outputs (write-only)
+    : [input_number] "f" (input_float)  // inputs
+    : "fpul" // clobbers: FPUL receives the converted integer
+  );
+
+  RETURN_FSCA_STRUCT output = {__sine, __cosine}; // first member = sine, second member = cosine
+  return output;
+}
+
+//------------------------------------------------------------------------------
+#else
+//------------------------------------------------------------------------------
+//
+// This set of fsca functions is specifically for newer versions of GCC. They
+// work fine under GCC 9.2.0.
+//
+
+//
+// Integer input (faster)
+//
+
+// For int input, input_int is in native fsca units (fastest). Newer-GCC variant: sine is read via __real__, cosine via __imag__.
+static inline __attribute__((always_inline)) _Complex float MATH_fsca_Int(unsigned int input_int)
+{
+  _Complex float output; // two floats; written as one register pair by fsca below
+
+  asm volatile ("lds %[input_number], FPUL\n\t" // load int from register (1 cycle)
+    "fsca FPUL, %[out]\n" // 3 cycle simultaneous sine/cosine
+    : [out] "=d" (output) // outputs (write-only; GCC picks the destination register pair)
+    : [input_number] "r" (input_int)  // inputs
+    : "fpul" // clobbers: FPUL is overwritten by the lds above
+  );
+
+  return output;
+}
+
+// For int input, input_int is in degrees. Newer-GCC variant: sine is read via __real__, cosine via __imag__.
+static inline __attribute__((always_inline)) _Complex float MATH_fsca_Int_Deg(unsigned int input_int)
+{
+  // normalize whole number input degrees to fsca format: 1527099483 / 2^23 ~= 182.0444 (= 65536 / 360), done as a 64-bit fixed-point multiply
+  input_int = ((1527099483ULL * input_int) >> 23);
+
+  _Complex float output; // two floats; written as one register pair by fsca below
+
+  asm volatile ("lds %[input_number], FPUL\n\t" // load int from register (1 cycle)
+    "fsca FPUL, %[out]\n" // 3 cycle simultaneous sine/cosine
+    : [out] "=d" (output) // outputs (write-only; GCC picks the destination register pair)
+    : [input_number] "r" (input_int)  // inputs
+    : "fpul" // clobbers: FPUL is overwritten by the lds above
+  );
+
+  return output;
+}
+
+// For int input, input_int is in radians. Newer-GCC variant: sine is read via __real__, cosine via __imag__.
+static inline __attribute__((always_inline)) _Complex float MATH_fsca_Int_Rad(unsigned int input_int)
+{
+  // normalize whole number input rads to fsca format: 2734261102 / 2^18 ~= 10430.378 (= 65536 / (2*pi)), done as a 64-bit fixed-point multiply
+  input_int = ((2734261102ULL * input_int) >> 18);
+
+  _Complex float output; // two floats; written as one register pair by fsca below
+
+  asm volatile ("lds %[input_number], FPUL\n\t" // load int from register (1 cycle)
+    "fsca FPUL, %[out]\n" // 3 cycle simultaneous sine/cosine
+    : [out] "=d" (output) // outputs (write-only; GCC picks the destination register pair)
+    : [input_number] "r" (input_int)  // inputs
+    : "fpul" // clobbers: FPUL is overwritten by the lds above
+  );
+
+  return output;
+}
+
+//
+// Float input (slower)
+//
+
+// For float input, input_float is in native fsca units. Newer-GCC variant: sine is read via __real__, cosine via __imag__.
+static inline __attribute__((always_inline)) _Complex float MATH_fsca_Float(float input_float)
+{
+  _Complex float output; // two floats; written as one register pair by fsca below
+
+  asm volatile ("ftrc %[input_number], FPUL\n\t" // convert float to int. takes 3 cycles
+    "fsca FPUL, %[out]\n" // 3 cycle simultaneous sine/cosine
+    : [out] "=d" (output) // outputs (write-only; GCC picks the destination register pair)
+    : [input_number] "f" (input_float)  // inputs
+    : "fpul" // clobbers: FPUL receives the converted integer
+  );
+
+  return output;
+}
+
+// For float input, input_float is in degrees. Newer-GCC variant: sine is read via __real__, cosine via __imag__.
+static inline __attribute__((always_inline)) _Complex float MATH_fsca_Float_Deg(float input_float)
+{
+  input_float *= 182.044444444f; // degrees -> fsca units (65536 / 360 units per degree)
+
+  _Complex float output; // two floats; written as one register pair by fsca below
+
+  asm volatile ("ftrc %[input_number], FPUL\n\t" // convert float to int. takes 3 cycles
+    "fsca FPUL, %[out]\n" // 3 cycle simultaneous sine/cosine
+    : [out] "=d" (output) // outputs (write-only; GCC picks the destination register pair)
+    : [input_number] "f" (input_float)  // inputs
+    : "fpul" // clobbers: FPUL receives the converted integer
+  );
+
+  return output;
+}
+
+// For float input, input_float is in radians. Newer-GCC variant: sine is read via __real__, cosine via __imag__.
+static inline __attribute__((always_inline)) _Complex float MATH_fsca_Float_Rad(float input_float)
+{
+  input_float *= 10430.3783505f; // radians -> fsca units (65536 / (2*pi) units per radian)
+
+  _Complex float output; // two floats; written as one register pair by fsca below
+
+  asm volatile ("ftrc %[input_number], FPUL\n\t" // convert float to int. takes 3 cycles
+    "fsca FPUL, %[out]\n" // 3 cycle simultaneous sine/cosine
+    : [out] "=d" (output) // outputs (write-only; GCC picks the destination register pair)
+    : [input_number] "f" (input_float)  // inputs
+    : "fpul" // clobbers: FPUL receives the converted integer
+  );
+
+  return output;
+}
+
+//------------------------------------------------------------------------------
+#endif
+//------------------------------------------------------------------------------
+
+//==============================================================================
+// Hardware vector and matrix operations
+//==============================================================================
+//
+// These functions each have very specific usage instructions. Please be sure to
+// read them before use or else they won't seem to work right!
+//
+// The following functions are available.
+// Please see their definitions for important usage info, otherwise they may not
+// work for you.
+//
+/*
+
+  //------------------------------------------------------------------------------
+  // Vector and matrix math operations
+  //------------------------------------------------------------------------------
+
+  // Inner/dot product (4x1 vec . 4x1 vec = scalar)
+  float MATH_fipr(float x1, float x2, float x3, float x4, float y1, float y2, float y3, float y4)
+
+  // Sum of Squares (w^2 + x^2 + y^2 + z^2)
+  float MATH_Sum_of_Squares(float w, float x, float y, float z)
+
+  // Cross product with bonus multiply (vec X vec = orthogonal vec, with an extra a*b=c)
+  RETURN_VECTOR_STRUCT MATH_Cross_Product_with_Mult(float x1, float x2, float x3, float y1, float y2, float y3, float a, float b)
+
+  // Cross product (vec X vec = orthogonal vec)
+  RETURN_VECTOR_STRUCT MATH_Cross_Product(float x1, float x2, float x3, float y1, float y2, float y3)
+
+  // Outer product (vec (X) vec = 4x4 matrix)
+  void MATH_Outer_Product(float x1, float x2, float x3, float x4, float y1, float y2, float y3, float y4)
+
+  // Matrix transform (4x4 matrix * 4x1 vec = 4x1 vec)
+  RETURN_VECTOR_STRUCT MATH_Matrix_Transform(float x1, float x2, float x3, float x4)
+
+  // 4x4 Matrix transpose (XMTRX^T)
+  void MATH_Matrix_Transpose(void)
+
+  // 4x4 Matrix product (XMTRX and one from memory)
+  void MATH_Matrix_Product(ALL_FLOATS_STRUCT * front_matrix)
+
+  // 4x4 Matrix product (two from memory)
+  void MATH_Load_Matrix_Product(ALL_FLOATS_STRUCT * matrix1, ALL_FLOATS_STRUCT * matrix2)
+
+  //------------------------------------------------------------------------------
+  // Matrix load and store operations
+  //------------------------------------------------------------------------------
+
+  // Load 4x4 XMTRX from memory
+  void MATH_Load_XMTRX(ALL_FLOATS_STRUCT * back_matrix)
+
+  // Store 4x4 XMTRX to memory
+  ALL_FLOATS_STRUCT * MATH_Store_XMTRX(ALL_FLOATS_STRUCT * destination)
+*/
+
+//------------------------------------------------------------------------------
+// Vector and matrix math operations
+//------------------------------------------------------------------------------
+
+// Inner/dot product: vec . vec = scalar
+//                       _    _
+//                      |  y1  |
+//  [ x1 x2 x3 x4 ]  .  |  y2  | = scalar
+//                      |  y3  |
+//                      |_ y4 _|
+//
+// Returns x1*y1 + x2*y2 + x3*y3 + x4*y4 using one 'fipr'. SH4 calling convention states we get 8 float arguments. Perfect!
+static inline __attribute__((always_inline)) float MATH_fipr(float x1, float x2, float x3, float x4, float y1, float y2, float y3, float y4)
+{
+  // FR4-FR11 are the regs that are passed in, aka vectors FV4 and FV8.
+  // Just need to make sure GCC doesn't modify anything, and these register vars do that job.
+
+  // Temporary variables are necessary per GCC to avoid clobbering:
+  // https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables
+
+  float tx1 = x1;
+  float tx2 = x2;
+  float tx3 = x3;
+  float tx4 = x4;
+
+  float ty1 = y1;
+  float ty2 = y2;
+  float ty3 = y3;
+  float ty4 = y4;
+
+  // vector FV4 (FR4-FR7): first operand of fipr
+  register float __x1 __asm__("fr4") = tx1;
+  register float __x2 __asm__("fr5") = tx2;
+  register float __x3 __asm__("fr6") = tx3;
+  register float __x4 __asm__("fr7") = tx4;
+
+  // vector FV8 (FR8-FR11): second operand of fipr; its last element receives the result
+  register float __y1 __asm__("fr8") = ty1;
+  register float __y2 __asm__("fr9") = ty2;
+  register float __y3 __asm__("fr10") = ty3;
+  register float __y4 __asm__("fr11") = ty4;
+
+  // take care of all the floats in one fell swoop
+  asm volatile ("fipr FV4, FV8\n"
+  : "+f" (__y4) // output (gets written to FR11); r/w because y4 is also an operand
+  : "f" (__x1), "f" (__x2), "f" (__x3), "f" (__x4), "f" (__y1), "f" (__y2), "f" (__y3) // inputs
+  : // clobbers
+  );
+
+  return __y4; // FR11 now holds the dot product
+}
+
+// Sum of Squares
+//                   _   _
+//                  |  w  |
+//  [ w x y z ]  .  |  x  | = w^2 + x^2 + y^2 + z^2 = scalar
+//                  |  y  |
+//                  |_ z _|
+//
+static inline __attribute__((always_inline)) float MATH_Sum_of_Squares(float w, float x, float y, float z)
+{
+  // FR4-FR7 are the regs that are passed in, aka vector FV4.
+  // Just need to make sure GCC doesn't modify anything, and these register vars do that job.
+
+  // Temporary variables are necessary per GCC to avoid clobbering:
+  // https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables
+
+  float tw = w;
+  float tx = x;
+  float ty = y;
+  float tz = z;
+
+  // vector FV4 (FR4-FR7): used as both operands of fipr below
+  register float __w __asm__("fr4") = tw;
+  register float __x __asm__("fr5") = tx;
+  register float __y __asm__("fr6") = ty;
+  register float __z __asm__("fr7") = tz;
+
+  // take care of all the floats in one fell swoop: FV4 dotted with itself gives w^2 + x^2 + y^2 + z^2
+  asm volatile ("fipr FV4, FV4\n"
+  : "+f" (__z) // output (gets written to FR7); r/w because z is also an operand
+  : "f" (__w), "f" (__x), "f" (__y) // inputs
+  : // clobbers
+  );
+
+  return __z; // FR7 now holds the sum of squares
+}
+
+// Cross product: vec X vec = orthogonal vec
+//   _    _       _    _       _    _
+//  |  x1  |     |  y1  |     |  z1  |
+//  |  x2  |  X  |  y2  |  =  |  z2  |
+//  |_ x3 _|     |_ y3 _|     |_ z3 _|
+//
+// With bonus multiply:
+//
+//      a     *     b      =      c
+//
+// IMPORTANT USAGE INFORMATION (cross product):
+//
+// Return vector struct maps as below to the above diagram:
+//
+//  typedef struct {
+//   float z1;
+//   float z2;
+//   float z3;
+//   float z4; // c is stored in z4, and c = a*b if using 'with mult' version (else c = 0)
+// } RETURN_VECTOR_STRUCT;
+//
+//  For people familiar with the unit vector notation, z1 == 'i', z2 == 'j',
+//  and z3 == 'k'.
+//
+// The cross product matrix will also be stored in XMTRX after this, so calling
+// MATH_Matrix_Transform() on a vector after using this function will do a cross
+// product with the same x1-x3 values and a multiply with the same 'a' value
+// as used in this function. In this situation, 'a' will be multiplied with
+// the x4 parameter of MATH_Matrix_Transform(). a = 0 if not using the 'with mult'
+// version of the cross product function.
+//
+// For reference, XMTRX will look like this:
+//
+//  [  0 -x3 x2 0 ]
+//  [  x3 0 -x1 0 ]
+//  [ -x2 x1 0  0 ]
+//  [  0  0  0  a ] (<-- a = 0 if not using 'with mult')
+//
+// Similarly to how the sine and cosine functions use fsca and return 2 floats,
+// the cross product functions actually return 4 floats. The first 3 are the
+// cross product output, and the 4th is a*b. The SH4 only multiplies 4x4
+// matrices with 4x1 vectors, which is why the output is like that--but it means
+// we also get a bonus float multiplication while we do our cross product!
+//
+
+// Please do not call this function directly (notice the weird syntax); call
+// MATH_Cross_Product() or MATH_Cross_Product_with_Mult() instead. The odd parameter order is the FR4-FR11 layout the asm below relies on.
+static inline __attribute__((always_inline)) RETURN_VECTOR_STRUCT xMATH_do_Cross_Product_with_Mult(float x3, float a, float y3, float b, float x2, float x1, float y1, float y2)
+{
+  // FR4-FR11 are the regs that are passed in, in that order.
+  // Just need to make sure GCC doesn't modify anything, and these register vars do that job.
+
+  // Temporary variables are necessary per GCC to avoid clobbering:
+  // https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables
+
+  float tx1 = x1;
+  float tx2 = x2;
+  float tx3 = x3;
+  float ta = a;
+
+  float ty1 = y1;
+  float ty2 = y2;
+  float ty3 = y3;
+  float tb = b;
+
+  register float __x1 __asm__("fr9") = tx1; // need to negate (need to move to fr6, then negate fr9)
+  register float __x2 __asm__("fr8") = tx2; // in place for matrix (need to move to fr2 then negate fr2)
+  register float __x3 __asm__("fr4") = tx3; // need to negate (move to fr1 first, then negate fr4)
+  register float __a __asm__("fr5") = ta;
+
+  register float __y1 __asm__("fr10") = ty1;
+  register float __y2 __asm__("fr11") = ty2;
+  register float __y3 __asm__("fr6") = ty3;
+  register float __b __asm__("fr7") = tb;
+
+  register float __z1 __asm__("fr0") = 0.0f; // z1
+  register float __z2 __asm__("fr1") = 0.0f; // z2 (not moving x3 here yet since a double 0 is needed)
+  register float __z3 __asm__("fr2") = tx2; // z3 (this handles putting x2 in fr2)
+  register float __c __asm__("fr3") = 0.0f; // c
+
+  // This actually does a matrix transform to do the cross product.
+  // It's this:
+  //                   _    _       _            _
+  //  [  0 -x3 x2 0 ] |  y1  |     | -x3y2 + x2y3 |
+  //  [  x3 0 -x1 0 ] |  y2  |  =  |  x3y1 - x1y3 |
+  //  [ -x2 x1 0  0 ] |  y3  |     | -x2y1 + x1y2 |
+  //  [  0  0  0  a ] |_ b  _|     |_      c     _|
+  //
+
+  asm volatile (
+    // set up back bank's FV0
+    "fschg\n\t" // switch fmov to paired moves (note: only paired moves can access XDn regs)
+
+    // Save FR12-FR15, which are supposed to be preserved across function calls.
+    // This stops them from getting clobbered and saves 4 stack pushes (memory accesses).
+    "fmov DR12, XD12\n\t"
+    "fmov DR14, XD14\n\t"
+
+    "fmov DR10, XD0\n\t" // fmov 'y1' and 'y2' from FR10, FR11 into position (XF0, XF1)
+    "fmov DR6, XD2\n\t" // fmov 'y3' and 'b' from FR6, FR7 into position (XF2, XF3)
+
+    // pair move zeros for some speed in setting up front bank for matrix
+    "fmov DR0, DR10\n\t" // clear FR10, FR11
+    "fmov DR0, DR12\n\t" // clear FR12, FR13
+    "fschg\n\t" // switch back to single moves
+    // prepare front bank for XMTRX
+    "fmov FR5, FR15\n\t" // fmov 'a' into position
+    "fmov FR0, FR14\n\t" // clear out FR14
+    "fmov FR0, FR7\n\t" // clear out FR7
+    "fmov FR0, FR5\n\t" // clear out FR5
+
+    "fneg FR2\n\t" // set up 'x2'
+    "fmov FR9, FR6\n\t" // set up 'x1'
+    "fneg FR9\n\t"
+    "fmov FR4, FR1\n\t" // set up 'x3'
+    "fneg FR4\n\t"
+    // flip banks and matrix multiply; frchg is issued only once here, so the matrix just built becomes XMTRX and the y/b vector becomes FV0
+    "frchg\n\t"
+    "ftrv XMTRX, FV0\n"
+  : "+&w" (__z1), "+&f" (__z2), "+&f" (__z3), "+&f" (__c) // output (using FV0)
+  : "f" (__x1), "f" (__x2), "f" (__x3), "f" (__y1), "f" (__y2), "f" (__y3), "f" (__a), "f" (__b) // inputs
+  : // clobbers (all of the float regs get clobbered, except for FR12-FR15 which were specially preserved) -- NOTE(review): none are declared to GCC and input-operand registers are overwritten; confirm this is safe at every inlined call site
+  );
+
+  RETURN_VECTOR_STRUCT output = {__z1, __z2, __z3, __c}; // cross product in the first three members, c = a*b in the fourth
+  return output;
+}
+
+// Please do not call this function directly (notice the weird syntax); call
+// MATH_Cross_Product() or MATH_Cross_Product_with_Mult() instead. The odd parameter order (x1 passed twice) is the FR4-FR11 layout the asm below relies on.
+static inline __attribute__((always_inline)) RETURN_VECTOR_STRUCT xMATH_do_Cross_Product(float x3, float zero, float x1, float y3, float x2, float x1_2, float y1, float y2)
+{
+  // FR4-FR11 are the regs that are passed in, in that order.
+  // Just need to make sure GCC doesn't modify anything, and these register vars do that job.
+
+  // Temporary variables are necessary per GCC to avoid clobbering:
+  // https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables
+
+  float tx1 = x1;
+  float tx2 = x2;
+  float tx3 = x3;
+  float tx1_2 = x1_2;
+
+  float ty1 = y1;
+  float ty2 = y2;
+  float ty3 = y3;
+  float tzero = zero;
+
+  register float __x1 __asm__("fr6") = tx1; // in place
+  register float __x2 __asm__("fr8") = tx2; // in place (fmov to fr2, negate fr2)
+  register float __x3 __asm__("fr4") = tx3; // need to negate (fmov to fr1, negate fr4)
+
+  register float __zero __asm__("fr5") = tzero; // in place
+  register float __x1_2 __asm__("fr9") = tx1_2; // need to negate
+
+  register float __y1 __asm__("fr10") = ty1;
+  register float __y2 __asm__("fr11") = ty2;
+  // no __y3 needed in this function
+
+  register float __z1 __asm__("fr0") = tzero; // z1
+  register float __z2 __asm__("fr1") = tzero; // z2
+  register float __z3 __asm__("fr2") = ty3; // z3 (carries y3 into FR2 so the paired move below can pick it up)
+  register float __c __asm__("fr3") = tzero; // c
+
+  // This actually does a matrix transform to do the cross product.
+  // It's this:
+  //                   _    _       _            _
+  //  [  0 -x3 x2 0 ] |  y1  |     | -x3y2 + x2y3 |
+  //  [  x3 0 -x1 0 ] |  y2  |  =  |  x3y1 - x1y3 |
+  //  [ -x2 x1 0  0 ] |  y3  |     | -x2y1 + x1y2 |
+  //  [  0  0  0  0 ] |_ 0  _|     |_      0     _|
+  //
+
+  asm volatile (
+    // zero out FR7. For some reason, if this is done in C after __z3 is set:
+    // register float __y3 __asm__("fr7") = tzero;
+    // then GCC will emit a spurious stack push (pushing FR12). So just zero it here.
+    "fmov FR5, FR7\n\t"
+    // set up back bank's FV0
+    "fschg\n\t" // switch fmov to paired moves (note: only paired moves can access XDn regs)
+
+    // Save FR12-FR15, which are supposed to be preserved across function calls.
+    // This stops them from getting clobbered and saves 4 stack pushes (memory accesses).
+    "fmov DR12, XD12\n\t"
+    "fmov DR14, XD14\n\t"
+
+    "fmov DR10, XD0\n\t" // fmov 'y1' and 'y2' from FR10, FR11 into position (XF0, XF1)
+    "fmov DR2, XD2\n\t" // fmov 'y3' and '0' from FR2, FR3 into position (XF2, XF3)
+
+    // pair move zeros for some speed in setting up front bank for matrix
+    "fmov DR0, DR10\n\t" // clear FR10, FR11
+    "fmov DR0, DR12\n\t" // clear FR12, FR13
+    "fmov DR0, DR14\n\t" // clear FR14, FR15
+    "fschg\n\t" // switch back to single moves
+    // prepare front bank for XMTRX
+    "fneg FR9\n\t" // set up 'x1'
+    "fmov FR8, FR2\n\t" // set up 'x2'
+    "fneg FR2\n\t"
+    "fmov FR4, FR1\n\t" // set up 'x3'
+    "fneg FR4\n\t"
+    // flip banks and matrix multiply; frchg is issued only once here, so the matrix just built becomes XMTRX and the y vector becomes FV0
+    "frchg\n\t"
+    "ftrv XMTRX, FV0\n"
+  : "+&w" (__z1), "+&f" (__z2), "+&f" (__z3), "+&f" (__c) // output (using FV0)
+  : "f" (__x1), "f" (__x2), "f" (__x3), "f" (__y1), "f" (__y2), "f" (__zero), "f" (__x1_2) // inputs
+  : "fr7" // clobbers (all of the float regs get clobbered, except for FR12-FR15 which were specially preserved) -- NOTE(review): only FR7 is declared to GCC; confirm this is safe at every inlined call site
+  );
+
+  RETURN_VECTOR_STRUCT output = {__z1, __z2, __z3, __c}; // cross product in the first three members, fourth member is the 0 * 0 product
+  return output;
+}
+
+//------------------------------------------------------------------------------
+// Functions that wrap the xMATH_do_Cross_Product[_with_Mult]() functions to make
+// it easier to organize parameters
+//------------------------------------------------------------------------------
+
+// Cross product with a bonus float multiply (c = a * b)
+// Shuffles the caller-friendly argument order into the register-driven order
+// that xMATH_do_Cross_Product_with_Mult() takes, and hands back its result.
+static inline __attribute__((always_inline)) RETURN_VECTOR_STRUCT MATH_Cross_Product_with_Mult(float x1, float x2, float x3, float y1, float y2, float y3, float a, float b)
+{
+  RETURN_VECTOR_STRUCT product = xMATH_do_Cross_Product_with_Mult(x3, a, y3, b, x2, x1, y1, y2);
+
+  return product;
+}
+
+// Plain cross product; does not use the bonus float multiply (c = 0 and a in the cross product matrix will be 0)
+// This is a tiny bit faster than 'with_mult' (about 2 cycles faster)
+// x1 is deliberately passed twice and a literal 0.0f is supplied, matching the
+// parameter list of xMATH_do_Cross_Product().
+static inline __attribute__((always_inline)) RETURN_VECTOR_STRUCT MATH_Cross_Product(float x1, float x2, float x3, float y1, float y2, float y3)
+{
+  RETURN_VECTOR_STRUCT product = xMATH_do_Cross_Product(x3, 0.0f, x1, y3, x2, x1, y1, y2);
+
+  return product;
+}
+
+// Outer product: vec (X) vec = matrix
+//   _    _
+//  |  x1  |
+//  |  x2  |  (X)  [ y1 y2 y3 y4 ] = 4x4 matrix
+//  |  x3  |
+//  |_ x4 _|
+//
+// This returns the floats in the back bank (XF0-15), which are inaccessible
+// outside of using frchg or paired-move fmov. It's meant to set up a matrix for
+// use with other matrix functions. GCC also does not touch the XFn bank.
+// This will also wipe out anything stored in the float registers, as it uses the
+// whole FPU register file (all 32 of the float registers).
+static inline __attribute__((always_inline)) void MATH_Outer_Product(float x1, float x2, float x3, float x4, float y1, float y2, float y3, float y4)
+{
+  // FR4-FR11 are the regs that are passed in, in that order.
+  // Just need to make sure GCC doesn't modify anything, and these register vars do that job.
+
+  // Temporary variables are necessary per GCC to avoid clobbering:
+  // https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables
+
+  float tx1 = x1;
+  float tx2 = x2;
+  float tx3 = x3;
+  float tx4 = x4;
+
+  float ty1 = y1;
+  float ty2 = y2;
+  float ty3 = y3;
+  float ty4 = y4;
+
+  // vector FV4
+  register float __x1 __asm__("fr4") = tx1;
+  register float __x2 __asm__("fr5") = tx2;
+  register float __x3 __asm__("fr6") = tx3;
+  register float __x4 __asm__("fr7") = tx4;
+
+  // vector FV8
+  register float __y1 __asm__("fr8") = ty1;
+  register float __y2 __asm__("fr9") = ty2;
+  register float __y3 __asm__("fr10") = ty3; // in place already
+  register float __y4 __asm__("fr11") = ty4;
+
+  // This actually does a 4x4 matrix multiply to do the outer product.
+  // It's this:
+  //
+  //  [ x1 x1 x1 x1 ] [ y1 0 0 0 ]     [ x1y1 x1y2 x1y3 x1y4 ]
+  //  [ x2 x2 x2 x2 ] [ 0 y2 0 0 ]  =  [ x2y1 x2y2 x2y3 x2y4 ]
+  //  [ x3 x3 x3 x3 ] [ 0 0 y3 0 ]     [ x3y1 x3y2 x3y3 x3y4 ]
+  //  [ x4 x4 x4 x4 ] [ 0 0 0 y4 ]     [ x4y1 x4y2 x4y3 x4y4 ]
+  //
+
+  asm volatile (
+    // zero out unoccupied front floats to make a double 0 in DR2
+    "fldi0 FR2\n\t"
+    "fmov FR2, FR3\n\t"
+    "fschg\n\t" // switch fmov to paired moves (note: only paired moves can access XDn regs)
+    // fmov 'x1' and 'x2' from FR4, FR5 into position (XF0,4,8,12, XF1,5,9,13)
+    "fmov DR4, XD0\n\t"
+    "fmov DR4, XD4\n\t"
+    "fmov DR4, XD8\n\t"
+    "fmov DR4, XD12\n\t"
+    // fmov 'x3' and 'x4' from FR6, FR7 into position (XF2,6,10,14, XF3,7,11,15)
+    "fmov DR6, XD2\n\t"
+    "fmov DR6, XD6\n\t"
+    "fmov DR6, XD10\n\t"
+    "fmov DR6, XD14\n\t"
+    // set up front floats (y1-y4)
+    "fmov DR8, DR0\n\t" // FR0 = y1, FR1 = y2
+    "fmov DR8, DR4\n\t" // FR4 = y1, FR5 = y2
+    "fmov DR10, DR14\n\t" // FR14 = y3, FR15 = y4
+    // finish zeroing out front floats
+    "fmov DR2, DR6\n\t" // FR6 = FR7 = 0
+    "fmov DR2, DR8\n\t" // FR8 = FR9 = 0
+    "fmov DR2, DR12\n\t" // FR12 = FR13 = 0
+    "fschg\n\t" // switch back to single-move mode
+    // zero out remaining values and matrix multiply 4x4
+    "fmov FR2, FR1\n\t" // FV0 = [ y1 0 0 0 ]
+    "ftrv XMTRX, FV0\n\t"
+
+    "fmov FR6, FR4\n\t" // FV4 = [ 0 y2 0 0 ]
+    "ftrv XMTRX, FV4\n\t"
+
+    "fmov FR8, FR11\n\t" // FV8 = [ 0 0 y3 0 ]
+    "ftrv XMTRX, FV8\n\t"
+
+    "fmov FR12, FR14\n\t" // FV12 = [ 0 0 0 y4 ]
+    "ftrv XMTRX, FV12\n\t"
+    // Save output in XF regs
+    "frchg\n"
+  : // no outputs
+  : "f" (__x1), "f" (__x2), "f" (__x3), "f" (__x4), "f" (__y1), "f" (__y2), "f" (__y3), "f" (__y4) // inputs
+  : "fr0", "fr1", "fr2", "fr3", "fr12", "fr13", "fr14", "fr15" // clobbers, can't avoid it
+  );
+  // GCC will restore FR12-FR15 from the stack after this, so we really can't keep the output in the front bank.
+}
+
+// Matrix transform: matrix * vector = vector
+//                   _    _       _    _
+//  [ ----------- ] |  x1  |     |  z1  |
+//  [ ---XMTRX--- ] |  x2  |  =  |  z2  |
+//  [ ----------- ] |  x3  |     |  z3  |
+//  [ ----------- ] |_ x4 _|     |_ z4 _|
+//
+// IMPORTANT USAGE INFORMATION (matrix transform):
+//
+// Return vector struct maps 1:1 to the above diagram:
+//
+//  typedef struct {
+//   float z1;
+//   float z2;
+//   float z3;
+//   float z4;
+// } RETURN_VECTOR_STRUCT;
+//
+// Similarly to how the sine and cosine functions use fsca and return 2 floats,
+// the matrix transform function actually returns 4 floats. The SH4 only multiplies
+// 4x4 matrices with 4x1 vectors, which is why the output is like that.
+//
+// Multiply a matrix stored in the back bank (XMTRX) with an input vector; returns XMTRX * [x1 x2 x3 x4] as a RETURN_VECTOR_STRUCT
+static inline __attribute__((always_inline)) RETURN_VECTOR_STRUCT MATH_Matrix_Transform(float x1, float x2, float x3, float x4)
+{
+  // The floats comprising FV4 are the regs that are passed in.
+  // Just need to make sure GCC doesn't modify anything, and these register vars do that job.
+
+  // Temporary variables are necessary per GCC to avoid clobbering:
+  // https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables
+
+  float tx1 = x1;
+  float tx2 = x2;
+  float tx3 = x3;
+  float tx4 = x4;
+
+  // output vector FV0: preloaded with the input vector, then transformed in place by ftrv
+  register float __z1 __asm__("fr0") = tx1;
+  register float __z2 __asm__("fr1") = tx2;
+  register float __z3 __asm__("fr2") = tx3;
+  register float __z4 __asm__("fr3") = tx4;
+
+  asm volatile ("ftrv XMTRX, FV0\n\t"
+    // have to do this to obey SH4 calling convention--output returned in FV0
+    : "+w" (__z1), "+f" (__z2), "+f" (__z3), "+f" (__z4) // outputs, "+" means r/w
+    : // no inputs
+    : // no clobbers
+  );
+
+  RETURN_VECTOR_STRUCT output = {__z1, __z2, __z3, __z4}; // members filled in z1..z4 order
+  return output;
+}
+
+// Matrix Transpose
+//
+// This does a matrix transpose on the matrix in XMTRX, which swaps rows with
+// columns as follows (math notation is [XMTRX]^T):
+//
+//  [ a b c d ] T   [ a e i m ]
+//  [ e f g h ]  =  [ b f j n ]
+//  [ i j k l ]     [ c g k o ]
+//  [ m n o p ]     [ d h l p ]
+//
+// PLEASE NOTE: It is faster to avoid the need for a transpose altogether by
+// structuring matrices and vectors accordingly.
+static inline __attribute__((always_inline)) void MATH_Matrix_Transpose(void)
+{
+  asm volatile (
+    "frchg\n\t" // swap banks so XMTRX can be addressed as FR0-FR15 (fmov for singles only works on front bank)
+    // FR0, FR5, FR10, and FR15 (the diagonal) are already in place
+    // swap FR1 and FR4
+    "flds FR1, FPUL\n\t"
+    "fmov FR4, FR1\n\t"
+    "fsts FPUL, FR4\n\t"
+    // swap FR2 and FR8
+    "flds FR2, FPUL\n\t"
+    "fmov FR8, FR2\n\t"
+    "fsts FPUL, FR8\n\t"
+    // swap FR3 and FR12
+    "flds FR3, FPUL\n\t"
+    "fmov FR12, FR3\n\t"
+    "fsts FPUL, FR12\n\t"
+    // swap FR6 and FR9
+    "flds FR6, FPUL\n\t"
+    "fmov FR9, FR6\n\t"
+    "fsts FPUL, FR9\n\t"
+    // swap FR7 and FR13
+    "flds FR7, FPUL\n\t"
+    "fmov FR13, FR7\n\t"
+    "fsts FPUL, FR13\n\t"
+    // swap FR11 and FR14
+    "flds FR11, FPUL\n\t"
+    "fmov FR14, FR11\n\t"
+    "fsts FPUL, FR14\n\t"
+    // restore XMTRX to back bank
+    "frchg\n"
+    : // no outputs
+    : // no inputs
+    : "fpul" // clobbers: FPUL is the scratch register used by every swap above
+  );
+}
+
+// Matrix product: matrix * matrix = matrix
+//
+// These use the whole dang floating point unit.
+//
+//  [ ----------- ] [ ----------- ]     [ ----------- ]
+//  [ ---Back---- ] [ ---Front--- ]  =  [ ---XMTRX--- ]
+//  [ ---Matrix-- ] [ ---Matrix-- ]     [ ----------- ]
+//  [ --(XMTRX)-- ] [ ----------- ]     [ ----------- ]
+//
+// Multiply a matrix stored in the back bank with a matrix loaded from memory
+// Output is stored in the back bank (XMTRX)
+static inline __attribute__((always_inline)) void MATH_Matrix_Product(ALL_FLOATS_STRUCT * front_matrix)
+{
+  /*
+    // This prefetching should help a bit if placed suitably far enough in advance (not here)
+    // Possibly right before this function call. Change the "front_matrix" variable appropriately.
+    // SH4 does not support r/w or temporal prefetch hints, so we only need to pass in an address.
+    __builtin_prefetch(front_matrix);
+  */
+
+  unsigned int prefetch_scratch; // receives front_matrix + 32, the address handed to 'pref' below
+
+  asm volatile (
+    "mov %[fmtrx], %[pref_scratch]\n\t" // parallel-load address for prefetching (MT)
+    "add #32, %[pref_scratch]\n\t" // offset by 32 (EX - flow dependency, but 'add' is actually parallelized since 'mov Rm, Rn' is 0-cycle)
+    "fschg\n\t" // switch fmov to paired moves (FE)
+    "pref @%[pref_scratch]\n\t" // Get a head start prefetching the second half of the 64-byte data (LS)
+    // interleave loads and matrix multiply 4x4: each 4-float group of front_matrix is loaded, then transformed in place by XMTRX
+    "fmov.d @%[fmtrx]+, DR0\n\t" // (LS)
+    "fmov.d @%[fmtrx]+, DR2\n\t"
+    "fmov.d @%[fmtrx]+, DR4\n\t" // (LS) want to issue the next one before 'ftrv' for parallel exec
+    "ftrv XMTRX, FV0\n\t" // (FE)
+
+    "fmov.d @%[fmtrx]+, DR6\n\t"
+    "fmov.d @%[fmtrx]+, DR8\n\t" // prefetch should work for here
+    "ftrv XMTRX, FV4\n\t"
+
+    "fmov.d @%[fmtrx]+, DR10\n\t"
+    "fmov.d @%[fmtrx]+, DR12\n\t"
+    "ftrv XMTRX, FV8\n\t"
+
+    "fmov.d @%[fmtrx], DR14\n\t" // (LS, but this will stall 'ftrv' for 3 cycles)
+    "fschg\n\t" // switch back to single moves (and avoid stalling 'ftrv') (FE)
+    "ftrv XMTRX, FV12\n\t" // (FE)
+    // Save output in XF regs: the bank swap turns the product in FR0-FR15 into the new XMTRX
+    "frchg\n"
+    : [fmtrx] "+r" ((unsigned int)front_matrix), [pref_scratch] "=&r" (prefetch_scratch) // outputs, "+" means r/w
+    : // no inputs
+    : "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7", "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15" // clobbers (GCC doesn't know about back bank, so writing to it isn't clobbered)
+  );
+}
+
+// Load two 4x4 matrices and multiply them, storing the output into the back bank (XMTRX)
+//
+// MATH_Load_Matrix_Product() is slightly faster than doing this:
+//    MATH_Load_XMTRX(matrix1)
+//    MATH_Matrix_Product(matrix2)
+// as it saves having to do 2 extraneous 'fschg' instructions.
+//
+static inline __attribute__((always_inline)) void MATH_Load_Matrix_Product(ALL_FLOATS_STRUCT * matrix1, ALL_FLOATS_STRUCT * matrix2)
+{
+  /*
+    // This prefetching should help a bit if placed suitably far enough in advance (not here)
+    // Possibly right before this function call. Change the "matrix1" variable appropriately.
+    // SH4 does not support r/w or temporal prefetch hints, so we only need to pass in an address.
+    __builtin_prefetch(matrix1);
+  */
+
+  unsigned int prefetch_scratch; // scratch address register: first matrix1 + 32, later matrix2 + 32
+
+  asm volatile (
+    "mov %[bmtrx], %[pref_scratch]\n\t" // (MT)
+    "add #32, %[pref_scratch]\n\t" // offset by 32 (EX - flow dependency, but 'add' is actually parallelized since 'mov Rm, Rn' is 0-cycle)
+    "fschg\n\t" // switch fmov to paired moves (note: only paired moves can access XDn regs) (FE)
+    "pref @%[pref_scratch]\n\t" // Get a head start prefetching the second half of the 64-byte data (LS)
+    // back matrix: matrix1 is loaded straight into XD0-XD14
+    "fmov.d @%[bmtrx]+, XD0\n\t" // (LS)
+    "fmov.d @%[bmtrx]+, XD2\n\t"
+    "fmov.d @%[bmtrx]+, XD4\n\t"
+    "fmov.d @%[bmtrx]+, XD6\n\t"
+    "pref @%[fmtrx]\n\t" // prefetch fmtrx now while we wait (LS)
+    "fmov.d @%[bmtrx]+, XD8\n\t" // bmtrx prefetch should work for here
+    "fmov.d @%[bmtrx]+, XD10\n\t"
+    "fmov.d @%[bmtrx]+, XD12\n\t"
+    "mov %[fmtrx], %[pref_scratch]\n\t" // (MT)
+    "add #32, %[pref_scratch]\n\t" // offset by 32 in the scratch register (EX - flow dependency, but 'add' is actually parallelized since 'mov Rm, Rn' is 0-cycle)
+    "fmov.d @%[bmtrx], XD14\n\t"
+    "pref @%[pref_scratch]\n\t" // Get a head start prefetching the second half of the 64-byte data (LS)
+    // front matrix: matrix2 is loaded into DR0-DR14 four floats at a time
+    // interleave loads and matrix multiply 4x4
+    "fmov.d @%[fmtrx]+, DR0\n\t"
+    "fmov.d @%[fmtrx]+, DR2\n\t"
+    "fmov.d @%[fmtrx]+, DR4\n\t" // (LS) want to issue the next one before 'ftrv' for parallel exec
+    "ftrv XMTRX, FV0\n\t" // (FE)
+
+    "fmov.d @%[fmtrx]+, DR6\n\t"
+    "fmov.d @%[fmtrx]+, DR8\n\t"
+    "ftrv XMTRX, FV4\n\t"
+
+    "fmov.d @%[fmtrx]+, DR10\n\t"
+    "fmov.d @%[fmtrx]+, DR12\n\t"
+    "ftrv XMTRX, FV8\n\t"
+
+    "fmov.d @%[fmtrx], DR14\n\t" // (LS, but this will stall 'ftrv' for 3 cycles)
+    "fschg\n\t" // switch back to single moves (and avoid stalling 'ftrv') (FE)
+    "ftrv XMTRX, FV12\n\t" // (FE)
+    // Save output in XF regs: the bank swap turns the product in FR0-FR15 into the new XMTRX
+    "frchg\n"
+    : [bmtrx] "+&r" ((unsigned int)matrix1), [fmtrx] "+r" ((unsigned int)matrix2), [pref_scratch] "=&r" (prefetch_scratch) // outputs, "+" means r/w, "&" means it's written to before all inputs are consumed
+    : // no inputs
+    : "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7", "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15" // clobbers (GCC doesn't know about back bank, so writing to it isn't clobbered)
+  );
+}
+
+//------------------------------------------------------------------------------
+// Matrix load and store operations
+//------------------------------------------------------------------------------
+
+// Load a matrix from memory into the back bank (XMTRX)
+static inline __attribute__((always_inline)) void MATH_Load_XMTRX(ALL_FLOATS_STRUCT * back_matrix)
+{
+  /*
+    // This prefetching should help a bit if placed suitably far enough in advance (not here)
+    // Possibly right before this function call. Change the "back_matrix" variable appropriately.
+    // SH4 does not support r/w or temporal prefetch hints, so we only need to pass in an address.
+    __builtin_prefetch(back_matrix);
+  */
+
+  unsigned int prefetch_scratch; // receives back_matrix + 32, the address handed to 'pref' below
+
+  asm volatile (
+    "mov %[bmtrx], %[pref_scratch]\n\t" // (MT)
+    "add #32, %[pref_scratch]\n\t" // offset by 32 (EX - flow dependency, but 'add' is actually parallelized since 'mov Rm, Rn' is 0-cycle)
+    "fschg\n\t" // switch fmov to paired moves (note: only paired moves can access XDn regs) (FE)
+    "pref @%[pref_scratch]\n\t" // Get a head start prefetching the second half of the 64-byte data (LS)
+    "fmov.d @%[bmtrx]+, XD0\n\t" // eight post-incrementing paired loads: the lowest address goes to XD0, the highest to XD14
+    "fmov.d @%[bmtrx]+, XD2\n\t"
+    "fmov.d @%[bmtrx]+, XD4\n\t"
+    "fmov.d @%[bmtrx]+, XD6\n\t"
+    "fmov.d @%[bmtrx]+, XD8\n\t"
+    "fmov.d @%[bmtrx]+, XD10\n\t"
+    "fmov.d @%[bmtrx]+, XD12\n\t"
+    "fmov.d @%[bmtrx], XD14\n\t"
+    "fschg\n" // switch back to single moves
+    : [bmtrx] "+r" ((unsigned int)back_matrix), [pref_scratch] "=&r" (prefetch_scratch) // outputs, "+" means r/w
+    : // no inputs
+    : // clobbers (GCC doesn't know about back bank, so writing to it isn't clobbered)
+  );
+}
+
+// Store XMTRX to memory
+static inline __attribute__((always_inline)) ALL_FLOATS_STRUCT * MATH_Store_XMTRX(ALL_FLOATS_STRUCT * destination)
+{
+  /*
+    // This prefetching should help a bit if placed suitably far enough in advance (not here)
+    // Possibly right before this function call. Change the "destination" variable appropriately.
+    // SH4 does not support r/w or temporal prefetch hints, so we only need to pass in an address.
+    __builtin_prefetch( (ALL_FLOATS_STRUCT*)((unsigned char*)destination + 32) ); // Store works backwards, so note the '+32' here
+  */
+
+  char * output = ((char*)destination) + sizeof(ALL_FLOATS_STRUCT); // one past the end of the struct; ALL_FLOATS_STRUCT should be 64 bytes
+
+  asm volatile (
+    "fschg\n\t" // switch fmov to paired moves (note: only paired moves can access XDn regs) (FE)
+    "pref @%[dest_base]\n\t" // Get a head start prefetching the first half of the 64-byte data, which is written last (LS)
+    "fmov.d XD14, @-%[out_mtrx]\n\t" // These do *(--output) = XDn, so the highest pair is stored first (LS)
+    "fmov.d XD12, @-%[out_mtrx]\n\t"
+    "fmov.d XD10, @-%[out_mtrx]\n\t"
+    "fmov.d XD8, @-%[out_mtrx]\n\t"
+    "fmov.d XD6, @-%[out_mtrx]\n\t"
+    "fmov.d XD4, @-%[out_mtrx]\n\t"
+    "fmov.d XD2, @-%[out_mtrx]\n\t"
+    "fmov.d XD0, @-%[out_mtrx]\n\t" // XD0 ends up at 'destination' itself, mirroring the layout MATH_Load_XMTRX reads
+    "fschg\n" // switch back to single moves
+    : [out_mtrx] "+&r" ((unsigned int)output) // outputs, "+" means r/w, "&" means it's written to before all inputs are consumed
+    : [dest_base] "r" ((unsigned int)destination) // inputs
+    : "memory" // clobbers
+  );
+
+  return destination;
+}
+
+
+// In general, writing the entire required math routine in one asm function is
+// the best way to go for performance reasons anyways, and in that situation one
+// can just throw calling convention to the wind until returning back to C.
+
+//==============================================================================
+// Miscellaneous Functions
+//==============================================================================
+//
+// The following functions are provided as examples of ways in which these math
+// functions can be used.
+//
+// Reminder: 1 fsca unit = 1/182.044444444 of a degree or 1/10430.3783505 of a radian
+// In order to make the best use of fsca units, a program must expect them from
+// the outset and not "make them" by dividing radians or degrees to get them,
+// otherwise it's just giving the 'fsca' instruction radians or degrees!
+//
+/*
+
+  //------------------------------------------------------------------------------
+  // Commonly useful functions
+  //------------------------------------------------------------------------------
+
+  // Returns 1 if point 't' is inside triangle with vertices 'v0', 'v1', and 'v2', and 0 if not
+  int MATH_Is_Point_In_Triangle(float v0x, float v0y, float v1x, float v1y, float v2x, float v2y, float ptx, float pty)
+
+  //------------------------------------------------------------------------------
+  // Interpolation
+  //------------------------------------------------------------------------------
+
+  // Linear interpolation
+  float MATH_Lerp(float a, float b, float t)
+
+  // Spherical interpolation ('theta' in fsca units)
+  float MATH_Slerp(float a, float b, float t, float theta)
+
+  //------------------------------------------------------------------------------
+  // Fast Sinc functions (unnormalized, sin(x)/x version)
+  //------------------------------------------------------------------------------
+  // Just pass in MATH_pi * x for normalized versions :)
+
+  // Sinc function (fsca units)
+  float MATH_Fast_Sincf(float x)
+
+  // Sinc function (degrees)
+  float MATH_Fast_Sincf_Deg(float x)
+
+  // Sinc function (rads)
+  float MATH_Fast_Sincf_Rad(float x)
+
+*/
+
+//------------------------------------------------------------------------------
+// Commonly useful functions
+//------------------------------------------------------------------------------
+
+// Returns 1 if point 'pt' is inside triangle with vertices 'v0', 'v1', and 'v2', and 0 if not
+// Determines containment using a barycentric coordinate transformation
+// Adapted from: https://stackoverflow.com/questions/2049582/how-to-determine-if-a-point-is-in-a-2d-triangle
+// Specifically the answer by user 'andreasdr' in addition to the comment by user 'urraka' on the answer from user 'Andreas Brinck'
+//
+// The notation here assumes v0x is the x-component of v0, v0y is the y-component of v0, etc.
+//
+static inline __attribute__((always_inline)) int MATH_Is_Point_In_Triangle(float v0x, float v0y, float v1x, float v1y, float v2x, float v2y, float ptx, float pty)
+{
+  // Three MATH_fipr calls produce the two unnormalized barycentric weights of
+  // 'pt' ('sdot', 'tdot') and the value both are measured against ('areadot').
+  float sdot = MATH_fipr(v0y, -v0x, v2y - v0y, v0x - v2x, v2x, v2y, ptx, pty);
+  float tdot = MATH_fipr(v0x, -v0y, v0y - v1y, v1x - v0x, v1y, v1x, ptx, pty);
+  float areadot = MATH_fipr(-v1y, v0y, v0x, v1x, v2x, -v1x + v2x, v1y - v2y, v2y);
+
+  // The winding of the triangle decides the sign of 'areadot'; scaling all
+  // three values by the same +/-1 factor leaves 'areadot' non-negative.
+  const float winding = (areadot < 0.0f) ? -1.0f : 1.0f;
+
+  sdot *= winding;
+  tdot *= winding;
+  areadot *= winding;
+
+  // Inside means both weights are strictly positive and their sum stays
+  // strictly below 'areadot'; every comparison is strict.
+  const int inside = (sdot > 0.0f) && (tdot > 0.0f) && (areadot > (sdot + tdot));
+
+  return inside;
+}
+
+//------------------------------------------------------------------------------
+// Interpolation
+//------------------------------------------------------------------------------
+
+// Linear interpolation
+static inline __attribute__((always_inline)) float MATH_Lerp(float a, float b, float t)
+{ // Blends from 'a' towards 'b' by factor 't'; presumably MATH_fmac(x, y, z) computes x * y + z -- confirm against its definition
+  return MATH_fmac(t, (b-a), a);
+}
+
+// Spherical interpolation ('theta' in fsca units)
+static inline __attribute__((always_inline)) float MATH_Slerp(float a, float b, float t, float theta)
+{
+  // a is an element of v0, b is an element of v1, so this is called once per vector component
+  // v = ( v0 * sin(theta - t * theta) + v1 * sin(t * theta) ) / sin(theta)
+  // by using sine/cosine identities and properties, this can be optimized to:
+  // v = v0 * cos(-t * theta) + ( v0 * ( cos(theta) * sin(-t * theta) ) - sin(-t * theta) * v1 ) / sin(theta)
+  // which only requires two calls to fsca.
+  // Specifically, sin(a + b) = sin(a)cos(b) + cos(a)sin(b) & sin(-a) = -sin(a)
+
+  // MATH_fsca_* functions return reverse-ordered complex numbers for speed reasons (i.e. normally sine is the imaginary part)
+  // This could be made even faster by using MATH_fsca_Int() with 'theta' and 't' as unsigned ints
+
+#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION
+  // This branch reads the sine/cosine pair from a RETURN_FSCA_STRUCT
+  RETURN_FSCA_STRUCT sine_cosine = MATH_fsca_Float(theta);
+  float sine_value_theta = sine_cosine.sine;
+  float cosine_value_theta = sine_cosine.cosine;
+
+  RETURN_FSCA_STRUCT sine_cosine2 = MATH_fsca_Float(-t * theta);
+  float sine_value_minus_t_theta = sine_cosine2.sine;
+  float cosine_value_minus_t_theta = sine_cosine2.cosine;
+
+#else
+  // This branch reads the same pair from a _Complex float: real part = sine, imaginary part = cosine
+  _Complex float sine_cosine = MATH_fsca_Float(theta);
+  float sine_value_theta = __real__ sine_cosine;
+  float cosine_value_theta = __imag__ sine_cosine;
+
+  _Complex float sine_cosine2 = MATH_fsca_Float(-t * theta);
+  float sine_value_minus_t_theta = __real__ sine_cosine2;
+  float cosine_value_minus_t_theta = __imag__ sine_cosine2;
+
+#endif
+  // numer is the bracketed numerator of the optimized formula; it is divided by sin(theta) below
+  float numer = a * cosine_value_theta * sine_value_minus_t_theta - sine_value_minus_t_theta * b;
+  float output_float = a * cosine_value_minus_t_theta + MATH_Fast_Divide(numer, sine_value_theta);
+  // NOTE(review): nothing here guards against sin(theta) == 0 -- confirm how MATH_Fast_Divide treats a zero divisor
+  return output_float;
+}
+
+//------------------------------------------------------------------------------
+// Fast Sinc (unnormalized, sin(x)/x version)
+//------------------------------------------------------------------------------
+//
+// Just pass in MATH_pi * x for normalized versions :)
+//
+
+// Sinc function (fsca units)
+static inline __attribute__((always_inline)) float MATH_Fast_Sincf(float x)
+{ // Returns sine(x) / x, with x == 0 special-cased so the division never sees a zero divisor
+  if(x == 0.0f)
+  {
+    return 1.0f;
+  }
+  // Both branches take the sine from MATH_fsca_Float(x); they differ only in the type that carries it
+#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION
+
+  RETURN_FSCA_STRUCT sine_cosine = MATH_fsca_Float(x);
+  float sine_value = sine_cosine.sine;
+
+#else
+
+  _Complex float sine_cosine = MATH_fsca_Float(x);
+  float sine_value = __real__ sine_cosine;
+
+#endif
+  // NOTE(review): the divisor is x in the unit it was passed in (fsca units), so results near 0 need not approach the 1.0f returned at 0 -- confirm intent
+  return MATH_Fast_Divide(sine_value, x);
+}
+
+// Sinc function (degrees)
+static inline __attribute__((always_inline)) float MATH_Fast_Sincf_Deg(float x)
+{ // Returns sine(x) / x, with x == 0 special-cased so the division never sees a zero divisor
+  if(x == 0.0f)
+  {
+    return 1.0f;
+  }
+  // Both branches take the sine from MATH_fsca_Float_Deg(x); they differ only in the type that carries it
+#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION
+
+  RETURN_FSCA_STRUCT sine_cosine = MATH_fsca_Float_Deg(x);
+  float sine_value = sine_cosine.sine;
+
+#else
+
+  _Complex float sine_cosine = MATH_fsca_Float_Deg(x);
+  float sine_value = __real__ sine_cosine;
+
+#endif
+  // NOTE(review): the divisor is x in the unit it was passed in (degrees), so results near 0 need not approach the 1.0f returned at 0 -- confirm intent
+  return MATH_Fast_Divide(sine_value, x);
+}
+
+// Sinc function (rads)
+static inline __attribute__((always_inline)) float MATH_Fast_Sincf_Rad(float x)
+{ // Returns sine(x) / x, with x == 0 special-cased so the division never sees a zero divisor
+  if(x == 0.0f)
+  {
+    return 1.0f;
+  }
+  // Both branches take the sine from MATH_fsca_Float_Rad(x); they differ only in the type that carries it
+#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION
+
+  RETURN_FSCA_STRUCT sine_cosine = MATH_fsca_Float_Rad(x);
+  float sine_value = sine_cosine.sine;
+
+#else
+
+  _Complex float sine_cosine = MATH_fsca_Float_Rad(x);
+  float sine_value = __real__ sine_cosine;
+
+#endif
+  // The sine is divided by x itself, which this variant takes in radians
+  return MATH_Fast_Divide(sine_value, x);
+}
+
+//==============================================================================
+// Miscellaneous Snippets
+//==============================================================================
+//
+// The following snippets are best implemented manually in user code (they can't
+// be put into their own functions without incurring performance penalties).
+//
+// They also serve as examples of how one might use the functions in this header.
+//
+/*
+  Normalize a vector (x, y, z) and get its pre-normalized magnitude (length)
+*/
+
+//
+// Normalize a vector (x, y, z) and get its pre-normalized magnitude (length)
+//
+// magnitude = sqrt(x^2 + y^2 + z^2)
+// (x, y, z) = 1/magnitude * (x, y, z)
+//
+// x, y, z, and magnitude are assumed already existing floats
+//
+
+/* -- start --
+
+  // Don't need an 'else' with this (if length is 0, x = y = z = 0)
+  magnitude = 0;
+
+  if(__builtin_expect(x || y || z, 1))
+  {
+    temp = MATH_Sum_of_Squares(x, y, z, 0); // temp = x^2 + y^2 + z^2 + 0^2
+    float normalizer = MATH_fsrra(temp); // 1/sqrt(temp)
+    x = normalizer * x;
+    y = normalizer * y;
+    z = normalizer * z;
+    magnitude = MATH_Fast_Invert(normalizer);
+  }
+
+-- end -- */
+
+
+#endif /* __SH4_MATH_H_ */
+
diff --git a/third_party/gldc/src/state.c b/third_party/gldc/src/state.c
new file mode 100644
index 0000000..a9b2a72
--- /dev/null
+++ b/third_party/gldc/src/state.c
@@ -0,0 +1,236 @@
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "private.h"
+
+GLboolean STATE_DIRTY = GL_TRUE; /* set to GL_TRUE again by glScissor() and gldcBindTexture() */
+
+GLboolean DEPTH_TEST_ENABLED = GL_FALSE; /* apply_poly_header: GEQUAL (on) vs ALWAYS (off) depth compare */
+GLboolean DEPTH_MASK_ENABLED = GL_FALSE; /* apply_poly_header: depth write enable vs disable */
+
+GLboolean CULLING_ENABLED = GL_FALSE; /* apply_poly_header: PVR_CULLING_CW (on) vs PVR_CULLING_SMALL (off) */
+
+GLboolean FOG_ENABLED        = GL_FALSE; /* apply_poly_header: PVR_FOG_TABLE (on) vs PVR_FOG_DISABLE (off) */
+GLboolean ALPHA_TEST_ENABLED = GL_FALSE; /* either this or BLEND_ENABLED switches alpha on in apply_poly_header */
+
+GLboolean SCISSOR_TEST_ENABLED = GL_FALSE; /* gates _glApplyScissor() and selects the user-clip mode */
+GLenum SHADE_MODEL = PVR_SHADE_GOURAUD; /* holds a PVR shading value that apply_poly_header uses as-is */
+
+GLboolean BLEND_ENABLED = GL_FALSE; /* either this or ALPHA_TEST_ENABLED switches alpha on in apply_poly_header */
+
+GLboolean TEXTURES_ENABLED = GL_FALSE; /* while GL_FALSE, apply_poly_header emits untextured headers */
+GLboolean AUTOSORT_ENABLED = GL_FALSE; /* forces GEQUAL depth compare for the transparent list */
+
+static struct {
+    int x;
+    int y;
+    int width;
+    int height;
+    GLboolean applied; /* true once _glApplyScissor() has emitted this rectangle */
+} scissor_rect = {0, 0, 640, 480, false}; /* _glInitContext() overwrites the size with the video mode's */
+
+void _glInitContext() { /* start with a scissor rectangle that spans the whole video mode */
+    scissor_rect.width  = vid_mode->width;
+    scissor_rect.height = vid_mode->height;
+    scissor_rect.y = 0;
+    scissor_rect.x = 0;
+}
+
+/* Depth Testing */
+void glClearDepth(float depth) {
+    /* We reverse because using invW means that farther Z == lower number; MIN() keeps the value passed on at or below PVR_MIN_Z */
+    pvr_set_zclip(MIN(1.0f - depth, PVR_MIN_Z));
+}
+
+void glScissor(int x, int y, int width, int height) {
+    /* A request that matches the stored rectangle changes nothing, so skip all work */
+    const int unchanged = (scissor_rect.x == x) && (scissor_rect.y == y) &&
+        (scissor_rect.width == width) && (scissor_rect.height == height);
+    if(unchanged) {
+        return;
+    }
+
+    /* Record the new rectangle and mark it as not yet emitted to the polygon lists */
+    scissor_rect.applied = false;
+    scissor_rect.x = x;
+    scissor_rect.y = y;
+    scissor_rect.width = width;
+    scissor_rect.height = height;
+    STATE_DIRTY = GL_TRUE; // FIXME: do we need this?
+
+    _glApplyScissor(false);
+}
+
+/* Setup the hardware user clip rectangle.
+
+   The minimum clip rectangle is a 32x32 area which is dependent on the tile
+   size used by the tile accelerator. The PVR switches off rendering to tiles
+   outside or inside the defined rectangle dependent upon the 'clipmode'
+   bits in the polygon header.
+
+   Clip rectangles therefore must have a size that is some multiple of 32.
+
+    glScissor(0, 0, 32, 32) allows only the 'tile' in the lower left
+    hand corner of the screen to be modified and glScissor(0, 0, 0, 0)
+    disallows modification to all 'tiles' on the screen.
+
+    We call this in the following situations:
+
+     - glEnable(GL_SCISSOR_TEST) is called
+     - glScissor() is called
+     - After glKosSwapBuffers()
+
+    This ensures that a clip command is added to every vertex list
+    at the right place, either when enabling the scissor test, or
+    when the scissor test changes.
+*/
+void _glApplyScissor(int force) {
+    /* Don't do anything if clipping is disabled */
+    if(!SCISSOR_TEST_ENABLED) {
+        return;
+    }
+
+    /* Don't apply if we already applied - nothing changed (a non-zero 'force' overrides this) */
+    if(scissor_rect.applied && !force) {
+        return;
+    }
+
+    PVRTileClipCommand c;
+
+    int miny, maxx, maxy;
+    /* Limit the rectangle's size to [0, video mode size]; its x/y origin is not limited here */
+    int scissor_width  = MAX(MIN(scissor_rect.width,  vid_mode->width),  0);
+    int scissor_height = MAX(MIN(scissor_rect.height, vid_mode->height), 0);
+
+    /* force the origin to the lower left-hand corner of the screen */
+    miny = (vid_mode->height - scissor_height) - scissor_rect.y;
+    maxx = (scissor_width + scissor_rect.x);
+    maxy = (scissor_height + miny);
+
+    /* load command structure while mapping screen coords to TA tiles */
+    c.flags = PVR_CMD_USERCLIP;
+    c.d1 = c.d2 = c.d3 = 0;
+    /* '>> 5' divides by 32, turning pixel coordinates into tile coordinates */
+    uint16_t vw = vid_mode->width >> 5;
+    uint16_t vh = vid_mode->height >> 5;
+    /* NOTE(review): the upper clamp bound is the tile count (vw/vh) rather than count - 1 -- confirm the hardware accepts that index */
+    c.sx = CLAMP(scissor_rect.x >> 5, 0, vw);
+    c.sy = CLAMP(miny >> 5, 0, vh);
+    c.ex = CLAMP((maxx >> 5) - 1, 0, vw);
+    c.ey = CLAMP((maxy >> 5) - 1, 0, vh);
+    /* The same clip command is appended to each of the OP, PT and TR lists */
+    aligned_vector_push_back(&OP_LIST.vector, &c, 1);
+    aligned_vector_push_back(&PT_LIST.vector, &c, 1);
+    aligned_vector_push_back(&TR_LIST.vector, &c, 1);
+    /* Remember that this rectangle has been emitted; glScissor() clears the flag when it changes */
+    scissor_rect.applied = true;
+}
+
+Viewport VIEWPORT;
+
+/* Set the GL viewport */
+void glViewport(int x, int y, int width, int height) { /* caches the half-extents and centre used for the viewport mapping */
+    VIEWPORT.hwidth  = width  *  0.5f;
+    VIEWPORT.hheight = height * -0.5f; /* negative on purpose: presumably flips Y for the screen's top-left origin -- confirm */
+    VIEWPORT.x_plus_hwidth  = x + width  * 0.5f; /* horizontal centre of the viewport */
+    VIEWPORT.y_plus_hheight = y + height * 0.5f; /* vertical centre; note this uses +0.5f, not hheight */
+}
+
+
+void apply_poly_header(PolyHeader* dst, PolyList* activePolyList) {
+    const TextureObject *tx1 = TEXTURE_ACTIVE;
+    uint32_t txr_base;
+    TRACE();
+    /* Fills 'dst' (cmd, mode1-3, d1-d4) from the global GL state, the active texture and the list's type */
+    int list_type = activePolyList->list_type;
+    int gen_color_clamp = PVR_CLRCLAMP_DISABLE;
+
+    int gen_culling = CULLING_ENABLED    ? PVR_CULLING_CW : PVR_CULLING_SMALL;
+    int depth_comp  = DEPTH_TEST_ENABLED ? PVR_DEPTHCMP_GEQUAL : PVR_DEPTHCMP_ALWAYS;
+    int depth_write = DEPTH_MASK_ENABLED ? PVR_DEPTHWRITE_ENABLE : PVR_DEPTHWRITE_DISABLE;
+
+    int gen_shading   = SHADE_MODEL;
+    int gen_clip_mode = SCISSOR_TEST_ENABLED       ? PVR_USERCLIP_INSIDE : PVR_USERCLIP_DISABLE;
+    int gen_fog_type  = FOG_ENABLED                ? PVR_FOG_TABLE : PVR_FOG_DISABLE;
+
+    int gen_alpha = (BLEND_ENABLED || ALPHA_TEST_ENABLED) ? PVR_ALPHA_ENABLE : PVR_ALPHA_DISABLE;
+    int blend_src = PVR_BLEND_SRCALPHA;
+    int blend_dst = PVR_BLEND_INVSRCALPHA;
+    /* Per-list overrides of the defaults chosen above */
+    if (list_type == PVR_LIST_OP_POLY) {
+        /* Opaque polys are always one/zero */
+        blend_src  = PVR_BLEND_ONE;
+        blend_dst  = PVR_BLEND_ZERO;
+    } else if (list_type == PVR_LIST_PT_POLY) {
+        /* Punch-through polys require fixed blending and depth modes */
+        blend_src  = PVR_BLEND_SRCALPHA;
+        blend_dst  = PVR_BLEND_INVSRCALPHA;
+        depth_comp = PVR_DEPTHCMP_LEQUAL;
+    } else if (list_type == PVR_LIST_TR_POLY && AUTOSORT_ENABLED) {
+        /* Autosort mode requires this mode for transparent polys */
+        depth_comp = PVR_DEPTHCMP_GEQUAL;
+    }
+
+    int txr_enable, txr_alpha; /* txr_alpha is only assigned, and only read, on the textured path */
+    if (!TEXTURES_ENABLED || !tx1 || !tx1->data) {
+        /* No texturing: textures are disabled, nothing is bound, or the bound texture has no storage */
+        txr_enable = PVR_TEXTURE_DISABLE;
+    } else {
+        txr_alpha  = (BLEND_ENABLED || ALPHA_TEST_ENABLED) ? PVR_TXRALPHA_ENABLE : PVR_TXRALPHA_DISABLE;
+        txr_enable = PVR_TEXTURE_ENABLE;
+    }
+
+    /* The base values for CMD */
+    dst->cmd = PVR_CMD_POLYHDR;
+    dst->cmd |= txr_enable << 3;
+    /* Force bits 18 and 19 on to switch to 6 triangle strips */
+    dst->cmd |= 0xC0000;
+
+    /* Or in the list type, shading type, color and UV formats */
+    dst->cmd |= (list_type             << PVR_TA_CMD_TYPE_SHIFT)     & PVR_TA_CMD_TYPE_MASK;
+    dst->cmd |= (PVR_CLRFMT_ARGBPACKED << PVR_TA_CMD_CLRFMT_SHIFT)   & PVR_TA_CMD_CLRFMT_MASK;
+    dst->cmd |= (gen_shading           << PVR_TA_CMD_SHADE_SHIFT)    & PVR_TA_CMD_SHADE_MASK;
+    dst->cmd |= (PVR_UVFMT_32BIT       << PVR_TA_CMD_UVFMT_SHIFT)    & PVR_TA_CMD_UVFMT_MASK;
+    dst->cmd |= (gen_clip_mode         << PVR_TA_CMD_USERCLIP_SHIFT) & PVR_TA_CMD_USERCLIP_MASK;
+
+    /* Polygon mode 1 */
+    dst->mode1  = (depth_comp  << PVR_TA_PM1_DEPTHCMP_SHIFT)   & PVR_TA_PM1_DEPTHCMP_MASK;
+    dst->mode1 |= (gen_culling << PVR_TA_PM1_CULLING_SHIFT)    & PVR_TA_PM1_CULLING_MASK;
+    dst->mode1 |= (depth_write << PVR_TA_PM1_DEPTHWRITE_SHIFT) & PVR_TA_PM1_DEPTHWRITE_MASK;
+    dst->mode1 |= (txr_enable  << PVR_TA_PM1_TXRENABLE_SHIFT)  & PVR_TA_PM1_TXRENABLE_MASK;
+
+    /* Polygon mode 2 */
+    dst->mode2  = (blend_src       << PVR_TA_PM2_SRCBLEND_SHIFT) & PVR_TA_PM2_SRCBLEND_MASK;
+    dst->mode2 |= (blend_dst       << PVR_TA_PM2_DSTBLEND_SHIFT) & PVR_TA_PM2_DSTBLEND_MASK;
+    dst->mode2 |= (gen_fog_type    << PVR_TA_PM2_FOG_SHIFT)      & PVR_TA_PM2_FOG_MASK;
+    dst->mode2 |= (gen_color_clamp << PVR_TA_PM2_CLAMP_SHIFT)    & PVR_TA_PM2_CLAMP_MASK;
+    dst->mode2 |= (gen_alpha       << PVR_TA_PM2_ALPHA_SHIFT)    & PVR_TA_PM2_ALPHA_MASK;
+
+    if (txr_enable == PVR_TEXTURE_DISABLE) {
+        dst->mode3 = 0;
+    } else {
+        GLuint filter = PVR_FILTER_NEAREST; /* bilinear is chosen only when both min and mag filters are GL_LINEAR */
+        if (tx1->minFilter == GL_LINEAR && tx1->magFilter == GL_LINEAR) filter = PVR_FILTER_BILINEAR;
+
+        dst->mode2 |= (txr_alpha                << PVR_TA_PM2_TXRALPHA_SHIFT) & PVR_TA_PM2_TXRALPHA_MASK;
+        dst->mode2 |= (filter                   << PVR_TA_PM2_FILTER_SHIFT)   & PVR_TA_PM2_FILTER_MASK;
+        dst->mode2 |= (tx1->mipmap_bias         << PVR_TA_PM2_MIPBIAS_SHIFT)  & PVR_TA_PM2_MIPBIAS_MASK;
+        dst->mode2 |= (PVR_TXRENV_MODULATEALPHA << PVR_TA_PM2_TXRENV_SHIFT)   & PVR_TA_PM2_TXRENV_MASK;
+
+        dst->mode2 |= (DimensionFlag(tx1->width)  << PVR_TA_PM2_USIZE_SHIFT) & PVR_TA_PM2_USIZE_MASK;
+        dst->mode2 |= (DimensionFlag(tx1->height) << PVR_TA_PM2_VSIZE_SHIFT) & PVR_TA_PM2_VSIZE_MASK;
+
+        /* Polygon mode 3 (the mipmap flag is always written as GL_FALSE here) */
+        dst->mode3  = (GL_FALSE   << PVR_TA_PM3_MIPMAP_SHIFT) & PVR_TA_PM3_MIPMAP_MASK;
+        dst->mode3 |= (tx1->color << PVR_TA_PM3_TXRFMT_SHIFT) & PVR_TA_PM3_TXRFMT_MASK;
+
+        /* Convert the texture address: keep bits 3..23 and shift them down by 3 */
+        txr_base = (uint32_t)tx1->data;
+        txr_base = (txr_base & 0x00fffff8) >> 3;
+        dst->mode3 |= txr_base;
+    }
+
+    dst->d1 = dst->d2 = 0xffffffff;
+    dst->d3 = dst->d4 = 0xffffffff;
+}
diff --git a/third_party/gldc/src/texture.c b/third_party/gldc/src/texture.c
new file mode 100644
index 0000000..5fc2875
--- /dev/null
+++ b/third_party/gldc/src/texture.c
@@ -0,0 +1,235 @@
+#include "private.h"
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sh4.h"
+#include "yalloc/yalloc.h"
+
+#ifndef NDEBUG
+/* We're debugging, use normal assert */
+#include <assert.h>
+#define gl_assert assert
+#else
+/* Release mode, use our custom assert */
+#include <stdio.h>
+#include <stdlib.h>
+
+#define gl_assert(x) /* release-mode assert: prints file and line, then exits with status 1 */ \
+    do {\
+        if(!(x)) {\
+            fprintf(stderr, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\
+            exit(1);\
+        }\
+    } while(0) /* no trailing ';' here: the caller's own semicolon completes the statement */
+
+#endif
+
+
+/* We always leave this amount of vram unallocated to prevent
+ * issues with the allocator */
+#define PVR_MEM_BUFFER_SIZE (64 * 1024)
+
+TextureObject* TEXTURE_ACTIVE = NULL; /* texture that bind/alloc/get operate on; starts at TEXTURE_LIST[0] after _glInitTextures() */
+static TextureObject TEXTURE_LIST[MAX_TEXTURE_COUNT]; /* indexed directly by texture id */
+static unsigned char TEXTURE_USED[(MAX_TEXTURE_COUNT + 7) / 8]; /* one bit per id, rounded up so a count that is not a multiple of 8 still fits */
+
+/* Reports whether 'id' is reserved in the TEXTURE_USED bitmap (non-zero = reserved). */
+static int texture_id_map_used(unsigned int id) {
+    /* ids arrive unchecked from gldcBindTexture/gldcDeleteTexture, so never index past the bitmap */
+    if(id >= MAX_TEXTURE_COUNT) return 0;
+    return TEXTURE_USED[id / 8] & (unsigned char)(1 << (id % 8));
+}
+
+/* Marks 'id' as reserved by setting its bit in the TEXTURE_USED bitmap. */
+static void texture_id_map_reserve(unsigned int id) {
+    const unsigned char bit = (unsigned char)(1 << (id % 8));
+    TEXTURE_USED[id / 8] |= bit;
+}
+
+/* Marks 'id' as free again by clearing its bit in the TEXTURE_USED bitmap. */
+static void texture_id_map_release(unsigned int id) {
+    const unsigned char keep = (unsigned char)~(1 << (id % 8));
+    TEXTURE_USED[id / 8] &= keep;
+}
+
+unsigned int texture_id_map_alloc(void) {
+    // Scan upwards from 1 for the first free id; ID 0 is reserved for default texture
+    unsigned int candidate = 1;
+    while(candidate < MAX_TEXTURE_COUNT && texture_id_map_used(candidate)) {
+        ++candidate;
+    }
+
+    // Every id is taken: returning 0 tells the caller nothing was reserved
+    if(candidate >= MAX_TEXTURE_COUNT) return 0;
+    texture_id_map_reserve(candidate);
+    return candidate;
+}
+
+
+static void* YALLOC_BASE = NULL; /* start of the pool handed to yalloc */
+static size_t YALLOC_SIZE = 0;   /* size of that pool in bytes */
+
+/* Allocates 'size' bytes from the pool. A failed first attempt triggers one
+ * defragmentation pass and a single retry; gl_assert fires if that fails too. */
+static void* yalloc_alloc_and_defrag(size_t size) {
+    void* ret = yalloc_alloc(YALLOC_BASE, size);
+
+    if(ret == NULL) {
+        fprintf(stderr, "Ran out of memory, defragmenting\n");
+        glDefragmentTextureMemory_KOS();
+        ret = yalloc_alloc(YALLOC_BASE, size);
+    }
+
+    gl_assert(ret && "Out of PVR memory!");
+
+    return ret;
+}
+
+#define GL_KOS_INTERNAL_DEFAULT_MIPMAP_LOD_BIAS 4
+static void _glInitializeTextureObject(TextureObject* txr, unsigned int id) { /* resets 'txr' to an empty texture with index 'id' */
+    txr->data   = NULL;                  /* no texture memory attached yet */
+    txr->mipmap = 0;                     /* no mip levels marked as set */
+    txr->height = txr->width = 0;
+    txr->index  = id;
+    txr->magFilter = GL_NEAREST;
+    txr->minFilter = GL_NEAREST;
+    txr->mipmap_bias = GL_KOS_INTERNAL_DEFAULT_MIPMAP_LOD_BIAS;
+}
+
+void _glInitTextures() {
+    memset(TEXTURE_USED, 0, sizeof(TEXTURE_USED));
+
+    // Initialize zero as an actual texture object though because apparently it is!
+    TextureObject* default_tex = &TEXTURE_LIST[0];
+    _glInitializeTextureObject(default_tex, 0);
+    texture_id_map_reserve(0);
+    TEXTURE_ACTIVE = default_tex;
+    size_t vram_free = pvr_mem_available();
+    gl_assert(vram_free > PVR_MEM_BUFFER_SIZE && "Not enough free PVR memory"); /* the subtraction below would wrap otherwise */
+    YALLOC_SIZE = vram_free - PVR_MEM_BUFFER_SIZE; /* Take all but 64kb VRAM */
+    YALLOC_BASE = pvr_mem_malloc(YALLOC_SIZE);
+    gl_assert(YALLOC_BASE && "Failed to reserve PVR memory"); /* never hand a NULL pool to yalloc_init */
+#ifdef __DREAMCAST__
+    /* Ensure memory is aligned */
+    gl_assert((uintptr_t) YALLOC_BASE % 32 == 0);
+#endif
+
+    yalloc_init(YALLOC_BASE, YALLOC_SIZE);
+}
+
+GLuint gldcGenTexture(void) {
+    GLuint id = texture_id_map_alloc();
+    gl_assert(id);  // Generated IDs must never be zero
+    /* texture_id_map_alloc() returns 0 when no id is free, so the assert above also catches running out of ids */
+    TextureObject* txr = &TEXTURE_LIST[id];
+    _glInitializeTextureObject(txr, id);
+
+    gl_assert(txr->index == id);
+    /* The returned id is the texture's slot in TEXTURE_LIST */
+    return id;
+}
+
+void gldcDeleteTexture(GLuint id) {
+    /* Zero is the "default texture" and is never deleted; ids that are not reserved are ignored */
+    if(id == 0 || !texture_id_map_used(id)) return;
+
+    /* From here on 'id' names a live texture object */
+    TextureObject* txr = &TEXTURE_LIST[id];
+    gl_assert(txr->index == id);
+
+    if(txr == TEXTURE_ACTIVE) {
+        // Deleting the bound texture falls back to the default texture
+        TEXTURE_ACTIVE = &TEXTURE_LIST[0];
+    }
+
+    if(txr->data) {
+        yalloc_free(YALLOC_BASE, txr->data);
+        txr->data = NULL;
+    }
+
+    /* Finally hand the id back so texture_id_map_alloc() can reuse it */
+    texture_id_map_release(id);
+}
+
+void gldcBindTexture(GLuint id) {
+    gl_assert(texture_id_map_used(id));
+    TextureObject* txr = &TEXTURE_LIST[id];
+    /* Make 'id' the texture that gldcAllocTexture() and gldcGetTexture() operate on */
+    TEXTURE_ACTIVE = txr;
+    gl_assert(TEXTURE_ACTIVE->index == id);
+    /* Flag the GL state as changed */
+    STATE_DIRTY = GL_TRUE;
+}
+
+int gldcAllocTexture(int w, int h, int format) {
+    TextureObject* active = TEXTURE_ACTIVE;
+    /* Storage from an earlier call is only kept when both dimensions are unchanged */
+    const int resized = (active->width != w) || (active->height != h);
+
+    if (active->data && resized) {
+        /* Dimensions differ: free the old texture memory and forget its mip levels */
+        yalloc_free(YALLOC_BASE, active->data);
+        active->data   = NULL;
+        active->mipmap = 0;
+    }
+
+    active->color  = format;
+    active->height = h;
+    active->width  = w;
+
+    if (!active->data) {
+        /* All colour formats are represented as shorts internally: 2 bytes per texel */
+        const GLuint bytes = w * h * 2;
+        active->data = yalloc_alloc_and_defrag(bytes);
+    }
+    if (!active->data) {
+        return GL_OUT_OF_MEMORY;
+    }
+
+    /* Record level 0 as set in the mipmap bitmask */
+    active->mipmap |= (1 << 0);
+    return 0;
+}
+
+void gldcGetTexture(void** data, int* width, int* height) { /* reports the bound texture's storage pointer and size */
+    TextureObject* active = TEXTURE_ACTIVE;
+    *data   = active->data; /* NULL for a texture that has not been given storage by gldcAllocTexture() */
+    *width  = active->width;
+    *height = active->height;
+}
+
+GLuint _glMaxTextureMemory() { /* total size of the pool set up by _glInitTextures() */
+    return YALLOC_SIZE;
+}
+
+GLuint _glFreeTextureMemory() { /* free space in the pool as reported by yalloc_count_free() */
+    return yalloc_count_free(YALLOC_BASE);
+}
+
+GLuint _glUsedTextureMemory() { /* pool size minus the free space reported by _glFreeTextureMemory() */
+    return YALLOC_SIZE - _glFreeTextureMemory();
+}
+
+GLuint _glFreeContiguousTextureMemory() { /* value of yalloc_count_continuous(); presumably the largest single free run -- confirm in yalloc */
+    return yalloc_count_continuous(YALLOC_BASE);
+}
+
+void glDefragmentTextureMemory_KOS(void) {
+    yalloc_defrag_start(YALLOC_BASE);
+
+    /* Replace all texture pointers with the addresses yalloc reports for them */
+    for(GLuint id = 0; id < MAX_TEXTURE_COUNT; id++){
+        if(!texture_id_map_used(id)) continue;
+
+        TextureObject* txr = &TEXTURE_LIST[id];
+        /* Only reserved ids reach this point */
+        gl_assert(txr->index == id);
+        txr->data = yalloc_defrag_address(YALLOC_BASE, txr->data);
+    }
+
+    /* All pointers are updated, so the defragmentation can be committed */
+    yalloc_defrag_commit(YALLOC_BASE);
+}
diff --git a/third_party/gldc/src/types.h b/third_party/gldc/src/types.h
new file mode 100644
index 0000000..85df8ba
--- /dev/null
+++ b/third_party/gldc/src/types.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <stdint.h>
+
+/* Vertex record laid out to match pvr_vertex_t. The fields add up to
+ * 32 bytes (4 + 12 + 8 + 4 + 4) and the type is aligned to 32 bytes. */
+typedef struct {
+    /* Same 32 byte layout as pvr_vertex_t */
+    uint32_t flags;
+    float xyz[3];       /* position */
+    float uv[2];        /* texture coordinates */
+    uint8_t bgra[4];    /* colour bytes; presumably B,G,R,A order - confirm */
+
+    /* In the pvr_vertex_t structure, this next 4 bytes is oargb
+     * but we're not using that for now, so having W here makes the code
+     * simpler */
+    float w;
+} __attribute__ ((aligned (32))) Vertex;
diff --git a/third_party/gldc/src/yalloc/LICENSE b/third_party/gldc/src/yalloc/LICENSE
new file mode 100644
index 0000000..8aa2645
--- /dev/null
+++ b/third_party/gldc/src/yalloc/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) [year] [fullname]
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/third_party/gldc/src/yalloc/README.md b/third_party/gldc/src/yalloc/README.md
new file mode 100644
index 0000000..ca23ec2
--- /dev/null
+++ b/third_party/gldc/src/yalloc/README.md
@@ -0,0 +1,158 @@
+# Summary
+
+yalloc is a memory efficient allocator which is intended for embedded
+applications that only have a low amount of RAM and want to maximize its
+utilization. Properties of the allocator:
+
+ - pools can be up to 128k
+ - user data is 32bit aligned
+ - 4 bytes overhead per allocation
+ - supports defragmentation
+ - uses a free list for first fit allocation strategy (most recently freed
+   blocks are used first)
+ - extensively tested (see section below)
+ - MIT license
+
+# Defragmentation
+
+This feature was the initial motivation for this implementation. Especially
+when dealing with highly memory constrained environments fragmenting memory
+pools can be annoying. For this reason this implementation supports
+defragmentation which moves all allocated blocks into a contiguous range at the
+beginning of the pool, leaving a maximized free range at the end.
+
+As there is no garbage collector or other runtime system involved that updates
+the references, the application must do so. This is done in three steps:
+
+ 1. yalloc_defrag_start() is called. This calculates the new
+    post-defragmentation-addresses for all allocations, but otherwise leaves
+    the allocations untouched.
+
+ 2. yalloc_defrag_address() is called by the application for every pointer that
+    points to an allocation. It returns the post-defragmentation-address for
+    the allocation. The application must update all its relevant pointers this
+    way. Care must be taken not to dereference the moved pointers yet. If the
+    application works with hierarchical data then this can easily be done by
+    updating the pointers bottom up (first the leaves, then their parents).
+
+ 3. yalloc_defrag_commit() is called to finally perform the defragmentation.
+    All allocated blocks are moved to their post-defragmentation-address and
+    the application can continue using the pool the normal way.
+
+It is up to the application when (and if) it performs defragmentation. One
+strategy would be to delay it until an allocation failure. Another approach
+would be to perform the defragmentation regularly when there is nothing else to
+do.
+
+# Configurable Defines
+
+INTERNAL_VALIDATE
+
+If this is not defined on the compiler commandline it will be defined as 0 if
+NDEBUG is defined and otherwise as 1. If you want to disable internal
+validation when NDEBUG is not defined then define INTERNAL_VALIDATE as 0 on the
+compiler commandline.
+
+If it is nonzero the heap will be validated via a bunch of assert() calls at
+the end of every function that modifies the heap. This has roughly O(N*M)
+overhead where N is the number of allocated blocks and M the number of free
+blocks in a heap. For applications with enough live allocations this will get
+significant.
+
+YALLOC_VALGRIND
+
+If this is defined in yalloc.c and NVALGRIND is not defined then
+valgrind/memcheck.h is included and the allocator functions tell valgrind
+about the pool and the allocations, and make the block headers inaccessible
+outside of yalloc-functions. This allows valgrind to detect a lot of the
+accidents that can happen when dealing with dynamic memory. This also adds some overhead for every
+yalloc-call because most of them will "unprotect" the internal structure on
+entry and "protect" it again (marking it as inaccessible for valgrind) before
+returning.
+
+# Tests
+
+The tests rely on internal validation of the pool (see INTERNAL_VALIDATE) to
+check that no assumptions about the internal structure of the pool are
+violated. They additionally check for correctness of observations that can be
+made by using the public functions of the allocator (like checking if user data
+stays unmodified). There are a few different scripts that run tests:
+
+ - run_coverage.sh runs a bunch of testfunctions that are carefully crafted to
+   cover all code paths. Coverage data is generated by clang and a summary is
+   shown at the end of the test.
+
+ - run_valgrind.sh tests if the valgrind integration is working as expected,
+   runs the functions from the coverage test and some randomly generated
+   testcases under valgrind.
+
+ - run_libfuzzer.sh uses libfuzzer from clang to generate interesting testcases
+   and runs them in multiple jobs in parallel for 10 seconds. It also generates
+   coverage data at the end (it always got 100% coverage in my testruns).
+
+All tests exit with 0 and print "All fine!" at the end if there were no
+errors. Coverage deficits are not counted as error, so you have to look at the
+summary (they should show 100% coverage!).
+
+
+# Implementation Details
+
+The Headers and the user data are 32bit aligned. Headers have two 16bit fields
+where the high 15 bits represent offsets (relative to the pools address) to the
+previous/next block. The macros HDR_PTR() and HDR_OFFSET() are used to
+translate an offset to an address and back. The 32bit alignment is exploited to
+allow pools of up to 128k with that 15 significant bits.
+
+A pool is always occupied by non-overlapping blocks that link to their
+previous/next block in address order via the prev/next field of Header.
+
+Free blocks are always joined: No two free blocks will ever be neighbors.
+
+Free blocks have an additional header of the same structure. This additional
+header is used to build a list of free blocks (independent of their address
+order).
+
+yalloc_free() will insert the freed block to the front of the free list.
+yalloc_alloc() searches that list front to back and takes the first block that
+is big enough to satisfy the allocation.
+
+There is always a Header at the front and at the end of the pool. The Header at
+the end is degenerate: It is marked as "used" but has no next block (which is
+usually used to determine the size of a block).
+
+The prev-field of the very first block in the pool has special meaning: It
+points to the first free block in the pool. Or, if the pool is currently
+defragmenting (after yalloc_defrag_start() and before yalloc_defrag_commit()),
+points to the last header of the pool. This state can be recognized by checking
+if it points to an empty block (normal pool state) or a used block
+(defragmentation in progress). This logic can be seen in
+yalloc_defrag_in_progress().
+
+The lowest bit of next/prev have special meaning:
+
+ - low bit of prev is set for free blocks
+
+ - low bit of next is set for blocks with 32bit padding after the user data.
+   This is needed when a block is allocated from a free block that leaves only
+   4 free bytes after the user data... which is not enough to insert a
+   free-header (which needs 8 bytes). The padding will be reclaimed when
+   that block is freed or when the pool is defragmented. The predicate
+   isPadded() can be used to test if a block is padded. Free blocks are never
+   padded.
+
+The predicate isNil() can be used to test if an offset points nowhere (it tests
+if all 15 high bits of an offset are 1). The constant NIL has all but the
+lowest bit set. It is used to set offsets to point to nowhere, and in some
+places it is used to mask out the actual address bits of an offset. This should
+be kept in mind when modifying the code and updating prev/next: Think carefully
+if you have to preserve the low bit when updating an offset!
+
+Defragmentation is done in two phases: First the user calls
+yalloc_defrag_start(). This will put the pool in a special state where no
+alloc/free-calls are allowed. In this state the prev-fields of the used blocks
+have a special meaning: They store the offset that the block will have after
+defragmentation finished. This information is used by yalloc_defrag_address()
+which can be called by the application to query the new addresses for its
+allocations. After the application has updated all its pointers it must call
+yalloc_defrag_commit() which moves all used blocks in contiguous space at the
+beginning of the pool, leaving one maximized free block at the end.
diff --git a/third_party/gldc/src/yalloc/yalloc.c b/third_party/gldc/src/yalloc/yalloc.c
new file mode 100644
index 0000000..6dcf0e5
--- /dev/null
+++ b/third_party/gldc/src/yalloc/yalloc.c
@@ -0,0 +1,803 @@
+#include "yalloc.h"
+#include "yalloc_internals.h"
+
+#include <assert.h>
+#include <string.h>
+
+// Rounds "num" up to the next multiple of "align". The bit-mask arithmetic is only correct when "align" is a power of two.
+#define ALIGN(num, align) (((num) + ((align) - 1)) & ~((align) - 1))
+
+// Valgrind integration is enabled only when YALLOC_VALGRIND is defined and NVALGRIND is not.
+#if defined(YALLOC_VALGRIND) && !defined(NVALGRIND)
+# define USE_VALGRIND 1
+#else
+# define USE_VALGRIND 0
+#endif
+
+// Without valgrind the client-request macros used below are replaced by no-ops.
+#if USE_VALGRIND
+# include <valgrind/memcheck.h>
+#else
+# define VALGRIND_MAKE_MEM_UNDEFINED(p, s) ((void)0)
+# define VALGRIND_MAKE_MEM_DEFINED(p, s) ((void)0)
+# define VALGRIND_MAKE_MEM_NOACCESS(p, s) ((void)0)
+# define VALGRIND_CREATE_MEMPOOL(pool, rz, z) ((void)0)
+# define VALGRIND_MEMPOOL_ALLOC(pool, p, s) ((void)0)
+# define VALGRIND_MEMPOOL_FREE(pool, p)  ((void)0)
+# define VALGRIND_MEMPOOL_CHANGE(pool, a, b, s)  ((void)0)
+#endif
+
+// Helpers that change valgrind's view of a block header. The *_FREE_* variants cover two consecutive Headers
+// (a free block carries a second Header for the free-list links), the others cover a single Header.
+#define MARK_NEW_FREE_HDR(p) VALGRIND_MAKE_MEM_UNDEFINED(p, sizeof(Header) * 2)
+#define MARK_NEW_HDR(p) VALGRIND_MAKE_MEM_UNDEFINED(p, sizeof(Header))
+#define PROTECT_HDR(p) VALGRIND_MAKE_MEM_NOACCESS(p, sizeof(Header))
+#define PROTECT_FREE_HDR(p) VALGRIND_MAKE_MEM_NOACCESS(p, sizeof(Header) * 2)
+#define UNPROTECT_HDR(p) VALGRIND_MAKE_MEM_DEFINED(p, sizeof(Header))
+#define UNPROTECT_FREE_HDR(p) VALGRIND_MAKE_MEM_DEFINED(p, sizeof(Header) * 2)
+
+
+#if USE_VALGRIND
+// Walks all blocks in address order and makes their headers accessible to valgrind
+// (for free blocks this includes the second header that holds the free-list links).
+static void _unprotect_pool(void * pool)
+{
+  Header * hdr;
+
+  for (hdr = (Header*)pool; ; hdr = HDR_PTR(hdr->next))
+  {
+    UNPROTECT_HDR(hdr);
+    if (isFree(hdr))
+    {
+      UNPROTECT_HDR(hdr + 1);
+    }
+
+    if (isNil(hdr->next))
+    { // reached the last block of the pool
+      break;
+    }
+  }
+}
+
+// Walks all blocks in address order and hides them from valgrind again: free blocks are made
+// inaccessible as a whole (up to the following block), used blocks only have their header hidden.
+static void _protect_pool(void * pool)
+{
+  Header * hdr = (Header*)pool;
+
+  while (hdr != NULL)
+  {
+    Header * following = NULL;
+    if (!isNil(hdr->next))
+    {
+      following = HDR_PTR(hdr->next);
+    }
+
+    if (!isFree(hdr))
+    {
+      PROTECT_HDR(hdr);
+    }
+    else
+    {
+      VALGRIND_MAKE_MEM_NOACCESS(hdr, (char*)following - (char*)hdr);
+    }
+
+    hdr = following;
+  }
+}
+// Asserts that valgrind knows "pool" as a mempool. The macro deliberately has no trailing semicolon
+// (the call site supplies it), so it expands to a single expression like the non-valgrind variant.
+#define assert_is_pool(pool) assert(VALGRIND_MEMPOOL_EXISTS(pool))
+
+#else
+
+// Without valgrind there is nothing to protect/unprotect: these variants are no-ops.
+static void _unprotect_pool(void * pool){(void)pool;}
+static void _protect_pool(void * pool){(void)pool;}
+#define assert_is_pool(pool) ((void)0)
+#endif
+
+// Internal version that does not unprotect/protect the pool.
+// Returns nonzero while the pool is between yalloc_defrag_start() and yalloc_defrag_commit().
+// That state is recognized by the "prev" of the first header (normally the head of the free list)
+// referring to a block whose "free"-bit is cleared.
+static int _yalloc_defrag_in_progress(void * pool)
+{
+  Header * first = (Header*)pool;
+
+  if (!isNil(first->prev))
+    return (HDR_PTR(first->prev)->prev & 1) == 0;
+
+  return 0; // "prev" points nowhere, so no defragmentation is in progress
+}
+
+// Public variant: same query as _yalloc_defrag_in_progress(), wrapped in unprotect/protect of the pool.
+int yalloc_defrag_in_progress(void * pool)
+{
+  int inProgress;
+
+  _unprotect_pool(pool);
+  inProgress = _yalloc_defrag_in_progress(pool);
+  _protect_pool(pool);
+
+  return inProgress;
+}
+
+#if YALLOC_INTERNAL_VALIDATE
+
+// Counts how often "blk" appears in the free list of "pool" (the list that starts at pool->prev and is
+// linked through the "next" field of each free block's second header).
+static size_t _count_free_list_occurences(Header * pool, Header * blk)
+{
+  size_t n = 0; // same type as the return value (and as in _count_addr_list_occurences)
+  if (!isNil(pool->prev))
+  {
+    Header * cur = HDR_PTR(pool->prev);
+    for (;;)
+    {
+      if (cur == blk)
+        ++n;
+
+      if (isNil(cur[1].next))
+        break;
+
+      cur = HDR_PTR(cur[1].next);
+    }
+  }
+  return n;
+}
+
+// Counts how often "blk" appears when walking all blocks of "pool" in address order.
+static size_t _count_addr_list_occurences(Header * pool, Header * blk)
+{
+  size_t hits = 0;
+  Header * cur;
+
+  for (cur = pool; ; cur = HDR_PTR(cur->next))
+  {
+    if (cur == blk)
+      hits++;
+
+    if (isNil(cur->next))
+      break; // last block reached
+  }
+
+  return hits;
+}
+
+// Asserts that "p" is a user pointer of "pool": the header directly in front of it must appear exactly
+// once in the address-ordered block list and must not be marked as free.
+static void _validate_user_ptr(void * pool, void * p)
+{
+  Header * hdr = (Header*)p - 1; // the block header sits directly in front of the user data
+  size_t n = _count_addr_list_occurences((Header*)pool, hdr);
+  assert(n == 1 && !isFree(hdr));
+}
+
+/**
+Validates if all the invariants of a pool are intact.
+
+This is very expensive when there are enough blocks in the heap (quadratic complexity!).
+
+Two different sets of invariants are checked, depending on whether a defragmentation is in progress:
+ - defragmenting: the "prev" fields of used blocks hold their post-defragmentation offsets, which must be
+   contiguous starting at the pool address and must never lie behind the block's current address.
+ - normal: the address-ordered list must be consistently double linked, free blocks must appear exactly once
+   in the free list, must not be padded and must not be neighbours; used blocks must not be in the free list.
+*/
+static void _yalloc_validate(void * pool_)
+{
+  Header * pool = (Header*)pool_;
+  Header * cur = pool;
+
+  assert(!isNil(pool->next)); // there must always be at least two blocks: a free/used one and the final block at the end
+
+  if (_yalloc_defrag_in_progress(pool))
+  {
+    Header * prevUsed = NULL;
+    while (!isNil(cur->next))
+    {
+      if (!isFree(cur))
+      { // it is a used block
+        // the first block never moves (its "prev" is used as the defragmentation marker instead of holding a new address)
+        Header * newAddr = cur == pool ? pool : HDR_PTR(cur->prev);
+        assert(newAddr <= cur);
+        assert(newAddr >= pool);
+
+        if (prevUsed)
+        {
+          // the new address must directly follow the (unpadded) previous used block at its new address
+          Header * prevNewAddr = prevUsed == pool ? pool : HDR_PTR(prevUsed->prev);
+          size_t prevBruttoSize = (char*)HDR_PTR(prevUsed->next) - (char*)prevUsed;
+          if (isPadded(prevUsed))
+            prevBruttoSize -= 4; // remove padding
+          assert((char*)newAddr == (char*)prevNewAddr + prevBruttoSize);
+        }
+        else
+        {
+          assert(newAddr == pool);
+        }
+
+        prevUsed = cur;
+      }
+
+      cur = HDR_PTR(cur->next);
+    }
+
+    assert(cur == HDR_PTR(pool->prev)); // the free-list should point to the last block
+    assert(!isFree(cur)); // the last block must not be free
+  }
+  else
+  {
+    Header * prev = NULL;
+
+    // iterate blocks in address order
+    for (;;)
+    {
+      if (prev)
+      {
+        // back-link must match the block we came from
+        Header * x = HDR_PTR(cur->prev);
+        assert(x == prev);
+      }
+
+      int n = _count_free_list_occurences(pool, cur);
+      if (isFree(cur))
+      { // it is a free block
+        assert(n == 1);
+        assert(!isPadded(cur)); // free blocks must have a zero padding-bit
+
+        if (prev)
+        {
+          assert(!isFree(prev)); // free blocks must not be direct neighbours
+        }
+      }
+      else
+      {
+        assert(n == 0);
+      }
+
+      if (isNil(cur->next))
+        break;
+
+      Header * next = HDR_PTR(cur->next);
+      assert((char*)next >= (char*)cur + sizeof(Header) * 2); // every block spans at least two Headers
+      prev = cur;
+      cur = next;
+    }
+
+    assert(isNil(cur->next));
+
+    if (!isNil(pool->prev))
+    {
+      // iterate free-list
+      Header * f = HDR_PTR(pool->prev);
+      assert(isNil(f[1].prev)); // the head of the free list has no predecessor
+      for (;;)
+      {
+        assert(isFree(f)); // must be free
+
+        int n = _count_addr_list_occurences(pool, f);
+        assert(n == 1);
+
+        if (isNil(f[1].next))
+          break;
+
+        f = HDR_PTR(f[1].next);
+      }
+    }
+  }
+}
+
+#else
+// Internal validation is disabled: both checks compile to no-ops.
+static void _yalloc_validate(void * pool){(void)pool;}
+static void _validate_user_ptr(void * pool, void * p){(void)pool; (void)p;}
+#endif
+
+// Creates a pool inside the buffer "pool" of "size" bytes.
+//
+// The pool initially consists of one free block at the start and the degenerate "used" header at the very end.
+// The size is rounded down to a multiple of sizeof(Header).
+//
+// Returns 0 on success and -1 if "pool" is NULL, if "size" exceeds MAX_POOL_SIZE or if the (rounded) size
+// cannot hold the three headers that the initial layout needs.
+int yalloc_init(void * pool, size_t size)
+{
+  if (!pool)
+    return -1;
+
+  if (size > MAX_POOL_SIZE)
+    return -1;
+
+  // TODO: Error when pool is not properly aligned
+
+  // TODO: Error when size is not a multiple of the alignment?
+  size -= size % sizeof(Header); // round down to a whole number of Headers
+
+  if(size < sizeof(Header) * 3)
+    return -1;
+
+  VALGRIND_CREATE_MEMPOOL(pool, 0, 0);
+
+  Header * first = (Header*)pool;
+  Header * last = (Header*)((char*)pool + size) - 1;
+
+  // tell valgrind about the headers that are written below: the free block's two headers and the final header
+  MARK_NEW_FREE_HDR(first);
+  MARK_NEW_HDR(last);
+
+  // "prev" of the first block is the head of the free list: it refers to the first block itself, which is free
+  first->prev = HDR_OFFSET(first) | 1;
+  first->next = HDR_OFFSET(last);
+  first[1].prev = NIL; // the only entry of the free list has neither predecessor nor successor
+  first[1].next = NIL;
+
+  last->prev = HDR_OFFSET(first);
+  last->next = NIL;
+
+  _unprotect_pool(pool);
+  _yalloc_validate(pool);
+  _protect_pool(pool);
+  return 0;
+}
+
+// Releases a pool. Without valgrind this does nothing; with valgrind the mempool is destroyed and the
+// whole buffer (up to and including the final header) is handed back as undefined memory.
+void yalloc_deinit(void * pool)
+{
+#if USE_VALGRIND
+  VALGRIND_DESTROY_MEMPOOL(pool);
+
+  // walk to the final header, making every header readable on the way
+  Header * hdr = (Header*)pool;
+  UNPROTECT_HDR(hdr);
+  while (!isNil(hdr->next))
+  {
+    hdr = HDR_PTR(hdr->next);
+    UNPROTECT_HDR(hdr);
+  }
+
+  VALGRIND_MAKE_MEM_UNDEFINED(pool, (char*)(hdr + 1) - (char*)pool);
+#else
+  (void)pool;
+#endif
+}
+
+
+// Allocates "size" bytes from the pool using a first-fit search over the free list.
+//
+// The requested size is rounded up to a multiple of 32 bytes. Returns NULL if "size" is 0, if it is larger
+// than any pool can be (which also protects the rounding below from wrapping around), or if no free block
+// is big enough. The pool must not be in the "defragmenting" state.
+void * yalloc_alloc(void * pool, size_t size)
+{
+  assert_is_pool(pool);
+  _unprotect_pool(pool);
+  assert(!_yalloc_defrag_in_progress(pool));
+  _yalloc_validate(pool);
+  if (!size || size > MAX_POOL_SIZE)
+  { // nothing to allocate, or a request that can never fit (and could overflow in ALIGN() below)
+    _protect_pool(pool);
+    return NULL;
+  }
+
+  Header * root = (Header*)pool;
+  if (isNil(root->prev))
+  {
+    _protect_pool(pool);
+    return NULL; /* no free block, no chance to allocate anything */
+  }
+
+  /* round up to alignment */
+  size = ALIGN(size, 32);
+
+  size_t bruttoSize = size + sizeof(Header);
+  Header * prev = NULL;
+  Header * cur = HDR_PTR(root->prev);
+  for (;;)
+  {
+    size_t curSize = (char*)HDR_PTR(cur->next) - (char*)cur; /* size of the block, including its header */
+
+    if (curSize >= bruttoSize) // it is big enough
+    {
+      // take action for unused space in the free block
+      if (curSize >= bruttoSize + sizeof(Header) * 2)
+      { // the leftover space is big enough to make it a free block
+        // Build a free block from the unused space and insert it into the list of free blocks after the current free block
+        Header * tail = (Header*)((char*)cur + bruttoSize);
+        MARK_NEW_FREE_HDR(tail);
+
+        // update address-order-list
+        tail->next = cur->next;
+        tail->prev = HDR_OFFSET(cur) | 1;
+        HDR_PTR(cur->next)->prev = HDR_OFFSET(tail); // NOTE: We know the next block is used because free blocks are never neighbours. So we don't have to care about the lower bit which would be set for the prev of a free block.
+        cur->next = HDR_OFFSET(tail);
+
+        // update list of free blocks
+        tail[1].next = cur[1].next;
+        // NOTE: tail[1].prev is updated in the common path below (assignment to "HDR_PTR(cur[1].next)[1].prev")
+
+        if (!isNil(cur[1].next))
+          HDR_PTR(cur[1].next)[1].prev = HDR_OFFSET(tail);
+        cur[1].next = HDR_OFFSET(tail);
+      }
+      else if (curSize > bruttoSize)
+      { // there will be unused space, but not enough to insert a free header
+        internal_assert(curSize - bruttoSize == sizeof(Header)); // unused space must be enough to build a free-block or it should be exactly the size of a Header
+        cur->next |= 1; // set marker for "has unused trailing space"
+      }
+      else
+      {
+        internal_assert(curSize == bruttoSize);
+      }
+
+      cur->prev &= NIL; // clear marker for "is a free block"
+
+      // remove from linked list of free blocks
+      if (prev)
+        prev[1].next = cur[1].next;
+      else
+      {
+        uint32_t freeBit = isFree(root);
+        root->prev = (cur[1].next & NIL) | freeBit;
+      }
+
+      if (!isNil(cur[1].next))
+        HDR_PTR(cur[1].next)[1].prev = prev ? HDR_OFFSET(prev) : NIL;
+
+      _yalloc_validate(pool);
+      VALGRIND_MEMPOOL_ALLOC(pool, cur + 1, size);
+      _protect_pool(pool);
+      return cur + 1; // return address after the header
+    }
+
+    if (isNil(cur[1].next))
+      break;
+
+    prev = cur;
+    cur = HDR_PTR(cur[1].next);
+  }
+
+  // no free block was big enough
+  _yalloc_validate(pool);
+  _protect_pool(pool);
+  return NULL;
+}
+
+// Takes "blk" out of the free-list. If "blk" was the head of the list, the pool's first-free-block
+// offset is moved on to the successor of "blk" (the free-bit of the pool's first block is preserved).
+static void unlink_from_free_list(Header * pool, Header * blk)
+{
+  Header * links = blk + 1; // the second header of a free block carries the free-list links
+
+  if (!isNil(links->prev))
+  { // blk has a predecessor in the free-list: let it skip over blk
+    HDR_PTR(links->prev)[1].next = links->next;
+  }
+  else
+  { // blk is the first entry: the pool's first-free offset has to point to the successor
+    uint32_t freeBit = isFree(pool);
+    pool->prev = (links->next & NIL) | freeBit;
+  }
+
+  // let the successor (if any) point back to the predecessor of blk
+  if (!isNil(links->next))
+  {
+    HDR_PTR(links->next)[1].prev = links->prev;
+  }
+}
+
+// Returns the number of payload bytes of the allocation "p": the distance from "p" to the following
+// block header, minus one Header if the block carries trailing padding.
+size_t yalloc_block_size(void * pool, void * p)
+{
+  Header * hdr = (Header*)p - 1;
+  size_t payloadSize;
+
+  UNPROTECT_HDR(hdr);
+
+  payloadSize = (char*)HDR_PTR(hdr->next) - (char*)p;
+  if (isPadded(hdr))
+  {
+    payloadSize -= sizeof(Header);
+  }
+
+  PROTECT_HDR(hdr);
+  return payloadSize;
+}
+
+// Returns the allocation "p" to the pool. Passing NULL is a no-op.
+//
+// The freed block is joined with free neighbours (so no two free blocks are ever adjacent), it reclaims the
+// padding of a padded left neighbour, and the resulting free block is pushed to the front of the free list.
+// The pool must not be in the "defragmenting" state.
+void yalloc_free(void * pool_, void * p)
+{
+  assert_is_pool(pool_);
+  assert(!yalloc_defrag_in_progress(pool_));
+  if (!p)
+    return;
+
+  _unprotect_pool(pool_);
+
+  Header * pool = (Header*)pool_;
+  Header * cur = (Header*)p - 1;
+
+  // get pointers to previous/next block in address order
+  // (the "prev" of the very first block is the free-list head, not a neighbour, hence the cur == pool check)
+  Header * prev = cur == pool || isNil(cur->prev) ? NULL : HDR_PTR(cur->prev);
+  Header * next = isNil(cur->next) ? NULL : HDR_PTR(cur->next);
+
+  int prevFree = prev && isFree(prev);
+  int nextFree = next && isFree(next);
+
+#if USE_VALGRIND
+  {
+    unsigned errs = VALGRIND_COUNT_ERRORS;
+    VALGRIND_MEMPOOL_FREE(pool, p);
+    if (VALGRIND_COUNT_ERRORS > errs)
+    { // early exit if the free was invalid (so we get a valgrind error and don't mess up the pool, which is helpful for testing if invalid frees are detected by valgrind)
+      _protect_pool(pool_);
+      return;
+    }
+  }
+#endif
+
+  _validate_user_ptr(pool_, p);
+
+  if (prevFree && nextFree)
+  { // the freed block has two free neighbors
+    unlink_from_free_list(pool, prev);
+    unlink_from_free_list(pool, next);
+
+    // join prev, cur and next
+    prev->next = next->next;
+    HDR_PTR(next->next)->prev = cur->prev;
+
+    // prev is now the block we want to push onto the free-list
+    cur = prev;
+  }
+  else if (prevFree)
+  {
+    unlink_from_free_list(pool, prev);
+
+    // join prev and cur
+    prev->next = cur->next;
+    HDR_PTR(cur->next)->prev = cur->prev;
+
+    // prev is now the block we want to push onto the free-list
+    cur = prev;
+  }
+  else if (nextFree)
+  {
+    unlink_from_free_list(pool, next);
+
+    // join cur and next
+    cur->next = next->next;
+    HDR_PTR(next->next)->prev = next->prev & NIL; // next->prev is the offset of cur with the free-bit set; mask the bit out
+  }
+
+  // if there is a previous block and that block has padding then we want to grow the new free block into that padding
+  if (cur != pool && !isNil(cur->prev))
+  { // there is a previous block
+    Header * left = HDR_PTR(cur->prev);
+    if (isPadded(left))
+    { // the previous block has padding, so extend the current free block to consume that padding
+      Header * grown = cur - 1; // the padding is exactly one Header in front of cur
+      MARK_NEW_HDR(grown);
+      grown->next = cur->next;
+      grown->prev = cur->prev;
+      left->next = HDR_OFFSET(grown); // this also clears the padding-bit of the left block
+      if (!isNil(cur->next))
+        HDR_PTR(cur->next)->prev = HDR_OFFSET(grown);
+
+      cur = grown;
+    }
+  }
+
+  cur->prev |= 1; // it becomes a free block
+  cur->next &= NIL; // reset padding-bit
+  UNPROTECT_HDR(cur + 1);
+  cur[1].prev = NIL; // it will be the first free block in the free list, so it has no prevFree
+
+  if (!isNil(pool->prev))
+  { // the free-list was already non-empty
+    HDR_PTR(pool->prev)[1].prev = HDR_OFFSET(cur); // make the first entry in the free list point back to the new free block (it will become the first one)
+    cur[1].next = pool->prev; // the next free block is the first of the old free-list
+  }
+  else
+    cur[1].next = NIL; // free-list was empty, so there is no successor
+
+  // everything behind the two headers of the free block is off limits for valgrind
+  VALGRIND_MAKE_MEM_NOACCESS(cur + 2, (char*)HDR_PTR(cur->next) - (char*)(cur + 2));
+
+  // now the freed block is the first in the free-list
+
+  // update the offset to the first element of the free list
+  uint32_t freeBit = isFree(pool); // remember the free-bit of the offset
+  pool->prev = HDR_OFFSET(cur) | freeBit; // update the offset and restore the free-bit
+  _yalloc_validate(pool);
+  _protect_pool(pool);
+}
+
+// Sums up the space of all free blocks (including their headers) plus the padding of padded used blocks
+// and returns that sum minus one Header (0 if the pool has no free space at all).
+// The pool must not be in the "defragmenting" state.
+size_t yalloc_count_free(void * pool_)
+{
+  assert_is_pool(pool_);
+  _unprotect_pool(pool_);
+  assert(!_yalloc_defrag_in_progress(pool_));
+  Header * pool = (Header*)pool_;
+  size_t bruttoFree = 0;
+  Header * cur;
+
+  _yalloc_validate(pool);
+
+  // visit every block in address order, including the final one
+  for (cur = pool; ; cur = HDR_PTR(cur->next))
+  {
+    if (isFree(cur))
+    { // a free block counts with its whole extent
+      bruttoFree += (char*)HDR_PTR(cur->next) - (char*)cur;
+    }
+    else if (isPadded(cur))
+    { // a used block only contributes its padding
+      bruttoFree += sizeof(Header);
+    }
+
+    if (isNil(cur->next))
+      break;
+  }
+
+  _protect_pool(pool);
+
+  if (bruttoFree >= sizeof(Header))
+    return bruttoFree - sizeof(Header);
+
+  internal_assert(!bruttoFree); // free space should always be a multiple of sizeof(Header)
+  return 0;
+}
+
+// Finds the largest free block (including its header) and returns its extent minus one Header
+// (0 if there is no free block). The pool must not be in the "defragmenting" state.
+size_t yalloc_count_continuous(void * pool_)
+{
+  assert_is_pool(pool_);
+  _unprotect_pool(pool_);
+  assert(!_yalloc_defrag_in_progress(pool_));
+  Header * pool = (Header*)pool_;
+  size_t largestFree = 0;
+  Header * cur;
+
+  _yalloc_validate(pool);
+
+  // visit every block in address order, including the final one
+  for (cur = pool; ; cur = HDR_PTR(cur->next))
+  {
+    if (isFree(cur))
+    { // remember the biggest free extent seen so far
+      size_t extent = (uintptr_t)HDR_PTR(cur->next) - (uintptr_t)cur;
+      largestFree = extent > largestFree ? extent : largestFree;
+    }
+
+    if (isNil(cur->next))
+      break;
+  }
+
+  _protect_pool(pool);
+
+  if (largestFree >= sizeof(Header))
+    return largestFree - sizeof(Header);
+
+  internal_assert(!largestFree); // free space should always be a multiple of sizeof(Header)
+  return 0;
+}
+
+// Returns the user pointer of the first used block in address order, or NULL if there is none.
+// The final block of the pool (the one without a successor) is never reported.
+void * yalloc_first_used(void * pool)
+{
+  assert_is_pool(pool);
+  _unprotect_pool(pool);
+
+  void * found = NULL;
+  Header * blk;
+  for (blk = (Header*)pool; !isNil(blk->next); blk = HDR_PTR(blk->next))
+  {
+    if (!isFree(blk))
+    { // user data starts directly behind the header
+      found = blk + 1;
+      break;
+    }
+  }
+
+  _protect_pool(pool);
+  return found;
+}
+
+// Returns the user pointer of the first used block that follows the allocation "p" in address order,
+// or NULL if there is none. The final block of the pool is never reported.
+void * yalloc_next_used(void * pool, void * p)
+{
+  assert_is_pool(pool);
+  _unprotect_pool(pool);
+  _validate_user_ptr(pool, p);
+  Header * prev = (Header*)p - 1;
+  assert(!isNil(prev->next)); // the last block should never end up as input to this function (because it is not user-visible)
+
+  void * found = NULL;
+  Header * blk;
+  for (blk = HDR_PTR(prev->next); !isNil(blk->next); blk = HDR_PTR(blk->next))
+  {
+    if (!isFree(blk))
+    { // user data starts directly behind the header
+      found = blk + 1;
+      break;
+    }
+  }
+
+  _protect_pool(pool);
+  return found;
+}
+
+// Puts the pool into the "defragmenting" state.
+//
+// No block is moved here: the post-defragmentation offset of every used block is computed and stored in its
+// "prev" field, and the "prev" of the first block is pointed at the final block of the pool, which is what
+// _yalloc_defrag_in_progress() detects. The pool must not already be in the "defragmenting" state.
+void yalloc_defrag_start(void * pool_)
+{
+  assert_is_pool(pool_);
+  _unprotect_pool(pool_);
+  assert(!_yalloc_defrag_in_progress(pool_));
+  Header * pool = (Header*)pool_;
+
+  // iterate over all blocks in address order and store the post-defragment address of used blocks in their "prev" field
+  size_t end = 0; // offset for the next used block
+  Header * blk = (Header*)pool;
+  for (; !isNil(blk->next); blk = HDR_PTR(blk->next))
+  {
+    if (!isFree(blk))
+    { // it is a used block
+      blk->prev = end >> 1; // byte offset converted to the stored offset encoding (checked by the assert below)
+      internal_assert((char*)HDR_PTR(blk->prev) == (char*)pool + end);
+
+      size_t bruttoSize = (char*)HDR_PTR(blk->next) - (char*)blk;
+
+      if (isPadded(blk))
+      { // the block is padded
+        bruttoSize -= sizeof(Header); // padding is dropped by the defragmentation
+      }
+
+      end += bruttoSize;
+      internal_assert(end % sizeof(Header) == 0);
+    }
+  }
+
+  // blk is now the last block (the dummy "used" block at the end of the pool)
+  internal_assert(isNil(blk->next));
+  internal_assert(!isFree(blk));
+
+  // mark the pool as "defragementation in progress"
+  // (this overwrites the new-address just stored in pool->prev if the first block is used; the first block never moves)
+  uint32_t freeBit = isFree(pool);
+  pool->prev = (HDR_OFFSET(blk) & NIL) | freeBit;
+
+  _yalloc_validate(pool);
+  internal_assert(yalloc_defrag_in_progress(pool));
+  _protect_pool(pool);
+}
+
+// Returns the address that the allocation "p" will have after yalloc_defrag_commit().
+//
+// Must only be called while the pool is in the "defragmenting" state. NULL is passed through unchanged.
+// The pool is protected again on every path that unprotected it.
+void * yalloc_defrag_address(void * pool_, void * p)
+{
+  assert_is_pool(pool_);
+  assert(yalloc_defrag_in_progress(pool_));
+  if (!p)
+    return NULL;
+
+  Header * pool = (Header*)pool_;
+
+  _unprotect_pool(pool);
+  _validate_user_ptr(pool_, p);
+
+  void * defragP;
+  if (pool + 1 == p)
+  { // The first block never moves. Its "prev" can't be used for the lookup because it marks the pool as "defragmentation in progress".
+    defragP = pool + 1;
+  }
+  else
+  { // "prev" of every other used block holds its post-defragmentation offset
+    Header * blk = (Header*)p - 1;
+    defragP = HDR_PTR(blk->prev) + 1;
+  }
+
+  _protect_pool(pool);
+  return defragP;
+}
+
+// Finishes a defragmentation that was started with yalloc_defrag_start().
+//
+// All used blocks are moved to the front of the pool (dropping their padding) and their prev/next links are
+// rebuilt. The space between the last used block and the final header becomes one free block, or padding
+// of the last used block if it is too small for a free block. This leaves the "defragmenting" state.
+void yalloc_defrag_commit(void * pool_)
+{
+  assert_is_pool(pool_);
+  _unprotect_pool(pool_);
+  assert(_yalloc_defrag_in_progress(pool_));
+  Header * pool = (Header*)pool_;
+
+  // iterate over all blocks in address order and move them
+  size_t end = 0; // offset for the next used block
+  Header * blk = pool;
+  Header * lastUsed = NULL;
+  while (!isNil(blk->next))
+  {
+    if (!isFree(blk))
+    { // it is a used block
+      size_t bruttoSize = (char*)HDR_PTR(blk->next) - (char*)blk;
+
+      if (isPadded(blk))
+      { // the block is padded
+        bruttoSize -= sizeof(Header);
+      }
+
+      Header * next = HDR_PTR(blk->next); // remember the successor before the header is rewritten and moved
+
+      // rewrite the links for the new location (the padding-bit is dropped with the new "next")
+      blk->prev = lastUsed ? HDR_OFFSET(lastUsed) : NIL;
+      blk->next = (end + bruttoSize) >> 1;
+
+      lastUsed = (Header*)((char*)pool + end);
+      VALGRIND_MAKE_MEM_UNDEFINED(lastUsed, (char*)blk - (char*)lastUsed);
+      memmove(lastUsed, blk, bruttoSize); // source and destination may overlap
+      VALGRIND_MEMPOOL_CHANGE(pool, blk + 1, lastUsed + 1, bruttoSize - sizeof(Header));
+
+      end += bruttoSize;
+      blk = next;
+    }
+    else
+      blk = HDR_PTR(blk->next);
+  }
+
+  // blk is now the last block (the dummy "used" block at the end of the pool)
+  internal_assert(isNil(blk->next));
+  internal_assert(!isFree(blk));
+
+  if (lastUsed)
+  {
+    Header * gap = HDR_PTR(lastUsed->next); // first byte behind the moved blocks
+    if (gap == blk)
+    { // there is no gap
+      pool->prev = NIL; // the free list is empty
+      blk->prev = HDR_OFFSET(lastUsed);
+    }
+    else if (blk - gap > 1)
+    { // the gap is big enough for a free Header
+
+      // set a free list that contains the gap as only element
+      gap->prev = HDR_OFFSET(lastUsed) | 1;
+      gap->next = HDR_OFFSET(blk);
+      gap[1].prev = NIL;
+      gap[1].next = NIL;
+      pool->prev = blk->prev = HDR_OFFSET(gap);
+    }
+    else
+    { // there is a gap, but it is too small to be used as free-list-node, so just make it padding of the last used block
+      lastUsed->next = HDR_OFFSET(blk) | 1;
+      pool->prev = NIL;
+      blk->prev = HDR_OFFSET(lastUsed);
+    }
+  }
+  else
+  { // the pool is empty
+    pool->prev = 1; // offset 0 with the free-bit set: the first block is free and is the head of the free list
+  }
+
+  internal_assert(!_yalloc_defrag_in_progress(pool));
+  _yalloc_validate(pool);
+  _protect_pool(pool);
+}
diff --git a/third_party/gldc/src/yalloc/yalloc.h b/third_party/gldc/src/yalloc/yalloc.h
new file mode 100644
index 0000000..4349eb9
--- /dev/null
+++ b/third_party/gldc/src/yalloc/yalloc.h
@@ -0,0 +1,176 @@
+/**
+@file
+
+API of the yalloc allocator.
+*/
+
+#ifndef YALLOC_H
+#define YALLOC_H
+
+#include <stddef.h>
+
+/**
+Maximum supported pool size. yalloc_init() will fail for larger pools.
+*/
+#define MAX_POOL_SIZE ((2 << 24) - 4)
+
+/**
+Creates a pool inside a given buffer.
+
+Pools must be deinitialized with yalloc_deinit() when they are no longer needed.
+
+@param pool The starting address of the pool. It must have at least 32bit
+alignment (internal structure uses 32bit integers). Allocations are placed at
+32bit boundaries starting from this address, so if the user data should be
+32bit aligned then this address has to be 32bit aligned. Typically an address
+of static memory, or an array on the stack is used if the pool is only used
+temporarily.
+@param size Size of the pool.
+@return 0 on success, nonzero if the size is not supported.
+ */
+int yalloc_init(void * pool, size_t size);
+
+/**
+Deinitializes the buffer that is used by the pool and makes it available for other use.
+
+The content of the buffer is undefined after this.
+
+@param pool The starting address of an initialized pool.
+*/
+void yalloc_deinit(void * pool);
+
+/**
+Allocates a block of memory from a pool.
+
+This function mimics malloc().
+
+The pool must not be in the "defragmenting" state when this function is called.
+
+@param pool The starting address of an initialized pool.
+@param size Number of bytes to allocate.
+@return Allocated buffer or \c NULL if there was no free range that could serve
+the allocation. See @ref yalloc_defrag_start() for a way to remove
+fragmentation which may cause allocations to fail even when there is enough
+space in total.
+*/
+void * yalloc_alloc(void * pool, size_t size);
+
+/**
+Returns an allocation to a pool.
+
+This function mimics free().
+
+The pool must not be in the "defragmenting" state when this function is called.
+
+@param pool The starting address of the initialized pool the allocation comes from.
+@param p An address that was returned from yalloc_alloc() of the same pool.
+*/
+void yalloc_free(void * pool, void * p);
+
+/**
+Returns the maximum size of a successful allocation (assuming a completely unfragmented heap).
+
+After defragmentation the first allocation with the returned size is guaranteed to succeed.
+
+@param pool The starting address of an initialized pool.
+@return Number of bytes that can be allocated (assuming the pool is defragmented).
+*/
+size_t yalloc_count_free(void * pool);
+
+/**
+Returns the maximum continuous free area.
+
+@param pool The starting address of an initialized pool.
+@return Number of free bytes that exist continuously.
+*/
+size_t yalloc_count_continuous(void * pool_);
+
+/**
+Queries the usable size of an allocated block.
+
+@param pool The starting address of the initialized pool the allocation comes from.
+@param p An address that was returned from yalloc_alloc() of the same pool.
+@return Size of the memory block. This is the size passed to @ref yalloc_alloc() rounded up to 4.
+*/
+size_t yalloc_block_size(void * pool, void * p);
+
+/**
+Finds the first (in address order) allocation of a pool.
+
+@param pool The starting address of an initialized pool.
+@return Address of the allocation with the lowest address inside the pool (this is
+what @ref yalloc_alloc() returned), or \c NULL if there is no used block.
+*/
+void * yalloc_first_used(void * pool);
+
+/**
+Given a pointer to an allocation finds the next (in address order) used block of a pool.
+
+@param pool The starting address of the initialized pool the allocation comes from.
+@param p Pointer to an allocation in that pool, typically comes from a previous
+call to @ref yalloc_first_used()
+*/
+void * yalloc_next_used(void * pool, void * p);
+
+/**
+Starts defragmentation for a pool.
+
+Allocations will stay where they are. But the pool is put in the "defragmenting"
+state (see @ref yalloc_defrag_in_progress()).
+
+The pool must not be in the "defragmenting" state when this function is called.
+The pool is put into the "defragmenting" state by this function.
+
+@param pool The starting address of an initialized pool.
+*/
+void yalloc_defrag_start(void * pool);
+
+/**
+Returns the address that an allocation will have after @ref yalloc_defrag_commit() is called.
+
+The pool must be in the "defragmenting" state when this function is called.
+
+@param pool The starting address of the initialized pool the allocation comes from.
+@param p Pointer to an allocation in that pool.
+@return The address the allocation will have after @ref yalloc_defrag_commit() is called.
+*/
+void * yalloc_defrag_address(void * pool, void * p);
+
+/**
+Finishes the defragmentation.
+
+The content of all allocations in the pool will be moved to the address that
+was reported by @ref yalloc_defrag_address(). The pool will then have only one
+free block. This means that a <tt>yalloc_alloc(pool, yalloc_count_free(pool))</tt>
+will succeed.
+
+The pool must be in the "defragmenting" state when this function is called. The
+pool is put back to normal state by this function.
+
+@param pool The starting address of an initialized pool.
+*/
+void yalloc_defrag_commit(void * pool);
+
+/**
+Tells if the pool is in the "defragmenting" state (after a @ref yalloc_defrag_start() and before a @ref yalloc_defrag_commit()).
+
+@param pool The starting address of an initialized pool.
+@return Nonzero if the pool is currently in the "defragmenting" state.
+*/
+int yalloc_defrag_in_progress(void * pool);
+
+
+/**
+Helper function that dumps the state of the pool to stdout.
+
+This function is only available if built with <tt>yalloc_dump.c</tt>. This
+function only exists for debugging purposes and can be ignored by normal users
+that are not interested in the internal structure of the implementation.
+
+@param pool The starting address of an initialized pool.
+@param name A string that is used as "Title" for the output.
+*/
+void yalloc_dump(void * pool, char * name);
+
+
+#endif // YALLOC_H
diff --git a/third_party/gldc/src/yalloc/yalloc_dump.c b/third_party/gldc/src/yalloc/yalloc_dump.c
new file mode 100644
index 0000000..f0dfdcb
--- /dev/null
+++ b/third_party/gldc/src/yalloc/yalloc_dump.c
@@ -0,0 +1,39 @@
+#include "yalloc_internals.h"
+
+#include <stdio.h>
+
+static void printOffset(void * pool, char * name, uint32_t offset) // prints "  <name>: nil" or the byte position a prev/next offset refers to; uint32_t matches the Header fields, a narrower type would truncate the offset and hide NIL
+{
+  if (isNil(offset))
+    printf("  %s: nil\n", name); // the offset points nowhere
+  else
+    printf("  %s: %td\n", name, (char*)HDR_PTR(offset) - (char*)pool); // HDR_PTR masks the flag bit and relies on the variable named pool
+}
+
+void yalloc_dump(void * pool, char * name) // prints a title line with name, then every block header reached by following the next-offsets from the start of pool
+{
+  printf("---- %s ----\n", name);
+  Header * cur = (Header*)pool;
+  for (;;)
+  {
+    printf(isFree(cur) ? "%td: free @%p\n" : "%td: used @%p\n", (char*)cur - (char*)pool, (void*)cur); // %p requires a void* argument
+    printOffset(pool, cur == pool ? "first free" : "prev", cur->prev); // the prev field of the very first header is labeled "first free"
+    printOffset(pool, "next", cur->next);
+    if (isFree(cur))
+    {
+      printOffset(pool, "prevFree", cur[1].prev); // free blocks: the Header slot right after their own is printed as free-list links
+      printOffset(pool, "nextFree", cur[1].next);
+    }
+    else
+      printf("  payload includes padding: %i\n", isPadded(cur));
+
+    if (isNil(cur->next))
+      break; // no further block, stop before computing a payload size
+
+    printf("  %zu bytes payload\n", (size_t)((char*)HDR_PTR(cur->next) - (char*)cur) - sizeof(Header)); // the subtraction with sizeof yields a size_t, so %zu is the matching conversion
+
+    cur = HDR_PTR(cur->next);
+  }
+
+  fflush(stdout);
+}
diff --git a/third_party/gldc/src/yalloc/yalloc_internals.h b/third_party/gldc/src/yalloc/yalloc_internals.h
new file mode 100644
index 0000000..ffb70cb
--- /dev/null
+++ b/third_party/gldc/src/yalloc/yalloc_internals.h
@@ -0,0 +1,63 @@
+#ifndef YALLOC_INTERNALS_H
+#define YALLOC_INTERNALS_H
+
+#include <stdint.h>
+
+typedef struct
+{
+  uint32_t prev; // offset of the previous block, lowest bit set if this block is free
+  uint32_t next; // offset of the next block, for used blocks: lowest bit set if unused header at the end
+
+  /* User data follows the header directly and has to be 32-byte aligned,
+   * which is why the header is filled up to exactly 32 bytes. */
+  uint8_t padding[32 - 2 * sizeof(uint32_t)];
+} Header;
+
+// NOTE: Offsets are stored in 32bit integers where the lowest bit is used as flag. So we remove the low bit and shift by 1 to get the byte offset from the start of the pool.
+
+#define NIL 0xFFFFFFFEu // offset value that points nowhere; also used as mask that clears the flag bit
+
+// return Header-address for a prev/next (flag bit is masked out; expects a variable named pool in scope)
+#define HDR_PTR(offset) ((Header*)((char*)pool + (((offset) & NIL)<<1)))
+
+// return a prev/next for a Header-address (argument is parenthesized so that expressions like hdr + 1 are converted as a whole)
+#define HDR_OFFSET(blockPtr) ((uint32_t)(((char*)(blockPtr) - (char*)pool) >> 1))
+
+#ifndef YALLOC_INTERNAL_VALIDATE
+# ifdef NDEBUG
+#   define YALLOC_INTERNAL_VALIDATE 0
+# else
+#   define YALLOC_INTERNAL_VALIDATE 1
+#endif
+#endif
+
+
+/*
+internal_assert() is used in some places to check internal expectations.
+Activate this if you modify the code to detect problems as early as possible.
+In other cases this should be deactivated.
+*/
+#if 0
+#define internal_assert assert
+#else
+#define internal_assert(condition)((void) 0)
+#endif
+
+// tells whether an offset is the NIL marker, whatever the state of its flag bit
+static inline int isNil(uint32_t offset)
+{
+  return (offset & 0xFFFFFFFEu) == 0xFFFFFFFEu;
+}
+
+static inline int isFree(Header * hdr) // 1 if the lowest bit of prev (the "free" flag) is set, else 0
+{
+  return (hdr->prev & 1u) != 0u;
+}
+
+static inline int isPadded(Header * hdr) // 1 if the lowest bit of next (the "padded" flag) is set, else 0
+{
+  return (hdr->next & 1u) != 0u;
+}
+
+
+#endif // YALLOC_INTERNALS_H