diff options
Diffstat (limited to 'third_party/bearssl/src')
283 files changed, 58637 insertions, 0 deletions
diff --git a/third_party/bearssl/src/aes_big_cbcdec.c b/third_party/bearssl/src/aes_big_cbcdec.c new file mode 100644 index 0000000..d969a3b --- /dev/null +++ b/third_party/bearssl/src/aes_big_cbcdec.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see bearssl_block.h */ +void +br_aes_big_cbcdec_init(br_aes_big_cbcdec_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_big_cbcdec_vtable; + ctx->num_rounds = br_aes_big_keysched_inv(ctx->skey, key, len); +} + +/* see bearssl_block.h */ +void +br_aes_big_cbcdec_run(const br_aes_big_cbcdec_keys *ctx, + void *iv, void *data, size_t len) +{ + unsigned char *buf, *ivbuf; + + ivbuf = iv; + buf = data; + while (len > 0) { + unsigned char tmp[16]; + int i; + + memcpy(tmp, buf, 16); + br_aes_big_decrypt(ctx->num_rounds, ctx->skey, buf); + for (i = 0; i < 16; i ++) { + buf[i] ^= ivbuf[i]; + } + memcpy(ivbuf, tmp, 16); + buf += 16; + len -= 16; + } +} + +/* see bearssl_block.h */ +const br_block_cbcdec_class br_aes_big_cbcdec_vtable = { + sizeof(br_aes_big_cbcdec_keys), + 16, + 4, + (void (*)(const br_block_cbcdec_class **, const void *, size_t)) + &br_aes_big_cbcdec_init, + (void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t)) + &br_aes_big_cbcdec_run +}; diff --git a/third_party/bearssl/src/aes_big_cbcenc.c b/third_party/bearssl/src/aes_big_cbcenc.c new file mode 100644 index 0000000..265e53b --- /dev/null +++ b/third_party/bearssl/src/aes_big_cbcenc.c @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_block.h */ +void +br_aes_big_cbcenc_init(br_aes_big_cbcenc_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_big_cbcenc_vtable; + ctx->num_rounds = br_aes_keysched(ctx->skey, key, len); +} + +/* see bearssl_block.h */ +void +br_aes_big_cbcenc_run(const br_aes_big_cbcenc_keys *ctx, + void *iv, void *data, size_t len) +{ + unsigned char *buf, *ivbuf; + + ivbuf = iv; + buf = data; + while (len > 0) { + int i; + + for (i = 0; i < 16; i ++) { + buf[i] ^= ivbuf[i]; + } + br_aes_big_encrypt(ctx->num_rounds, ctx->skey, buf); + memcpy(ivbuf, buf, 16); + buf += 16; + len -= 16; + } +} + +/* see bearssl_block.h */ +const br_block_cbcenc_class br_aes_big_cbcenc_vtable = { + sizeof(br_aes_big_cbcenc_keys), + 16, + 4, + (void (*)(const br_block_cbcenc_class **, const void *, size_t)) + &br_aes_big_cbcenc_init, + (void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t)) + &br_aes_big_cbcenc_run +}; diff --git a/third_party/bearssl/src/aes_big_ctr.c b/third_party/bearssl/src/aes_big_ctr.c new file mode 100644 index 0000000..18fbb84 --- /dev/null +++ b/third_party/bearssl/src/aes_big_ctr.c @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to 
use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_block.h */ +void +br_aes_big_ctr_init(br_aes_big_ctr_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_big_ctr_vtable; + ctx->num_rounds = br_aes_keysched(ctx->skey, key, len); +} + +static void +xorbuf(void *dst, const void *src, size_t len) +{ + unsigned char *d; + const unsigned char *s; + + d = dst; + s = src; + while (len -- > 0) { + *d ++ ^= *s ++; + } +} + +/* see bearssl_block.h */ +uint32_t +br_aes_big_ctr_run(const br_aes_big_ctr_keys *ctx, + const void *iv, uint32_t cc, void *data, size_t len) +{ + unsigned char *buf; + + buf = data; + while (len > 0) { + unsigned char tmp[16]; + + memcpy(tmp, iv, 12); + br_enc32be(tmp + 12, cc ++); + br_aes_big_encrypt(ctx->num_rounds, ctx->skey, tmp); + if (len <= 16) { + xorbuf(buf, tmp, len); + break; + } + xorbuf(buf, tmp, 16); + buf += 16; + len -= 16; + } + return cc; +} + +/* see bearssl_block.h */ +const br_block_ctr_class br_aes_big_ctr_vtable = { + sizeof(br_aes_big_ctr_keys), + 16, + 4, + (void (*)(const br_block_ctr_class **, const void *, size_t)) + &br_aes_big_ctr_init, + (uint32_t (*)(const br_block_ctr_class 
*const *, + const void *, uint32_t, void *, size_t)) + &br_aes_big_ctr_run +}; diff --git a/third_party/bearssl/src/aes_big_ctrcbc.c b/third_party/bearssl/src/aes_big_ctrcbc.c new file mode 100644 index 0000000..d45ca76 --- /dev/null +++ b/third_party/bearssl/src/aes_big_ctrcbc.c @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see bearssl_block.h */ +void +br_aes_big_ctrcbc_init(br_aes_big_ctrcbc_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_big_ctrcbc_vtable; + ctx->num_rounds = br_aes_keysched(ctx->skey, key, len); +} + +static void +xorbuf(void *dst, const void *src, size_t len) +{ + unsigned char *d; + const unsigned char *s; + + d = dst; + s = src; + while (len -- > 0) { + *d ++ ^= *s ++; + } +} + +/* see bearssl_block.h */ +void +br_aes_big_ctrcbc_ctr(const br_aes_big_ctrcbc_keys *ctx, + void *ctr, void *data, size_t len) +{ + unsigned char *buf, *bctr; + uint32_t cc0, cc1, cc2, cc3; + + buf = data; + bctr = ctr; + cc3 = br_dec32be(bctr + 0); + cc2 = br_dec32be(bctr + 4); + cc1 = br_dec32be(bctr + 8); + cc0 = br_dec32be(bctr + 12); + while (len > 0) { + unsigned char tmp[16]; + uint32_t carry; + + br_enc32be(tmp + 0, cc3); + br_enc32be(tmp + 4, cc2); + br_enc32be(tmp + 8, cc1); + br_enc32be(tmp + 12, cc0); + br_aes_big_encrypt(ctx->num_rounds, ctx->skey, tmp); + xorbuf(buf, tmp, 16); + buf += 16; + len -= 16; + cc0 ++; + carry = (~(cc0 | -cc0)) >> 31; + cc1 += carry; + carry &= (~(cc1 | -cc1)) >> 31; + cc2 += carry; + carry &= (~(cc2 | -cc2)) >> 31; + cc3 += carry; + } + br_enc32be(bctr + 0, cc3); + br_enc32be(bctr + 4, cc2); + br_enc32be(bctr + 8, cc1); + br_enc32be(bctr + 12, cc0); +} + +/* see bearssl_block.h */ +void +br_aes_big_ctrcbc_mac(const br_aes_big_ctrcbc_keys *ctx, + void *cbcmac, const void *data, size_t len) +{ + const unsigned char *buf; + + buf = data; + while (len > 0) { + xorbuf(cbcmac, buf, 16); + br_aes_big_encrypt(ctx->num_rounds, ctx->skey, cbcmac); + buf += 16; + len -= 16; + } +} + +/* see bearssl_block.h */ +void +br_aes_big_ctrcbc_encrypt(const br_aes_big_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len) +{ + br_aes_big_ctrcbc_ctr(ctx, ctr, data, len); + br_aes_big_ctrcbc_mac(ctx, cbcmac, data, len); +} + +/* see bearssl_block.h */ +void +br_aes_big_ctrcbc_decrypt(const 
br_aes_big_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len) +{ + br_aes_big_ctrcbc_mac(ctx, cbcmac, data, len); + br_aes_big_ctrcbc_ctr(ctx, ctr, data, len); +} + +/* see bearssl_block.h */ +const br_block_ctrcbc_class br_aes_big_ctrcbc_vtable = { + sizeof(br_aes_big_ctrcbc_keys), + 16, + 4, + (void (*)(const br_block_ctrcbc_class **, const void *, size_t)) + &br_aes_big_ctrcbc_init, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, void *, size_t)) + &br_aes_big_ctrcbc_encrypt, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, void *, size_t)) + &br_aes_big_ctrcbc_decrypt, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, size_t)) + &br_aes_big_ctrcbc_ctr, + (void (*)(const br_block_ctrcbc_class *const *, + void *, const void *, size_t)) + &br_aes_big_ctrcbc_mac +}; diff --git a/third_party/bearssl/src/aes_big_dec.c b/third_party/bearssl/src/aes_big_dec.c new file mode 100644 index 0000000..a5d0e3c --- /dev/null +++ b/third_party/bearssl/src/aes_big_dec.c @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Inverse S-box (used in key schedule for decryption). + */ +static const unsigned char iS[] = { + 0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E, + 0x81, 0xF3, 0xD7, 0xFB, 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, + 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, 0x54, 0x7B, 0x94, 0x32, + 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E, + 0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49, + 0x6D, 0x8B, 0xD1, 0x25, 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, + 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, 0x6C, 0x70, 0x48, 0x50, + 0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84, + 0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05, + 0xB8, 0xB3, 0x45, 0x06, 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, + 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, 0x3A, 0x91, 0x11, 0x41, + 0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73, + 0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8, + 0x1C, 0x75, 0xDF, 0x6E, 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, + 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B, 0xFC, 0x56, 0x3E, 0x4B, + 0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4, + 0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59, + 0x27, 0x80, 0xEC, 0x5F, 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, + 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, 0xA0, 0xE0, 0x3B, 0x4D, + 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63, + 0x55, 0x21, 0x0C, 0x7D +}; + +static 
const uint32_t iSsm0[] = { + 0x51F4A750, 0x7E416553, 0x1A17A4C3, 0x3A275E96, 0x3BAB6BCB, 0x1F9D45F1, + 0xACFA58AB, 0x4BE30393, 0x2030FA55, 0xAD766DF6, 0x88CC7691, 0xF5024C25, + 0x4FE5D7FC, 0xC52ACBD7, 0x26354480, 0xB562A38F, 0xDEB15A49, 0x25BA1B67, + 0x45EA0E98, 0x5DFEC0E1, 0xC32F7502, 0x814CF012, 0x8D4697A3, 0x6BD3F9C6, + 0x038F5FE7, 0x15929C95, 0xBF6D7AEB, 0x955259DA, 0xD4BE832D, 0x587421D3, + 0x49E06929, 0x8EC9C844, 0x75C2896A, 0xF48E7978, 0x99583E6B, 0x27B971DD, + 0xBEE14FB6, 0xF088AD17, 0xC920AC66, 0x7DCE3AB4, 0x63DF4A18, 0xE51A3182, + 0x97513360, 0x62537F45, 0xB16477E0, 0xBB6BAE84, 0xFE81A01C, 0xF9082B94, + 0x70486858, 0x8F45FD19, 0x94DE6C87, 0x527BF8B7, 0xAB73D323, 0x724B02E2, + 0xE31F8F57, 0x6655AB2A, 0xB2EB2807, 0x2FB5C203, 0x86C57B9A, 0xD33708A5, + 0x302887F2, 0x23BFA5B2, 0x02036ABA, 0xED16825C, 0x8ACF1C2B, 0xA779B492, + 0xF307F2F0, 0x4E69E2A1, 0x65DAF4CD, 0x0605BED5, 0xD134621F, 0xC4A6FE8A, + 0x342E539D, 0xA2F355A0, 0x058AE132, 0xA4F6EB75, 0x0B83EC39, 0x4060EFAA, + 0x5E719F06, 0xBD6E1051, 0x3E218AF9, 0x96DD063D, 0xDD3E05AE, 0x4DE6BD46, + 0x91548DB5, 0x71C45D05, 0x0406D46F, 0x605015FF, 0x1998FB24, 0xD6BDE997, + 0x894043CC, 0x67D99E77, 0xB0E842BD, 0x07898B88, 0xE7195B38, 0x79C8EEDB, + 0xA17C0A47, 0x7C420FE9, 0xF8841EC9, 0x00000000, 0x09808683, 0x322BED48, + 0x1E1170AC, 0x6C5A724E, 0xFD0EFFFB, 0x0F853856, 0x3DAED51E, 0x362D3927, + 0x0A0FD964, 0x685CA621, 0x9B5B54D1, 0x24362E3A, 0x0C0A67B1, 0x9357E70F, + 0xB4EE96D2, 0x1B9B919E, 0x80C0C54F, 0x61DC20A2, 0x5A774B69, 0x1C121A16, + 0xE293BA0A, 0xC0A02AE5, 0x3C22E043, 0x121B171D, 0x0E090D0B, 0xF28BC7AD, + 0x2DB6A8B9, 0x141EA9C8, 0x57F11985, 0xAF75074C, 0xEE99DDBB, 0xA37F60FD, + 0xF701269F, 0x5C72F5BC, 0x44663BC5, 0x5BFB7E34, 0x8B432976, 0xCB23C6DC, + 0xB6EDFC68, 0xB8E4F163, 0xD731DCCA, 0x42638510, 0x13972240, 0x84C61120, + 0x854A247D, 0xD2BB3DF8, 0xAEF93211, 0xC729A16D, 0x1D9E2F4B, 0xDCB230F3, + 0x0D8652EC, 0x77C1E3D0, 0x2BB3166C, 0xA970B999, 0x119448FA, 0x47E96422, + 0xA8FC8CC4, 0xA0F03F1A, 0x567D2CD8, 
0x223390EF, 0x87494EC7, 0xD938D1C1, + 0x8CCAA2FE, 0x98D40B36, 0xA6F581CF, 0xA57ADE28, 0xDAB78E26, 0x3FADBFA4, + 0x2C3A9DE4, 0x5078920D, 0x6A5FCC9B, 0x547E4662, 0xF68D13C2, 0x90D8B8E8, + 0x2E39F75E, 0x82C3AFF5, 0x9F5D80BE, 0x69D0937C, 0x6FD52DA9, 0xCF2512B3, + 0xC8AC993B, 0x10187DA7, 0xE89C636E, 0xDB3BBB7B, 0xCD267809, 0x6E5918F4, + 0xEC9AB701, 0x834F9AA8, 0xE6956E65, 0xAAFFE67E, 0x21BCCF08, 0xEF15E8E6, + 0xBAE79BD9, 0x4A6F36CE, 0xEA9F09D4, 0x29B07CD6, 0x31A4B2AF, 0x2A3F2331, + 0xC6A59430, 0x35A266C0, 0x744EBC37, 0xFC82CAA6, 0xE090D0B0, 0x33A7D815, + 0xF104984A, 0x41ECDAF7, 0x7FCD500E, 0x1791F62F, 0x764DD68D, 0x43EFB04D, + 0xCCAA4D54, 0xE49604DF, 0x9ED1B5E3, 0x4C6A881B, 0xC12C1FB8, 0x4665517F, + 0x9D5EEA04, 0x018C355D, 0xFA877473, 0xFB0B412E, 0xB3671D5A, 0x92DBD252, + 0xE9105633, 0x6DD64713, 0x9AD7618C, 0x37A10C7A, 0x59F8148E, 0xEB133C89, + 0xCEA927EE, 0xB761C935, 0xE11CE5ED, 0x7A47B13C, 0x9CD2DF59, 0x55F2733F, + 0x1814CE79, 0x73C737BF, 0x53F7CDEA, 0x5FFDAA5B, 0xDF3D6F14, 0x7844DB86, + 0xCAAFF381, 0xB968C43E, 0x3824342C, 0xC2A3405F, 0x161DC372, 0xBCE2250C, + 0x283C498B, 0xFF0D9541, 0x39A80171, 0x080CB3DE, 0xD8B4E49C, 0x6456C190, + 0x7BCB8461, 0xD532B670, 0x486C5C74, 0xD0B85742 +}; + +static unsigned +mul2(unsigned x) +{ + x <<= 1; + return x ^ ((unsigned)(-(int)(x >> 8)) & 0x11B); +} + +static unsigned +mul9(unsigned x) +{ + return x ^ mul2(mul2(mul2(x))); +} + +static unsigned +mulb(unsigned x) +{ + unsigned x2; + + x2 = mul2(x); + return x ^ x2 ^ mul2(mul2(x2)); +} + +static unsigned +muld(unsigned x) +{ + unsigned x4; + + x4 = mul2(mul2(x)); + return x ^ x4 ^ mul2(x4); +} + +static unsigned +mule(unsigned x) +{ + unsigned x2, x4; + + x2 = mul2(x); + x4 = mul2(x2); + return x2 ^ x4 ^ mul2(x4); +} + +/* see inner.h */ +unsigned +br_aes_big_keysched_inv(uint32_t *skey, const void *key, size_t key_len) +{ + unsigned num_rounds; + int i, m; + + /* + * Sub-keys for decryption are distinct from encryption sub-keys + * in that InvMixColumns() is already applied for the 
inner + * rounds. + */ + num_rounds = br_aes_keysched(skey, key, key_len); + m = (int)(num_rounds << 2); + for (i = 4; i < m; i ++) { + uint32_t p; + unsigned p0, p1, p2, p3; + uint32_t q0, q1, q2, q3; + + p = skey[i]; + p0 = p >> 24; + p1 = (p >> 16) & 0xFF; + p2 = (p >> 8) & 0xFF; + p3 = p & 0xFF; + q0 = mule(p0) ^ mulb(p1) ^ muld(p2) ^ mul9(p3); + q1 = mul9(p0) ^ mule(p1) ^ mulb(p2) ^ muld(p3); + q2 = muld(p0) ^ mul9(p1) ^ mule(p2) ^ mulb(p3); + q3 = mulb(p0) ^ muld(p1) ^ mul9(p2) ^ mule(p3); + skey[i] = (q0 << 24) | (q1 << 16) | (q2 << 8) | q3; + } + return num_rounds; +} + +static inline uint32_t +rotr(uint32_t x, int n) +{ + return (x << (32 - n)) | (x >> n); +} + +#define iSboxExt0(x) (iSsm0[x]) +#define iSboxExt1(x) (rotr(iSsm0[x], 8)) +#define iSboxExt2(x) (rotr(iSsm0[x], 16)) +#define iSboxExt3(x) (rotr(iSsm0[x], 24)) + +/* see bearssl.h */ +void +br_aes_big_decrypt(unsigned num_rounds, const uint32_t *skey, void *data) +{ + unsigned char *buf; + uint32_t s0, s1, s2, s3; + uint32_t t0, t1, t2, t3; + unsigned u; + + buf = data; + s0 = br_dec32be(buf); + s1 = br_dec32be(buf + 4); + s2 = br_dec32be(buf + 8); + s3 = br_dec32be(buf + 12); + s0 ^= skey[(num_rounds << 2) + 0]; + s1 ^= skey[(num_rounds << 2) + 1]; + s2 ^= skey[(num_rounds << 2) + 2]; + s3 ^= skey[(num_rounds << 2) + 3]; + for (u = num_rounds - 1; u > 0; u --) { + uint32_t v0 = iSboxExt0(s0 >> 24) + ^ iSboxExt1((s3 >> 16) & 0xFF) + ^ iSboxExt2((s2 >> 8) & 0xFF) + ^ iSboxExt3(s1 & 0xFF); + uint32_t v1 = iSboxExt0(s1 >> 24) + ^ iSboxExt1((s0 >> 16) & 0xFF) + ^ iSboxExt2((s3 >> 8) & 0xFF) + ^ iSboxExt3(s2 & 0xFF); + uint32_t v2 = iSboxExt0(s2 >> 24) + ^ iSboxExt1((s1 >> 16) & 0xFF) + ^ iSboxExt2((s0 >> 8) & 0xFF) + ^ iSboxExt3(s3 & 0xFF); + uint32_t v3 = iSboxExt0(s3 >> 24) + ^ iSboxExt1((s2 >> 16) & 0xFF) + ^ iSboxExt2((s1 >> 8) & 0xFF) + ^ iSboxExt3(s0 & 0xFF); + s0 = v0; + s1 = v1; + s2 = v2; + s3 = v3; + s0 ^= skey[u << 2]; + s1 ^= skey[(u << 2) + 1]; + s2 ^= skey[(u << 2) + 2]; + s3 ^= skey[(u 
<< 2) + 3]; + } + t0 = ((uint32_t)iS[s0 >> 24] << 24) + | ((uint32_t)iS[(s3 >> 16) & 0xFF] << 16) + | ((uint32_t)iS[(s2 >> 8) & 0xFF] << 8) + | (uint32_t)iS[s1 & 0xFF]; + t1 = ((uint32_t)iS[s1 >> 24] << 24) + | ((uint32_t)iS[(s0 >> 16) & 0xFF] << 16) + | ((uint32_t)iS[(s3 >> 8) & 0xFF] << 8) + | (uint32_t)iS[s2 & 0xFF]; + t2 = ((uint32_t)iS[s2 >> 24] << 24) + | ((uint32_t)iS[(s1 >> 16) & 0xFF] << 16) + | ((uint32_t)iS[(s0 >> 8) & 0xFF] << 8) + | (uint32_t)iS[s3 & 0xFF]; + t3 = ((uint32_t)iS[s3 >> 24] << 24) + | ((uint32_t)iS[(s2 >> 16) & 0xFF] << 16) + | ((uint32_t)iS[(s1 >> 8) & 0xFF] << 8) + | (uint32_t)iS[s0 & 0xFF]; + s0 = t0 ^ skey[0]; + s1 = t1 ^ skey[1]; + s2 = t2 ^ skey[2]; + s3 = t3 ^ skey[3]; + br_enc32be(buf, s0); + br_enc32be(buf + 4, s1); + br_enc32be(buf + 8, s2); + br_enc32be(buf + 12, s3); +} diff --git a/third_party/bearssl/src/aes_big_enc.c b/third_party/bearssl/src/aes_big_enc.c new file mode 100644 index 0000000..bbabb9a --- /dev/null +++ b/third_party/bearssl/src/aes_big_enc.c @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +#define S br_aes_S + +static const uint32_t Ssm0[] = { + 0xC66363A5, 0xF87C7C84, 0xEE777799, 0xF67B7B8D, 0xFFF2F20D, 0xD66B6BBD, + 0xDE6F6FB1, 0x91C5C554, 0x60303050, 0x02010103, 0xCE6767A9, 0x562B2B7D, + 0xE7FEFE19, 0xB5D7D762, 0x4DABABE6, 0xEC76769A, 0x8FCACA45, 0x1F82829D, + 0x89C9C940, 0xFA7D7D87, 0xEFFAFA15, 0xB25959EB, 0x8E4747C9, 0xFBF0F00B, + 0x41ADADEC, 0xB3D4D467, 0x5FA2A2FD, 0x45AFAFEA, 0x239C9CBF, 0x53A4A4F7, + 0xE4727296, 0x9BC0C05B, 0x75B7B7C2, 0xE1FDFD1C, 0x3D9393AE, 0x4C26266A, + 0x6C36365A, 0x7E3F3F41, 0xF5F7F702, 0x83CCCC4F, 0x6834345C, 0x51A5A5F4, + 0xD1E5E534, 0xF9F1F108, 0xE2717193, 0xABD8D873, 0x62313153, 0x2A15153F, + 0x0804040C, 0x95C7C752, 0x46232365, 0x9DC3C35E, 0x30181828, 0x379696A1, + 0x0A05050F, 0x2F9A9AB5, 0x0E070709, 0x24121236, 0x1B80809B, 0xDFE2E23D, + 0xCDEBEB26, 0x4E272769, 0x7FB2B2CD, 0xEA75759F, 0x1209091B, 0x1D83839E, + 0x582C2C74, 0x341A1A2E, 0x361B1B2D, 0xDC6E6EB2, 0xB45A5AEE, 0x5BA0A0FB, + 0xA45252F6, 0x763B3B4D, 0xB7D6D661, 0x7DB3B3CE, 0x5229297B, 0xDDE3E33E, + 0x5E2F2F71, 0x13848497, 0xA65353F5, 0xB9D1D168, 0x00000000, 0xC1EDED2C, + 0x40202060, 0xE3FCFC1F, 0x79B1B1C8, 0xB65B5BED, 0xD46A6ABE, 0x8DCBCB46, + 0x67BEBED9, 0x7239394B, 0x944A4ADE, 0x984C4CD4, 0xB05858E8, 0x85CFCF4A, + 0xBBD0D06B, 0xC5EFEF2A, 0x4FAAAAE5, 0xEDFBFB16, 0x864343C5, 0x9A4D4DD7, + 0x66333355, 0x11858594, 0x8A4545CF, 0xE9F9F910, 0x04020206, 0xFE7F7F81, + 0xA05050F0, 0x783C3C44, 0x259F9FBA, 0x4BA8A8E3, 0xA25151F3, 0x5DA3A3FE, + 0x804040C0, 0x058F8F8A, 0x3F9292AD, 0x219D9DBC, 0x70383848, 0xF1F5F504, + 0x63BCBCDF, 0x77B6B6C1, 0xAFDADA75, 0x42212163, 0x20101030, 0xE5FFFF1A, + 0xFDF3F30E, 0xBFD2D26D, 0x81CDCD4C, 0x180C0C14, 0x26131335, 0xC3ECEC2F, + 
0xBE5F5FE1, 0x359797A2, 0x884444CC, 0x2E171739, 0x93C4C457, 0x55A7A7F2, + 0xFC7E7E82, 0x7A3D3D47, 0xC86464AC, 0xBA5D5DE7, 0x3219192B, 0xE6737395, + 0xC06060A0, 0x19818198, 0x9E4F4FD1, 0xA3DCDC7F, 0x44222266, 0x542A2A7E, + 0x3B9090AB, 0x0B888883, 0x8C4646CA, 0xC7EEEE29, 0x6BB8B8D3, 0x2814143C, + 0xA7DEDE79, 0xBC5E5EE2, 0x160B0B1D, 0xADDBDB76, 0xDBE0E03B, 0x64323256, + 0x743A3A4E, 0x140A0A1E, 0x924949DB, 0x0C06060A, 0x4824246C, 0xB85C5CE4, + 0x9FC2C25D, 0xBDD3D36E, 0x43ACACEF, 0xC46262A6, 0x399191A8, 0x319595A4, + 0xD3E4E437, 0xF279798B, 0xD5E7E732, 0x8BC8C843, 0x6E373759, 0xDA6D6DB7, + 0x018D8D8C, 0xB1D5D564, 0x9C4E4ED2, 0x49A9A9E0, 0xD86C6CB4, 0xAC5656FA, + 0xF3F4F407, 0xCFEAEA25, 0xCA6565AF, 0xF47A7A8E, 0x47AEAEE9, 0x10080818, + 0x6FBABAD5, 0xF0787888, 0x4A25256F, 0x5C2E2E72, 0x381C1C24, 0x57A6A6F1, + 0x73B4B4C7, 0x97C6C651, 0xCBE8E823, 0xA1DDDD7C, 0xE874749C, 0x3E1F1F21, + 0x964B4BDD, 0x61BDBDDC, 0x0D8B8B86, 0x0F8A8A85, 0xE0707090, 0x7C3E3E42, + 0x71B5B5C4, 0xCC6666AA, 0x904848D8, 0x06030305, 0xF7F6F601, 0x1C0E0E12, + 0xC26161A3, 0x6A35355F, 0xAE5757F9, 0x69B9B9D0, 0x17868691, 0x99C1C158, + 0x3A1D1D27, 0x279E9EB9, 0xD9E1E138, 0xEBF8F813, 0x2B9898B3, 0x22111133, + 0xD26969BB, 0xA9D9D970, 0x078E8E89, 0x339494A7, 0x2D9B9BB6, 0x3C1E1E22, + 0x15878792, 0xC9E9E920, 0x87CECE49, 0xAA5555FF, 0x50282878, 0xA5DFDF7A, + 0x038C8C8F, 0x59A1A1F8, 0x09898980, 0x1A0D0D17, 0x65BFBFDA, 0xD7E6E631, + 0x844242C6, 0xD06868B8, 0x824141C3, 0x299999B0, 0x5A2D2D77, 0x1E0F0F11, + 0x7BB0B0CB, 0xA85454FC, 0x6DBBBBD6, 0x2C16163A +}; + +static inline uint32_t +rotr(uint32_t x, int n) +{ + return (x << (32 - n)) | (x >> n); +} + +#define SboxExt0(x) (Ssm0[x]) +#define SboxExt1(x) (rotr(Ssm0[x], 8)) +#define SboxExt2(x) (rotr(Ssm0[x], 16)) +#define SboxExt3(x) (rotr(Ssm0[x], 24)) + + +/* see bearssl.h */ +void +br_aes_big_encrypt(unsigned num_rounds, const uint32_t *skey, void *data) +{ + unsigned char *buf; + uint32_t s0, s1, s2, s3; + uint32_t t0, t1, t2, t3; + unsigned u; + + buf = data; + s0 
= br_dec32be(buf); + s1 = br_dec32be(buf + 4); + s2 = br_dec32be(buf + 8); + s3 = br_dec32be(buf + 12); + s0 ^= skey[0]; + s1 ^= skey[1]; + s2 ^= skey[2]; + s3 ^= skey[3]; + for (u = 1; u < num_rounds; u ++) { + uint32_t v0, v1, v2, v3; + + v0 = SboxExt0(s0 >> 24) + ^ SboxExt1((s1 >> 16) & 0xFF) + ^ SboxExt2((s2 >> 8) & 0xFF) + ^ SboxExt3(s3 & 0xFF); + v1 = SboxExt0(s1 >> 24) + ^ SboxExt1((s2 >> 16) & 0xFF) + ^ SboxExt2((s3 >> 8) & 0xFF) + ^ SboxExt3(s0 & 0xFF); + v2 = SboxExt0(s2 >> 24) + ^ SboxExt1((s3 >> 16) & 0xFF) + ^ SboxExt2((s0 >> 8) & 0xFF) + ^ SboxExt3(s1 & 0xFF); + v3 = SboxExt0(s3 >> 24) + ^ SboxExt1((s0 >> 16) & 0xFF) + ^ SboxExt2((s1 >> 8) & 0xFF) + ^ SboxExt3(s2 & 0xFF); + s0 = v0; + s1 = v1; + s2 = v2; + s3 = v3; + s0 ^= skey[u << 2]; + s1 ^= skey[(u << 2) + 1]; + s2 ^= skey[(u << 2) + 2]; + s3 ^= skey[(u << 2) + 3]; + } + t0 = ((uint32_t)S[s0 >> 24] << 24) + | ((uint32_t)S[(s1 >> 16) & 0xFF] << 16) + | ((uint32_t)S[(s2 >> 8) & 0xFF] << 8) + | (uint32_t)S[s3 & 0xFF]; + t1 = ((uint32_t)S[s1 >> 24] << 24) + | ((uint32_t)S[(s2 >> 16) & 0xFF] << 16) + | ((uint32_t)S[(s3 >> 8) & 0xFF] << 8) + | (uint32_t)S[s0 & 0xFF]; + t2 = ((uint32_t)S[s2 >> 24] << 24) + | ((uint32_t)S[(s3 >> 16) & 0xFF] << 16) + | ((uint32_t)S[(s0 >> 8) & 0xFF] << 8) + | (uint32_t)S[s1 & 0xFF]; + t3 = ((uint32_t)S[s3 >> 24] << 24) + | ((uint32_t)S[(s0 >> 16) & 0xFF] << 16) + | ((uint32_t)S[(s1 >> 8) & 0xFF] << 8) + | (uint32_t)S[s2 & 0xFF]; + s0 = t0 ^ skey[num_rounds << 2]; + s1 = t1 ^ skey[(num_rounds << 2) + 1]; + s2 = t2 ^ skey[(num_rounds << 2) + 2]; + s3 = t3 ^ skey[(num_rounds << 2) + 3]; + br_enc32be(buf, s0); + br_enc32be(buf + 4, s1); + br_enc32be(buf + 8, s2); + br_enc32be(buf + 12, s3); +} diff --git a/third_party/bearssl/src/aes_common.c b/third_party/bearssl/src/aes_common.c new file mode 100644 index 0000000..72c64fb --- /dev/null +++ b/third_party/bearssl/src/aes_common.c @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission 
is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +static const uint32_t Rcon[] = { + 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, + 0x40000000, 0x80000000, 0x1B000000, 0x36000000 +}; + +#define S br_aes_S + +/* see inner.h */ +const unsigned char br_aes_S[] = { + 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, + 0xFE, 0xD7, 0xAB, 0x76, 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, + 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, 0xB7, 0xFD, 0x93, 0x26, + 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, + 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, + 0xEB, 0x27, 0xB2, 0x75, 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, + 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, 0x53, 0xD1, 0x00, 0xED, + 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, + 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, + 0x50, 0x3C, 0x9F, 0xA8, 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, + 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, 0xCD, 0x0C, 0x13, 0xEC, + 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, + 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, + 0xDE, 0x5E, 0x0B, 0xDB, 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, + 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, 0xE7, 0xC8, 0x37, 0x6D, + 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, + 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, + 0x4B, 0xBD, 0x8B, 0x8A, 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, + 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, 0xE1, 0xF8, 0x98, 0x11, + 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, + 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, + 0xB0, 0x54, 0xBB, 0x16 +}; + +static uint32_t +SubWord(uint32_t x) +{ + return ((uint32_t)S[x >> 24] << 24) + | ((uint32_t)S[(x >> 16) & 0xFF] << 16) + | ((uint32_t)S[(x >> 
8) & 0xFF] << 8) + | (uint32_t)S[x & 0xFF]; +} + +/* see inner.h */ +unsigned +br_aes_keysched(uint32_t *skey, const void *key, size_t key_len) +{ + unsigned num_rounds; + int i, j, k, nk, nkf; + + switch (key_len) { + case 16: + num_rounds = 10; + break; + case 24: + num_rounds = 12; + break; + case 32: + num_rounds = 14; + break; + default: + /* abort(); */ + return 0; + } + nk = (int)(key_len >> 2); + nkf = (int)((num_rounds + 1) << 2); + for (i = 0; i < nk; i ++) { + skey[i] = br_dec32be((const unsigned char *)key + (i << 2)); + } + for (i = nk, j = 0, k = 0; i < nkf; i ++) { + uint32_t tmp; + + tmp = skey[i - 1]; + if (j == 0) { + tmp = (tmp << 8) | (tmp >> 24); + tmp = SubWord(tmp) ^ Rcon[k]; + } else if (nk > 6 && j == 4) { + tmp = SubWord(tmp); + } + skey[i] = skey[i - nk] ^ tmp; + if (++ j == nk) { + j = 0; + k ++; + } + } + return num_rounds; +} diff --git a/third_party/bearssl/src/aes_ct.c b/third_party/bearssl/src/aes_ct.c new file mode 100644 index 0000000..66776d9 --- /dev/null +++ b/third_party/bearssl/src/aes_ct.c @@ -0,0 +1,328 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see inner.h */ /* Apply the S-box circuit in place to the eight words q[0..7]. q[0] is read as the low bit x7 and q[7] as the high bit x0; every bit lane of the words goes through the same XOR, AND and NOT gates. */ +void +br_aes_ct_bitslice_Sbox(uint32_t *q) +{ + /* + * This S-box implementation is a straightforward translation of + * the circuit described by Boyar and Peralta in "A new + * combinational logic minimization technique with applications + * to cryptology" (https://eprint.iacr.org/2009/191.pdf). + * + * Note that variables x* (input) and s* (output) are numbered + * in "reverse" order (x0 is the high bit, x7 is the low bit). + */ + + uint32_t x0, x1, x2, x3, x4, x5, x6, x7; + uint32_t y1, y2, y3, y4, y5, y6, y7, y8, y9; + uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18, y19; + uint32_t y20, y21; + uint32_t z0, z1, z2, z3, z4, z5, z6, z7, z8, z9; + uint32_t z10, z11, z12, z13, z14, z15, z16, z17; + uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9; + uint32_t t10, t11, t12, t13, t14, t15, t16, t17, t18, t19; + uint32_t t20, t21, t22, t23, t24, t25, t26, t27, t28, t29; + uint32_t t30, t31, t32, t33, t34, t35, t36, t37, t38, t39; + uint32_t t40, t41, t42, t43, t44, t45, t46, t47, t48, t49; + uint32_t t50, t51, t52, t53, t54, t55, t56, t57, t58, t59; + uint32_t t60, t61, t62, t63, t64, t65, t66, t67; + uint32_t s0, s1, s2, s3, s4, s5, s6, s7; + /* load inputs; index order is reversed between q[] and the x variables */ + x0 = q[7]; + x1 = q[6]; + x2 = q[5]; + x3 = q[4]; + x4 = q[3]; + x5 = q[2]; + x6 = q[1]; + x7 = q[0]; + + /* + * Top linear transformation. 
+ */ + y14 = x3 ^ x5; + y13 = x0 ^ x6; + y9 = x0 ^ x3; + y8 = x0 ^ x5; + t0 = x1 ^ x2; + y1 = t0 ^ x7; + y4 = y1 ^ x3; + y12 = y13 ^ y14; + y2 = y1 ^ x0; + y5 = y1 ^ x6; + y3 = y5 ^ y8; + t1 = x4 ^ y12; + y15 = t1 ^ x5; + y20 = t1 ^ x1; + y6 = y15 ^ x7; + y10 = y15 ^ t0; + y11 = y20 ^ y9; + y7 = x7 ^ y11; + y17 = y10 ^ y11; + y19 = y10 ^ y8; + y16 = t0 ^ y11; + y21 = y13 ^ y16; + y18 = x0 ^ y16; + + /* + * Non-linear section. + */ + t2 = y12 & y15; + t3 = y3 & y6; + t4 = t3 ^ t2; + t5 = y4 & x7; + t6 = t5 ^ t2; + t7 = y13 & y16; + t8 = y5 & y1; + t9 = t8 ^ t7; + t10 = y2 & y7; + t11 = t10 ^ t7; + t12 = y9 & y11; + t13 = y14 & y17; + t14 = t13 ^ t12; + t15 = y8 & y10; + t16 = t15 ^ t12; + t17 = t4 ^ t14; + t18 = t6 ^ t16; + t19 = t9 ^ t14; + t20 = t11 ^ t16; + t21 = t17 ^ y20; + t22 = t18 ^ y19; + t23 = t19 ^ y21; + t24 = t20 ^ y18; + + t25 = t21 ^ t22; + t26 = t21 & t23; + t27 = t24 ^ t26; + t28 = t25 & t27; + t29 = t28 ^ t22; + t30 = t23 ^ t24; + t31 = t22 ^ t26; + t32 = t31 & t30; + t33 = t32 ^ t24; + t34 = t23 ^ t33; + t35 = t27 ^ t33; + t36 = t24 & t35; + t37 = t36 ^ t34; + t38 = t27 ^ t36; + t39 = t29 & t38; + t40 = t25 ^ t39; + + t41 = t40 ^ t37; + t42 = t29 ^ t33; + t43 = t29 ^ t40; + t44 = t33 ^ t37; + t45 = t42 ^ t41; + z0 = t44 & y15; + z1 = t37 & y6; + z2 = t33 & x7; + z3 = t43 & y16; + z4 = t40 & y1; + z5 = t29 & y7; + z6 = t42 & y11; + z7 = t45 & y17; + z8 = t41 & y10; + z9 = t44 & y12; + z10 = t37 & y3; + z11 = t33 & y4; + z12 = t43 & y13; + z13 = t40 & y5; + z14 = t29 & y2; + z15 = t42 & y9; + z16 = t45 & y14; + z17 = t41 & y8; + + /* + * Bottom linear transformation. 
+ */ + t46 = z15 ^ z16; + t47 = z10 ^ z11; + t48 = z5 ^ z13; + t49 = z9 ^ z10; + t50 = z2 ^ z12; + t51 = z2 ^ z5; + t52 = z7 ^ z8; + t53 = z0 ^ z3; + t54 = z6 ^ z7; + t55 = z16 ^ z17; + t56 = z12 ^ t48; + t57 = t50 ^ t53; + t58 = z4 ^ t46; + t59 = z3 ^ t54; + t60 = t46 ^ t57; + t61 = z14 ^ t57; + t62 = t52 ^ t58; + t63 = t49 ^ t58; + t64 = z4 ^ t59; + t65 = t61 ^ t62; + t66 = z1 ^ t63; + s0 = t59 ^ t63; + s6 = t56 ^ ~t62; + s7 = t48 ^ ~t60; + t67 = t64 ^ t65; + s3 = t53 ^ t66; + s4 = t51 ^ t66; + s5 = t47 ^ t65; + s1 = t64 ^ ~s3; + s2 = t55 ^ ~t67; + /* store outputs, with the same reversed index order as the inputs */ + q[7] = s0; + q[6] = s1; + q[5] = s2; + q[4] = s3; + q[3] = s4; + q[2] = s5; + q[1] = s6; + q[0] = s7; +} + +/* see inner.h */ /* In-place bit permutation of the eight words q[0..7], made of three layers of masked swaps: 1-bit groups between words at distance 1, 2-bit groups at distance 2, 4-bit groups at distance 4. sub_word() in this file calls it both before and after the S-box. */ +void +br_aes_ct_ortho(uint32_t *q) +{ /* SWAPN exchanges the bits of x selected by mask ch with the bits of y selected by mask cl, where ch is cl shifted left by s. The SWAP macros are not undefined afterwards. */ +#define SWAPN(cl, ch, s, x, y) do { \ + uint32_t a, b; \ + a = (x); \ + b = (y); \ + (x) = (a & (uint32_t)cl) | ((b & (uint32_t)cl) << (s)); \ + (y) = ((a & (uint32_t)ch) >> (s)) | (b & (uint32_t)ch); \ + } while (0) + +#define SWAP2(x, y) SWAPN(0x55555555, 0xAAAAAAAA, 1, x, y) +#define SWAP4(x, y) SWAPN(0x33333333, 0xCCCCCCCC, 2, x, y) +#define SWAP8(x, y) SWAPN(0x0F0F0F0F, 0xF0F0F0F0, 4, x, y) + + SWAP2(q[0], q[1]); + SWAP2(q[2], q[3]); + SWAP2(q[4], q[5]); + SWAP2(q[6], q[7]); + + SWAP4(q[0], q[2]); + SWAP4(q[1], q[3]); + SWAP4(q[4], q[6]); + SWAP4(q[5], q[7]); + + SWAP8(q[0], q[4]); + SWAP8(q[1], q[5]); + SWAP8(q[2], q[6]); + SWAP8(q[3], q[7]); +} + /* Round constants for the key schedule below, one byte each; they are XORed into the low byte of the word. */ +static const unsigned char Rcon[] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36 +}; + /* Apply the S-box to x using the bitsliced circuit: x is copied into all eight words, permuted with br_aes_ct_ortho(), substituted, permuted back, and the result is read from q[0]. */ +static uint32_t +sub_word(uint32_t x) +{ + uint32_t q[8]; + int i; + + for (i = 0; i < 8; i ++) { + q[i] = x; + } + br_aes_ct_ortho(q); + br_aes_ct_bitslice_Sbox(q); + br_aes_ct_ortho(q); + return q[0]; +} + +/* see inner.h */ /* Key schedule for the 32-bit bitsliced implementation. key_len is in bytes and must be 16, 24 or 32 (10, 12 or 14 rounds); any other value returns 0 without writing to comp_skey. On success 4*(num_rounds+1) words are written to comp_skey, in the compact form that br_aes_ct_skey_expand() takes as input, and the number of rounds is returned. */ +unsigned +br_aes_ct_keysched(uint32_t *comp_skey, const void *key, size_t key_len) +{ + unsigned num_rounds; + int i, j, k, nk, nkf; + uint32_t tmp; + uint32_t skey[120]; + + switch (key_len) { + case 16: + num_rounds = 10; + break; + case 24: + num_rounds = 12; + break; + 
case 32: + num_rounds = 14; + break; + default: + /* abort(); */ + return 0; + } + nk = (int)(key_len >> 2); + nkf = (int)((num_rounds + 1) << 2); + tmp = 0; + /* every key word is stored twice, in two consecutive slots of skey[] */ for (i = 0; i < nk; i ++) { + tmp = br_dec32le((const unsigned char *)key + (i << 2)); + skey[(i << 1) + 0] = tmp; + skey[(i << 1) + 1] = tmp; + } + /* tmp carries the previous word from one iteration to the next; j is the position inside the current group of nk words, k selects the round constant */ for (i = nk, j = 0, k = 0; i < nkf; i ++) { + if (j == 0) { + tmp = (tmp << 24) | (tmp >> 8); + tmp = sub_word(tmp) ^ Rcon[k]; + } else if (nk > 6 && j == 4) { + tmp = sub_word(tmp); + } + tmp ^= skey[(i - nk) << 1]; + skey[(i << 1) + 0] = tmp; + skey[(i << 1) + 1] = tmp; + if (++ j == nk) { + j = 0; + k ++; + } + } + /* permute each round key (4 words, 8 slots) with br_aes_ct_ortho() */ for (i = 0; i < nkf; i += 4) { + br_aes_ct_ortho(skey + (i << 1)); + } + /* compact: keep the even-position bits of the first slot and the odd-position bits of the second */ for (i = 0, j = 0; i < nkf; i ++, j += 2) { + comp_skey[i] = (skey[j + 0] & 0x55555555) + | (skey[j + 1] & 0xAAAAAAAA); + } + return num_rounds; +} + +/* see inner.h */ /* Expand the compact key schedule. For each of the 4*(num_rounds+1) words of comp_skey two words are written to skey: the first takes the even-position bits and copies each one into the odd position above it, the second takes the odd-position bits and copies each one into the even position below it. skey must have room for 8*(num_rounds+1) words. */ +void +br_aes_ct_skey_expand(uint32_t *skey, + unsigned num_rounds, const uint32_t *comp_skey) +{ + unsigned u, v, n; + + n = (num_rounds + 1) << 2; + for (u = 0, v = 0; u < n; u ++, v += 2) { + uint32_t x, y; + + x = y = comp_skey[u]; + x &= 0x55555555; + skey[v + 0] = x | (x << 1); + y &= 0xAAAAAAAA; + skey[v + 1] = y | (y >> 1); + } +} diff --git a/third_party/bearssl/src/aes_ct64.c b/third_party/bearssl/src/aes_ct64.c new file mode 100644 index 0000000..1523811 --- /dev/null +++ b/third_party/bearssl/src/aes_ct64.c @@ -0,0 +1,398 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission 
notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see inner.h */ /* Apply the S-box circuit in place to the eight 64-bit words q[0..7]. q[0] is read as the low bit x7 and q[7] as the high bit x0; every bit lane of the words goes through the same XOR, AND and NOT gates. */ +void +br_aes_ct64_bitslice_Sbox(uint64_t *q) +{ + /* + * This S-box implementation is a straightforward translation of + * the circuit described by Boyar and Peralta in "A new + * combinational logic minimization technique with applications + * to cryptology" (https://eprint.iacr.org/2009/191.pdf). + * + * Note that variables x* (input) and s* (output) are numbered + * in "reverse" order (x0 is the high bit, x7 is the low bit). + */ + + uint64_t x0, x1, x2, x3, x4, x5, x6, x7; + uint64_t y1, y2, y3, y4, y5, y6, y7, y8, y9; + uint64_t y10, y11, y12, y13, y14, y15, y16, y17, y18, y19; + uint64_t y20, y21; + uint64_t z0, z1, z2, z3, z4, z5, z6, z7, z8, z9; + uint64_t z10, z11, z12, z13, z14, z15, z16, z17; + uint64_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9; + uint64_t t10, t11, t12, t13, t14, t15, t16, t17, t18, t19; + uint64_t t20, t21, t22, t23, t24, t25, t26, t27, t28, t29; + uint64_t t30, t31, t32, t33, t34, t35, t36, t37, t38, t39; + uint64_t t40, t41, t42, t43, t44, t45, t46, t47, t48, t49; + uint64_t t50, t51, t52, t53, t54, t55, t56, t57, t58, t59; + uint64_t t60, t61, t62, t63, t64, t65, t66, t67; + uint64_t s0, s1, s2, s3, s4, s5, s6, s7; + /* load inputs; index order is reversed between q[] and the x variables */ + x0 = q[7]; + x1 = q[6]; + x2 = q[5]; + x3 = q[4]; + x4 = q[3]; + x5 = q[2]; + x6 = q[1]; + x7 = q[0]; + + /* + * Top linear transformation. 
+ */ + y14 = x3 ^ x5; + y13 = x0 ^ x6; + y9 = x0 ^ x3; + y8 = x0 ^ x5; + t0 = x1 ^ x2; + y1 = t0 ^ x7; + y4 = y1 ^ x3; + y12 = y13 ^ y14; + y2 = y1 ^ x0; + y5 = y1 ^ x6; + y3 = y5 ^ y8; + t1 = x4 ^ y12; + y15 = t1 ^ x5; + y20 = t1 ^ x1; + y6 = y15 ^ x7; + y10 = y15 ^ t0; + y11 = y20 ^ y9; + y7 = x7 ^ y11; + y17 = y10 ^ y11; + y19 = y10 ^ y8; + y16 = t0 ^ y11; + y21 = y13 ^ y16; + y18 = x0 ^ y16; + + /* + * Non-linear section. + */ + t2 = y12 & y15; + t3 = y3 & y6; + t4 = t3 ^ t2; + t5 = y4 & x7; + t6 = t5 ^ t2; + t7 = y13 & y16; + t8 = y5 & y1; + t9 = t8 ^ t7; + t10 = y2 & y7; + t11 = t10 ^ t7; + t12 = y9 & y11; + t13 = y14 & y17; + t14 = t13 ^ t12; + t15 = y8 & y10; + t16 = t15 ^ t12; + t17 = t4 ^ t14; + t18 = t6 ^ t16; + t19 = t9 ^ t14; + t20 = t11 ^ t16; + t21 = t17 ^ y20; + t22 = t18 ^ y19; + t23 = t19 ^ y21; + t24 = t20 ^ y18; + + t25 = t21 ^ t22; + t26 = t21 & t23; + t27 = t24 ^ t26; + t28 = t25 & t27; + t29 = t28 ^ t22; + t30 = t23 ^ t24; + t31 = t22 ^ t26; + t32 = t31 & t30; + t33 = t32 ^ t24; + t34 = t23 ^ t33; + t35 = t27 ^ t33; + t36 = t24 & t35; + t37 = t36 ^ t34; + t38 = t27 ^ t36; + t39 = t29 & t38; + t40 = t25 ^ t39; + + t41 = t40 ^ t37; + t42 = t29 ^ t33; + t43 = t29 ^ t40; + t44 = t33 ^ t37; + t45 = t42 ^ t41; + z0 = t44 & y15; + z1 = t37 & y6; + z2 = t33 & x7; + z3 = t43 & y16; + z4 = t40 & y1; + z5 = t29 & y7; + z6 = t42 & y11; + z7 = t45 & y17; + z8 = t41 & y10; + z9 = t44 & y12; + z10 = t37 & y3; + z11 = t33 & y4; + z12 = t43 & y13; + z13 = t40 & y5; + z14 = t29 & y2; + z15 = t42 & y9; + z16 = t45 & y14; + z17 = t41 & y8; + + /* + * Bottom linear transformation. 
+ */ + t46 = z15 ^ z16; + t47 = z10 ^ z11; + t48 = z5 ^ z13; + t49 = z9 ^ z10; + t50 = z2 ^ z12; + t51 = z2 ^ z5; + t52 = z7 ^ z8; + t53 = z0 ^ z3; + t54 = z6 ^ z7; + t55 = z16 ^ z17; + t56 = z12 ^ t48; + t57 = t50 ^ t53; + t58 = z4 ^ t46; + t59 = z3 ^ t54; + t60 = t46 ^ t57; + t61 = z14 ^ t57; + t62 = t52 ^ t58; + t63 = t49 ^ t58; + t64 = z4 ^ t59; + t65 = t61 ^ t62; + t66 = z1 ^ t63; + s0 = t59 ^ t63; + s6 = t56 ^ ~t62; + s7 = t48 ^ ~t60; + t67 = t64 ^ t65; + s3 = t53 ^ t66; + s4 = t51 ^ t66; + s5 = t47 ^ t65; + s1 = t64 ^ ~s3; + s2 = t55 ^ ~t67; + /* store outputs, with the same reversed index order as the inputs */ + q[7] = s0; + q[6] = s1; + q[5] = s2; + q[4] = s3; + q[3] = s4; + q[2] = s5; + q[1] = s6; + q[0] = s7; +} + +/* see inner.h */ /* In-place bit permutation of the eight 64-bit words q[0..7], made of three layers of masked swaps: 1-bit groups between words at distance 1, 2-bit groups at distance 2, 4-bit groups at distance 4. Callers in this chunk apply it both before and after the bitsliced operations. */ +void +br_aes_ct64_ortho(uint64_t *q) +{ /* SWAPN exchanges the bits of x selected by mask ch with the bits of y selected by mask cl, where ch is cl shifted left by s. The SWAP macros are not undefined afterwards. */ +#define SWAPN(cl, ch, s, x, y) do { \ + uint64_t a, b; \ + a = (x); \ + b = (y); \ + (x) = (a & (uint64_t)cl) | ((b & (uint64_t)cl) << (s)); \ + (y) = ((a & (uint64_t)ch) >> (s)) | (b & (uint64_t)ch); \ + } while (0) + +#define SWAP2(x, y) SWAPN(0x5555555555555555, 0xAAAAAAAAAAAAAAAA, 1, x, y) +#define SWAP4(x, y) SWAPN(0x3333333333333333, 0xCCCCCCCCCCCCCCCC, 2, x, y) +#define SWAP8(x, y) SWAPN(0x0F0F0F0F0F0F0F0F, 0xF0F0F0F0F0F0F0F0, 4, x, y) + + SWAP2(q[0], q[1]); + SWAP2(q[2], q[3]); + SWAP2(q[4], q[5]); + SWAP2(q[6], q[7]); + + SWAP4(q[0], q[2]); + SWAP4(q[1], q[3]); + SWAP4(q[4], q[6]); + SWAP4(q[5], q[7]); + + SWAP8(q[0], q[4]); + SWAP8(q[1], q[5]); + SWAP8(q[2], q[6]); + SWAP8(q[3], q[7]); +} + +/* see inner.h */ /* Pack the four 32-bit words w[0..3] into two 64-bit words. Each input word is first widened so that each of its bytes sits in the low half of a 16-bit slot; q0 then receives w[0] in the low bytes of the slots and w[2] in the high bytes, and q1 combines w[1] and w[3] the same way. */ +void +br_aes_ct64_interleave_in(uint64_t *q0, uint64_t *q1, const uint32_t *w) +{ + uint64_t x0, x1, x2, x3; + + x0 = w[0]; + x1 = w[1]; + x2 = w[2]; + x3 = w[3]; + x0 |= (x0 << 16); + x1 |= (x1 << 16); + x2 |= (x2 << 16); + x3 |= (x3 << 16); + x0 &= (uint64_t)0x0000FFFF0000FFFF; + x1 &= (uint64_t)0x0000FFFF0000FFFF; + x2 &= (uint64_t)0x0000FFFF0000FFFF; + x3 &= (uint64_t)0x0000FFFF0000FFFF; + x0 |= (x0 << 8); + x1 |= (x1 << 8); + x2 |= (x2 << 8); + x3 |= (x3 << 8); + x0 &= (uint64_t)0x00FF00FF00FF00FF; + x1 &= 
(uint64_t)0x00FF00FF00FF00FF; + x2 &= (uint64_t)0x00FF00FF00FF00FF; + x3 &= (uint64_t)0x00FF00FF00FF00FF; + *q0 = x0 | (x2 << 8); + *q1 = x1 | (x3 << 8); +} + +/* see inner.h */ /* Reverse of br_aes_ct64_interleave_in(): recover four 32-bit words from q0 and q1. w[0] and w[2] come from the low and high bytes of the 16-bit slots of q0, w[1] and w[3] from those of q1. */ +void +br_aes_ct64_interleave_out(uint32_t *w, uint64_t q0, uint64_t q1) +{ + uint64_t x0, x1, x2, x3; + + x0 = q0 & (uint64_t)0x00FF00FF00FF00FF; + x1 = q1 & (uint64_t)0x00FF00FF00FF00FF; + x2 = (q0 >> 8) & (uint64_t)0x00FF00FF00FF00FF; + x3 = (q1 >> 8) & (uint64_t)0x00FF00FF00FF00FF; + x0 |= (x0 >> 8); + x1 |= (x1 >> 8); + x2 |= (x2 >> 8); + x3 |= (x3 >> 8); + x0 &= (uint64_t)0x0000FFFF0000FFFF; + x1 &= (uint64_t)0x0000FFFF0000FFFF; + x2 &= (uint64_t)0x0000FFFF0000FFFF; + x3 &= (uint64_t)0x0000FFFF0000FFFF; + w[0] = (uint32_t)x0 | (uint32_t)(x0 >> 16); + w[1] = (uint32_t)x1 | (uint32_t)(x1 >> 16); + w[2] = (uint32_t)x2 | (uint32_t)(x2 >> 16); + w[3] = (uint32_t)x3 | (uint32_t)(x3 >> 16); +} + /* Round constants for the key schedule below, one byte each; they are XORed into the low byte of the word. */ +static const unsigned char Rcon[] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36 +}; + /* Apply the S-box to x using the bitsliced circuit. Only q[0] carries data (x in its low 32 bits); the other seven words are zero. The result is the low 32 bits of q[0] after permuting, substituting and permuting back. */ +static uint32_t +sub_word(uint32_t x) +{ + uint64_t q[8]; + + memset(q, 0, sizeof q); + q[0] = x; + br_aes_ct64_ortho(q); + br_aes_ct64_bitslice_Sbox(q); + br_aes_ct64_ortho(q); + return (uint32_t)q[0]; +} + +/* see inner.h */ /* Key schedule for the 64-bit bitsliced implementation. key_len is in bytes and must be 16, 24 or 32 (10, 12 or 14 rounds); any other value returns 0 without writing to comp_skey. On success 2*(num_rounds+1) 64-bit words are written to comp_skey, in the compact form that br_aes_ct64_skey_expand() takes as input, and the number of rounds is returned. */ +unsigned +br_aes_ct64_keysched(uint64_t *comp_skey, const void *key, size_t key_len) +{ + unsigned num_rounds; + int i, j, k, nk, nkf; + uint32_t tmp; + uint32_t skey[60]; + + switch (key_len) { + case 16: + num_rounds = 10; + break; + case 24: + num_rounds = 12; + break; + case 32: + num_rounds = 14; + break; + default: + /* abort(); */ + return 0; + } + nk = (int)(key_len >> 2); + nkf = (int)((num_rounds + 1) << 2); + br_range_dec32le(skey, (key_len >> 2), key); + tmp = skey[(key_len >> 2) - 1]; + /* tmp carries the previous word from one iteration to the next; j is the position inside the current group of nk words, k selects the round constant */ for (i = nk, j = 0, k = 0; i < nkf; i ++) { + if (j == 0) { + tmp = (tmp << 24) | (tmp >> 8); + tmp = sub_word(tmp) ^ Rcon[k]; + } else if (nk > 6 && j == 4) { + tmp = sub_word(tmp); + } + tmp ^= skey[i - nk]; + skey[i] = tmp; + if (++ j == nk) { + j = 0; + k ++; + } + } + + 
/* turn each round key (4 words) into two compact 64-bit words */ for (i = 0, j = 0; i < nkf; i += 4, j += 2) { + uint64_t q[8]; + + br_aes_ct64_interleave_in(&q[0], &q[4], skey + i); + /* replicate q[0] and q[4] so that, after the permutation, each group of four words can be merged by keeping one bit out of four from each word */ q[1] = q[0]; + q[2] = q[0]; + q[3] = q[0]; + q[5] = q[4]; + q[6] = q[4]; + q[7] = q[4]; + br_aes_ct64_ortho(q); + comp_skey[j + 0] = + (q[0] & (uint64_t)0x1111111111111111) + | (q[1] & (uint64_t)0x2222222222222222) + | (q[2] & (uint64_t)0x4444444444444444) + | (q[3] & (uint64_t)0x8888888888888888); + comp_skey[j + 1] = + (q[4] & (uint64_t)0x1111111111111111) + | (q[5] & (uint64_t)0x2222222222222222) + | (q[6] & (uint64_t)0x4444444444444444) + | (q[7] & (uint64_t)0x8888888888888888); + } + return num_rounds; +} + +/* see inner.h */ /* Expand the compact key schedule. Each of the 2*(num_rounds+1) words of comp_skey yields four words of skey: output number b (0 to 3) takes the bits of the input at positions congruent to b modulo 4, moves each to the bottom of its 4-bit group, then replicates it over the whole group; (x << 4) - x is a multiplication by 15. skey must have room for 8*(num_rounds+1) words. */ +void +br_aes_ct64_skey_expand(uint64_t *skey, + unsigned num_rounds, const uint64_t *comp_skey) +{ + unsigned u, v, n; + + n = (num_rounds + 1) << 1; + for (u = 0, v = 0; u < n; u ++, v += 4) { + uint64_t x0, x1, x2, x3; + + x0 = x1 = x2 = x3 = comp_skey[u]; + x0 &= (uint64_t)0x1111111111111111; + x1 &= (uint64_t)0x2222222222222222; + x2 &= (uint64_t)0x4444444444444444; + x3 &= (uint64_t)0x8888888888888888; + x1 >>= 1; + x2 >>= 2; + x3 >>= 3; + skey[v + 0] = (x0 << 4) - x0; + skey[v + 1] = (x1 << 4) - x1; + skey[v + 2] = (x2 << 4) - x2; + skey[v + 3] = (x3 << 4) - x3; + } +} diff --git a/third_party/bearssl/src/aes_ct64_cbcdec.c b/third_party/bearssl/src/aes_ct64_cbcdec.c new file mode 100644 index 0000000..5a7360b --- /dev/null +++ b/third_party/bearssl/src/aes_ct64_cbcdec.c @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * 
The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_block.h */ /* Set up a CBC decryption context: store the vtable pointer and run the key schedule on the len-byte key. ctx->num_rounds receives the return value of br_aes_ct64_keysched(). */ +void +br_aes_ct64_cbcdec_init(br_aes_ct64_cbcdec_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_ct64_cbcdec_vtable; + ctx->num_rounds = br_aes_ct64_keysched(ctx->skey, key, len); +} + +/* see bearssl_block.h */ /* CBC decryption, in place, of len bytes at data, in batches of up to four 16-byte blocks. iv holds the 16-byte IV on entry and receives the last ciphertext block processed on exit. NOTE(review): len is presumably a non-zero multiple of 16; confirm against bearssl_block.h. */ +void +br_aes_ct64_cbcdec_run(const br_aes_ct64_cbcdec_keys *ctx, + void *iv, void *data, size_t len) +{ + unsigned char *buf; + uint64_t sk_exp[120]; + uint32_t ivw[4]; + + br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey); + br_range_dec32le(ivw, 4, iv); + buf = data; + while (len > 0) { + uint64_t q[8]; + uint32_t w1[16], w2[16]; + int i; + + if (len >= 64) { + br_range_dec32le(w1, 16, buf); + } else { /* short final batch: only len/4 words of w1 are loaded, the rest are left unset */ + br_range_dec32le(w1, len >> 2, buf); + } + for (i = 0; i < 4; i ++) { + br_aes_ct64_interleave_in( + &q[i], &q[i + 4], w1 + (i << 2)); + } + br_aes_ct64_ortho(q); + br_aes_ct64_bitslice_decrypt(ctx->num_rounds, sk_exp, q); + br_aes_ct64_ortho(q); + for (i = 0; i < 4; i ++) { + br_aes_ct64_interleave_out( + w2 + (i << 2), q[i], q[i + 4]); + } + /* the first block of the batch is chained with the running IV, every later block with the ciphertext block before it */ for (i = 0; i < 4; i ++) { + w2[i] ^= ivw[i]; + } + if (len >= 64) { + for (i = 4; i < 16; i ++) { + w2[i] ^= w1[i - 4]; + } + memcpy(ivw, w1 + 12, sizeof ivw); + br_range_enc32le(buf, w2, 16); + } else { + int j; + + j = (int)(len >> 2); + for (i = 4; i < j; i ++) { + w2[i] ^= w1[i - 4]; + } + /* next IV is the last ciphertext block loaded; only j words of output are written back */ memcpy(ivw, w1 + j 
- 4, sizeof ivw); + br_range_enc32le(buf, w2, j); + break; + } + buf += 64; + len -= 64; + } + br_range_enc32le(iv, ivw, 4); +} + +/* see bearssl_block.h */ /* Class descriptor: context size, the constants 16 and 4 (NOTE(review): presumably block size in bytes and its base-2 logarithm; confirm against the class definition), then the init and run functions cast to the generic signatures. */ +const br_block_cbcdec_class br_aes_ct64_cbcdec_vtable = { + sizeof(br_aes_ct64_cbcdec_keys), + 16, + 4, + (void (*)(const br_block_cbcdec_class **, const void *, size_t)) + &br_aes_ct64_cbcdec_init, + (void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t)) + &br_aes_ct64_cbcdec_run +}; diff --git a/third_party/bearssl/src/aes_ct64_cbcenc.c b/third_party/bearssl/src/aes_ct64_cbcenc.c new file mode 100644 index 0000000..6cb9dec --- /dev/null +++ b/third_party/bearssl/src/aes_ct64_cbcenc.c @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see bearssl_block.h */ /* Set up a CBC encryption context: store the vtable pointer and run the key schedule on the len-byte key. ctx->num_rounds receives the return value of br_aes_ct64_keysched(). */ +void +br_aes_ct64_cbcenc_init(br_aes_ct64_cbcenc_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_ct64_cbcenc_vtable; + ctx->num_rounds = br_aes_ct64_keysched(ctx->skey, key, len); +} + +/* see bearssl_block.h */ /* CBC encryption, in place, of len bytes at data, one 16-byte block at a time: each plaintext block is XORed with the running IV, encrypted, written back, and becomes the next IV. iv holds the 16-byte IV on entry and the last ciphertext block on exit. NOTE(review): len is reduced by 16 per iteration without a bounds check, so callers presumably pass a multiple of 16; confirm against bearssl_block.h. */ +void +br_aes_ct64_cbcenc_run(const br_aes_ct64_cbcenc_keys *ctx, + void *iv, void *data, size_t len) +{ + unsigned char *buf; + uint64_t sk_exp[120]; + uint32_t ivw[4]; + + br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey); + br_range_dec32le(ivw, 4, iv); + buf = data; + while (len > 0) { + uint32_t w[4]; + uint64_t q[8]; + + w[0] = ivw[0] ^ br_dec32le(buf); + w[1] = ivw[1] ^ br_dec32le(buf + 4); + w[2] = ivw[2] ^ br_dec32le(buf + 8); + w[3] = ivw[3] ^ br_dec32le(buf + 12); + /* only q[0] and q[4] are set here and read back below; the other six words of q are not assigned by this function */ br_aes_ct64_interleave_in(&q[0], &q[4], w); + br_aes_ct64_ortho(q); + br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q); + br_aes_ct64_ortho(q); + br_aes_ct64_interleave_out(w, q[0], q[4]); + memcpy(ivw, w, sizeof w); + br_enc32le(buf, w[0]); + br_enc32le(buf + 4, w[1]); + br_enc32le(buf + 8, w[2]); + br_enc32le(buf + 12, w[3]); + buf += 16; + len -= 16; + } + br_range_enc32le(iv, ivw, 4); +} + +/* see bearssl_block.h */ /* Class descriptor: context size, the constants 16 and 4 (NOTE(review): presumably block size in bytes and its base-2 logarithm; confirm against the class definition), then the init and run functions cast to the generic signatures. */ +const br_block_cbcenc_class br_aes_ct64_cbcenc_vtable = { + sizeof(br_aes_ct64_cbcenc_keys), + 16, + 4, + (void (*)(const br_block_cbcenc_class **, const void *, size_t)) + &br_aes_ct64_cbcenc_init, + (void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t)) + &br_aes_ct64_cbcenc_run +}; diff --git a/third_party/bearssl/src/aes_ct64_ctr.c b/third_party/bearssl/src/aes_ct64_ctr.c new file mode 100644 index 0000000..1275873 --- /dev/null +++ b/third_party/bearssl/src/aes_ct64_ctr.c @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without 
restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_block.h */ /* Set up a CTR context: store the vtable pointer and run the key schedule on the len-byte key. ctx->num_rounds receives the return value of br_aes_ct64_keysched(). */ +void +br_aes_ct64_ctr_init(br_aes_ct64_ctr_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_ct64_ctr_vtable; + ctx->num_rounds = br_aes_ct64_keysched(ctx->skey, key, len); +} + /* XOR len bytes of src into dst, byte by byte. */ +static void +xorbuf(void *dst, const void *src, size_t len) +{ + unsigned char *d; + const unsigned char *s; + + d = dst; + s = src; + while (len -- > 0) { + *d ++ ^= *s ++; + } +} + +/* see bearssl_block.h */ /* CTR-mode processing, in place, of len bytes at data. The first 12 bytes of each counter block come from iv; the last 4 bytes are the 32-bit counter cc, passed through br_swap32() before being stored in the word array. Four blocks (64 bytes of keystream) are produced per iteration. Returns the updated counter: cc advances by 4 for each batch that is not the last, and by len/16 (rounded down) for the last batch. */ +uint32_t +br_aes_ct64_ctr_run(const br_aes_ct64_ctr_keys *ctx, + const void *iv, uint32_t cc, void *data, size_t len) +{ + unsigned char *buf; + uint32_t ivw[16]; + uint64_t sk_exp[120]; + + br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey); + /* ivw is a template of four counter blocks: the three IV words are replicated at offsets 0, 4, 8 and 12 */ br_range_dec32le(ivw, 3, iv); + memcpy(ivw + 4, ivw, 3 * sizeof(uint32_t)); + memcpy(ivw + 8, ivw, 3 * sizeof(uint32_t)); + memcpy(ivw + 12, ivw, 3 * sizeof(uint32_t)); + buf = data; + while (len > 0) { + uint64_t q[8]; + uint32_t w[16]; + unsigned char tmp[64]; + int i; + + /* + * TODO: see if we can save on the first br_aes_ct64_ortho() + * call, since iv0/iv1/iv2 are 
constant for the whole run. + */ + memcpy(w, ivw, sizeof ivw); + /* words 3, 7, 11 and 15 receive four consecutive counter values */ w[3] = br_swap32(cc); + w[7] = br_swap32(cc + 1); + w[11] = br_swap32(cc + 2); + w[15] = br_swap32(cc + 3); + for (i = 0; i < 4; i ++) { + br_aes_ct64_interleave_in( + &q[i], &q[i + 4], w + (i << 2)); + } + br_aes_ct64_ortho(q); + br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q); + br_aes_ct64_ortho(q); + for (i = 0; i < 4; i ++) { + br_aes_ct64_interleave_out( + w + (i << 2), q[i], q[i + 4]); + } + br_range_enc32le(tmp, w, 16); + if (len <= 64) { /* last batch: use only len bytes of keystream; the counter advances by the number of complete blocks */ + xorbuf(buf, tmp, len); + cc += (uint32_t)len >> 4; + break; + } + xorbuf(buf, tmp, 64); + buf += 64; + len -= 64; + cc += 4; + } + return cc; +} + +/* see bearssl_block.h */ /* Class descriptor: context size, the constants 16 and 4 (NOTE(review): presumably block size in bytes and its base-2 logarithm; confirm against the class definition), then the init and run functions cast to the generic signatures. */ +const br_block_ctr_class br_aes_ct64_ctr_vtable = { + sizeof(br_aes_ct64_ctr_keys), + 16, + 4, + (void (*)(const br_block_ctr_class **, const void *, size_t)) + &br_aes_ct64_ctr_init, + (uint32_t (*)(const br_block_ctr_class *const *, + const void *, uint32_t, void *, size_t)) + &br_aes_ct64_ctr_run +}; diff --git a/third_party/bearssl/src/aes_ct64_ctrcbc.c b/third_party/bearssl/src/aes_ct64_ctrcbc.c new file mode 100644 index 0000000..21bb8ef --- /dev/null +++ b/third_party/bearssl/src/aes_ct64_ctrcbc.c @@ -0,0 +1,433 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_block.h */ +void +br_aes_ct64_ctrcbc_init(br_aes_ct64_ctrcbc_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_ct64_ctrcbc_vtable; + ctx->num_rounds = br_aes_ct64_keysched(ctx->skey, key, len); +} + +static void +xorbuf(void *dst, const void *src, size_t len) +{ + unsigned char *d; + const unsigned char *s; + + d = dst; + s = src; + while (len -- > 0) { + *d ++ ^= *s ++; + } +} + +/* see bearssl_block.h */ +void +br_aes_ct64_ctrcbc_ctr(const br_aes_ct64_ctrcbc_keys *ctx, + void *ctr, void *data, size_t len) +{ + unsigned char *buf; + unsigned char *ivbuf; + uint32_t iv0, iv1, iv2, iv3; + uint64_t sk_exp[120]; + + br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey); + + /* + * We keep the counter as four 32-bit values, with big-endian + * convention, because that's what is expected for purposes of + * incrementing the counter value. + */ + ivbuf = ctr; + iv0 = br_dec32be(ivbuf + 0); + iv1 = br_dec32be(ivbuf + 4); + iv2 = br_dec32be(ivbuf + 8); + iv3 = br_dec32be(ivbuf + 12); + + buf = data; + while (len > 0) { + uint64_t q[8]; + uint32_t w[16]; + unsigned char tmp[64]; + int i, j; + + /* + * The bitslice implementation expects values in + * little-endian convention, so we have to byteswap them. + */ + j = (len >= 64) ? 
/* see bearssl_block.h */
/*
 * CBC-MAC update. The 16-byte running value at cbcmac is loaded as four
 * little-endian 32-bit words; each 16-byte block of data is XORed into
 * it and the result is encrypted to become the new running value, which
 * is finally written back to cbcmac.
 *
 * Each pass consumes exactly 16 bytes and the loop only stops when len
 * reaches zero, so len must be a multiple of 16 (the size_t would wrap
 * otherwise).
 */
void
br_aes_ct64_ctrcbc_mac(const br_aes_ct64_ctrcbc_keys *ctx,
	void *cbcmac, const void *data, size_t len)
{
	const unsigned char *buf;
	uint32_t cm0, cm1, cm2, cm3;
	uint64_t q[8];
	uint64_t sk_exp[120];

	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);

	cm0 = br_dec32le((unsigned char *)cbcmac + 0);
	cm1 = br_dec32le((unsigned char *)cbcmac + 4);
	cm2 = br_dec32le((unsigned char *)cbcmac + 8);
	cm3 = br_dec32le((unsigned char *)cbcmac + 12);

	buf = data;
	/*
	 * q[] is cleared once; inside the loop only q[0] and q[4] are
	 * reloaded. NOTE(review): presumably the other lanes do not
	 * influence the lane read back below -- confirm against the
	 * interleave/ortho helpers.
	 */
	memset(q, 0, sizeof q);
	while (len > 0) {
		uint32_t w[4];

		/* Running MAC value XOR next data block. */
		w[0] = cm0 ^ br_dec32le(buf + 0);
		w[1] = cm1 ^ br_dec32le(buf + 4);
		w[2] = cm2 ^ br_dec32le(buf + 8);
		w[3] = cm3 ^ br_dec32le(buf + 12);

		br_aes_ct64_interleave_in(&q[0], &q[4], w);
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
		br_aes_ct64_ortho(q);
		br_aes_ct64_interleave_out(w, q[0], q[4]);

		cm0 = w[0];
		cm1 = w[1];
		cm2 = w[2];
		cm3 = w[3];
		buf += 16;
		len -= 16;
	}

	br_enc32le((unsigned char *)cbcmac + 0, cm0);
	br_enc32le((unsigned char *)cbcmac + 4, cm1);
	br_enc32le((unsigned char *)cbcmac + 8, cm2);
	br_enc32le((unsigned char *)cbcmac + 12, cm3);
}

/* see bearssl_block.h */
/*
 * Combined CTR encryption and CBC-MAC over the resulting ciphertext.
 * data is encrypted in place, 16 bytes per pass; the 16-byte counter at
 * ctr and the 16-byte MAC value at cbcmac are both updated on exit.
 *
 * Each pass encrypts two blocks together: the current counter value and
 * the pending CBC-MAC input. As with the MAC function, len must be a
 * multiple of 16. If len is zero the loop is skipped and ctr/cbcmac are
 * rewritten with the values that were read.
 */
void
br_aes_ct64_ctrcbc_encrypt(const br_aes_ct64_ctrcbc_keys *ctx,
	void *ctr, void *cbcmac, void *data, size_t len)
{
	/*
	 * When encrypting, the CBC-MAC processing must be lagging by
	 * one block, since it operates on the encrypted values, so
	 * it must wait for that encryption to complete.
	 */

	unsigned char *buf;
	unsigned char *ivbuf;
	uint32_t iv0, iv1, iv2, iv3;
	uint32_t cm0, cm1, cm2, cm3;
	uint64_t sk_exp[120];
	uint64_t q[8];
	int first_iter;

	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);

	/*
	 * We keep the counter as four 32-bit values, with big-endian
	 * convention, because that's what is expected for purposes of
	 * incrementing the counter value.
	 */
	ivbuf = ctr;
	iv0 = br_dec32be(ivbuf + 0);
	iv1 = br_dec32be(ivbuf + 4);
	iv2 = br_dec32be(ivbuf + 8);
	iv3 = br_dec32be(ivbuf + 12);

	/*
	 * The current CBC-MAC value is kept in little-endian convention.
	 */
	cm0 = br_dec32le((unsigned char *)cbcmac + 0);
	cm1 = br_dec32le((unsigned char *)cbcmac + 4);
	cm2 = br_dec32le((unsigned char *)cbcmac + 8);
	cm3 = br_dec32le((unsigned char *)cbcmac + 12);

	buf = data;
	first_iter = 1;
	memset(q, 0, sizeof q);
	while (len > 0) {
		uint32_t w[8], carry;

		/*
		 * The bitslice implementation expects values in
		 * little-endian convention, so we have to byteswap them.
		 */
		w[0] = br_swap32(iv0);
		w[1] = br_swap32(iv1);
		w[2] = br_swap32(iv2);
		w[3] = br_swap32(iv3);
		/*
		 * Branch-free 128-bit increment: ~(x | -x) >> 31 is 1
		 * exactly when the 32-bit word x is zero, i.e. when the
		 * word just incremented wrapped around; the carry is
		 * then propagated to the next more significant word.
		 */
		iv3 ++;
		carry = ~(iv3 | -iv3) >> 31;
		iv2 += carry;
		carry &= -(~(iv2 | -iv2) >> 31);
		iv1 += carry;
		carry &= -(~(iv1 | -iv1) >> 31);
		iv0 += carry;

		/*
		 * The block for CBC-MAC.
		 */
		w[4] = cm0;
		w[5] = cm1;
		w[6] = cm2;
		w[7] = cm3;

		br_aes_ct64_interleave_in(&q[0], &q[4], w);
		br_aes_ct64_interleave_in(&q[1], &q[5], w + 4);
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
		br_aes_ct64_ortho(q);
		br_aes_ct64_interleave_out(w, q[0], q[4]);
		br_aes_ct64_interleave_out(w + 4, q[1], q[5]);

		/*
		 * We do the XOR with the plaintext in 32-bit registers,
		 * so that the values are available for CBC-MAC processing
		 * as well.
		 */
		w[0] ^= br_dec32le(buf + 0);
		w[1] ^= br_dec32le(buf + 4);
		w[2] ^= br_dec32le(buf + 8);
		w[3] ^= br_dec32le(buf + 12);
		br_enc32le(buf + 0, w[0]);
		br_enc32le(buf + 4, w[1]);
		br_enc32le(buf + 8, w[2]);
		br_enc32le(buf + 12, w[3]);

		buf += 16;
		len -= 16;

		/*
		 * We set the cm* values to the block to encrypt in the
		 * next iteration.
		 *
		 * On the first pass the encrypted MAC lane (w[4..7]) is
		 * not used: the incoming MAC value is XORed directly
		 * with the first ciphertext block. On later passes the
		 * encrypted previous MAC input is XORed with the new
		 * ciphertext block.
		 */
		if (first_iter) {
			first_iter = 0;
			cm0 ^= w[0];
			cm1 ^= w[1];
			cm2 ^= w[2];
			cm3 ^= w[3];
		} else {
			cm0 = w[0] ^ w[4];
			cm1 = w[1] ^ w[5];
			cm2 = w[2] ^ w[6];
			cm3 = w[3] ^ w[7];
		}

		/*
		 * If this was the last iteration, then compute the
		 * extra block encryption to complete CBC-MAC.
		 */
		if (len == 0) {
			w[0] = cm0;
			w[1] = cm1;
			w[2] = cm2;
			w[3] = cm3;
			br_aes_ct64_interleave_in(&q[0], &q[4], w);
			br_aes_ct64_ortho(q);
			br_aes_ct64_bitslice_encrypt(
				ctx->num_rounds, sk_exp, q);
			br_aes_ct64_ortho(q);
			br_aes_ct64_interleave_out(w, q[0], q[4]);
			cm0 = w[0];
			cm1 = w[1];
			cm2 = w[2];
			cm3 = w[3];
			break;
		}
	}

	br_enc32be(ivbuf + 0, iv0);
	br_enc32be(ivbuf + 4, iv1);
	br_enc32be(ivbuf + 8, iv2);
	br_enc32be(ivbuf + 12, iv3);
	br_enc32le((unsigned char *)cbcmac + 0, cm0);
	br_enc32le((unsigned char *)cbcmac + 4, cm1);
	br_enc32le((unsigned char *)cbcmac + 8, cm2);
	br_enc32le((unsigned char *)cbcmac + 12, cm3);
}

/* see bearssl_block.h */
/*
 * Combined CTR decryption and CBC-MAC over the ciphertext. data is
 * decrypted in place, 16 bytes per pass; ctr and cbcmac are updated on
 * exit.
 *
 * Unlike encryption, the MAC input (the ciphertext) is available before
 * the block is processed, so the counter block and the MAC block for
 * the same 16 bytes are encrypted together with no lag. len must be a
 * multiple of 16.
 */
void
br_aes_ct64_ctrcbc_decrypt(const br_aes_ct64_ctrcbc_keys *ctx,
	void *ctr, void *cbcmac, void *data, size_t len)
{
	unsigned char *buf;
	unsigned char *ivbuf;
	uint32_t iv0, iv1, iv2, iv3;
	uint32_t cm0, cm1, cm2, cm3;
	uint64_t sk_exp[120];
	uint64_t q[8];

	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);

	/*
	 * We keep the counter as four 32-bit values, with big-endian
	 * convention, because that's what is expected for purposes of
	 * incrementing the counter value.
	 */
	ivbuf = ctr;
	iv0 = br_dec32be(ivbuf + 0);
	iv1 = br_dec32be(ivbuf + 4);
	iv2 = br_dec32be(ivbuf + 8);
	iv3 = br_dec32be(ivbuf + 12);

	/*
	 * The current CBC-MAC value is kept in little-endian convention.
	 */
	cm0 = br_dec32le((unsigned char *)cbcmac + 0);
	cm1 = br_dec32le((unsigned char *)cbcmac + 4);
	cm2 = br_dec32le((unsigned char *)cbcmac + 8);
	cm3 = br_dec32le((unsigned char *)cbcmac + 12);

	buf = data;
	memset(q, 0, sizeof q);
	while (len > 0) {
		uint32_t w[8], carry;
		unsigned char tmp[16];

		/*
		 * The bitslice implementation expects values in
		 * little-endian convention, so we have to byteswap them.
		 */
		w[0] = br_swap32(iv0);
		w[1] = br_swap32(iv1);
		w[2] = br_swap32(iv2);
		w[3] = br_swap32(iv3);
		/*
		 * Branch-free 128-bit increment: ~(x | -x) >> 31 is 1
		 * exactly when the 32-bit word x is zero (it wrapped).
		 */
		iv3 ++;
		carry = ~(iv3 | -iv3) >> 31;
		iv2 += carry;
		carry &= -(~(iv2 | -iv2) >> 31);
		iv1 += carry;
		carry &= -(~(iv1 | -iv1) >> 31);
		iv0 += carry;

		/*
		 * The block for CBC-MAC.
		 */
		w[4] = cm0 ^ br_dec32le(buf + 0);
		w[5] = cm1 ^ br_dec32le(buf + 4);
		w[6] = cm2 ^ br_dec32le(buf + 8);
		w[7] = cm3 ^ br_dec32le(buf + 12);

		br_aes_ct64_interleave_in(&q[0], &q[4], w);
		br_aes_ct64_interleave_in(&q[1], &q[5], w + 4);
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
		br_aes_ct64_ortho(q);
		br_aes_ct64_interleave_out(w, q[0], q[4]);
		br_aes_ct64_interleave_out(w + 4, q[1], q[5]);

		/* w[0..3] is the keystream block; w[4..7] the new MAC. */
		br_enc32le(tmp + 0, w[0]);
		br_enc32le(tmp + 4, w[1]);
		br_enc32le(tmp + 8, w[2]);
		br_enc32le(tmp + 12, w[3]);
		xorbuf(buf, tmp, 16);
		cm0 = w[4];
		cm1 = w[5];
		cm2 = w[6];
		cm3 = w[7];
		buf += 16;
		len -= 16;
	}

	br_enc32be(ivbuf + 0, iv0);
	br_enc32be(ivbuf + 4, iv1);
	br_enc32be(ivbuf + 8, iv2);
	br_enc32be(ivbuf + 12, iv3);
	br_enc32le((unsigned char *)cbcmac + 0, cm0);
	br_enc32le((unsigned char *)cbcmac + 4, cm1);
	br_enc32le((unsigned char *)cbcmac + 8, cm2);
	br_enc32le((unsigned char *)cbcmac + 12, cm3);
}

/* see bearssl_block.h */
/*
 * Class descriptor for this implementation: context size, two integer
 * constants, then the init, encrypt, decrypt, ctr and mac entry points,
 * each cast to the generic signature taking a pointer to the vtable
 * pointer. NOTE(review): 16 and 4 are presumably the block size in
 * bytes and its base-2 logarithm -- confirm against the
 * br_block_ctrcbc_class declaration.
 */
const br_block_ctrcbc_class br_aes_ct64_ctrcbc_vtable = {
	sizeof(br_aes_ct64_ctrcbc_keys),
	16,
	4,
	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
		&br_aes_ct64_ctrcbc_init,
	(void (*)(const br_block_ctrcbc_class *const *,
		void *, void *, void *, size_t))
		&br_aes_ct64_ctrcbc_encrypt,
	(void (*)(const br_block_ctrcbc_class *const *,
		void *, void *, void *, size_t))
		&br_aes_ct64_ctrcbc_decrypt,
	(void (*)(const br_block_ctrcbc_class *const *,
		void *, void *, size_t))
		&br_aes_ct64_ctrcbc_ctr,
	(void (*)(const br_block_ctrcbc_class *const *,
		void *, const void *, size_t))
		&br_aes_ct64_ctrcbc_mac
};
/* see inner.h */
/*
 * Inverse S-box on the bitsliced state q[0..7], in place. It is built
 * from the forward S-box: a linear pre-transform, a call to
 * br_aes_ct64_bitslice_Sbox(), then the same linear transform again.
 */
void
br_aes_ct64_bitslice_invSbox(uint64_t *q)
{
	/*
	 * See br_aes_ct_bitslice_invSbox(). This is the natural extension
	 * to 64-bit registers.
	 */
	uint64_t q0, q1, q2, q3, q4, q5, q6, q7;

	/*
	 * Words 0, 1, 5 and 6 are complemented on input, then every
	 * output word is the XOR of three of the (possibly complemented)
	 * input words.
	 */
	q0 = ~q[0];
	q1 = ~q[1];
	q2 = q[2];
	q3 = q[3];
	q4 = q[4];
	q5 = ~q[5];
	q6 = ~q[6];
	q7 = q[7];
	q[7] = q1 ^ q4 ^ q6;
	q[6] = q0 ^ q3 ^ q5;
	q[5] = q7 ^ q2 ^ q4;
	q[4] = q6 ^ q1 ^ q3;
	q[3] = q5 ^ q0 ^ q2;
	q[2] = q4 ^ q7 ^ q1;
	q[1] = q3 ^ q6 ^ q0;
	q[0] = q2 ^ q5 ^ q7;

	br_aes_ct64_bitslice_Sbox(q);

	/* Same linear transform, applied to the S-box output. */
	q0 = ~q[0];
	q1 = ~q[1];
	q2 = q[2];
	q3 = q[3];
	q4 = q[4];
	q5 = ~q[5];
	q6 = ~q[6];
	q7 = q[7];
	q[7] = q1 ^ q4 ^ q6;
	q[6] = q0 ^ q3 ^ q5;
	q[5] = q7 ^ q2 ^ q4;
	q[4] = q6 ^ q1 ^ q3;
	q[3] = q5 ^ q0 ^ q2;
	q[2] = q4 ^ q7 ^ q1;
	q[1] = q3 ^ q6 ^ q0;
	q[0] = q2 ^ q5 ^ q7;
}

/*
 * XOR the eight words of round key sk into the state q.
 */
static void
add_round_key(uint64_t *q, const uint64_t *sk)
{
	int i;

	for (i = 0; i < 8; i ++) {
		q[i] ^= sk[i];
	}
}

/*
 * Inverse ShiftRows, applied to each of the eight state words. Within
 * every 64-bit word, the four 16-bit fields are handled separately:
 * bits 0..15 are kept, bits 16..31 are rotated left by 4 inside their
 * field, bits 32..47 by 8 and bits 48..63 by 12.
 */
static void
inv_shift_rows(uint64_t *q)
{
	int i;

	for (i = 0; i < 8; i ++) {
		uint64_t x;

		x = q[i];
		q[i] = (x & (uint64_t)0x000000000000FFFF)
			| ((x & (uint64_t)0x000000000FFF0000) << 4)
			| ((x & (uint64_t)0x00000000F0000000) >> 12)
			| ((x & (uint64_t)0x000000FF00000000) << 8)
			| ((x & (uint64_t)0x0000FF0000000000) >> 8)
			| ((x & (uint64_t)0x000F000000000000) << 12)
			| ((x & (uint64_t)0xFFF0000000000000) >> 4);
	}
}

/*
 * Rotate a 64-bit word by 32 bits, i.e. swap its two halves.
 */
static inline uint64_t
rotr32(uint64_t x)
{
	return (x << 32) | (x >> 32);
}

/*
 * Inverse MixColumns on the bitsliced state, in place. All inputs are
 * copied to locals first (q0..q7), together with their rotations right
 * by 16 bits (r0..r7), so the output words can be written in any
 * order. Each output is a fixed XOR combination of those values and of
 * a 32-bit rotation of a second combination.
 */
static void
inv_mix_columns(uint64_t *q)
{
	uint64_t q0, q1, q2, q3, q4, q5, q6, q7;
	uint64_t r0, r1, r2, r3, r4, r5, r6, r7;

	q0 = q[0];
	q1 = q[1];
	q2 = q[2];
	q3 = q[3];
	q4 = q[4];
	q5 = q[5];
	q6 = q[6];
	q7 = q[7];
	r0 = (q0 >> 16) | (q0 << 48);
	r1 = (q1 >> 16) | (q1 << 48);
	r2 = (q2 >> 16) | (q2 << 48);
	r3 = (q3 >> 16) | (q3 << 48);
	r4 = (q4 >> 16) | (q4 << 48);
	r5 = (q5 >> 16) | (q5 << 48);
	r6 = (q6 >> 16) | (q6 << 48);
	r7 = (q7 >> 16) | (q7 << 48);

	q[0] = q5 ^ q6 ^ q7 ^ r0 ^ r5 ^ r7
		^ rotr32(q0 ^ q5 ^ q6 ^ r0 ^ r5);
	q[1] = q0 ^ q5 ^ r0 ^ r1 ^ r5 ^ r6 ^ r7
		^ rotr32(q1 ^ q5 ^ q7 ^ r1 ^ r5 ^ r6);
	q[2] = q0 ^ q1 ^ q6 ^ r1 ^ r2 ^ r6 ^ r7
		^ rotr32(q0 ^ q2 ^ q6 ^ r2 ^ r6 ^ r7);
	q[3] = q0 ^ q1 ^ q2 ^ q5 ^ q6 ^ r0 ^ r2 ^ r3 ^ r5
		^ rotr32(q0 ^ q1 ^ q3 ^ q5 ^ q6 ^ q7 ^ r0 ^ r3 ^ r5 ^ r7);
	q[4] = q1 ^ q2 ^ q3 ^ q5 ^ r1 ^ r3 ^ r4 ^ r5 ^ r6 ^ r7
		^ rotr32(q1 ^ q2 ^ q4 ^ q5 ^ q7 ^ r1 ^ r4 ^ r5 ^ r6);
	q[5] = q2 ^ q3 ^ q4 ^ q6 ^ r2 ^ r4 ^ r5 ^ r6 ^ r7
		^ rotr32(q2 ^ q3 ^ q5 ^ q6 ^ r2 ^ r5 ^ r6 ^ r7);
	q[6] = q3 ^ q4 ^ q5 ^ q7 ^ r3 ^ r5 ^ r6 ^ r7
		^ rotr32(q3 ^ q4 ^ q6 ^ q7 ^ r3 ^ r6 ^ r7);
	q[7] = q4 ^ q5 ^ q6 ^ r4 ^ r6 ^ r7
		^ rotr32(q4 ^ q5 ^ q7 ^ r4 ^ r7);
}

/* see inner.h */
/*
 * Bitsliced AES decryption of the state q, in place. skey holds eight
 * 64-bit words per round key; key number num_rounds is applied first
 * and key number 0 last. The last step has no inverse MixColumns.
 */
void
br_aes_ct64_bitslice_decrypt(unsigned num_rounds,
	const uint64_t *skey, uint64_t *q)
{
	unsigned u;

	add_round_key(q, skey + (num_rounds << 3));
	for (u = num_rounds - 1; u > 0; u --) {
		inv_shift_rows(q);
		br_aes_ct64_bitslice_invSbox(q);
		add_round_key(q, skey + (u << 3));
		inv_mix_columns(q);
	}
	inv_shift_rows(q);
	br_aes_ct64_bitslice_invSbox(q);
	add_round_key(q, skey);
}
/*
 * XOR one round key (eight 64-bit words) into the bitsliced state.
 */
static inline void
add_round_key(uint64_t *q, const uint64_t *sk)
{
	int k;

	for (k = 0; k < 8; k ++) {
		q[k] ^= sk[k];
	}
}

/*
 * ShiftRows on each of the eight state words. Bits 0..15 of a word are
 * left alone; the three upper 16-bit fields are rotated inside their
 * own field (right by 4, by 8 and by 12 respectively).
 */
static inline void
shift_rows(uint64_t *q)
{
	uint64_t *p;

	for (p = q; p != q + 8; p ++) {
		uint64_t v, out;

		v = *p;
		out = v & (uint64_t)0x000000000000FFFF;
		out |= (v & (uint64_t)0x00000000FFF00000) >> 4;
		out |= (v & (uint64_t)0x00000000000F0000) << 12;
		out |= (v & (uint64_t)0x0000FF0000000000) >> 8;
		out |= (v & (uint64_t)0x000000FF00000000) << 8;
		out |= (v & (uint64_t)0xF000000000000000) >> 12;
		out |= (v & (uint64_t)0x0FFF000000000000) << 4;
		*p = out;
	}
}

/*
 * Swap the two 32-bit halves of a 64-bit word.
 */
static inline uint64_t
rotr32(uint64_t x)
{
	return (x >> 32) | (x << 32);
}

/*
 * MixColumns on the bitsliced state, in place.
 *
 * For each word, r[k] is the word rotated right by 16 bits and s[k] is
 * the word XORed with that rotation. Both are computed for all eight
 * words before any output is stored.
 */
static inline void
mix_columns(uint64_t *q)
{
	uint64_t r[8], s[8];
	int k;

	for (k = 0; k < 8; k ++) {
		r[k] = (q[k] >> 16) | (q[k] << 48);
		s[k] = q[k] ^ r[k];
	}

	/* Words 1, 3 and 4 additionally receive s[7]. */
	q[0] = s[7] ^ r[0] ^ rotr32(s[0]);
	q[1] = s[0] ^ s[7] ^ r[1] ^ rotr32(s[1]);
	q[2] = s[1] ^ r[2] ^ rotr32(s[2]);
	q[3] = s[2] ^ s[7] ^ r[3] ^ rotr32(s[3]);
	q[4] = s[3] ^ s[7] ^ r[4] ^ rotr32(s[4]);
	q[5] = s[4] ^ r[5] ^ rotr32(s[5]);
	q[6] = s[5] ^ r[6] ^ rotr32(s[6]);
	q[7] = s[6] ^ r[7] ^ rotr32(s[7]);
}

/* see inner.h */
/*
 * Bitsliced AES encryption of the state q, in place. skey holds eight
 * 64-bit words per round key, key 0 first. The final round omits
 * MixColumns.
 */
void
br_aes_ct64_bitslice_encrypt(unsigned num_rounds,
	const uint64_t *skey, uint64_t *q)
{
	const uint64_t *rk;
	unsigned round;

	rk = skey;
	add_round_key(q, rk);
	for (round = 1; round < num_rounds; round ++) {
		rk += 8;
		br_aes_ct64_bitslice_Sbox(q);
		shift_rows(q);
		mix_columns(q);
		add_round_key(q, rk);
	}
	br_aes_ct64_bitslice_Sbox(q);
	shift_rows(q);
	add_round_key(q, skey + (num_rounds << 3));
}
+ */ + +#include "inner.h" + +/* see bearssl_block.h */ +void +br_aes_ct_cbcdec_init(br_aes_ct_cbcdec_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_ct_cbcdec_vtable; + ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len); +} + +/* see bearssl_block.h */ +void +br_aes_ct_cbcdec_run(const br_aes_ct_cbcdec_keys *ctx, + void *iv, void *data, size_t len) +{ + unsigned char *buf, *ivbuf; + uint32_t iv0, iv1, iv2, iv3; + uint32_t sk_exp[120]; + + br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey); + ivbuf = iv; + iv0 = br_dec32le(ivbuf); + iv1 = br_dec32le(ivbuf + 4); + iv2 = br_dec32le(ivbuf + 8); + iv3 = br_dec32le(ivbuf + 12); + buf = data; + while (len > 0) { + uint32_t q[8], sq[8]; + + q[0] = br_dec32le(buf); + q[2] = br_dec32le(buf + 4); + q[4] = br_dec32le(buf + 8); + q[6] = br_dec32le(buf + 12); + if (len >= 32) { + q[1] = br_dec32le(buf + 16); + q[3] = br_dec32le(buf + 20); + q[5] = br_dec32le(buf + 24); + q[7] = br_dec32le(buf + 28); + } else { + q[1] = 0; + q[3] = 0; + q[5] = 0; + q[7] = 0; + } + memcpy(sq, q, sizeof q); + br_aes_ct_ortho(q); + br_aes_ct_bitslice_decrypt(ctx->num_rounds, sk_exp, q); + br_aes_ct_ortho(q); + br_enc32le(buf, q[0] ^ iv0); + br_enc32le(buf + 4, q[2] ^ iv1); + br_enc32le(buf + 8, q[4] ^ iv2); + br_enc32le(buf + 12, q[6] ^ iv3); + if (len < 32) { + iv0 = sq[0]; + iv1 = sq[2]; + iv2 = sq[4]; + iv3 = sq[6]; + break; + } + br_enc32le(buf + 16, q[1] ^ sq[0]); + br_enc32le(buf + 20, q[3] ^ sq[2]); + br_enc32le(buf + 24, q[5] ^ sq[4]); + br_enc32le(buf + 28, q[7] ^ sq[6]); + iv0 = sq[1]; + iv1 = sq[3]; + iv2 = sq[5]; + iv3 = sq[7]; + buf += 32; + len -= 32; + } + br_enc32le(ivbuf, iv0); + br_enc32le(ivbuf + 4, iv1); + br_enc32le(ivbuf + 8, iv2); + br_enc32le(ivbuf + 12, iv3); +} + +/* see bearssl_block.h */ +const br_block_cbcdec_class br_aes_ct_cbcdec_vtable = { + sizeof(br_aes_ct_cbcdec_keys), + 16, + 4, + (void (*)(const br_block_cbcdec_class **, const void *, size_t)) + &br_aes_ct_cbcdec_init, + (void 
(*)(const br_block_cbcdec_class *const *, void *, void *, size_t)) + &br_aes_ct_cbcdec_run +}; diff --git a/third_party/bearssl/src/aes_ct_cbcenc.c b/third_party/bearssl/src/aes_ct_cbcenc.c new file mode 100644 index 0000000..cb85977 --- /dev/null +++ b/third_party/bearssl/src/aes_ct_cbcenc.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see bearssl_block.h */ +void +br_aes_ct_cbcenc_init(br_aes_ct_cbcenc_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_ct_cbcenc_vtable; + ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len); +} + +/* see bearssl_block.h */ +void +br_aes_ct_cbcenc_run(const br_aes_ct_cbcenc_keys *ctx, + void *iv, void *data, size_t len) +{ + unsigned char *buf, *ivbuf; + uint32_t q[8]; + uint32_t iv0, iv1, iv2, iv3; + uint32_t sk_exp[120]; + + q[1] = 0; + q[3] = 0; + q[5] = 0; + q[7] = 0; + br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey); + ivbuf = iv; + iv0 = br_dec32le(ivbuf); + iv1 = br_dec32le(ivbuf + 4); + iv2 = br_dec32le(ivbuf + 8); + iv3 = br_dec32le(ivbuf + 12); + buf = data; + while (len > 0) { + q[0] = iv0 ^ br_dec32le(buf); + q[2] = iv1 ^ br_dec32le(buf + 4); + q[4] = iv2 ^ br_dec32le(buf + 8); + q[6] = iv3 ^ br_dec32le(buf + 12); + br_aes_ct_ortho(q); + br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q); + br_aes_ct_ortho(q); + iv0 = q[0]; + iv1 = q[2]; + iv2 = q[4]; + iv3 = q[6]; + br_enc32le(buf, iv0); + br_enc32le(buf + 4, iv1); + br_enc32le(buf + 8, iv2); + br_enc32le(buf + 12, iv3); + buf += 16; + len -= 16; + } + br_enc32le(ivbuf, iv0); + br_enc32le(ivbuf + 4, iv1); + br_enc32le(ivbuf + 8, iv2); + br_enc32le(ivbuf + 12, iv3); +} + +/* see bearssl_block.h */ +const br_block_cbcenc_class br_aes_ct_cbcenc_vtable = { + sizeof(br_aes_ct_cbcenc_keys), + 16, + 4, + (void (*)(const br_block_cbcenc_class **, const void *, size_t)) + &br_aes_ct_cbcenc_init, + (void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t)) + &br_aes_ct_cbcenc_run +}; diff --git a/third_party/bearssl/src/aes_ct_ctr.c b/third_party/bearssl/src/aes_ct_ctr.c new file mode 100644 index 0000000..f407689 --- /dev/null +++ b/third_party/bearssl/src/aes_ct_ctr.c @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * 
a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see bearssl_block.h */ +void +br_aes_ct_ctr_init(br_aes_ct_ctr_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_ct_ctr_vtable; + ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len); +} + +static void +xorbuf(void *dst, const void *src, size_t len) +{ + unsigned char *d; + const unsigned char *s; + + d = dst; + s = src; + while (len -- > 0) { + *d ++ ^= *s ++; + } +} + +/* see bearssl_block.h */ +uint32_t +br_aes_ct_ctr_run(const br_aes_ct_ctr_keys *ctx, + const void *iv, uint32_t cc, void *data, size_t len) +{ + unsigned char *buf; + const unsigned char *ivbuf; + uint32_t iv0, iv1, iv2; + uint32_t sk_exp[120]; + + br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey); + ivbuf = iv; + iv0 = br_dec32le(ivbuf); + iv1 = br_dec32le(ivbuf + 4); + iv2 = br_dec32le(ivbuf + 8); + buf = data; + while (len > 0) { + uint32_t q[8]; + unsigned char tmp[32]; + + /* + * TODO: see if we can save on the first br_aes_ct_ortho() + * call, since iv0/iv1/iv2 are constant for the whole run. 
+ */ + q[0] = q[1] = iv0; + q[2] = q[3] = iv1; + q[4] = q[5] = iv2; + q[6] = br_swap32(cc); + q[7] = br_swap32(cc + 1); + br_aes_ct_ortho(q); + br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q); + br_aes_ct_ortho(q); + br_enc32le(tmp, q[0]); + br_enc32le(tmp + 4, q[2]); + br_enc32le(tmp + 8, q[4]); + br_enc32le(tmp + 12, q[6]); + br_enc32le(tmp + 16, q[1]); + br_enc32le(tmp + 20, q[3]); + br_enc32le(tmp + 24, q[5]); + br_enc32le(tmp + 28, q[7]); + + if (len <= 32) { + xorbuf(buf, tmp, len); + cc ++; + if (len > 16) { + cc ++; + } + break; + } + xorbuf(buf, tmp, 32); + buf += 32; + len -= 32; + cc += 2; + } + return cc; +} + +/* see bearssl_block.h */ +const br_block_ctr_class br_aes_ct_ctr_vtable = { + sizeof(br_aes_ct_ctr_keys), + 16, + 4, + (void (*)(const br_block_ctr_class **, const void *, size_t)) + &br_aes_ct_ctr_init, + (uint32_t (*)(const br_block_ctr_class *const *, + const void *, uint32_t, void *, size_t)) + &br_aes_ct_ctr_run +}; diff --git a/third_party/bearssl/src/aes_ct_ctrcbc.c b/third_party/bearssl/src/aes_ct_ctrcbc.c new file mode 100644 index 0000000..8ae9fc7 --- /dev/null +++ b/third_party/bearssl/src/aes_ct_ctrcbc.c @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
/* see bearssl_block.h */
/*
 * Set up a CTR + CBC-MAC context: record the vtable and run the key
 * schedule, which yields the number of rounds.
 */
void
br_aes_ct_ctrcbc_init(br_aes_ct_ctrcbc_keys *ctx,
	const void *key, size_t len)
{
	ctx->vtable = &br_aes_ct_ctrcbc_vtable;
	ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len);
}

/*
 * XOR len bytes of src into dst.
 */
static void
xorbuf(void *dst, const void *src, size_t len)
{
	unsigned char *d;
	const unsigned char *s;

	d = dst;
	s = src;
	while (len -- > 0) {
		*d ++ ^= *s ++;
	}
}

/* see bearssl_block.h */
/*
 * CTR-only processing of data, in place. The 16-byte counter at ctr is
 * treated as a 128-bit big-endian integer and is updated on exit. Two
 * consecutive counter values are encrypted per pass (32 bytes of key
 * stream). A final chunk of 32 bytes or less is XORed over its actual
 * length; the counter is advanced a second time in a pass only when
 * more than 16 bytes remain.
 */
void
br_aes_ct_ctrcbc_ctr(const br_aes_ct_ctrcbc_keys *ctx,
	void *ctr, void *data, size_t len)
{
	unsigned char *buf;
	unsigned char *ivbuf;
	uint32_t iv0, iv1, iv2, iv3;
	uint32_t sk_exp[120];

	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);

	/*
	 * We keep the counter as four 32-bit values, with big-endian
	 * convention, because that's what is expected for purposes of
	 * incrementing the counter value.
	 */
	ivbuf = ctr;
	iv0 = br_dec32be(ivbuf + 0);
	iv1 = br_dec32be(ivbuf + 4);
	iv2 = br_dec32be(ivbuf + 8);
	iv3 = br_dec32be(ivbuf + 12);

	buf = data;
	while (len > 0) {
		uint32_t q[8], carry;
		unsigned char tmp[32];

		/*
		 * The bitslice implementation expects values in
		 * little-endian convention, so we have to byteswap them.
		 */
		q[0] = br_swap32(iv0);
		q[2] = br_swap32(iv1);
		q[4] = br_swap32(iv2);
		q[6] = br_swap32(iv3);
		/*
		 * Branch-free 128-bit increment: ~(x | -x) >> 31 is 1
		 * exactly when the 32-bit word x is zero, i.e. when the
		 * word just incremented wrapped around; the carry is
		 * then propagated to the next more significant word.
		 */
		iv3 ++;
		carry = ~(iv3 | -iv3) >> 31;
		iv2 += carry;
		carry &= -(~(iv2 | -iv2) >> 31);
		iv1 += carry;
		carry &= -(~(iv1 | -iv1) >> 31);
		iv0 += carry;
		/* Odd words: the next counter value (second block). */
		q[1] = br_swap32(iv0);
		q[3] = br_swap32(iv1);
		q[5] = br_swap32(iv2);
		q[7] = br_swap32(iv3);
		/*
		 * The second counter value is consumed only if more
		 * than one block of data remains.
		 */
		if (len > 16) {
			iv3 ++;
			carry = ~(iv3 | -iv3) >> 31;
			iv2 += carry;
			carry &= -(~(iv2 | -iv2) >> 31);
			iv1 += carry;
			carry &= -(~(iv1 | -iv1) >> 31);
			iv0 += carry;
		}

		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
		br_aes_ct_ortho(q);

		br_enc32le(tmp, q[0]);
		br_enc32le(tmp + 4, q[2]);
		br_enc32le(tmp + 8, q[4]);
		br_enc32le(tmp + 12, q[6]);
		br_enc32le(tmp + 16, q[1]);
		br_enc32le(tmp + 20, q[3]);
		br_enc32le(tmp + 24, q[5]);
		br_enc32le(tmp + 28, q[7]);

		if (len <= 32) {
			xorbuf(buf, tmp, len);
			break;
		}
		xorbuf(buf, tmp, 32);
		buf += 32;
		len -= 32;
	}
	br_enc32be(ivbuf + 0, iv0);
	br_enc32be(ivbuf + 4, iv1);
	br_enc32be(ivbuf + 8, iv2);
	br_enc32be(ivbuf + 12, iv3);
}

/* see bearssl_block.h */
/*
 * CBC-MAC update. The 16-byte running value at cbcmac is loaded as four
 * little-endian 32-bit words; each 16-byte block of data is XORed into
 * it and the result is encrypted to become the new running value, which
 * is finally written back to cbcmac.
 *
 * Each pass consumes exactly 16 bytes and the loop only stops when len
 * reaches zero, so len must be a multiple of 16.
 */
void
br_aes_ct_ctrcbc_mac(const br_aes_ct_ctrcbc_keys *ctx,
	void *cbcmac, const void *data, size_t len)
{
	const unsigned char *buf;
	uint32_t cm0, cm1, cm2, cm3;
	uint32_t q[8];
	uint32_t sk_exp[120];

	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);

	buf = data;
	cm0 = br_dec32le((unsigned char *)cbcmac + 0);
	cm1 = br_dec32le((unsigned char *)cbcmac + 4);
	cm2 = br_dec32le((unsigned char *)cbcmac + 8);
	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
	/*
	 * The odd words (second lane) are cleared once and not reloaded
	 * inside the loop; only the even words are read back.
	 */
	q[1] = 0;
	q[3] = 0;
	q[5] = 0;
	q[7] = 0;

	while (len > 0) {
		q[0] = cm0 ^ br_dec32le(buf + 0);
		q[2] = cm1 ^ br_dec32le(buf + 4);
		q[4] = cm2 ^ br_dec32le(buf + 8);
		q[6] = cm3 ^ br_dec32le(buf + 12);

		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
		br_aes_ct_ortho(q);

		cm0 = q[0];
		cm1 = q[2];
		cm2 = q[4];
		cm3 = q[6];
		buf += 16;
		len -= 16;
	}

	br_enc32le((unsigned char *)cbcmac + 0, cm0);
	br_enc32le((unsigned char *)cbcmac + 4, cm1);
	br_enc32le((unsigned char *)cbcmac + 8, cm2);
	br_enc32le((unsigned char *)cbcmac + 12, cm3);
}

/* see bearssl_block.h */
/*
 * Combined CTR encryption and CBC-MAC over the resulting ciphertext.
 * data is encrypted in place, 16 bytes per pass; the 16-byte counter at
 * ctr and the 16-byte MAC value at cbcmac are both updated on exit.
 *
 * Even state words carry the counter block, odd words the pending
 * CBC-MAC input. len must be a multiple of 16. If len is zero the loop
 * is skipped and ctr/cbcmac are rewritten with the values read.
 */
void
br_aes_ct_ctrcbc_encrypt(const br_aes_ct_ctrcbc_keys *ctx,
	void *ctr, void *cbcmac, void *data, size_t len)
{
	/*
	 * When encrypting, the CBC-MAC processing must be lagging by
	 * one block, since it operates on the encrypted values, so
	 * it must wait for that encryption to complete.
	 */

	unsigned char *buf;
	unsigned char *ivbuf;
	uint32_t iv0, iv1, iv2, iv3;
	uint32_t cm0, cm1, cm2, cm3;
	uint32_t sk_exp[120];
	int first_iter;

	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);

	/*
	 * We keep the counter as four 32-bit values, with big-endian
	 * convention, because that's what is expected for purposes of
	 * incrementing the counter value.
	 */
	ivbuf = ctr;
	iv0 = br_dec32be(ivbuf + 0);
	iv1 = br_dec32be(ivbuf + 4);
	iv2 = br_dec32be(ivbuf + 8);
	iv3 = br_dec32be(ivbuf + 12);

	/*
	 * The current CBC-MAC value is kept in little-endian convention.
	 */
	cm0 = br_dec32le((unsigned char *)cbcmac + 0);
	cm1 = br_dec32le((unsigned char *)cbcmac + 4);
	cm2 = br_dec32le((unsigned char *)cbcmac + 8);
	cm3 = br_dec32le((unsigned char *)cbcmac + 12);

	buf = data;
	first_iter = 1;
	while (len > 0) {
		uint32_t q[8], carry;

		/*
		 * The bitslice implementation expects values in
		 * little-endian convention, so we have to byteswap them.
		 */
		q[0] = br_swap32(iv0);
		q[2] = br_swap32(iv1);
		q[4] = br_swap32(iv2);
		q[6] = br_swap32(iv3);
		/*
		 * Branch-free 128-bit increment: ~(x | -x) >> 31 is 1
		 * exactly when the 32-bit word x is zero (it wrapped).
		 */
		iv3 ++;
		carry = ~(iv3 | -iv3) >> 31;
		iv2 += carry;
		carry &= -(~(iv2 | -iv2) >> 31);
		iv1 += carry;
		carry &= -(~(iv1 | -iv1) >> 31);
		iv0 += carry;

		/*
		 * The odd values are used for CBC-MAC.
		 */
		q[1] = cm0;
		q[3] = cm1;
		q[5] = cm2;
		q[7] = cm3;

		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/*
		 * We do the XOR with the plaintext in 32-bit registers,
		 * so that the values are available for CBC-MAC processing
		 * as well.
		 */
		q[0] ^= br_dec32le(buf + 0);
		q[2] ^= br_dec32le(buf + 4);
		q[4] ^= br_dec32le(buf + 8);
		q[6] ^= br_dec32le(buf + 12);
		br_enc32le(buf + 0, q[0]);
		br_enc32le(buf + 4, q[2]);
		br_enc32le(buf + 8, q[4]);
		br_enc32le(buf + 12, q[6]);

		buf += 16;
		len -= 16;

		/*
		 * We set the cm* values to the block to encrypt in the
		 * next iteration.
		 *
		 * On the first pass the encrypted MAC lane (odd words)
		 * is not used: the incoming MAC value is XORed directly
		 * with the first ciphertext block. On later passes the
		 * encrypted previous MAC input is XORed with the new
		 * ciphertext block.
		 */
		if (first_iter) {
			first_iter = 0;
			cm0 ^= q[0];
			cm1 ^= q[2];
			cm2 ^= q[4];
			cm3 ^= q[6];
		} else {
			cm0 = q[0] ^ q[1];
			cm1 = q[2] ^ q[3];
			cm2 = q[4] ^ q[5];
			cm3 = q[6] ^ q[7];
		}

		/*
		 * If this was the last iteration, then compute the
		 * extra block encryption to complete CBC-MAC.
		 */
		if (len == 0) {
			q[0] = cm0;
			q[2] = cm1;
			q[4] = cm2;
			q[6] = cm3;
			br_aes_ct_ortho(q);
			br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
			br_aes_ct_ortho(q);
			cm0 = q[0];
			cm1 = q[2];
			cm2 = q[4];
			cm3 = q[6];
			break;
		}
	}

	br_enc32be(ivbuf + 0, iv0);
	br_enc32be(ivbuf + 4, iv1);
	br_enc32be(ivbuf + 8, iv2);
	br_enc32be(ivbuf + 12, iv3);
	br_enc32le((unsigned char *)cbcmac + 0, cm0);
	br_enc32le((unsigned char *)cbcmac + 4, cm1);
	br_enc32le((unsigned char *)cbcmac + 8, cm2);
	br_enc32le((unsigned char *)cbcmac + 12, cm3);
}

/* see bearssl_block.h */
/*
 * Combined CTR decryption and CBC-MAC over the ciphertext. data is
 * decrypted in place, 16 bytes per pass; ctr and cbcmac are updated on
 * exit.
 *
 * The MAC input (the ciphertext) is known before the block is
 * processed, so the counter block (even words) and the MAC block (odd
 * words) for the same 16 bytes are encrypted together with no lag.
 * len must be a multiple of 16.
 */
void
br_aes_ct_ctrcbc_decrypt(const br_aes_ct_ctrcbc_keys *ctx,
	void *ctr, void *cbcmac, void *data, size_t len)
{
	unsigned char *buf;
	unsigned char *ivbuf;
	uint32_t iv0, iv1, iv2, iv3;
	uint32_t cm0, cm1, cm2, cm3;
	uint32_t sk_exp[120];

	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);

	/*
	 * We keep the counter as four 32-bit values, with big-endian
	 * convention, because that's what is expected for purposes of
	 * incrementing the counter value.
	 */
	ivbuf = ctr;
	iv0 = br_dec32be(ivbuf + 0);
	iv1 = br_dec32be(ivbuf + 4);
	iv2 = br_dec32be(ivbuf + 8);
	iv3 = br_dec32be(ivbuf + 12);

	/*
	 * The current CBC-MAC value is kept in little-endian convention.
	 */
	cm0 = br_dec32le((unsigned char *)cbcmac + 0);
	cm1 = br_dec32le((unsigned char *)cbcmac + 4);
	cm2 = br_dec32le((unsigned char *)cbcmac + 8);
	cm3 = br_dec32le((unsigned char *)cbcmac + 12);

	buf = data;
	while (len > 0) {
		uint32_t q[8], carry;
		unsigned char tmp[16];

		/*
		 * The bitslice implementation expects values in
		 * little-endian convention, so we have to byteswap them.
		 */
		q[0] = br_swap32(iv0);
		q[2] = br_swap32(iv1);
		q[4] = br_swap32(iv2);
		q[6] = br_swap32(iv3);
		/*
		 * Branch-free 128-bit increment: ~(x | -x) >> 31 is 1
		 * exactly when the 32-bit word x is zero (it wrapped).
		 */
		iv3 ++;
		carry = ~(iv3 | -iv3) >> 31;
		iv2 += carry;
		carry &= -(~(iv2 | -iv2) >> 31);
		iv1 += carry;
		carry &= -(~(iv1 | -iv1) >> 31);
		iv0 += carry;

		/*
		 * The odd values are used for CBC-MAC.
		 */
		q[1] = cm0 ^ br_dec32le(buf + 0);
		q[3] = cm1 ^ br_dec32le(buf + 4);
		q[5] = cm2 ^ br_dec32le(buf + 8);
		q[7] = cm3 ^ br_dec32le(buf + 12);

		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Even words: key stream block; odd words: new MAC. */
		br_enc32le(tmp + 0, q[0]);
		br_enc32le(tmp + 4, q[2]);
		br_enc32le(tmp + 8, q[4]);
		br_enc32le(tmp + 12, q[6]);
		xorbuf(buf, tmp, 16);
		cm0 = q[1];
		cm1 = q[3];
		cm2 = q[5];
		cm3 = q[7];
		buf += 16;
		len -= 16;
	}

	br_enc32be(ivbuf + 0, iv0);
	br_enc32be(ivbuf + 4, iv1);
	br_enc32be(ivbuf + 8, iv2);
	br_enc32be(ivbuf + 12, iv3);
	br_enc32le((unsigned char *)cbcmac + 0, cm0);
	br_enc32le((unsigned char *)cbcmac + 4, cm1);
	br_enc32le((unsigned char *)cbcmac + 8, cm2);
	br_enc32le((unsigned char *)cbcmac + 12, cm3);
}

/* see bearssl_block.h */
/*
 * Class descriptor for this implementation: context size, two integer
 * constants, then the init, encrypt, decrypt, ctr and mac entry points,
 * each cast to the generic signature taking a pointer to the vtable
 * pointer. NOTE(review): 16 and 4 are presumably the block size in
 * bytes and its base-2 logarithm -- confirm against the
 * br_block_ctrcbc_class declaration.
 */
const br_block_ctrcbc_class br_aes_ct_ctrcbc_vtable = {
	sizeof(br_aes_ct_ctrcbc_keys),
	16,
	4,
	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
		&br_aes_ct_ctrcbc_init,
	(void (*)(const br_block_ctrcbc_class *const *,
		void *, void *, void *, size_t))
		&br_aes_ct_ctrcbc_encrypt,
	(void (*)(const br_block_ctrcbc_class *const *,
		void *, void *, void *, size_t))
		&br_aes_ct_ctrcbc_decrypt,
	(void (*)(const br_block_ctrcbc_class *const *,
		void *, void *, size_t))
		&br_aes_ct_ctrcbc_ctr,
	(void (*)(const br_block_ctrcbc_class *const *,
		void *, const void *, size_t))
		&br_aes_ct_ctrcbc_mac
};
+ */
+
+#include "inner.h"
+
+/* see inner.h
+ *
+ * Apply the inverse AES S-box, in place, to the eight bitsliced words
+ * q[0..7]. The forward S-box br_aes_ct_bitslice_Sbox() is reused: the
+ * same complement-and-XOR step is run once before it and once after it.
+ */
+void
+br_aes_ct_bitslice_invSbox(uint32_t *q)
+{
+	/*
+	 * AES S-box is:
+	 *   S(x) = A(I(x)) ^ 0x63
+	 * where I() is inversion in GF(256), and A() is a linear
+	 * transform (0 is formally defined to be its own inverse).
+	 * Since inversion is an involution, the inverse S-box can be
+	 * computed from the S-box as:
+	 *   iS(x) = B(S(B(x ^ 0x63)) ^ 0x63)
+	 * where B() is the inverse of A(). Indeed, for any y in GF(256):
+	 *   iS(S(y)) = B(A(I(B(A(I(y)) ^ 0x63 ^ 0x63))) ^ 0x63 ^ 0x63) = y
+	 *
+	 * Note: we reuse the implementation of the forward S-box,
+	 * instead of duplicating it here, so that total code size is
+	 * lower. By merging the B() transforms into the S-box circuit
+	 * we could make faster CBC decryption, but CBC decryption is
+	 * already quite faster than CBC encryption because we can
+	 * process two blocks in parallel.
+	 */
+	uint32_t q0, q1, q2, q3, q4, q5, q6, q7;
+	/*
+	 * First half: B(x ^ 0x63). Words 0, 1, 5 and 6 are complemented,
+	 * which matches the set bits of 0x63 (presumably q[i] carries
+	 * bit i of every state byte; confirm against br_aes_ct_ortho).
+	 * The three-term XORs that follow are then the B() transform.
+	 */
+	q0 = ~q[0];
+	q1 = ~q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = ~q[5];
+	q6 = ~q[6];
+	q7 = q[7];
+	q[7] = q1 ^ q4 ^ q6;
+	q[6] = q0 ^ q3 ^ q5;
+	q[5] = q7 ^ q2 ^ q4;
+	q[4] = q6 ^ q1 ^ q3;
+	q[3] = q5 ^ q0 ^ q2;
+	q[2] = q4 ^ q7 ^ q1;
+	q[1] = q3 ^ q6 ^ q0;
+	q[0] = q2 ^ q5 ^ q7;
+
+	br_aes_ct_bitslice_Sbox(q);
+	/*
+	 * Second half: the identical complement + XOR step, now applied
+	 * to the forward S-box output.
+	 */
+	q0 = ~q[0];
+	q1 = ~q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = ~q[5];
+	q6 = ~q[6];
+	q7 = q[7];
+	q[7] = q1 ^ q4 ^ q6;
+	q[6] = q0 ^ q3 ^ q5;
+	q[5] = q7 ^ q2 ^ q4;
+	q[4] = q6 ^ q1 ^ q3;
+	q[3] = q5 ^ q0 ^ q2;
+	q[2] = q4 ^ q7 ^ q1;
+	q[1] = q3 ^ q6 ^ q0;
+	q[0] = q2 ^ q5 ^ q7;
+}
+/*
+ * XOR the eight round-key words sk[0..7] into the state words q[0..7].
+ */
+static void
+add_round_key(uint32_t *q, const uint32_t *sk)
+{
+	int i;
+
+	for (i = 0; i < 8; i ++) {
+		q[i] ^= sk[i];
+	}
+}
+/*
+ * Inverse ShiftRows on the bitsliced state: the same bit permutation is
+ * applied to each of the eight words. The low byte is left untouched;
+ * within the second byte bits are rotated left by 2, within the third
+ * byte by 4 (nibble swap), and within the top byte by 6.
+ */
+static void
+inv_shift_rows(uint32_t *q)
+{
+	int i;
+
+	for (i = 0; i < 8; i ++) {
+		uint32_t x;
+
+		x = q[i];
+		q[i] = (x & 0x000000FF)
+			| ((x & 0x00003F00) << 2) | ((x & 0x0000C000) >> 6)
+			| ((x & 0x000F0000) << 4) | ((x & 0x00F00000) >> 4)
+			| ((x & 0x03000000) << 6) | ((x & 0xFC000000) >> 2);
+	}
+}
+
+static inline uint32_t
+rotr16(uint32_t x)
+{	/* Rotate x by 16 bits, i.e. swap its upper and lower 16-bit halves. */
+	return (x << 16) | (x >> 16);
+}
+/*
+ * Inverse MixColumns on the bitsliced state, in place. r0..r7 are the
+ * input words rotated right by 8 bits; every output word is a fixed XOR
+ * of selected q*/r* values plus a 16-bit rotation of a second XOR.
+ * The derivation of the individual XOR lists is not spelled out in this
+ * file; treat them as constants and do not reorder or "simplify" them.
+ */
+static void
+inv_mix_columns(uint32_t *q)
+{
+	uint32_t q0, q1, q2, q3, q4, q5, q6, q7;
+	uint32_t r0, r1, r2, r3, r4, r5, r6, r7;
+
+	q0 = q[0];
+	q1 = q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = q[5];
+	q6 = q[6];
+	q7 = q[7];
+	r0 = (q0 >> 8) | (q0 << 24);
+	r1 = (q1 >> 8) | (q1 << 24);
+	r2 = (q2 >> 8) | (q2 << 24);
+	r3 = (q3 >> 8) | (q3 << 24);
+	r4 = (q4 >> 8) | (q4 << 24);
+	r5 = (q5 >> 8) | (q5 << 24);
+	r6 = (q6 >> 8) | (q6 << 24);
+	r7 = (q7 >> 8) | (q7 << 24);
+
+	q[0] = q5 ^ q6 ^ q7 ^ r0 ^ r5 ^ r7 ^ rotr16(q0 ^ q5 ^ q6 ^ r0 ^ r5);
+	q[1] = q0 ^ q5 ^ r0 ^ r1 ^ r5 ^ r6 ^ r7 ^ rotr16(q1 ^ q5 ^ q7 ^ r1 ^ r5 ^ r6);
+	q[2] = q0 ^ q1 ^ q6 ^ r1 ^ r2 ^ r6 ^ r7 ^ rotr16(q0 ^ q2 ^ q6 ^ r2 ^ r6 ^ r7);
+	q[3] = q0 ^ q1 ^ q2 ^ q5 ^ q6 ^ r0 ^ r2 ^ r3 ^ r5 ^ rotr16(q0 ^ q1 ^ q3 ^ q5 ^ q6 ^ q7 ^ r0 ^ r3 ^ r5 ^ r7);
+	q[4] = q1 ^ q2 ^ q3 ^ q5 ^ r1 ^ r3 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr16(q1 ^ q2 ^ q4 ^ q5 ^ q7 ^ r1 ^ r4 ^ r5 ^ r6);
+	q[5] = q2 ^ q3 ^ q4 ^ q6 ^ r2 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr16(q2 ^ q3 ^ q5 ^ q6 ^ r2 ^ r5 ^ r6 ^ r7);
+	q[6] = q3 ^ q4 ^ q5 ^ q7 ^ r3 ^ r5 ^ r6 ^ r7 ^ rotr16(q3 ^ q4 ^ q6 ^ q7 ^ r3 ^ r6 ^ r7);
+	q[7] = q4 ^ q5 ^ q6 ^ r4 ^ r6 ^ r7 ^ rotr16(q4 ^ q5 ^ q7 ^ r4 ^ r7);
+}
+
+/* see inner.h
+ *
+ * Bitsliced AES decryption of the state q[0..7], in place.
+ *
+ *   num_rounds  number of AES rounds
+ *   skey        round keys, 8 words each; key u starts at skey + 8*u and
+ *               keys 0..num_rounds are read
+ *   q           bitsliced state, overwritten with the result
+ *
+ * Round keys are consumed from the last one down to key 0. Each loop
+ * pass runs inv_shift_rows, the inverse S-box, add_round_key and
+ * inv_mix_columns; the final pass omits inv_mix_columns.
+ */
+void
+br_aes_ct_bitslice_decrypt(unsigned num_rounds,
+	const uint32_t *skey, uint32_t *q)
+{
+	unsigned u;
+
+	add_round_key(q, skey + (num_rounds << 3));
+	for (u = num_rounds - 1; u > 0; u --) {
+		inv_shift_rows(q);
+		br_aes_ct_bitslice_invSbox(q);
+		add_round_key(q, skey + (u << 3));
+		inv_mix_columns(q);
+	}
+	inv_shift_rows(q);
+	br_aes_ct_bitslice_invSbox(q);
+	add_round_key(q, skey);
+}
diff --git a/third_party/bearssl/src/aes_ct_enc.c b/third_party/bearssl/src/aes_ct_enc.c
new file mode 100644
index 0000000..089bf35
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct_enc.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ *
Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include "inner.h"
+/*
+ * XOR the eight round-key words sk[0..7] into the state words q[0..7]
+ * (written out word by word rather than as a loop).
+ */
+static inline void
+add_round_key(uint32_t *q, const uint32_t *sk)
+{
+	q[0] ^= sk[0];
+	q[1] ^= sk[1];
+	q[2] ^= sk[2];
+	q[3] ^= sk[3];
+	q[4] ^= sk[4];
+	q[5] ^= sk[5];
+	q[6] ^= sk[6];
+	q[7] ^= sk[7];
+}
+/*
+ * ShiftRows on the bitsliced state: the same bit permutation is applied
+ * to each of the eight words. The low byte is left untouched; within the
+ * second byte bits are rotated right by 2, within the third byte by 4
+ * (nibble swap), and within the top byte by 6.
+ */
+static inline void
+shift_rows(uint32_t *q)
+{
+	int i;
+
+	for (i = 0; i < 8; i ++) {
+		uint32_t x;
+
+		x = q[i];
+		q[i] = (x & 0x000000FF)
+			| ((x & 0x0000FC00) >> 2) | ((x & 0x00000300) << 6)
+			| ((x & 0x00F00000) >> 4) | ((x & 0x000F0000) << 4)
+			| ((x & 0xC0000000) >> 6) | ((x & 0x3F000000) << 2);
+	}
+}
+/*
+ * Rotate x by 16 bits, i.e. swap its upper and lower 16-bit halves.
+ */
+static inline uint32_t
+rotr16(uint32_t x)
+{
+	return (x << 16) | (x >> 16);
+}
+/*
+ * MixColumns on the bitsliced state, in place. r0..r7 are the input
+ * words rotated right by 8 bits; output word i combines q(i-1) ^ r(i-1),
+ * r(i) and a 16-bit rotation of q(i) ^ r(i). Words 0, 1, 3 and 4
+ * additionally receive q7 ^ r7 (for word 0 that term is also the
+ * "i-1" term).
+ */
+static inline void
+mix_columns(uint32_t *q)
+{
+	uint32_t q0, q1, q2, q3, q4, q5, q6, q7;
+	uint32_t r0, r1, r2, r3, r4, r5, r6, r7;
+
+	q0 = q[0];
+	q1 = q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = q[5];
+	q6 = q[6];
+	q7 = q[7];
+	r0 = (q0 >> 8) | (q0 << 24);
+	r1 = (q1 >> 8) | (q1 << 24);
+	r2 = (q2 >> 8) | (q2 << 24);
+	r3 = (q3 >> 8) | (q3 << 24);
+	r4 = (q4 >> 8) | (q4 << 24);
+	r5 = (q5 >> 8) | (q5 << 24);
+	r6 = (q6 >> 8) | (q6 << 24);
+	r7 = (q7 >> 8) | (q7 << 24);
+
+	q[0] = q7 ^ r7 ^ r0 ^ rotr16(q0 ^ r0);
+	q[1] = q0 ^ r0 ^ q7 ^ r7 ^ r1 ^ rotr16(q1 ^ r1);
+	q[2] = q1 ^ r1 ^ r2 ^ rotr16(q2 ^ r2);
+	q[3] = q2 ^ r2 ^ q7 ^ r7 ^ r3 ^ rotr16(q3 ^ r3);
+	q[4] = q3 ^ r3 ^ q7 ^ r7 ^ r4 ^ rotr16(q4 ^ r4);
+	q[5] = q4 ^ r4 ^ r5 ^ rotr16(q5 ^ r5);
+	q[6] = q5 ^ r5 ^ r6 ^ rotr16(q6 ^ r6);
+	q[7] = q6 ^ r6 ^ r7 ^ rotr16(q7 ^ r7);
+}
+
+/* see inner.h
+ *
+ * Bitsliced AES encryption of the state q[0..7], in place.
+ *
+ *   num_rounds  number of AES rounds
+ *   skey        round keys, 8 words each; key u starts at skey + 8*u and
+ *               keys 0..num_rounds are read
+ *   q           bitsliced state, overwritten with the result
+ *
+ * Key 0 is added first. Rounds 1..num_rounds-1 run S-box, shift_rows,
+ * mix_columns and add_round_key; the last round omits mix_columns.
+ */
+void
+br_aes_ct_bitslice_encrypt(unsigned num_rounds,
+	const uint32_t *skey, uint32_t *q)
+{
+	unsigned u;
+
+	add_round_key(q, skey);
+	for (u = 1; u < num_rounds; u ++) {
+		br_aes_ct_bitslice_Sbox(q);
+		shift_rows(q);
+		mix_columns(q);
+		add_round_key(q, skey + (u << 3));
+	}
+	br_aes_ct_bitslice_Sbox(q);
+	shift_rows(q);
+	add_round_key(q, skey + (num_rounds << 3));
+}
diff --git a/third_party/bearssl/src/aes_pwr8.c b/third_party/bearssl/src/aes_pwr8.c
new file mode 100644
index 0000000..b2c63c3 --- /dev/null +++ b/third_party/bearssl/src/aes_pwr8.c @@ -0,0 +1,445 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define BR_POWER_ASM_MACROS 1 +#include "inner.h" + +/* + * This code contains the AES key schedule implementation using the + * POWER8 opcodes. + */ + +#if BR_POWER8 + +static void +key_schedule_128(unsigned char *sk, const unsigned char *key) +{ + long cc; + + static const uint32_t fmod[] = { 0x11B, 0x11B, 0x11B, 0x11B }; +#if BR_POWER8_LE + static const uint32_t idx2be[] = { + 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C + }; +#endif + + cc = 0; + + /* + * We use the VSX instructions for loading and storing the + * key/subkeys, since they support unaligned accesses. The rest + * of the computation is VMX only. VMX register 0 is VSX + * register 32. 
+ */ + asm volatile ( + + /* + * v0 = all-zero word + * v1 = constant -8 / +8, copied into four words + * v2 = current subkey + * v3 = Rcon (x4 words) + * v6 = constant 8, copied into four words + * v7 = constant 0x11B, copied into four words + * v8 = constant for byteswapping words + */ + vspltisw(0, 0) +#if BR_POWER8_LE + vspltisw(1, -8) +#else + vspltisw(1, 8) +#endif + lxvw4x(34, 0, %[key]) + vspltisw(3, 1) + vspltisw(6, 8) + lxvw4x(39, 0, %[fmod]) +#if BR_POWER8_LE + lxvw4x(40, 0, %[idx2be]) +#endif + + /* + * First subkey is a copy of the key itself. + */ +#if BR_POWER8_LE + vperm(4, 2, 2, 8) + stxvw4x(36, 0, %[sk]) +#else + stxvw4x(34, 0, %[sk]) +#endif + + /* + * Loop must run 10 times. + */ + li(%[cc], 10) + mtctr(%[cc]) + label(loop) + /* Increment subkey address */ + addi(%[sk], %[sk], 16) + + /* Compute SubWord(RotWord(temp)) xor Rcon (into v4, splat) */ + vrlw(4, 2, 1) + vsbox(4, 4) +#if BR_POWER8_LE + vxor(4, 4, 3) +#else + vsldoi(5, 3, 0, 3) + vxor(4, 4, 5) +#endif + vspltw(4, 4, 3) + + /* XOR words for next subkey */ + vsldoi(5, 0, 2, 12) + vxor(2, 2, 5) + vsldoi(5, 0, 2, 12) + vxor(2, 2, 5) + vsldoi(5, 0, 2, 12) + vxor(2, 2, 5) + vxor(2, 2, 4) + + /* Store next subkey */ +#if BR_POWER8_LE + vperm(4, 2, 2, 8) + stxvw4x(36, 0, %[sk]) +#else + stxvw4x(34, 0, %[sk]) +#endif + + /* Update Rcon */ + vadduwm(3, 3, 3) + vsrw(4, 3, 6) + vsubuwm(4, 0, 4) + vand(4, 4, 7) + vxor(3, 3, 4) + + bdnz(loop) + +: [sk] "+b" (sk), [cc] "+b" (cc) +: [key] "b" (key), [fmod] "b" (fmod) +#if BR_POWER8_LE + , [idx2be] "b" (idx2be) +#endif +: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "ctr", "memory" + ); +} + +static void +key_schedule_192(unsigned char *sk, const unsigned char *key) +{ + long cc; + +#if BR_POWER8_LE + static const uint32_t idx2be[] = { + 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C + }; +#endif + + cc = 0; + + /* + * We use the VSX instructions for loading and storing the + * key/subkeys, since they support unaligned accesses. 
The rest + * of the computation is VMX only. VMX register 0 is VSX + * register 32. + */ + asm volatile ( + + /* + * v0 = all-zero word + * v1 = constant -8 / +8, copied into four words + * v2, v3 = current subkey + * v5 = Rcon (x4 words) (already shifted on big-endian) + * v6 = constant 8, copied into four words + * v8 = constant for byteswapping words + * + * The left two words of v3 are ignored. + */ + vspltisw(0, 0) +#if BR_POWER8_LE + vspltisw(1, -8) +#else + vspltisw(1, 8) +#endif + li(%[cc], 8) + lxvw4x(34, 0, %[key]) + lxvw4x(35, %[cc], %[key]) + vsldoi(3, 3, 0, 8) + vspltisw(5, 1) +#if !BR_POWER8_LE + vsldoi(5, 5, 0, 3) +#endif + vspltisw(6, 8) +#if BR_POWER8_LE + lxvw4x(40, 0, %[idx2be]) +#endif + + /* + * Loop must run 8 times. Each iteration produces 256 + * bits of subkeys, with a 64-bit overlap. + */ + li(%[cc], 8) + mtctr(%[cc]) + li(%[cc], 16) + label(loop) + + /* + * Last 6 words in v2:v3l. Compute next 6 words into + * v3r:v4. + */ + vrlw(10, 3, 1) + vsbox(10, 10) + vxor(10, 10, 5) + vspltw(10, 10, 1) + vsldoi(11, 0, 10, 8) + + vsldoi(12, 0, 2, 12) + vxor(12, 2, 12) + vsldoi(13, 0, 12, 12) + vxor(12, 12, 13) + vsldoi(13, 0, 12, 12) + vxor(12, 12, 13) + + vspltw(13, 12, 3) + vxor(13, 13, 3) + vsldoi(14, 0, 3, 12) + vxor(13, 13, 14) + + vsldoi(4, 12, 13, 8) + vsldoi(14, 0, 3, 8) + vsldoi(3, 14, 12, 8) + + vxor(3, 3, 11) + vxor(4, 4, 10) + + /* + * Update Rcon. Since for a 192-bit key, we use only 8 + * such constants, we will not hit the field modulus, + * so a simple shift (addition) works well. + */ + vadduwm(5, 5, 5) + + /* + * Write out the two left 128-bit words + */ +#if BR_POWER8_LE + vperm(10, 2, 2, 8) + vperm(11, 3, 3, 8) + stxvw4x(42, 0, %[sk]) + stxvw4x(43, %[cc], %[sk]) +#else + stxvw4x(34, 0, %[sk]) + stxvw4x(35, %[cc], %[sk]) +#endif + addi(%[sk], %[sk], 24) + + /* + * Shift words for next iteration. 
+ */ + vsldoi(2, 3, 4, 8) + vsldoi(3, 4, 0, 8) + + bdnz(loop) + + /* + * The loop wrote the first 50 subkey words, but we need + * to produce 52, so we must do one last write. + */ +#if BR_POWER8_LE + vperm(10, 2, 2, 8) + stxvw4x(42, 0, %[sk]) +#else + stxvw4x(34, 0, %[sk]) +#endif + +: [sk] "+b" (sk), [cc] "+b" (cc) +: [key] "b" (key) +#if BR_POWER8_LE + , [idx2be] "b" (idx2be) +#endif +: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "ctr", "memory" + ); +} + +static void +key_schedule_256(unsigned char *sk, const unsigned char *key) +{ + long cc; + +#if BR_POWER8_LE + static const uint32_t idx2be[] = { + 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C + }; +#endif + + cc = 0; + + /* + * We use the VSX instructions for loading and storing the + * key/subkeys, since they support unaligned accesses. The rest + * of the computation is VMX only. VMX register 0 is VSX + * register 32. + */ + asm volatile ( + + /* + * v0 = all-zero word + * v1 = constant -8 / +8, copied into four words + * v2, v3 = current subkey + * v6 = Rcon (x4 words) (already shifted on big-endian) + * v7 = constant 8, copied into four words + * v8 = constant for byteswapping words + * + * The left two words of v3 are ignored. + */ + vspltisw(0, 0) +#if BR_POWER8_LE + vspltisw(1, -8) +#else + vspltisw(1, 8) +#endif + li(%[cc], 16) + lxvw4x(34, 0, %[key]) + lxvw4x(35, %[cc], %[key]) + vspltisw(6, 1) +#if !BR_POWER8_LE + vsldoi(6, 6, 0, 3) +#endif + vspltisw(7, 8) +#if BR_POWER8_LE + lxvw4x(40, 0, %[idx2be]) +#endif + + /* + * Loop must run 7 times. Each iteration produces two + * subkeys. + */ + li(%[cc], 7) + mtctr(%[cc]) + li(%[cc], 16) + label(loop) + + /* + * Current words are in v2:v3. Compute next word in v4. 
+ */ + vrlw(10, 3, 1) + vsbox(10, 10) + vxor(10, 10, 6) + vspltw(10, 10, 3) + + vsldoi(4, 0, 2, 12) + vxor(4, 2, 4) + vsldoi(5, 0, 4, 12) + vxor(4, 4, 5) + vsldoi(5, 0, 4, 12) + vxor(4, 4, 5) + vxor(4, 4, 10) + + /* + * Then other word in v5. + */ + vsbox(10, 4) + vspltw(10, 10, 3) + + vsldoi(5, 0, 3, 12) + vxor(5, 3, 5) + vsldoi(11, 0, 5, 12) + vxor(5, 5, 11) + vsldoi(11, 0, 5, 12) + vxor(5, 5, 11) + vxor(5, 5, 10) + + /* + * Update Rcon. Since for a 256-bit key, we use only 7 + * such constants, we will not hit the field modulus, + * so a simple shift (addition) works well. + */ + vadduwm(6, 6, 6) + + /* + * Write out the two left 128-bit words + */ +#if BR_POWER8_LE + vperm(10, 2, 2, 8) + vperm(11, 3, 3, 8) + stxvw4x(42, 0, %[sk]) + stxvw4x(43, %[cc], %[sk]) +#else + stxvw4x(34, 0, %[sk]) + stxvw4x(35, %[cc], %[sk]) +#endif + addi(%[sk], %[sk], 32) + + /* + * Replace v2:v3 with v4:v5. + */ + vxor(2, 0, 4) + vxor(3, 0, 5) + + bdnz(loop) + + /* + * The loop wrote the first 14 subkeys, but we need 15, + * so we must do an extra write. 
+ */ +#if BR_POWER8_LE + vperm(10, 2, 2, 8) + stxvw4x(42, 0, %[sk]) +#else + stxvw4x(34, 0, %[sk]) +#endif + +: [sk] "+b" (sk), [cc] "+b" (cc) +: [key] "b" (key) +#if BR_POWER8_LE + , [idx2be] "b" (idx2be) +#endif +: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "ctr", "memory" + ); +} + +/* see inner.h */ +int +br_aes_pwr8_supported(void) +{ + return 1; +} + +/* see inner.h */ +unsigned +br_aes_pwr8_keysched(unsigned char *sk, const void *key, size_t len) +{ + switch (len) { + case 16: + key_schedule_128(sk, key); + return 10; + case 24: + key_schedule_192(sk, key); + return 12; + default: + key_schedule_256(sk, key); + return 14; + } +} + +#endif diff --git a/third_party/bearssl/src/aes_pwr8_cbcdec.c b/third_party/bearssl/src/aes_pwr8_cbcdec.c new file mode 100644 index 0000000..e535ba6 --- /dev/null +++ b/third_party/bearssl/src/aes_pwr8_cbcdec.c @@ -0,0 +1,670 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS   1
+#include "inner.h"
+
+#if BR_POWER8
+
+/* see bearssl_block.h
+ *
+ * Initialise a POWER8 AES/CBC decryption context: install the vtable
+ * pointer, pass ctx->skey.skni to br_aes_pwr8_keysched() as the subkey
+ * buffer, and record the round count it returns.
+ */
+void
+br_aes_pwr8_cbcdec_init(br_aes_pwr8_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_pwr8_cbcdec_vtable;
+	ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
+}
+/*
+ * AES-128 CBC decryption of buf, in place, four blocks per loop pass.
+ *
+ *   sk          11 subkeys of 16 bytes, loaded into v0..v10
+ *   iv          16-byte IV; read only (the caller updates its own IV)
+ *   buf         data, advanced by 64 bytes per loop pass
+ *   num_blocks  block count; the loop counter is num_blocks >> 2, and
+ *               the callers in this file always pass a non-zero multiple
+ *               of 4
+ */
+static void
+cbcdec_128(const unsigned char *sk,
+	const unsigned char *iv, unsigned char *buf, size_t num_blocks)
+{
+	long cc0, cc1, cc2, cc3;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc0 = 0;
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v10
+		 */
+		lxvw4x(32, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(33, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(34, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(35, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(36, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(37, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(38, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(39, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(40, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(41, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(42, %[cc0], %[sk])
+		li(%[cc0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * Load IV into v24.
+		 */
+		lxvw4x(56, 0, %[iv])
+#if BR_POWER8_LE
+		vperm(24, 24, 24, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Load next ciphertext words in v16..v19. Also save them
+		 * in v20..v23.
+		 */
+		lxvw4x(48, %[cc0], %[buf])
+		lxvw4x(49, %[cc1], %[buf])
+		lxvw4x(50, %[cc2], %[buf])
+		lxvw4x(51, %[cc3], %[buf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		vand(20, 16, 16)
+		vand(21, 17, 17)
+		vand(22, 18, 18)
+		vand(23, 19, 19)
+
+		/*
+		 * Decrypt the blocks.
+		 */
+		vxor(16, 16, 10)
+		vxor(17, 17, 10)
+		vxor(18, 18, 10)
+		vxor(19, 19, 10)
+		vncipher(16, 16, 9)
+		vncipher(17, 17, 9)
+		vncipher(18, 18, 9)
+		vncipher(19, 19, 9)
+		vncipher(16, 16, 8)
+		vncipher(17, 17, 8)
+		vncipher(18, 18, 8)
+		vncipher(19, 19, 8)
+		vncipher(16, 16, 7)
+		vncipher(17, 17, 7)
+		vncipher(18, 18, 7)
+		vncipher(19, 19, 7)
+		vncipher(16, 16, 6)
+		vncipher(17, 17, 6)
+		vncipher(18, 18, 6)
+		vncipher(19, 19, 6)
+		vncipher(16, 16, 5)
+		vncipher(17, 17, 5)
+		vncipher(18, 18, 5)
+		vncipher(19, 19, 5)
+		vncipher(16, 16, 4)
+		vncipher(17, 17, 4)
+		vncipher(18, 18, 4)
+		vncipher(19, 19, 4)
+		vncipher(16, 16, 3)
+		vncipher(17, 17, 3)
+		vncipher(18, 18, 3)
+		vncipher(19, 19, 3)
+		vncipher(16, 16, 2)
+		vncipher(17, 17, 2)
+		vncipher(18, 18, 2)
+		vncipher(19, 19, 2)
+		vncipher(16, 16, 1)
+		vncipher(17, 17, 1)
+		vncipher(18, 18, 1)
+		vncipher(19, 19, 1)
+		vncipherlast(16, 16, 0)
+		vncipherlast(17, 17, 0)
+		vncipherlast(18, 18, 0)
+		vncipherlast(19, 19, 0)
+
+		/*
+		 * XOR decrypted blocks with IV / previous block.
+		 */
+		vxor(16, 16, 24)
+		vxor(17, 17, 20)
+		vxor(18, 18, 21)
+		vxor(19, 19, 22)
+
+		/*
+		 * Store back result (with byteswap)
+		 */
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		stxvw4x(48, %[cc0], %[buf])
+		stxvw4x(49, %[cc1], %[buf])
+		stxvw4x(50, %[cc2], %[buf])
+		stxvw4x(51, %[cc3], %[buf])
+
+		/*
+		 * Fourth encrypted block is IV for next run.
+		 */
+		vand(24, 23, 23)
+
+		addi(%[buf], %[buf], 64)
+
+		bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+  [buf] "+b" (buf)
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"
+	);
+}
+/*
+ * AES-192 CBC decryption of buf, in place, four blocks per loop pass.
+ * Same structure and parameters as cbcdec_128, but with 13 subkeys
+ * (loaded into v0..v12) and 12 rounds.
+ */
+static void
+cbcdec_192(const unsigned char *sk,
+	const unsigned char *iv, unsigned char *buf, size_t num_blocks)
+{
+	long cc0, cc1, cc2, cc3;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc0 = 0;
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v12
+		 */
+		lxvw4x(32, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(33, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(34, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(35, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(36, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(37, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(38, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(39, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(40, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(41, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(42, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(43, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(44, %[cc0], %[sk])
+		li(%[cc0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * Load IV into v24.
+		 */
+		lxvw4x(56, 0, %[iv])
+#if BR_POWER8_LE
+		vperm(24, 24, 24, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Load next ciphertext words in v16..v19. Also save them
+		 * in v20..v23.
+		 */
+		lxvw4x(48, %[cc0], %[buf])
+		lxvw4x(49, %[cc1], %[buf])
+		lxvw4x(50, %[cc2], %[buf])
+		lxvw4x(51, %[cc3], %[buf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		vand(20, 16, 16)
+		vand(21, 17, 17)
+		vand(22, 18, 18)
+		vand(23, 19, 19)
+
+		/*
+		 * Decrypt the blocks.
+		 */
+		vxor(16, 16, 12)
+		vxor(17, 17, 12)
+		vxor(18, 18, 12)
+		vxor(19, 19, 12)
+		vncipher(16, 16, 11)
+		vncipher(17, 17, 11)
+		vncipher(18, 18, 11)
+		vncipher(19, 19, 11)
+		vncipher(16, 16, 10)
+		vncipher(17, 17, 10)
+		vncipher(18, 18, 10)
+		vncipher(19, 19, 10)
+		vncipher(16, 16, 9)
+		vncipher(17, 17, 9)
+		vncipher(18, 18, 9)
+		vncipher(19, 19, 9)
+		vncipher(16, 16, 8)
+		vncipher(17, 17, 8)
+		vncipher(18, 18, 8)
+		vncipher(19, 19, 8)
+		vncipher(16, 16, 7)
+		vncipher(17, 17, 7)
+		vncipher(18, 18, 7)
+		vncipher(19, 19, 7)
+		vncipher(16, 16, 6)
+		vncipher(17, 17, 6)
+		vncipher(18, 18, 6)
+		vncipher(19, 19, 6)
+		vncipher(16, 16, 5)
+		vncipher(17, 17, 5)
+		vncipher(18, 18, 5)
+		vncipher(19, 19, 5)
+		vncipher(16, 16, 4)
+		vncipher(17, 17, 4)
+		vncipher(18, 18, 4)
+		vncipher(19, 19, 4)
+		vncipher(16, 16, 3)
+		vncipher(17, 17, 3)
+		vncipher(18, 18, 3)
+		vncipher(19, 19, 3)
+		vncipher(16, 16, 2)
+		vncipher(17, 17, 2)
+		vncipher(18, 18, 2)
+		vncipher(19, 19, 2)
+		vncipher(16, 16, 1)
+		vncipher(17, 17, 1)
+		vncipher(18, 18, 1)
+		vncipher(19, 19, 1)
+		vncipherlast(16, 16, 0)
+		vncipherlast(17, 17, 0)
+		vncipherlast(18, 18, 0)
+		vncipherlast(19, 19, 0)
+
+		/*
+		 * XOR decrypted blocks with IV / previous block.
+		 */
+		vxor(16, 16, 24)
+		vxor(17, 17, 20)
+		vxor(18, 18, 21)
+		vxor(19, 19, 22)
+
+		/*
+		 * Store back result (with byteswap)
+		 */
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		stxvw4x(48, %[cc0], %[buf])
+		stxvw4x(49, %[cc1], %[buf])
+		stxvw4x(50, %[cc2], %[buf])
+		stxvw4x(51, %[cc3], %[buf])
+
+		/*
+		 * Fourth encrypted block is IV for next run.
+		 */
+		vand(24, 23, 23)
+
+		addi(%[buf], %[buf], 64)
+
+		bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+  [buf] "+b" (buf)
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"
+	);
+}
+/*
+ * AES-256 CBC decryption of buf, in place, four blocks per loop pass.
+ * Same structure and parameters as cbcdec_128, but with 15 subkeys
+ * (loaded into v0..v14) and 14 rounds.
+ */
+static void
+cbcdec_256(const unsigned char *sk,
+	const unsigned char *iv, unsigned char *buf, size_t num_blocks)
+{
+	long cc0, cc1, cc2, cc3;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc0 = 0;
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v14
+		 */
+		lxvw4x(32, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(33, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(34, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(35, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(36, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(37, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(38, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(39, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(40, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(41, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(42, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(43, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(44, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(45, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(46, %[cc0], %[sk])
+		li(%[cc0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * Load IV into v24.
+		 */
+		lxvw4x(56, 0, %[iv])
+#if BR_POWER8_LE
+		vperm(24, 24, 24, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Load next ciphertext words in v16..v19. Also save them
+		 * in v20..v23.
+		 */
+		lxvw4x(48, %[cc0], %[buf])
+		lxvw4x(49, %[cc1], %[buf])
+		lxvw4x(50, %[cc2], %[buf])
+		lxvw4x(51, %[cc3], %[buf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		vand(20, 16, 16)
+		vand(21, 17, 17)
+		vand(22, 18, 18)
+		vand(23, 19, 19)
+
+		/*
+		 * Decrypt the blocks.
+		 */
+		vxor(16, 16, 14)
+		vxor(17, 17, 14)
+		vxor(18, 18, 14)
+		vxor(19, 19, 14)
+		vncipher(16, 16, 13)
+		vncipher(17, 17, 13)
+		vncipher(18, 18, 13)
+		vncipher(19, 19, 13)
+		vncipher(16, 16, 12)
+		vncipher(17, 17, 12)
+		vncipher(18, 18, 12)
+		vncipher(19, 19, 12)
+		vncipher(16, 16, 11)
+		vncipher(17, 17, 11)
+		vncipher(18, 18, 11)
+		vncipher(19, 19, 11)
+		vncipher(16, 16, 10)
+		vncipher(17, 17, 10)
+		vncipher(18, 18, 10)
+		vncipher(19, 19, 10)
+		vncipher(16, 16, 9)
+		vncipher(17, 17, 9)
+		vncipher(18, 18, 9)
+		vncipher(19, 19, 9)
+		vncipher(16, 16, 8)
+		vncipher(17, 17, 8)
+		vncipher(18, 18, 8)
+		vncipher(19, 19, 8)
+		vncipher(16, 16, 7)
+		vncipher(17, 17, 7)
+		vncipher(18, 18, 7)
+		vncipher(19, 19, 7)
+		vncipher(16, 16, 6)
+		vncipher(17, 17, 6)
+		vncipher(18, 18, 6)
+		vncipher(19, 19, 6)
+		vncipher(16, 16, 5)
+		vncipher(17, 17, 5)
+		vncipher(18, 18, 5)
+		vncipher(19, 19, 5)
+		vncipher(16, 16, 4)
+		vncipher(17, 17, 4)
+		vncipher(18, 18, 4)
+		vncipher(19, 19, 4)
+		vncipher(16, 16, 3)
+		vncipher(17, 17, 3)
+		vncipher(18, 18, 3)
+		vncipher(19, 19, 3)
+		vncipher(16, 16, 2)
+		vncipher(17, 17, 2)
+		vncipher(18, 18, 2)
+		vncipher(19, 19, 2)
+		vncipher(16, 16, 1)
+		vncipher(17, 17, 1)
+		vncipher(18, 18, 1)
+		vncipher(19, 19, 1)
+		vncipherlast(16, 16, 0)
+		vncipherlast(17, 17, 0)
+		vncipherlast(18, 18, 0)
+		vncipherlast(19, 19, 0)
+
+		/*
+		 * XOR decrypted blocks with IV / previous block.
+		 */
+		vxor(16, 16, 24)
+		vxor(17, 17, 20)
+		vxor(18, 18, 21)
+		vxor(19, 19, 22)
+
+		/*
+		 * Store back result (with byteswap)
+		 */
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		stxvw4x(48, %[cc0], %[buf])
+		stxvw4x(49, %[cc1], %[buf])
+		stxvw4x(50, %[cc2], %[buf])
+		stxvw4x(51, %[cc3], %[buf])
+
+		/*
+		 * Fourth encrypted block is IV for next run.
+		 */
+		vand(24, 23, 23)
+
+		addi(%[buf], %[buf], 64)
+
+		bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+  [buf] "+b" (buf)
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"
+	);
+}
+
+/* see bearssl_block.h
+ *
+ * CBC-decrypt `len` bytes of `data` in place and update `iv`.
+ *
+ *   ctx   key context; num_rounds (10, 12, anything else = 14) selects
+ *         the 128/192/256 helper
+ *   iv    16-byte IV; on return it holds the last 16 bytes of the
+ *         original input, saved in nextiv before anything is overwritten
+ *   data  ciphertext in, plaintext out
+ *   len   byte count; returns at once when 0. The code copies
+ *         buf + len - 16 and masks with 63, so len is presumably
+ *         expected to be a non-zero multiple of 16 (not checked here)
+ *
+ * The asm helpers only process groups of four blocks. The largest such
+ * prefix is decrypted directly in `data`; its last ciphertext block is
+ * saved first and becomes the IV for the remainder. A remainder of one
+ * to three blocks is copied into a zero-padded 64-byte scratch buffer,
+ * decrypted there as four blocks, and only `len` bytes are copied back.
+ */
+void
+br_aes_pwr8_cbcdec_run(const br_aes_pwr8_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char nextiv[16];
+	unsigned char *buf;
+
+	if (len == 0) {
+		return;
+	}
+	buf = data;
+	memcpy(nextiv, buf + len - 16, 16);
+	if (len >= 64) {
+		size_t num_blocks;
+		unsigned char tmp[16];
+
+		num_blocks = (len >> 4) & ~(size_t)3;
+		memcpy(tmp, buf + (num_blocks << 4) - 16, 16);
+		switch (ctx->num_rounds) {
+		case 10:
+			cbcdec_128(ctx->skey.skni, iv, buf, num_blocks);
+			break;
+		case 12:
+			cbcdec_192(ctx->skey.skni, iv, buf, num_blocks);
+			break;
+		default:
+			cbcdec_256(ctx->skey.skni, iv, buf, num_blocks);
+			break;
+		}
+		buf += num_blocks << 4;
+		len &= 63;
+		memcpy(iv, tmp, 16);
+	}
+	if (len > 0) {
+		unsigned char tmp[64];
+
+		memcpy(tmp, buf, len);
+		memset(tmp + len, 0, (sizeof tmp) - len);
+		switch (ctx->num_rounds) {
+		case 10:
+			cbcdec_128(ctx->skey.skni, iv, tmp, 4);
+			break;
+		case 12:
+			cbcdec_192(ctx->skey.skni, iv, tmp, 4);
+			break;
+		default:
+			cbcdec_256(ctx->skey.skni, iv, tmp, 4);
+			break;
+		}
+		memcpy(buf, tmp, len);
+	}
+	memcpy(iv, nextiv, 16);
+}
+
+/* see bearssl_block.h
+ *
+ * Method table for the POWER8 AES/CBC decryption implementation:
+ * context size, two numeric fields (presumably block size in bytes and
+ * its base-2 logarithm; field names live in bearssl_block.h), then the
+ * init and run entry points cast to the generic class signatures.
+ */
+const br_block_cbcdec_class br_aes_pwr8_cbcdec_vtable = {
+	sizeof(br_aes_pwr8_cbcdec_keys),
+	16,
+	4,
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))
+		&br_aes_pwr8_cbcdec_init,
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_aes_pwr8_cbcdec_run
+};
+
+/* see bearssl_block.h
+ *
+ * BR_POWER8 build: return the vtable if br_aes_pwr8_supported() reports
+ * support, NULL otherwise.
+ */
+const br_block_cbcdec_class *
+br_aes_pwr8_cbcdec_get_vtable(void)
+{
+	return br_aes_pwr8_supported() ? &br_aes_pwr8_cbcdec_vtable : NULL;
+}
+
+#else
+
+/* see bearssl_block.h
+ *
+ * Build without BR_POWER8: the implementation is unavailable, so this
+ * always returns NULL.
+ */
+const br_block_cbcdec_class *
+br_aes_pwr8_cbcdec_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
diff --git a/third_party/bearssl/src/aes_pwr8_cbcenc.c b/third_party/bearssl/src/aes_pwr8_cbcenc.c
new file mode 100644
index 0000000..00f8eca
--- /dev/null
+++ b/third_party/bearssl/src/aes_pwr8_cbcenc.c
@@ -0,0 +1,417 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define BR_POWER_ASM_MACROS 1 +#include "inner.h" + +#if BR_POWER8 + +/* see bearssl_block.h */ +void +br_aes_pwr8_cbcenc_init(br_aes_pwr8_cbcenc_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_pwr8_cbcenc_vtable; + ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len); +} + +static void +cbcenc_128(const unsigned char *sk, + const unsigned char *iv, unsigned char *buf, size_t len) +{ + long cc; + +#if BR_POWER8_LE + static const uint32_t idx2be[] = { + 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C + }; +#endif + + cc = 0; + asm volatile ( + + /* + * Load subkeys into v0..v10 + */ + lxvw4x(32, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(33, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(34, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(35, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(36, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(37, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(38, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(39, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(40, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(41, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(42, %[cc], %[sk]) + +#if BR_POWER8_LE + /* + * v15 = constant for byteswapping words + */ + lxvw4x(47, 0, %[idx2be]) +#endif + /* + * Load IV into v16. + */ + lxvw4x(48, 0, %[iv]) +#if BR_POWER8_LE + vperm(16, 16, 16, 15) +#endif + + mtctr(%[num_blocks]) + label(loop) + /* + * Load next plaintext word and XOR with current IV. + */ + lxvw4x(49, 0, %[buf]) +#if BR_POWER8_LE + vperm(17, 17, 17, 15) +#endif + vxor(16, 16, 17) + + /* + * Encrypt the block. 
+ */ + vxor(16, 16, 0) + vcipher(16, 16, 1) + vcipher(16, 16, 2) + vcipher(16, 16, 3) + vcipher(16, 16, 4) + vcipher(16, 16, 5) + vcipher(16, 16, 6) + vcipher(16, 16, 7) + vcipher(16, 16, 8) + vcipher(16, 16, 9) + vcipherlast(16, 16, 10) + + /* + * Store back result (with byteswap) + */ +#if BR_POWER8_LE + vperm(17, 16, 16, 15) + stxvw4x(49, 0, %[buf]) +#else + stxvw4x(48, 0, %[buf]) +#endif + addi(%[buf], %[buf], 16) + + bdnz(loop) + +: [cc] "+b" (cc), [buf] "+b" (buf) +: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4) +#if BR_POWER8_LE + , [idx2be] "b" (idx2be) +#endif +: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", + "ctr", "memory" + ); +} + +static void +cbcenc_192(const unsigned char *sk, + const unsigned char *iv, unsigned char *buf, size_t len) +{ + long cc; + +#if BR_POWER8_LE + static const uint32_t idx2be[] = { + 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C + }; +#endif + + cc = 0; + asm volatile ( + + /* + * Load subkeys into v0..v12 + */ + lxvw4x(32, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(33, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(34, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(35, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(36, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(37, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(38, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(39, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(40, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(41, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(42, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(43, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(44, %[cc], %[sk]) + +#if BR_POWER8_LE + /* + * v15 = constant for byteswapping words + */ + lxvw4x(47, 0, %[idx2be]) +#endif + /* + * Load IV into v16. 
+ */ + lxvw4x(48, 0, %[iv]) +#if BR_POWER8_LE + vperm(16, 16, 16, 15) +#endif + + mtctr(%[num_blocks]) + label(loop) + /* + * Load next plaintext word and XOR with current IV. + */ + lxvw4x(49, 0, %[buf]) +#if BR_POWER8_LE + vperm(17, 17, 17, 15) +#endif + vxor(16, 16, 17) + + /* + * Encrypt the block. + */ + vxor(16, 16, 0) + vcipher(16, 16, 1) + vcipher(16, 16, 2) + vcipher(16, 16, 3) + vcipher(16, 16, 4) + vcipher(16, 16, 5) + vcipher(16, 16, 6) + vcipher(16, 16, 7) + vcipher(16, 16, 8) + vcipher(16, 16, 9) + vcipher(16, 16, 10) + vcipher(16, 16, 11) + vcipherlast(16, 16, 12) + + /* + * Store back result (with byteswap) + */ +#if BR_POWER8_LE + vperm(17, 16, 16, 15) + stxvw4x(49, 0, %[buf]) +#else + stxvw4x(48, 0, %[buf]) +#endif + addi(%[buf], %[buf], 16) + + bdnz(loop) + +: [cc] "+b" (cc), [buf] "+b" (buf) +: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4) +#if BR_POWER8_LE + , [idx2be] "b" (idx2be) +#endif +: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", + "ctr", "memory" + ); +} + +static void +cbcenc_256(const unsigned char *sk, + const unsigned char *iv, unsigned char *buf, size_t len) +{ + long cc; + +#if BR_POWER8_LE + static const uint32_t idx2be[] = { + 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C + }; +#endif + + cc = 0; + asm volatile ( + + /* + * Load subkeys into v0..v14 + */ + lxvw4x(32, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(33, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(34, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(35, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(36, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(37, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(38, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(39, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(40, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(41, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(42, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(43, 
%[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(44, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(45, %[cc], %[sk]) + addi(%[cc], %[cc], 16) + lxvw4x(46, %[cc], %[sk]) + +#if BR_POWER8_LE + /* + * v15 = constant for byteswapping words + */ + lxvw4x(47, 0, %[idx2be]) +#endif + /* + * Load IV into v16. + */ + lxvw4x(48, 0, %[iv]) +#if BR_POWER8_LE + vperm(16, 16, 16, 15) +#endif + + mtctr(%[num_blocks]) + label(loop) + /* + * Load next plaintext word and XOR with current IV. + */ + lxvw4x(49, 0, %[buf]) +#if BR_POWER8_LE + vperm(17, 17, 17, 15) +#endif + vxor(16, 16, 17) + + /* + * Encrypt the block. + */ + vxor(16, 16, 0) + vcipher(16, 16, 1) + vcipher(16, 16, 2) + vcipher(16, 16, 3) + vcipher(16, 16, 4) + vcipher(16, 16, 5) + vcipher(16, 16, 6) + vcipher(16, 16, 7) + vcipher(16, 16, 8) + vcipher(16, 16, 9) + vcipher(16, 16, 10) + vcipher(16, 16, 11) + vcipher(16, 16, 12) + vcipher(16, 16, 13) + vcipherlast(16, 16, 14) + + /* + * Store back result (with byteswap) + */ +#if BR_POWER8_LE + vperm(17, 16, 16, 15) + stxvw4x(49, 0, %[buf]) +#else + stxvw4x(48, 0, %[buf]) +#endif + addi(%[buf], %[buf], 16) + + bdnz(loop) + +: [cc] "+b" (cc), [buf] "+b" (buf) +: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4) +#if BR_POWER8_LE + , [idx2be] "b" (idx2be) +#endif +: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", + "ctr", "memory" + ); +} + +/* see bearssl_block.h */ +void +br_aes_pwr8_cbcenc_run(const br_aes_pwr8_cbcenc_keys *ctx, + void *iv, void *data, size_t len) +{ + if (len > 0) { + switch (ctx->num_rounds) { + case 10: + cbcenc_128(ctx->skey.skni, iv, data, len); + break; + case 12: + cbcenc_192(ctx->skey.skni, iv, data, len); + break; + default: + cbcenc_256(ctx->skey.skni, iv, data, len); + break; + } + memcpy(iv, (unsigned char *)data + (len - 16), 16); + } +} + +/* see bearssl_block.h */ +const br_block_cbcenc_class br_aes_pwr8_cbcenc_vtable = { + 
sizeof(br_aes_pwr8_cbcenc_keys), + 16, + 4, + (void (*)(const br_block_cbcenc_class **, const void *, size_t)) + &br_aes_pwr8_cbcenc_init, + (void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t)) + &br_aes_pwr8_cbcenc_run +}; + +/* see bearssl_block.h */ +const br_block_cbcenc_class * +br_aes_pwr8_cbcenc_get_vtable(void) +{ + return br_aes_pwr8_supported() ? &br_aes_pwr8_cbcenc_vtable : NULL; +} + +#else + +/* see bearssl_block.h */ +const br_block_cbcenc_class * +br_aes_pwr8_cbcenc_get_vtable(void) +{ + return NULL; +} + +#endif diff --git a/third_party/bearssl/src/aes_pwr8_ctr.c b/third_party/bearssl/src/aes_pwr8_ctr.c new file mode 100644 index 0000000..f5d20c0 --- /dev/null +++ b/third_party/bearssl/src/aes_pwr8_ctr.c @@ -0,0 +1,717 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#define BR_POWER_ASM_MACROS 1 +#include "inner.h" + +#if BR_POWER8 + +/* see bearssl_block.h */ +void +br_aes_pwr8_ctr_init(br_aes_pwr8_ctr_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_pwr8_ctr_vtable; + ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len); +} + +static void +ctr_128(const unsigned char *sk, const unsigned char *ivbuf, + unsigned char *buf, size_t num_blocks) +{ + long cc0, cc1, cc2, cc3; + +#if BR_POWER8_LE + static const uint32_t idx2be[] = { + 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C + }; +#endif + static const uint32_t ctrinc[] = { + 0, 0, 0, 4 + }; + + cc0 = 0; + cc1 = 16; + cc2 = 32; + cc3 = 48; + asm volatile ( + + /* + * Load subkeys into v0..v10 + */ + lxvw4x(32, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(33, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(34, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(35, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(36, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(37, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(38, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(39, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(40, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(41, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(42, %[cc0], %[sk]) + li(%[cc0], 0) + +#if BR_POWER8_LE + /* + * v15 = constant for byteswapping words + */ + lxvw4x(47, 0, %[idx2be]) +#endif + /* + * v28 = increment for IV counter. + */ + lxvw4x(60, 0, %[ctrinc]) + + /* + * Load IV into v16..v19 + */ + lxvw4x(48, %[cc0], %[ivbuf]) + lxvw4x(49, %[cc1], %[ivbuf]) + lxvw4x(50, %[cc2], %[ivbuf]) + lxvw4x(51, %[cc3], %[ivbuf]) +#if BR_POWER8_LE + vperm(16, 16, 16, 15) + vperm(17, 17, 17, 15) + vperm(18, 18, 18, 15) + vperm(19, 19, 19, 15) +#endif + + mtctr(%[num_blocks]) + label(loop) + /* + * Compute next IV into v24..v27 + */ + vadduwm(24, 16, 28) + vadduwm(25, 17, 28) + vadduwm(26, 18, 28) + vadduwm(27, 19, 28) + + /* + * Load next data blocks. 
We do this early on but we + * won't need them until IV encryption is done. + */ + lxvw4x(52, %[cc0], %[buf]) + lxvw4x(53, %[cc1], %[buf]) + lxvw4x(54, %[cc2], %[buf]) + lxvw4x(55, %[cc3], %[buf]) + + /* + * Encrypt the current IV. + */ + vxor(16, 16, 0) + vxor(17, 17, 0) + vxor(18, 18, 0) + vxor(19, 19, 0) + vcipher(16, 16, 1) + vcipher(17, 17, 1) + vcipher(18, 18, 1) + vcipher(19, 19, 1) + vcipher(16, 16, 2) + vcipher(17, 17, 2) + vcipher(18, 18, 2) + vcipher(19, 19, 2) + vcipher(16, 16, 3) + vcipher(17, 17, 3) + vcipher(18, 18, 3) + vcipher(19, 19, 3) + vcipher(16, 16, 4) + vcipher(17, 17, 4) + vcipher(18, 18, 4) + vcipher(19, 19, 4) + vcipher(16, 16, 5) + vcipher(17, 17, 5) + vcipher(18, 18, 5) + vcipher(19, 19, 5) + vcipher(16, 16, 6) + vcipher(17, 17, 6) + vcipher(18, 18, 6) + vcipher(19, 19, 6) + vcipher(16, 16, 7) + vcipher(17, 17, 7) + vcipher(18, 18, 7) + vcipher(19, 19, 7) + vcipher(16, 16, 8) + vcipher(17, 17, 8) + vcipher(18, 18, 8) + vcipher(19, 19, 8) + vcipher(16, 16, 9) + vcipher(17, 17, 9) + vcipher(18, 18, 9) + vcipher(19, 19, 9) + vcipherlast(16, 16, 10) + vcipherlast(17, 17, 10) + vcipherlast(18, 18, 10) + vcipherlast(19, 19, 10) + +#if BR_POWER8_LE + vperm(16, 16, 16, 15) + vperm(17, 17, 17, 15) + vperm(18, 18, 18, 15) + vperm(19, 19, 19, 15) +#endif + + /* + * Load next plaintext word and XOR with encrypted IV. + */ + vxor(16, 20, 16) + vxor(17, 21, 17) + vxor(18, 22, 18) + vxor(19, 23, 19) + stxvw4x(48, %[cc0], %[buf]) + stxvw4x(49, %[cc1], %[buf]) + stxvw4x(50, %[cc2], %[buf]) + stxvw4x(51, %[cc3], %[buf]) + + addi(%[buf], %[buf], 64) + + /* + * Update IV. 
+ */ + vand(16, 24, 24) + vand(17, 25, 25) + vand(18, 26, 26) + vand(19, 27, 27) + + bdnz(loop) + +: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3), + [buf] "+b" (buf) +: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2), + [ctrinc] "b" (ctrinc) +#if BR_POWER8_LE + , [idx2be] "b" (idx2be) +#endif +: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", + "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", + "ctr", "memory" + ); +} + +static void +ctr_192(const unsigned char *sk, const unsigned char *ivbuf, + unsigned char *buf, size_t num_blocks) +{ + long cc0, cc1, cc2, cc3; + +#if BR_POWER8_LE + static const uint32_t idx2be[] = { + 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C + }; +#endif + static const uint32_t ctrinc[] = { + 0, 0, 0, 4 + }; + + cc0 = 0; + cc1 = 16; + cc2 = 32; + cc3 = 48; + asm volatile ( + + /* + * Load subkeys into v0..v12 + */ + lxvw4x(32, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(33, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(34, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(35, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(36, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(37, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(38, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(39, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(40, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(41, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(42, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(43, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(44, %[cc0], %[sk]) + li(%[cc0], 0) + +#if BR_POWER8_LE + /* + * v15 = constant for byteswapping words + */ + lxvw4x(47, 0, %[idx2be]) +#endif + /* + * v28 = increment for IV counter. 
+ */ + lxvw4x(60, 0, %[ctrinc]) + + /* + * Load IV into v16..v19 + */ + lxvw4x(48, %[cc0], %[ivbuf]) + lxvw4x(49, %[cc1], %[ivbuf]) + lxvw4x(50, %[cc2], %[ivbuf]) + lxvw4x(51, %[cc3], %[ivbuf]) +#if BR_POWER8_LE + vperm(16, 16, 16, 15) + vperm(17, 17, 17, 15) + vperm(18, 18, 18, 15) + vperm(19, 19, 19, 15) +#endif + + mtctr(%[num_blocks]) + label(loop) + /* + * Compute next IV into v24..v27 + */ + vadduwm(24, 16, 28) + vadduwm(25, 17, 28) + vadduwm(26, 18, 28) + vadduwm(27, 19, 28) + + /* + * Load next data blocks. We do this early on but we + * won't need them until IV encryption is done. + */ + lxvw4x(52, %[cc0], %[buf]) + lxvw4x(53, %[cc1], %[buf]) + lxvw4x(54, %[cc2], %[buf]) + lxvw4x(55, %[cc3], %[buf]) + + /* + * Encrypt the current IV. + */ + vxor(16, 16, 0) + vxor(17, 17, 0) + vxor(18, 18, 0) + vxor(19, 19, 0) + vcipher(16, 16, 1) + vcipher(17, 17, 1) + vcipher(18, 18, 1) + vcipher(19, 19, 1) + vcipher(16, 16, 2) + vcipher(17, 17, 2) + vcipher(18, 18, 2) + vcipher(19, 19, 2) + vcipher(16, 16, 3) + vcipher(17, 17, 3) + vcipher(18, 18, 3) + vcipher(19, 19, 3) + vcipher(16, 16, 4) + vcipher(17, 17, 4) + vcipher(18, 18, 4) + vcipher(19, 19, 4) + vcipher(16, 16, 5) + vcipher(17, 17, 5) + vcipher(18, 18, 5) + vcipher(19, 19, 5) + vcipher(16, 16, 6) + vcipher(17, 17, 6) + vcipher(18, 18, 6) + vcipher(19, 19, 6) + vcipher(16, 16, 7) + vcipher(17, 17, 7) + vcipher(18, 18, 7) + vcipher(19, 19, 7) + vcipher(16, 16, 8) + vcipher(17, 17, 8) + vcipher(18, 18, 8) + vcipher(19, 19, 8) + vcipher(16, 16, 9) + vcipher(17, 17, 9) + vcipher(18, 18, 9) + vcipher(19, 19, 9) + vcipher(16, 16, 10) + vcipher(17, 17, 10) + vcipher(18, 18, 10) + vcipher(19, 19, 10) + vcipher(16, 16, 11) + vcipher(17, 17, 11) + vcipher(18, 18, 11) + vcipher(19, 19, 11) + vcipherlast(16, 16, 12) + vcipherlast(17, 17, 12) + vcipherlast(18, 18, 12) + vcipherlast(19, 19, 12) + +#if BR_POWER8_LE + vperm(16, 16, 16, 15) + vperm(17, 17, 17, 15) + vperm(18, 18, 18, 15) + vperm(19, 19, 19, 15) +#endif + + /* + 
* Load next plaintext word and XOR with encrypted IV. + */ + vxor(16, 20, 16) + vxor(17, 21, 17) + vxor(18, 22, 18) + vxor(19, 23, 19) + stxvw4x(48, %[cc0], %[buf]) + stxvw4x(49, %[cc1], %[buf]) + stxvw4x(50, %[cc2], %[buf]) + stxvw4x(51, %[cc3], %[buf]) + + addi(%[buf], %[buf], 64) + + /* + * Update IV. + */ + vand(16, 24, 24) + vand(17, 25, 25) + vand(18, 26, 26) + vand(19, 27, 27) + + bdnz(loop) + +: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3), + [buf] "+b" (buf) +: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2), + [ctrinc] "b" (ctrinc) +#if BR_POWER8_LE + , [idx2be] "b" (idx2be) +#endif +: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", + "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", + "ctr", "memory" + ); +} + +static void +ctr_256(const unsigned char *sk, const unsigned char *ivbuf, + unsigned char *buf, size_t num_blocks) +{ + long cc0, cc1, cc2, cc3; + +#if BR_POWER8_LE + static const uint32_t idx2be[] = { + 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C + }; +#endif + static const uint32_t ctrinc[] = { + 0, 0, 0, 4 + }; + + cc0 = 0; + cc1 = 16; + cc2 = 32; + cc3 = 48; + asm volatile ( + + /* + * Load subkeys into v0..v14 + */ + lxvw4x(32, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(33, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(34, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(35, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(36, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(37, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(38, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(39, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(40, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(41, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(42, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(43, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(44, %[cc0], %[sk]) 
+ addi(%[cc0], %[cc0], 16) + lxvw4x(45, %[cc0], %[sk]) + addi(%[cc0], %[cc0], 16) + lxvw4x(46, %[cc0], %[sk]) + li(%[cc0], 0) + +#if BR_POWER8_LE + /* + * v15 = constant for byteswapping words + */ + lxvw4x(47, 0, %[idx2be]) +#endif + /* + * v28 = increment for IV counter. + */ + lxvw4x(60, 0, %[ctrinc]) + + /* + * Load IV into v16..v19 + */ + lxvw4x(48, %[cc0], %[ivbuf]) + lxvw4x(49, %[cc1], %[ivbuf]) + lxvw4x(50, %[cc2], %[ivbuf]) + lxvw4x(51, %[cc3], %[ivbuf]) +#if BR_POWER8_LE + vperm(16, 16, 16, 15) + vperm(17, 17, 17, 15) + vperm(18, 18, 18, 15) + vperm(19, 19, 19, 15) +#endif + + mtctr(%[num_blocks]) + label(loop) + /* + * Compute next IV into v24..v27 + */ + vadduwm(24, 16, 28) + vadduwm(25, 17, 28) + vadduwm(26, 18, 28) + vadduwm(27, 19, 28) + + /* + * Load next data blocks. We do this early on but we + * won't need them until IV encryption is done. + */ + lxvw4x(52, %[cc0], %[buf]) + lxvw4x(53, %[cc1], %[buf]) + lxvw4x(54, %[cc2], %[buf]) + lxvw4x(55, %[cc3], %[buf]) + + /* + * Encrypt the current IV. 
+ */ + vxor(16, 16, 0) + vxor(17, 17, 0) + vxor(18, 18, 0) + vxor(19, 19, 0) + vcipher(16, 16, 1) + vcipher(17, 17, 1) + vcipher(18, 18, 1) + vcipher(19, 19, 1) + vcipher(16, 16, 2) + vcipher(17, 17, 2) + vcipher(18, 18, 2) + vcipher(19, 19, 2) + vcipher(16, 16, 3) + vcipher(17, 17, 3) + vcipher(18, 18, 3) + vcipher(19, 19, 3) + vcipher(16, 16, 4) + vcipher(17, 17, 4) + vcipher(18, 18, 4) + vcipher(19, 19, 4) + vcipher(16, 16, 5) + vcipher(17, 17, 5) + vcipher(18, 18, 5) + vcipher(19, 19, 5) + vcipher(16, 16, 6) + vcipher(17, 17, 6) + vcipher(18, 18, 6) + vcipher(19, 19, 6) + vcipher(16, 16, 7) + vcipher(17, 17, 7) + vcipher(18, 18, 7) + vcipher(19, 19, 7) + vcipher(16, 16, 8) + vcipher(17, 17, 8) + vcipher(18, 18, 8) + vcipher(19, 19, 8) + vcipher(16, 16, 9) + vcipher(17, 17, 9) + vcipher(18, 18, 9) + vcipher(19, 19, 9) + vcipher(16, 16, 10) + vcipher(17, 17, 10) + vcipher(18, 18, 10) + vcipher(19, 19, 10) + vcipher(16, 16, 11) + vcipher(17, 17, 11) + vcipher(18, 18, 11) + vcipher(19, 19, 11) + vcipher(16, 16, 12) + vcipher(17, 17, 12) + vcipher(18, 18, 12) + vcipher(19, 19, 12) + vcipher(16, 16, 13) + vcipher(17, 17, 13) + vcipher(18, 18, 13) + vcipher(19, 19, 13) + vcipherlast(16, 16, 14) + vcipherlast(17, 17, 14) + vcipherlast(18, 18, 14) + vcipherlast(19, 19, 14) + +#if BR_POWER8_LE + vperm(16, 16, 16, 15) + vperm(17, 17, 17, 15) + vperm(18, 18, 18, 15) + vperm(19, 19, 19, 15) +#endif + + /* + * Load next plaintext word and XOR with encrypted IV. + */ + vxor(16, 20, 16) + vxor(17, 21, 17) + vxor(18, 22, 18) + vxor(19, 23, 19) + stxvw4x(48, %[cc0], %[buf]) + stxvw4x(49, %[cc1], %[buf]) + stxvw4x(50, %[cc2], %[buf]) + stxvw4x(51, %[cc3], %[buf]) + + addi(%[buf], %[buf], 64) + + /* + * Update IV. 
+ */ + vand(16, 24, 24) + vand(17, 25, 25) + vand(18, 26, 26) + vand(19, 27, 27) + + bdnz(loop) + +: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3), + [buf] "+b" (buf) +: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2), + [ctrinc] "b" (ctrinc) +#if BR_POWER8_LE + , [idx2be] "b" (idx2be) +#endif +: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", + "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", + "ctr", "memory" + ); +} + +/* see bearssl_block.h */ +uint32_t +br_aes_pwr8_ctr_run(const br_aes_pwr8_ctr_keys *ctx, + const void *iv, uint32_t cc, void *data, size_t len) +{ + unsigned char *buf; + unsigned char ivbuf[64]; + + buf = data; + memcpy(ivbuf + 0, iv, 12); + memcpy(ivbuf + 16, iv, 12); + memcpy(ivbuf + 32, iv, 12); + memcpy(ivbuf + 48, iv, 12); + if (len >= 64) { + br_enc32be(ivbuf + 12, cc + 0); + br_enc32be(ivbuf + 28, cc + 1); + br_enc32be(ivbuf + 44, cc + 2); + br_enc32be(ivbuf + 60, cc + 3); + switch (ctx->num_rounds) { + case 10: + ctr_128(ctx->skey.skni, ivbuf, buf, + (len >> 4) & ~(size_t)3); + break; + case 12: + ctr_192(ctx->skey.skni, ivbuf, buf, + (len >> 4) & ~(size_t)3); + break; + default: + ctr_256(ctx->skey.skni, ivbuf, buf, + (len >> 4) & ~(size_t)3); + break; + } + cc += (len >> 4) & ~(size_t)3; + buf += len & ~(size_t)63; + len &= 63; + } + if (len > 0) { + unsigned char tmp[64]; + + memcpy(tmp, buf, len); + memset(tmp + len, 0, (sizeof tmp) - len); + br_enc32be(ivbuf + 12, cc + 0); + br_enc32be(ivbuf + 28, cc + 1); + br_enc32be(ivbuf + 44, cc + 2); + br_enc32be(ivbuf + 60, cc + 3); + switch (ctx->num_rounds) { + case 10: + ctr_128(ctx->skey.skni, ivbuf, tmp, 4); + break; + case 12: + ctr_192(ctx->skey.skni, ivbuf, tmp, 4); + break; + default: + ctr_256(ctx->skey.skni, ivbuf, tmp, 4); + break; + } + memcpy(buf, tmp, len); + cc += (len + 15) >> 4; + } + return cc; +} + +/* see bearssl_block.h 
*/ +const br_block_ctr_class br_aes_pwr8_ctr_vtable = { + sizeof(br_aes_pwr8_ctr_keys), + 16, + 4, + (void (*)(const br_block_ctr_class **, const void *, size_t)) + &br_aes_pwr8_ctr_init, + (uint32_t (*)(const br_block_ctr_class *const *, + const void *, uint32_t, void *, size_t)) + &br_aes_pwr8_ctr_run +}; + +/* see bearssl_block.h */ +const br_block_ctr_class * +br_aes_pwr8_ctr_get_vtable(void) +{ + return br_aes_pwr8_supported() ? &br_aes_pwr8_ctr_vtable : NULL; +} + +#else + +/* see bearssl_block.h */ +const br_block_ctr_class * +br_aes_pwr8_ctr_get_vtable(void) +{ + return NULL; +} + +#endif diff --git a/third_party/bearssl/src/aes_pwr8_ctrcbc.c b/third_party/bearssl/src/aes_pwr8_ctrcbc.c new file mode 100644 index 0000000..a67d30b --- /dev/null +++ b/third_party/bearssl/src/aes_pwr8_ctrcbc.c @@ -0,0 +1,946 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#define BR_POWER_ASM_MACROS 1 +#include "inner.h" + +#if BR_POWER8 + +/* see bearssl_block.h */ +const br_block_ctrcbc_class * +br_aes_pwr8_ctrcbc_get_vtable(void) +{ + return br_aes_pwr8_supported() ? &br_aes_pwr8_ctrcbc_vtable : NULL; +} + +/* see bearssl_block.h */ +void +br_aes_pwr8_ctrcbc_init(br_aes_pwr8_ctrcbc_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_pwr8_ctrcbc_vtable; + ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len); +} + +/* + * Register conventions for CTR + CBC-MAC: + * + * AES subkeys are in registers 0 to 10/12/14 (depending on keys size) + * Register v15 contains the byteswap index register (little-endian only) + * Register v16 contains the CTR counter value + * Register v17 contains the CBC-MAC current value + * Registers v18 to v27 are scratch + * Counter increment uses v28, v29 and v30 + * + * For CTR alone: + * + * AES subkeys are in registers 0 to 10/12/14 (depending on keys size) + * Register v15 contains the byteswap index register (little-endian only) + * Registers v16 to v19 contain the CTR counter values (four blocks) + * Registers v20 to v27 are scratch + * Counter increment uses v28, v29 and v30 + */ + +#define LOAD_SUBKEYS_128 \ + lxvw4x(32, %[cc], %[sk]) \ + addi(%[cc], %[cc], 16) \ + lxvw4x(33, %[cc], %[sk]) \ + addi(%[cc], %[cc], 16) \ + lxvw4x(34, %[cc], %[sk]) \ + addi(%[cc], %[cc], 16) \ + lxvw4x(35, %[cc], %[sk]) \ + addi(%[cc], %[cc], 16) \ + lxvw4x(36, %[cc], %[sk]) \ + addi(%[cc], %[cc], 16) \ + lxvw4x(37, %[cc], %[sk]) \ + addi(%[cc], %[cc], 16) \ + lxvw4x(38, %[cc], %[sk]) \ + addi(%[cc], %[cc], 16) \ + lxvw4x(39, %[cc], %[sk]) \ + addi(%[cc], %[cc], 16) \ + lxvw4x(40, %[cc], %[sk]) \ + addi(%[cc], %[cc], 16) \ + lxvw4x(41, %[cc], %[sk]) \ + addi(%[cc], %[cc], 16) \ + lxvw4x(42, %[cc], %[sk]) + +#define LOAD_SUBKEYS_192 \ + LOAD_SUBKEYS_128 \ + addi(%[cc], %[cc], 16) \ + lxvw4x(43, %[cc], %[sk]) \ + addi(%[cc], %[cc], 16) \ + lxvw4x(44, %[cc], %[sk]) + +#define 
LOAD_SUBKEYS_256 \ + LOAD_SUBKEYS_192 \ + addi(%[cc], %[cc], 16) \ + lxvw4x(45, %[cc], %[sk]) \ + addi(%[cc], %[cc], 16) \ + lxvw4x(46, %[cc], %[sk]) + +#define BLOCK_ENCRYPT_128(x) \ + vxor(x, x, 0) \ + vcipher(x, x, 1) \ + vcipher(x, x, 2) \ + vcipher(x, x, 3) \ + vcipher(x, x, 4) \ + vcipher(x, x, 5) \ + vcipher(x, x, 6) \ + vcipher(x, x, 7) \ + vcipher(x, x, 8) \ + vcipher(x, x, 9) \ + vcipherlast(x, x, 10) + +#define BLOCK_ENCRYPT_192(x) \ + vxor(x, x, 0) \ + vcipher(x, x, 1) \ + vcipher(x, x, 2) \ + vcipher(x, x, 3) \ + vcipher(x, x, 4) \ + vcipher(x, x, 5) \ + vcipher(x, x, 6) \ + vcipher(x, x, 7) \ + vcipher(x, x, 8) \ + vcipher(x, x, 9) \ + vcipher(x, x, 10) \ + vcipher(x, x, 11) \ + vcipherlast(x, x, 12) + +#define BLOCK_ENCRYPT_256(x) \ + vxor(x, x, 0) \ + vcipher(x, x, 1) \ + vcipher(x, x, 2) \ + vcipher(x, x, 3) \ + vcipher(x, x, 4) \ + vcipher(x, x, 5) \ + vcipher(x, x, 6) \ + vcipher(x, x, 7) \ + vcipher(x, x, 8) \ + vcipher(x, x, 9) \ + vcipher(x, x, 10) \ + vcipher(x, x, 11) \ + vcipher(x, x, 12) \ + vcipher(x, x, 13) \ + vcipherlast(x, x, 14) + +#define BLOCK_ENCRYPT_X2_128(x, y) \ + vxor(x, x, 0) \ + vxor(y, y, 0) \ + vcipher(x, x, 1) \ + vcipher(y, y, 1) \ + vcipher(x, x, 2) \ + vcipher(y, y, 2) \ + vcipher(x, x, 3) \ + vcipher(y, y, 3) \ + vcipher(x, x, 4) \ + vcipher(y, y, 4) \ + vcipher(x, x, 5) \ + vcipher(y, y, 5) \ + vcipher(x, x, 6) \ + vcipher(y, y, 6) \ + vcipher(x, x, 7) \ + vcipher(y, y, 7) \ + vcipher(x, x, 8) \ + vcipher(y, y, 8) \ + vcipher(x, x, 9) \ + vcipher(y, y, 9) \ + vcipherlast(x, x, 10) \ + vcipherlast(y, y, 10) + +#define BLOCK_ENCRYPT_X2_192(x, y) \ + vxor(x, x, 0) \ + vxor(y, y, 0) \ + vcipher(x, x, 1) \ + vcipher(y, y, 1) \ + vcipher(x, x, 2) \ + vcipher(y, y, 2) \ + vcipher(x, x, 3) \ + vcipher(y, y, 3) \ + vcipher(x, x, 4) \ + vcipher(y, y, 4) \ + vcipher(x, x, 5) \ + vcipher(y, y, 5) \ + vcipher(x, x, 6) \ + vcipher(y, y, 6) \ + vcipher(x, x, 7) \ + vcipher(y, y, 7) \ + vcipher(x, x, 8) \ + vcipher(y, y, 8) \ + 
vcipher(x, x, 9) \ + vcipher(y, y, 9) \ + vcipher(x, x, 10) \ + vcipher(y, y, 10) \ + vcipher(x, x, 11) \ + vcipher(y, y, 11) \ + vcipherlast(x, x, 12) \ + vcipherlast(y, y, 12) + +#define BLOCK_ENCRYPT_X2_256(x, y) \ + vxor(x, x, 0) \ + vxor(y, y, 0) \ + vcipher(x, x, 1) \ + vcipher(y, y, 1) \ + vcipher(x, x, 2) \ + vcipher(y, y, 2) \ + vcipher(x, x, 3) \ + vcipher(y, y, 3) \ + vcipher(x, x, 4) \ + vcipher(y, y, 4) \ + vcipher(x, x, 5) \ + vcipher(y, y, 5) \ + vcipher(x, x, 6) \ + vcipher(y, y, 6) \ + vcipher(x, x, 7) \ + vcipher(y, y, 7) \ + vcipher(x, x, 8) \ + vcipher(y, y, 8) \ + vcipher(x, x, 9) \ + vcipher(y, y, 9) \ + vcipher(x, x, 10) \ + vcipher(y, y, 10) \ + vcipher(x, x, 11) \ + vcipher(y, y, 11) \ + vcipher(x, x, 12) \ + vcipher(y, y, 12) \ + vcipher(x, x, 13) \ + vcipher(y, y, 13) \ + vcipherlast(x, x, 14) \ + vcipherlast(y, y, 14) + +#define BLOCK_ENCRYPT_X4_128(x0, x1, x2, x3) \ + vxor(x0, x0, 0) \ + vxor(x1, x1, 0) \ + vxor(x2, x2, 0) \ + vxor(x3, x3, 0) \ + vcipher(x0, x0, 1) \ + vcipher(x1, x1, 1) \ + vcipher(x2, x2, 1) \ + vcipher(x3, x3, 1) \ + vcipher(x0, x0, 2) \ + vcipher(x1, x1, 2) \ + vcipher(x2, x2, 2) \ + vcipher(x3, x3, 2) \ + vcipher(x0, x0, 3) \ + vcipher(x1, x1, 3) \ + vcipher(x2, x2, 3) \ + vcipher(x3, x3, 3) \ + vcipher(x0, x0, 4) \ + vcipher(x1, x1, 4) \ + vcipher(x2, x2, 4) \ + vcipher(x3, x3, 4) \ + vcipher(x0, x0, 5) \ + vcipher(x1, x1, 5) \ + vcipher(x2, x2, 5) \ + vcipher(x3, x3, 5) \ + vcipher(x0, x0, 6) \ + vcipher(x1, x1, 6) \ + vcipher(x2, x2, 6) \ + vcipher(x3, x3, 6) \ + vcipher(x0, x0, 7) \ + vcipher(x1, x1, 7) \ + vcipher(x2, x2, 7) \ + vcipher(x3, x3, 7) \ + vcipher(x0, x0, 8) \ + vcipher(x1, x1, 8) \ + vcipher(x2, x2, 8) \ + vcipher(x3, x3, 8) \ + vcipher(x0, x0, 9) \ + vcipher(x1, x1, 9) \ + vcipher(x2, x2, 9) \ + vcipher(x3, x3, 9) \ + vcipherlast(x0, x0, 10) \ + vcipherlast(x1, x1, 10) \ + vcipherlast(x2, x2, 10) \ + vcipherlast(x3, x3, 10) + +#define BLOCK_ENCRYPT_X4_192(x0, x1, x2, x3) \ + vxor(x0, x0, 0) \ + 
vxor(x1, x1, 0) \ + vxor(x2, x2, 0) \ + vxor(x3, x3, 0) \ + vcipher(x0, x0, 1) \ + vcipher(x1, x1, 1) \ + vcipher(x2, x2, 1) \ + vcipher(x3, x3, 1) \ + vcipher(x0, x0, 2) \ + vcipher(x1, x1, 2) \ + vcipher(x2, x2, 2) \ + vcipher(x3, x3, 2) \ + vcipher(x0, x0, 3) \ + vcipher(x1, x1, 3) \ + vcipher(x2, x2, 3) \ + vcipher(x3, x3, 3) \ + vcipher(x0, x0, 4) \ + vcipher(x1, x1, 4) \ + vcipher(x2, x2, 4) \ + vcipher(x3, x3, 4) \ + vcipher(x0, x0, 5) \ + vcipher(x1, x1, 5) \ + vcipher(x2, x2, 5) \ + vcipher(x3, x3, 5) \ + vcipher(x0, x0, 6) \ + vcipher(x1, x1, 6) \ + vcipher(x2, x2, 6) \ + vcipher(x3, x3, 6) \ + vcipher(x0, x0, 7) \ + vcipher(x1, x1, 7) \ + vcipher(x2, x2, 7) \ + vcipher(x3, x3, 7) \ + vcipher(x0, x0, 8) \ + vcipher(x1, x1, 8) \ + vcipher(x2, x2, 8) \ + vcipher(x3, x3, 8) \ + vcipher(x0, x0, 9) \ + vcipher(x1, x1, 9) \ + vcipher(x2, x2, 9) \ + vcipher(x3, x3, 9) \ + vcipher(x0, x0, 10) \ + vcipher(x1, x1, 10) \ + vcipher(x2, x2, 10) \ + vcipher(x3, x3, 10) \ + vcipher(x0, x0, 11) \ + vcipher(x1, x1, 11) \ + vcipher(x2, x2, 11) \ + vcipher(x3, x3, 11) \ + vcipherlast(x0, x0, 12) \ + vcipherlast(x1, x1, 12) \ + vcipherlast(x2, x2, 12) \ + vcipherlast(x3, x3, 12) + +#define BLOCK_ENCRYPT_X4_256(x0, x1, x2, x3) \ + vxor(x0, x0, 0) \ + vxor(x1, x1, 0) \ + vxor(x2, x2, 0) \ + vxor(x3, x3, 0) \ + vcipher(x0, x0, 1) \ + vcipher(x1, x1, 1) \ + vcipher(x2, x2, 1) \ + vcipher(x3, x3, 1) \ + vcipher(x0, x0, 2) \ + vcipher(x1, x1, 2) \ + vcipher(x2, x2, 2) \ + vcipher(x3, x3, 2) \ + vcipher(x0, x0, 3) \ + vcipher(x1, x1, 3) \ + vcipher(x2, x2, 3) \ + vcipher(x3, x3, 3) \ + vcipher(x0, x0, 4) \ + vcipher(x1, x1, 4) \ + vcipher(x2, x2, 4) \ + vcipher(x3, x3, 4) \ + vcipher(x0, x0, 5) \ + vcipher(x1, x1, 5) \ + vcipher(x2, x2, 5) \ + vcipher(x3, x3, 5) \ + vcipher(x0, x0, 6) \ + vcipher(x1, x1, 6) \ + vcipher(x2, x2, 6) \ + vcipher(x3, x3, 6) \ + vcipher(x0, x0, 7) \ + vcipher(x1, x1, 7) \ + vcipher(x2, x2, 7) \ + vcipher(x3, x3, 7) \ + vcipher(x0, x0, 8) \ + vcipher(x1, 
x1, 8) \ + vcipher(x2, x2, 8) \ + vcipher(x3, x3, 8) \ + vcipher(x0, x0, 9) \ + vcipher(x1, x1, 9) \ + vcipher(x2, x2, 9) \ + vcipher(x3, x3, 9) \ + vcipher(x0, x0, 10) \ + vcipher(x1, x1, 10) \ + vcipher(x2, x2, 10) \ + vcipher(x3, x3, 10) \ + vcipher(x0, x0, 11) \ + vcipher(x1, x1, 11) \ + vcipher(x2, x2, 11) \ + vcipher(x3, x3, 11) \ + vcipher(x0, x0, 12) \ + vcipher(x1, x1, 12) \ + vcipher(x2, x2, 12) \ + vcipher(x3, x3, 12) \ + vcipher(x0, x0, 13) \ + vcipher(x1, x1, 13) \ + vcipher(x2, x2, 13) \ + vcipher(x3, x3, 13) \ + vcipherlast(x0, x0, 14) \ + vcipherlast(x1, x1, 14) \ + vcipherlast(x2, x2, 14) \ + vcipherlast(x3, x3, 14) + +#if BR_POWER8_LE +static const uint32_t idx2be[] = { + 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C +}; +#define BYTESWAP_INIT lxvw4x(47, 0, %[idx2be]) +#define BYTESWAP(x) vperm(x, x, x, 15) +#define BYTESWAPX(d, s) vperm(d, s, s, 15) +#define BYTESWAP_REG , [idx2be] "b" (idx2be) +#else +#define BYTESWAP_INIT +#define BYTESWAP(x) +#define BYTESWAPX(d, s) vand(d, s, s) +#define BYTESWAP_REG +#endif + +static const uint32_t ctrinc[] = { + 0, 0, 0, 1 +}; +static const uint32_t ctrinc_x4[] = { + 0, 0, 0, 4 +}; +#define INCR_128_INIT lxvw4x(60, 0, %[ctrinc]) +#define INCR_128_X4_INIT lxvw4x(60, 0, %[ctrinc_x4]) +#define INCR_128(d, s) \ + vaddcuw(29, s, 28) \ + vadduwm(d, s, 28) \ + vsldoi(30, 29, 29, 4) \ + vaddcuw(29, d, 30) \ + vadduwm(d, d, 30) \ + vsldoi(30, 29, 29, 4) \ + vaddcuw(29, d, 30) \ + vadduwm(d, d, 30) \ + vsldoi(30, 29, 29, 4) \ + vadduwm(d, d, 30) + +#define MKCTR(size) \ +static void \ +ctr_ ## size(const unsigned char *sk, \ + unsigned char *ctrbuf, unsigned char *buf, size_t num_blocks_x4) \ +{ \ + long cc, cc0, cc1, cc2, cc3; \ + \ + cc = 0; \ + cc0 = 0; \ + cc1 = 16; \ + cc2 = 32; \ + cc3 = 48; \ + asm volatile ( \ + \ + /* \ + * Load subkeys into v0..v10 \ + */ \ + LOAD_SUBKEYS_ ## size \ + li(%[cc], 0) \ + \ + BYTESWAP_INIT \ + INCR_128_X4_INIT \ + \ + /* \ + * Load current CTR counters into v16 to v19. 
\ + */ \ + lxvw4x(48, %[cc0], %[ctrbuf]) \ + lxvw4x(49, %[cc1], %[ctrbuf]) \ + lxvw4x(50, %[cc2], %[ctrbuf]) \ + lxvw4x(51, %[cc3], %[ctrbuf]) \ + BYTESWAP(16) \ + BYTESWAP(17) \ + BYTESWAP(18) \ + BYTESWAP(19) \ + \ + mtctr(%[num_blocks_x4]) \ + \ + label(loop) \ + /* \ + * Compute next counter values into v20..v23. \ + */ \ + INCR_128(20, 16) \ + INCR_128(21, 17) \ + INCR_128(22, 18) \ + INCR_128(23, 19) \ + \ + /* \ + * Encrypt counter values and XOR into next data blocks. \ + */ \ + lxvw4x(56, %[cc0], %[buf]) \ + lxvw4x(57, %[cc1], %[buf]) \ + lxvw4x(58, %[cc2], %[buf]) \ + lxvw4x(59, %[cc3], %[buf]) \ + BYTESWAP(24) \ + BYTESWAP(25) \ + BYTESWAP(26) \ + BYTESWAP(27) \ + BLOCK_ENCRYPT_X4_ ## size(16, 17, 18, 19) \ + vxor(16, 16, 24) \ + vxor(17, 17, 25) \ + vxor(18, 18, 26) \ + vxor(19, 19, 27) \ + BYTESWAP(16) \ + BYTESWAP(17) \ + BYTESWAP(18) \ + BYTESWAP(19) \ + stxvw4x(48, %[cc0], %[buf]) \ + stxvw4x(49, %[cc1], %[buf]) \ + stxvw4x(50, %[cc2], %[buf]) \ + stxvw4x(51, %[cc3], %[buf]) \ + \ + /* \ + * Update counters and data pointer. \ + */ \ + vand(16, 20, 20) \ + vand(17, 21, 21) \ + vand(18, 22, 22) \ + vand(19, 23, 23) \ + addi(%[buf], %[buf], 64) \ + \ + bdnz(loop) \ + \ + /* \ + * Write back new counter values. 
\ + */ \ + BYTESWAP(16) \ + BYTESWAP(17) \ + BYTESWAP(18) \ + BYTESWAP(19) \ + stxvw4x(48, %[cc0], %[ctrbuf]) \ + stxvw4x(49, %[cc1], %[ctrbuf]) \ + stxvw4x(50, %[cc2], %[ctrbuf]) \ + stxvw4x(51, %[cc3], %[ctrbuf]) \ + \ +: [cc] "+b" (cc), [buf] "+b" (buf), \ + [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3) \ +: [sk] "b" (sk), [ctrbuf] "b" (ctrbuf), \ + [num_blocks_x4] "b" (num_blocks_x4), [ctrinc_x4] "b" (ctrinc_x4) \ + BYTESWAP_REG \ +: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \ + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \ + "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \ + "v30", "ctr", "memory" \ + ); \ +} + +MKCTR(128) +MKCTR(192) +MKCTR(256) + +#define MKCBCMAC(size) \ +static void \ +cbcmac_ ## size(const unsigned char *sk, \ + unsigned char *cbcmac, const unsigned char *buf, size_t num_blocks) \ +{ \ + long cc; \ + \ + cc = 0; \ + asm volatile ( \ + \ + /* \ + * Load subkeys into v0..v10 \ + */ \ + LOAD_SUBKEYS_ ## size \ + li(%[cc], 0) \ + \ + BYTESWAP_INIT \ + \ + /* \ + * Load current CBC-MAC value into v16. \ + */ \ + lxvw4x(48, %[cc], %[cbcmac]) \ + BYTESWAP(16) \ + \ + mtctr(%[num_blocks]) \ + \ + label(loop) \ + /* \ + * Load next block, XOR into current CBC-MAC value, \ + * and then encrypt it. \ + */ \ + lxvw4x(49, %[cc], %[buf]) \ + BYTESWAP(17) \ + vxor(16, 16, 17) \ + BLOCK_ENCRYPT_ ## size(16) \ + addi(%[buf], %[buf], 16) \ + \ + bdnz(loop) \ + \ + /* \ + * Write back new CBC-MAC value. 
\ + */ \ + BYTESWAP(16) \ + stxvw4x(48, %[cc], %[cbcmac]) \ + \ +: [cc] "+b" (cc), [buf] "+b" (buf) \ +: [sk] "b" (sk), [cbcmac] "b" (cbcmac), [num_blocks] "b" (num_blocks) \ + BYTESWAP_REG \ +: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \ + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \ + "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \ + "v30", "ctr", "memory" \ + ); \ +} + +MKCBCMAC(128) +MKCBCMAC(192) +MKCBCMAC(256) + +#define MKENCRYPT(size) \ +static void \ +ctrcbc_ ## size ## _encrypt(const unsigned char *sk, \ + unsigned char *ctr, unsigned char *cbcmac, unsigned char *buf, \ + size_t num_blocks) \ +{ \ + long cc; \ + \ + cc = 0; \ + asm volatile ( \ + \ + /* \ + * Load subkeys into v0..v10 \ + */ \ + LOAD_SUBKEYS_ ## size \ + li(%[cc], 0) \ + \ + BYTESWAP_INIT \ + INCR_128_INIT \ + \ + /* \ + * Load current CTR counter into v16, and current \ + * CBC-MAC IV into v17. \ + */ \ + lxvw4x(48, %[cc], %[ctr]) \ + lxvw4x(49, %[cc], %[cbcmac]) \ + BYTESWAP(16) \ + BYTESWAP(17) \ + \ + /* \ + * At each iteration, we do two parallel encryption: \ + * - new counter value for encryption of the next block; \ + * - CBC-MAC over the previous encrypted block. \ + * Thus, each plaintext block implies two AES instances, \ + * over two successive iterations. This requires a single \ + * counter encryption before the loop, and a single \ + * CBC-MAC encryption after the loop. \ + */ \ + \ + /* \ + * Encrypt first block (into v20). \ + */ \ + lxvw4x(52, %[cc], %[buf]) \ + BYTESWAP(20) \ + INCR_128(22, 16) \ + BLOCK_ENCRYPT_ ## size(16) \ + vxor(20, 20, 16) \ + BYTESWAPX(21, 20) \ + stxvw4x(53, %[cc], %[buf]) \ + vand(16, 22, 22) \ + addi(%[buf], %[buf], 16) \ + \ + /* \ + * Load loop counter; skip the loop if there is only \ + * one block in total (already handled by the boundary \ + * conditions). 
\ + */ \ + mtctr(%[num_blocks]) \ + bdz(fastexit) \ + \ + label(loop) \ + /* \ + * Upon loop entry: \ + * v16 counter value for next block \ + * v17 current CBC-MAC value \ + * v20 encrypted previous block \ + */ \ + vxor(17, 17, 20) \ + INCR_128(22, 16) \ + lxvw4x(52, %[cc], %[buf]) \ + BYTESWAP(20) \ + BLOCK_ENCRYPT_X2_ ## size(16, 17) \ + vxor(20, 20, 16) \ + BYTESWAPX(21, 20) \ + stxvw4x(53, %[cc], %[buf]) \ + addi(%[buf], %[buf], 16) \ + vand(16, 22, 22) \ + \ + bdnz(loop) \ + \ + label(fastexit) \ + vxor(17, 17, 20) \ + BLOCK_ENCRYPT_ ## size(17) \ + BYTESWAP(16) \ + BYTESWAP(17) \ + stxvw4x(48, %[cc], %[ctr]) \ + stxvw4x(49, %[cc], %[cbcmac]) \ + \ +: [cc] "+b" (cc), [buf] "+b" (buf) \ +: [sk] "b" (sk), [ctr] "b" (ctr), [cbcmac] "b" (cbcmac), \ + [num_blocks] "b" (num_blocks), [ctrinc] "b" (ctrinc) \ + BYTESWAP_REG \ +: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \ + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \ + "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \ + "v30", "ctr", "memory" \ + ); \ +} + +MKENCRYPT(128) +MKENCRYPT(192) +MKENCRYPT(256) + +#define MKDECRYPT(size) \ +static void \ +ctrcbc_ ## size ## _decrypt(const unsigned char *sk, \ + unsigned char *ctr, unsigned char *cbcmac, unsigned char *buf, \ + size_t num_blocks) \ +{ \ + long cc; \ + \ + cc = 0; \ + asm volatile ( \ + \ + /* \ + * Load subkeys into v0..v10 \ + */ \ + LOAD_SUBKEYS_ ## size \ + li(%[cc], 0) \ + \ + BYTESWAP_INIT \ + INCR_128_INIT \ + \ + /* \ + * Load current CTR counter into v16, and current \ + * CBC-MAC IV into v17. \ + */ \ + lxvw4x(48, %[cc], %[ctr]) \ + lxvw4x(49, %[cc], %[cbcmac]) \ + BYTESWAP(16) \ + BYTESWAP(17) \ + \ + /* \ + * At each iteration, we do two parallel encryption: \ + * - new counter value for decryption of the next block; \ + * - CBC-MAC over the next encrypted block. 
\ + * Each iteration performs the two AES instances related \ + * to the current block; there is thus no need for some \ + * extra pre-loop and post-loop work as in encryption. \ + */ \ + \ + mtctr(%[num_blocks]) \ + \ + label(loop) \ + /* \ + * Upon loop entry: \ + * v16 counter value for next block \ + * v17 current CBC-MAC value \ + */ \ + lxvw4x(52, %[cc], %[buf]) \ + BYTESWAP(20) \ + vxor(17, 17, 20) \ + INCR_128(22, 16) \ + BLOCK_ENCRYPT_X2_ ## size(16, 17) \ + vxor(20, 20, 16) \ + BYTESWAPX(21, 20) \ + stxvw4x(53, %[cc], %[buf]) \ + addi(%[buf], %[buf], 16) \ + vand(16, 22, 22) \ + \ + bdnz(loop) \ + \ + /* \ + * Store back counter and CBC-MAC value. \ + */ \ + BYTESWAP(16) \ + BYTESWAP(17) \ + stxvw4x(48, %[cc], %[ctr]) \ + stxvw4x(49, %[cc], %[cbcmac]) \ + \ +: [cc] "+b" (cc), [buf] "+b" (buf) \ +: [sk] "b" (sk), [ctr] "b" (ctr), [cbcmac] "b" (cbcmac), \ + [num_blocks] "b" (num_blocks), [ctrinc] "b" (ctrinc) \ + BYTESWAP_REG \ +: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \ + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \ + "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \ + "v30", "ctr", "memory" \ + ); \ +} + +MKDECRYPT(128) +MKDECRYPT(192) +MKDECRYPT(256) + +/* see bearssl_block.h */ +void +br_aes_pwr8_ctrcbc_encrypt(const br_aes_pwr8_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len) +{ + if (len == 0) { + return; + } + switch (ctx->num_rounds) { + case 10: + ctrcbc_128_encrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4); + break; + case 12: + ctrcbc_192_encrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4); + break; + default: + ctrcbc_256_encrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4); + break; + } +} + +/* see bearssl_block.h */ +void +br_aes_pwr8_ctrcbc_decrypt(const br_aes_pwr8_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len) +{ + if (len == 0) { + return; + } + switch (ctx->num_rounds) { + case 10: + ctrcbc_128_decrypt(ctx->skey.skni, ctr, 
cbcmac, data, len >> 4); + break; + case 12: + ctrcbc_192_decrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4); + break; + default: + ctrcbc_256_decrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4); + break; + } +} + +static inline void +incr_ctr(void *dst, const void *src) +{ + uint64_t hi, lo; + + hi = br_dec64be(src); + lo = br_dec64be((const unsigned char *)src + 8); + lo ++; + hi += ((lo | -lo) >> 63) ^ (uint64_t)1; + br_enc64be(dst, hi); + br_enc64be((unsigned char *)dst + 8, lo); +} + +/* see bearssl_block.h */ +void +br_aes_pwr8_ctrcbc_ctr(const br_aes_pwr8_ctrcbc_keys *ctx, + void *ctr, void *data, size_t len) +{ + unsigned char ctrbuf[64]; + + memcpy(ctrbuf, ctr, 16); + incr_ctr(ctrbuf + 16, ctrbuf); + incr_ctr(ctrbuf + 32, ctrbuf + 16); + incr_ctr(ctrbuf + 48, ctrbuf + 32); + if (len >= 64) { + switch (ctx->num_rounds) { + case 10: + ctr_128(ctx->skey.skni, ctrbuf, data, len >> 6); + break; + case 12: + ctr_192(ctx->skey.skni, ctrbuf, data, len >> 6); + break; + default: + ctr_256(ctx->skey.skni, ctrbuf, data, len >> 6); + break; + } + data = (unsigned char *)data + (len & ~(size_t)63); + len &= 63; + } + if (len > 0) { + unsigned char tmp[64]; + + if (len >= 32) { + if (len >= 48) { + memcpy(ctr, ctrbuf + 48, 16); + } else { + memcpy(ctr, ctrbuf + 32, 16); + } + } else { + if (len >= 16) { + memcpy(ctr, ctrbuf + 16, 16); + } + } + memcpy(tmp, data, len); + memset(tmp + len, 0, (sizeof tmp) - len); + switch (ctx->num_rounds) { + case 10: + ctr_128(ctx->skey.skni, ctrbuf, tmp, 1); + break; + case 12: + ctr_192(ctx->skey.skni, ctrbuf, tmp, 1); + break; + default: + ctr_256(ctx->skey.skni, ctrbuf, tmp, 1); + break; + } + memcpy(data, tmp, len); + } else { + memcpy(ctr, ctrbuf, 16); + } +} + +/* see bearssl_block.h */ +void +br_aes_pwr8_ctrcbc_mac(const br_aes_pwr8_ctrcbc_keys *ctx, + void *cbcmac, const void *data, size_t len) +{ + if (len > 0) { + switch (ctx->num_rounds) { + case 10: + cbcmac_128(ctx->skey.skni, cbcmac, data, len >> 4); + break; + case 
12: + cbcmac_192(ctx->skey.skni, cbcmac, data, len >> 4); + break; + default: + cbcmac_256(ctx->skey.skni, cbcmac, data, len >> 4); + break; + } + } +} + +/* see bearssl_block.h */ +const br_block_ctrcbc_class br_aes_pwr8_ctrcbc_vtable = { + sizeof(br_aes_pwr8_ctrcbc_keys), + 16, + 4, + (void (*)(const br_block_ctrcbc_class **, const void *, size_t)) + &br_aes_pwr8_ctrcbc_init, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, void *, size_t)) + &br_aes_pwr8_ctrcbc_encrypt, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, void *, size_t)) + &br_aes_pwr8_ctrcbc_decrypt, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, size_t)) + &br_aes_pwr8_ctrcbc_ctr, + (void (*)(const br_block_ctrcbc_class *const *, + void *, const void *, size_t)) + &br_aes_pwr8_ctrcbc_mac +}; + +#else + +/* see bearssl_block.h */ +const br_block_ctrcbc_class * +br_aes_pwr8_ctrcbc_get_vtable(void) +{ + return NULL; +} + +#endif diff --git a/third_party/bearssl/src/aes_small_cbcdec.c b/third_party/bearssl/src/aes_small_cbcdec.c new file mode 100644 index 0000000..8567244 --- /dev/null +++ b/third_party/bearssl/src/aes_small_cbcdec.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_block.h */ +void +br_aes_small_cbcdec_init(br_aes_small_cbcdec_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_small_cbcdec_vtable; + ctx->num_rounds = br_aes_keysched(ctx->skey, key, len); +} + +/* see bearssl_block.h */ +void +br_aes_small_cbcdec_run(const br_aes_small_cbcdec_keys *ctx, + void *iv, void *data, size_t len) +{ + unsigned char *buf, *ivbuf; + + ivbuf = iv; + buf = data; + while (len > 0) { + unsigned char tmp[16]; + int i; + + memcpy(tmp, buf, 16); + br_aes_small_decrypt(ctx->num_rounds, ctx->skey, buf); + for (i = 0; i < 16; i ++) { + buf[i] ^= ivbuf[i]; + } + memcpy(ivbuf, tmp, 16); + buf += 16; + len -= 16; + } +} + +/* see bearssl_block.h */ +const br_block_cbcdec_class br_aes_small_cbcdec_vtable = { + sizeof(br_aes_small_cbcdec_keys), + 16, + 4, + (void (*)(const br_block_cbcdec_class **, const void *, size_t)) + &br_aes_small_cbcdec_init, + (void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t)) + &br_aes_small_cbcdec_run +}; diff --git a/third_party/bearssl/src/aes_small_cbcenc.c b/third_party/bearssl/src/aes_small_cbcenc.c new file mode 100644 index 0000000..0dc2910 --- /dev/null +++ b/third_party/bearssl/src/aes_small_cbcenc.c @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal 
in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_block.h */ +void +br_aes_small_cbcenc_init(br_aes_small_cbcenc_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_small_cbcenc_vtable; + ctx->num_rounds = br_aes_keysched(ctx->skey, key, len); +} + +/* see bearssl_block.h */ +void +br_aes_small_cbcenc_run(const br_aes_small_cbcenc_keys *ctx, + void *iv, void *data, size_t len) +{ + unsigned char *buf, *ivbuf; + + ivbuf = iv; + buf = data; + while (len > 0) { + int i; + + for (i = 0; i < 16; i ++) { + buf[i] ^= ivbuf[i]; + } + br_aes_small_encrypt(ctx->num_rounds, ctx->skey, buf); + memcpy(ivbuf, buf, 16); + buf += 16; + len -= 16; + } +} + +/* see bearssl_block.h */ +const br_block_cbcenc_class br_aes_small_cbcenc_vtable = { + sizeof(br_aes_small_cbcenc_keys), + 16, + 4, + (void (*)(const br_block_cbcenc_class **, const void *, size_t)) + &br_aes_small_cbcenc_init, + (void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t)) + &br_aes_small_cbcenc_run +}; diff --git a/third_party/bearssl/src/aes_small_ctr.c 
b/third_party/bearssl/src/aes_small_ctr.c new file mode 100644 index 0000000..d5d371c --- /dev/null +++ b/third_party/bearssl/src/aes_small_ctr.c @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see bearssl_block.h */ +void +br_aes_small_ctr_init(br_aes_small_ctr_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_small_ctr_vtable; + ctx->num_rounds = br_aes_keysched(ctx->skey, key, len); +} + +static void +xorbuf(void *dst, const void *src, size_t len) +{ + unsigned char *d; + const unsigned char *s; + + d = dst; + s = src; + while (len -- > 0) { + *d ++ ^= *s ++; + } +} + +/* see bearssl_block.h */ +uint32_t +br_aes_small_ctr_run(const br_aes_small_ctr_keys *ctx, + const void *iv, uint32_t cc, void *data, size_t len) +{ + unsigned char *buf; + + buf = data; + while (len > 0) { + unsigned char tmp[16]; + + memcpy(tmp, iv, 12); + br_enc32be(tmp + 12, cc ++); + br_aes_small_encrypt(ctx->num_rounds, ctx->skey, tmp); + if (len <= 16) { + xorbuf(buf, tmp, len); + break; + } + xorbuf(buf, tmp, 16); + buf += 16; + len -= 16; + } + return cc; +} + +/* see bearssl_block.h */ +const br_block_ctr_class br_aes_small_ctr_vtable = { + sizeof(br_aes_small_ctr_keys), + 16, + 4, + (void (*)(const br_block_ctr_class **, const void *, size_t)) + &br_aes_small_ctr_init, + (uint32_t (*)(const br_block_ctr_class *const *, + const void *, uint32_t, void *, size_t)) + &br_aes_small_ctr_run +}; diff --git a/third_party/bearssl/src/aes_small_ctrcbc.c b/third_party/bearssl/src/aes_small_ctrcbc.c new file mode 100644 index 0000000..2d6ba32 --- /dev/null +++ b/third_party/bearssl/src/aes_small_ctrcbc.c @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following 
conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_block.h */ +void +br_aes_small_ctrcbc_init(br_aes_small_ctrcbc_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_small_ctrcbc_vtable; + ctx->num_rounds = br_aes_keysched(ctx->skey, key, len); +} + +static void +xorbuf(void *dst, const void *src, size_t len) +{ + unsigned char *d; + const unsigned char *s; + + d = dst; + s = src; + while (len -- > 0) { + *d ++ ^= *s ++; + } +} + +/* see bearssl_block.h */ +void +br_aes_small_ctrcbc_ctr(const br_aes_small_ctrcbc_keys *ctx, + void *ctr, void *data, size_t len) +{ + unsigned char *buf, *bctr; + uint32_t cc0, cc1, cc2, cc3; + + buf = data; + bctr = ctr; + cc3 = br_dec32be(bctr + 0); + cc2 = br_dec32be(bctr + 4); + cc1 = br_dec32be(bctr + 8); + cc0 = br_dec32be(bctr + 12); + while (len > 0) { + unsigned char tmp[16]; + uint32_t carry; + + br_enc32be(tmp + 0, cc3); + br_enc32be(tmp + 4, cc2); + br_enc32be(tmp + 8, cc1); + br_enc32be(tmp + 12, cc0); + br_aes_small_encrypt(ctx->num_rounds, ctx->skey, tmp); + xorbuf(buf, tmp, 16); + buf += 16; + len -= 16; + cc0 ++; + carry = (~(cc0 | -cc0)) >> 31; + cc1 += carry; + carry &= (~(cc1 | -cc1)) >> 31; + cc2 += carry; + carry &= (~(cc2 | -cc2)) >> 31; + cc3 += carry; + } + br_enc32be(bctr + 0, cc3); + br_enc32be(bctr + 4, cc2); + br_enc32be(bctr + 8, cc1); + br_enc32be(bctr + 12, 
cc0); +} + +/* see bearssl_block.h */ +void +br_aes_small_ctrcbc_mac(const br_aes_small_ctrcbc_keys *ctx, + void *cbcmac, const void *data, size_t len) +{ + const unsigned char *buf; + + buf = data; + while (len > 0) { + xorbuf(cbcmac, buf, 16); + br_aes_small_encrypt(ctx->num_rounds, ctx->skey, cbcmac); + buf += 16; + len -= 16; + } +} + +/* see bearssl_block.h */ +void +br_aes_small_ctrcbc_encrypt(const br_aes_small_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len) +{ + br_aes_small_ctrcbc_ctr(ctx, ctr, data, len); + br_aes_small_ctrcbc_mac(ctx, cbcmac, data, len); +} + +/* see bearssl_block.h */ +void +br_aes_small_ctrcbc_decrypt(const br_aes_small_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len) +{ + br_aes_small_ctrcbc_mac(ctx, cbcmac, data, len); + br_aes_small_ctrcbc_ctr(ctx, ctr, data, len); +} + +/* see bearssl_block.h */ +const br_block_ctrcbc_class br_aes_small_ctrcbc_vtable = { + sizeof(br_aes_small_ctrcbc_keys), + 16, + 4, + (void (*)(const br_block_ctrcbc_class **, const void *, size_t)) + &br_aes_small_ctrcbc_init, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, void *, size_t)) + &br_aes_small_ctrcbc_encrypt, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, void *, size_t)) + &br_aes_small_ctrcbc_decrypt, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, size_t)) + &br_aes_small_ctrcbc_ctr, + (void (*)(const br_block_ctrcbc_class *const *, + void *, const void *, size_t)) + &br_aes_small_ctrcbc_mac +}; diff --git a/third_party/bearssl/src/aes_small_dec.c b/third_party/bearssl/src/aes_small_dec.c new file mode 100644 index 0000000..59dca8e --- /dev/null +++ b/third_party/bearssl/src/aes_small_dec.c @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the 
/*
 * Inverse S-box.
 */
static const unsigned char iS[] = {
	0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E,
	0x81, 0xF3, 0xD7, 0xFB, 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
	0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, 0x54, 0x7B, 0x94, 0x32,
	0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
	0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49,
	0x6D, 0x8B, 0xD1, 0x25, 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
	0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, 0x6C, 0x70, 0x48, 0x50,
	0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
	0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05,
	0xB8, 0xB3, 0x45, 0x06, 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
	0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, 0x3A, 0x91, 0x11, 0x41,
	0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
	0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8,
	0x1C, 0x75, 0xDF, 0x6E, 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
	0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B, 0xFC, 0x56, 0x3E, 0x4B,
	0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
	0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59,
	0x27, 0x80, 0xEC, 0x5F, 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
	0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, 0xA0, 0xE0, 0x3B, 0x4D,
	0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
	0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63,
	0x55, 0x21, 0x0C, 0x7D
};

/*
 * XOR a 128-bit round key (four 32-bit words) into the state. The
 * state holds one byte value per element; each key word covers four
 * consecutive elements, most significant byte first.
 */
static void
add_round_key(unsigned *state, const uint32_t *skeys)
{
	int col;

	for (col = 0; col < 4; col ++) {
		uint32_t word;
		unsigned *cell;

		word = skeys[col];
		cell = state + (col << 2);
		cell[0] ^= (unsigned)(word >> 24);
		cell[1] ^= (unsigned)(word >> 16) & 0xFF;
		cell[2] ^= (unsigned)(word >> 8) & 0xFF;
		cell[3] ^= (unsigned)word & 0xFF;
	}
}

/*
 * Replace every state byte with its image through the inverse S-box.
 */
static void
inv_sub_bytes(unsigned *state)
{
	unsigned *cell;

	for (cell = state; cell != state + 16; cell ++) {
		*cell = iS[*cell];
	}
}

/*
 * Undo ShiftRows. With the state stored column by column (element
 * r + 4*c is row r of column c), row r is rotated by r positions;
 * from[i] is the index of the old element that lands in slot i.
 */
static void
inv_shift_rows(unsigned *state)
{
	static const unsigned char from[16] = {
		0, 13, 10, 7,
		4, 1, 14, 11,
		8, 5, 2, 15,
		12, 9, 6, 3
	};
	unsigned old[16];
	int i;

	for (i = 0; i < 16; i ++) {
		old[i] = state[i];
	}
	for (i = 0; i < 16; i ++) {
		state[i] = old[from[i]];
	}
}

/*
 * Reduce a polynomial of degree at most 10 modulo the AES polynomial
 * (X^8 + X^4 + X^3 + X + 1): the bits above bit 7 are folded back as
 * a carry-less multiple of 0x1B.
 */
static inline unsigned
gf256red(unsigned x)
{
	unsigned top;

	top = x >> 8;
	return (x ^ top ^ (top << 1) ^ (top << 3) ^ (top << 4)) & 0xFF;
}

/*
 * Carry-less (unreduced) multiplications of a byte by the four
 * InvMixColumns coefficients 9, 11, 13 and 14.
 */
static inline unsigned
clmul9(unsigned v)
{
	return v ^ (v << 3);
}

static inline unsigned
clmul11(unsigned v)
{
	return v ^ (v << 1) ^ (v << 3);
}

static inline unsigned
clmul13(unsigned v)
{
	return v ^ (v << 2) ^ (v << 3);
}

static inline unsigned
clmul14(unsigned v)
{
	return (v << 1) ^ (v << 2) ^ (v << 3);
}

/*
 * Undo MixColumns: each column is multiplied by the circulant matrix
 * with first row (14, 11, 13, 9). Products are accumulated unreduced
 * and reduced once per output byte.
 */
static void
inv_mix_columns(unsigned *state)
{
	unsigned *col;

	for (col = state; col != state + 16; col += 4) {
		unsigned a, b, c, d;

		a = col[0];
		b = col[1];
		c = col[2];
		d = col[3];
		col[0] = gf256red(clmul14(a) ^ clmul11(b)
			^ clmul13(c) ^ clmul9(d));
		col[1] = gf256red(clmul9(a) ^ clmul14(b)
			^ clmul11(c) ^ clmul13(d));
		col[2] = gf256red(clmul13(a) ^ clmul9(b)
			^ clmul14(c) ^ clmul11(d));
		col[3] = gf256red(clmul11(a) ^ clmul13(b)
			^ clmul9(c) ^ clmul14(d));
	}
}

/*
 * Decrypt one 16-byte block in place. skey holds 4 * (num_rounds + 1)
 * round-key words in encryption order; they are consumed from the last
 * round key back to the first.
 *
 * see inner.h
 */
void
br_aes_small_decrypt(unsigned num_rounds, const uint32_t *skey, void *data)
{
	unsigned char *bytes;
	unsigned state[16];
	unsigned round;
	int i;

	bytes = data;
	for (i = 0; i < 16; i ++) {
		state[i] = bytes[i];
	}

	round = num_rounds;
	add_round_key(state, skey + (round << 2));
	while (-- round > 0) {
		inv_shift_rows(state);
		inv_sub_bytes(state);
		add_round_key(state, skey + (round << 2));
		inv_mix_columns(state);
	}
	/* The last inverse round has no InvMixColumns step. */
	inv_shift_rows(state);
	inv_sub_bytes(state);
	add_round_key(state, skey);

	for (i = 0; i < 16; i ++) {
		bytes[i] = (unsigned char)state[i];
	}
}
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +#define S br_aes_S + +static void +add_round_key(unsigned *state, const uint32_t *skeys) +{ + int i; + + for (i = 0; i < 16; i += 4) { + uint32_t k; + + k = *skeys ++; + state[i + 0] ^= (unsigned)(k >> 24); + state[i + 1] ^= (unsigned)(k >> 16) & 0xFF; + state[i + 2] ^= (unsigned)(k >> 8) & 0xFF; + state[i + 3] ^= (unsigned)k & 0xFF; + } +} + +static void +sub_bytes(unsigned *state) +{ + int i; + + for (i = 0; i < 16; i ++) { + state[i] = S[state[i]]; + } +} + +static void +shift_rows(unsigned *state) +{ + unsigned tmp; + + tmp = state[1]; + state[1] = state[5]; + state[5] = state[9]; + state[9] = state[13]; + state[13] = tmp; + + tmp = state[2]; + state[2] = state[10]; + state[10] = tmp; + tmp = state[6]; + state[6] = state[14]; + state[14] = tmp; + + tmp = state[15]; + state[15] = state[11]; + state[11] = state[7]; + state[7] = state[3]; + state[3] = tmp; +} + +static void +mix_columns(unsigned *state) +{ + int i; + + for (i = 0; i < 16; i += 4) { + unsigned s0, s1, s2, s3; + unsigned t0, t1, t2, t3; + + s0 = state[i + 0]; + s1 = state[i + 1]; + s2 = state[i + 2]; + s3 = state[i + 3]; + t0 = (s0 << 1) ^ s1 ^ (s1 << 1) ^ s2 ^ s3; + t1 = s0 ^ (s1 << 1) ^ s2 ^ (s2 << 1) ^ s3; + t2 = s0 ^ s1 ^ (s2 << 1) ^ s3 ^ (s3 << 1); + t3 = s0 ^ (s0 << 1) ^ s1 ^ s2 ^ (s3 << 1); + state[i + 0] = t0 ^ ((unsigned)(-(int)(t0 >> 8)) & 0x11B); + state[i + 1] = t1 ^ ((unsigned)(-(int)(t1 >> 8)) & 0x11B); + state[i + 2] = t2 ^ ((unsigned)(-(int)(t2 >> 8)) & 0x11B); + 
state[i + 3] = t3 ^ ((unsigned)(-(int)(t3 >> 8)) & 0x11B); + } +} + +/* see inner.h */ +void +br_aes_small_encrypt(unsigned num_rounds, const uint32_t *skey, void *data) +{ + unsigned char *buf; + unsigned state[16]; + unsigned u; + + buf = data; + for (u = 0; u < 16; u ++) { + state[u] = buf[u]; + } + add_round_key(state, skey); + for (u = 1; u < num_rounds; u ++) { + sub_bytes(state); + shift_rows(state); + mix_columns(state); + add_round_key(state, skey + (u << 2)); + } + sub_bytes(state); + shift_rows(state); + add_round_key(state, skey + (num_rounds << 2)); + for (u = 0; u < 16; u ++) { + buf[u] = state[u]; + } +} diff --git a/third_party/bearssl/src/aes_x86ni.c b/third_party/bearssl/src/aes_x86ni.c new file mode 100644 index 0000000..d5408f1 --- /dev/null +++ b/third_party/bearssl/src/aes_x86ni.c @@ -0,0 +1,240 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#define BR_ENABLE_INTRINSICS 1 +#include "inner.h" + +/* + * This code contains the AES key schedule implementation using the + * AES-NI opcodes. + */ + +#if BR_AES_X86NI + +/* see inner.h */ +int +br_aes_x86ni_supported(void) +{ + /* + * Bit mask for features in ECX: + * 19 SSE4.1 (used for _mm_insert_epi32(), for AES-CTR) + * 25 AES-NI + */ + return br_cpuid(0, 0, 0x02080000, 0); +} + +BR_TARGETS_X86_UP + +BR_TARGET("sse2,aes") +static inline __m128i +expand_step128(__m128i k, __m128i k2) +{ + k = _mm_xor_si128(k, _mm_slli_si128(k, 4)); + k = _mm_xor_si128(k, _mm_slli_si128(k, 4)); + k = _mm_xor_si128(k, _mm_slli_si128(k, 4)); + k2 = _mm_shuffle_epi32(k2, 0xFF); + return _mm_xor_si128(k, k2); +} + +BR_TARGET("sse2,aes") +static inline void +expand_step192(__m128i *t1, __m128i *t2, __m128i *t3) +{ + __m128i t4; + + *t2 = _mm_shuffle_epi32(*t2, 0x55); + t4 = _mm_slli_si128(*t1, 0x4); + *t1 = _mm_xor_si128(*t1, t4); + t4 = _mm_slli_si128(t4, 0x4); + *t1 = _mm_xor_si128(*t1, t4); + t4 = _mm_slli_si128(t4, 0x4); + *t1 = _mm_xor_si128(*t1, t4); + *t1 = _mm_xor_si128(*t1, *t2); + *t2 = _mm_shuffle_epi32(*t1, 0xFF); + t4 = _mm_slli_si128(*t3, 0x4); + *t3 = _mm_xor_si128(*t3, t4); + *t3 = _mm_xor_si128(*t3, *t2); +} + +BR_TARGET("sse2,aes") +static inline void +expand_step256_1(__m128i *t1, __m128i *t2) +{ + __m128i t4; + + *t2 = _mm_shuffle_epi32(*t2, 0xFF); + t4 = _mm_slli_si128(*t1, 0x4); + *t1 = _mm_xor_si128(*t1, t4); + t4 = _mm_slli_si128(t4, 0x4); + *t1 = _mm_xor_si128(*t1, t4); + t4 = _mm_slli_si128(t4, 0x4); + *t1 = _mm_xor_si128(*t1, t4); + *t1 = _mm_xor_si128(*t1, *t2); +} + +BR_TARGET("sse2,aes") +static inline void +expand_step256_2(__m128i *t1, __m128i *t3) +{ + __m128i t2, t4; + + t4 = _mm_aeskeygenassist_si128(*t1, 0x0); + t2 = _mm_shuffle_epi32(t4, 0xAA); + t4 = _mm_slli_si128(*t3, 0x4); + *t3 = _mm_xor_si128(*t3, t4); + t4 = _mm_slli_si128(t4, 0x4); + *t3 = _mm_xor_si128(*t3, t4); + t4 = _mm_slli_si128(t4, 0x4); + *t3 = _mm_xor_si128(*t3, t4); + 
*t3 = _mm_xor_si128(*t3, t2); +} + +/* + * Perform key schedule for AES, encryption direction. Subkeys are written + * in sk[], and the number of rounds is returned. Key length MUST be 16, + * 24 or 32 bytes. + */ +BR_TARGET("sse2,aes") +static unsigned +x86ni_keysched(__m128i *sk, const void *key, size_t len) +{ + const unsigned char *kb; + +#define KEXP128(k, i, rcon) do { \ + k = expand_step128(k, _mm_aeskeygenassist_si128(k, rcon)); \ + sk[i] = k; \ + } while (0) + +#define KEXP192(i, rcon1, rcon2) do { \ + sk[(i) + 0] = t1; \ + sk[(i) + 1] = t3; \ + t2 = _mm_aeskeygenassist_si128(t3, rcon1); \ + expand_step192(&t1, &t2, &t3); \ + sk[(i) + 1] = _mm_castpd_si128(_mm_shuffle_pd( \ + _mm_castsi128_pd(sk[(i) + 1]), \ + _mm_castsi128_pd(t1), 0)); \ + sk[(i) + 2] = _mm_castpd_si128(_mm_shuffle_pd( \ + _mm_castsi128_pd(t1), \ + _mm_castsi128_pd(t3), 1)); \ + t2 = _mm_aeskeygenassist_si128(t3, rcon2); \ + expand_step192(&t1, &t2, &t3); \ + } while (0) + +#define KEXP256(i, rcon) do { \ + sk[(i) + 0] = t3; \ + t2 = _mm_aeskeygenassist_si128(t3, rcon); \ + expand_step256_1(&t1, &t2); \ + sk[(i) + 1] = t1; \ + expand_step256_2(&t1, &t3); \ + } while (0) + + kb = key; + switch (len) { + __m128i t1, t2, t3; + + case 16: + t1 = _mm_loadu_si128((const void *)kb); + sk[0] = t1; + KEXP128(t1, 1, 0x01); + KEXP128(t1, 2, 0x02); + KEXP128(t1, 3, 0x04); + KEXP128(t1, 4, 0x08); + KEXP128(t1, 5, 0x10); + KEXP128(t1, 6, 0x20); + KEXP128(t1, 7, 0x40); + KEXP128(t1, 8, 0x80); + KEXP128(t1, 9, 0x1B); + KEXP128(t1, 10, 0x36); + return 10; + + case 24: + t1 = _mm_loadu_si128((const void *)kb); + t3 = _mm_loadu_si128((const void *)(kb + 8)); + t3 = _mm_shuffle_epi32(t3, 0x4E); + KEXP192(0, 0x01, 0x02); + KEXP192(3, 0x04, 0x08); + KEXP192(6, 0x10, 0x20); + KEXP192(9, 0x40, 0x80); + sk[12] = t1; + return 12; + + case 32: + t1 = _mm_loadu_si128((const void *)kb); + t3 = _mm_loadu_si128((const void *)(kb + 16)); + sk[0] = t1; + KEXP256( 1, 0x01); + KEXP256( 3, 0x02); + KEXP256( 5, 0x04); + 
KEXP256( 7, 0x08); + KEXP256( 9, 0x10); + KEXP256(11, 0x20); + sk[13] = t3; + t2 = _mm_aeskeygenassist_si128(t3, 0x40); + expand_step256_1(&t1, &t2); + sk[14] = t1; + return 14; + + default: + return 0; + } + +#undef KEXP128 +#undef KEXP192 +#undef KEXP256 +} + +/* see inner.h */ +BR_TARGET("sse2,aes") +unsigned +br_aes_x86ni_keysched_enc(unsigned char *skni, const void *key, size_t len) +{ + __m128i sk[15]; + unsigned num_rounds; + + num_rounds = x86ni_keysched(sk, key, len); + memcpy(skni, sk, (num_rounds + 1) << 4); + return num_rounds; +} + +/* see inner.h */ +BR_TARGET("sse2,aes") +unsigned +br_aes_x86ni_keysched_dec(unsigned char *skni, const void *key, size_t len) +{ + __m128i sk[15]; + unsigned u, num_rounds; + + num_rounds = x86ni_keysched(sk, key, len); + _mm_storeu_si128((void *)skni, sk[num_rounds]); + for (u = 1; u < num_rounds; u ++) { + _mm_storeu_si128((void *)(skni + (u << 4)), + _mm_aesimc_si128(sk[num_rounds - u])); + } + _mm_storeu_si128((void *)(skni + (num_rounds << 4)), sk[0]); + return num_rounds; +} + +BR_TARGETS_X86_DOWN + +#endif diff --git a/third_party/bearssl/src/aes_x86ni_cbcdec.c b/third_party/bearssl/src/aes_x86ni_cbcdec.c new file mode 100644 index 0000000..862b1b5 --- /dev/null +++ b/third_party/bearssl/src/aes_x86ni_cbcdec.c @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define BR_ENABLE_INTRINSICS 1 +#include "inner.h" + +#if BR_AES_X86NI + +/* see bearssl_block.h */ +const br_block_cbcdec_class * +br_aes_x86ni_cbcdec_get_vtable(void) +{ + return br_aes_x86ni_supported() ? &br_aes_x86ni_cbcdec_vtable : NULL; +} + +/* see bearssl_block.h */ +void +br_aes_x86ni_cbcdec_init(br_aes_x86ni_cbcdec_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_x86ni_cbcdec_vtable; + ctx->num_rounds = br_aes_x86ni_keysched_dec(ctx->skey.skni, key, len); +} + +BR_TARGETS_X86_UP + +/* see bearssl_block.h */ +BR_TARGET("sse2,aes") +void +br_aes_x86ni_cbcdec_run(const br_aes_x86ni_cbcdec_keys *ctx, + void *iv, void *data, size_t len) +{ + unsigned char *buf; + unsigned num_rounds; + __m128i sk[15], ivx; + unsigned u; + + buf = data; + ivx = _mm_loadu_si128(iv); + num_rounds = ctx->num_rounds; + for (u = 0; u <= num_rounds; u ++) { + sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4))); + } + while (len > 0) { + __m128i x0, x1, x2, x3, e0, e1, e2, e3; + + x0 = _mm_loadu_si128((void *)(buf + 0)); + if (len >= 64) { + x1 = _mm_loadu_si128((void *)(buf + 16)); + x2 = _mm_loadu_si128((void *)(buf + 32)); + x3 = _mm_loadu_si128((void *)(buf + 48)); + } else { + x0 = _mm_loadu_si128((void *)(buf + 0)); + if (len >= 32) { + x1 = _mm_loadu_si128((void *)(buf + 16)); + if (len >= 48) { + x2 = _mm_loadu_si128( + (void *)(buf + 32)); + x3 = x2; + } else { + x2 = x0; + x3 = x1; + } + } else { + x1 = x0; + x2 = x0; + x3 = x0; + } + } + e0 = x0; + 
e1 = x1; + e2 = x2; + e3 = x3; + x0 = _mm_xor_si128(x0, sk[0]); + x1 = _mm_xor_si128(x1, sk[0]); + x2 = _mm_xor_si128(x2, sk[0]); + x3 = _mm_xor_si128(x3, sk[0]); + x0 = _mm_aesdec_si128(x0, sk[1]); + x1 = _mm_aesdec_si128(x1, sk[1]); + x2 = _mm_aesdec_si128(x2, sk[1]); + x3 = _mm_aesdec_si128(x3, sk[1]); + x0 = _mm_aesdec_si128(x0, sk[2]); + x1 = _mm_aesdec_si128(x1, sk[2]); + x2 = _mm_aesdec_si128(x2, sk[2]); + x3 = _mm_aesdec_si128(x3, sk[2]); + x0 = _mm_aesdec_si128(x0, sk[3]); + x1 = _mm_aesdec_si128(x1, sk[3]); + x2 = _mm_aesdec_si128(x2, sk[3]); + x3 = _mm_aesdec_si128(x3, sk[3]); + x0 = _mm_aesdec_si128(x0, sk[4]); + x1 = _mm_aesdec_si128(x1, sk[4]); + x2 = _mm_aesdec_si128(x2, sk[4]); + x3 = _mm_aesdec_si128(x3, sk[4]); + x0 = _mm_aesdec_si128(x0, sk[5]); + x1 = _mm_aesdec_si128(x1, sk[5]); + x2 = _mm_aesdec_si128(x2, sk[5]); + x3 = _mm_aesdec_si128(x3, sk[5]); + x0 = _mm_aesdec_si128(x0, sk[6]); + x1 = _mm_aesdec_si128(x1, sk[6]); + x2 = _mm_aesdec_si128(x2, sk[6]); + x3 = _mm_aesdec_si128(x3, sk[6]); + x0 = _mm_aesdec_si128(x0, sk[7]); + x1 = _mm_aesdec_si128(x1, sk[7]); + x2 = _mm_aesdec_si128(x2, sk[7]); + x3 = _mm_aesdec_si128(x3, sk[7]); + x0 = _mm_aesdec_si128(x0, sk[8]); + x1 = _mm_aesdec_si128(x1, sk[8]); + x2 = _mm_aesdec_si128(x2, sk[8]); + x3 = _mm_aesdec_si128(x3, sk[8]); + x0 = _mm_aesdec_si128(x0, sk[9]); + x1 = _mm_aesdec_si128(x1, sk[9]); + x2 = _mm_aesdec_si128(x2, sk[9]); + x3 = _mm_aesdec_si128(x3, sk[9]); + if (num_rounds == 10) { + x0 = _mm_aesdeclast_si128(x0, sk[10]); + x1 = _mm_aesdeclast_si128(x1, sk[10]); + x2 = _mm_aesdeclast_si128(x2, sk[10]); + x3 = _mm_aesdeclast_si128(x3, sk[10]); + } else if (num_rounds == 12) { + x0 = _mm_aesdec_si128(x0, sk[10]); + x1 = _mm_aesdec_si128(x1, sk[10]); + x2 = _mm_aesdec_si128(x2, sk[10]); + x3 = _mm_aesdec_si128(x3, sk[10]); + x0 = _mm_aesdec_si128(x0, sk[11]); + x1 = _mm_aesdec_si128(x1, sk[11]); + x2 = _mm_aesdec_si128(x2, sk[11]); + x3 = _mm_aesdec_si128(x3, sk[11]); + x0 = 
_mm_aesdeclast_si128(x0, sk[12]); + x1 = _mm_aesdeclast_si128(x1, sk[12]); + x2 = _mm_aesdeclast_si128(x2, sk[12]); + x3 = _mm_aesdeclast_si128(x3, sk[12]); + } else { + x0 = _mm_aesdec_si128(x0, sk[10]); + x1 = _mm_aesdec_si128(x1, sk[10]); + x2 = _mm_aesdec_si128(x2, sk[10]); + x3 = _mm_aesdec_si128(x3, sk[10]); + x0 = _mm_aesdec_si128(x0, sk[11]); + x1 = _mm_aesdec_si128(x1, sk[11]); + x2 = _mm_aesdec_si128(x2, sk[11]); + x3 = _mm_aesdec_si128(x3, sk[11]); + x0 = _mm_aesdec_si128(x0, sk[12]); + x1 = _mm_aesdec_si128(x1, sk[12]); + x2 = _mm_aesdec_si128(x2, sk[12]); + x3 = _mm_aesdec_si128(x3, sk[12]); + x0 = _mm_aesdec_si128(x0, sk[13]); + x1 = _mm_aesdec_si128(x1, sk[13]); + x2 = _mm_aesdec_si128(x2, sk[13]); + x3 = _mm_aesdec_si128(x3, sk[13]); + x0 = _mm_aesdeclast_si128(x0, sk[14]); + x1 = _mm_aesdeclast_si128(x1, sk[14]); + x2 = _mm_aesdeclast_si128(x2, sk[14]); + x3 = _mm_aesdeclast_si128(x3, sk[14]); + } + x0 = _mm_xor_si128(x0, ivx); + x1 = _mm_xor_si128(x1, e0); + x2 = _mm_xor_si128(x2, e1); + x3 = _mm_xor_si128(x3, e2); + ivx = e3; + _mm_storeu_si128((void *)(buf + 0), x0); + if (len >= 64) { + _mm_storeu_si128((void *)(buf + 16), x1); + _mm_storeu_si128((void *)(buf + 32), x2); + _mm_storeu_si128((void *)(buf + 48), x3); + buf += 64; + len -= 64; + } else { + if (len >= 32) { + _mm_storeu_si128((void *)(buf + 16), x1); + if (len >= 48) { + _mm_storeu_si128( + (void *)(buf + 32), x2); + } + } + break; + } + } + _mm_storeu_si128(iv, ivx); +} + +BR_TARGETS_X86_DOWN + +/* see bearssl_block.h */ +const br_block_cbcdec_class br_aes_x86ni_cbcdec_vtable = { + sizeof(br_aes_x86ni_cbcdec_keys), + 16, + 4, + (void (*)(const br_block_cbcdec_class **, const void *, size_t)) + &br_aes_x86ni_cbcdec_init, + (void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t)) + &br_aes_x86ni_cbcdec_run +}; + +#else + +/* see bearssl_block.h */ +const br_block_cbcdec_class * +br_aes_x86ni_cbcdec_get_vtable(void) +{ + return NULL; +} + +#endif diff --git 
a/third_party/bearssl/src/aes_x86ni_cbcenc.c b/third_party/bearssl/src/aes_x86ni_cbcenc.c new file mode 100644 index 0000000..85feecd --- /dev/null +++ b/third_party/bearssl/src/aes_x86ni_cbcenc.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */

#define BR_ENABLE_INTRINSICS   1
#include "inner.h"

#if BR_AES_X86NI

/* see bearssl_block.h */
const br_block_cbcenc_class *
br_aes_x86ni_cbcenc_get_vtable(void)
{
	/* NULL when the CPU lacks the features tested by the probe. */
	return br_aes_x86ni_supported() ? &br_aes_x86ni_cbcenc_vtable : NULL;
}

/* see bearssl_block.h */
void
br_aes_x86ni_cbcenc_init(br_aes_x86ni_cbcenc_keys *ctx,
	const void *key, size_t len)
{
	ctx->vtable = &br_aes_x86ni_cbcenc_vtable;
	ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len);
}

BR_TARGETS_X86_UP

/*
 * see bearssl_block.h
 *
 * Encrypts data[] in place, one 16-byte block at a time, and writes the
 * last ciphertext block back to iv[].
 *
 * NOTE(review): len is expected to be a multiple of 16; with any other
 * value "len -= 16" wraps around instead of reaching zero.
 */
BR_TARGET("sse2,aes")
void
br_aes_x86ni_cbcenc_run(const br_aes_x86ni_cbcenc_keys *ctx,
	void *iv, void *data, size_t len)
{
	unsigned char *buf;
	unsigned num_rounds;
	__m128i sk[15], ivx;
	unsigned u;

	buf = data;
	ivx = _mm_loadu_si128(iv);
	num_rounds = ctx->num_rounds;
	for (u = 0; u <= num_rounds; u ++) {
		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
	}
	while (len > 0) {
		__m128i x;

		/* CBC: XOR the plaintext with the previous ciphertext. */
		x = _mm_xor_si128(_mm_loadu_si128((void *)buf), ivx);

		/*
		 * The loop bound depends only on the key size, and only
		 * subkeys that were loaded above are used.
		 */
		x = _mm_xor_si128(x, sk[0]);
		for (u = 1; u < num_rounds; u ++) {
			x = _mm_aesenc_si128(x, sk[u]);
		}
		x = _mm_aesenclast_si128(x, sk[num_rounds]);

		ivx = x;
		_mm_storeu_si128((void *)buf, x);
		buf += 16;
		len -= 16;
	}
	_mm_storeu_si128(iv, ivx);
}

BR_TARGETS_X86_DOWN

/*
 * see bearssl_block.h
 *
 * NOTE(review): the constants 16 and 4 are presumably the block size in
 * bytes and its base-2 logarithm; confirm against the class declaration.
 */
const br_block_cbcenc_class br_aes_x86ni_cbcenc_vtable = {
	sizeof(br_aes_x86ni_cbcenc_keys),
	16,
	4,
	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
		&br_aes_x86ni_cbcenc_init,
	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
		&br_aes_x86ni_cbcenc_run
};

#else

/* see bearssl_block.h */
const br_block_cbcenc_class *
br_aes_x86ni_cbcenc_get_vtable(void)
{
	return NULL;
}

#endif
diff --git a/third_party/bearssl/src/aes_x86ni_ctr.c b/third_party/bearssl/src/aes_x86ni_ctr.c new file mode 100644 index 0000000..1cddd60 --- /dev/null +++ b/third_party/bearssl/src/aes_x86ni_ctr.c @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */

#define BR_ENABLE_INTRINSICS   1
#include "inner.h"

#if BR_AES_X86NI

/* see bearssl_block.h */
const br_block_ctr_class *
br_aes_x86ni_ctr_get_vtable(void)
{
	/* NULL when the CPU lacks the features tested by the probe. */
	return br_aes_x86ni_supported() ? &br_aes_x86ni_ctr_vtable : NULL;
}

/* see bearssl_block.h */
void
br_aes_x86ni_ctr_init(br_aes_x86ni_ctr_keys *ctx,
	const void *key, size_t len)
{
	ctx->vtable = &br_aes_x86ni_ctr_vtable;
	ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len);
}

BR_TARGETS_X86_UP

/*
 * see bearssl_block.h
 *
 * XORs data[] in place with the keystream built from the 12-byte iv[]
 * and the 32-bit block counter cc (inserted byte-swapped as the last
 * word of each counter block). Returns the counter advanced by one per
 * complete 16-byte block processed.
 */
BR_TARGET("sse2,sse4.1,aes")
uint32_t
br_aes_x86ni_ctr_run(const br_aes_x86ni_ctr_keys *ctx,
	const void *iv, uint32_t cc, void *data, size_t len)
{
	unsigned char *buf;
	unsigned char ivbuf[16];
	unsigned num_rounds;
	__m128i sk[15];
	__m128i ivx;
	unsigned u;

	buf = data;
	memcpy(ivbuf, iv, 12);
	/*
	 * The last four bytes are overwritten by the counter below; clear
	 * them so that no indeterminate bytes are loaded into ivx.
	 */
	memset(ivbuf + 12, 0, 4);
	num_rounds = ctx->num_rounds;
	for (u = 0; u <= num_rounds; u ++) {
		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
	}
	ivx = _mm_loadu_si128((void *)ivbuf);
	while (len > 0) {
		__m128i x0, x1, x2, x3;

		/* Four consecutive counter blocks. */
		x0 = _mm_insert_epi32(ivx, br_bswap32(cc + 0), 3);
		x1 = _mm_insert_epi32(ivx, br_bswap32(cc + 1), 3);
		x2 = _mm_insert_epi32(ivx, br_bswap32(cc + 2), 3);
		x3 = _mm_insert_epi32(ivx, br_bswap32(cc + 3), 3);

		/*
		 * Encrypt the four lanes together. The loop bound depends
		 * only on the key size, and only subkeys that were loaded
		 * above are used.
		 */
		x0 = _mm_xor_si128(x0, sk[0]);
		x1 = _mm_xor_si128(x1, sk[0]);
		x2 = _mm_xor_si128(x2, sk[0]);
		x3 = _mm_xor_si128(x3, sk[0]);
		for (u = 1; u < num_rounds; u ++) {
			x0 = _mm_aesenc_si128(x0, sk[u]);
			x1 = _mm_aesenc_si128(x1, sk[u]);
			x2 = _mm_aesenc_si128(x2, sk[u]);
			x3 = _mm_aesenc_si128(x3, sk[u]);
		}
		x0 = _mm_aesenclast_si128(x0, sk[num_rounds]);
		x1 = _mm_aesenclast_si128(x1, sk[num_rounds]);
		x2 = _mm_aesenclast_si128(x2, sk[num_rounds]);
		x3 = _mm_aesenclast_si128(x3, sk[num_rounds]);

		if (len >= 64) {
			x0 = _mm_xor_si128(x0,
				_mm_loadu_si128((void *)(buf +  0)));
			x1 = _mm_xor_si128(x1,
				_mm_loadu_si128((void *)(buf + 16)));
			x2 = _mm_xor_si128(x2,
				_mm_loadu_si128((void *)(buf + 32)));
			x3 = _mm_xor_si128(x3,
				_mm_loadu_si128((void *)(buf + 48)));
			_mm_storeu_si128((void *)(buf +  0), x0);
			_mm_storeu_si128((void *)(buf + 16), x1);
			_mm_storeu_si128((void *)(buf + 32), x2);
			_mm_storeu_si128((void *)(buf + 48), x3);
			buf += 64;
			len -= 64;
			cc += 4;
		} else {
			unsigned char tmp[64];

			/*
			 * Final partial chunk: spill the keystream and XOR
			 * only the remaining len bytes.
			 */
			_mm_storeu_si128((void *)(tmp +  0), x0);
			_mm_storeu_si128((void *)(tmp + 16), x1);
			_mm_storeu_si128((void *)(tmp + 32), x2);
			_mm_storeu_si128((void *)(tmp + 48), x3);
			for (u = 0; u < len; u ++) {
				buf[u] ^= tmp[u];
			}
			cc += (uint32_t)len >> 4;
			break;
		}
	}
	return cc;
}

BR_TARGETS_X86_DOWN

/*
 * see bearssl_block.h
 *
 * NOTE(review): the constants 16 and 4 are presumably the block size in
 * bytes and its base-2 logarithm; confirm against the class declaration.
 */
const br_block_ctr_class br_aes_x86ni_ctr_vtable = {
	sizeof(br_aes_x86ni_ctr_keys),
	16,
	4,
	(void (*)(const br_block_ctr_class **, const void *, size_t))
		&br_aes_x86ni_ctr_init,
	(uint32_t (*)(const br_block_ctr_class *const *,
		const void *, uint32_t, void *, size_t))
		&br_aes_x86ni_ctr_run
};

#else

/* see bearssl_block.h */
const br_block_ctr_class *
br_aes_x86ni_ctr_get_vtable(void)
{
	return NULL;
}

#endif
diff --git a/third_party/bearssl/src/aes_x86ni_ctrcbc.c b/third_party/bearssl/src/aes_x86ni_ctrcbc.c new file mode 100644 index 0000000..f57fead --- /dev/null +++ b/third_party/bearssl/src/aes_x86ni_ctrcbc.c @@ -0,0 +1,596 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, 
subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define BR_ENABLE_INTRINSICS 1 +#include "inner.h" + +#if BR_AES_X86NI + +/* see bearssl_block.h */ +const br_block_ctrcbc_class * +br_aes_x86ni_ctrcbc_get_vtable(void) +{ + return br_aes_x86ni_supported() ? &br_aes_x86ni_ctrcbc_vtable : NULL; +} + +/* see bearssl_block.h */ +void +br_aes_x86ni_ctrcbc_init(br_aes_x86ni_ctrcbc_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_aes_x86ni_ctrcbc_vtable; + ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len); +} + +BR_TARGETS_X86_UP + +/* see bearssl_block.h */ +BR_TARGET("sse2,sse4.1,aes") +void +br_aes_x86ni_ctrcbc_ctr(const br_aes_x86ni_ctrcbc_keys *ctx, + void *ctr, void *data, size_t len) +{ + unsigned char *buf; + unsigned num_rounds; + __m128i sk[15]; + __m128i ivx0, ivx1, ivx2, ivx3; + __m128i erev, zero, one, four, notthree; + unsigned u; + + buf = data; + num_rounds = ctx->num_rounds; + for (u = 0; u <= num_rounds; u ++) { + sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4))); + } + + /* + * Some SSE2 constants. 
+ */ + erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15); + zero = _mm_setzero_si128(); + one = _mm_set_epi64x(0, 1); + four = _mm_set_epi64x(0, 4); + notthree = _mm_sub_epi64(zero, four); + + /* + * Decode the counter in big-endian and pre-increment the other + * three counters. + */ + ivx0 = _mm_shuffle_epi8(_mm_loadu_si128((void *)ctr), erev); + ivx1 = _mm_add_epi64(ivx0, one); + ivx1 = _mm_sub_epi64(ivx1, + _mm_slli_si128(_mm_cmpeq_epi64(ivx1, zero), 8)); + ivx2 = _mm_add_epi64(ivx1, one); + ivx2 = _mm_sub_epi64(ivx2, + _mm_slli_si128(_mm_cmpeq_epi64(ivx2, zero), 8)); + ivx3 = _mm_add_epi64(ivx2, one); + ivx3 = _mm_sub_epi64(ivx3, + _mm_slli_si128(_mm_cmpeq_epi64(ivx3, zero), 8)); + while (len > 0) { + __m128i x0, x1, x2, x3; + + /* + * Load counter values; we need to byteswap them because + * the specification says that they use big-endian. + */ + x0 = _mm_shuffle_epi8(ivx0, erev); + x1 = _mm_shuffle_epi8(ivx1, erev); + x2 = _mm_shuffle_epi8(ivx2, erev); + x3 = _mm_shuffle_epi8(ivx3, erev); + + x0 = _mm_xor_si128(x0, sk[0]); + x1 = _mm_xor_si128(x1, sk[0]); + x2 = _mm_xor_si128(x2, sk[0]); + x3 = _mm_xor_si128(x3, sk[0]); + x0 = _mm_aesenc_si128(x0, sk[1]); + x1 = _mm_aesenc_si128(x1, sk[1]); + x2 = _mm_aesenc_si128(x2, sk[1]); + x3 = _mm_aesenc_si128(x3, sk[1]); + x0 = _mm_aesenc_si128(x0, sk[2]); + x1 = _mm_aesenc_si128(x1, sk[2]); + x2 = _mm_aesenc_si128(x2, sk[2]); + x3 = _mm_aesenc_si128(x3, sk[2]); + x0 = _mm_aesenc_si128(x0, sk[3]); + x1 = _mm_aesenc_si128(x1, sk[3]); + x2 = _mm_aesenc_si128(x2, sk[3]); + x3 = _mm_aesenc_si128(x3, sk[3]); + x0 = _mm_aesenc_si128(x0, sk[4]); + x1 = _mm_aesenc_si128(x1, sk[4]); + x2 = _mm_aesenc_si128(x2, sk[4]); + x3 = _mm_aesenc_si128(x3, sk[4]); + x0 = _mm_aesenc_si128(x0, sk[5]); + x1 = _mm_aesenc_si128(x1, sk[5]); + x2 = _mm_aesenc_si128(x2, sk[5]); + x3 = _mm_aesenc_si128(x3, sk[5]); + x0 = _mm_aesenc_si128(x0, sk[6]); + x1 = _mm_aesenc_si128(x1, sk[6]); + x2 = _mm_aesenc_si128(x2, sk[6]); 
+ x3 = _mm_aesenc_si128(x3, sk[6]); + x0 = _mm_aesenc_si128(x0, sk[7]); + x1 = _mm_aesenc_si128(x1, sk[7]); + x2 = _mm_aesenc_si128(x2, sk[7]); + x3 = _mm_aesenc_si128(x3, sk[7]); + x0 = _mm_aesenc_si128(x0, sk[8]); + x1 = _mm_aesenc_si128(x1, sk[8]); + x2 = _mm_aesenc_si128(x2, sk[8]); + x3 = _mm_aesenc_si128(x3, sk[8]); + x0 = _mm_aesenc_si128(x0, sk[9]); + x1 = _mm_aesenc_si128(x1, sk[9]); + x2 = _mm_aesenc_si128(x2, sk[9]); + x3 = _mm_aesenc_si128(x3, sk[9]); + if (num_rounds == 10) { + x0 = _mm_aesenclast_si128(x0, sk[10]); + x1 = _mm_aesenclast_si128(x1, sk[10]); + x2 = _mm_aesenclast_si128(x2, sk[10]); + x3 = _mm_aesenclast_si128(x3, sk[10]); + } else if (num_rounds == 12) { + x0 = _mm_aesenc_si128(x0, sk[10]); + x1 = _mm_aesenc_si128(x1, sk[10]); + x2 = _mm_aesenc_si128(x2, sk[10]); + x3 = _mm_aesenc_si128(x3, sk[10]); + x0 = _mm_aesenc_si128(x0, sk[11]); + x1 = _mm_aesenc_si128(x1, sk[11]); + x2 = _mm_aesenc_si128(x2, sk[11]); + x3 = _mm_aesenc_si128(x3, sk[11]); + x0 = _mm_aesenclast_si128(x0, sk[12]); + x1 = _mm_aesenclast_si128(x1, sk[12]); + x2 = _mm_aesenclast_si128(x2, sk[12]); + x3 = _mm_aesenclast_si128(x3, sk[12]); + } else { + x0 = _mm_aesenc_si128(x0, sk[10]); + x1 = _mm_aesenc_si128(x1, sk[10]); + x2 = _mm_aesenc_si128(x2, sk[10]); + x3 = _mm_aesenc_si128(x3, sk[10]); + x0 = _mm_aesenc_si128(x0, sk[11]); + x1 = _mm_aesenc_si128(x1, sk[11]); + x2 = _mm_aesenc_si128(x2, sk[11]); + x3 = _mm_aesenc_si128(x3, sk[11]); + x0 = _mm_aesenc_si128(x0, sk[12]); + x1 = _mm_aesenc_si128(x1, sk[12]); + x2 = _mm_aesenc_si128(x2, sk[12]); + x3 = _mm_aesenc_si128(x3, sk[12]); + x0 = _mm_aesenc_si128(x0, sk[13]); + x1 = _mm_aesenc_si128(x1, sk[13]); + x2 = _mm_aesenc_si128(x2, sk[13]); + x3 = _mm_aesenc_si128(x3, sk[13]); + x0 = _mm_aesenclast_si128(x0, sk[14]); + x1 = _mm_aesenclast_si128(x1, sk[14]); + x2 = _mm_aesenclast_si128(x2, sk[14]); + x3 = _mm_aesenclast_si128(x3, sk[14]); + } + if (len >= 64) { + x0 = _mm_xor_si128(x0, + _mm_loadu_si128((void *)(buf + 
0))); + x1 = _mm_xor_si128(x1, + _mm_loadu_si128((void *)(buf + 16))); + x2 = _mm_xor_si128(x2, + _mm_loadu_si128((void *)(buf + 32))); + x3 = _mm_xor_si128(x3, + _mm_loadu_si128((void *)(buf + 48))); + _mm_storeu_si128((void *)(buf + 0), x0); + _mm_storeu_si128((void *)(buf + 16), x1); + _mm_storeu_si128((void *)(buf + 32), x2); + _mm_storeu_si128((void *)(buf + 48), x3); + buf += 64; + len -= 64; + } else { + unsigned char tmp[64]; + + _mm_storeu_si128((void *)(tmp + 0), x0); + _mm_storeu_si128((void *)(tmp + 16), x1); + _mm_storeu_si128((void *)(tmp + 32), x2); + _mm_storeu_si128((void *)(tmp + 48), x3); + for (u = 0; u < len; u ++) { + buf[u] ^= tmp[u]; + } + switch (len) { + case 16: + ivx0 = ivx1; + break; + case 32: + ivx0 = ivx2; + break; + case 48: + ivx0 = ivx3; + break; + } + break; + } + + /* + * Add 4 to each counter value. For carry propagation + * into the upper 64-bit words, we would need to compare + * the results with 4, but SSE2+ has only _signed_ + * comparisons. Instead, we mask out the low two bits, + * and check whether the remaining bits are zero. + */ + ivx0 = _mm_add_epi64(ivx0, four); + ivx1 = _mm_add_epi64(ivx1, four); + ivx2 = _mm_add_epi64(ivx2, four); + ivx3 = _mm_add_epi64(ivx3, four); + ivx0 = _mm_sub_epi64(ivx0, + _mm_slli_si128(_mm_cmpeq_epi64( + _mm_and_si128(ivx0, notthree), zero), 8)); + ivx1 = _mm_sub_epi64(ivx1, + _mm_slli_si128(_mm_cmpeq_epi64( + _mm_and_si128(ivx1, notthree), zero), 8)); + ivx2 = _mm_sub_epi64(ivx2, + _mm_slli_si128(_mm_cmpeq_epi64( + _mm_and_si128(ivx2, notthree), zero), 8)); + ivx3 = _mm_sub_epi64(ivx3, + _mm_slli_si128(_mm_cmpeq_epi64( + _mm_and_si128(ivx3, notthree), zero), 8)); + } + + /* + * Write back new counter value. The loop took care to put the + * right counter value in ivx0. 
+ */ + _mm_storeu_si128((void *)ctr, _mm_shuffle_epi8(ivx0, erev)); +} + +/* see bearssl_block.h */ +/* + * CBC-MAC update. Each 16-byte block of data[] is XORed with the running + * value (initially loaded from cbcmac[]) and encrypted with AES; the + * final running value is stored back into cbcmac[]. The loop consumes + * 16 bytes per iteration and stops only when len reaches zero, so len + * is expected to be a multiple of 16. + */ +BR_TARGET("sse2,sse4.1,aes") +void +br_aes_x86ni_ctrcbc_mac(const br_aes_x86ni_ctrcbc_keys *ctx, + void *cbcmac, const void *data, size_t len) +{ + const unsigned char *buf; + unsigned num_rounds; + __m128i sk[15], ivx; + unsigned u; + + buf = data; + ivx = _mm_loadu_si128(cbcmac); + num_rounds = ctx->num_rounds; + /* Load the num_rounds + 1 subkeys; subkey u is read at offset u << 4. */ + for (u = 0; u <= num_rounds; u ++) { + sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4))); + } + while (len > 0) { + __m128i x; + + x = _mm_xor_si128(_mm_loadu_si128((void *)buf), ivx); + x = _mm_xor_si128(x, sk[0]); + x = _mm_aesenc_si128(x, sk[1]); + x = _mm_aesenc_si128(x, sk[2]); + x = _mm_aesenc_si128(x, sk[3]); + x = _mm_aesenc_si128(x, sk[4]); + x = _mm_aesenc_si128(x, sk[5]); + x = _mm_aesenc_si128(x, sk[6]); + x = _mm_aesenc_si128(x, sk[7]); + x = _mm_aesenc_si128(x, sk[8]); + x = _mm_aesenc_si128(x, sk[9]); + /* Remaining rounds: 10, 12 or (otherwise) 14 rounds in total. */ + if (num_rounds == 10) { + x = _mm_aesenclast_si128(x, sk[10]); + } else if (num_rounds == 12) { + x = _mm_aesenc_si128(x, sk[10]); + x = _mm_aesenc_si128(x, sk[11]); + x = _mm_aesenclast_si128(x, sk[12]); + } else { + x = _mm_aesenc_si128(x, sk[10]); + x = _mm_aesenc_si128(x, sk[11]); + x = _mm_aesenc_si128(x, sk[12]); + x = _mm_aesenc_si128(x, sk[13]); + x = _mm_aesenclast_si128(x, sk[14]); + } + ivx = x; + buf += 16; + len -= 16; + } + _mm_storeu_si128(cbcmac, ivx); +} + +/* see bearssl_block.h */ +/* + * Combined CTR encryption and CBC-MAC. Each 16-byte block of data[] is + * XORed in place with the AES encryption of the current counter block, + * and the CBC-MAC is computed over the resulting output blocks. ctr[] + * is handled as a 128-bit big-endian counter incremented once per + * block; the updated ctr[] and cbcmac[] are written back on exit. The + * loop subtracts 16 from len per block, so len is expected to be a + * multiple of 16. + */ +BR_TARGET("sse2,sse4.1,aes") +void +br_aes_x86ni_ctrcbc_encrypt(const br_aes_x86ni_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len) +{ + unsigned char *buf; + unsigned num_rounds; + __m128i sk[15]; + __m128i ivx, cmx; + __m128i erev, zero, one; + unsigned u; + int first_iter; + + num_rounds = ctx->num_rounds; + for (u = 0; u <= num_rounds; u ++) { + sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4))); + } + + /* + * Some SSE2 constants.
+ */ + erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15); + zero = _mm_setzero_si128(); + one = _mm_set_epi64x(0, 1); + + /* + * Decode the counter in big-endian. + */ + ivx = _mm_shuffle_epi8(_mm_loadu_si128(ctr), erev); + cmx = _mm_loadu_si128(cbcmac); + + buf = data; + first_iter = 1; + while (len > 0) { + __m128i dx, x0, x1; + + /* + * Load initial values: + * dx input block of data (before the CTR XOR) + * x0 counter (for CTR encryption) + * x1 input for CBC-MAC + */ + dx = _mm_loadu_si128((void *)buf); + x0 = _mm_shuffle_epi8(ivx, erev); + x1 = cmx; + + x0 = _mm_xor_si128(x0, sk[0]); + x1 = _mm_xor_si128(x1, sk[0]); + x0 = _mm_aesenc_si128(x0, sk[1]); + x1 = _mm_aesenc_si128(x1, sk[1]); + x0 = _mm_aesenc_si128(x0, sk[2]); + x1 = _mm_aesenc_si128(x1, sk[2]); + x0 = _mm_aesenc_si128(x0, sk[3]); + x1 = _mm_aesenc_si128(x1, sk[3]); + x0 = _mm_aesenc_si128(x0, sk[4]); + x1 = _mm_aesenc_si128(x1, sk[4]); + x0 = _mm_aesenc_si128(x0, sk[5]); + x1 = _mm_aesenc_si128(x1, sk[5]); + x0 = _mm_aesenc_si128(x0, sk[6]); + x1 = _mm_aesenc_si128(x1, sk[6]); + x0 = _mm_aesenc_si128(x0, sk[7]); + x1 = _mm_aesenc_si128(x1, sk[7]); + x0 = _mm_aesenc_si128(x0, sk[8]); + x1 = _mm_aesenc_si128(x1, sk[8]); + x0 = _mm_aesenc_si128(x0, sk[9]); + x1 = _mm_aesenc_si128(x1, sk[9]); + if (num_rounds == 10) { + x0 = _mm_aesenclast_si128(x0, sk[10]); + x1 = _mm_aesenclast_si128(x1, sk[10]); + } else if (num_rounds == 12) { + x0 = _mm_aesenc_si128(x0, sk[10]); + x1 = _mm_aesenc_si128(x1, sk[10]); + x0 = _mm_aesenc_si128(x0, sk[11]); + x1 = _mm_aesenc_si128(x1, sk[11]); + x0 = _mm_aesenclast_si128(x0, sk[12]); + x1 = _mm_aesenclast_si128(x1, sk[12]); + } else { + x0 = _mm_aesenc_si128(x0, sk[10]); + x1 = _mm_aesenc_si128(x1, sk[10]); + x0 = _mm_aesenc_si128(x0, sk[11]); + x1 = _mm_aesenc_si128(x1, sk[11]); + x0 = _mm_aesenc_si128(x0, sk[12]); + x1 = _mm_aesenc_si128(x1, sk[12]); + x0 = _mm_aesenc_si128(x0, sk[13]); + x1 = _mm_aesenc_si128(x1, sk[13]); + x0 = _mm_aesenclast_si128(x0,
sk[14]); + x1 = _mm_aesenclast_si128(x1, sk[14]); + } + + x0 = _mm_xor_si128(x0, dx); + /* + * x1 is the encryption of the previous pending CBC-MAC + * value; on the first iteration there is no pending value + * yet, so the incoming cbcmac[] state is used directly and + * x1 is discarded. + */ + if (first_iter) { + cmx = _mm_xor_si128(cmx, x0); + first_iter = 0; + } else { + cmx = _mm_xor_si128(x1, x0); + } + _mm_storeu_si128((void *)buf, x0); + + buf += 16; + len -= 16; + + /* + * Increment the counter value: add 1 to the low 64-bit + * half, then add the carry into the high half when the + * low half has wrapped around to zero. + */ + ivx = _mm_add_epi64(ivx, one); + ivx = _mm_sub_epi64(ivx, + _mm_slli_si128(_mm_cmpeq_epi64(ivx, zero), 8)); + + /* + * If this was the last iteration, then compute the + * extra block encryption to complete CBC-MAC. + */ + if (len == 0) { + cmx = _mm_xor_si128(cmx, sk[0]); + cmx = _mm_aesenc_si128(cmx, sk[1]); + cmx = _mm_aesenc_si128(cmx, sk[2]); + cmx = _mm_aesenc_si128(cmx, sk[3]); + cmx = _mm_aesenc_si128(cmx, sk[4]); + cmx = _mm_aesenc_si128(cmx, sk[5]); + cmx = _mm_aesenc_si128(cmx, sk[6]); + cmx = _mm_aesenc_si128(cmx, sk[7]); + cmx = _mm_aesenc_si128(cmx, sk[8]); + cmx = _mm_aesenc_si128(cmx, sk[9]); + if (num_rounds == 10) { + cmx = _mm_aesenclast_si128(cmx, sk[10]); + } else if (num_rounds == 12) { + cmx = _mm_aesenc_si128(cmx, sk[10]); + cmx = _mm_aesenc_si128(cmx, sk[11]); + cmx = _mm_aesenclast_si128(cmx, sk[12]); + } else { + cmx = _mm_aesenc_si128(cmx, sk[10]); + cmx = _mm_aesenc_si128(cmx, sk[11]); + cmx = _mm_aesenc_si128(cmx, sk[12]); + cmx = _mm_aesenc_si128(cmx, sk[13]); + cmx = _mm_aesenclast_si128(cmx, sk[14]); + } + break; + } + } + + /* + * Write back new counter value and CBC-MAC value.
+ */ + _mm_storeu_si128(ctr, _mm_shuffle_epi8(ivx, erev)); + _mm_storeu_si128(cbcmac, cmx); +} + +/* see bearssl_block.h */ +/* + * Combined CBC-MAC and CTR decryption. For each 16-byte block of + * data[], the CBC-MAC is updated with the block as read (before the CTR + * stream is applied), and the block is then XORed in place with the AES + * encryption of the current counter block. ctr[] is handled as a + * 128-bit big-endian counter incremented once per block; the updated + * ctr[] and cbcmac[] are written back on exit. The loop subtracts 16 + * from len per block, so len is expected to be a multiple of 16. + */ +BR_TARGET("sse2,sse4.1,aes") +void +br_aes_x86ni_ctrcbc_decrypt(const br_aes_x86ni_ctrcbc_keys *ctx, + void *ctr, void *cbcmac, void *data, size_t len) +{ + unsigned char *buf; + unsigned num_rounds; + __m128i sk[15]; + __m128i ivx, cmx; + __m128i erev, zero, one; + unsigned u; + + num_rounds = ctx->num_rounds; + for (u = 0; u <= num_rounds; u ++) { + sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4))); + } + + /* + * Some SSE2 constants. + */ + erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15); + zero = _mm_setzero_si128(); + one = _mm_set_epi64x(0, 1); + + /* + * Decode the counter in big-endian. + */ + ivx = _mm_shuffle_epi8(_mm_loadu_si128(ctr), erev); + cmx = _mm_loadu_si128(cbcmac); + + buf = data; + while (len > 0) { + __m128i dx, x0, x1; + + /* + * Load initial values: + * dx encrypted block of data + * x0 counter (for CTR encryption) + * x1 input for CBC-MAC + */ + dx = _mm_loadu_si128((void *)buf); + x0 = _mm_shuffle_epi8(ivx, erev); + x1 = _mm_xor_si128(cmx, dx); + + x0 = _mm_xor_si128(x0, sk[0]); + x1 = _mm_xor_si128(x1, sk[0]); + x0 = _mm_aesenc_si128(x0, sk[1]); + x1 = _mm_aesenc_si128(x1, sk[1]); + x0 = _mm_aesenc_si128(x0, sk[2]); + x1 = _mm_aesenc_si128(x1, sk[2]); + x0 = _mm_aesenc_si128(x0, sk[3]); + x1 = _mm_aesenc_si128(x1, sk[3]); + x0 = _mm_aesenc_si128(x0, sk[4]); + x1 = _mm_aesenc_si128(x1, sk[4]); + x0 = _mm_aesenc_si128(x0, sk[5]); + x1 = _mm_aesenc_si128(x1, sk[5]); + x0 = _mm_aesenc_si128(x0, sk[6]); + x1 = _mm_aesenc_si128(x1, sk[6]); + x0 = _mm_aesenc_si128(x0, sk[7]); + x1 = _mm_aesenc_si128(x1, sk[7]); + x0 = _mm_aesenc_si128(x0, sk[8]); + x1 = _mm_aesenc_si128(x1, sk[8]); + x0 = _mm_aesenc_si128(x0, sk[9]); + x1 = _mm_aesenc_si128(x1, sk[9]); + if (num_rounds == 10) { + x0 = _mm_aesenclast_si128(x0, sk[10]); + x1 = _mm_aesenclast_si128(x1, sk[10]); + } else if (num_rounds ==
12) { + x0 = _mm_aesenc_si128(x0, sk[10]); + x1 = _mm_aesenc_si128(x1, sk[10]); + x0 = _mm_aesenc_si128(x0, sk[11]); + x1 = _mm_aesenc_si128(x1, sk[11]); + x0 = _mm_aesenclast_si128(x0, sk[12]); + x1 = _mm_aesenclast_si128(x1, sk[12]); + } else { + x0 = _mm_aesenc_si128(x0, sk[10]); + x1 = _mm_aesenc_si128(x1, sk[10]); + x0 = _mm_aesenc_si128(x0, sk[11]); + x1 = _mm_aesenc_si128(x1, sk[11]); + x0 = _mm_aesenc_si128(x0, sk[12]); + x1 = _mm_aesenc_si128(x1, sk[12]); + x0 = _mm_aesenc_si128(x0, sk[13]); + x1 = _mm_aesenc_si128(x1, sk[13]); + x0 = _mm_aesenclast_si128(x0, sk[14]); + x1 = _mm_aesenclast_si128(x1, sk[14]); + } + x0 = _mm_xor_si128(x0, dx); + cmx = x1; + _mm_storeu_si128((void *)buf, x0); + + buf += 16; + len -= 16; + + /* + * Increment the counter value: add 1 to the low 64-bit + * half, then add the carry into the high half when the + * low half has wrapped around to zero. + */ + ivx = _mm_add_epi64(ivx, one); + ivx = _mm_sub_epi64(ivx, + _mm_slli_si128(_mm_cmpeq_epi64(ivx, zero), 8)); + } + + /* + * Write back new counter value and CBC-MAC value. + */ + _mm_storeu_si128(ctr, _mm_shuffle_epi8(ivx, erev)); + _mm_storeu_si128(cbcmac, cmx); +} + +BR_TARGETS_X86_DOWN + +/* see bearssl_block.h */ +/* + * Function table for this implementation. The casts adapt the functions + * above, which take a br_aes_x86ni_ctrcbc_keys pointer, to the generic + * signatures that take a pointer to the class pointer. The constants 16 + * and 4 are presumably the block size and its base-2 logarithm; confirm + * against the br_block_ctrcbc_class declaration. + */ +const br_block_ctrcbc_class br_aes_x86ni_ctrcbc_vtable = { + sizeof(br_aes_x86ni_ctrcbc_keys), + 16, + 4, + (void (*)(const br_block_ctrcbc_class **, const void *, size_t)) + &br_aes_x86ni_ctrcbc_init, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, void *, size_t)) + &br_aes_x86ni_ctrcbc_encrypt, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, void *, size_t)) + &br_aes_x86ni_ctrcbc_decrypt, + (void (*)(const br_block_ctrcbc_class *const *, + void *, void *, size_t)) + &br_aes_x86ni_ctrcbc_ctr, + (void (*)(const br_block_ctrcbc_class *const *, + void *, const void *, size_t)) + &br_aes_x86ni_ctrcbc_mac +}; + +#else + +/* see bearssl_block.h */ +/* + * Variant compiled on the #else side of a conditional whose #if is not + * part of this excerpt: it reports that no vtable is available. + */ +const br_block_ctrcbc_class * +br_aes_x86ni_ctrcbc_get_vtable(void) +{ + return NULL; +} + +#endif diff --git a/third_party/bearssl/src/aesctr_drbg.c b/third_party/bearssl/src/aesctr_drbg.c new file mode
100644 index 0000000..8dbd501 --- /dev/null +++ b/third_party/bearssl/src/aesctr_drbg.c @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_rand.h */ +/* + * Initialise the DRBG context: the AES/CTR implementation 'aesctr' is + * first keyed with a 16-byte all-zero key, the block counter is cleared, + * and the seed (len bytes) is then mixed in through + * br_aesctr_drbg_update(). + */ +void +br_aesctr_drbg_init(br_aesctr_drbg_context *ctx, + const br_block_ctr_class *aesctr, + const void *seed, size_t len) +{ + unsigned char tmp[16]; + + ctx->vtable = &br_aesctr_drbg_vtable; + memset(tmp, 0, sizeof tmp); + aesctr->init(&ctx->sk.vtable, tmp, 16); + ctx->cc = 0; + br_aesctr_drbg_update(ctx, seed, len); +} + +/* see bearssl_rand.h */ +/* + * Write len pseudorandom bytes into out[]. Each chunk of the destination + * is zeroed and then run through AES/CTR with an all-zero 12-byte IV and + * the block counter ctx->cc, so that it receives the raw key stream. + */ +void +br_aesctr_drbg_generate(br_aesctr_drbg_context *ctx, void *out, size_t len) +{ + unsigned char *buf; + unsigned char iv[12]; + + buf = out; + memset(iv, 0, sizeof iv); + while (len > 0) { + size_t clen; + + /* + * We generate data by blocks of at most 65280 bytes.
This + * allows for unambiguously testing the counter overflow + * condition; also, it should work on 16-bit architectures + * (where 'size_t' is 16 bits only). + */ + clen = len; + if (clen > 65280) { + clen = 65280; + } + + /* + * We make sure that the counter won't exceed the configured + * limit. + */ + if ((uint32_t)(ctx->cc + ((clen + 15) >> 4)) > 32768) { + clen = (32768 - ctx->cc) << 4; + if (clen > len) { + clen = len; + } + } + + /* + * Run CTR. + */ + memset(buf, 0, clen); + ctx->cc = ctx->sk.vtable->run(&ctx->sk.vtable, + iv, ctx->cc, buf, clen); + buf += clen; + len -= clen; + + /* + * Every 32768 blocks, we force a state update. + */ + if (ctx->cc >= 32768) { + br_aesctr_drbg_update(ctx, NULL, 0); + } + } +} + +/* see bearssl_rand.h */ +/* + * Mix len bytes of seed (len may be zero) into the DRBG state: a new + * 128-bit AES key is derived from the current key and the seed, and the + * block counter is reset to zero. + */ +void +br_aesctr_drbg_update(br_aesctr_drbg_context *ctx, const void *seed, size_t len) +{ + /* + * We use a Hirose construction on AES-256 to make a hash function. + * Function definition: + * - running state consists in two 16-byte blocks G and H + * - initial values of G and H are conventional + * - there is a fixed block-sized constant C + * - for next data block m: + * set AES key to H||m + * G' = E(G) xor G + * H' = E(G xor C) xor G xor C + * G <- G', H <- H' + * - once all blocks have been processed, output is H||G + * + * Constants: + * G_init = B6 B6 ... B6 + * H_init = 5A 5A ... 5A + * C = 01 00 ... 00 + * + * With this hash function h(), we compute the new state as + * follows: + * - produce a state-dependent value s as encryption of an + * all-one block with AES and the current key + * - compute the new key as the first 128 bits of h(s||seed) + * + * Original Hirose article: + * https://www.iacr.org/archive/fse2006/40470213/40470213.pdf + */ + + unsigned char s[16], iv[12]; + unsigned char G[16], H[16]; + int first; + + /* + * Use an all-one IV to get a fresh output block that depends on the + * current seed.
+ */ + memset(iv, 0xFF, sizeof iv); + memset(s, 0, 16); + ctx->sk.vtable->run(&ctx->sk.vtable, iv, 0xFFFFFFFF, s, 16); + + /* + * Set G[] and H[] to conventional start values. + */ + memset(G, 0xB6, sizeof G); + memset(H, 0x5A, sizeof H); + + /* + * Process the concatenation of the current state and the seed + * with the custom hash function. + */ + first = 1; + for (;;) { + unsigned char tmp[32]; + unsigned char newG[16]; + + /* + * Assemble new key H||m into tmp[]. The first block m is + * s[]; subsequent blocks are 16-byte chunks of the seed, + * the last one being padded with zeros if shorter. + */ + memcpy(tmp, H, 16); + if (first) { + memcpy(tmp + 16, s, 16); + first = 0; + } else { + size_t clen; + + if (len == 0) { + break; + } + clen = len < 16 ? len : 16; + memcpy(tmp + 16, seed, clen); + memset(tmp + 16 + clen, 0, 16 - clen); + seed = (const unsigned char *)seed + clen; + len -= clen; + } + ctx->sk.vtable->init(&ctx->sk.vtable, tmp, 32); + + /* + * Compute new G and H values. Each CTR run below covers a + * single block whose counter block is iv[] (12 bytes) + * followed by the 32-bit counter, i.e. G (then G with its + * first byte XORed with 0x01); the key stream is XORed into + * a copy of that same block. + */ + memcpy(iv, G, 12); + memcpy(newG, G, 16); + ctx->sk.vtable->run(&ctx->sk.vtable, iv, + br_dec32be(G + 12), newG, 16); + iv[0] ^= 0x01; + memcpy(H, G, 16); + H[0] ^= 0x01; + ctx->sk.vtable->run(&ctx->sk.vtable, iv, + br_dec32be(G + 12), H, 16); + memcpy(G, newG, 16); + } + + /* + * Output hash value is H||G. We truncate it to its first 128 bits, + * i.e. H; that's our new AES key.
+ */ + ctx->sk.vtable->init(&ctx->sk.vtable, H, 16); + ctx->cc = 0; +} + +/* see bearssl_rand.h */ +/* + * Function table for this PRNG. The casts adapt the functions above, + * which take a br_aesctr_drbg_context pointer, to the generic + * br_prng_class signatures. + */ +const br_prng_class br_aesctr_drbg_vtable = { + sizeof(br_aesctr_drbg_context), + (void (*)(const br_prng_class **, const void *, const void *, size_t)) + &br_aesctr_drbg_init, + (void (*)(const br_prng_class **, void *, size_t)) + &br_aesctr_drbg_generate, + (void (*)(const br_prng_class **, const void *, size_t)) + &br_aesctr_drbg_update +}; diff --git a/third_party/bearssl/src/asn1enc.c b/third_party/bearssl/src/asn1enc.c new file mode 100644 index 0000000..7a74963 --- /dev/null +++ b/third_party/bearssl/src/asn1enc.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#include "inner.h" + +/* see inner.h */ +/* + * Prepare an unsigned big-endian integer (xlen bytes at xdata) for + * encoding. Leading zero bytes are skipped; 'data' and 'len' describe + * the remaining bytes, and 'asn1len' is that length plus one when an + * extra leading 0x00 byte is needed (no byte left, or top bit of the + * first remaining byte set). + */ +br_asn1_uint +br_asn1_uint_prepare(const void *xdata, size_t xlen) +{ + const unsigned char *x; + br_asn1_uint t; + + x = xdata; + while (xlen > 0 && *x == 0) { + x ++; + xlen --; + } + t.data = x; + t.len = xlen; + t.asn1len = xlen; + if (xlen == 0 || x[0] >= 0x80) { + t.asn1len ++; + } + return t; +} + +/* see inner.h */ +/* + * Encode 'len' as a length field: a single byte if len < 0x80, otherwise + * a byte of value 0x80 + i followed by the i significant bytes of len, + * most significant first. If dest is NULL, nothing is written. Returned + * value is the size of the encoding, in bytes. + */ +size_t +br_asn1_encode_length(void *dest, size_t len) +{ + unsigned char *buf; + size_t z; + int i, j; + + buf = dest; + if (len < 0x80) { + if (buf != NULL) { + *buf = len; + } + return 1; + } + /* Count the significant bytes of len. */ + i = 0; + for (z = len; z != 0; z >>= 8) { + i ++; + } + if (buf != NULL) { + *buf ++ = 0x80 + i; + for (j = i - 1; j >= 0; j --) { + *buf ++ = len >> (j << 3); + } + } + return i + 1; +} + +/* see inner.h */ +/* + * Encode an integer described by 'pp' (as produced by + * br_asn1_uint_prepare()): tag byte 0x02, then the length field for + * pp.asn1len, then pp.asn1len value bytes. The first value byte is set + * to 0x00; it is overwritten by the data when pp.asn1len equals pp.len. + * If dest is NULL, nothing is written. Returned value is the total + * encoded size, in bytes. + */ +size_t +br_asn1_encode_uint(void *dest, br_asn1_uint pp) +{ + unsigned char *buf; + size_t lenlen; + + if (dest == NULL) { + return 1 + br_asn1_encode_length(NULL, pp.asn1len) + pp.asn1len; + } + buf = dest; + *buf ++ = 0x02; + lenlen = br_asn1_encode_length(buf, pp.asn1len); + buf += lenlen; + *buf = 0x00; + memcpy(buf + pp.asn1len - pp.len, pp.data, pp.len); + return 1 + lenlen + pp.asn1len; +} diff --git a/third_party/bearssl/src/ccm.c b/third_party/bearssl/src/ccm.c new file mode 100644 index 0000000..68cc913 --- /dev/null +++ b/third_party/bearssl/src/ccm.c @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or
substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Implementation Notes + * ==================== + * + * The combined CTR + CBC-MAC functions can only handle full blocks, + * so some buffering is necessary. + * + * - 'ptr' contains a value from 0 to 15, which is the number of bytes + * accumulated in buf[] that still needs to be processed with the + * current CBC-MAC computation. + * + * - When processing the message itself, CTR encryption/decryption is + * also done at the same time. The first 'ptr' bytes of buf[] then + * contains the plaintext bytes, while the last '16 - ptr' bytes of + * buf[] are the remnants of the stream block, to be used against + * the next input bytes, when available. When 'ptr' is 0, the + * contents of buf[] are to be ignored. + * + * - The current counter and running CBC-MAC values are kept in 'ctr' + * and 'cbcmac', respectively. + */ + +/* see bearssl_block.h */ +/* + * Bind the CCM context to a block cipher (CTR + CBC-MAC) context. Only + * the pointer is recorded here; no other field is initialised. + */ +void +br_ccm_init(br_ccm_context *ctx, const br_block_ctrcbc_class **bctx) +{ + ctx->bctx = bctx; +} + +/* see bearssl_block.h */ +/* + * Start a new CCM computation. Returned value is 0 if nonce_len is not + * in the 7..13 range, if tag_len is not an even value in the 4..16 + * range, or if data_len does not fit in the 15 - nonce_len bytes + * reserved for its encoding; otherwise 1 is returned and the context is + * ready to receive the additional authenticated data. + */ +int +br_ccm_reset(br_ccm_context *ctx, const void *nonce, size_t nonce_len, + uint64_t aad_len, uint64_t data_len, size_t tag_len) +{ + unsigned char tmp[16]; + unsigned u, q; + + if (nonce_len < 7 || nonce_len > 13) { + return 0; + } + if (tag_len < 4 || tag_len > 16 || (tag_len & 1) != 0) { + return 0; + } + q = 15 - (unsigned)nonce_len; + ctx->tag_len = tag_len; + + /* + * Block B0, to start CBC-MAC. + */ + tmp[0] = (aad_len > 0 ?
0x40 : 0x00) + | (((unsigned)tag_len - 2) << 2) + | (q - 1); + memcpy(tmp + 1, nonce, nonce_len); + for (u = 0; u < q; u ++) { + tmp[15 - u] = (unsigned char)data_len; + data_len >>= 8; + } + if (data_len != 0) { + /* + * If the data length was not entirely consumed in the + * loop above, then it exceeds the maximum limit of + * q bytes (when encoded). + */ + return 0; + } + + /* + * Start CBC-MAC. + */ + memset(ctx->cbcmac, 0, sizeof ctx->cbcmac); + (*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, tmp, sizeof tmp); + + /* + * Assemble AAD length header. The header is left in buf[] (2, 6 + * or 10 bytes, none if aad_len is zero) so that it is processed + * together with the first AAD bytes. + */ + if ((aad_len >> 32) != 0) { + ctx->buf[0] = 0xFF; + ctx->buf[1] = 0xFF; + br_enc64be(ctx->buf + 2, aad_len); + ctx->ptr = 10; + } else if (aad_len >= 0xFF00) { + ctx->buf[0] = 0xFF; + ctx->buf[1] = 0xFE; + br_enc32be(ctx->buf + 2, (uint32_t)aad_len); + ctx->ptr = 6; + } else if (aad_len > 0) { + br_enc16be(ctx->buf, (unsigned)aad_len); + ctx->ptr = 2; + } else { + ctx->ptr = 0; + } + + /* + * Make initial counter value and compute tag mask. + */ + ctx->ctr[0] = q - 1; + memcpy(ctx->ctr + 1, nonce, nonce_len); + memset(ctx->ctr + 1 + nonce_len, 0, q); + memset(ctx->tagmask, 0, sizeof ctx->tagmask); + (*ctx->bctx)->ctr(ctx->bctx, ctx->ctr, + ctx->tagmask, sizeof ctx->tagmask); + + return 1; +} + +/* see bearssl_block.h */ +/* + * Feed len bytes of additional authenticated data into the CBC-MAC. + * Complete blocks are processed immediately; trailing bytes that do not + * fill a block are kept in ctx->buf[] (count in ctx->ptr) until more + * data arrives or br_ccm_flip() is called. + */ +void +br_ccm_aad_inject(br_ccm_context *ctx, const void *data, size_t len) +{ + const unsigned char *dbuf; + size_t ptr; + + dbuf = data; + + /* + * Complete partial block, if needed. + */ + ptr = ctx->ptr; + if (ptr != 0) { + size_t clen; + + clen = (sizeof ctx->buf) - ptr; + if (clen > len) { + memcpy(ctx->buf + ptr, dbuf, len); + ctx->ptr = ptr + len; + return; + } + memcpy(ctx->buf + ptr, dbuf, clen); + dbuf += clen; + len -= clen; + (*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, + ctx->buf, sizeof ctx->buf); + } + + /* + * Process complete blocks.
+ */ + ptr = len & 15; + len -= ptr; + (*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, dbuf, len); + dbuf += len; + + /* + * Copy last partial block in the context buffer. + */ + memcpy(ctx->buf, dbuf, ptr); + ctx->ptr = ptr; +} + +/* see bearssl_block.h */ +/* + * Mark the end of the additional authenticated data: a buffered partial + * block, if any, is padded with zeros and processed, and ctx->ptr is + * reset to 0. + */ +void +br_ccm_flip(br_ccm_context *ctx) +{ + size_t ptr; + + /* + * Complete AAD partial block with zeros, if necessary. + */ + ptr = ctx->ptr; + if (ptr != 0) { + memset(ctx->buf + ptr, 0, (sizeof ctx->buf) - ptr); + (*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, + ctx->buf, sizeof ctx->buf); + ctx->ptr = 0; + } + + /* + * Counter was already set by br_ccm_reset(). + */ +} + +/* see bearssl_block.h */ +/* + * Encrypt (encrypt != 0) or decrypt (encrypt == 0) len bytes of data[] + * in place, with the CBC-MAC computed over the plaintext. Data may be + * provided in chunks of arbitrary length: a partial block is carried + * over in ctx->buf[] (count in ctx->ptr) between calls. + */ +void +br_ccm_run(br_ccm_context *ctx, int encrypt, void *data, size_t len) +{ + unsigned char *dbuf; + size_t ptr; + + dbuf = data; + + /* + * Complete a partial block, if any: ctx->buf[] contains + * ctx->ptr plaintext bytes (already reported), and the other + * bytes are CTR stream output. + */ + ptr = ctx->ptr; + if (ptr != 0) { + size_t clen; + size_t u; + + clen = (sizeof ctx->buf) - ptr; + if (clen > len) { + clen = len; + } + if (encrypt) { + for (u = 0; u < clen; u ++) { + unsigned w, x; + + w = ctx->buf[ptr + u]; + x = dbuf[u]; + ctx->buf[ptr + u] = x; + dbuf[u] = w ^ x; + } + } else { + for (u = 0; u < clen; u ++) { + unsigned w; + + w = ctx->buf[ptr + u] ^ dbuf[u]; + dbuf[u] = w; + ctx->buf[ptr + u] = w; + } + } + dbuf += clen; + len -= clen; + ptr += clen; + if (ptr < sizeof ctx->buf) { + ctx->ptr = ptr; + return; + } + (*ctx->bctx)->mac(ctx->bctx, + ctx->cbcmac, ctx->buf, sizeof ctx->buf); + } + + /* + * Process all complete blocks. Note that the ctrcbc API is for + * encrypt-then-MAC (CBC-MAC is computed over the encrypted + * blocks) while CCM uses MAC-and-encrypt (CBC-MAC is computed + * over the plaintext blocks).
Therefore, we need to use the + * _decryption_ function for encryption, and the encryption + * function for decryption (this works because CTR encryption + * and decryption are identical, so the choice really is about + * computing the CBC-MAC before or after XORing with the CTR + * stream). + */ + ptr = len & 15; + len -= ptr; + if (encrypt) { + (*ctx->bctx)->decrypt(ctx->bctx, ctx->ctr, ctx->cbcmac, + dbuf, len); + } else { + (*ctx->bctx)->encrypt(ctx->bctx, ctx->ctr, ctx->cbcmac, + dbuf, len); + } + dbuf += len; + + /* + * If there is some remaining data, then we need to compute an + * extra block of CTR stream. After the loops below, the first + * 'ptr' bytes of buf[] hold plaintext and the rest holds the + * unused part of the stream block. + */ + if (ptr != 0) { + size_t u; + + memset(ctx->buf, 0, sizeof ctx->buf); + (*ctx->bctx)->ctr(ctx->bctx, ctx->ctr, + ctx->buf, sizeof ctx->buf); + if (encrypt) { + for (u = 0; u < ptr; u ++) { + unsigned w, x; + + w = ctx->buf[u]; + x = dbuf[u]; + ctx->buf[u] = x; + dbuf[u] = w ^ x; + } + } else { + for (u = 0; u < ptr; u ++) { + unsigned w; + + w = ctx->buf[u] ^ dbuf[u]; + dbuf[u] = w; + ctx->buf[u] = w; + } + } + } + ctx->ptr = ptr; +} + +/* see bearssl_block.h */ +/* + * Finish the CBC-MAC (padding a buffered partial block with zeros), XOR + * the tag mask into it, and copy the first ctx->tag_len bytes into + * tag[]. Returned value is the tag length. ctx->cbcmac[] is modified by + * this call. + */ +size_t +br_ccm_get_tag(br_ccm_context *ctx, void *tag) +{ + size_t ptr; + size_t u; + + /* + * If there is some buffered data, then we need to pad it with + * zeros and finish up CBC-MAC. + */ + ptr = ctx->ptr; + if (ptr != 0) { + memset(ctx->buf + ptr, 0, (sizeof ctx->buf) - ptr); + (*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, + ctx->buf, sizeof ctx->buf); + } + + /* + * XOR the tag mask into the CBC-MAC output.
+ */ + for (u = 0; u < ctx->tag_len; u ++) { + ctx->cbcmac[u] ^= ctx->tagmask[u]; + } + memcpy(tag, ctx->cbcmac, ctx->tag_len); + return ctx->tag_len; +} + +/* see bearssl_block.h */ +/* + * Compute the tag with br_ccm_get_tag() and compare it with the bytes + * at tag[]. All byte differences are ORed into z, with no early exit; + * the result is EQ0(z). EQ0 is defined elsewhere; presumably it yields + * 1 when z is zero (tags match) and 0 otherwise. + */ +uint32_t +br_ccm_check_tag(br_ccm_context *ctx, const void *tag) +{ + unsigned char tmp[16]; + size_t u, tag_len; + uint32_t z; + + tag_len = br_ccm_get_tag(ctx, tmp); + z = 0; + for (u = 0; u < tag_len; u ++) { + z |= tmp[u] ^ ((const unsigned char *)tag)[u]; + } + return EQ0(z); +} diff --git a/third_party/bearssl/src/ccopy.c b/third_party/bearssl/src/ccopy.c new file mode 100644 index 0000000..2beace7 --- /dev/null +++ b/third_party/bearssl/src/ccopy.c @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#include "inner.h" + +/* see inner.h */ +/* + * Conditional copy: each of the len bytes of dst[] is replaced with + * MUX(ctl, src[i], dst[i]). MUX is defined elsewhere; presumably it + * yields its second argument when ctl is 1 and its third when ctl is 0. + * Every byte of src[] is read and every byte of dst[] is read and + * written, whatever the value of ctl. + */ +void +br_ccopy(uint32_t ctl, void *dst, const void *src, size_t len) +{ + unsigned char *d; + const unsigned char *s; + + d = dst; + s = src; + while (len -- > 0) { + uint32_t x, y; + + x = *s ++; + y = *d; + *d = MUX(ctl, x, y); + d ++; + } +} diff --git a/third_party/bearssl/src/chacha20_ct.c b/third_party/bearssl/src/chacha20_ct.c new file mode 100644 index 0000000..9961eb1 --- /dev/null +++ b/third_party/bearssl/src/chacha20_ct.c @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#include "inner.h" + +/* see bearssl_block.h */ +/* + * ChaCha20: XOR len bytes of data[] in place with the key stream. The + * key is read as eight 32-bit little-endian words (32 bytes) and the IV + * as three such words (12 bytes); cc is the 32-bit counter of the first + * 64-byte block. Returned value is cc incremented by the number of + * blocks that were produced (a final partial block counts as one). + */ +uint32_t +br_chacha20_ct_run(const void *key, + const void *iv, uint32_t cc, void *data, size_t len) +{ + unsigned char *buf; + uint32_t kw[8], ivw[3]; + size_t u; + + static const uint32_t CW[] = { + 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 + }; + + buf = data; + for (u = 0; u < 8; u ++) { + kw[u] = br_dec32le((const unsigned char *)key + (u << 2)); + } + for (u = 0; u < 3; u ++) { + ivw[u] = br_dec32le((const unsigned char *)iv + (u << 2)); + } + while (len > 0) { + uint32_t state[16]; + int i; + size_t clen; + unsigned char tmp[64]; + + /* + * Initial state: constants, key, counter, IV. + */ + memcpy(&state[0], CW, sizeof CW); + memcpy(&state[4], kw, sizeof kw); + state[12] = cc; + memcpy(&state[13], ivw, sizeof ivw); + /* + * Ten iterations, each applying the quarter-round to the + * four columns and then to the four diagonals of the + * state (seen as a 4x4 matrix of words). + */ + for (i = 0; i < 10; i ++) { + +#define QROUND(a, b, c, d) do { \ + state[a] += state[b]; \ + state[d] ^= state[a]; \ + state[d] = (state[d] << 16) | (state[d] >> 16); \ + state[c] += state[d]; \ + state[b] ^= state[c]; \ + state[b] = (state[b] << 12) | (state[b] >> 20); \ + state[a] += state[b]; \ + state[d] ^= state[a]; \ + state[d] = (state[d] << 8) | (state[d] >> 24); \ + state[c] += state[d]; \ + state[b] ^= state[c]; \ + state[b] = (state[b] << 7) | (state[b] >> 25); \ + } while (0) + + QROUND( 0, 4, 8, 12); + QROUND( 1, 5, 9, 13); + QROUND( 2, 6, 10, 14); + QROUND( 3, 7, 11, 15); + QROUND( 0, 5, 10, 15); + QROUND( 1, 6, 11, 12); + QROUND( 2, 7, 8, 13); + QROUND( 3, 4, 9, 14); + +#undef QROUND + + } + /* + * Add the initial state words to the result and encode the + * 64-byte key stream block in little-endian. + */ + for (u = 0; u < 4; u ++) { + br_enc32le(&tmp[u << 2], state[u] + CW[u]); + } + for (u = 4; u < 12; u ++) { + br_enc32le(&tmp[u << 2], state[u] + kw[u - 4]); + } + br_enc32le(&tmp[48], state[12] + cc); + for (u = 13; u < 16; u ++) { + br_enc32le(&tmp[u << 2], state[u] + ivw[u - 13]); + } + + clen = len < 64 ?
len : 64; + for (u = 0; u < clen; u ++) { + buf[u] ^= tmp[u]; + } + buf += clen; + len -= clen; + cc ++; + } + return cc; +} diff --git a/third_party/bearssl/src/chacha20_sse2.c b/third_party/bearssl/src/chacha20_sse2.c new file mode 100644 index 0000000..92b4a4a --- /dev/null +++ b/third_party/bearssl/src/chacha20_sse2.c @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define BR_ENABLE_INTRINSICS 1 +#include "inner.h" + +#if BR_SSE2 + +/* + * This file contains a ChaCha20 implementation that leverages SSE2 + * opcodes for better performance. + */ + +/* see bearssl_block.h */ +br_chacha20_run +br_chacha20_sse2_get(void) +{ + /* + * If using 64-bit mode, then SSE2 opcodes should be automatically + * available, since they are part of the ABI. + * + * In 32-bit mode, we use CPUID to detect the SSE2 feature.
/* see bearssl_block.h */
/*
 * ChaCha20 keystream generation and in-place XOR, using SSE2 intrinsics.
 *
 *   key   32 bytes (read with two unaligned 16-byte loads)
 *   iv    12 bytes (copied after the counter into a 16-byte block)
 *   cc    block counter for the first 64-byte block
 *   data  buffer of len bytes, XORed in place with the keystream
 *
 * The 16-word state is held as four 128-bit values of four 32-bit lanes
 * each. Returned value is lane 0 of the counter/IV vector after the
 * last processed block; the counter is incremented before the data XOR,
 * so a trailing partial block is counted too.
 */
BR_TARGET("sse2")
uint32_t
br_chacha20_sse2_run(const void *key,
	const void *iv, uint32_t cc, void *data, size_t len)
{
	unsigned char *buf;
	uint32_t ivtmp[4];
	__m128i kw0, kw1;
	__m128i iw, cw;
	__m128i one;

	static const uint32_t CW[] = {
		0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
	};

	buf = data;
	kw0 = _mm_loadu_si128(key);
	kw1 = _mm_loadu_si128((const void *)((const unsigned char *)key + 16));
	/*
	 * Fourth state row: counter in lane 0, the 12 IV bytes in lanes 1..3.
	 */
	ivtmp[0] = cc;
	memcpy(ivtmp + 1, iv, 12);
	iw = _mm_loadu_si128((const void *)ivtmp);
	cw = _mm_loadu_si128((const void *)CW);
	/*
	 * Value 1 in lane 0 only: adding it to iw bumps the counter lane
	 * and leaves the IV lanes untouched (32-bit lane-wise addition).
	 */
	one = _mm_set_epi32(0, 0, 0, 1);

	while (len > 0) {
		/*
		 * sj contains state words 4*j to 4*j+3.
		 */
		__m128i s0, s1, s2, s3;
		int i;

		s0 = cw;
		s1 = kw0;
		s2 = kw1;
		s3 = iw;
		for (i = 0; i < 10; i ++) {
			/*
			 * Even round is straightforward application on
			 * the state words.
			 */
			/*
			 * There is no SSE2 rotate opcode used here: each
			 * 32-bit rotation is a left shift ORed with the
			 * complementary right shift (16, 12, 8, then 7 bits).
			 */
			s0 = _mm_add_epi32(s0, s1);
			s3 = _mm_xor_si128(s3, s0);
			s3 = _mm_or_si128(
				_mm_slli_epi32(s3, 16),
				_mm_srli_epi32(s3, 16));

			s2 = _mm_add_epi32(s2, s3);
			s1 = _mm_xor_si128(s1, s2);
			s1 = _mm_or_si128(
				_mm_slli_epi32(s1, 12),
				_mm_srli_epi32(s1, 20));

			s0 = _mm_add_epi32(s0, s1);
			s3 = _mm_xor_si128(s3, s0);
			s3 = _mm_or_si128(
				_mm_slli_epi32(s3, 8),
				_mm_srli_epi32(s3, 24));

			s2 = _mm_add_epi32(s2, s3);
			s1 = _mm_xor_si128(s1, s2);
			s1 = _mm_or_si128(
				_mm_slli_epi32(s1, 7),
				_mm_srli_epi32(s1, 25));

			/*
			 * For the odd round, we must rotate some state
			 * words so that the computations apply on the
			 * right combinations of words.
			 */
			/*
			 * Shuffle selectors: 0x39 moves lane k+1 into lane k,
			 * 0x4E swaps the two 64-bit halves, 0x93 moves lane k
			 * into lane k+1 (all with wrap-around).
			 */
			s1 = _mm_shuffle_epi32(s1, 0x39);
			s2 = _mm_shuffle_epi32(s2, 0x4E);
			s3 = _mm_shuffle_epi32(s3, 0x93);

			s0 = _mm_add_epi32(s0, s1);
			s3 = _mm_xor_si128(s3, s0);
			s3 = _mm_or_si128(
				_mm_slli_epi32(s3, 16),
				_mm_srli_epi32(s3, 16));

			s2 = _mm_add_epi32(s2, s3);
			s1 = _mm_xor_si128(s1, s2);
			s1 = _mm_or_si128(
				_mm_slli_epi32(s1, 12),
				_mm_srli_epi32(s1, 20));

			s0 = _mm_add_epi32(s0, s1);
			s3 = _mm_xor_si128(s3, s0);
			s3 = _mm_or_si128(
				_mm_slli_epi32(s3, 8),
				_mm_srli_epi32(s3, 24));

			s2 = _mm_add_epi32(s2, s3);
			s1 = _mm_xor_si128(s1, s2);
			s1 = _mm_or_si128(
				_mm_slli_epi32(s1, 7),
				_mm_srli_epi32(s1, 25));

			/*
			 * After the odd round, we rotate back the values
			 * to undo the rotate at the start of the odd round.
			 */
			s1 = _mm_shuffle_epi32(s1, 0x93);
			s2 = _mm_shuffle_epi32(s2, 0x4E);
			s3 = _mm_shuffle_epi32(s3, 0x39);
		}

		/*
		 * Addition with the initial state.
		 */
		s0 = _mm_add_epi32(s0, cw);
		s1 = _mm_add_epi32(s1, kw0);
		s2 = _mm_add_epi32(s2, kw1);
		s3 = _mm_add_epi32(s3, iw);

		/*
		 * Increment block counter.
		 */
		iw = _mm_add_epi32(iw, one);

		/*
		 * XOR final state with the data.
		 */
		if (len < 64) {
			/*
			 * Final partial block: spill the keystream to a
			 * temporary so that only len bytes of buf are
			 * touched, then leave the loop.
			 */
			unsigned char tmp[64];
			size_t u;

			_mm_storeu_si128((void *)(tmp +  0), s0);
			_mm_storeu_si128((void *)(tmp + 16), s1);
			_mm_storeu_si128((void *)(tmp + 32), s2);
			_mm_storeu_si128((void *)(tmp + 48), s3);
			for (u = 0; u < len; u ++) {
				buf[u] ^= tmp[u];
			}
			break;
		} else {
			/*
			 * Full block: XOR 64 bytes directly with unaligned
			 * vector loads and stores.
			 */
			__m128i b0, b1, b2, b3;

			b0 = _mm_loadu_si128((const void *)(buf +  0));
			b1 = _mm_loadu_si128((const void *)(buf + 16));
			b2 = _mm_loadu_si128((const void *)(buf + 32));
			b3 = _mm_loadu_si128((const void *)(buf + 48));
			b0 = _mm_xor_si128(b0, s0);
			b1 = _mm_xor_si128(b1, s1);
			b2 = _mm_xor_si128(b2, s2);
			b3 = _mm_xor_si128(b3, s3);
			_mm_storeu_si128((void *)(buf +  0), b0);
			_mm_storeu_si128((void *)(buf + 16), b1);
			_mm_storeu_si128((void *)(buf + 32), b2);
			_mm_storeu_si128((void *)(buf + 48), b3);
			buf += 64;
			len -= 64;
		}
	}

	/*
	 * _mm_extract_epi32() requires SSE4.1. We prefer to stick to
	 * raw SSE2, thus we use _mm_extract_epi16().
	 */
	/*
	 * 16-bit elements 0 and 1 together form 32-bit lane 0 of iw, i.e.
	 * the updated block counter.
	 */
	return (uint32_t)_mm_extract_epi16(iw, 0)
		| ((uint32_t)_mm_extract_epi16(iw, 1) << 16);
}
notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef CONFIG_H__ +#define CONFIG_H__ + +/* + * This file contains compile-time flags that can override the + * autodetection performed in relevant files. Each flag is a macro; it + * deactivates the feature if defined to 0, activates it if defined to a + * non-zero integer (normally 1). If the macro is not defined, then + * autodetection applies. + */ + +/* The x86 intrinsics seem to be incomplete compared to what aes_x86ni expects when compiling with NXDK */ +#ifdef NXDK +#define BR_AES_X86NI 0 +#define BR_ENABLE_INTRINSICS 0 +#define BR_SSE2 0 +#define BR_RDRAND 0 +#undef _WIN32 +#endif + +/* + * When BR_64 is enabled, 64-bit integer types are assumed to be + * efficient (i.e. the architecture has 64-bit registers and can + * do 64-bit operations as fast as 32-bit operations). + * +#define BR_64 1 + */ + +/* + * When BR_LOMUL is enabled, then multiplications of 32-bit values whose + * result are truncated to the low 32 bits are assumed to be + * substantially more efficient than 32-bit multiplications that yield + * 64-bit results. This is typically the case on low-end ARM Cortex M + * systems (M0, M0+, M1, and arguably M3 and M4 as well). 
+ * +#define BR_LOMUL 1 + */ + +/* + * When BR_SLOW_MUL is enabled, multiplications are assumed to be + * substantially slow with regards to other integer operations, thus + * making it worth to make more operations for a given task if it allows + * using less multiplications. + * +#define BR_SLOW_MUL 1 + */ + +/* + * When BR_SLOW_MUL15 is enabled, short multiplications (on 15-bit words) + * are assumed to be substantially slow with regards to other integer + * operations, thus making it worth to make more integer operations if + * it allows using less multiplications. + * +#define BR_SLOW_MUL15 1 + */ + +/* + * When BR_CT_MUL31 is enabled, multiplications of 31-bit values (used + * in the "i31" big integer implementation) use an alternate implementation + * which is slower and larger than the normal multiplication, but should + * ensure constant-time multiplications even on architectures where the + * multiplication opcode takes a variable number of cycles to complete. + * +#define BR_CT_MUL31 1 + */ + +/* + * When BR_CT_MUL15 is enabled, multiplications of 15-bit values (held + * in 32-bit words) use an alternate implementation which is slower and + * larger than the normal multiplication, but should ensure + * constant-time multiplications on most/all architectures where the + * basic multiplication is not constant-time. +#define BR_CT_MUL15 1 + */ + +/* + * When BR_NO_ARITH_SHIFT is enabled, arithmetic right shifts (with sign + * extension) are performed with a sequence of operations which is bigger + * and slower than a simple right shift on a signed value. This avoids + * relying on an implementation-defined behaviour. However, most if not + * all C compilers use sign extension for right shifts on signed values, + * so this alternate macro is disabled by default. +#define BR_NO_ARITH_SHIFT 1 + */ + +/* + * When BR_RDRAND is enabled, the SSL engine will use the RDRAND opcode + * to automatically obtain quality randomness for seeding its internal + * PRNG. 
Since that opcode is present only in recent x86 CPU, its + * support is dynamically tested; if the current CPU does not support + * it, then another random source will be used, such as /dev/urandom or + * CryptGenRandom(). + * +#define BR_RDRAND 1 + */ + +/* + * When BR_USE_GETENTROPY is enabled, the SSL engine will use the + * getentropy() function to obtain quality randomness for seeding its + * internal PRNG. On Linux and FreeBSD, getentropy() is implemented by + * the standard library with the system call getrandom(); on OpenBSD, + * getentropy() is the system call, and there is no getrandom() wrapper, + * hence the use of the getentropy() function for maximum portability. + * + * If the getentropy() call fails, and BR_USE_URANDOM is not explicitly + * disabled, then /dev/urandom will be used as a fallback mechanism. On + * FreeBSD and OpenBSD, this does not change much, since /dev/urandom + * will block if not enough entropy has been obtained since last boot. + * On Linux, /dev/urandom might not block, which can be troublesome in + * early boot stages, which is why getentropy() is preferred. + * +#define BR_USE_GETENTROPY 1 + */ + +/* + * When BR_USE_URANDOM is enabled, the SSL engine will use /dev/urandom + * to automatically obtain quality randomness for seeding its internal + * PRNG. + * +#define BR_USE_URANDOM 1 + */ + +/* + * When BR_USE_WIN32_RAND is enabled, the SSL engine will use the Win32 + * (CryptoAPI) functions (CryptAcquireContext(), CryptGenRandom()...) to + * automatically obtain quality randomness for seeding its internal PRNG. + * + * Note: if both BR_USE_URANDOM and BR_USE_WIN32_RAND are defined, the + * former takes precedence. + * +#define BR_USE_WIN32_RAND 1 + */ + +/* + * When BR_ARMEL_CORTEXM_GCC is enabled, some operations are replaced with + * inline assembly which is shorter and/or faster. 
This should be used + * only when all of the following are true: + * - target architecture is ARM in Thumb mode + * - target endianness is little-endian + * - compiler is GCC (or GCC-compatible for inline assembly syntax) + * + * This is meant for the low-end cores (Cortex M0, M0+, M1, M3). + * Note: if BR_LOMUL is not explicitly enabled or disabled, then + * enabling BR_ARMEL_CORTEXM_GCC also enables BR_LOMUL. + * +#define BR_ARMEL_CORTEXM_GCC 1 + */ + +/* + * When BR_AES_X86NI is enabled, the AES implementation using the x86 "NI" + * instructions (dedicated AES opcodes) will be compiled. If this is not + * enabled explicitly, then that AES implementation will be compiled only + * if a compatible compiler is detected. If set explicitly to 0, the + * implementation will not be compiled at all. + * +#define BR_AES_X86NI 1 + */ + +/* + * When BR_SSE2 is enabled, SSE2 intrinsics will be used for some + * algorithm implementations that use them (e.g. chacha20_sse2). If this + * is not enabled explicitly, then support for SSE2 intrinsics will be + * automatically detected. If set explicitly to 0, then SSE2 code will + * not be compiled at all. + * +#define BR_SSE2 1 + */ + +/* + * When BR_POWER8 is enabled, the AES implementation using the POWER ISA + * 2.07 opcodes (available on POWER8 processors and later) is compiled. + * If this is not enabled explicitly, then that implementation will be + * compiled only if a compatible compiler is detected, _and_ the target + * architecture is POWER8 or later. + * +#define BR_POWER8 1 + */ + +/* + * When BR_INT128 is enabled, then code using the 'unsigned __int64' + * and 'unsigned __int128' types will be used to leverage 64x64->128 + * unsigned multiplications. This should work with GCC and compatible + * compilers on 64-bit architectures. 
+ * +#define BR_INT128 1 + */ + +/* + * When BR_UMUL128 is enabled, then code using the '_umul128()' and + * '_addcarry_u64()' intrinsics will be used to implement 64x64->128 + * unsigned multiplications. This should work on Visual C on x64 systems. + * +#define BR_UMUL128 1 + */ + +/* + * When BR_LE_UNALIGNED is enabled, then the current architecture is + * assumed to use little-endian encoding for integers, and to tolerate + * unaligned accesses with no or minimal time penalty. + * +#define BR_LE_UNALIGNED 1 + */ + +/* + * When BR_BE_UNALIGNED is enabled, then the current architecture is + * assumed to use big-endian encoding for integers, and to tolerate + * unaligned accesses with no or minimal time penalty. + * +#define BR_BE_UNALIGNED 1 + */ + +#endif diff --git a/third_party/bearssl/src/dec16be.c b/third_party/bearssl/src/dec16be.c new file mode 100644 index 0000000..4f3f7f4 --- /dev/null +++ b/third_party/bearssl/src/dec16be.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
/* see inner.h */
/*
 * Decode num consecutive 16-bit values from src into v[0..num-1], each
 * through br_dec16be() on successive 2-byte groups.
 */
void
br_range_dec16be(uint16_t *v, size_t num, const void *src)
{
	const unsigned char *in = src;
	size_t k;

	for (k = 0; k < num; k ++) {
		v[k] = br_dec16be(in + (k << 1));
	}
}
/* see inner.h */
/*
 * Decode num consecutive 16-bit values from src into v[0..num-1], each
 * through br_dec16le() on successive 2-byte groups.
 */
void
br_range_dec16le(uint16_t *v, size_t num, const void *src)
{
	const unsigned char *in = src;
	size_t k;

	for (k = 0; k < num; k ++) {
		v[k] = br_dec16le(in + (k << 1));
	}
}
/* see inner.h */
/*
 * Decode num consecutive 32-bit values from src into v[0..num-1], each
 * through br_dec32be() on successive 4-byte groups.
 */
void
br_range_dec32be(uint32_t *v, size_t num, const void *src)
{
	const unsigned char *in = src;
	size_t k;

	for (k = 0; k < num; k ++) {
		v[k] = br_dec32be(in + (k << 2));
	}
}
/* see inner.h */
/*
 * Decode num consecutive 32-bit values from src into v[0..num-1], each
 * through br_dec32le() on successive 4-byte groups.
 */
void
br_range_dec32le(uint32_t *v, size_t num, const void *src)
{
	const unsigned char *in = src;
	size_t k;

	for (k = 0; k < num; k ++) {
		v[k] = br_dec32le(in + (k << 2));
	}
}
/* see inner.h */
/*
 * Decode num consecutive 64-bit values from src into v[0..num-1], each
 * through br_dec64be() on successive 8-byte groups.
 */
void
br_range_dec64be(uint64_t *v, size_t num, const void *src)
{
	const unsigned char *in = src;
	size_t k;

	for (k = 0; k < num; k ++) {
		v[k] = br_dec64be(in + (k << 3));
	}
}
/* see inner.h */
/*
 * Decode num consecutive 64-bit values from src into v[0..num-1], each
 * through br_dec64le() on successive 8-byte groups.
 */
void
br_range_dec64le(uint64_t *v, size_t num, const void *src)
{
	const unsigned char *in = src;
	size_t k;

	for (k = 0; k < num; k ++) {
		v[k] = br_dec64le(in + (k << 3));
	}
}
/*
 * 32-bit rotation. The C compiler is supposed to recognize it as a
 * rotation and use the local architecture rotation opcode (if available).
 *
 * Rotates x left by n bits. Both shift counts are reduced modulo 32:
 * the plain form "x >> (32 - n)" shifts by the full width of the type
 * when n is 0, which is undefined behaviour in C. With the masks, a
 * count of 0 (or any multiple of 32) returns x unchanged, and counts
 * 1..31 give the same result as before.
 */
static inline uint32_t
rotl(uint32_t x, int n)
{
	unsigned s;

	s = (unsigned)n & 31u;
	return (x << s) | (x >> ((32u - s) & 31u));
}
/* see inner.h */
/*
 * Build the subkey schedule(s) for a key of key_len bytes; each unit
 * schedule occupies 32 words of skey.
 *
 *   key_len == 8    one schedule at skey[0..31]; returns 1.
 *   key_len == 16   schedules from key bytes 0..7 and 8..15 (the second
 *                   one passed through br_des_rev_skey()), then the
 *                   first schedule is copied to skey[64..95]; returns 3.
 *   otherwise       as above, but the third schedule is computed from
 *                   key bytes 16..23; returns 3.
 */
unsigned
br_des_ct_keysched(uint32_t *skey, const void *key, size_t key_len)
{
	const unsigned char *kb = key;

	keysched_unit(skey, kb);
	if (key_len == 8) {
		return 1;
	}

	/* Second unit is always derived from bytes 8..15, then reversed. */
	keysched_unit(skey + 32, kb + 8);
	br_des_rev_skey(skey + 32);

	if (key_len == 16) {
		/* Two-key form: third schedule is the first one again. */
		memcpy(skey + 64, skey, 32 * sizeof *skey);
	} else {
		keysched_unit(skey + 64, kb + 16);
	}
	return 3;
}
+ */ + x1 = r0 & (uint32_t)0x11111111; + x2 = (r0 >> 1) & (uint32_t)0x11111111; + x3 = (r0 >> 2) & (uint32_t)0x11111111; + x4 = (r0 >> 3) & (uint32_t)0x11111111; + x1 = (x1 << 4) - x1; + x2 = (x2 << 4) - x2; + x3 = (x3 << 4) - x3; + x4 = (x4 << 4) - x4; + x0 = (x4 << 4) | (x4 >> 28); + x5 = (x1 >> 4) | (x1 << 28); + + /* + * XOR with the subkey for this round. + */ + x0 ^= sk[0]; + x1 ^= sk[1]; + x2 ^= sk[2]; + x3 ^= sk[3]; + x4 ^= sk[4]; + x5 ^= sk[5]; + + /* + * The T-boxes are done in parallel, since they all use a + * "tree of multiplexer". We use "fake multiplexers": + * + * y = a ^ (x & b) + * + * computes y as either 'a' (if x == 0) or 'a ^ b' (if x == 1). + */ + y0 = (uint32_t)0xEFA72C4D ^ (x0 & (uint32_t)0xEC7AC69C); + y1 = (uint32_t)0xAEAAEDFF ^ (x0 & (uint32_t)0x500FB821); + y2 = (uint32_t)0x37396665 ^ (x0 & (uint32_t)0x40EFA809); + y3 = (uint32_t)0x68D7B833 ^ (x0 & (uint32_t)0xA5EC0B28); + y4 = (uint32_t)0xC9C755BB ^ (x0 & (uint32_t)0x252CF820); + y5 = (uint32_t)0x73FC3606 ^ (x0 & (uint32_t)0x40205801); + y6 = (uint32_t)0xA2A0A918 ^ (x0 & (uint32_t)0xE220F929); + y7 = (uint32_t)0x8222BD90 ^ (x0 & (uint32_t)0x44A3F9E1); + y8 = (uint32_t)0xD6B6AC77 ^ (x0 & (uint32_t)0x794F104A); + y9 = (uint32_t)0x3069300C ^ (x0 & (uint32_t)0x026F320B); + y10 = (uint32_t)0x6CE0D5CC ^ (x0 & (uint32_t)0x7640B01A); + y11 = (uint32_t)0x59A9A22D ^ (x0 & (uint32_t)0x238F1572); + y12 = (uint32_t)0xAC6D0BD4 ^ (x0 & (uint32_t)0x7A63C083); + y13 = (uint32_t)0x21C83200 ^ (x0 & (uint32_t)0x11CCA000); + y14 = (uint32_t)0xA0E62188 ^ (x0 & (uint32_t)0x202F69AA); + /* y15 = (uint32_t)0x00000000 ^ (x0 & (uint32_t)0x00000000); */ + y16 = (uint32_t)0xAF7D655A ^ (x0 & (uint32_t)0x51B33BE9); + y17 = (uint32_t)0xF0168AA3 ^ (x0 & (uint32_t)0x3B0FE8AE); + y18 = (uint32_t)0x90AA30C6 ^ (x0 & (uint32_t)0x90BF8816); + y19 = (uint32_t)0x5AB2750A ^ (x0 & (uint32_t)0x09E34F9B); + y20 = (uint32_t)0x5391BE65 ^ (x0 & (uint32_t)0x0103BE88); + y21 = (uint32_t)0x93372BAF ^ (x0 & (uint32_t)0x49AC8E25); + y22 
= (uint32_t)0xF288210C ^ (x0 & (uint32_t)0x922C313D); + y23 = (uint32_t)0x920AF5C0 ^ (x0 & (uint32_t)0x70EF31B0); + y24 = (uint32_t)0x63D312C0 ^ (x0 & (uint32_t)0x6A707100); + y25 = (uint32_t)0x537B3006 ^ (x0 & (uint32_t)0xB97C9011); + y26 = (uint32_t)0xA2EFB0A5 ^ (x0 & (uint32_t)0xA320C959); + y27 = (uint32_t)0xBC8F96A5 ^ (x0 & (uint32_t)0x6EA0AB4A); + y28 = (uint32_t)0xFAD176A5 ^ (x0 & (uint32_t)0x6953DDF8); + y29 = (uint32_t)0x665A14A3 ^ (x0 & (uint32_t)0xF74F3E2B); + y30 = (uint32_t)0xF2EFF0CC ^ (x0 & (uint32_t)0xF0306CAD); + /* y31 = (uint32_t)0x00000000 ^ (x0 & (uint32_t)0x00000000); */ + + y0 = y0 ^ (x1 & y1); + y1 = y2 ^ (x1 & y3); + y2 = y4 ^ (x1 & y5); + y3 = y6 ^ (x1 & y7); + y4 = y8 ^ (x1 & y9); + y5 = y10 ^ (x1 & y11); + y6 = y12 ^ (x1 & y13); + y7 = y14; /* was: y14 ^ (x1 & y15) */ + y8 = y16 ^ (x1 & y17); + y9 = y18 ^ (x1 & y19); + y10 = y20 ^ (x1 & y21); + y11 = y22 ^ (x1 & y23); + y12 = y24 ^ (x1 & y25); + y13 = y26 ^ (x1 & y27); + y14 = y28 ^ (x1 & y29); + y15 = y30; /* was: y30 ^ (x1 & y31) */ + + y0 = y0 ^ (x2 & y1); + y1 = y2 ^ (x2 & y3); + y2 = y4 ^ (x2 & y5); + y3 = y6 ^ (x2 & y7); + y4 = y8 ^ (x2 & y9); + y5 = y10 ^ (x2 & y11); + y6 = y12 ^ (x2 & y13); + y7 = y14 ^ (x2 & y15); + + y0 = y0 ^ (x3 & y1); + y1 = y2 ^ (x3 & y3); + y2 = y4 ^ (x3 & y5); + y3 = y6 ^ (x3 & y7); + + y0 = y0 ^ (x4 & y1); + y1 = y2 ^ (x4 & y3); + + y0 = y0 ^ (x5 & y1); + + /* + * The P permutation: + * -- Each bit move is converted into a mask + left rotation. + * -- Rotations that use the same movement are coalesced together. + * -- Left and right shifts are used as alternatives to a rotation + * where appropriate (this will help architectures that do not have + * a rotation opcode). 
+ */ + z0 = (y0 & (uint32_t)0x00000004) << 3; + z0 |= (y0 & (uint32_t)0x00004000) << 4; + z0 |= rotl(y0 & 0x12020120, 5); + z0 |= (y0 & (uint32_t)0x00100000) << 6; + z0 |= (y0 & (uint32_t)0x00008000) << 9; + z0 |= (y0 & (uint32_t)0x04000000) >> 22; + z0 |= (y0 & (uint32_t)0x00000001) << 11; + z0 |= rotl(y0 & 0x20000200, 12); + z0 |= (y0 & (uint32_t)0x00200000) >> 19; + z0 |= (y0 & (uint32_t)0x00000040) << 14; + z0 |= (y0 & (uint32_t)0x00010000) << 15; + z0 |= (y0 & (uint32_t)0x00000002) << 16; + z0 |= rotl(y0 & 0x40801800, 17); + z0 |= (y0 & (uint32_t)0x00080000) >> 13; + z0 |= (y0 & (uint32_t)0x00000010) << 21; + z0 |= (y0 & (uint32_t)0x01000000) >> 10; + z0 |= rotl(y0 & 0x88000008, 24); + z0 |= (y0 & (uint32_t)0x00000480) >> 7; + z0 |= (y0 & (uint32_t)0x00442000) >> 6; + return z0; +}

/*
 * Apply 16 successive Feistel rounds to the (left, right) pair stored
 * in *pl / *pr. Each round consumes six expanded subkey words from
 * sk_exp. The halves are written back exchanged, which cancels the
 * swap performed by the sixteenth round.
 */
static void
process_block_unit(uint32_t *pl, uint32_t *pr, const uint32_t *sk_exp)
{
	uint32_t left, right;
	int round;

	left = *pl;
	right = *pr;
	for (round = 0; round < 16; round ++, sk_exp += 6) {
		uint32_t next;

		next = left ^ Fconf(right, sk_exp);
		left = right;
		right = next;
	}
	*pl = right;
	*pr = left;
}

/*
 * see inner.h
 *
 * Process one 8-byte block in place. The block is read as two
 * big-endian 32-bit words, run through the initial permutation, then
 * through num_rounds passes of 16 rounds each (every pass uses the
 * next 96 words of sk_exp), then through the inverse permutation, and
 * finally written back in big-endian order.
 */
void
br_des_ct_process_block(unsigned num_rounds,
	const uint32_t *sk_exp, void *block)
{
	unsigned char *bytes;
	uint32_t left, right;

	bytes = block;
	left = br_dec32be(bytes);
	right = br_dec32be(bytes + 4);
	br_des_do_IP(&left, &right);
	for (; num_rounds > 0; num_rounds --, sk_exp += 96) {
		process_block_unit(&left, &right, sk_exp);
	}
	br_des_do_invIP(&left, &right);
	br_enc32be(bytes, left);
	br_enc32be(bytes + 4, right);
}

/*
 * see inner.h
 *
 * Expand the compact subkeys into the form used by the round
 * function. For each of the 16 * num_rounds rounds, two words are read
 * from skey and six words are written to sk_exp: bits 4j+k (k = 0..3)
 * of the first word and bits 4j+k (k = 0..1) of the second word are
 * each isolated and then multiplied by 15, so that every selected bit
 * becomes a full 0x0 or 0xF nibble.
 */
void
br_des_ct_skey_expand(uint32_t *sk_exp,
	unsigned num_rounds, const uint32_t *skey)
{
	unsigned remaining;

	for (remaining = num_rounds << 4; remaining > 0; remaining --) {
		uint32_t word, lane;
		int k;

		/* First word: four bit lanes (shifts 0 to 3). */
		word = *skey ++;
		for (k = 0; k < 4; k ++) {
			lane = (word >> k) & 0x11111111;
			*sk_exp ++ = (lane << 4) - lane;
		}

		/* Second word: only two bit lanes (shifts 0 and 1). */
		word = *skey ++;
		for (k = 0; k < 2; k ++) {
			lane = (word >> k) & 0x11111111;
			*sk_exp ++ = (lane << 4) - lane;
		}
	}
}
diff --git a/third_party/bearssl/src/des_ct_cbcdec.c b/third_party/bearssl/src/des_ct_cbcdec.c new file mode 100644 index 0000000..d208a3d --- /dev/null +++ b/third_party/bearssl/src/des_ct_cbcdec.c @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see bearssl_block.h */ +void +br_des_ct_cbcdec_init(br_des_ct_cbcdec_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_des_ct_cbcdec_vtable; + ctx->num_rounds = br_des_ct_keysched(ctx->skey, key, len); + if (len == 8) { + br_des_rev_skey(ctx->skey); + } else { + int i; + + for (i = 0; i < 48; i += 2) { + uint32_t t; + + t = ctx->skey[i]; + ctx->skey[i] = ctx->skey[94 - i]; + ctx->skey[94 - i] = t; + t = ctx->skey[i + 1]; + ctx->skey[i + 1] = ctx->skey[95 - i]; + ctx->skey[95 - i] = t; + } + } +} + +/* see bearssl_block.h */ +void +br_des_ct_cbcdec_run(const br_des_ct_cbcdec_keys *ctx, + void *iv, void *data, size_t len) +{ + unsigned char *buf, *ivbuf; + uint32_t sk_exp[288]; + + br_des_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey); + ivbuf = iv; + buf = data; + while (len > 0) { + unsigned char tmp[8]; + int i; + + memcpy(tmp, buf, 8); + br_des_ct_process_block(ctx->num_rounds, sk_exp, buf); + for (i = 0; i < 8; i ++) { + buf[i] ^= ivbuf[i]; + } + memcpy(ivbuf, tmp, 8); + buf += 8; + len -= 8; + } +} + +/* see bearssl_block.h */ +const br_block_cbcdec_class br_des_ct_cbcdec_vtable = { + sizeof(br_des_ct_cbcdec_keys), + 8, + 3, + (void (*)(const br_block_cbcdec_class **, const void *, size_t)) + &br_des_ct_cbcdec_init, + (void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t)) + &br_des_ct_cbcdec_run +}; diff --git a/third_party/bearssl/src/des_ct_cbcenc.c b/third_party/bearssl/src/des_ct_cbcenc.c new file mode 100644 index 0000000..4b3610e --- /dev/null +++ b/third_party/bearssl/src/des_ct_cbcenc.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or 
sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_block.h */ +void +br_des_ct_cbcenc_init(br_des_ct_cbcenc_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_des_ct_cbcenc_vtable; + ctx->num_rounds = br_des_ct_keysched(ctx->skey, key, len); +} + +/* see bearssl_block.h */ +void +br_des_ct_cbcenc_run(const br_des_ct_cbcenc_keys *ctx, + void *iv, void *data, size_t len) +{ + unsigned char *buf, *ivbuf; + uint32_t sk_exp[288]; + + br_des_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey); + ivbuf = iv; + buf = data; + while (len > 0) { + int i; + + for (i = 0; i < 8; i ++) { + buf[i] ^= ivbuf[i]; + } + br_des_ct_process_block(ctx->num_rounds, sk_exp, buf); + memcpy(ivbuf, buf, 8); + buf += 8; + len -= 8; + } +} + +/* see bearssl_block.h */ +const br_block_cbcenc_class br_des_ct_cbcenc_vtable = { + sizeof(br_des_ct_cbcenc_keys), + 8, + 3, + (void (*)(const br_block_cbcenc_class **, const void *, size_t)) + &br_des_ct_cbcenc_init, + (void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t)) + &br_des_ct_cbcenc_run +}; diff --git a/third_party/bearssl/src/des_support.c b/third_party/bearssl/src/des_support.c new file mode 100644 index 0000000..37f6db3 --- /dev/null +++ 
b/third_party/bearssl/src/des_support.c @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see inner.h */ +void +br_des_do_IP(uint32_t *xl, uint32_t *xr) +{ + /* + * Permutation algorithm is initially from Richard Outerbridge; + * implementation here is adapted from Crypto++ "des.cpp" file + * (which is in public domain). 
+ */ + uint32_t l, r, t; + + l = *xl; + r = *xr; + t = ((l >> 4) ^ r) & (uint32_t)0x0F0F0F0F; + r ^= t; + l ^= t << 4; + t = ((l >> 16) ^ r) & (uint32_t)0x0000FFFF; + r ^= t; + l ^= t << 16; + t = ((r >> 2) ^ l) & (uint32_t)0x33333333; + l ^= t; + r ^= t << 2; + t = ((r >> 8) ^ l) & (uint32_t)0x00FF00FF; + l ^= t; + r ^= t << 8; + t = ((l >> 1) ^ r) & (uint32_t)0x55555555; + r ^= t; + l ^= t << 1; + *xl = l; + *xr = r; +} + +/* see inner.h */ +void +br_des_do_invIP(uint32_t *xl, uint32_t *xr) +{ + /* + * See br_des_do_IP(). + */ + uint32_t l, r, t; + + l = *xl; + r = *xr; + t = ((l >> 1) ^ r) & 0x55555555; + r ^= t; + l ^= t << 1; + t = ((r >> 8) ^ l) & 0x00FF00FF; + l ^= t; + r ^= t << 8; + t = ((r >> 2) ^ l) & 0x33333333; + l ^= t; + r ^= t << 2; + t = ((l >> 16) ^ r) & 0x0000FFFF; + r ^= t; + l ^= t << 16; + t = ((l >> 4) ^ r) & 0x0F0F0F0F; + r ^= t; + l ^= t << 4; + *xl = l; + *xr = r; +} + +/* see inner.h */ +void +br_des_keysched_unit(uint32_t *skey, const void *key) +{ + uint32_t xl, xr, kl, kr; + int i; + + xl = br_dec32be(key); + xr = br_dec32be((const unsigned char *)key + 4); + + /* + * Permutation PC-1 is quite similar to the IP permutation. 
+ * Definition of IP (in FIPS 46-3 notations) is: + * 58 50 42 34 26 18 10 2 + * 60 52 44 36 28 20 12 4 + * 62 54 46 38 30 22 14 6 + * 64 56 48 40 32 24 16 8 + * 57 49 41 33 25 17 9 1 + * 59 51 43 35 27 19 11 3 + * 61 53 45 37 29 21 13 5 + * 63 55 47 39 31 23 15 7 + * + * Definition of PC-1 is: + * 57 49 41 33 25 17 9 1 + * 58 50 42 34 26 18 10 2 + * 59 51 43 35 27 19 11 3 + * 60 52 44 36 + * 63 55 47 39 31 23 15 7 + * 62 54 46 38 30 22 14 6 + * 61 53 45 37 29 21 13 5 + * 28 20 12 4 + */ + br_des_do_IP(&xl, &xr); + kl = ((xr & (uint32_t)0xFF000000) >> 4) + | ((xl & (uint32_t)0xFF000000) >> 12) + | ((xr & (uint32_t)0x00FF0000) >> 12) + | ((xl & (uint32_t)0x00FF0000) >> 20); + kr = ((xr & (uint32_t)0x000000FF) << 20) + | ((xl & (uint32_t)0x0000FF00) << 4) + | ((xr & (uint32_t)0x0000FF00) >> 4) + | ((xl & (uint32_t)0x000F0000) >> 16); + + /* + * For each round, rotate the two 28-bit words kl and kr. + * The extraction of the 48-bit subkey (PC-2) is not done yet. + */ + for (i = 0; i < 16; i ++) { + if ((1 << i) & 0x8103) { + kl = (kl << 1) | (kl >> 27); + kr = (kr << 1) | (kr >> 27); + } else { + kl = (kl << 2) | (kl >> 26); + kr = (kr << 2) | (kr >> 26); + } + kl &= (uint32_t)0x0FFFFFFF; + kr &= (uint32_t)0x0FFFFFFF; + skey[(i << 1) + 0] = kl; + skey[(i << 1) + 1] = kr; + } +} + +/* see inner.h */ +void +br_des_rev_skey(uint32_t *skey) +{ + int i; + + for (i = 0; i < 16; i += 2) { + uint32_t t; + + t = skey[i + 0]; + skey[i + 0] = skey[30 - i]; + skey[30 - i] = t; + t = skey[i + 1]; + skey[i + 1] = skey[31 - i]; + skey[31 - i] = t; + } +} diff --git a/third_party/bearssl/src/des_tab.c b/third_party/bearssl/src/des_tab.c new file mode 100644 index 0000000..3f8e4f9 --- /dev/null +++ b/third_party/bearssl/src/des_tab.c @@ -0,0 +1,310 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in 
the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * PC2left[x] tells where bit x goes when applying PC-2. 'x' is a bit + * position in the left rotated key word. Both position are in normal + * order (rightmost bit is 0). + */ +static const unsigned char PC2left[] = { + 16, 3, 7, 24, 20, 11, 24, + 13, 2, 10, 24, 22, 5, 15, + 23, 1, 9, 21, 12, 24, 6, + 4, 14, 18, 8, 17, 0, 19 +}; + +/* + * Similar to PC2left[x], for the right rotated key word. + */ +static const unsigned char PC2right[] = { + 8, 18, 24, 6, 22, 15, 3, + 10, 12, 19, 5, 14, 11, 24, + 4, 23, 16, 9, 24, 20, 2, + 24, 7, 13, 0, 21, 17, 1 +}; + +/* + * S-boxes and PC-1 merged. 
+ */ +static const uint32_t S1[] = { + 0x00808200, 0x00000000, 0x00008000, 0x00808202, + 0x00808002, 0x00008202, 0x00000002, 0x00008000, + 0x00000200, 0x00808200, 0x00808202, 0x00000200, + 0x00800202, 0x00808002, 0x00800000, 0x00000002, + 0x00000202, 0x00800200, 0x00800200, 0x00008200, + 0x00008200, 0x00808000, 0x00808000, 0x00800202, + 0x00008002, 0x00800002, 0x00800002, 0x00008002, + 0x00000000, 0x00000202, 0x00008202, 0x00800000, + 0x00008000, 0x00808202, 0x00000002, 0x00808000, + 0x00808200, 0x00800000, 0x00800000, 0x00000200, + 0x00808002, 0x00008000, 0x00008200, 0x00800002, + 0x00000200, 0x00000002, 0x00800202, 0x00008202, + 0x00808202, 0x00008002, 0x00808000, 0x00800202, + 0x00800002, 0x00000202, 0x00008202, 0x00808200, + 0x00000202, 0x00800200, 0x00800200, 0x00000000, + 0x00008002, 0x00008200, 0x00000000, 0x00808002 +}; + +static const uint32_t S2[] = { + 0x40084010, 0x40004000, 0x00004000, 0x00084010, + 0x00080000, 0x00000010, 0x40080010, 0x40004010, + 0x40000010, 0x40084010, 0x40084000, 0x40000000, + 0x40004000, 0x00080000, 0x00000010, 0x40080010, + 0x00084000, 0x00080010, 0x40004010, 0x00000000, + 0x40000000, 0x00004000, 0x00084010, 0x40080000, + 0x00080010, 0x40000010, 0x00000000, 0x00084000, + 0x00004010, 0x40084000, 0x40080000, 0x00004010, + 0x00000000, 0x00084010, 0x40080010, 0x00080000, + 0x40004010, 0x40080000, 0x40084000, 0x00004000, + 0x40080000, 0x40004000, 0x00000010, 0x40084010, + 0x00084010, 0x00000010, 0x00004000, 0x40000000, + 0x00004010, 0x40084000, 0x00080000, 0x40000010, + 0x00080010, 0x40004010, 0x40000010, 0x00080010, + 0x00084000, 0x00000000, 0x40004000, 0x00004010, + 0x40000000, 0x40080010, 0x40084010, 0x00084000 +}; + +static const uint32_t S3[] = { + 0x00000104, 0x04010100, 0x00000000, 0x04010004, + 0x04000100, 0x00000000, 0x00010104, 0x04000100, + 0x00010004, 0x04000004, 0x04000004, 0x00010000, + 0x04010104, 0x00010004, 0x04010000, 0x00000104, + 0x04000000, 0x00000004, 0x04010100, 0x00000100, + 0x00010100, 0x04010000, 0x04010004, 
0x00010104, + 0x04000104, 0x00010100, 0x00010000, 0x04000104, + 0x00000004, 0x04010104, 0x00000100, 0x04000000, + 0x04010100, 0x04000000, 0x00010004, 0x00000104, + 0x00010000, 0x04010100, 0x04000100, 0x00000000, + 0x00000100, 0x00010004, 0x04010104, 0x04000100, + 0x04000004, 0x00000100, 0x00000000, 0x04010004, + 0x04000104, 0x00010000, 0x04000000, 0x04010104, + 0x00000004, 0x00010104, 0x00010100, 0x04000004, + 0x04010000, 0x04000104, 0x00000104, 0x04010000, + 0x00010104, 0x00000004, 0x04010004, 0x00010100 +}; + +static const uint32_t S4[] = { + 0x80401000, 0x80001040, 0x80001040, 0x00000040, + 0x00401040, 0x80400040, 0x80400000, 0x80001000, + 0x00000000, 0x00401000, 0x00401000, 0x80401040, + 0x80000040, 0x00000000, 0x00400040, 0x80400000, + 0x80000000, 0x00001000, 0x00400000, 0x80401000, + 0x00000040, 0x00400000, 0x80001000, 0x00001040, + 0x80400040, 0x80000000, 0x00001040, 0x00400040, + 0x00001000, 0x00401040, 0x80401040, 0x80000040, + 0x00400040, 0x80400000, 0x00401000, 0x80401040, + 0x80000040, 0x00000000, 0x00000000, 0x00401000, + 0x00001040, 0x00400040, 0x80400040, 0x80000000, + 0x80401000, 0x80001040, 0x80001040, 0x00000040, + 0x80401040, 0x80000040, 0x80000000, 0x00001000, + 0x80400000, 0x80001000, 0x00401040, 0x80400040, + 0x80001000, 0x00001040, 0x00400000, 0x80401000, + 0x00000040, 0x00400000, 0x00001000, 0x00401040 +}; + +static const uint32_t S5[] = { + 0x00000080, 0x01040080, 0x01040000, 0x21000080, + 0x00040000, 0x00000080, 0x20000000, 0x01040000, + 0x20040080, 0x00040000, 0x01000080, 0x20040080, + 0x21000080, 0x21040000, 0x00040080, 0x20000000, + 0x01000000, 0x20040000, 0x20040000, 0x00000000, + 0x20000080, 0x21040080, 0x21040080, 0x01000080, + 0x21040000, 0x20000080, 0x00000000, 0x21000000, + 0x01040080, 0x01000000, 0x21000000, 0x00040080, + 0x00040000, 0x21000080, 0x00000080, 0x01000000, + 0x20000000, 0x01040000, 0x21000080, 0x20040080, + 0x01000080, 0x20000000, 0x21040000, 0x01040080, + 0x20040080, 0x00000080, 0x01000000, 0x21040000, + 0x21040080, 
0x00040080, 0x21000000, 0x21040080, + 0x01040000, 0x00000000, 0x20040000, 0x21000000, + 0x00040080, 0x01000080, 0x20000080, 0x00040000, + 0x00000000, 0x20040000, 0x01040080, 0x20000080 +}; + +static const uint32_t S6[] = { + 0x10000008, 0x10200000, 0x00002000, 0x10202008, + 0x10200000, 0x00000008, 0x10202008, 0x00200000, + 0x10002000, 0x00202008, 0x00200000, 0x10000008, + 0x00200008, 0x10002000, 0x10000000, 0x00002008, + 0x00000000, 0x00200008, 0x10002008, 0x00002000, + 0x00202000, 0x10002008, 0x00000008, 0x10200008, + 0x10200008, 0x00000000, 0x00202008, 0x10202000, + 0x00002008, 0x00202000, 0x10202000, 0x10000000, + 0x10002000, 0x00000008, 0x10200008, 0x00202000, + 0x10202008, 0x00200000, 0x00002008, 0x10000008, + 0x00200000, 0x10002000, 0x10000000, 0x00002008, + 0x10000008, 0x10202008, 0x00202000, 0x10200000, + 0x00202008, 0x10202000, 0x00000000, 0x10200008, + 0x00000008, 0x00002000, 0x10200000, 0x00202008, + 0x00002000, 0x00200008, 0x10002008, 0x00000000, + 0x10202000, 0x10000000, 0x00200008, 0x10002008 +}; + +static const uint32_t S7[] = { + 0x00100000, 0x02100001, 0x02000401, 0x00000000, + 0x00000400, 0x02000401, 0x00100401, 0x02100400, + 0x02100401, 0x00100000, 0x00000000, 0x02000001, + 0x00000001, 0x02000000, 0x02100001, 0x00000401, + 0x02000400, 0x00100401, 0x00100001, 0x02000400, + 0x02000001, 0x02100000, 0x02100400, 0x00100001, + 0x02100000, 0x00000400, 0x00000401, 0x02100401, + 0x00100400, 0x00000001, 0x02000000, 0x00100400, + 0x02000000, 0x00100400, 0x00100000, 0x02000401, + 0x02000401, 0x02100001, 0x02100001, 0x00000001, + 0x00100001, 0x02000000, 0x02000400, 0x00100000, + 0x02100400, 0x00000401, 0x00100401, 0x02100400, + 0x00000401, 0x02000001, 0x02100401, 0x02100000, + 0x00100400, 0x00000000, 0x00000001, 0x02100401, + 0x00000000, 0x00100401, 0x02100000, 0x00000400, + 0x02000001, 0x02000400, 0x00000400, 0x00100001 +}; + +static const uint32_t S8[] = { + 0x08000820, 0x00000800, 0x00020000, 0x08020820, + 0x08000000, 0x08000820, 0x00000020, 0x08000000, + 
0x00020020, 0x08020000, 0x08020820, 0x00020800, + 0x08020800, 0x00020820, 0x00000800, 0x00000020, + 0x08020000, 0x08000020, 0x08000800, 0x00000820, + 0x00020800, 0x00020020, 0x08020020, 0x08020800, + 0x00000820, 0x00000000, 0x00000000, 0x08020020, + 0x08000020, 0x08000800, 0x00020820, 0x00020000, + 0x00020820, 0x00020000, 0x08020800, 0x00000800, + 0x00000020, 0x08020020, 0x00000800, 0x00020820, + 0x08000800, 0x00000020, 0x08000020, 0x08020000, + 0x08020020, 0x08000000, 0x00020000, 0x08000820, + 0x00000000, 0x08020820, 0x00020020, 0x08000020, + 0x08020000, 0x08000800, 0x08000820, 0x00000000, + 0x08020820, 0x00020800, 0x00020800, 0x00000820, + 0x00000820, 0x00020020, 0x08000000, 0x08020800 +}; + +static inline uint32_t +Fconf(uint32_t r0, uint32_t skl, uint32_t skr) +{ + uint32_t r1; + + r1 = (r0 << 16) | (r0 >> 16); + return + S1[((r1 >> 11) ^ (skl >> 18)) & 0x3F] + | S2[((r0 >> 23) ^ (skl >> 12)) & 0x3F] + | S3[((r0 >> 19) ^ (skl >> 6)) & 0x3F] + | S4[((r0 >> 15) ^ (skl )) & 0x3F] + | S5[((r0 >> 11) ^ (skr >> 18)) & 0x3F] + | S6[((r0 >> 7) ^ (skr >> 12)) & 0x3F] + | S7[((r0 >> 3) ^ (skr >> 6)) & 0x3F] + | S8[((r1 >> 15) ^ (skr )) & 0x3F]; +} + +static void +process_block_unit(uint32_t *pl, uint32_t *pr, const uint32_t *skey) +{ + int i; + uint32_t l, r; + + l = *pl; + r = *pr; + for (i = 0; i < 16; i ++) { + uint32_t t; + + t = l ^ Fconf(r, skey[(i << 1) + 0], skey[(i << 1) + 1]); + l = r; + r = t; + } + *pl = r; + *pr = l; +} + +/* see inner.h */ +void +br_des_tab_process_block(unsigned num_rounds, const uint32_t *skey, void *block) +{ + unsigned char *buf; + uint32_t l, r; + + buf = block; + l = br_dec32be(buf); + r = br_dec32be(buf + 4); + br_des_do_IP(&l, &r); + while (num_rounds -- > 0) { + process_block_unit(&l, &r, skey); + skey += 32; + } + br_des_do_invIP(&l, &r); + br_enc32be(buf, l); + br_enc32be(buf + 4, r); +} + +static void +keysched_unit(uint32_t *skey, const void *key) +{ + int i; + + br_des_keysched_unit(skey, key); + + /* + * Apply PC-2 to get the 
48-bit subkeys. + */ + for (i = 0; i < 16; i ++) { + uint32_t xl, xr, ul, ur; + int j; + + xl = skey[(i << 1) + 0]; + xr = skey[(i << 1) + 1]; + ul = 0; + ur = 0; + for (j = 0; j < 28; j ++) { + ul |= (xl & 1) << PC2left[j]; + ur |= (xr & 1) << PC2right[j]; + xl >>= 1; + xr >>= 1; + } + skey[(i << 1) + 0] = ul; + skey[(i << 1) + 1] = ur; + } +} + +/* see inner.h */ +unsigned +br_des_tab_keysched(uint32_t *skey, const void *key, size_t key_len) +{ + switch (key_len) { + case 8: + keysched_unit(skey, key); + return 1; + case 16: + keysched_unit(skey, key); + keysched_unit(skey + 32, (const unsigned char *)key + 8); + br_des_rev_skey(skey + 32); + memcpy(skey + 64, skey, 32 * sizeof *skey); + return 3; + default: + keysched_unit(skey, key); + keysched_unit(skey + 32, (const unsigned char *)key + 8); + br_des_rev_skey(skey + 32); + keysched_unit(skey + 64, (const unsigned char *)key + 16); + return 3; + } +} diff --git a/third_party/bearssl/src/des_tab_cbcdec.c b/third_party/bearssl/src/des_tab_cbcdec.c new file mode 100644 index 0000000..e7eabe9 --- /dev/null +++ b/third_party/bearssl/src/des_tab_cbcdec.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_block.h */ +void +br_des_tab_cbcdec_init(br_des_tab_cbcdec_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_des_tab_cbcdec_vtable; + ctx->num_rounds = br_des_tab_keysched(ctx->skey, key, len); + if (len == 8) { + br_des_rev_skey(ctx->skey); + } else { + int i; + + for (i = 0; i < 48; i += 2) { + uint32_t t; + + t = ctx->skey[i]; + ctx->skey[i] = ctx->skey[94 - i]; + ctx->skey[94 - i] = t; + t = ctx->skey[i + 1]; + ctx->skey[i + 1] = ctx->skey[95 - i]; + ctx->skey[95 - i] = t; + } + } +} + +/* see bearssl_block.h */ +void +br_des_tab_cbcdec_run(const br_des_tab_cbcdec_keys *ctx, + void *iv, void *data, size_t len) +{ + unsigned char *buf, *ivbuf; + + ivbuf = iv; + buf = data; + while (len > 0) { + unsigned char tmp[8]; + int i; + + memcpy(tmp, buf, 8); + br_des_tab_process_block(ctx->num_rounds, ctx->skey, buf); + for (i = 0; i < 8; i ++) { + buf[i] ^= ivbuf[i]; + } + memcpy(ivbuf, tmp, 8); + buf += 8; + len -= 8; + } +} + +/* see bearssl_block.h */ +const br_block_cbcdec_class br_des_tab_cbcdec_vtable = { + sizeof(br_des_tab_cbcdec_keys), + 8, + 3, + (void (*)(const br_block_cbcdec_class **, const void *, size_t)) + &br_des_tab_cbcdec_init, + (void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t)) + &br_des_tab_cbcdec_run +}; diff --git a/third_party/bearssl/src/des_tab_cbcenc.c b/third_party/bearssl/src/des_tab_cbcenc.c new file mode 100644 index 0000000..3a45ba3 --- /dev/null +++ 
b/third_party/bearssl/src/des_tab_cbcenc.c @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see bearssl_block.h */ +void +br_des_tab_cbcenc_init(br_des_tab_cbcenc_keys *ctx, + const void *key, size_t len) +{ + ctx->vtable = &br_des_tab_cbcenc_vtable; + ctx->num_rounds = br_des_tab_keysched(ctx->skey, key, len); +} + +/* see bearssl_block.h */ +void +br_des_tab_cbcenc_run(const br_des_tab_cbcenc_keys *ctx, + void *iv, void *data, size_t len) +{ + unsigned char *buf, *ivbuf; + + ivbuf = iv; + buf = data; + while (len > 0) { + int i; + + for (i = 0; i < 8; i ++) { + buf[i] ^= ivbuf[i]; + } + br_des_tab_process_block(ctx->num_rounds, ctx->skey, buf); + memcpy(ivbuf, buf, 8); + buf += 8; + len -= 8; + } +} + +/* see bearssl_block.h */ +const br_block_cbcenc_class br_des_tab_cbcenc_vtable = { + sizeof(br_des_tab_cbcenc_keys), + 8, + 3, + (void (*)(const br_block_cbcenc_class **, const void *, size_t)) + &br_des_tab_cbcenc_init, + (void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t)) + &br_des_tab_cbcenc_run +}; diff --git a/third_party/bearssl/src/dig_oid.c b/third_party/bearssl/src/dig_oid.c new file mode 100644 index 0000000..cd9692c --- /dev/null +++ b/third_party/bearssl/src/dig_oid.c @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * This file contains the encoded OID for the standard hash functions. + * Such OID appear in, for instance, the PKCS#1 v1.5 padding for RSA + * signatures. + */ + +static const unsigned char md5_OID[] = { + 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x02, 0x05 +}; + +static const unsigned char sha1_OID[] = { + 0x2B, 0x0E, 0x03, 0x02, 0x1A +}; + +static const unsigned char sha224_OID[] = { + 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04 +}; + +static const unsigned char sha256_OID[] = { + 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01 +}; + +static const unsigned char sha384_OID[] = { + 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02 +}; + +static const unsigned char sha512_OID[] = { + 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03 +}; + +/* see inner.h */ +const unsigned char * +br_digest_OID(int digest_id, size_t *len) +{ + switch (digest_id) { + case br_md5_ID: + *len = sizeof md5_OID; + return md5_OID; + case br_sha1_ID: + *len = sizeof sha1_OID; + return sha1_OID; + case br_sha224_ID: + *len = sizeof sha224_OID; + return sha224_OID; + case br_sha256_ID: + *len = sizeof sha256_OID; + return sha256_OID; + case br_sha384_ID: + *len = sizeof sha384_OID; + return sha384_OID; + case br_sha512_ID: + *len = sizeof sha512_OID; + return sha512_OID; + default: + *len = 0; + return NULL; + } +} diff --git a/third_party/bearssl/src/dig_size.c b/third_party/bearssl/src/dig_size.c new file mode 100644 index 0000000..4625d2c --- /dev/null 
+++ b/third_party/bearssl/src/dig_size.c @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see inner.h: returns the br_*_SIZE constant matching digest_id (for br_md5sha1_ID, the sum of the MD5 and SHA-1 sizes); an identifier not listed below yields 0. */ +size_t +br_digest_size_by_ID(int digest_id) +{ + switch (digest_id) { + case br_md5sha1_ID: + return br_md5_SIZE + br_sha1_SIZE; + case br_md5_ID: + return br_md5_SIZE; + case br_sha1_ID: + return br_sha1_SIZE; + case br_sha224_ID: + return br_sha224_SIZE; + case br_sha256_ID: + return br_sha256_SIZE; + case br_sha384_ID: + return br_sha384_SIZE; + case br_sha512_ID: + return br_sha512_SIZE; + default: + /* unknown identifier: abort() is not called, a size of 0 is reported instead */ + return 0; + } +} diff --git a/third_party/bearssl/src/eax.c b/third_party/bearssl/src/eax.c new file mode 100644 index 0000000..bcc704a --- /dev/null +++ b/third_party/bearssl/src/eax.c @@ -0,0 +1,525 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* + * Implementation Notes + * ==================== + * + * The combined CTR + CBC-MAC functions can only handle full blocks, + * so some buffering is necessary. Moreover, EAX has a special padding + * rule for CBC-MAC, which implies that we cannot compute the MAC over + * the last received full block until we know whether we are at the + * end of the data or not. + * + * - 'ptr' contains a value from 1 to 16, which is the number of bytes + * accumulated in buf[] that still needs to be processed with the + * current OMAC computation. Beware that this can go to 16: a + * complete block cannot be processed until it is known whether it + * is the last block or not. However, it can never be 0, because + * OMAC^t works on an input that is at least one-block long. The one exception is a context reset from a captured state (br_eax_reset_pre_aad(), br_eax_reset_post_aad()), where 'ptr' is set to 0 because the first block was already absorbed. + * + * - When processing the message itself, CTR encryption/decryption is + * also done at the same time. The first 'ptr' bytes of buf[] then + * contain the encrypted bytes, while the last '16 - ptr' bytes of + * buf[] are the remnants of the stream block, to be used against + * the next input bytes, when available. + * + * - The current counter and running CBC-MAC values are kept in 'ctr' + * and 'cbcmac', respectively. + * + * - The derived keys for padding are kept in L2 and L4 (double and + * quadruple of Enc_K(0^n), in GF(2^128), respectively). + */ + +/* + * Start an OMAC computation; the first block is the big-endian + * representation of the provided value ('val' must fit on one byte). + * We make it a delayed block because it may also be the last one: the running CBC-MAC is cleared, and ptr is set to 16 to mark buf[] as a full pending block. + */ +static void +omac_start(br_eax_context *ctx, unsigned val) +{ + memset(ctx->cbcmac, 0, sizeof ctx->cbcmac); + memset(ctx->buf, 0, sizeof ctx->buf); + ctx->buf[15] = val; + ctx->ptr = 16; +} + +/* + * Double a value in finite field GF(2^128), defined with modulus + * X^128+X^7+X^2+X+1. Both src and dst are 16 bytes, most significant byte first (the carry travels from index 15 down to index 0). 
+ */ +static void +double_gf128(unsigned char *dst, const unsigned char *src) +{ + unsigned cc; + int i; + + cc = 0x87 & -((unsigned)src[0] >> 7); /* 0x87 when the top bit of src[0] is set, else 0: reduction term, selected without a branch */ + for (i = 15; i >= 0; i --) { + unsigned z; + + z = (src[i] << 1) ^ cc; + cc = z >> 8; /* bit shifted out of this byte, carried into the next more significant one */ + dst[i] = (unsigned char)z; + } +} + +/* + * Apply padding to the last block, currently in ctx->buf (with + * ctx->ptr bytes), and finalize OMAC computation. A full block (ptr == 16) is masked with L2; a shorter one gets a 0x80 byte, zero fill, and the L4 mask. The result is left in ctx->cbcmac. + */ +static void +do_pad(br_eax_context *ctx) +{ + unsigned char *pad; + size_t ptr, u; + + ptr = ctx->ptr; + if (ptr == 16) { + pad = ctx->L2; + } else { + ctx->buf[ptr ++] = 0x80; + memset(ctx->buf + ptr, 0x00, 16 - ptr); + pad = ctx->L4; + } + for (u = 0; u < sizeof ctx->buf; u ++) { + ctx->buf[u] ^= pad[u]; + } + (*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, ctx->buf, sizeof ctx->buf); +} + +/* + * Apply CBC-MAC on the provided data, with buffering management. + * + * Upon entry, two situations are acceptable: + * + * ctx->ptr == 0: there is no data to process in ctx->buf + * ctx->ptr == 16: there is a full block of unprocessed data in ctx->buf + * + * Upon exit, ctx->ptr may be zero only if it was already zero on entry, + * and len == 0. In all other situations, ctx->ptr will be non-zero on + * exit (and may have value 16). + */ +static void +do_cbcmac_chunk(br_eax_context *ctx, const void *data, size_t len) +{ + size_t ptr; + + if (len == 0) { + return; + } + ptr = len & (size_t)15; /* size of the tail that stays buffered; a length that is a multiple of 16 keeps a whole block back */ + if (ptr == 0) { + len -= 16; + ptr = 16; + } else { + len -= ptr; + } + if (ctx->ptr == 16) { /* the pending block is no longer the last one: absorb it now */ + (*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, + ctx->buf, sizeof ctx->buf); + } + (*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, data, len); + memcpy(ctx->buf, (const unsigned char *)data + len, ptr); + ctx->ptr = ptr; +} + +/* see bearssl_aead.h: records the vtable and the block cipher context in ctx, then derives the padding masks L2 and L4. */ +void +br_eax_init(br_eax_context *ctx, const br_block_ctrcbc_class **bctx) +{ + unsigned char tmp[16], iv[16]; + + ctx->vtable = &br_eax_vtable; + ctx->bctx = bctx; + + /* + * Encrypt a whole-zero block to compute L2 and L4. 
+ */ + memset(tmp, 0, sizeof tmp); + memset(iv, 0, sizeof iv); + (*bctx)->ctr(bctx, iv, tmp, sizeof tmp); + double_gf128(ctx->L2, tmp); + double_gf128(ctx->L4, ctx->L2); +} + +/* see bearssl_aead.h: fills st->st[0..2] with the CBC-MAC state obtained after absorbing the first block (15 zero bytes followed by the byte 0, 1 or 2) of each of the three OMAC computations. */ +void +br_eax_capture(const br_eax_context *ctx, br_eax_state *st) +{ + /* + * We capture the three OMAC* states _after_ processing the + * initial block (assuming that nonce, message and AAD are + * all non-empty). + */ + int i; + + memset(st->st, 0, sizeof st->st); + for (i = 0; i < 3; i ++) { + unsigned char tmp[16]; + + memset(tmp, 0, sizeof tmp); + tmp[15] = (unsigned char)i; + (*ctx->bctx)->mac(ctx->bctx, st->st[i], tmp, sizeof tmp); + } +} + +/* see bearssl_aead.h: starts a new message; the OMAC^0 of the len-byte nonce is stored in ctx->nonce and the context is left ready to receive AAD. */ +void +br_eax_reset(br_eax_context *ctx, const void *nonce, size_t len) +{ + /* + * Process nonce with OMAC^0. + */ + omac_start(ctx, 0); + do_cbcmac_chunk(ctx, nonce, len); + do_pad(ctx); + memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac); + + /* + * Start OMAC^1 for the AAD ("header" in the EAX specification). + */ + omac_start(ctx, 1); + + /* + * We use ctx->head[0] as a temporary flag to mark that we are + * using a "normal" reset(). + */ + ctx->head[0] = 0; +} + +/* see bearssl_aead.h: like br_eax_reset(), but resumes the nonce and AAD OMAC computations from the states saved in st instead of reprocessing their first block; an empty nonce falls back to omac_start() because its first block is then also the last one. */ +void +br_eax_reset_pre_aad(br_eax_context *ctx, const br_eax_state *st, + const void *nonce, size_t len) +{ + if (len == 0) { + omac_start(ctx, 0); + } else { + memcpy(ctx->cbcmac, st->st[0], sizeof ctx->cbcmac); + ctx->ptr = 0; + do_cbcmac_chunk(ctx, nonce, len); + } + do_pad(ctx); + memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac); + + memcpy(ctx->cbcmac, st->st[1], sizeof ctx->cbcmac); + ctx->ptr = 0; + + memcpy(ctx->ctr, st->st[2], sizeof ctx->ctr); + + /* + * We use ctx->head[0] as a flag to indicate that we use + * a recorded state, with ctx->ctr containing the preprocessed + * first block for OMAC^2. 
+ */ + ctx->head[0] = 1; +} + +/* see bearssl_aead.h: resets for a new message while skipping the AAD phase; the nonce is processed as in br_eax_reset_pre_aad(), then st->st[1] is copied into ctx->head as the AAD value and st->st[2] becomes the running CBC-MAC for the data, with the processed nonce loaded as CTR counter. NOTE(review): st->st[1] is used here as a finished AAD OMAC result; confirm against bearssl_aead.h how the caller is expected to have produced it. */ +void +br_eax_reset_post_aad(br_eax_context *ctx, const br_eax_state *st, + const void *nonce, size_t len) +{ + if (len == 0) { + omac_start(ctx, 0); + } else { + memcpy(ctx->cbcmac, st->st[0], sizeof ctx->cbcmac); + ctx->ptr = 0; + do_cbcmac_chunk(ctx, nonce, len); + } + do_pad(ctx); + memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac); + memcpy(ctx->ctr, ctx->nonce, sizeof ctx->nonce); + + memcpy(ctx->head, st->st[1], sizeof ctx->head); + + memcpy(ctx->cbcmac, st->st[2], sizeof ctx->cbcmac); + ctx->ptr = 0; +} + +/* see bearssl_aead.h: feeds len bytes of additional authenticated data into the running OMAC; input that does not fill buf[] is only buffered. */ +void +br_eax_aad_inject(br_eax_context *ctx, const void *data, size_t len) +{ + size_t ptr; + + ptr = ctx->ptr; + + /* + * If there is a partial block, first complete it. + */ + if (ptr < 16) { + size_t clen; + + clen = 16 - ptr; + if (len <= clen) { + memcpy(ctx->buf + ptr, data, len); + ctx->ptr = ptr + len; + return; + } + memcpy(ctx->buf + ptr, data, clen); + data = (const unsigned char *)data + clen; + len -= clen; + } + + /* + * We now have a full block in buf[], and this is not the last + * block. + */ + do_cbcmac_chunk(ctx, data, len); +} + +/* see bearssl_aead.h: ends the AAD phase (its OMAC result goes to ctx->head) and prepares the context for br_eax_run(). */ +void +br_eax_flip(br_eax_context *ctx) +{ + int from_capture; + + /* + * ctx->head[0] may be non-zero if the context was reset with + * a pre-AAD captured state. In that case, ctx->ctr[] contains + * the state for OMAC^2 _after_ processing the first block. + */ + from_capture = ctx->head[0]; + + /* + * Complete the OMAC computation on the AAD. + */ + do_pad(ctx); + memcpy(ctx->head, ctx->cbcmac, sizeof ctx->cbcmac); + + /* + * Start OMAC^2 for the encrypted data. + * If the context was initialized from a captured state, then + * the OMAC^2 value is in the ctr[] array. + */ + if (from_capture) { + memcpy(ctx->cbcmac, ctx->ctr, sizeof ctx->cbcmac); + ctx->ptr = 0; + } else { + omac_start(ctx, 2); + } + + /* + * Initial counter value for CTR is the processed nonce. 
+ */ + memcpy(ctx->ctr, ctx->nonce, sizeof ctx->nonce); +} + +/* see bearssl_aead.h: encrypts (encrypt != 0) or decrypts (encrypt == 0) len bytes of data in place with the CTR stream, and feeds the ciphertext bytes to the running OMAC. In both directions buf[] ends up holding ciphertext for its first ptr bytes. */ +void +br_eax_run(br_eax_context *ctx, int encrypt, void *data, size_t len) +{ + unsigned char *dbuf; + size_t ptr; + + /* + * Ensure that there is actual data to process. + */ + if (len == 0) { + return; + } + + dbuf = data; + ptr = ctx->ptr; + + /* + * We may have ptr == 0 here if we initialized from a captured + * state. In that case, there is no partially consumed block + * or unprocessed data. + */ + if (ptr != 0 && ptr != 16) { + /* + * We have a partially consumed block. + */ + size_t u, clen; + + clen = 16 - ptr; + if (len <= clen) { + clen = len; + } + if (encrypt) { + for (u = 0; u < clen; u ++) { + ctx->buf[ptr + u] ^= dbuf[u]; + } + memcpy(dbuf, ctx->buf + ptr, clen); + } else { + for (u = 0; u < clen; u ++) { + unsigned dx, sx; + + sx = ctx->buf[ptr + u]; + dx = dbuf[u]; + ctx->buf[ptr + u] = dx; /* keep the ciphertext byte in buf[] for the OMAC */ + dbuf[u] = sx ^ dx; + } + } + + if (len <= clen) { + ctx->ptr = ptr + clen; + return; + } + dbuf += clen; + len -= clen; + } + + /* + * We now have a complete encrypted block in buf[] that must still + * be processed with OMAC, and this is not the final buf. + * Exception: when ptr == 0, no block has been produced yet. + */ + if (ptr != 0) { + (*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, + ctx->buf, sizeof ctx->buf); + } + + /* + * Do CTR encryption or decryption and CBC-MAC for all full blocks + * except the last. + */ + ptr = len & (size_t)15; + if (ptr == 0) { + len -= 16; + ptr = 16; + } else { + len -= ptr; + } + if (encrypt) { + (*ctx->bctx)->encrypt(ctx->bctx, ctx->ctr, ctx->cbcmac, + dbuf, len); + } else { + (*ctx->bctx)->decrypt(ctx->bctx, ctx->ctr, ctx->cbcmac, + dbuf, len); + } + dbuf += len; + + /* + * Compute next block of CTR stream, and use it to finish + * encrypting or decrypting the data. 
+ */ + memset(ctx->buf, 0, sizeof ctx->buf); + (*ctx->bctx)->ctr(ctx->bctx, ctx->ctr, ctx->buf, sizeof ctx->buf); + if (encrypt) { + size_t u; + + for (u = 0; u < ptr; u ++) { + ctx->buf[u] ^= dbuf[u]; + } + memcpy(dbuf, ctx->buf, ptr); + } else { + size_t u; + + for (u = 0; u < ptr; u ++) { + unsigned dx, sx; + + sx = ctx->buf[u]; + dx = dbuf[u]; + ctx->buf[u] = dx; /* keep the ciphertext byte in buf[] for the OMAC */ + dbuf[u] = sx ^ dx; + } + } + ctx->ptr = ptr; +} + +/* + * Complete tag computation. The final tag is written in ctx->cbcmac. This pads and absorbs the pending block, so ctx->buf and ctx->cbcmac are modified; running it a second time on the same context does not reproduce the tag. + */ +static void +do_final(br_eax_context *ctx) +{ + size_t u; + + do_pad(ctx); + + /* + * Authentication tag is the XOR of the three OMAC outputs for + * the nonce, AAD and encrypted data. + */ + for (u = 0; u < 16; u ++) { + ctx->cbcmac[u] ^= ctx->nonce[u] ^ ctx->head[u]; + } +} + +/* see bearssl_aead.h: finalises the computation and copies the whole ctx->cbcmac array to tag. */ +void +br_eax_get_tag(br_eax_context *ctx, void *tag) +{ + do_final(ctx); + memcpy(tag, ctx->cbcmac, sizeof ctx->cbcmac); +} + +/* see bearssl_aead.h: finalises the computation and copies the first len bytes of the tag to tag. NOTE(review): len is not checked here; do_final() produces 16 tag bytes, so a larger len would read past them. */ +void +br_eax_get_tag_trunc(br_eax_context *ctx, void *tag, size_t len) +{ + do_final(ctx); + memcpy(tag, ctx->cbcmac, len); +} + +/* see bearssl_aead.h: computes the tag and compares its first len bytes with tag. Differences are OR-ed into x over the whole loop and tested once with EQ0(), so the loop never exits early on a mismatch. NOTE(review): len is not checked; tmp[] holds 16 bytes, so callers must pass len <= 16. */ +uint32_t +br_eax_check_tag_trunc(br_eax_context *ctx, const void *tag, size_t len) +{ + unsigned char tmp[16]; + size_t u; + int x; + + br_eax_get_tag(ctx, tmp); + x = 0; + for (u = 0; u < len; u ++) { + x |= tmp[u] ^ ((const unsigned char *)tag)[u]; + } + return EQ0(x); +} + +/* see bearssl_aead.h: full-length variant, compares all 16 tag bytes through br_eax_check_tag_trunc(). */ +uint32_t +br_eax_check_tag(br_eax_context *ctx, const void *tag) +{ + return br_eax_check_tag_trunc(ctx, tag, 16); +} + +/* see bearssl_aead.h: binds the br_eax_*() functions above to the generic br_aead_class interface; each pointer is cast from its br_eax_context-based signature to the br_aead_class-based one. */ +const br_aead_class br_eax_vtable = { + 16, + (void (*)(const br_aead_class **, const void *, size_t)) + &br_eax_reset, + (void (*)(const br_aead_class **, const void *, size_t)) + &br_eax_aad_inject, + (void (*)(const br_aead_class **)) + &br_eax_flip, + (void (*)(const br_aead_class **, int, void *, size_t)) + &br_eax_run, + (void (*)(const br_aead_class **, void *)) + &br_eax_get_tag, + (uint32_t (*)(const br_aead_class **, 
const void *)) + &br_eax_check_tag, + (void (*)(const br_aead_class **, void *, size_t)) + &br_eax_get_tag_trunc, + (uint32_t (*)(const br_aead_class **, const void *, size_t)) + &br_eax_check_tag_trunc +}; diff --git a/third_party/bearssl/src/ec_all_m15.c b/third_party/bearssl/src/ec_all_m15.c new file mode 100644 index 0000000..bb550e1 --- /dev/null +++ b/third_party/bearssl/src/ec_all_m15.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" /* Aggregate EC implementation: every api_*() function in this file forwards its arguments unchanged to br_ec_p256_m15 for BR_EC_secp256r1, to br_ec_c25519_m15 for BR_EC_curve25519, and to br_ec_prime_i15 for any other curve value. */ + +static const unsigned char * +api_generator(int curve, size_t *len) +{ /* forwards to the generator() method selected by curve */ + switch (curve) { + case BR_EC_secp256r1: + return br_ec_p256_m15.generator(curve, len); + case BR_EC_curve25519: + return br_ec_c25519_m15.generator(curve, len); + default: + return br_ec_prime_i15.generator(curve, len); + } +} + +static const unsigned char * +api_order(int curve, size_t *len) +{ /* forwards to the order() method selected by curve */ + switch (curve) { + case BR_EC_secp256r1: + return br_ec_p256_m15.order(curve, len); + case BR_EC_curve25519: + return br_ec_c25519_m15.order(curve, len); + default: + return br_ec_prime_i15.order(curve, len); + } +} + +static size_t +api_xoff(int curve, size_t *len) +{ /* forwards to the xoff() method selected by curve */ + switch (curve) { + case BR_EC_secp256r1: + return br_ec_p256_m15.xoff(curve, len); + case BR_EC_curve25519: + return br_ec_c25519_m15.xoff(curve, len); + default: + return br_ec_prime_i15.xoff(curve, len); + } +} + +static uint32_t +api_mul(unsigned char *G, size_t Glen, + const unsigned char *kb, size_t kblen, int curve) +{ /* forwards to the mul() method selected by curve and returns its status */ + switch (curve) { + case BR_EC_secp256r1: + return br_ec_p256_m15.mul(G, Glen, kb, kblen, curve); + case BR_EC_curve25519: + return br_ec_c25519_m15.mul(G, Glen, kb, kblen, curve); + default: + return br_ec_prime_i15.mul(G, Glen, kb, kblen, curve); + } +} + +static size_t +api_mulgen(unsigned char *R, + const unsigned char *x, size_t xlen, int curve) +{ /* forwards to the mulgen() method selected by curve and returns its result */ + switch (curve) { + case BR_EC_secp256r1: + return br_ec_p256_m15.mulgen(R, x, xlen, curve); + case BR_EC_curve25519: + return br_ec_c25519_m15.mulgen(R, x, xlen, curve); + default: + return br_ec_prime_i15.mulgen(R, x, xlen, curve); + } +} + +static uint32_t +api_muladd(unsigned char *A, const unsigned char *B, size_t len, + const unsigned char *x, size_t xlen, + const unsigned char *y, size_t ylen, int curve) +{ /* forwards to the muladd() method selected by curve and returns its status */ + switch (curve) { + case BR_EC_secp256r1: + return br_ec_p256_m15.muladd(A, B, len, + x, xlen, y, ylen, curve); + case BR_EC_curve25519: + return br_ec_c25519_m15.muladd(A, B, len, + x, xlen, y, ylen, curve); + default: 
+ return br_ec_prime_i15.muladd(A, B, len, + x, xlen, y, ylen, curve); + } +} + +/* see bearssl_ec.h: implementation descriptor exposing the dispatch functions of this file. NOTE(review): the leading 0x23800000 is presumably the bit mask of supported curve identifiers; confirm against the br_ec_impl definition. */ +const br_ec_impl br_ec_all_m15 = { + (uint32_t)0x23800000, + &api_generator, + &api_order, + &api_xoff, + &api_mul, + &api_mulgen, + &api_muladd +}; diff --git a/third_party/bearssl/src/ec_all_m31.c b/third_party/bearssl/src/ec_all_m31.c new file mode 100644 index 0000000..8fd8c3c --- /dev/null +++ b/third_party/bearssl/src/ec_all_m31.c @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" /* Aggregate EC implementation: the api_*() functions forward their arguments unchanged to a per-curve implementation. For BR_EC_secp256r1 and BR_EC_curve25519 the m64 variant is compiled in when BR_INT128 or BR_UMUL128 is non-zero, and the m31 variant otherwise; any other curve value goes to br_ec_prime_i31. */ + +static const unsigned char * +api_generator(int curve, size_t *len) +{ /* forwards to the generator() method selected by curve */ + switch (curve) { + case BR_EC_secp256r1: +#if BR_INT128 || BR_UMUL128 + return br_ec_p256_m64.generator(curve, len); +#else + return br_ec_p256_m31.generator(curve, len); +#endif + case BR_EC_curve25519: +#if BR_INT128 || BR_UMUL128 + return br_ec_c25519_m64.generator(curve, len); +#else + return br_ec_c25519_m31.generator(curve, len); +#endif + default: + return br_ec_prime_i31.generator(curve, len); + } +} + +static const unsigned char * +api_order(int curve, size_t *len) +{ /* forwards to the order() method selected by curve */ + switch (curve) { + case BR_EC_secp256r1: +#if BR_INT128 || BR_UMUL128 + return br_ec_p256_m64.order(curve, len); +#else + return br_ec_p256_m31.order(curve, len); +#endif + case BR_EC_curve25519: +#if BR_INT128 || BR_UMUL128 + return br_ec_c25519_m64.order(curve, len); +#else + return br_ec_c25519_m31.order(curve, len); +#endif + default: + return br_ec_prime_i31.order(curve, len); + } +} + +static size_t +api_xoff(int curve, size_t *len) +{ /* forwards to the xoff() method selected by curve */ + switch (curve) { + case BR_EC_secp256r1: +#if BR_INT128 || BR_UMUL128 + return br_ec_p256_m64.xoff(curve, len); +#else + return br_ec_p256_m31.xoff(curve, len); +#endif + case BR_EC_curve25519: +#if BR_INT128 || BR_UMUL128 + return br_ec_c25519_m64.xoff(curve, len); +#else + return br_ec_c25519_m31.xoff(curve, len); +#endif + default: + return br_ec_prime_i31.xoff(curve, len); + } +} + +static uint32_t +api_mul(unsigned char *G, size_t Glen, + const unsigned char *kb, size_t kblen, int curve) +{ /* forwards to the mul() method selected by curve and returns its status */ + switch (curve) { + case BR_EC_secp256r1: +#if BR_INT128 || BR_UMUL128 + return br_ec_p256_m64.mul(G, Glen, kb, kblen, curve); +#else + return br_ec_p256_m31.mul(G, Glen, kb, kblen, curve); +#endif + case BR_EC_curve25519: +#if BR_INT128 || BR_UMUL128 + return br_ec_c25519_m64.mul(G, Glen, kb, kblen, curve); +#else + return br_ec_c25519_m31.mul(G, Glen, kb, kblen, curve); +#endif + default: + return br_ec_prime_i31.mul(G, Glen, kb, kblen, curve); + } +} + 
+static size_t +api_mulgen(unsigned char *R, + const unsigned char *x, size_t xlen, int curve) +{ /* forwards to the mulgen() method selected by curve; the m64 variants are compiled in when BR_INT128 or BR_UMUL128 is non-zero, the m31 variants otherwise */ + switch (curve) { + case BR_EC_secp256r1: +#if BR_INT128 || BR_UMUL128 + return br_ec_p256_m64.mulgen(R, x, xlen, curve); +#else + return br_ec_p256_m31.mulgen(R, x, xlen, curve); +#endif + case BR_EC_curve25519: +#if BR_INT128 || BR_UMUL128 + return br_ec_c25519_m64.mulgen(R, x, xlen, curve); +#else + return br_ec_c25519_m31.mulgen(R, x, xlen, curve); +#endif + default: + return br_ec_prime_i31.mulgen(R, x, xlen, curve); + } +} + +static uint32_t +api_muladd(unsigned char *A, const unsigned char *B, size_t len, + const unsigned char *x, size_t xlen, + const unsigned char *y, size_t ylen, int curve) +{ /* forwards to the muladd() method selected by curve, with the same compile-time choice between m64 and m31 */ + switch (curve) { + case BR_EC_secp256r1: +#if BR_INT128 || BR_UMUL128 + return br_ec_p256_m64.muladd(A, B, len, + x, xlen, y, ylen, curve); +#else + return br_ec_p256_m31.muladd(A, B, len, + x, xlen, y, ylen, curve); +#endif + case BR_EC_curve25519: +#if BR_INT128 || BR_UMUL128 + return br_ec_c25519_m64.muladd(A, B, len, + x, xlen, y, ylen, curve); +#else + return br_ec_c25519_m31.muladd(A, B, len, + x, xlen, y, ylen, curve); +#endif + default: + return br_ec_prime_i31.muladd(A, B, len, + x, xlen, y, ylen, curve); + } +} + +/* see bearssl_ec.h: implementation descriptor exposing the api_*() dispatch functions of this file. NOTE(review): the leading 0x23800000 is presumably the bit mask of supported curve identifiers; confirm against the br_ec_impl definition. */ +const br_ec_impl br_ec_all_m31 = { + (uint32_t)0x23800000, + &api_generator, + &api_order, + &api_xoff, + &api_mul, + &api_mulgen, + &api_muladd +}; diff --git a/third_party/bearssl/src/ec_c25519_i15.c b/third_party/bearssl/src/ec_c25519_i15.c new file mode 100644 index 0000000..8fadcf4 --- /dev/null +++ b/third_party/bearssl/src/ec_c25519_i15.c @@ -0,0 +1,398 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, 
sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Parameters for the field: + * - field modulus p = 2^255-19 + * - R^2 mod p (R = 2^(15k) for the smallest k such that R >= p) Here k = 17, so R = 2^255. Each array starts with a header word (0x0110) followed by seventeen 15-bit words, least significant word first. + */ + +static const uint16_t C255_P[] = { + 0x0110, + 0x7FED, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, + 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, + 0x7FFF +}; + +#define P0I 0x4A1B + +static const uint16_t C255_R2[] = { + 0x0110, + 0x0169, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000 +}; + +/* obsolete +#include <stdio.h> +#include <stdlib.h> +static void +print_int_mont(const char *name, const uint16_t *x) +{ + uint16_t y[18]; + unsigned char tmp[32]; + size_t u; + + printf("%s = ", name); + memcpy(y, x, sizeof y); + br_i15_from_monty(y, C255_P, P0I); + br_i15_encode(tmp, sizeof tmp, y); + for (u = 0; u < sizeof tmp; u ++) { + printf("%02X", tmp[u]); + } + printf("\n"); +} +*/ + +static const uint16_t C255_A24[] = { /* constant multiplied with e in the z2 computation of api_mul(); NOTE(review): presumably the curve constant a24 in Montgomery representation, confirm */ + 0x0110, + 0x45D3, 0x0046, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000 +}; + +static const unsigned char GEN[] 
= { /* 32-byte value returned by api_generator(): u = 9 in the little-endian point encoding that api_mul() expects */ + 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +static const unsigned char ORDER[] = { /* 32-byte value returned by api_order() */ + 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +static const unsigned char * +api_generator(int curve, size_t *len) +{ /* curve is ignored: this file handles a single curve */ + (void)curve; + *len = 32; + return GEN; +} + +static const unsigned char * +api_order(int curve, size_t *len) +{ /* curve is ignored; returns ORDER and sets *len to 32 */ + (void)curve; + *len = 32; + return ORDER; +} + +static size_t +api_xoff(int curve, size_t *len) +{ /* curve is ignored; reports offset 0 and length 32 */ + (void)curve; + *len = 32; + return 0; +} + +static void +cswap(uint16_t *a, uint16_t *b, uint32_t ctl) +{ /* swaps the 18-word values a and b when ctl is 1 and leaves them unchanged when ctl is 0, without branching on ctl */ + int i; + + ctl = -ctl; /* 1 becomes an all-ones mask, 0 stays 0 */ + for (i = 0; i < 18; i ++) { + uint32_t aw, bw, tw; + + aw = a[i]; + bw = b[i]; + tw = ctl & (aw ^ bw); + a[i] = aw ^ tw; + b[i] = bw ^ tw; + } +} + +static void +c255_add(uint16_t *d, const uint16_t *a, const uint16_t *b) +{ /* d = a + b mod p; works on a temporary so that d may be the same array as a or b. NOTE(review): relies on br_i15_add()/br_i15_sub() returning the carry/borrow and on a ctl of 0 leaving the operand unchanged; confirm in inner.h. */ + uint32_t ctl; + uint16_t t[18]; + + memcpy(t, a, sizeof t); + ctl = br_i15_add(t, b, 1); + ctl |= NOT(br_i15_sub(t, C255_P, 0)); + br_i15_sub(t, C255_P, ctl); + memcpy(d, t, sizeof t); +} + +static void +c255_sub(uint16_t *d, const uint16_t *a, const uint16_t *b) +{ /* d = a - b mod p: p is added back under control of the value returned by the subtraction; works on a temporary so that d may be the same array as a or b */ + uint16_t t[18]; + + memcpy(t, a, sizeof t); + br_i15_add(t, C255_P, br_i15_sub(t, b, 1)); + memcpy(d, t, sizeof t); +} + +static void +c255_mul(uint16_t *d, const uint16_t *a, const uint16_t *b) +{ /* d = Montgomery product of a and b modulo p; the result goes through a temporary because callers pass d equal to a and/or b */ + uint16_t t[18]; + + br_i15_montymul(t, a, b, C255_P, P0I); + memcpy(d, t, sizeof t); +} + +static void +byteswap(unsigned char *G) +{ /* reverses the 32 bytes of G in place */ + int i; + + for (i = 0; i < 16; i ++) { + unsigned char t; + + t = G[i]; + G[i] = G[31 - i]; + G[31 - i] = t; + } +} + +static uint32_t +api_mul(unsigned char *G, size_t Glen, + const unsigned char *kb, size_t kblen, int curve) +{ +#define ILEN (18 * sizeof(uint16_t)) + + /* + * The a[] and 
b[] arrays have an extra word to allow for + * decoding without using br_i15_decode_reduce(). + */ + uint16_t x1[18], x2[18], x3[18], z2[18], z3[18]; + uint16_t a[19], aa[18], b[19], bb[18]; + uint16_t c[18], d[18], e[18], da[18], cb[18]; + unsigned char k[32]; + uint32_t swap; + int i; + + (void)curve; + + /* + * Points are encoded over exactly 32 bytes. Multipliers must fit + * in 32 bytes as well. + * RFC 7748 mandates that the high bit of the last point byte must + * be ignored/cleared. On a length error, 0 is returned and G is left untouched. + */ + if (Glen != 32 || kblen > 32) { + return 0; + } + G[31] &= 0x7F; + + /* + * Byteswap the point encoding, because it uses little-endian, and + * the generic decoding routine uses big-endian. + */ + byteswap(G); + + /* + * Decode the point ('u' coordinate). This should be reduced + * modulo p, but we prefer to avoid the dependency on + * br_i15_decode_reduce(). Instead, we use br_i15_decode_mod() + * with a synthetic modulus of value 2^255 (this must work + * since G was truncated to 255 bits), then use a conditional + * subtraction. We use br_i15_decode_mod() and not + * br_i15_decode(), because the ec_prime_i15 implementation uses + * the former but not the latter. + * br_i15_decode_reduce(a, G, 32, C255_P); + */ + br_i15_zero(b, 0x111); + b[18] = 1; + br_i15_decode_mod(a, G, 32, b); + a[0] = 0x110; + br_i15_sub(a, C255_P, NOT(br_i15_sub(a, C255_P, 0))); + + /* + * Initialise variables x1, x2, z2, x3 and z3. We set all of them + * into Montgomery representation. Start values: x2 = 1, z2 = 0, x3 = x1, z3 = 1. 
+ */ + br_i15_montymul(x1, a, C255_R2, C255_P, P0I); + memcpy(x3, x1, ILEN); + br_i15_zero(z2, C255_P[0]); + memcpy(x2, z2, ILEN); + x2[1] = 19; /* 19 = 2^255 mod p, i.e. the value 1 in Montgomery representation with R = 2^255 */ + memcpy(z3, x2, ILEN); + + memset(k, 0, (sizeof k) - kblen); + memcpy(k + (sizeof k) - kblen, kb, kblen); /* kb is right-aligned: k[] is big-endian, k[31] is the least significant byte */ + k[31] &= 0xF8; /* clear the three low bits of the multiplier */ + k[0] &= 0x7F; /* clear bit 255 */ + k[0] |= 0x40; /* set bit 254 */ + + /* obsolete + print_int_mont("x1", x1); + */ + + swap = 0; + for (i = 254; i >= 0; i --) { /* one ladder step per multiplier bit, from bit 254 down to bit 0 */ + uint32_t kt; + + kt = (k[31 - (i >> 3)] >> (i & 7)) & 1; + swap ^= kt; /* swap only when the current bit differs from the previous one */ + cswap(x2, x3, swap); + cswap(z2, z3, swap); + swap = kt; + + /* obsolete + print_int_mont("x2", x2); + print_int_mont("z2", z2); + print_int_mont("x3", x3); + print_int_mont("z3", z3); + */ + + c255_add(a, x2, z2); + c255_mul(aa, a, a); + c255_sub(b, x2, z2); + c255_mul(bb, b, b); + c255_sub(e, aa, bb); + c255_add(c, x3, z3); + c255_sub(d, x3, z3); + c255_mul(da, d, a); + c255_mul(cb, c, b); + + /* obsolete + print_int_mont("a ", a); + print_int_mont("aa", aa); + print_int_mont("b ", b); + print_int_mont("bb", bb); + print_int_mont("e ", e); + print_int_mont("c ", c); + print_int_mont("d ", d); + print_int_mont("da", da); + print_int_mont("cb", cb); + */ + + c255_add(x3, da, cb); + c255_mul(x3, x3, x3); + c255_sub(z3, da, cb); + c255_mul(z3, z3, z3); + c255_mul(z3, z3, x1); + c255_mul(x2, aa, bb); + c255_mul(z2, C255_A24, e); + c255_add(z2, z2, aa); + c255_mul(z2, e, z2); + + /* obsolete + print_int_mont("x2", x2); + print_int_mont("z2", z2); + print_int_mont("x3", x3); + print_int_mont("z3", z3); + */ + } + cswap(x2, x3, swap); + cswap(z2, z3, swap); + + /* + * Inverse z2 with a modular exponentiation. This is a simple + * square-and-multiply algorithm; we mutualise most non-squarings + * since the exponent contains almost only ones. The exponent is p - 2 = 2^255 - 21. 
+ */ + memcpy(a, z2, ILEN); + for (i = 0; i < 15; i ++) { /* after this loop: a = z2^(2^16 - 1) */ + c255_mul(a, a, a); + c255_mul(a, a, z2); + } + memcpy(b, a, ILEN); + for (i = 0; i < 14; i ++) { /* after this loop: b = z2^(2^240 - 1) */ + int j; + + for (j = 0; j < 16; j ++) { + c255_mul(b, b, b); + } + c255_mul(b, b, a); + } + for (i = 14; i >= 0; i --) { /* low 15 exponent bits, taken from bits 14..0 of 0xFFEB; after this loop: b = z2^(2^255 - 21) */ + c255_mul(b, b, b); + if ((0xFFEB >> i) & 1) { + c255_mul(b, z2, b); + } + } + c255_mul(b, x2, b); /* b = x2 / z2, still in Montgomery representation */ + + /* + * To avoid a dependency on br_i15_from_monty(), we use a + * Montgomery multiplication with 1. + * memcpy(x2, b, ILEN); + * br_i15_from_monty(x2, C255_P, P0I); + */ + br_i15_zero(a, C255_P[0]); + a[1] = 1; + br_i15_montymul(x2, a, b, C255_P, P0I); + + br_i15_encode(G, 32, x2); + byteswap(G); /* back to the little-endian point encoding */ + return 1; + +#undef ILEN +} + +static size_t +api_mulgen(unsigned char *R, + const unsigned char *x, size_t xlen, int curve) +{ /* copies the generator into R and multiplies it by x in place. NOTE(review): the status returned by api_mul() is ignored; when xlen > 32, api_mul() fails and R keeps the unmultiplied generator while Glen (32) is still returned. */ + const unsigned char *G; + size_t Glen; + + G = api_generator(curve, &Glen); + memcpy(R, G, Glen); + api_mul(R, Glen, x, xlen, curve); + return Glen; +} + +static uint32_t +api_muladd(unsigned char *A, const unsigned char *B, size_t len, + const unsigned char *x, size_t xlen, + const unsigned char *y, size_t ylen, int curve) +{ + /* + * We don't implement this method, since it is used for ECDSA + * only, and there is no ECDSA over Curve25519 (which instead + * uses EdDSA). All parameters are ignored and 0 is always returned. 
+ */ + (void)A; + (void)B; + (void)len; + (void)x; + (void)xlen; + (void)y; + (void)ylen; + (void)curve; + return 0; +} + +/* see bearssl_ec.h: implementation descriptor for this file. NOTE(review): the leading 0x20000000 is presumably the bit mask of supported curve identifiers (a single bit here); confirm against the br_ec_impl definition. */ +const br_ec_impl br_ec_c25519_i15 = { + (uint32_t)0x20000000, + &api_generator, + &api_order, + &api_xoff, + &api_mul, + &api_mulgen, + &api_muladd +}; diff --git a/third_party/bearssl/src/ec_c25519_i31.c b/third_party/bearssl/src/ec_c25519_i31.c new file mode 100644 index 0000000..f8ffc2c --- /dev/null +++ b/third_party/bearssl/src/ec_c25519_i31.c @@ -0,0 +1,390 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* + * Parameters for the field: + * - field modulus p = 2^255-19 + * - R^2 mod p (R = 2^(31k) for the smallest k such that R >= p) + * + * Each array below holds ten words: a first word equal to 0x107, then + * nine value words. C255_A24 is the constant multiplied with e in the + * ladder step of api_mul(). P0I is the extra constant passed alongside + * C255_P to br_i31_montymul(). + */ + +static const uint32_t C255_P[] = { + 0x00000107, + 0x7FFFFFED, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, + 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0000007F +}; + +#define P0I 0x286BCA1B + +static const uint32_t C255_R2[] = { + 0x00000107, + 0x00000000, 0x02D20000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 +}; + +static const uint32_t C255_A24[] = { + 0x00000107, + 0x53000000, 0x0000468B, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 +}; + +/* obsolete +#include <stdio.h> +#include <stdlib.h> +static void +print_int_mont(const char *name, const uint32_t *x) +{ + uint32_t y[10]; + unsigned char tmp[32]; + size_t u; + + printf("%s = ", name); + memcpy(y, x, sizeof y); + br_i31_from_monty(y, C255_P, P0I); + br_i31_encode(tmp, sizeof tmp, y); + for (u = 0; u < sizeof tmp; u ++) { + printf("%02X", tmp[u]); + } + printf("\n"); +} +*/ + +static const unsigned char GEN[] = { + 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +static const unsigned char ORDER[] = { + 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + /* + * Return the 32-byte GEN array and set *len to 32. The curve argument + * is ignored. + */ +static const unsigned char * +api_generator(int curve, size_t *len) +{ + (void)curve; + *len = 32; + return GEN; +} + /* + * Return the 32-byte ORDER array and set *len to 32. The curve + * argument is ignored. + */ +static const unsigned char * +api_order(int curve, size_t *len) +{ + (void)curve; + *len = 32; + return ORDER; +} + /* + * Set *len to 32 and return offset 0. The curve argument is ignored. + */ +static size_t +api_xoff(int curve, size_t *len) +{ + (void)curve; + *len = 32; + return 0; +} + /* + * Conditionally exchange the ten words of a[] and b[]. ctl is negated + * into a mask that is ANDed with (a[i] ^ b[i]), so ctl = 1 swaps the + * two arrays and ctl = 0 leaves them unchanged; there is no branch on + * ctl. + */ +static void +cswap(uint32_t *a, uint32_t *b, uint32_t ctl) +{ + int i; + + ctl = 
-ctl; + for (i = 0; i < 10; i ++) { + uint32_t aw, bw, tw; + + aw = a[i]; + bw = b[i]; + tw = ctl & (aw ^ bw); + a[i] = aw ^ tw; + b[i] = bw ^ tw; + } +} + /* + * Field addition: a copy of a receives b through br_i31_add(), then + * C255_P is conditionally subtracted, and the result is copied to d. + * The computation runs in a local array, so d may be the same array + * as a or b. + */ +static void +c255_add(uint32_t *d, const uint32_t *a, const uint32_t *b) +{ + uint32_t ctl; + uint32_t t[10]; + + memcpy(t, a, sizeof t); + ctl = br_i31_add(t, b, 1); + ctl |= NOT(br_i31_sub(t, C255_P, 0)); + br_i31_sub(t, C255_P, ctl); + memcpy(d, t, sizeof t); +} + /* + * Field subtraction: b is subtracted from a copy of a, then C255_P is + * added back under control of the value returned by br_i31_sub(). The + * result is copied to d; d may be the same array as a or b. + */ +static void +c255_sub(uint32_t *d, const uint32_t *a, const uint32_t *b) +{ + uint32_t t[10]; + + memcpy(t, a, sizeof t); + br_i31_add(t, C255_P, br_i31_sub(t, b, 1)); + memcpy(d, t, sizeof t); +} + /* + * Field multiplication: br_i31_montymul() of a and b with modulus + * C255_P, computed into a local array and then copied to d, so d may + * be the same array as a or b. + */ +static void +c255_mul(uint32_t *d, const uint32_t *a, const uint32_t *b) +{ + uint32_t t[10]; + + br_i31_montymul(t, a, b, C255_P, P0I); + memcpy(d, t, sizeof t); +} + /* + * Reverse the order of the 32 bytes of G, in place. + */ +static void +byteswap(unsigned char *G) +{ + int i; + + for (i = 0; i < 16; i ++) { + unsigned char t; + + t = G[i]; + G[i] = G[31 - i]; + G[31 - i] = t; + } +} + /* + * Multiply the point encoded in G (exactly 32 bytes, overwritten with + * the result) by the multiplier kb (at most 32 bytes). Returned value + * is 0 if Glen is not 32 or kblen exceeds 32, in which case G is left + * untouched; otherwise 1 is returned. The curve argument is ignored. + */ +static uint32_t +api_mul(unsigned char *G, size_t Glen, + const unsigned char *kb, size_t kblen, int curve) +{ + uint32_t x1[10], x2[10], x3[10], z2[10], z3[10]; + uint32_t a[10], aa[10], b[10], bb[10]; + uint32_t c[10], d[10], e[10], da[10], cb[10]; + unsigned char k[32]; + uint32_t swap; + int i; + + (void)curve; + + /* + * Points are encoded over exactly 32 bytes. Multipliers must fit + * in 32 bytes as well. + * RFC 7748 mandates that the high bit of the last point byte must + * be ignored/cleared. + */ + if (Glen != 32 || kblen > 32) { + return 0; + } + G[31] &= 0x7F; + + /* + * Byteswap the point encoding, because it uses little-endian, and + * the generic decoding routine uses big-endian. + */ + byteswap(G); + + /* + * Decode the point ('u' coordinate). This should be reduced + * modulo p, but we prefer to avoid the dependency on + * br_i31_decode_reduce(). 
Instead, we use br_i31_decode_mod() + * with a synthetic modulus of value 2^255 (this must work + * since G was truncated to 255 bits), then use a conditional + * subtraction. We use br_i31_decode_mod() and not + * br_i31_decode(), because the ec_prime_i31 implementation uses + * the former but not the latter. + * br_i31_decode_reduce(a, G, 32, C255_P); + */ + br_i31_zero(b, 0x108); + b[9] = 0x0080; + br_i31_decode_mod(a, G, 32, b); + a[0] = 0x107; + br_i31_sub(a, C255_P, NOT(br_i31_sub(a, C255_P, 0))); + + /* + * Initialise variables x1, x2, z2, x3 and z3. We set all of them + * into Montgomery representation. + */ + br_i31_montymul(x1, a, C255_R2, C255_P, P0I); + memcpy(x3, x1, sizeof x1); + br_i31_zero(z2, C255_P[0]); + memcpy(x2, z2, sizeof z2); + x2[1] = 0x13000000; + memcpy(z3, x2, sizeof x2); + + /* + * kb[] is in big-endian notation, but possibly shorter than k[]. + */ + memset(k, 0, (sizeof k) - kblen); + memcpy(k + (sizeof k) - kblen, kb, kblen); + k[31] &= 0xF8; + k[0] &= 0x7F; + k[0] |= 0x40; + + /* obsolete + print_int_mont("x1", x1); + */ + + swap = 0; + for (i = 254; i >= 0; i --) { + uint32_t kt; + + kt = (k[31 - (i >> 3)] >> (i & 7)) & 1; + swap ^= kt; + cswap(x2, x3, swap); + cswap(z2, z3, swap); + swap = kt; + + /* obsolete + print_int_mont("x2", x2); + print_int_mont("z2", z2); + print_int_mont("x3", x3); + print_int_mont("z3", z3); + */ + + c255_add(a, x2, z2); + c255_mul(aa, a, a); + c255_sub(b, x2, z2); + c255_mul(bb, b, b); + c255_sub(e, aa, bb); + c255_add(c, x3, z3); + c255_sub(d, x3, z3); + c255_mul(da, d, a); + c255_mul(cb, c, b); + + /* obsolete + print_int_mont("a ", a); + print_int_mont("aa", aa); + print_int_mont("b ", b); + print_int_mont("bb", bb); + print_int_mont("e ", e); + print_int_mont("c ", c); + print_int_mont("d ", d); + print_int_mont("da", da); + print_int_mont("cb", cb); + */ + + c255_add(x3, da, cb); + c255_mul(x3, x3, x3); + c255_sub(z3, da, cb); + c255_mul(z3, z3, z3); + c255_mul(z3, z3, x1); + c255_mul(x2, aa, bb); 
+ c255_mul(z2, C255_A24, e); + c255_add(z2, z2, aa); + c255_mul(z2, e, z2); + + /* obsolete + print_int_mont("x2", x2); + print_int_mont("z2", z2); + print_int_mont("x3", x3); + print_int_mont("z3", z3); + */ + } + cswap(x2, x3, swap); + cswap(z2, z3, swap); + + /* + * Invert z2 with a modular exponentiation. This is a simple + * square-and-multiply algorithm; we share most of the non-squaring + * multiplications since the exponent contains almost only ones. + * + * The first loop yields a = z2^(2^16-1). The second loop appends + * fourteen more 16-bit runs of ones, giving b = z2^(2^240-1). The + * last loop processes the low 15 bits of 0xFFEB (0x7FEB), so the + * total exponent is (2^240-1)*2^15 + 0x7FEB = 2^255-21 = p-2. The + * final multiplication by x2 then produces x2/z2. + */ + memcpy(a, z2, sizeof z2); + for (i = 0; i < 15; i ++) { + c255_mul(a, a, a); + c255_mul(a, a, z2); + } + memcpy(b, a, sizeof a); + for (i = 0; i < 14; i ++) { + int j; + + for (j = 0; j < 16; j ++) { + c255_mul(b, b, b); + } + c255_mul(b, b, a); + } + for (i = 14; i >= 0; i --) { + c255_mul(b, b, b); + if ((0xFFEB >> i) & 1) { + c255_mul(b, z2, b); + } + } + c255_mul(b, x2, b); + + /* + * To avoid a dependency on br_i31_from_monty(), we use + * a Montgomery multiplication with 1. + * memcpy(x2, b, sizeof b); + * br_i31_from_monty(x2, C255_P, P0I); + */ + br_i31_zero(a, C255_P[0]); + a[1] = 1; + br_i31_montymul(x2, a, b, C255_P, P0I); + + br_i31_encode(G, 32, x2); + byteswap(G); + return 1; +} + /* + * Multiply the curve generator by the multiplier x: the generator + * obtained from api_generator() is copied into R, api_mul() is applied + * to R in place, and the generator length is returned. The status + * returned by api_mul() is not propagated to the caller. + */ +static size_t +api_mulgen(unsigned char *R, + const unsigned char *x, size_t xlen, int curve) +{ + const unsigned char *G; + size_t Glen; + + G = api_generator(curve, &Glen); + memcpy(R, G, Glen); + api_mul(R, Glen, x, xlen, curve); + return Glen; +} + +static uint32_t +api_muladd(unsigned char *A, const unsigned char *B, size_t len, + const unsigned char *x, size_t xlen, + const unsigned char *y, size_t ylen, int curve) +{ + /* + * We don't implement this method, since it is used for ECDSA + * only, and there is no ECDSA over Curve25519 (which instead + * uses EdDSA). Every argument is ignored and the function + * always returns 0. 
+ */ + (void)A; + (void)B; + (void)len; + (void)x; + (void)xlen; + (void)y; + (void)ylen; + (void)curve; + return 0; +} + +/* + * see bearssl_ec.h + * + * Implementation table for Curve25519 built on the br_i31_*() integer + * routines. The first field has only bit 29 set (0x20000000); + * presumably this is the supported-curves bit mask -- confirm against + * bearssl_ec.h. The remaining fields point to the functions of this + * file. + */ +const br_ec_impl br_ec_c25519_i31 = { + (uint32_t)0x20000000, + &api_generator, + &api_order, + &api_xoff, + &api_mul, + &api_mulgen, + &api_muladd +}; diff --git a/third_party/bearssl/src/ec_c25519_m15.c b/third_party/bearssl/src/ec_c25519_m15.c new file mode 100644 index 0000000..deff55b --- /dev/null +++ b/third_party/bearssl/src/ec_c25519_m15.c @@ -0,0 +1,1478 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* obsolete +#include <stdio.h> +#include <stdlib.h> +static void +print_int(const char *name, const uint32_t *x) +{ + size_t u; + unsigned char tmp[36]; + + printf("%s = ", name); + for (u = 0; u < 20; u ++) { + if (x[u] > 0x1FFF) { + printf("INVALID:"); + for (u = 0; u < 20; u ++) { + printf(" %04X", x[u]); + } + printf("\n"); + return; + } + } + memset(tmp, 0, sizeof tmp); + for (u = 0; u < 20; u ++) { + uint32_t w; + int j, k; + + w = x[u]; + j = 13 * (int)u; + k = j & 7; + if (k != 0) { + w <<= k; + j -= k; + } + k = j >> 3; + tmp[35 - k] |= (unsigned char)w; + tmp[34 - k] |= (unsigned char)(w >> 8); + tmp[33 - k] |= (unsigned char)(w >> 16); + tmp[32 - k] |= (unsigned char)(w >> 24); + } + for (u = 4; u < 36; u ++) { + printf("%02X", tmp[u]); + } + printf("\n"); +} +*/ + +/* + * If BR_NO_ARITH_SHIFT is undefined, or defined to 0, then we _assume_ + * that right-shifting a signed negative integer copies the sign bit + * (arithmetic right-shift). This is "implementation-defined behaviour", + * i.e. it is not undefined, but it may differ between compilers. Each + * compiler is supposed to document its behaviour in that respect. GCC + * explicitly defines that an arithmetic right shift is used. We expect + * all other compilers to do the same, because the underlying CPUs offer + * an arithmetic right shift opcode that could not be used otherwise. + * + * When BR_NO_ARITH_SHIFT is non-zero, ARSH() emulates the arithmetic + * shift with unsigned operations only (the top bit of x is replicated + * into the upper n bits). Otherwise ARSH() reinterprets x as an int32_t + * through a pointer cast, which means that x must be an lvalue. + */ +#if BR_NO_ARITH_SHIFT +#define ARSH(x, n) (((uint32_t)(x) >> (n)) \ + | ((-((uint32_t)(x) >> 31)) << (32 - (n)))) +#else +#define ARSH(x, n) ((*(int32_t *)&(x)) >> (n)) +#endif + +/* + * Convert an integer from unsigned little-endian encoding to a sequence of + * 13-bit words in little-endian order. The final "partial" word is + * returned. 
+ */ +static uint32_t +le8_to_le13(uint32_t *dst, const unsigned char *src, size_t len) +{ + uint32_t acc; + int acc_len; + + acc = 0; + acc_len = 0; + while (len -- > 0) { + acc |= (uint32_t)(*src ++) << acc_len; + acc_len += 8; + if (acc_len >= 13) { + *dst ++ = acc & 0x1FFF; + acc >>= 13; + acc_len -= 13; + } + } + return acc; +} + +/* + * Convert an integer (13-bit words, little-endian) to unsigned + * little-endian encoding. The total encoding length is provided; all + * the destination bytes will be filled. A new source word is read + * whenever fewer than 8 bits remain in the accumulator. + */ +static void +le13_to_le8(unsigned char *dst, size_t len, const uint32_t *src) +{ + uint32_t acc; + int acc_len; + + acc = 0; + acc_len = 0; + while (len -- > 0) { + if (acc_len < 8) { + acc |= (*src ++) << acc_len; + acc_len += 13; + } + *dst ++ = (unsigned char)acc; + acc >>= 8; + acc_len -= 8; + } +} + +/* + * Normalise an array of words to a strict 13 bits per word. Returned + * value is the resulting carry. The source (w) and destination (d) + * arrays may be identical, but shall not overlap partially. + * + * Each sum (word plus incoming carry) is held in a signed 32-bit + * variable and the outgoing carry is obtained with ARSH(), so a sum + * whose top bit is set produces a negative carry (a borrow) for the + * next word. + */ +static inline uint32_t +norm13(uint32_t *d, const uint32_t *w, size_t len) +{ + size_t u; + uint32_t cc; + + cc = 0; + for (u = 0; u < len; u ++) { + int32_t z; + + z = w[u] + cc; + d[u] = z & 0x1FFF; + cc = ARSH(z, 13); + } + return cc; +} + +/* + * mul20() multiplies two 260-bit integers together. Each word must fit + * in 13 bits; source operands use 20 words, destination operand + * receives 40 words. All overlaps allowed. + * + * square20() computes the square of a 260-bit integer. Each word must + * fit in 13 bits; source operand uses 20 words, destination operand + * receives 40 words. All overlaps allowed. + */ + +#if BR_SLOW_MUL15 + +static void +mul20(uint32_t *d, const uint32_t *a, const uint32_t *b) +{ + /* + * Two-level Karatsuba: turns a 20x20 multiplication into + * nine 5x5 multiplications. 
We use 13-bit words but do not + * propagate carries immediately, so words may expand: + * + * - First Karatsuba decomposition turns the 20x20 mul on + * 13-bit words into three 10x10 muls, two on 13-bit words + * and one on 14-bit words. + * + * - Second Karatsuba decomposition further splits these into: + * + * * four 5x5 muls on 13-bit words + * * four 5x5 muls on 14-bit words + * * one 5x5 mul on 15-bit words + * + * Highest word value is 8191, 16382 or 32764, for 13-bit, 14-bit + * or 15-bit words, respectively. + */ + uint32_t u[45], v[45], w[90]; + uint32_t cc; + int i; + +#define ZADD(dw, d_off, s1w, s1_off, s2w, s2_off) do { \ + (dw)[5 * (d_off) + 0] = (s1w)[5 * (s1_off) + 0] \ + + (s2w)[5 * (s2_off) + 0]; \ + (dw)[5 * (d_off) + 1] = (s1w)[5 * (s1_off) + 1] \ + + (s2w)[5 * (s2_off) + 1]; \ + (dw)[5 * (d_off) + 2] = (s1w)[5 * (s1_off) + 2] \ + + (s2w)[5 * (s2_off) + 2]; \ + (dw)[5 * (d_off) + 3] = (s1w)[5 * (s1_off) + 3] \ + + (s2w)[5 * (s2_off) + 3]; \ + (dw)[5 * (d_off) + 4] = (s1w)[5 * (s1_off) + 4] \ + + (s2w)[5 * (s2_off) + 4]; \ + } while (0) + +#define ZADDT(dw, d_off, sw, s_off) do { \ + (dw)[5 * (d_off) + 0] += (sw)[5 * (s_off) + 0]; \ + (dw)[5 * (d_off) + 1] += (sw)[5 * (s_off) + 1]; \ + (dw)[5 * (d_off) + 2] += (sw)[5 * (s_off) + 2]; \ + (dw)[5 * (d_off) + 3] += (sw)[5 * (s_off) + 3]; \ + (dw)[5 * (d_off) + 4] += (sw)[5 * (s_off) + 4]; \ + } while (0) + +#define ZSUB2F(dw, d_off, s1w, s1_off, s2w, s2_off) do { \ + (dw)[5 * (d_off) + 0] -= (s1w)[5 * (s1_off) + 0] \ + + (s2w)[5 * (s2_off) + 0]; \ + (dw)[5 * (d_off) + 1] -= (s1w)[5 * (s1_off) + 1] \ + + (s2w)[5 * (s2_off) + 1]; \ + (dw)[5 * (d_off) + 2] -= (s1w)[5 * (s1_off) + 2] \ + + (s2w)[5 * (s2_off) + 2]; \ + (dw)[5 * (d_off) + 3] -= (s1w)[5 * (s1_off) + 3] \ + + (s2w)[5 * (s2_off) + 3]; \ + (dw)[5 * (d_off) + 4] -= (s1w)[5 * (s1_off) + 4] \ + + (s2w)[5 * (s2_off) + 4]; \ + } while (0) + +#define CPR1(w, cprcc) do { \ + uint32_t cprz = (w) + cprcc; \ + (w) = cprz & 0x1FFF; \ + cprcc = cprz >> 
13; \ + } while (0) + +#define CPR(dw, d_off) do { \ + uint32_t cprcc; \ + cprcc = 0; \ + CPR1((dw)[(d_off) + 0], cprcc); \ + CPR1((dw)[(d_off) + 1], cprcc); \ + CPR1((dw)[(d_off) + 2], cprcc); \ + CPR1((dw)[(d_off) + 3], cprcc); \ + CPR1((dw)[(d_off) + 4], cprcc); \ + CPR1((dw)[(d_off) + 5], cprcc); \ + CPR1((dw)[(d_off) + 6], cprcc); \ + CPR1((dw)[(d_off) + 7], cprcc); \ + CPR1((dw)[(d_off) + 8], cprcc); \ + (dw)[(d_off) + 9] = cprcc; \ + } while (0) + + memcpy(u, a, 20 * sizeof *a); + ZADD(u, 4, a, 0, a, 1); + ZADD(u, 5, a, 2, a, 3); + ZADD(u, 6, a, 0, a, 2); + ZADD(u, 7, a, 1, a, 3); + ZADD(u, 8, u, 6, u, 7); + + memcpy(v, b, 20 * sizeof *b); + ZADD(v, 4, b, 0, b, 1); + ZADD(v, 5, b, 2, b, 3); + ZADD(v, 6, b, 0, b, 2); + ZADD(v, 7, b, 1, b, 3); + ZADD(v, 8, v, 6, v, 7); + + /* + * Do the first eight 5x5 muls (one per iteration, on the 5-word + * slots 0 to 7 of u[] and v[]). Source words are at most 16382 + * each, so we can add product results together "as is" in 32-bit + * words. Each product fills ten words of w[], the last one being + * set to zero. + */ + for (i = 0; i < 40; i += 5) { + w[(i << 1) + 0] = MUL15(u[i + 0], v[i + 0]); + w[(i << 1) + 1] = MUL15(u[i + 0], v[i + 1]) + + MUL15(u[i + 1], v[i + 0]); + w[(i << 1) + 2] = MUL15(u[i + 0], v[i + 2]) + + MUL15(u[i + 1], v[i + 1]) + + MUL15(u[i + 2], v[i + 0]); + w[(i << 1) + 3] = MUL15(u[i + 0], v[i + 3]) + + MUL15(u[i + 1], v[i + 2]) + + MUL15(u[i + 2], v[i + 1]) + + MUL15(u[i + 3], v[i + 0]); + w[(i << 1) + 4] = MUL15(u[i + 0], v[i + 4]) + + MUL15(u[i + 1], v[i + 3]) + + MUL15(u[i + 2], v[i + 2]) + + MUL15(u[i + 3], v[i + 1]) + + MUL15(u[i + 4], v[i + 0]); + w[(i << 1) + 5] = MUL15(u[i + 1], v[i + 4]) + + MUL15(u[i + 2], v[i + 3]) + + MUL15(u[i + 3], v[i + 2]) + + MUL15(u[i + 4], v[i + 1]); + w[(i << 1) + 6] = MUL15(u[i + 2], v[i + 4]) + + MUL15(u[i + 3], v[i + 3]) + + MUL15(u[i + 4], v[i + 2]); + w[(i << 1) + 7] = MUL15(u[i + 3], v[i + 4]) + + MUL15(u[i + 4], v[i + 3]); + w[(i << 1) + 8] = MUL15(u[i + 4], v[i + 4]); + w[(i << 1) + 9] = 0; + } + + /* + * For the 9th multiplication, source words are up to 32764, + * so we must do some carry 
propagation. If we add up to + * 4 products and the carry is no more than 524224, then the + * result fits in 32 bits, and the next carry will be no more + * than 524224 (because 4*(32764^2)+524224 < 8192*524225). + * + * We thus just skip one of the products in the middle word, + * then do a carry propagation (this reduces words to 13 bits + * each, except possibly the last, which may use up to 17 bits + * or so), then add the missing product. + */ + w[80 + 0] = MUL15(u[40 + 0], v[40 + 0]); + w[80 + 1] = MUL15(u[40 + 0], v[40 + 1]) + + MUL15(u[40 + 1], v[40 + 0]); + w[80 + 2] = MUL15(u[40 + 0], v[40 + 2]) + + MUL15(u[40 + 1], v[40 + 1]) + + MUL15(u[40 + 2], v[40 + 0]); + w[80 + 3] = MUL15(u[40 + 0], v[40 + 3]) + + MUL15(u[40 + 1], v[40 + 2]) + + MUL15(u[40 + 2], v[40 + 1]) + + MUL15(u[40 + 3], v[40 + 0]); + w[80 + 4] = MUL15(u[40 + 0], v[40 + 4]) + + MUL15(u[40 + 1], v[40 + 3]) + + MUL15(u[40 + 2], v[40 + 2]) + + MUL15(u[40 + 3], v[40 + 1]); + /* + MUL15(u[40 + 4], v[40 + 0]) */ + w[80 + 5] = MUL15(u[40 + 1], v[40 + 4]) + + MUL15(u[40 + 2], v[40 + 3]) + + MUL15(u[40 + 3], v[40 + 2]) + + MUL15(u[40 + 4], v[40 + 1]); + w[80 + 6] = MUL15(u[40 + 2], v[40 + 4]) + + MUL15(u[40 + 3], v[40 + 3]) + + MUL15(u[40 + 4], v[40 + 2]); + w[80 + 7] = MUL15(u[40 + 3], v[40 + 4]) + + MUL15(u[40 + 4], v[40 + 3]); + w[80 + 8] = MUL15(u[40 + 4], v[40 + 4]); + + CPR(w, 80); + + w[80 + 4] += MUL15(u[40 + 4], v[40 + 0]); + + /* + * The products on 14-bit words in slots 6 and 7 yield values + * up to 5*(16382^2) each, and we need to subtract two such + * values from the higher word. We need the subtraction to fit + * in a _signed_ 32-bit integer, i.e. 31 bits + a sign bit. + * However, 10*(16382^2) does not fit. So we must perform a + * bit of reduction here. + */ + CPR(w, 60); + CPR(w, 70); + + /* + * Recompose results. 
+ */ + + /* 0..1*0..1 into 0..3 */ + ZSUB2F(w, 8, w, 0, w, 2); + ZSUB2F(w, 9, w, 1, w, 3); + ZADDT(w, 1, w, 8); + ZADDT(w, 2, w, 9); + + /* 2..3*2..3 into 4..7 */ + ZSUB2F(w, 10, w, 4, w, 6); + ZSUB2F(w, 11, w, 5, w, 7); + ZADDT(w, 5, w, 10); + ZADDT(w, 6, w, 11); + + /* (0..1+2..3)*(0..1+2..3) into 12..15 */ + ZSUB2F(w, 16, w, 12, w, 14); + ZSUB2F(w, 17, w, 13, w, 15); + ZADDT(w, 13, w, 16); + ZADDT(w, 14, w, 17); + + /* first-level recomposition */ + ZSUB2F(w, 12, w, 0, w, 4); + ZSUB2F(w, 13, w, 1, w, 5); + ZSUB2F(w, 14, w, 2, w, 6); + ZSUB2F(w, 15, w, 3, w, 7); + ZADDT(w, 2, w, 12); + ZADDT(w, 3, w, 13); + ZADDT(w, 4, w, 14); + ZADDT(w, 5, w, 15); + + /* + * Perform carry propagation to bring all words down to 13 bits. + * The carry returned by norm13() is then folded back into the top + * word d[39], which may therefore end up larger than 13 bits. + */ + cc = norm13(d, w, 40); + d[39] += (cc << 13); + +#undef ZADD +#undef ZADDT +#undef ZSUB2F +#undef CPR1 +#undef CPR +} + /* + * With BR_SLOW_MUL15, squaring has no dedicated code: it is simply + * mul20() with both source operands set to a. + */ +static inline void +square20(uint32_t *d, const uint32_t *a) +{ + mul20(d, a, a); +} + +#else + /* + * Fully unrolled schoolbook multiplication: t[k] (k = 0 to 38) is the + * sum of all MUL15(a[i], b[j]) such that i + j = k. The 39 sums are + * then normalised to 13-bit words into d[0..38] by norm13(), whose + * returned carry becomes d[39]. Assumes MUL15() returns the full + * 32-bit product of its operands -- confirm against inner.h. + */ +static void +mul20(uint32_t *d, const uint32_t *a, const uint32_t *b) +{ + uint32_t t[39]; + + t[ 0] = MUL15(a[ 0], b[ 0]); + t[ 1] = MUL15(a[ 0], b[ 1]) + + MUL15(a[ 1], b[ 0]); + t[ 2] = MUL15(a[ 0], b[ 2]) + + MUL15(a[ 1], b[ 1]) + + MUL15(a[ 2], b[ 0]); + t[ 3] = MUL15(a[ 0], b[ 3]) + + MUL15(a[ 1], b[ 2]) + + MUL15(a[ 2], b[ 1]) + + MUL15(a[ 3], b[ 0]); + t[ 4] = MUL15(a[ 0], b[ 4]) + + MUL15(a[ 1], b[ 3]) + + MUL15(a[ 2], b[ 2]) + + MUL15(a[ 3], b[ 1]) + + MUL15(a[ 4], b[ 0]); + t[ 5] = MUL15(a[ 0], b[ 5]) + + MUL15(a[ 1], b[ 4]) + + MUL15(a[ 2], b[ 3]) + + MUL15(a[ 3], b[ 2]) + + MUL15(a[ 4], b[ 1]) + + MUL15(a[ 5], b[ 0]); + t[ 6] = MUL15(a[ 0], b[ 6]) + + MUL15(a[ 1], b[ 5]) + + MUL15(a[ 2], b[ 4]) + + MUL15(a[ 3], b[ 3]) + + MUL15(a[ 4], b[ 2]) + + MUL15(a[ 5], b[ 1]) + + MUL15(a[ 6], b[ 0]); + t[ 7] = MUL15(a[ 0], b[ 7]) + + MUL15(a[ 1], b[ 6]) + + MUL15(a[ 2], b[ 5]) + + MUL15(a[ 3], b[ 4]) + + MUL15(a[ 4], b[ 3]) + + MUL15(a[ 5], b[ 2]) + + MUL15(a[ 6], b[ 1]) + + MUL15(a[ 7], b[ 0]); + t[ 8] = MUL15(a[ 0], b[ 8]) 
+ + MUL15(a[ 1], b[ 7]) + + MUL15(a[ 2], b[ 6]) + + MUL15(a[ 3], b[ 5]) + + MUL15(a[ 4], b[ 4]) + + MUL15(a[ 5], b[ 3]) + + MUL15(a[ 6], b[ 2]) + + MUL15(a[ 7], b[ 1]) + + MUL15(a[ 8], b[ 0]); + t[ 9] = MUL15(a[ 0], b[ 9]) + + MUL15(a[ 1], b[ 8]) + + MUL15(a[ 2], b[ 7]) + + MUL15(a[ 3], b[ 6]) + + MUL15(a[ 4], b[ 5]) + + MUL15(a[ 5], b[ 4]) + + MUL15(a[ 6], b[ 3]) + + MUL15(a[ 7], b[ 2]) + + MUL15(a[ 8], b[ 1]) + + MUL15(a[ 9], b[ 0]); + t[10] = MUL15(a[ 0], b[10]) + + MUL15(a[ 1], b[ 9]) + + MUL15(a[ 2], b[ 8]) + + MUL15(a[ 3], b[ 7]) + + MUL15(a[ 4], b[ 6]) + + MUL15(a[ 5], b[ 5]) + + MUL15(a[ 6], b[ 4]) + + MUL15(a[ 7], b[ 3]) + + MUL15(a[ 8], b[ 2]) + + MUL15(a[ 9], b[ 1]) + + MUL15(a[10], b[ 0]); + t[11] = MUL15(a[ 0], b[11]) + + MUL15(a[ 1], b[10]) + + MUL15(a[ 2], b[ 9]) + + MUL15(a[ 3], b[ 8]) + + MUL15(a[ 4], b[ 7]) + + MUL15(a[ 5], b[ 6]) + + MUL15(a[ 6], b[ 5]) + + MUL15(a[ 7], b[ 4]) + + MUL15(a[ 8], b[ 3]) + + MUL15(a[ 9], b[ 2]) + + MUL15(a[10], b[ 1]) + + MUL15(a[11], b[ 0]); + t[12] = MUL15(a[ 0], b[12]) + + MUL15(a[ 1], b[11]) + + MUL15(a[ 2], b[10]) + + MUL15(a[ 3], b[ 9]) + + MUL15(a[ 4], b[ 8]) + + MUL15(a[ 5], b[ 7]) + + MUL15(a[ 6], b[ 6]) + + MUL15(a[ 7], b[ 5]) + + MUL15(a[ 8], b[ 4]) + + MUL15(a[ 9], b[ 3]) + + MUL15(a[10], b[ 2]) + + MUL15(a[11], b[ 1]) + + MUL15(a[12], b[ 0]); + t[13] = MUL15(a[ 0], b[13]) + + MUL15(a[ 1], b[12]) + + MUL15(a[ 2], b[11]) + + MUL15(a[ 3], b[10]) + + MUL15(a[ 4], b[ 9]) + + MUL15(a[ 5], b[ 8]) + + MUL15(a[ 6], b[ 7]) + + MUL15(a[ 7], b[ 6]) + + MUL15(a[ 8], b[ 5]) + + MUL15(a[ 9], b[ 4]) + + MUL15(a[10], b[ 3]) + + MUL15(a[11], b[ 2]) + + MUL15(a[12], b[ 1]) + + MUL15(a[13], b[ 0]); + t[14] = MUL15(a[ 0], b[14]) + + MUL15(a[ 1], b[13]) + + MUL15(a[ 2], b[12]) + + MUL15(a[ 3], b[11]) + + MUL15(a[ 4], b[10]) + + MUL15(a[ 5], b[ 9]) + + MUL15(a[ 6], b[ 8]) + + MUL15(a[ 7], b[ 7]) + + MUL15(a[ 8], b[ 6]) + + MUL15(a[ 9], b[ 5]) + + MUL15(a[10], b[ 4]) + + MUL15(a[11], b[ 3]) + + MUL15(a[12], b[ 2]) + + 
MUL15(a[13], b[ 1]) + + MUL15(a[14], b[ 0]); + t[15] = MUL15(a[ 0], b[15]) + + MUL15(a[ 1], b[14]) + + MUL15(a[ 2], b[13]) + + MUL15(a[ 3], b[12]) + + MUL15(a[ 4], b[11]) + + MUL15(a[ 5], b[10]) + + MUL15(a[ 6], b[ 9]) + + MUL15(a[ 7], b[ 8]) + + MUL15(a[ 8], b[ 7]) + + MUL15(a[ 9], b[ 6]) + + MUL15(a[10], b[ 5]) + + MUL15(a[11], b[ 4]) + + MUL15(a[12], b[ 3]) + + MUL15(a[13], b[ 2]) + + MUL15(a[14], b[ 1]) + + MUL15(a[15], b[ 0]); + t[16] = MUL15(a[ 0], b[16]) + + MUL15(a[ 1], b[15]) + + MUL15(a[ 2], b[14]) + + MUL15(a[ 3], b[13]) + + MUL15(a[ 4], b[12]) + + MUL15(a[ 5], b[11]) + + MUL15(a[ 6], b[10]) + + MUL15(a[ 7], b[ 9]) + + MUL15(a[ 8], b[ 8]) + + MUL15(a[ 9], b[ 7]) + + MUL15(a[10], b[ 6]) + + MUL15(a[11], b[ 5]) + + MUL15(a[12], b[ 4]) + + MUL15(a[13], b[ 3]) + + MUL15(a[14], b[ 2]) + + MUL15(a[15], b[ 1]) + + MUL15(a[16], b[ 0]); + t[17] = MUL15(a[ 0], b[17]) + + MUL15(a[ 1], b[16]) + + MUL15(a[ 2], b[15]) + + MUL15(a[ 3], b[14]) + + MUL15(a[ 4], b[13]) + + MUL15(a[ 5], b[12]) + + MUL15(a[ 6], b[11]) + + MUL15(a[ 7], b[10]) + + MUL15(a[ 8], b[ 9]) + + MUL15(a[ 9], b[ 8]) + + MUL15(a[10], b[ 7]) + + MUL15(a[11], b[ 6]) + + MUL15(a[12], b[ 5]) + + MUL15(a[13], b[ 4]) + + MUL15(a[14], b[ 3]) + + MUL15(a[15], b[ 2]) + + MUL15(a[16], b[ 1]) + + MUL15(a[17], b[ 0]); + t[18] = MUL15(a[ 0], b[18]) + + MUL15(a[ 1], b[17]) + + MUL15(a[ 2], b[16]) + + MUL15(a[ 3], b[15]) + + MUL15(a[ 4], b[14]) + + MUL15(a[ 5], b[13]) + + MUL15(a[ 6], b[12]) + + MUL15(a[ 7], b[11]) + + MUL15(a[ 8], b[10]) + + MUL15(a[ 9], b[ 9]) + + MUL15(a[10], b[ 8]) + + MUL15(a[11], b[ 7]) + + MUL15(a[12], b[ 6]) + + MUL15(a[13], b[ 5]) + + MUL15(a[14], b[ 4]) + + MUL15(a[15], b[ 3]) + + MUL15(a[16], b[ 2]) + + MUL15(a[17], b[ 1]) + + MUL15(a[18], b[ 0]); + t[19] = MUL15(a[ 0], b[19]) + + MUL15(a[ 1], b[18]) + + MUL15(a[ 2], b[17]) + + MUL15(a[ 3], b[16]) + + MUL15(a[ 4], b[15]) + + MUL15(a[ 5], b[14]) + + MUL15(a[ 6], b[13]) + + MUL15(a[ 7], b[12]) + + MUL15(a[ 8], b[11]) + + MUL15(a[ 9], b[10]) 
+ + MUL15(a[10], b[ 9]) + + MUL15(a[11], b[ 8]) + + MUL15(a[12], b[ 7]) + + MUL15(a[13], b[ 6]) + + MUL15(a[14], b[ 5]) + + MUL15(a[15], b[ 4]) + + MUL15(a[16], b[ 3]) + + MUL15(a[17], b[ 2]) + + MUL15(a[18], b[ 1]) + + MUL15(a[19], b[ 0]); + t[20] = MUL15(a[ 1], b[19]) + + MUL15(a[ 2], b[18]) + + MUL15(a[ 3], b[17]) + + MUL15(a[ 4], b[16]) + + MUL15(a[ 5], b[15]) + + MUL15(a[ 6], b[14]) + + MUL15(a[ 7], b[13]) + + MUL15(a[ 8], b[12]) + + MUL15(a[ 9], b[11]) + + MUL15(a[10], b[10]) + + MUL15(a[11], b[ 9]) + + MUL15(a[12], b[ 8]) + + MUL15(a[13], b[ 7]) + + MUL15(a[14], b[ 6]) + + MUL15(a[15], b[ 5]) + + MUL15(a[16], b[ 4]) + + MUL15(a[17], b[ 3]) + + MUL15(a[18], b[ 2]) + + MUL15(a[19], b[ 1]); + t[21] = MUL15(a[ 2], b[19]) + + MUL15(a[ 3], b[18]) + + MUL15(a[ 4], b[17]) + + MUL15(a[ 5], b[16]) + + MUL15(a[ 6], b[15]) + + MUL15(a[ 7], b[14]) + + MUL15(a[ 8], b[13]) + + MUL15(a[ 9], b[12]) + + MUL15(a[10], b[11]) + + MUL15(a[11], b[10]) + + MUL15(a[12], b[ 9]) + + MUL15(a[13], b[ 8]) + + MUL15(a[14], b[ 7]) + + MUL15(a[15], b[ 6]) + + MUL15(a[16], b[ 5]) + + MUL15(a[17], b[ 4]) + + MUL15(a[18], b[ 3]) + + MUL15(a[19], b[ 2]); + t[22] = MUL15(a[ 3], b[19]) + + MUL15(a[ 4], b[18]) + + MUL15(a[ 5], b[17]) + + MUL15(a[ 6], b[16]) + + MUL15(a[ 7], b[15]) + + MUL15(a[ 8], b[14]) + + MUL15(a[ 9], b[13]) + + MUL15(a[10], b[12]) + + MUL15(a[11], b[11]) + + MUL15(a[12], b[10]) + + MUL15(a[13], b[ 9]) + + MUL15(a[14], b[ 8]) + + MUL15(a[15], b[ 7]) + + MUL15(a[16], b[ 6]) + + MUL15(a[17], b[ 5]) + + MUL15(a[18], b[ 4]) + + MUL15(a[19], b[ 3]); + t[23] = MUL15(a[ 4], b[19]) + + MUL15(a[ 5], b[18]) + + MUL15(a[ 6], b[17]) + + MUL15(a[ 7], b[16]) + + MUL15(a[ 8], b[15]) + + MUL15(a[ 9], b[14]) + + MUL15(a[10], b[13]) + + MUL15(a[11], b[12]) + + MUL15(a[12], b[11]) + + MUL15(a[13], b[10]) + + MUL15(a[14], b[ 9]) + + MUL15(a[15], b[ 8]) + + MUL15(a[16], b[ 7]) + + MUL15(a[17], b[ 6]) + + MUL15(a[18], b[ 5]) + + MUL15(a[19], b[ 4]); + t[24] = MUL15(a[ 5], b[19]) + + MUL15(a[ 6], 
b[18]) + + MUL15(a[ 7], b[17]) + + MUL15(a[ 8], b[16]) + + MUL15(a[ 9], b[15]) + + MUL15(a[10], b[14]) + + MUL15(a[11], b[13]) + + MUL15(a[12], b[12]) + + MUL15(a[13], b[11]) + + MUL15(a[14], b[10]) + + MUL15(a[15], b[ 9]) + + MUL15(a[16], b[ 8]) + + MUL15(a[17], b[ 7]) + + MUL15(a[18], b[ 6]) + + MUL15(a[19], b[ 5]); + t[25] = MUL15(a[ 6], b[19]) + + MUL15(a[ 7], b[18]) + + MUL15(a[ 8], b[17]) + + MUL15(a[ 9], b[16]) + + MUL15(a[10], b[15]) + + MUL15(a[11], b[14]) + + MUL15(a[12], b[13]) + + MUL15(a[13], b[12]) + + MUL15(a[14], b[11]) + + MUL15(a[15], b[10]) + + MUL15(a[16], b[ 9]) + + MUL15(a[17], b[ 8]) + + MUL15(a[18], b[ 7]) + + MUL15(a[19], b[ 6]); + t[26] = MUL15(a[ 7], b[19]) + + MUL15(a[ 8], b[18]) + + MUL15(a[ 9], b[17]) + + MUL15(a[10], b[16]) + + MUL15(a[11], b[15]) + + MUL15(a[12], b[14]) + + MUL15(a[13], b[13]) + + MUL15(a[14], b[12]) + + MUL15(a[15], b[11]) + + MUL15(a[16], b[10]) + + MUL15(a[17], b[ 9]) + + MUL15(a[18], b[ 8]) + + MUL15(a[19], b[ 7]); + t[27] = MUL15(a[ 8], b[19]) + + MUL15(a[ 9], b[18]) + + MUL15(a[10], b[17]) + + MUL15(a[11], b[16]) + + MUL15(a[12], b[15]) + + MUL15(a[13], b[14]) + + MUL15(a[14], b[13]) + + MUL15(a[15], b[12]) + + MUL15(a[16], b[11]) + + MUL15(a[17], b[10]) + + MUL15(a[18], b[ 9]) + + MUL15(a[19], b[ 8]); + t[28] = MUL15(a[ 9], b[19]) + + MUL15(a[10], b[18]) + + MUL15(a[11], b[17]) + + MUL15(a[12], b[16]) + + MUL15(a[13], b[15]) + + MUL15(a[14], b[14]) + + MUL15(a[15], b[13]) + + MUL15(a[16], b[12]) + + MUL15(a[17], b[11]) + + MUL15(a[18], b[10]) + + MUL15(a[19], b[ 9]); + t[29] = MUL15(a[10], b[19]) + + MUL15(a[11], b[18]) + + MUL15(a[12], b[17]) + + MUL15(a[13], b[16]) + + MUL15(a[14], b[15]) + + MUL15(a[15], b[14]) + + MUL15(a[16], b[13]) + + MUL15(a[17], b[12]) + + MUL15(a[18], b[11]) + + MUL15(a[19], b[10]); + t[30] = MUL15(a[11], b[19]) + + MUL15(a[12], b[18]) + + MUL15(a[13], b[17]) + + MUL15(a[14], b[16]) + + MUL15(a[15], b[15]) + + MUL15(a[16], b[14]) + + MUL15(a[17], b[13]) + + MUL15(a[18], b[12]) + + 
MUL15(a[19], b[11]); + t[31] = MUL15(a[12], b[19]) + + MUL15(a[13], b[18]) + + MUL15(a[14], b[17]) + + MUL15(a[15], b[16]) + + MUL15(a[16], b[15]) + + MUL15(a[17], b[14]) + + MUL15(a[18], b[13]) + + MUL15(a[19], b[12]); + t[32] = MUL15(a[13], b[19]) + + MUL15(a[14], b[18]) + + MUL15(a[15], b[17]) + + MUL15(a[16], b[16]) + + MUL15(a[17], b[15]) + + MUL15(a[18], b[14]) + + MUL15(a[19], b[13]); + t[33] = MUL15(a[14], b[19]) + + MUL15(a[15], b[18]) + + MUL15(a[16], b[17]) + + MUL15(a[17], b[16]) + + MUL15(a[18], b[15]) + + MUL15(a[19], b[14]); + t[34] = MUL15(a[15], b[19]) + + MUL15(a[16], b[18]) + + MUL15(a[17], b[17]) + + MUL15(a[18], b[16]) + + MUL15(a[19], b[15]); + t[35] = MUL15(a[16], b[19]) + + MUL15(a[17], b[18]) + + MUL15(a[18], b[17]) + + MUL15(a[19], b[16]); + t[36] = MUL15(a[17], b[19]) + + MUL15(a[18], b[18]) + + MUL15(a[19], b[17]); + t[37] = MUL15(a[18], b[19]) + + MUL15(a[19], b[18]); + t[38] = MUL15(a[19], b[19]); + + d[39] = norm13(d, t, 39); +} + +static void +square20(uint32_t *d, const uint32_t *a) +{ + uint32_t t[39]; + + t[ 0] = MUL15(a[ 0], a[ 0]); + t[ 1] = ((MUL15(a[ 0], a[ 1])) << 1); + t[ 2] = MUL15(a[ 1], a[ 1]) + + ((MUL15(a[ 0], a[ 2])) << 1); + t[ 3] = ((MUL15(a[ 0], a[ 3]) + + MUL15(a[ 1], a[ 2])) << 1); + t[ 4] = MUL15(a[ 2], a[ 2]) + + ((MUL15(a[ 0], a[ 4]) + + MUL15(a[ 1], a[ 3])) << 1); + t[ 5] = ((MUL15(a[ 0], a[ 5]) + + MUL15(a[ 1], a[ 4]) + + MUL15(a[ 2], a[ 3])) << 1); + t[ 6] = MUL15(a[ 3], a[ 3]) + + ((MUL15(a[ 0], a[ 6]) + + MUL15(a[ 1], a[ 5]) + + MUL15(a[ 2], a[ 4])) << 1); + t[ 7] = ((MUL15(a[ 0], a[ 7]) + + MUL15(a[ 1], a[ 6]) + + MUL15(a[ 2], a[ 5]) + + MUL15(a[ 3], a[ 4])) << 1); + t[ 8] = MUL15(a[ 4], a[ 4]) + + ((MUL15(a[ 0], a[ 8]) + + MUL15(a[ 1], a[ 7]) + + MUL15(a[ 2], a[ 6]) + + MUL15(a[ 3], a[ 5])) << 1); + t[ 9] = ((MUL15(a[ 0], a[ 9]) + + MUL15(a[ 1], a[ 8]) + + MUL15(a[ 2], a[ 7]) + + MUL15(a[ 3], a[ 6]) + + MUL15(a[ 4], a[ 5])) << 1); + t[10] = MUL15(a[ 5], a[ 5]) + + ((MUL15(a[ 0], a[10]) + + MUL15(a[ 1], 
a[ 9]) + + MUL15(a[ 2], a[ 8]) + + MUL15(a[ 3], a[ 7]) + + MUL15(a[ 4], a[ 6])) << 1); + t[11] = ((MUL15(a[ 0], a[11]) + + MUL15(a[ 1], a[10]) + + MUL15(a[ 2], a[ 9]) + + MUL15(a[ 3], a[ 8]) + + MUL15(a[ 4], a[ 7]) + + MUL15(a[ 5], a[ 6])) << 1); + t[12] = MUL15(a[ 6], a[ 6]) + + ((MUL15(a[ 0], a[12]) + + MUL15(a[ 1], a[11]) + + MUL15(a[ 2], a[10]) + + MUL15(a[ 3], a[ 9]) + + MUL15(a[ 4], a[ 8]) + + MUL15(a[ 5], a[ 7])) << 1); + t[13] = ((MUL15(a[ 0], a[13]) + + MUL15(a[ 1], a[12]) + + MUL15(a[ 2], a[11]) + + MUL15(a[ 3], a[10]) + + MUL15(a[ 4], a[ 9]) + + MUL15(a[ 5], a[ 8]) + + MUL15(a[ 6], a[ 7])) << 1); + t[14] = MUL15(a[ 7], a[ 7]) + + ((MUL15(a[ 0], a[14]) + + MUL15(a[ 1], a[13]) + + MUL15(a[ 2], a[12]) + + MUL15(a[ 3], a[11]) + + MUL15(a[ 4], a[10]) + + MUL15(a[ 5], a[ 9]) + + MUL15(a[ 6], a[ 8])) << 1); + t[15] = ((MUL15(a[ 0], a[15]) + + MUL15(a[ 1], a[14]) + + MUL15(a[ 2], a[13]) + + MUL15(a[ 3], a[12]) + + MUL15(a[ 4], a[11]) + + MUL15(a[ 5], a[10]) + + MUL15(a[ 6], a[ 9]) + + MUL15(a[ 7], a[ 8])) << 1); + t[16] = MUL15(a[ 8], a[ 8]) + + ((MUL15(a[ 0], a[16]) + + MUL15(a[ 1], a[15]) + + MUL15(a[ 2], a[14]) + + MUL15(a[ 3], a[13]) + + MUL15(a[ 4], a[12]) + + MUL15(a[ 5], a[11]) + + MUL15(a[ 6], a[10]) + + MUL15(a[ 7], a[ 9])) << 1); + t[17] = ((MUL15(a[ 0], a[17]) + + MUL15(a[ 1], a[16]) + + MUL15(a[ 2], a[15]) + + MUL15(a[ 3], a[14]) + + MUL15(a[ 4], a[13]) + + MUL15(a[ 5], a[12]) + + MUL15(a[ 6], a[11]) + + MUL15(a[ 7], a[10]) + + MUL15(a[ 8], a[ 9])) << 1); + t[18] = MUL15(a[ 9], a[ 9]) + + ((MUL15(a[ 0], a[18]) + + MUL15(a[ 1], a[17]) + + MUL15(a[ 2], a[16]) + + MUL15(a[ 3], a[15]) + + MUL15(a[ 4], a[14]) + + MUL15(a[ 5], a[13]) + + MUL15(a[ 6], a[12]) + + MUL15(a[ 7], a[11]) + + MUL15(a[ 8], a[10])) << 1); + t[19] = ((MUL15(a[ 0], a[19]) + + MUL15(a[ 1], a[18]) + + MUL15(a[ 2], a[17]) + + MUL15(a[ 3], a[16]) + + MUL15(a[ 4], a[15]) + + MUL15(a[ 5], a[14]) + + MUL15(a[ 6], a[13]) + + MUL15(a[ 7], a[12]) + + MUL15(a[ 8], a[11]) + + MUL15(a[ 9], a[10])) 
<< 1); + t[20] = MUL15(a[10], a[10]) + + ((MUL15(a[ 1], a[19]) + + MUL15(a[ 2], a[18]) + + MUL15(a[ 3], a[17]) + + MUL15(a[ 4], a[16]) + + MUL15(a[ 5], a[15]) + + MUL15(a[ 6], a[14]) + + MUL15(a[ 7], a[13]) + + MUL15(a[ 8], a[12]) + + MUL15(a[ 9], a[11])) << 1); + t[21] = ((MUL15(a[ 2], a[19]) + + MUL15(a[ 3], a[18]) + + MUL15(a[ 4], a[17]) + + MUL15(a[ 5], a[16]) + + MUL15(a[ 6], a[15]) + + MUL15(a[ 7], a[14]) + + MUL15(a[ 8], a[13]) + + MUL15(a[ 9], a[12]) + + MUL15(a[10], a[11])) << 1); + t[22] = MUL15(a[11], a[11]) + + ((MUL15(a[ 3], a[19]) + + MUL15(a[ 4], a[18]) + + MUL15(a[ 5], a[17]) + + MUL15(a[ 6], a[16]) + + MUL15(a[ 7], a[15]) + + MUL15(a[ 8], a[14]) + + MUL15(a[ 9], a[13]) + + MUL15(a[10], a[12])) << 1); + t[23] = ((MUL15(a[ 4], a[19]) + + MUL15(a[ 5], a[18]) + + MUL15(a[ 6], a[17]) + + MUL15(a[ 7], a[16]) + + MUL15(a[ 8], a[15]) + + MUL15(a[ 9], a[14]) + + MUL15(a[10], a[13]) + + MUL15(a[11], a[12])) << 1); + t[24] = MUL15(a[12], a[12]) + + ((MUL15(a[ 5], a[19]) + + MUL15(a[ 6], a[18]) + + MUL15(a[ 7], a[17]) + + MUL15(a[ 8], a[16]) + + MUL15(a[ 9], a[15]) + + MUL15(a[10], a[14]) + + MUL15(a[11], a[13])) << 1); + t[25] = ((MUL15(a[ 6], a[19]) + + MUL15(a[ 7], a[18]) + + MUL15(a[ 8], a[17]) + + MUL15(a[ 9], a[16]) + + MUL15(a[10], a[15]) + + MUL15(a[11], a[14]) + + MUL15(a[12], a[13])) << 1); + t[26] = MUL15(a[13], a[13]) + + ((MUL15(a[ 7], a[19]) + + MUL15(a[ 8], a[18]) + + MUL15(a[ 9], a[17]) + + MUL15(a[10], a[16]) + + MUL15(a[11], a[15]) + + MUL15(a[12], a[14])) << 1); + t[27] = ((MUL15(a[ 8], a[19]) + + MUL15(a[ 9], a[18]) + + MUL15(a[10], a[17]) + + MUL15(a[11], a[16]) + + MUL15(a[12], a[15]) + + MUL15(a[13], a[14])) << 1); + t[28] = MUL15(a[14], a[14]) + + ((MUL15(a[ 9], a[19]) + + MUL15(a[10], a[18]) + + MUL15(a[11], a[17]) + + MUL15(a[12], a[16]) + + MUL15(a[13], a[15])) << 1); + t[29] = ((MUL15(a[10], a[19]) + + MUL15(a[11], a[18]) + + MUL15(a[12], a[17]) + + MUL15(a[13], a[16]) + + MUL15(a[14], a[15])) << 1); + t[30] = MUL15(a[15], a[15]) + 
+ ((MUL15(a[11], a[19]) + + MUL15(a[12], a[18]) + + MUL15(a[13], a[17]) + + MUL15(a[14], a[16])) << 1); + t[31] = ((MUL15(a[12], a[19]) + + MUL15(a[13], a[18]) + + MUL15(a[14], a[17]) + + MUL15(a[15], a[16])) << 1); + t[32] = MUL15(a[16], a[16]) + + ((MUL15(a[13], a[19]) + + MUL15(a[14], a[18]) + + MUL15(a[15], a[17])) << 1); + t[33] = ((MUL15(a[14], a[19]) + + MUL15(a[15], a[18]) + + MUL15(a[16], a[17])) << 1); + t[34] = MUL15(a[17], a[17]) + + ((MUL15(a[15], a[19]) + + MUL15(a[16], a[18])) << 1); + t[35] = ((MUL15(a[16], a[19]) + + MUL15(a[17], a[18])) << 1); + t[36] = MUL15(a[18], a[18]) + + ((MUL15(a[17], a[19])) << 1); + t[37] = ((MUL15(a[18], a[19])) << 1); + t[38] = MUL15(a[19], a[19]); + + d[39] = norm13(d, t, 39); +} + +#endif + +/* + * Perform a "final reduction" in field F255 (field for Curve25519) + * The source value must be less than twice the modulus. If the value + * is not lower than the modulus, then the modulus is subtracted and + * this function returns 1; otherwise, it leaves it untouched and it + * returns 0. + */ +static uint32_t +reduce_final_f255(uint32_t *d) +{ + uint32_t t[20]; + uint32_t cc; + int i; + + memcpy(t, d, sizeof t); + cc = 19; + for (i = 0; i < 20; i ++) { + uint32_t w; + + w = t[i] + cc; + cc = w >> 13; + t[i] = w & 0x1FFF; + } + cc = t[19] >> 8; + t[19] &= 0xFF; + CCOPY(cc, d, t, sizeof t); + return cc; +} + +static void +f255_mulgen(uint32_t *d, const uint32_t *a, const uint32_t *b, int square) +{ + uint32_t t[40], cc, w; + + /* + * Compute raw multiplication. All result words fit in 13 bits + * each; upper word (t[39]) must fit on 5 bits, since the product + * of two 256-bit integers must fit on 512 bits. + */ + if (square) { + square20(t, a); + } else { + mul20(t, a, b); + } + + /* + * Modular reduction: each high word is added where necessary. + * Since the modulus is 2^255-19 and word 20 corresponds to + * offset 20*13 = 260, word 20+k must be added to word k with + * a factor of 19*2^5 = 608. 
The extra bits in word 19 are also + * added that way. + */ + cc = MUL15(t[19] >> 8, 19); + t[19] &= 0xFF; + +#define MM1(x) do { \ + w = t[x] + cc + MUL15(t[(x) + 20], 608); \ + t[x] = w & 0x1FFF; \ + cc = w >> 13; \ + } while (0) + + MM1( 0); + MM1( 1); + MM1( 2); + MM1( 3); + MM1( 4); + MM1( 5); + MM1( 6); + MM1( 7); + MM1( 8); + MM1( 9); + MM1(10); + MM1(11); + MM1(12); + MM1(13); + MM1(14); + MM1(15); + MM1(16); + MM1(17); + MM1(18); + MM1(19); + +#undef MM1 + + cc = MUL15(w >> 8, 19); + t[19] &= 0xFF; + +#define MM2(x) do { \ + w = t[x] + cc; \ + d[x] = w & 0x1FFF; \ + cc = w >> 13; \ + } while (0) + + MM2( 0); + MM2( 1); + MM2( 2); + MM2( 3); + MM2( 4); + MM2( 5); + MM2( 6); + MM2( 7); + MM2( 8); + MM2( 9); + MM2(10); + MM2(11); + MM2(12); + MM2(13); + MM2(14); + MM2(15); + MM2(16); + MM2(17); + MM2(18); + MM2(19); + +#undef MM2 +} + +/* + * Perform a multiplication of two integers modulo 2^255-19. + * Operands are arrays of 20 words, each containing 13 bits of data, in + * little-endian order. Input value may be up to 2^256-1; on output, value + * fits on 256 bits and is lower than twice the modulus. + * + * f255_mul() is the general multiplication, f255_square() is specialised + * for squarings. + */ +#define f255_mul(d, a, b) f255_mulgen(d, a, b, 0) +#define f255_square(d, a) f255_mulgen(d, a, a, 1) + +/* + * Add two values in F255. Partial reduction is performed (down to less + * than twice the modulus). + */ +static void +f255_add(uint32_t *d, const uint32_t *a, const uint32_t *b) +{ + int i; + uint32_t cc, w; + + cc = 0; + for (i = 0; i < 20; i ++) { + w = a[i] + b[i] + cc; + d[i] = w & 0x1FFF; + cc = w >> 13; + } + cc = MUL15(w >> 8, 19); + d[19] &= 0xFF; + for (i = 0; i < 20; i ++) { + w = d[i] + cc; + d[i] = w & 0x1FFF; + cc = w >> 13; + } +} + +/* + * Subtract one value from another in F255. Partial reduction is + * performed (down to less than twice the modulus). 
+ */ +static void +f255_sub(uint32_t *d, const uint32_t *a, const uint32_t *b) +{ + /* + * We actually compute a - b + 2*p, so that the final value is + * necessarily positive. + */ + int i; + uint32_t cc, w; + + cc = (uint32_t)-38; + for (i = 0; i < 20; i ++) { + w = a[i] - b[i] + cc; + d[i] = w & 0x1FFF; + cc = ARSH(w, 13); + } + cc = MUL15((w + 0x200) >> 8, 19); + d[19] &= 0xFF; + for (i = 0; i < 20; i ++) { + w = d[i] + cc; + d[i] = w & 0x1FFF; + cc = w >> 13; + } +} + +/* + * Multiply an integer by the 'A24' constant (121665). Partial reduction + * is performed (down to less than twice the modulus). + */ +static void +f255_mul_a24(uint32_t *d, const uint32_t *a) +{ + int i; + uint32_t cc, w; + + cc = 0; + for (i = 0; i < 20; i ++) { + w = MUL15(a[i], 121665) + cc; + d[i] = w & 0x1FFF; + cc = w >> 13; + } + cc = MUL15(w >> 8, 19); + d[19] &= 0xFF; + for (i = 0; i < 20; i ++) { + w = d[i] + cc; + d[i] = w & 0x1FFF; + cc = w >> 13; + } +} + +static const unsigned char GEN[] = { + 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +static const unsigned char ORDER[] = { + 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +static const unsigned char * +api_generator(int curve, size_t *len) +{ + (void)curve; + *len = 32; + return GEN; +} + +static const unsigned char * +api_order(int curve, size_t *len) +{ + (void)curve; + *len = 32; + return ORDER; +} + +static size_t +api_xoff(int curve, size_t *len) +{ + (void)curve; + *len = 32; + return 0; +} + +static void +cswap(uint32_t *a, uint32_t *b, uint32_t ctl) +{ + int i; + + ctl = -ctl; + for (i = 0; i < 20; i ++) { + uint32_t aw, bw, tw; + + aw = a[i]; + bw = b[i]; + tw = ctl & (aw ^ bw); + a[i] = aw ^ tw; + b[i] = 
bw ^ tw; + } +} + +static uint32_t +api_mul(unsigned char *G, size_t Glen, + const unsigned char *kb, size_t kblen, int curve) +{ + uint32_t x1[20], x2[20], x3[20], z2[20], z3[20]; + uint32_t a[20], aa[20], b[20], bb[20]; + uint32_t c[20], d[20], e[20], da[20], cb[20]; + unsigned char k[32]; + uint32_t swap; + int i; + + (void)curve; + + /* + * Points are encoded over exactly 32 bytes. Multipliers must fit + * in 32 bytes as well. + * RFC 7748 mandates that the high bit of the last point byte must + * be ignored/cleared. + */ + if (Glen != 32 || kblen > 32) { + return 0; + } + G[31] &= 0x7F; + + /* + * Initialise variables x1, x2, z2, x3 and z3. We set all of them + * into Montgomery representation. + */ + x1[19] = le8_to_le13(x1, G, 32); + memcpy(x3, x1, sizeof x1); + memset(z2, 0, sizeof z2); + memset(x2, 0, sizeof x2); + x2[0] = 1; + memset(z3, 0, sizeof z3); + z3[0] = 1; + + memset(k, 0, (sizeof k) - kblen); + memcpy(k + (sizeof k) - kblen, kb, kblen); + k[31] &= 0xF8; + k[0] &= 0x7F; + k[0] |= 0x40; + + /* obsolete + print_int("x1", x1); + */ + + swap = 0; + for (i = 254; i >= 0; i --) { + uint32_t kt; + + kt = (k[31 - (i >> 3)] >> (i & 7)) & 1; + swap ^= kt; + cswap(x2, x3, swap); + cswap(z2, z3, swap); + swap = kt; + + /* obsolete + print_int("x2", x2); + print_int("z2", z2); + print_int("x3", x3); + print_int("z3", z3); + */ + + f255_add(a, x2, z2); + f255_square(aa, a); + f255_sub(b, x2, z2); + f255_square(bb, b); + f255_sub(e, aa, bb); + f255_add(c, x3, z3); + f255_sub(d, x3, z3); + f255_mul(da, d, a); + f255_mul(cb, c, b); + + /* obsolete + print_int("a ", a); + print_int("aa", aa); + print_int("b ", b); + print_int("bb", bb); + print_int("e ", e); + print_int("c ", c); + print_int("d ", d); + print_int("da", da); + print_int("cb", cb); + */ + + f255_add(x3, da, cb); + f255_square(x3, x3); + f255_sub(z3, da, cb); + f255_square(z3, z3); + f255_mul(z3, z3, x1); + f255_mul(x2, aa, bb); + f255_mul_a24(z2, e); + f255_add(z2, z2, aa); + f255_mul(z2, e, z2); + 
+ /* obsolete + print_int("x2", x2); + print_int("z2", z2); + print_int("x3", x3); + print_int("z3", z3); + */ + } + cswap(x2, x3, swap); + cswap(z2, z3, swap); + + /* + * Inverse z2 with a modular exponentiation. This is a simple + * square-and-multiply algorithm; we mutualise most non-squarings + * since the exponent contains almost only ones. + */ + memcpy(a, z2, sizeof z2); + for (i = 0; i < 15; i ++) { + f255_square(a, a); + f255_mul(a, a, z2); + } + memcpy(b, a, sizeof a); + for (i = 0; i < 14; i ++) { + int j; + + for (j = 0; j < 16; j ++) { + f255_square(b, b); + } + f255_mul(b, b, a); + } + for (i = 14; i >= 0; i --) { + f255_square(b, b); + if ((0xFFEB >> i) & 1) { + f255_mul(b, z2, b); + } + } + f255_mul(x2, x2, b); + reduce_final_f255(x2); + le13_to_le8(G, 32, x2); + return 1; +} + +static size_t +api_mulgen(unsigned char *R, + const unsigned char *x, size_t xlen, int curve) +{ + const unsigned char *G; + size_t Glen; + + G = api_generator(curve, &Glen); + memcpy(R, G, Glen); + api_mul(R, Glen, x, xlen, curve); + return Glen; +} + +static uint32_t +api_muladd(unsigned char *A, const unsigned char *B, size_t len, + const unsigned char *x, size_t xlen, + const unsigned char *y, size_t ylen, int curve) +{ + /* + * We don't implement this method, since it is used for ECDSA + * only, and there is no ECDSA over Curve25519 (which instead + * uses EdDSA). 
+ */ + (void)A; + (void)B; + (void)len; + (void)x; + (void)xlen; + (void)y; + (void)ylen; + (void)curve; + return 0; +} + +/* see bearssl_ec.h */ +const br_ec_impl br_ec_c25519_m15 = { + (uint32_t)0x20000000, + &api_generator, + &api_order, + &api_xoff, + &api_mul, + &api_mulgen, + &api_muladd +}; diff --git a/third_party/bearssl/src/ec_c25519_m31.c b/third_party/bearssl/src/ec_c25519_m31.c new file mode 100644 index 0000000..1dd6d51 --- /dev/null +++ b/third_party/bearssl/src/ec_c25519_m31.c @@ -0,0 +1,800 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* obsolete +#include <stdio.h> +#include <stdlib.h> +static void +print_int(const char *name, const uint32_t *x) +{ + size_t u; + unsigned char tmp[40]; + + printf("%s = ", name); + for (u = 0; u < 9; u ++) { + if (x[u] > 0x3FFFFFFF) { + printf("INVALID:"); + for (u = 0; u < 9; u ++) { + printf(" %08X", x[u]); + } + printf("\n"); + return; + } + } + memset(tmp, 0, sizeof tmp); + for (u = 0; u < 9; u ++) { + uint64_t w; + int j, k; + + w = x[u]; + j = 30 * (int)u; + k = j & 7; + if (k != 0) { + w <<= k; + j -= k; + } + k = j >> 3; + for (j = 0; j < 8; j ++) { + tmp[39 - k - j] |= (unsigned char)w; + w >>= 8; + } + } + for (u = 8; u < 40; u ++) { + printf("%02X", tmp[u]); + } + printf("\n"); +} +*/ + +/* + * If BR_NO_ARITH_SHIFT is undefined, or defined to 0, then we _assume_ + * that right-shifting a signed negative integer copies the sign bit + * (arithmetic right-shift). This is "implementation-defined behaviour", + * i.e. it is not undefined, but it may differ between compilers. Each + * compiler is supposed to document its behaviour in that respect. GCC + * explicitly defines that an arithmetic right shift is used. We expect + * all other compilers to do the same, because underlying CPU offer an + * arithmetic right shift opcode that could not be used otherwise. + */ +#if BR_NO_ARITH_SHIFT +#define ARSH(x, n) (((uint32_t)(x) >> (n)) \ + | ((-((uint32_t)(x) >> 31)) << (32 - (n)))) +#else +#define ARSH(x, n) ((*(int32_t *)&(x)) >> (n)) +#endif + +/* + * Convert an integer from unsigned little-endian encoding to a sequence of + * 30-bit words in little-endian order. The final "partial" word is + * returned. 
+ */ +static uint32_t +le8_to_le30(uint32_t *dst, const unsigned char *src, size_t len) +{ + uint32_t acc; + int acc_len; + + acc = 0; + acc_len = 0; + while (len -- > 0) { + uint32_t b; + + b = *src ++; + if (acc_len < 22) { + acc |= b << acc_len; + acc_len += 8; + } else { + *dst ++ = (acc | (b << acc_len)) & 0x3FFFFFFF; + acc = b >> (30 - acc_len); + acc_len -= 22; + } + } + return acc; +} + +/* + * Convert an integer (30-bit words, little-endian) to unsigned + * little-endian encoding. The total encoding length is provided; all + * the destination bytes will be filled. + */ +static void +le30_to_le8(unsigned char *dst, size_t len, const uint32_t *src) +{ + uint32_t acc; + int acc_len; + + acc = 0; + acc_len = 0; + while (len -- > 0) { + if (acc_len < 8) { + uint32_t w; + + w = *src ++; + *dst ++ = (unsigned char)(acc | (w << acc_len)); + acc = w >> (8 - acc_len); + acc_len += 22; + } else { + *dst ++ = (unsigned char)acc; + acc >>= 8; + acc_len -= 8; + } + } +} + +/* + * Multiply two integers. Source integers are represented as arrays of + * nine 30-bit words, for values up to 2^270-1. Result is encoded over + * 18 words of 30 bits each. + */ +static void +mul9(uint32_t *d, const uint32_t *a, const uint32_t *b) +{ + /* + * Maximum intermediate result is no more than + * 10376293531797946367, which fits in 64 bits. Reason: + * + * 10376293531797946367 = 9 * (2^30-1)^2 + 9663676406 + * 10376293531797946367 < 9663676407 * 2^30 + * + * Thus, adding together 9 products of 30-bit integers, with + * a carry of at most 9663676406, yields an integer that fits + * on 64 bits and generates a carry of at most 9663676406. 
+ */ + uint64_t t[17]; + uint64_t cc; + int i; + + t[ 0] = MUL31(a[0], b[0]); + t[ 1] = MUL31(a[0], b[1]) + + MUL31(a[1], b[0]); + t[ 2] = MUL31(a[0], b[2]) + + MUL31(a[1], b[1]) + + MUL31(a[2], b[0]); + t[ 3] = MUL31(a[0], b[3]) + + MUL31(a[1], b[2]) + + MUL31(a[2], b[1]) + + MUL31(a[3], b[0]); + t[ 4] = MUL31(a[0], b[4]) + + MUL31(a[1], b[3]) + + MUL31(a[2], b[2]) + + MUL31(a[3], b[1]) + + MUL31(a[4], b[0]); + t[ 5] = MUL31(a[0], b[5]) + + MUL31(a[1], b[4]) + + MUL31(a[2], b[3]) + + MUL31(a[3], b[2]) + + MUL31(a[4], b[1]) + + MUL31(a[5], b[0]); + t[ 6] = MUL31(a[0], b[6]) + + MUL31(a[1], b[5]) + + MUL31(a[2], b[4]) + + MUL31(a[3], b[3]) + + MUL31(a[4], b[2]) + + MUL31(a[5], b[1]) + + MUL31(a[6], b[0]); + t[ 7] = MUL31(a[0], b[7]) + + MUL31(a[1], b[6]) + + MUL31(a[2], b[5]) + + MUL31(a[3], b[4]) + + MUL31(a[4], b[3]) + + MUL31(a[5], b[2]) + + MUL31(a[6], b[1]) + + MUL31(a[7], b[0]); + t[ 8] = MUL31(a[0], b[8]) + + MUL31(a[1], b[7]) + + MUL31(a[2], b[6]) + + MUL31(a[3], b[5]) + + MUL31(a[4], b[4]) + + MUL31(a[5], b[3]) + + MUL31(a[6], b[2]) + + MUL31(a[7], b[1]) + + MUL31(a[8], b[0]); + t[ 9] = MUL31(a[1], b[8]) + + MUL31(a[2], b[7]) + + MUL31(a[3], b[6]) + + MUL31(a[4], b[5]) + + MUL31(a[5], b[4]) + + MUL31(a[6], b[3]) + + MUL31(a[7], b[2]) + + MUL31(a[8], b[1]); + t[10] = MUL31(a[2], b[8]) + + MUL31(a[3], b[7]) + + MUL31(a[4], b[6]) + + MUL31(a[5], b[5]) + + MUL31(a[6], b[4]) + + MUL31(a[7], b[3]) + + MUL31(a[8], b[2]); + t[11] = MUL31(a[3], b[8]) + + MUL31(a[4], b[7]) + + MUL31(a[5], b[6]) + + MUL31(a[6], b[5]) + + MUL31(a[7], b[4]) + + MUL31(a[8], b[3]); + t[12] = MUL31(a[4], b[8]) + + MUL31(a[5], b[7]) + + MUL31(a[6], b[6]) + + MUL31(a[7], b[5]) + + MUL31(a[8], b[4]); + t[13] = MUL31(a[5], b[8]) + + MUL31(a[6], b[7]) + + MUL31(a[7], b[6]) + + MUL31(a[8], b[5]); + t[14] = MUL31(a[6], b[8]) + + MUL31(a[7], b[7]) + + MUL31(a[8], b[6]); + t[15] = MUL31(a[7], b[8]) + + MUL31(a[8], b[7]); + t[16] = MUL31(a[8], b[8]); + + /* + * Propagate carries. 
+ */ + cc = 0; + for (i = 0; i < 17; i ++) { + uint64_t w; + + w = t[i] + cc; + d[i] = (uint32_t)w & 0x3FFFFFFF; + cc = w >> 30; + } + d[17] = (uint32_t)cc; +} + +/* + * Square a 270-bit integer, represented as an array of nine 30-bit words. + * Result uses 18 words of 30 bits each. + */ +static void +square9(uint32_t *d, const uint32_t *a) +{ + uint64_t t[17]; + uint64_t cc; + int i; + + t[ 0] = MUL31(a[0], a[0]); + t[ 1] = ((MUL31(a[0], a[1])) << 1); + t[ 2] = MUL31(a[1], a[1]) + + ((MUL31(a[0], a[2])) << 1); + t[ 3] = ((MUL31(a[0], a[3]) + + MUL31(a[1], a[2])) << 1); + t[ 4] = MUL31(a[2], a[2]) + + ((MUL31(a[0], a[4]) + + MUL31(a[1], a[3])) << 1); + t[ 5] = ((MUL31(a[0], a[5]) + + MUL31(a[1], a[4]) + + MUL31(a[2], a[3])) << 1); + t[ 6] = MUL31(a[3], a[3]) + + ((MUL31(a[0], a[6]) + + MUL31(a[1], a[5]) + + MUL31(a[2], a[4])) << 1); + t[ 7] = ((MUL31(a[0], a[7]) + + MUL31(a[1], a[6]) + + MUL31(a[2], a[5]) + + MUL31(a[3], a[4])) << 1); + t[ 8] = MUL31(a[4], a[4]) + + ((MUL31(a[0], a[8]) + + MUL31(a[1], a[7]) + + MUL31(a[2], a[6]) + + MUL31(a[3], a[5])) << 1); + t[ 9] = ((MUL31(a[1], a[8]) + + MUL31(a[2], a[7]) + + MUL31(a[3], a[6]) + + MUL31(a[4], a[5])) << 1); + t[10] = MUL31(a[5], a[5]) + + ((MUL31(a[2], a[8]) + + MUL31(a[3], a[7]) + + MUL31(a[4], a[6])) << 1); + t[11] = ((MUL31(a[3], a[8]) + + MUL31(a[4], a[7]) + + MUL31(a[5], a[6])) << 1); + t[12] = MUL31(a[6], a[6]) + + ((MUL31(a[4], a[8]) + + MUL31(a[5], a[7])) << 1); + t[13] = ((MUL31(a[5], a[8]) + + MUL31(a[6], a[7])) << 1); + t[14] = MUL31(a[7], a[7]) + + ((MUL31(a[6], a[8])) << 1); + t[15] = ((MUL31(a[7], a[8])) << 1); + t[16] = MUL31(a[8], a[8]); + + /* + * Propagate carries. + */ + cc = 0; + for (i = 0; i < 17; i ++) { + uint64_t w; + + w = t[i] + cc; + d[i] = (uint32_t)w & 0x3FFFFFFF; + cc = w >> 30; + } + d[17] = (uint32_t)cc; +} + +/* + * Perform a "final reduction" in field F255 (field for Curve25519) + * The source value must be less than twice the modulus. 
If the value + * is not lower than the modulus, then the modulus is subtracted and + * this function returns 1; otherwise, it leaves it untouched and it + * returns 0. + */ +static uint32_t +reduce_final_f255(uint32_t *d) +{ + uint32_t t[9]; + uint32_t cc; + int i; + + memcpy(t, d, sizeof t); + cc = 19; + for (i = 0; i < 9; i ++) { + uint32_t w; + + w = t[i] + cc; + cc = w >> 30; + t[i] = w & 0x3FFFFFFF; + } + cc = t[8] >> 15; + t[8] &= 0x7FFF; + CCOPY(cc, d, t, sizeof t); + return cc; +} + +/* + * Perform a multiplication of two integers modulo 2^255-19. + * Operands are arrays of 9 words, each containing 30 bits of data, in + * little-endian order. Input value may be up to 2^256-1; on output, value + * fits on 256 bits and is lower than twice the modulus. + */ +static void +f255_mul(uint32_t *d, const uint32_t *a, const uint32_t *b) +{ + uint32_t t[18], cc; + int i; + + /* + * Compute raw multiplication. All result words fit in 30 bits + * each; upper word (t[17]) must fit on 2 bits, since the product + * of two 256-bit integers must fit on 512 bits. + */ + mul9(t, a, b); + + /* + * Modular reduction: each high word is added where necessary. + * Since the modulus is 2^255-19 and word 9 corresponds to + * offset 9*30 = 270, word 9+k must be added to word k with + * a factor of 19*2^15 = 622592. The extra bits in word 8 are also + * added that way. + * + * Keeping the carry on 32 bits helps with 32-bit architectures, + * and does not noticeably impact performance on 64-bit systems. + */ + cc = MUL15(t[8] >> 15, 19); /* at most 19*(2^15-1) = 622573 */ + t[8] &= 0x7FFF; + for (i = 0; i < 9; i ++) { + uint64_t w; + + w = (uint64_t)t[i] + (uint64_t)cc + MUL31(t[i + 9], 622592); + t[i] = (uint32_t)w & 0x3FFFFFFF; + cc = (uint32_t)(w >> 30); /* at most 622592 */ + } + + /* + * Original product was up to (2^256-1)^2, i.e. a 512-bit integer. 
+ * This was split into two parts (upper of 257 bits, lower of 255 + * bits), and the upper was added to the lower with a factor 19, + * which means that the intermediate value is less than 77*2^255 + * (19*2^257 + 2^255). Therefore, the extra bits "t[8] >> 15" are + * less than 77, and the initial carry cc is at most 76*19 = 1444. + */ + cc = MUL15(t[8] >> 15, 19); + t[8] &= 0x7FFF; + for (i = 0; i < 9; i ++) { + uint32_t z; + + z = t[i] + cc; + d[i] = z & 0x3FFFFFFF; + cc = z >> 30; + } + + /* + * Final result is at most 2^255 + 1443. In particular, the last + * carry is necessarily 0, since t[8] was truncated to 15 bits. + */ +} + +/* + * Perform a squaring of an integer modulo 2^255-19. + * Operands are arrays of 9 words, each containing 30 bits of data, in + * little-endian order. Input value may be up to 2^256-1; on output, value + * fits on 256 bits and is lower than twice the modulus. + */ +static void +f255_square(uint32_t *d, const uint32_t *a) +{ + uint32_t t[18], cc; + int i; + + /* + * Compute raw squaring. All result words fit in 30 bits + * each; upper word (t[17]) must fit on 2 bits, since the square + * of a 256-bit integers must fit on 512 bits. + */ + square9(t, a); + + /* + * Modular reduction: each high word is added where necessary. + * See f255_mul() for details on the reduction and carry limits. + */ + cc = MUL15(t[8] >> 15, 19); + t[8] &= 0x7FFF; + for (i = 0; i < 9; i ++) { + uint64_t w; + + w = (uint64_t)t[i] + (uint64_t)cc + MUL31(t[i + 9], 622592); + t[i] = (uint32_t)w & 0x3FFFFFFF; + cc = (uint32_t)(w >> 30); + } + cc = MUL15(t[8] >> 15, 19); + t[8] &= 0x7FFF; + for (i = 0; i < 9; i ++) { + uint32_t z; + + z = t[i] + cc; + d[i] = z & 0x3FFFFFFF; + cc = z >> 30; + } +} + +/* + * Add two values in F255. Partial reduction is performed (down to less + * than twice the modulus). 
+ */ +static void +f255_add(uint32_t *d, const uint32_t *a, const uint32_t *b) +{ + /* + * Since operand words fit on 30 bits, we can use 32-bit + * variables throughout. + */ + int i; + uint32_t cc, w; + + cc = 0; + for (i = 0; i < 9; i ++) { + w = a[i] + b[i] + cc; + d[i] = w & 0x3FFFFFFF; + cc = w >> 30; + } + cc = MUL15(w >> 15, 19); + d[8] &= 0x7FFF; + for (i = 0; i < 9; i ++) { + w = d[i] + cc; + d[i] = w & 0x3FFFFFFF; + cc = w >> 30; + } +} + +/* + * Subtract one value from another in F255. Partial reduction is + * performed (down to less than twice the modulus). + */ +static void +f255_sub(uint32_t *d, const uint32_t *a, const uint32_t *b) +{ + /* + * We actually compute a - b + 2*p, so that the final value is + * necessarily positive. + */ + int i; + uint32_t cc, w; + + cc = (uint32_t)-38; + for (i = 0; i < 9; i ++) { + w = a[i] - b[i] + cc; + d[i] = w & 0x3FFFFFFF; + cc = ARSH(w, 30); + } + cc = MUL15((w + 0x10000) >> 15, 19); + d[8] &= 0x7FFF; + for (i = 0; i < 9; i ++) { + w = d[i] + cc; + d[i] = w & 0x3FFFFFFF; + cc = w >> 30; + } +} + +/* + * Multiply an integer by the 'A24' constant (121665). Partial reduction + * is performed (down to less than twice the modulus). + */ +static void +f255_mul_a24(uint32_t *d, const uint32_t *a) +{ + int i; + uint64_t w; + uint32_t cc; + + /* + * a[] is over 256 bits, thus a[8] has length at most 16 bits. + * We single out the processing of the last word: intermediate + * value w is up to 121665*2^16, yielding a carry for the next + * loop of at most 19*(121665*2^16/2^15) = 4623289. 
+ */ + cc = 0; + for (i = 0; i < 8; i ++) { + w = MUL31(a[i], 121665) + (uint64_t)cc; + d[i] = (uint32_t)w & 0x3FFFFFFF; + cc = (uint32_t)(w >> 30); + } + w = MUL31(a[8], 121665) + (uint64_t)cc; + d[8] = (uint32_t)w & 0x7FFF; + cc = MUL15((uint32_t)(w >> 15), 19); + + for (i = 0; i < 9; i ++) { + uint32_t z; + + z = d[i] + cc; + d[i] = z & 0x3FFFFFFF; + cc = z >> 30; + } +} + +static const unsigned char GEN[] = { + 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +static const unsigned char ORDER[] = { + 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +static const unsigned char * +api_generator(int curve, size_t *len) +{ + (void)curve; + *len = 32; + return GEN; +} + +static const unsigned char * +api_order(int curve, size_t *len) +{ + (void)curve; + *len = 32; + return ORDER; +} + +static size_t +api_xoff(int curve, size_t *len) +{ + (void)curve; + *len = 32; + return 0; +} + +static void +cswap(uint32_t *a, uint32_t *b, uint32_t ctl) +{ + int i; + + ctl = -ctl; + for (i = 0; i < 9; i ++) { + uint32_t aw, bw, tw; + + aw = a[i]; + bw = b[i]; + tw = ctl & (aw ^ bw); + a[i] = aw ^ tw; + b[i] = bw ^ tw; + } +} + +static uint32_t +api_mul(unsigned char *G, size_t Glen, + const unsigned char *kb, size_t kblen, int curve) +{ + uint32_t x1[9], x2[9], x3[9], z2[9], z3[9]; + uint32_t a[9], aa[9], b[9], bb[9]; + uint32_t c[9], d[9], e[9], da[9], cb[9]; + unsigned char k[32]; + uint32_t swap; + int i; + + (void)curve; + + /* + * Points are encoded over exactly 32 bytes. Multipliers must fit + * in 32 bytes as well. + * RFC 7748 mandates that the high bit of the last point byte must + * be ignored/cleared. 
+ */ + if (Glen != 32 || kblen > 32) { + return 0; + } + G[31] &= 0x7F; + + /* + * Initialise variables x1, x2, z2, x3 and z3. We set all of them + * into Montgomery representation. + */ + x1[8] = le8_to_le30(x1, G, 32); + memcpy(x3, x1, sizeof x1); + memset(z2, 0, sizeof z2); + memset(x2, 0, sizeof x2); + x2[0] = 1; + memset(z3, 0, sizeof z3); + z3[0] = 1; + + memset(k, 0, (sizeof k) - kblen); + memcpy(k + (sizeof k) - kblen, kb, kblen); + k[31] &= 0xF8; + k[0] &= 0x7F; + k[0] |= 0x40; + + /* obsolete + print_int("x1", x1); + */ + + swap = 0; + for (i = 254; i >= 0; i --) { + uint32_t kt; + + kt = (k[31 - (i >> 3)] >> (i & 7)) & 1; + swap ^= kt; + cswap(x2, x3, swap); + cswap(z2, z3, swap); + swap = kt; + + /* obsolete + print_int("x2", x2); + print_int("z2", z2); + print_int("x3", x3); + print_int("z3", z3); + */ + + f255_add(a, x2, z2); + f255_square(aa, a); + f255_sub(b, x2, z2); + f255_square(bb, b); + f255_sub(e, aa, bb); + f255_add(c, x3, z3); + f255_sub(d, x3, z3); + f255_mul(da, d, a); + f255_mul(cb, c, b); + + /* obsolete + print_int("a ", a); + print_int("aa", aa); + print_int("b ", b); + print_int("bb", bb); + print_int("e ", e); + print_int("c ", c); + print_int("d ", d); + print_int("da", da); + print_int("cb", cb); + */ + + f255_add(x3, da, cb); + f255_square(x3, x3); + f255_sub(z3, da, cb); + f255_square(z3, z3); + f255_mul(z3, z3, x1); + f255_mul(x2, aa, bb); + f255_mul_a24(z2, e); + f255_add(z2, z2, aa); + f255_mul(z2, e, z2); + + /* obsolete + print_int("x2", x2); + print_int("z2", z2); + print_int("x3", x3); + print_int("z3", z3); + */ + } + cswap(x2, x3, swap); + cswap(z2, z3, swap); + + /* + * Inverse z2 with a modular exponentiation. This is a simple + * square-and-multiply algorithm; we mutualise most non-squarings + * since the exponent contains almost only ones. 
+ */ + memcpy(a, z2, sizeof z2); + for (i = 0; i < 15; i ++) { + f255_square(a, a); + f255_mul(a, a, z2); + } + memcpy(b, a, sizeof a); + for (i = 0; i < 14; i ++) { + int j; + + for (j = 0; j < 16; j ++) { + f255_square(b, b); + } + f255_mul(b, b, a); + } + for (i = 14; i >= 0; i --) { + f255_square(b, b); + if ((0xFFEB >> i) & 1) { + f255_mul(b, z2, b); + } + } + f255_mul(x2, x2, b); + reduce_final_f255(x2); + le30_to_le8(G, 32, x2); + return 1; +} + +/* + * Multiply the generator by x: the generator returned by + * api_generator() is copied into R, multiplied in place with + * api_mul(), and its length is returned. The status returned by + * api_mul() is not checked. + */ +static size_t +api_mulgen(unsigned char *R, + const unsigned char *x, size_t xlen, int curve) +{ + const unsigned char *G; + size_t Glen; + + G = api_generator(curve, &Glen); + memcpy(R, G, Glen); + api_mul(R, Glen, x, xlen, curve); + return Glen; +} + +/* + * Unimplemented operation: every parameter is ignored and 0 is + * returned. + */ +static uint32_t +api_muladd(unsigned char *A, const unsigned char *B, size_t len, + const unsigned char *x, size_t xlen, + const unsigned char *y, size_t ylen, int curve) +{ + /* + * We don't implement this method, since it is used for ECDSA + * only, and there is no ECDSA over Curve25519 (which instead + * uses EdDSA).
+ */ + (void)A; + (void)B; + (void)len; + (void)x; + (void)xlen; + (void)y; + (void)ylen; + (void)curve; + return 0; +} + +/* + * see bearssl_ec.h + * + * Implementation descriptor: one 32-bit constant followed by pointers + * to the api_*() functions of this file. + * NOTE(review): 0x20000000 is presumably the supported-curves bit + * mask -- confirm against bearssl_ec.h. + */ +const br_ec_impl br_ec_c25519_m31 = { + (uint32_t)0x20000000, + &api_generator, + &api_order, + &api_xoff, + &api_mul, + &api_mulgen, + &api_muladd +}; diff --git a/third_party/bearssl/src/ec_c25519_m62.c b/third_party/bearssl/src/ec_c25519_m62.c new file mode 100644 index 0000000..6b058eb --- /dev/null +++ b/third_party/bearssl/src/ec_c25519_m62.c @@ -0,0 +1,605 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#include "inner.h" + +#if BR_INT128 || BR_UMUL128 + +#if BR_UMUL128 +#include <intrin.h> +#endif + +/* + * Generator as returned by api_generator(): 32 bytes, first byte 0x09, + * all other bytes zero. + */ +static const unsigned char GEN[] = { + 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* + * Value returned by api_order(): 32 bytes, 0x7F followed by 0xFF bytes. + * NOTE(review): this looks like an upper bound on multipliers rather + * than the exact subgroup order -- confirm against bearssl_ec.h. + */ +static const unsigned char ORDER[] = { + 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +/* + * Return GEN and set *len to 32. The curve parameter is ignored. + */ +static const unsigned char * +api_generator(int curve, size_t *len) +{ + (void)curve; + *len = 32; + return GEN; +} + +/* + * Return ORDER and set *len to 32. The curve parameter is ignored. + */ +static const unsigned char * +api_order(int curve, size_t *len) +{ + (void)curve; + *len = 32; + return ORDER; +} + +/* + * Set *len to 32 and return offset 0. The curve parameter is ignored. + */ +static size_t +api_xoff(int curve, size_t *len) +{ + (void)curve; + *len = 32; + return 0; +} + +/* + * A field element is encoded as five 64-bit integers, in basis 2^51. + * Limbs may be occasionally larger than 2^51, to save on carry + * propagation costs. + */ + +#define MASK51 (((uint64_t)1 << 51) - (uint64_t)1) + +/* + * Swap two field elements, conditionally on a flag. + * + * The mask m = -(uint64_t)ctl is all-zeros when ctl is 0 and all-ones + * when ctl is 1, so the five limbs are exchanged (or left alone) + * without any branch. ctl is expected to be 0 or 1. + */ +static inline void +f255_cswap(uint64_t *a, uint64_t *b, uint32_t ctl) +{ + uint64_t m, w; + + m = -(uint64_t)ctl; + w = m & (a[0] ^ b[0]); a[0] ^= w; b[0] ^= w; + w = m & (a[1] ^ b[1]); a[1] ^= w; b[1] ^= w; + w = m & (a[2] ^ b[2]); a[2] ^= w; b[2] ^= w; + w = m & (a[3] ^ b[3]); a[3] ^= w; b[3] ^= w; + w = m & (a[4] ^ b[4]); a[4] ^= w; b[4] ^= w; +} + +/* + * Addition with no carry propagation: each output limb is the plain + * sum of the two matching input limbs, so the maximum limb value + * doubles (one extra bit). No reduction is performed. + */ +static inline void +f255_add(uint64_t *d, const uint64_t *a, const uint64_t *b) +{ + d[0] = a[0] + b[0]; + d[1] = a[1] + b[1]; + d[2] = a[2] + b[2]; + d[3] = a[3] + b[3]; + d[4] = a[4] + b[4]; +} + +/* + * Subtraction. + * On input, limbs must fit on 60 bits each.
On output, result is + * partially reduced, with max value 2^255+19456; moreover, all + * limbs will fit on 51 bits, except the low limb, which may have + * value up to 2^51+19455. + */ +static inline void +f255_sub(uint64_t *d, const uint64_t *a, const uint64_t *b) +{ + uint64_t cc, w; + + /* + * We compute d = (2^255-19)*1024 + a - b. Since the limbs + * fit on 60 bits, the maximum values of the operands are slightly + * more than 2^264, but much less than 2^265-19456. This + * ensures that the result is positive. + */ + + /* + * Initial carry is 19456, since we add 2^265-19456 (that is, + * (2^255-19)*1024). Each + * individual subtraction may yield a carry up to 513. + */ + w = a[0] - b[0] - 19456; + d[0] = w & MASK51; + cc = -(w >> 51) & 0x3FF; + w = a[1] - b[1] - cc; + d[1] = w & MASK51; + cc = -(w >> 51) & 0x3FF; + w = a[2] - b[2] - cc; + d[2] = w & MASK51; + cc = -(w >> 51) & 0x3FF; + w = a[3] - b[3] - cc; + d[3] = w & MASK51; + cc = -(w >> 51) & 0x3FF; + /* 2^61 in the top limb (weight 2^204) is the 2^265 term. */ + d[4] = ((uint64_t)1 << 61) + a[4] - b[4] - cc; + + /* + * Partial reduction. The intermediate result may be up to + * slightly above 2^265, but less than 2^265+2^255. When we + * truncate to 255 bits, the upper bits will be at most 1024. + */ + d[0] += 19 * (d[4] >> 51); + d[4] &= MASK51; +} + +/* + * UMUL51(hi, lo, x, y) computes: + * + * hi = floor((x * y) / (2^51)) + * lo = x * y mod 2^51 + * + * Note that lo < 2^51, but "hi" may be larger, if the input operands are + * larger. + * + * In the _umul128() variant, the 128-bit product is obtained as two + * 64-bit halves, and "hi" is rebuilt by shifting the upper half left + * by 13 (= 64 - 51) bits. + */ +#if BR_INT128 + +#define UMUL51(hi, lo, x, y) do { \ + unsigned __int128 umul_tmp; \ + umul_tmp = (unsigned __int128)(x) * (unsigned __int128)(y); \ + (hi) = (uint64_t)(umul_tmp >> 51); \ + (lo) = (uint64_t)umul_tmp & MASK51; \ + } while (0) + +#elif BR_UMUL128 + +#define UMUL51(hi, lo, x, y) do { \ + uint64_t umul_hi, umul_lo; \ + umul_lo = _umul128((x), (y), &umul_hi); \ + (hi) = (umul_hi << 13) | (umul_lo >> 51); \ + (lo) = umul_lo & MASK51; \ + } while (0) + +#endif + +/* + * Multiplication. + * On input, limbs must fit on 54 bits each.
+ * On output, limb 0 is at most 2^51 + 155647, and other limbs fit + * on 51 bits each. + * + * All limbs of a[] and b[] are read before the first write to d[], so + * d may be the same array as a and/or b. + */ +static inline void +f255_mul(uint64_t *d, uint64_t *a, uint64_t *b) +{ + uint64_t t[10], hi, lo, w, cc; + + /* + * Perform cross products, accumulating values without carry + * propagation. + * + * Since input limbs fit on 54 bits each, each individual + * UMUL51 will produce a "hi" of less than 2^57. The maximum + * sum will be at most 5*(2^57-1) + 4*(2^51-1) (for t[5]), + * i.e. less than 324*2^51. + */ + + UMUL51(t[1], t[0], a[0], b[0]); + + UMUL51(t[2], lo, a[1], b[0]); t[1] += lo; + UMUL51(hi, lo, a[0], b[1]); t[1] += lo; t[2] += hi; + + UMUL51(t[3], lo, a[2], b[0]); t[2] += lo; + UMUL51(hi, lo, a[1], b[1]); t[2] += lo; t[3] += hi; + UMUL51(hi, lo, a[0], b[2]); t[2] += lo; t[3] += hi; + + UMUL51(t[4], lo, a[3], b[0]); t[3] += lo; + UMUL51(hi, lo, a[2], b[1]); t[3] += lo; t[4] += hi; + UMUL51(hi, lo, a[1], b[2]); t[3] += lo; t[4] += hi; + UMUL51(hi, lo, a[0], b[3]); t[3] += lo; t[4] += hi; + + UMUL51(t[5], lo, a[4], b[0]); t[4] += lo; + UMUL51(hi, lo, a[3], b[1]); t[4] += lo; t[5] += hi; + UMUL51(hi, lo, a[2], b[2]); t[4] += lo; t[5] += hi; + UMUL51(hi, lo, a[1], b[3]); t[4] += lo; t[5] += hi; + UMUL51(hi, lo, a[0], b[4]); t[4] += lo; t[5] += hi; + + UMUL51(t[6], lo, a[4], b[1]); t[5] += lo; + UMUL51(hi, lo, a[3], b[2]); t[5] += lo; t[6] += hi; + UMUL51(hi, lo, a[2], b[3]); t[5] += lo; t[6] += hi; + UMUL51(hi, lo, a[1], b[4]); t[5] += lo; t[6] += hi; + + UMUL51(t[7], lo, a[4], b[2]); t[6] += lo; + UMUL51(hi, lo, a[3], b[3]); t[6] += lo; t[7] += hi; + UMUL51(hi, lo, a[2], b[4]); t[6] += lo; t[7] += hi; + + UMUL51(t[8], lo, a[4], b[3]); t[7] += lo; + UMUL51(hi, lo, a[3], b[4]); t[7] += lo; t[8] += hi; + + UMUL51(t[9], lo, a[4], b[4]); t[8] += lo; + + /* + * The upper words t[5]..t[9] are folded back into the lower + * words, using the rule that 2^255 = 19 in the field.
+ * + * Since each t[i] is less than 324*2^51, the additions below + * will yield less than 6480*2^51 in each limb; this fits in + * 64 bits (6480*2^51 < 8192*2^51 = 2^64), hence there is + * no overflow. + */ + t[0] += 19 * t[5]; + t[1] += 19 * t[6]; + t[2] += 19 * t[7]; + t[3] += 19 * t[8]; + t[4] += 19 * t[9]; + + /* + * Propagate carries. + */ + w = t[0]; + d[0] = w & MASK51; + cc = w >> 51; + w = t[1] + cc; + d[1] = w & MASK51; + cc = w >> 51; + w = t[2] + cc; + d[2] = w & MASK51; + cc = w >> 51; + w = t[3] + cc; + d[3] = w & MASK51; + cc = w >> 51; + w = t[4] + cc; + d[4] = w & MASK51; + cc = w >> 51; + + /* + * Since the limbs were 64-bit values, the top carry is at + * most 8192 (in practice, that cannot be reached). We simply + * performed a partial reduction. + */ + d[0] += 19 * cc; +} + +/* + * Multiplication by A24 = 121665. + * Input must have limbs of 60 bits at most. + * + * The input is fully copied (times 15) into a local array before d[] + * is written, so d may be the same array as a. + */ +static inline void +f255_mul_a24(uint64_t *d, const uint64_t *a) +{ + uint64_t t[5], cc, w; + + /* + * 121665 = 15 * 8111. We first multiply by 15, with carry + * propagation and partial reduction. + */ + w = a[0] * 15; + t[0] = w & MASK51; + cc = w >> 51; + w = a[1] * 15 + cc; + t[1] = w & MASK51; + cc = w >> 51; + w = a[2] * 15 + cc; + t[2] = w & MASK51; + cc = w >> 51; + w = a[3] * 15 + cc; + t[3] = w & MASK51; + cc = w >> 51; + w = a[4] * 15 + cc; + t[4] = w & MASK51; + t[0] += 19 * (w >> 51); + + /* + * Then multiplication by 8111. At that point, we know that + * t[0] is less than 2^51 + 19*8192, and other limbs are less + * than 2^51; thus, there will be no overflow. + */ + w = t[0] * 8111; + d[0] = w & MASK51; + cc = w >> 51; + w = t[1] * 8111 + cc; + d[1] = w & MASK51; + cc = w >> 51; + w = t[2] * 8111 + cc; + d[2] = w & MASK51; + cc = w >> 51; + w = t[3] * 8111 + cc; + d[3] = w & MASK51; + cc = w >> 51; + w = t[4] * 8111 + cc; + d[4] = w & MASK51; + d[0] += 19 * (w >> 51); +} + +/* + * Finalize reduction.
+ * On input, limbs must fit on 51 bits, except possibly the low limb, + * which may be slightly above 2^51. + */ +static inline void +f255_final_reduce(uint64_t *a) +{ + uint64_t t[5], cc, w; + + /* + * We add 19. If the result (in t[]) is below 2^255, then a[] + * is already less than 2^255-19, thus already reduced. + * Otherwise, we subtract 2^255 from t[], in which case we + * have t = a - (2^255-19), and that's our result. + */ + w = a[0] + 19; + t[0] = w & MASK51; + cc = w >> 51; + w = a[1] + cc; + t[1] = w & MASK51; + cc = w >> 51; + w = a[2] + cc; + t[2] = w & MASK51; + cc = w >> 51; + w = a[3] + cc; + t[3] = w & MASK51; + cc = w >> 51; + w = a[4] + cc; + t[4] = w & MASK51; + cc = w >> 51; + + /* + * The bit 255 of t is in cc. If that bit is 0, then a[] must + * be unchanged; otherwise, it must be replaced with t[]. + * The selection is done with a mask (-cc), without branching. + */ + cc = -cc; + a[0] ^= cc & (a[0] ^ t[0]); + a[1] ^= cc & (a[1] ^ t[1]); + a[2] ^= cc & (a[2] ^ t[2]); + a[3] ^= cc & (a[3] ^ t[3]); + a[4] ^= cc & (a[4] ^ t[4]); +} + +/* + * Point multiplication (Montgomery ladder on the x coordinate). + * + * G contains the 32-byte point on input (little-endian) and receives + * the 32-byte result. kb/kblen is the multiplier, in big-endian + * notation, over at most 32 bytes. The curve parameter is ignored. + * + * Returned value is 1 on success, 0 if Glen is not 32 or kblen is + * greater than 32 (G is then left untouched). + */ +static uint32_t +api_mul(unsigned char *G, size_t Glen, + const unsigned char *kb, size_t kblen, int curve) +{ + unsigned char k[32]; + uint64_t x1[5], x2[5], z2[5], x3[5], z3[5]; + uint32_t swap; + int i; + + (void)curve; + + /* + * Points are encoded over exactly 32 bytes. Multipliers must fit + * in 32 bytes as well. + */ + if (Glen != 32 || kblen > 32) { + return 0; + } + + /* + * RFC 7748 mandates that the high bit of the last point byte must + * be ignored/cleared; the "& MASK51" in the initialization for + * x1[4] clears that bit. + * + * Limb j starts at bit 51*j of the little-endian value; each + * 64-bit read starts at the byte containing that bit and is + * shifted right by the remaining bit offset (51 = 6*8+3, + * 102 = 12*8+6, 153 = 19*8+1, 204 = 24*8+12). + */ + x1[0] = br_dec64le(&G[0]) & MASK51; + x1[1] = (br_dec64le(&G[6]) >> 3) & MASK51; + x1[2] = (br_dec64le(&G[12]) >> 6) & MASK51; + x1[3] = (br_dec64le(&G[19]) >> 1) & MASK51; + x1[4] = (br_dec64le(&G[24]) >> 12) & MASK51; + + /* + * We can use memset() to clear values, because exact-width types + * like uint64_t are guaranteed to have no padding bits or + * trap representations.
+ */ + memset(x2, 0, sizeof x2); + x2[0] = 1; + memset(z2, 0, sizeof z2); + memcpy(x3, x1, sizeof x1); + memcpy(z3, x2, sizeof x2); + + /* + * The multiplier is provided in big-endian notation, and + * possibly shorter than 32 bytes. + * + * It is left-padded with zeros into k[], then clamped: k[31] + * is the least significant byte (three low bits cleared) and + * k[0] the most significant one (bit 255 cleared, bit 254 set). + */ + memset(k, 0, (sizeof k) - kblen); + memcpy(k + (sizeof k) - kblen, kb, kblen); + k[31] &= 0xF8; + k[0] &= 0x7F; + k[0] |= 0x40; + + swap = 0; + + for (i = 254; i >= 0; i --) { + uint64_t a[5], aa[5], b[5], bb[5], e[5]; + uint64_t c[5], d[5], da[5], cb[5]; + uint32_t kt; + + /* kt is bit i of the multiplier. */ + kt = (k[31 - (i >> 3)] >> (i & 7)) & 1; + swap ^= kt; + f255_cswap(x2, x3, swap); + f255_cswap(z2, z3, swap); + swap = kt; + + /* + * At that point, limbs of x_2 and z_2 are assumed to fit + * on at most 52 bits each. + * + * Each f255_add() adds one bit to the maximum range of + * the values, but f255_sub() and f255_mul() bring back + * the limbs into 52 bits. All f255_add() outputs are + * used only as inputs for f255_mul(), which ensures + * that limbs remain in the proper range. + */ + + /* A = x_2 + z_2 -- limbs fit on 53 bits each */ + f255_add(a, x2, z2); + + /* AA = A^2 */ + f255_mul(aa, a, a); + + /* B = x_2 - z_2 */ + f255_sub(b, x2, z2); + + /* BB = B^2 */ + f255_mul(bb, b, b); + + /* E = AA - BB */ + f255_sub(e, aa, bb); + + /* C = x_3 + z_3 -- limbs fit on 53 bits each */ + f255_add(c, x3, z3); + + /* D = x_3 - z_3 */ + f255_sub(d, x3, z3); + + /* DA = D * A */ + f255_mul(da, d, a); + + /* CB = C * B */ + f255_mul(cb, c, b); + + /* x_3 = (DA + CB)^2 */ + f255_add(x3, da, cb); + f255_mul(x3, x3, x3); + + /* z_3 = x_1 * (DA - CB)^2 */ + f255_sub(z3, da, cb); + f255_mul(z3, z3, z3); + f255_mul(z3, x1, z3); + + /* x_2 = AA * BB */ + f255_mul(x2, aa, bb); + + /* z_2 = E * (AA + a24 * E) */ + f255_mul_a24(z2, e); + f255_add(z2, aa, z2); + f255_mul(z2, e, z2); + } + + f255_cswap(x2, x3, swap); + f255_cswap(z2, z3, swap); + + /* + * Compute 1/z2 = z2^(p-2). Since p = 2^255-19, we can mutualize + * most non-squarings.
We use x1 and x3, now useless, as temporaries. + * + * First loop: x1 = z2^(2^16-1). Second loop: x3 = z2^(2^240-1). + * Third loop: the 15 low bits of p-2 (low bits of 0xFFEB) are + * appended, which yields z2^(2^255-21). + */ + memcpy(x1, z2, sizeof z2); + for (i = 0; i < 15; i ++) { + f255_mul(x1, x1, x1); + f255_mul(x1, x1, z2); + } + memcpy(x3, x1, sizeof x1); + for (i = 0; i < 14; i ++) { + int j; + + for (j = 0; j < 16; j ++) { + f255_mul(x3, x3, x3); + } + f255_mul(x3, x3, x1); + } + for (i = 14; i >= 0; i --) { + f255_mul(x3, x3, x3); + if ((0xFFEB >> i) & 1) { + f255_mul(x3, z2, x3); + } + } + + /* + * Compute x2/z2. We have 1/z2 in x3. + */ + f255_mul(x2, x2, x3); + f255_final_reduce(x2); + + /* + * Encode the final x2 value in little-endian. We first assemble + * the limbs into 64-bit values. + */ + x2[0] |= x2[1] << 51; + x2[1] = (x2[1] >> 13) | (x2[2] << 38); + x2[2] = (x2[2] >> 26) | (x2[3] << 25); + x2[3] = (x2[3] >> 39) | (x2[4] << 12); + br_enc64le(G, x2[0]); + br_enc64le(G + 8, x2[1]); + br_enc64le(G + 16, x2[2]); + br_enc64le(G + 24, x2[3]); + return 1; +} + +/* + * Multiply the generator by x: GEN is copied into R, multiplied in + * place with api_mul(), and its length (32) is returned. The status + * returned by api_mul() is not checked. + */ +static size_t +api_mulgen(unsigned char *R, + const unsigned char *x, size_t xlen, int curve) +{ + const unsigned char *G; + size_t Glen; + + G = api_generator(curve, &Glen); + memcpy(R, G, Glen); + api_mul(R, Glen, x, xlen, curve); + return Glen; +} + +/* + * Unimplemented operation: every parameter is ignored and 0 is + * returned. + */ +static uint32_t +api_muladd(unsigned char *A, const unsigned char *B, size_t len, + const unsigned char *x, size_t xlen, + const unsigned char *y, size_t ylen, int curve) +{ + /* + * We don't implement this method, since it is used for ECDSA + * only, and there is no ECDSA over Curve25519 (which instead + * uses EdDSA).
+ */ + (void)A; + (void)B; + (void)len; + (void)x; + (void)xlen; + (void)y; + (void)ylen; + (void)curve; + return 0; +} + +/* + * see bearssl_ec.h + * + * Implementation descriptor: one 32-bit constant followed by pointers + * to the api_*() functions of this file. + * NOTE(review): 0x20000000 is presumably the supported-curves bit + * mask -- confirm against bearssl_ec.h. + */ +const br_ec_impl br_ec_c25519_m62 = { + (uint32_t)0x20000000, + &api_generator, + &api_order, + &api_xoff, + &api_mul, + &api_mulgen, + &api_muladd +}; + +/* see bearssl_ec.h */ +const br_ec_impl * +br_ec_c25519_m62_get(void) +{ + return &br_ec_c25519_m62; +} + +#else + +/* + * see bearssl_ec.h + * + * Neither BR_INT128 nor BR_UMUL128 is set: this implementation is not + * compiled in, and a null pointer is returned. + */ +const br_ec_impl * +br_ec_c25519_m62_get(void) +{ + return 0; +} + +#endif diff --git a/third_party/bearssl/src/ec_c25519_m64.c b/third_party/bearssl/src/ec_c25519_m64.c new file mode 100644 index 0000000..df48834 --- /dev/null +++ b/third_party/bearssl/src/ec_c25519_m64.c @@ -0,0 +1,831 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#include "inner.h" + +#if BR_INT128 || BR_UMUL128 + +#if BR_UMUL128 +#include <intrin.h> +#endif + +/* + * Generator as returned by api_generator(): 32 bytes, first byte 0x09, + * all other bytes zero. + */ +static const unsigned char GEN[] = { + 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* + * Value returned by api_order(): 32 bytes, 0x7F followed by 0xFF bytes. + * NOTE(review): this looks like an upper bound on multipliers rather + * than the exact subgroup order -- confirm against bearssl_ec.h. + */ +static const unsigned char ORDER[] = { + 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +/* + * Return GEN and set *len to 32. The curve parameter is ignored. + */ +static const unsigned char * +api_generator(int curve, size_t *len) +{ + (void)curve; + *len = 32; + return GEN; +} + +/* + * Return ORDER and set *len to 32. The curve parameter is ignored. + */ +static const unsigned char * +api_order(int curve, size_t *len) +{ + (void)curve; + *len = 32; + return ORDER; +} + +/* + * Set *len to 32 and return offset 0. The curve parameter is ignored. + */ +static size_t +api_xoff(int curve, size_t *len) +{ + (void)curve; + *len = 32; + return 0; +} + +/* + * A field element is encoded as four 64-bit integers, in basis 2^64 + * (limbs are full 64-bit words; MASK63 is applied to the top limb + * only, to isolate bit 255 and above). + * Operations return partially reduced values, which may range up to + * 2^255+37. + */ + +#define MASK63 (((uint64_t)1 << 63) - (uint64_t)1) + +/* + * Swap two field elements, conditionally on a flag. + * + * The mask m = -(uint64_t)ctl is all-zeros when ctl is 0 and all-ones + * when ctl is 1, so the four limbs are exchanged (or left alone) + * without any branch. ctl is expected to be 0 or 1. + */ +static inline void +f255_cswap(uint64_t *a, uint64_t *b, uint32_t ctl) +{ + uint64_t m, w; + + m = -(uint64_t)ctl; + w = m & (a[0] ^ b[0]); a[0] ^= w; b[0] ^= w; + w = m & (a[1] ^ b[1]); a[1] ^= w; b[1] ^= w; + w = m & (a[2] ^ b[2]); a[2] ^= w; b[2] ^= w; + w = m & (a[3] ^ b[3]); a[3] ^= w; b[3] ^= w; +} + +/* + * Addition in the field.
+ */ +static inline void +f255_add(uint64_t *d, const uint64_t *a, const uint64_t *b) +{ +#if BR_INT128 + + uint64_t t0, t1, t2, t3, cc; + unsigned __int128 z; + + /* + * Plain addition with carry propagation over the four limbs; + * the top limb is truncated to 63 bits and the bits of weight + * 2^255 and 2^256 are gathered in cc. + */ + z = (unsigned __int128)a[0] + (unsigned __int128)b[0]; + t0 = (uint64_t)z; + z = (unsigned __int128)a[1] + (unsigned __int128)b[1] + (z >> 64); + t1 = (uint64_t)z; + z = (unsigned __int128)a[2] + (unsigned __int128)b[2] + (z >> 64); + t2 = (uint64_t)z; + z = (unsigned __int128)a[3] + (unsigned __int128)b[3] + (z >> 64); + t3 = (uint64_t)z & MASK63; + cc = (uint64_t)(z >> 63); + + /* + * Since operands are at most 2^255+37, the sum is at most + * 2^256+74; thus, the carry cc is equal to 0, 1 or 2. + * + * We use: 2^255 = 19 mod p. + * Since we add 0, 19 or 38 to a value that fits on 255 bits, + * the result is at most 2^255+37. + */ + z = (unsigned __int128)t0 + (unsigned __int128)(19 * cc); + d[0] = (uint64_t)z; + z = (unsigned __int128)t1 + (z >> 64); + d[1] = (uint64_t)z; + z = (unsigned __int128)t2 + (z >> 64); + d[2] = (uint64_t)z; + d[3] = t3 + (uint64_t)(z >> 64); + +#elif BR_UMUL128 + + uint64_t t0, t1, t2, t3, cc; + unsigned char k; + + /* + * Same computation as above, with the carry chain expressed + * through _addcarry_u64(); cc combines the final carry (weight + * 2^256) and the top bit of t3 (weight 2^255). + */ + k = _addcarry_u64(0, a[0], b[0], &t0); + k = _addcarry_u64(k, a[1], b[1], &t1); + k = _addcarry_u64(k, a[2], b[2], &t2); + k = _addcarry_u64(k, a[3], b[3], &t3); + cc = (k << 1) + (t3 >> 63); + t3 &= MASK63; + + /* + * Since operands are at most 2^255+37, the sum is at most + * 2^256+74; thus, the carry cc is equal to 0, 1 or 2. + * + * We use: 2^255 = 19 mod p. + * Since we add 0, 19 or 38 to a value that fits on 255 bits, + * the result is at most 2^255+37. + */ + k = _addcarry_u64(0, t0, 19 * cc, &d[0]); + k = _addcarry_u64(k, t1, 0, &d[1]); + k = _addcarry_u64(k, t2, 0, &d[2]); + (void)_addcarry_u64(k, t3, 0, &d[3]); + +#endif +} + +/* + * Subtraction.
+ */ +static inline void +f255_sub(uint64_t *d, const uint64_t *a, const uint64_t *b) +{ +#if BR_INT128 + + /* + * We compute t = 2^256 - 38 + a - b, which is necessarily + * positive but lower than 2^256 + 2^255, since a <= 2^255 + 37 + * and b <= 2^255 + 37. We then subtract 0, p or 2*p, depending + * on the two upper bits of t (bits 255 and 256). + */ + + uint64_t t0, t1, t2, t3, t4, cc; + unsigned __int128 z; + + /* + * Borrow chain: after each step, the upper half of z is either + * 0 or all-ones, and cc is the corresponding borrow (0 or 1). + * t4 receives the bit of weight 2^256 (1 minus the last borrow). + */ + z = (unsigned __int128)a[0] - (unsigned __int128)b[0] - 38; + t0 = (uint64_t)z; + cc = -(uint64_t)(z >> 64); + z = (unsigned __int128)a[1] - (unsigned __int128)b[1] + - (unsigned __int128)cc; + t1 = (uint64_t)z; + cc = -(uint64_t)(z >> 64); + z = (unsigned __int128)a[2] - (unsigned __int128)b[2] + - (unsigned __int128)cc; + t2 = (uint64_t)z; + cc = -(uint64_t)(z >> 64); + z = (unsigned __int128)a[3] - (unsigned __int128)b[3] + - (unsigned __int128)cc; + t3 = (uint64_t)z; + t4 = 1 + (uint64_t)(z >> 64); + + /* + * We have a 257-bit result. The two top bits can be 00, 01 or 10, + * but not 11 (value t <= 2^256 - 38 + 2^255 + 37 = 2^256 + 2^255 - 1). + * Therefore, we can truncate to 255 bits, and add 0, 19 or 38. + * This guarantees that the result is at most 2^255+37. + */ + cc = (38 & -t4) + (19 & -(t3 >> 63)); + t3 &= MASK63; + z = (unsigned __int128)t0 + (unsigned __int128)cc; + d[0] = (uint64_t)z; + z = (unsigned __int128)t1 + (z >> 64); + d[1] = (uint64_t)z; + z = (unsigned __int128)t2 + (z >> 64); + d[2] = (uint64_t)z; + d[3] = t3 + (uint64_t)(z >> 64); + +#elif BR_UMUL128 + + /* + * We compute t = 2^256 - 38 + a - b, which is necessarily + * positive but lower than 2^256 + 2^255, since a <= 2^255 + 37 + * and b <= 2^255 + 37. We then subtract 0, p or 2*p, depending + * on the two upper bits of t (bits 255 and 256).
+ */ + + uint64_t t0, t1, t2, t3, t4; + unsigned char k; + + /* First pass: t = 2^256 + a - b (t4 is the bit of weight 2^256). */ + k = _subborrow_u64(0, a[0], b[0], &t0); + k = _subborrow_u64(k, a[1], b[1], &t1); + k = _subborrow_u64(k, a[2], b[2], &t2); + k = _subborrow_u64(k, a[3], b[3], &t3); + (void)_subborrow_u64(k, 1, 0, &t4); + + /* Second pass: subtract 38. */ + k = _subborrow_u64(0, t0, 38, &t0); + k = _subborrow_u64(k, t1, 0, &t1); + k = _subborrow_u64(k, t2, 0, &t2); + k = _subborrow_u64(k, t3, 0, &t3); + (void)_subborrow_u64(k, t4, 0, &t4); + + /* + * We have a 257-bit result. The two top bits can be 00, 01 or 10, + * but not 11 (value t <= 2^256 - 38 + 2^255 + 37 = 2^256 + 2^255 - 1). + * Therefore, we can truncate to 255 bits, and add 0, 19 or 38. + * This guarantees that the result is at most 2^255+37. + */ + t4 = (38 & -t4) + (19 & -(t3 >> 63)); + t3 &= MASK63; + k = _addcarry_u64(0, t0, t4, &d[0]); + k = _addcarry_u64(k, t1, 0, &d[1]); + k = _addcarry_u64(k, t2, 0, &d[2]); + (void)_addcarry_u64(k, t3, 0, &d[3]); + +#endif +} + +/* + * Multiplication. + * + * Operands are expected to be partially reduced (at most 2^255+37 + * each, as stated in the body); the result is at most 2^255+37. + * d[] is written only after a[] and b[] have been completely read, + * so d may be the same array as a and/or b. + */ +static inline void +f255_mul(uint64_t *d, uint64_t *a, uint64_t *b) +{ +#if BR_INT128 + + unsigned __int128 z; + uint64_t t0, t1, t2, t3, t4, t5, t6, t7, th; + + /* + * Compute the product a*b over plain integers.
+ * The 512-bit result is accumulated in t0..t7, one row per + * limb of a[]. + */ + z = (unsigned __int128)a[0] * (unsigned __int128)b[0]; + t0 = (uint64_t)z; + z = (unsigned __int128)a[0] * (unsigned __int128)b[1] + (z >> 64); + t1 = (uint64_t)z; + z = (unsigned __int128)a[0] * (unsigned __int128)b[2] + (z >> 64); + t2 = (uint64_t)z; + z = (unsigned __int128)a[0] * (unsigned __int128)b[3] + (z >> 64); + t3 = (uint64_t)z; + t4 = (uint64_t)(z >> 64); + + z = (unsigned __int128)a[1] * (unsigned __int128)b[0] + + (unsigned __int128)t1; + t1 = (uint64_t)z; + z = (unsigned __int128)a[1] * (unsigned __int128)b[1] + + (unsigned __int128)t2 + (z >> 64); + t2 = (uint64_t)z; + z = (unsigned __int128)a[1] * (unsigned __int128)b[2] + + (unsigned __int128)t3 + (z >> 64); + t3 = (uint64_t)z; + z = (unsigned __int128)a[1] * (unsigned __int128)b[3] + + (unsigned __int128)t4 + (z >> 64); + t4 = (uint64_t)z; + t5 = (uint64_t)(z >> 64); + + z = (unsigned __int128)a[2] * (unsigned __int128)b[0] + + (unsigned __int128)t2; + t2 = (uint64_t)z; + z = (unsigned __int128)a[2] * (unsigned __int128)b[1] + + (unsigned __int128)t3 + (z >> 64); + t3 = (uint64_t)z; + z = (unsigned __int128)a[2] * (unsigned __int128)b[2] + + (unsigned __int128)t4 + (z >> 64); + t4 = (uint64_t)z; + z = (unsigned __int128)a[2] * (unsigned __int128)b[3] + + (unsigned __int128)t5 + (z >> 64); + t5 = (uint64_t)z; + t6 = (uint64_t)(z >> 64); + + z = (unsigned __int128)a[3] * (unsigned __int128)b[0] + + (unsigned __int128)t3; + t3 = (uint64_t)z; + z = (unsigned __int128)a[3] * (unsigned __int128)b[1] + + (unsigned __int128)t4 + (z >> 64); + t4 = (uint64_t)z; + z = (unsigned __int128)a[3] * (unsigned __int128)b[2] + + (unsigned __int128)t5 + (z >> 64); + t5 = (uint64_t)z; + z = (unsigned __int128)a[3] * (unsigned __int128)b[3] + + (unsigned __int128)t6 + (z >> 64); + t6 = (uint64_t)z; + t7 = (uint64_t)(z >> 64); + + /* + * Modulo p, we have: + * + * 2^255 = 19 + * 2^510 = 19*19 = 361 + * + * We split the intermediate t into three parts, in basis + * 2^255.
The low one will be in t0..t3; the middle one in t4..t7. + * The upper one can only be a single bit (th), since the + * multiplication operands are at most 2^255+37 each. + */ + th = t7 >> 62; + t7 = ((t7 << 1) | (t6 >> 63)) & MASK63; + t6 = (t6 << 1) | (t5 >> 63); + t5 = (t5 << 1) | (t4 >> 63); + t4 = (t4 << 1) | (t3 >> 63); + t3 &= MASK63; + + /* + * Multiply the middle part (t4..t7) by 19. We truncate it to + * 255 bits; the extra bits will go along with th. + */ + z = (unsigned __int128)t4 * 19; + t4 = (uint64_t)z; + z = (unsigned __int128)t5 * 19 + (z >> 64); + t5 = (uint64_t)z; + z = (unsigned __int128)t6 * 19 + (z >> 64); + t6 = (uint64_t)z; + z = (unsigned __int128)t7 * 19 + (z >> 64); + t7 = (uint64_t)z & MASK63; + + th = (361 & -th) + (19 * (uint64_t)(z >> 63)); + + /* + * Add elements together. + * At this point: + * t0..t3 fits on 255 bits. + * t4..t7 fits on 255 bits. + * th <= 361 + 342 = 703. + */ + z = (unsigned __int128)t0 + (unsigned __int128)t4 + + (unsigned __int128)th; + t0 = (uint64_t)z; + z = (unsigned __int128)t1 + (unsigned __int128)t5 + (z >> 64); + t1 = (uint64_t)z; + z = (unsigned __int128)t2 + (unsigned __int128)t6 + (z >> 64); + t2 = (uint64_t)z; + z = (unsigned __int128)t3 + (unsigned __int128)t7 + (z >> 64); + t3 = (uint64_t)z & MASK63; + th = (uint64_t)(z >> 63); + + /* + * Since the sum is at most 2^256 + 703, the two upper bits, in th, + * can only have value 0, 1 or 2. We just add th*19, which + * guarantees a result of at most 2^255+37. + */ + z = (unsigned __int128)t0 + (19 * th); + d[0] = (uint64_t)z; + z = (unsigned __int128)t1 + (z >> 64); + d[1] = (uint64_t)z; + z = (unsigned __int128)t2 + (z >> 64); + d[2] = (uint64_t)z; + d[3] = t3 + (uint64_t)(z >> 64); + +#elif BR_UMUL128 + + uint64_t t0, t1, t2, t3, t4, t5, t6, t7, th; + uint64_t h0, h1, h2, h3; + unsigned char k; + + /* + * Compute the product a*b over plain integers.
+ * Each row adds the low halves of the four partial products, + * then their high halves (h0..h3) one limb higher. + */ + t0 = _umul128(a[0], b[0], &h0); + t1 = _umul128(a[0], b[1], &h1); + k = _addcarry_u64(0, t1, h0, &t1); + t2 = _umul128(a[0], b[2], &h2); + k = _addcarry_u64(k, t2, h1, &t2); + t3 = _umul128(a[0], b[3], &h3); + k = _addcarry_u64(k, t3, h2, &t3); + (void)_addcarry_u64(k, h3, 0, &t4); + + k = _addcarry_u64(0, _umul128(a[1], b[0], &h0), t1, &t1); + k = _addcarry_u64(k, _umul128(a[1], b[1], &h1), t2, &t2); + k = _addcarry_u64(k, _umul128(a[1], b[2], &h2), t3, &t3); + k = _addcarry_u64(k, _umul128(a[1], b[3], &h3), t4, &t4); + t5 = k; + k = _addcarry_u64(0, t2, h0, &t2); + k = _addcarry_u64(k, t3, h1, &t3); + k = _addcarry_u64(k, t4, h2, &t4); + (void)_addcarry_u64(k, t5, h3, &t5); + + k = _addcarry_u64(0, _umul128(a[2], b[0], &h0), t2, &t2); + k = _addcarry_u64(k, _umul128(a[2], b[1], &h1), t3, &t3); + k = _addcarry_u64(k, _umul128(a[2], b[2], &h2), t4, &t4); + k = _addcarry_u64(k, _umul128(a[2], b[3], &h3), t5, &t5); + t6 = k; + k = _addcarry_u64(0, t3, h0, &t3); + k = _addcarry_u64(k, t4, h1, &t4); + k = _addcarry_u64(k, t5, h2, &t5); + (void)_addcarry_u64(k, t6, h3, &t6); + + k = _addcarry_u64(0, _umul128(a[3], b[0], &h0), t3, &t3); + k = _addcarry_u64(k, _umul128(a[3], b[1], &h1), t4, &t4); + k = _addcarry_u64(k, _umul128(a[3], b[2], &h2), t5, &t5); + k = _addcarry_u64(k, _umul128(a[3], b[3], &h3), t6, &t6); + t7 = k; + k = _addcarry_u64(0, t4, h0, &t4); + k = _addcarry_u64(k, t5, h1, &t5); + k = _addcarry_u64(k, t6, h2, &t6); + (void)_addcarry_u64(k, t7, h3, &t7); + + /* + * Modulo p, we have: + * + * 2^255 = 19 + * 2^510 = 19*19 = 361 + * + * We split the intermediate t into three parts, in basis + * 2^255. The low one will be in t0..t3; the middle one in t4..t7. + * The upper one can only be a single bit (th), since the + * multiplication operands are at most 2^255+37 each.
+ */ + th = t7 >> 62; + t7 = ((t7 << 1) | (t6 >> 63)) & MASK63; + t6 = (t6 << 1) | (t5 >> 63); + t5 = (t5 << 1) | (t4 >> 63); + t4 = (t4 << 1) | (t3 >> 63); + t3 &= MASK63; + + /* + * Multiply the middle part (t4..t7) by 19. We truncate it to + * 255 bits; the extra bits will go along with th. + */ + t4 = _umul128(t4, 19, &h0); + t5 = _umul128(t5, 19, &h1); + t6 = _umul128(t6, 19, &h2); + t7 = _umul128(t7, 19, &h3); + k = _addcarry_u64(0, t5, h0, &t5); + k = _addcarry_u64(k, t6, h1, &t6); + k = _addcarry_u64(k, t7, h2, &t7); + (void)_addcarry_u64(k, h3, 0, &h3); + th = (361 & -th) + (19 * ((h3 << 1) + (t7 >> 63))); + t7 &= MASK63; + + /* + * Add elements together. + * At this point: + * t0..t3 fits on 255 bits. + * t4..t7 fits on 255 bits. + * th <= 361 + 342 = 703. + */ + k = _addcarry_u64(0, t0, t4, &t0); + k = _addcarry_u64(k, t1, t5, &t1); + k = _addcarry_u64(k, t2, t6, &t2); + k = _addcarry_u64(k, t3, t7, &t3); + t4 = k; + k = _addcarry_u64(0, t0, th, &t0); + k = _addcarry_u64(k, t1, 0, &t1); + k = _addcarry_u64(k, t2, 0, &t2); + k = _addcarry_u64(k, t3, 0, &t3); + (void)_addcarry_u64(k, t4, 0, &t4); + + th = (t4 << 1) + (t3 >> 63); + t3 &= MASK63; + + /* + * Since the sum is at most 2^256 + 703, the two upper bits, in th, + * can only have value 0, 1 or 2. We just add th*19, which + * guarantees a result of at most 2^255+37. + */ + k = _addcarry_u64(0, t0, 19 * th, &d[0]); + k = _addcarry_u64(k, t1, 0, &d[1]); + k = _addcarry_u64(k, t2, 0, &d[2]); + (void)_addcarry_u64(k, t3, 0, &d[3]); + +#endif +} + +/* + * Multiplication by A24 = 121665.
+ */ +static inline void +f255_mul_a24(uint64_t *d, const uint64_t *a) +{ +#if BR_INT128 + + uint64_t t0, t1, t2, t3; + unsigned __int128 z; + + z = (unsigned __int128)a[0] * 121665; + t0 = (uint64_t)z; + z = (unsigned __int128)a[1] * 121665 + (z >> 64); + t1 = (uint64_t)z; + z = (unsigned __int128)a[2] * 121665 + (z >> 64); + t2 = (uint64_t)z; + z = (unsigned __int128)a[3] * 121665 + (z >> 64); + t3 = (uint64_t)z & MASK63; + + z = (unsigned __int128)t0 + (19 * (uint64_t)(z >> 63)); + t0 = (uint64_t)z; + z = (unsigned __int128)t1 + (z >> 64); + t1 = (uint64_t)z; + z = (unsigned __int128)t2 + (z >> 64); + t2 = (uint64_t)z; + t3 = t3 + (uint64_t)(z >> 64); + + z = (unsigned __int128)t0 + (19 & -(t3 >> 63)); + d[0] = (uint64_t)z; + z = (unsigned __int128)t1 + (z >> 64); + d[1] = (uint64_t)z; + z = (unsigned __int128)t2 + (z >> 64); + d[2] = (uint64_t)z; + d[3] = (t3 & MASK63) + (uint64_t)(z >> 64); + +#elif BR_UMUL128 + + uint64_t t0, t1, t2, t3, t4, h0, h1, h2, h3; + unsigned char k; + + t0 = _umul128(a[0], 121665, &h0); + t1 = _umul128(a[1], 121665, &h1); + k = _addcarry_u64(0, t1, h0, &t1); + t2 = _umul128(a[2], 121665, &h2); + k = _addcarry_u64(k, t2, h1, &t2); + t3 = _umul128(a[3], 121665, &h3); + k = _addcarry_u64(k, t3, h2, &t3); + (void)_addcarry_u64(k, h3, 0, &t4); + + t4 = (t4 << 1) + (t3 >> 63); + t3 &= MASK63; + k = _addcarry_u64(0, t0, 19 * t4, &t0); + k = _addcarry_u64(k, t1, 0, &t1); + k = _addcarry_u64(k, t2, 0, &t2); + (void)_addcarry_u64(k, t3, 0, &t3); + + t4 = 19 & -(t3 >> 63); + t3 &= MASK63; + k = _addcarry_u64(0, t0, t4, &d[0]); + k = _addcarry_u64(k, t1, 0, &d[1]); + k = _addcarry_u64(k, t2, 0, &d[2]); + (void)_addcarry_u64(k, t3, 0, &d[3]); + +#endif +} + +/* + * Finalize reduction. + */ +static inline void +f255_final_reduce(uint64_t *a) +{ +#if BR_INT128 + + uint64_t t0, t1, t2, t3, m; + unsigned __int128 z; + + /* + * We add 19. If the result (in t) is below 2^255, then a[] + * is already less than 2^255-19, thus already reduced. 
/*
 * Finalize reduction.
 *
 * Operates in place on the four 64-bit limbs a[0..3] (least
 * significant limb first). The value a+19 is computed; if it reaches
 * 2^255 (bit 63 of the top limb set), a[] is replaced with
 * a + 19 - 2^255, i.e. a - (2^255-19); otherwise a[] is left as is.
 * The choice is made with a bit mask, not with a branch.
 *
 * NOTE(review): MASK63 is defined outside this block; presumably
 * 2^63-1 -- confirm. If neither BR_INT128 nor BR_UMUL128 evaluates to
 * true, the body is empty.
 */
static inline void
f255_final_reduce(uint64_t *a)
{
#if BR_INT128

	uint64_t t0, t1, t2, t3, m;
	unsigned __int128 z;

	/*
	 * We add 19. If the result (in t) is below 2^255, then a[]
	 * is already less than 2^255-19, thus already reduced.
	 * Otherwise, we subtract 2^255 from t[], in which case we
	 * have t = a - (2^255-19), and that's our result.
	 */
	z = (unsigned __int128)a[0] + 19;
	t0 = (uint64_t)z;
	z = (unsigned __int128)a[1] + (z >> 64);
	t1 = (uint64_t)z;
	z = (unsigned __int128)a[2] + (z >> 64);
	t2 = (uint64_t)z;
	t3 = a[3] + (uint64_t)(z >> 64);

	/*
	 * m is all-ones when bit 63 of t3 is set, zero otherwise;
	 * x ^= m & (x ^ t) then selects t (m all-ones) or keeps x.
	 */
	m = -(t3 >> 63);
	t3 &= MASK63;
	a[0] ^= m & (a[0] ^ t0);
	a[1] ^= m & (a[1] ^ t1);
	a[2] ^= m & (a[2] ^ t2);
	a[3] ^= m & (a[3] ^ t3);

#elif BR_UMUL128

	uint64_t t0, t1, t2, t3, m;
	unsigned char k;

	/*
	 * We add 19. If the result (in t) is below 2^255, then a[]
	 * is already less than 2^255-19, thus already reduced.
	 * Otherwise, we subtract 2^255 from t[], in which case we
	 * have t = a - (2^255-19), and that's our result.
	 */
	k = _addcarry_u64(0, a[0], 19, &t0);
	k = _addcarry_u64(k, a[1], 0, &t1);
	k = _addcarry_u64(k, a[2], 0, &t2);
	(void)_addcarry_u64(k, a[3], 0, &t3);

	/* Same mask-based selection as in the BR_INT128 variant. */
	m = -(t3 >> 63);
	t3 &= MASK63;
	a[0] ^= m & (a[0] ^ t0);
	a[1] ^= m & (a[1] ^ t1);
	a[2] ^= m & (a[2] ^ t2);
	a[3] ^= m & (a[3] ^ t3);

#endif
}
/*
 * Point multiplication. G holds a 32-byte point encoding (read as four
 * little-endian 64-bit words) and is overwritten with the encoded
 * result. kb is the multiplier, big-endian, at most 32 bytes; it is
 * copied and clamped (three low bits cleared, bit 255 cleared, bit 254
 * set) before use. The 'curve' argument is ignored.
 *
 * Returns 1 on success, or 0 (G untouched) if Glen is not 32 or kblen
 * is greater than 32.
 */
static uint32_t
api_mul(unsigned char *G, size_t Glen,
	const unsigned char *kb, size_t kblen, int curve)
{
	unsigned char k[32];
	uint64_t x1[4], x2[4], z2[4], x3[4], z3[4];
	uint32_t swap;
	int i;

	(void)curve;

	/*
	 * Points are encoded over exactly 32 bytes. Multipliers must fit
	 * in 32 bytes as well.
	 */
	if (Glen != 32 || kblen > 32) {
		return 0;
	}

	/*
	 * RFC 7748 mandates that the high bit of the last point byte must
	 * be ignored/cleared.
	 */
	x1[0] = br_dec64le(&G[ 0]);
	x1[1] = br_dec64le(&G[ 8]);
	x1[2] = br_dec64le(&G[16]);
	x1[3] = br_dec64le(&G[24]) & MASK63;

	/*
	 * We can use memset() to clear values, because exact-width types
	 * like uint64_t are guaranteed to have no padding bits or
	 * trap representations.
	 *
	 * Initial ladder state: (x2:z2) = (1:0), (x3:z3) = (x1:1).
	 */
	memset(x2, 0, sizeof x2);
	x2[0] = 1;
	memset(z2, 0, sizeof z2);
	memcpy(x3, x1, sizeof x1);
	memcpy(z3, x2, sizeof x2);

	/*
	 * The multiplier is provided in big-endian notation, and
	 * possibly shorter than 32 bytes.
	 *
	 * It is left-padded with zeros into k[]; k[31] is then the least
	 * significant byte and k[0] the most significant one.
	 */
	memset(k, 0, (sizeof k) - kblen);
	memcpy(k + (sizeof k) - kblen, kb, kblen);
	k[31] &= 0xF8;
	k[0] &= 0x7F;
	k[0] |= 0x40;

	swap = 0;

	/*
	 * One ladder step per multiplier bit, from bit 254 down to bit 0.
	 */
	for (i = 254; i >= 0; i --) {
		uint64_t a[4], aa[4], b[4], bb[4], e[4];
		uint64_t c[4], d[4], da[4], cb[4];
		uint32_t kt;

		/*
		 * kt is bit i of the multiplier. The two working points
		 * are exchanged when this bit differs from the previous
		 * one (swap holds the XOR of both).
		 */
		kt = (k[31 - (i >> 3)] >> (i & 7)) & 1;
		swap ^= kt;
		f255_cswap(x2, x3, swap);
		f255_cswap(z2, z3, swap);
		swap = kt;

		/* A = x_2 + z_2 */
		f255_add(a, x2, z2);

		/* AA = A^2 */
		f255_mul(aa, a, a);

		/* B = x_2 - z_2 */
		f255_sub(b, x2, z2);

		/* BB = B^2 */
		f255_mul(bb, b, b);

		/* E = AA - BB */
		f255_sub(e, aa, bb);

		/* C = x_3 + z_3 */
		f255_add(c, x3, z3);

		/* D = x_3 - z_3 */
		f255_sub(d, x3, z3);

		/* DA = D * A */
		f255_mul(da, d, a);

		/* CB = C * B */
		f255_mul(cb, c, b);

		/* x_3 = (DA + CB)^2 */
		f255_add(x3, da, cb);
		f255_mul(x3, x3, x3);

		/* z_3 = x_1 * (DA - CB)^2 */
		f255_sub(z3, da, cb);
		f255_mul(z3, z3, z3);
		f255_mul(z3, x1, z3);

		/* x_2 = AA * BB */
		f255_mul(x2, aa, bb);

		/* z_2 = E * (AA + a24 * E) */
		f255_mul_a24(z2, e);
		f255_add(z2, aa, z2);
		f255_mul(z2, e, z2);
	}

	/* Apply the exchange still pending from the last bit. */
	f255_cswap(x2, x3, swap);
	f255_cswap(z2, z3, swap);

	/*
	 * Compute 1/z2 = z2^(p-2). Since p = 2^255-19, we can mutualize
	 * most non-squarings. We use x1 and x3, now useless, as temporaries.
	 */
	/* x1 = z2^(2^16-1): fifteen "square then multiply by z2" steps. */
	memcpy(x1, z2, sizeof z2);
	for (i = 0; i < 15; i ++) {
		f255_mul(x1, x1, x1);
		f255_mul(x1, x1, z2);
	}
	/* x3 = z2^(2^240-1): each round appends sixteen more 1 bits. */
	memcpy(x3, x1, sizeof x1);
	for (i = 0; i < 14; i ++) {
		int j;

		for (j = 0; j < 16; j ++) {
			f255_mul(x3, x3, x3);
		}
		f255_mul(x3, x3, x1);
	}
	/*
	 * Low 15 bits of the exponent are bits 14..0 of 0xFFEB (0x7FEB),
	 * for a total exponent of 2^255-21 = p-2.
	 */
	for (i = 14; i >= 0; i --) {
		f255_mul(x3, x3, x3);
		if ((0xFFEB >> i) & 1) {
			f255_mul(x3, z2, x3);
		}
	}

	/*
	 * Compute x2/z2. We have 1/z2 in x3.
	 */
	f255_mul(x2, x2, x3);
	f255_final_reduce(x2);

	/*
	 * Encode the final x2 value in little-endian.
	 */
	br_enc64le(G, x2[0]);
	br_enc64le(G + 8, x2[1]);
	br_enc64le(G + 16, x2[2]);
	br_enc64le(G + 24, x2[3]);
	return 1;
}

/*
 * Multiplication of the generator: the generator returned by
 * api_generator() is copied into R, then multiplied in place by x
 * through api_mul(). The value returned by api_mul() is not checked.
 * Returns the generator length reported by api_generator().
 */
static size_t
api_mulgen(unsigned char *R,
	const unsigned char *x, size_t xlen, int curve)
{
	const unsigned char *G;
	size_t Glen;

	G = api_generator(curve, &Glen);
	memcpy(R, G, Glen);
	api_mul(R, Glen, x, xlen, curve);
	return Glen;
}

/*
 * Not implemented: all arguments are ignored and 0 is returned.
 */
static uint32_t
api_muladd(unsigned char *A, const unsigned char *B, size_t len,
	const unsigned char *x, size_t xlen,
	const unsigned char *y, size_t ylen, int curve)
{
	/*
	 * We don't implement this method, since it is used for ECDSA
	 * only, and there is no ECDSA over Curve25519 (which instead
	 * uses EdDSA).
	 */
	(void)A;
	(void)B;
	(void)len;
	(void)x;
	(void)xlen;
	(void)y;
	(void)ylen;
	(void)curve;
	return 0;
}

/* see bearssl_ec.h */
/*
 * NOTE(review): the first field is presumably the supported-curves
 * bit mask (only bit 29 set) -- confirm against the br_ec_impl
 * declaration in bearssl_ec.h.
 */
const br_ec_impl br_ec_c25519_m64 = {
	(uint32_t)0x20000000,
	&api_generator,
	&api_order,
	&api_xoff,
	&api_mul,
	&api_mulgen,
	&api_muladd
};

/* see bearssl_ec.h */
const br_ec_impl *
br_ec_c25519_m64_get(void)
{
	return &br_ec_c25519_m64;
}

/*
 * Alternate branch of a conditional opened earlier in the file (the
 * matching #if is not part of this block): the getter then reports
 * that the implementation is unavailable by returning a null pointer.
 */
#else

/* see bearssl_ec.h */
const br_ec_impl *
br_ec_c25519_m64_get(void)
{
	return 0;
}

#endif
permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +static const unsigned char GEN[] = { + 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +static const unsigned char ORDER[] = { + 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +/* see inner.h */ +const br_ec_curve_def br_curve25519 = { + BR_EC_curve25519, + ORDER, sizeof ORDER, + GEN, sizeof GEN +}; diff --git a/third_party/bearssl/src/ec_default.c b/third_party/bearssl/src/ec_default.c new file mode 100644 index 0000000..7bb6e0c --- /dev/null +++ b/third_party/bearssl/src/ec_default.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, 
and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_ec.h */ +const br_ec_impl * +br_ec_get_default(void) +{ +#if BR_LOMUL + return &br_ec_all_m15; +#else + return &br_ec_all_m31; +#endif +} diff --git a/third_party/bearssl/src/ec_keygen.c b/third_party/bearssl/src/ec_keygen.c new file mode 100644 index 0000000..02a3096 --- /dev/null +++ b/third_party/bearssl/src/ec_keygen.c @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_ec.h */ +size_t +br_ec_keygen(const br_prng_class **rng_ctx, + const br_ec_impl *impl, br_ec_private_key *sk, + void *kbuf, int curve) +{ + const unsigned char *order; + unsigned char *buf; + size_t len; + unsigned mask; + + if (curve < 0 || curve >= 32 + || ((impl->supported_curves >> curve) & 1) == 0) + { + return 0; + } + order = impl->order(curve, &len); + while (len > 0 && *order == 0) { + order ++; + len --; + } + if (kbuf == NULL || len == 0) { + return len; + } + mask = order[0]; + mask |= (mask >> 1); + mask |= (mask >> 2); + mask |= (mask >> 4); + + /* + * We generate sequences of random bits of the right size, until + * the value is strictly lower than the curve order (we also + * check for all-zero values, which are invalid). 
+ */ + buf = kbuf; + for (;;) { + size_t u; + unsigned cc, zz; + + (*rng_ctx)->generate(rng_ctx, buf, len); + buf[0] &= mask; + cc = 0; + u = len; + zz = 0; + while (u -- > 0) { + cc = ((unsigned)(buf[u] - order[u] - cc) >> 8) & 1; + zz |= buf[u]; + } + if (cc != 0 && zz != 0) { + break; + } + } + + if (sk != NULL) { + sk->curve = curve; + sk->x = buf; + sk->xlen = len; + } + return len; +} diff --git a/third_party/bearssl/src/ec_p256_m15.c b/third_party/bearssl/src/ec_p256_m15.c new file mode 100644 index 0000000..05800d8 --- /dev/null +++ b/third_party/bearssl/src/ec_p256_m15.c @@ -0,0 +1,2124 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * If BR_NO_ARITH_SHIFT is undefined, or defined to 0, then we _assume_ + * that right-shifting a signed negative integer copies the sign bit + * (arithmetic right-shift). 
/*
 * If BR_NO_ARITH_SHIFT is undefined, or defined to 0, then we _assume_
 * that right-shifting a signed negative integer copies the sign bit
 * (arithmetic right-shift). This is "implementation-defined behaviour",
 * i.e. it is not undefined, but it may differ between compilers. Each
 * compiler is supposed to document its behaviour in that respect. GCC
 * explicitly defines that an arithmetic right shift is used. We expect
 * all other compilers to do the same, because underlying CPU offer an
 * arithmetic right shift opcode that could not be used otherwise.
 */
#if BR_NO_ARITH_SHIFT
#define ARSH(x, n)   (((uint32_t)(x) >> (n)) \
                    | ((-((uint32_t)(x) >> 31)) << (32 - (n))))
#else
#define ARSH(x, n)   ((*(int32_t *)&(x)) >> (n))
#endif

/*
 * Decode an unsigned big-endian integer (src, len bytes) into 13-bit
 * words written to dst, least significant word first. Only complete
 * 13-bit words are stored; the bits left over at the top are the
 * returned value.
 */
static uint32_t
be8_to_le13(uint32_t *dst, const unsigned char *src, size_t len)
{
	uint32_t bits;
	int nbits;
	size_t i;

	bits = 0;
	nbits = 0;
	/* Walk the bytes from the last (least significant) to the first. */
	for (i = len; i > 0; i --) {
		bits |= (uint32_t)src[i - 1] << nbits;
		nbits += 8;
		if (nbits < 13) {
			continue;
		}
		*dst = bits & 0x1FFF;
		dst ++;
		bits >>= 13;
		nbits -= 13;
	}
	return bits;
}

/*
 * Encode an integer held as 13-bit words (src, least significant word
 * first) into exactly len big-endian bytes at dst. Source words are
 * consumed only when fewer than 8 bits are pending, so src must hold
 * enough words for len bytes.
 */
static void
le13_to_be8(unsigned char *dst, size_t len, const uint32_t *src)
{
	uint32_t bits;
	int nbits;
	size_t i;

	bits = 0;
	nbits = 0;
	/* Output bytes are produced from the last one to the first. */
	for (i = len; i > 0; i --) {
		if (nbits < 8) {
			bits |= src[0] << nbits;
			src ++;
			nbits += 13;
		}
		dst[i - 1] = (unsigned char)bits;
		bits >>= 8;
		nbits -= 8;
	}
}
/*
 * Carry propagation over len words: each source word w[] plus the
 * running carry is split into its low 13 bits (stored in d[]) and the
 * remainder, obtained with the ARSH() arithmetic shift and passed on
 * to the next word. The carry left after the last word is returned.
 * Each w[] word is read before the matching d[] word is written, so
 * d and w may be the very same array.
 */
static inline uint32_t
norm13(uint32_t *d, const uint32_t *w, size_t len)
{
	uint32_t carry;
	size_t pos;

	carry = 0;
	pos = 0;
	while (pos < len) {
		int32_t sum;

		/* Signed temporary: ARSH() must see the sign of the sum. */
		sum = w[pos] + carry;
		d[pos] = sum & 0x1FFF;
		carry = ARSH(sum, 13);
		pos ++;
	}
	return carry;
}
/*
 * BR_SLOW_MUL15 variant: multiplies the 20-word operands a[] and b[]
 * (13-bit words) into the 40-word destination d[], using two levels
 * of Karatsuba. All reads of a[] and b[] happen before d[] is written
 * (operands are first copied into the local arrays u[] and v[]).
 */
static void
mul20(uint32_t *d, const uint32_t *a, const uint32_t *b)
{
	/*
	 * Two-level Karatsuba: turns a 20x20 multiplication into
	 * nine 5x5 multiplications. We use 13-bit words but do not
	 * propagate carries immediately, so words may expand:
	 *
	 *  - First Karatsuba decomposition turns the 20x20 mul on
	 *    13-bit words into three 10x10 muls, two on 13-bit words
	 *    and one on 14-bit words.
	 *
	 *  - Second Karatsuba decomposition further splits these into:
	 *
	 *     * four 5x5 muls on 13-bit words
	 *     * four 5x5 muls on 14-bit words
	 *     * one 5x5 mul on 15-bit words
	 *
	 * Highest word value is 8191, 16382 or 32764, for 13-bit, 14-bit
	 * or 15-bit words, respectively.
	 */
	uint32_t u[45], v[45], w[90];
	uint32_t cc;
	int i;

	/*
	 * Helper macros. Offsets given to ZADD, ZADDT and ZSUB2F count
	 * 5-word slots; offsets given to CPR count words.
	 *
	 *   ZADD    slot d_off of dw = slot s1_off of s1w + slot s2_off
	 *           of s2w (word by word, no carry)
	 *   ZADDT   slot d_off of dw += slot s_off of sw
	 *   ZSUB2F  slot d_off of dw -= (slot of s1w + slot of s2w)
	 *   CPR1    one carry step: keep 13 bits in w, pass the rest on
	 *   CPR     carry propagation over nine words starting at d_off;
	 *           the tenth word receives the final carry
	 */
#define ZADD(dw, d_off, s1w, s1_off, s2w, s2_off)   do { \
		(dw)[5 * (d_off) + 0] = (s1w)[5 * (s1_off) + 0] \
			+ (s2w)[5 * (s2_off) + 0]; \
		(dw)[5 * (d_off) + 1] = (s1w)[5 * (s1_off) + 1] \
			+ (s2w)[5 * (s2_off) + 1]; \
		(dw)[5 * (d_off) + 2] = (s1w)[5 * (s1_off) + 2] \
			+ (s2w)[5 * (s2_off) + 2]; \
		(dw)[5 * (d_off) + 3] = (s1w)[5 * (s1_off) + 3] \
			+ (s2w)[5 * (s2_off) + 3]; \
		(dw)[5 * (d_off) + 4] = (s1w)[5 * (s1_off) + 4] \
			+ (s2w)[5 * (s2_off) + 4]; \
	} while (0)

#define ZADDT(dw, d_off, sw, s_off)   do { \
		(dw)[5 * (d_off) + 0] += (sw)[5 * (s_off) + 0]; \
		(dw)[5 * (d_off) + 1] += (sw)[5 * (s_off) + 1]; \
		(dw)[5 * (d_off) + 2] += (sw)[5 * (s_off) + 2]; \
		(dw)[5 * (d_off) + 3] += (sw)[5 * (s_off) + 3]; \
		(dw)[5 * (d_off) + 4] += (sw)[5 * (s_off) + 4]; \
	} while (0)

#define ZSUB2F(dw, d_off, s1w, s1_off, s2w, s2_off)   do { \
		(dw)[5 * (d_off) + 0] -= (s1w)[5 * (s1_off) + 0] \
			+ (s2w)[5 * (s2_off) + 0]; \
		(dw)[5 * (d_off) + 1] -= (s1w)[5 * (s1_off) + 1] \
			+ (s2w)[5 * (s2_off) + 1]; \
		(dw)[5 * (d_off) + 2] -= (s1w)[5 * (s1_off) + 2] \
			+ (s2w)[5 * (s2_off) + 2]; \
		(dw)[5 * (d_off) + 3] -= (s1w)[5 * (s1_off) + 3] \
			+ (s2w)[5 * (s2_off) + 3]; \
		(dw)[5 * (d_off) + 4] -= (s1w)[5 * (s1_off) + 4] \
			+ (s2w)[5 * (s2_off) + 4]; \
	} while (0)

#define CPR1(w, cprcc)   do { \
		uint32_t cprz = (w) + cprcc; \
		(w) = cprz & 0x1FFF; \
		cprcc = cprz >> 13; \
	} while (0)

#define CPR(dw, d_off)   do { \
		uint32_t cprcc; \
		cprcc = 0; \
		CPR1((dw)[(d_off) + 0], cprcc); \
		CPR1((dw)[(d_off) + 1], cprcc); \
		CPR1((dw)[(d_off) + 2], cprcc); \
		CPR1((dw)[(d_off) + 3], cprcc); \
		CPR1((dw)[(d_off) + 4], cprcc); \
		CPR1((dw)[(d_off) + 5], cprcc); \
		CPR1((dw)[(d_off) + 6], cprcc); \
		CPR1((dw)[(d_off) + 7], cprcc); \
		CPR1((dw)[(d_off) + 8], cprcc); \
		(dw)[(d_off) + 9] = cprcc; \
	} while (0)

	/*
	 * Build the nine 5-word operands for a[] in u[]: slots 0..3 are
	 * the four quarters of a[]; slot 4 = q0+q1, slot 5 = q2+q3,
	 * slot 6 = q0+q2, slot 7 = q1+q3, slot 8 = sum of all four.
	 */
	memcpy(u, a, 20 * sizeof *a);
	ZADD(u, 4, a, 0, a, 1);
	ZADD(u, 5, a, 2, a, 3);
	ZADD(u, 6, a, 0, a, 2);
	ZADD(u, 7, a, 1, a, 3);
	ZADD(u, 8, u, 6, u, 7);

	/* Same layout for b[] in v[]. */
	memcpy(v, b, 20 * sizeof *b);
	ZADD(v, 4, b, 0, b, 1);
	ZADD(v, 5, b, 2, b, 3);
	ZADD(v, 6, b, 0, b, 2);
	ZADD(v, 7, b, 1, b, 3);
	ZADD(v, 8, v, 6, v, 7);

	/*
	 * Do the eight first 5x5 muls. Source words are at most 16382
	 * each, so we can add product results together "as is" in 32-bit
	 * words.
	 *
	 * The product of operand slot j (u[5j..], v[5j..]) goes to the
	 * ten words w[10j..10j+9]; the tenth word is cleared.
	 */
	for (i = 0; i < 40; i += 5) {
		w[(i << 1) + 0] = MUL15(u[i + 0], v[i + 0]);
		w[(i << 1) + 1] = MUL15(u[i + 0], v[i + 1])
			+ MUL15(u[i + 1], v[i + 0]);
		w[(i << 1) + 2] = MUL15(u[i + 0], v[i + 2])
			+ MUL15(u[i + 1], v[i + 1])
			+ MUL15(u[i + 2], v[i + 0]);
		w[(i << 1) + 3] = MUL15(u[i + 0], v[i + 3])
			+ MUL15(u[i + 1], v[i + 2])
			+ MUL15(u[i + 2], v[i + 1])
			+ MUL15(u[i + 3], v[i + 0]);
		w[(i << 1) + 4] = MUL15(u[i + 0], v[i + 4])
			+ MUL15(u[i + 1], v[i + 3])
			+ MUL15(u[i + 2], v[i + 2])
			+ MUL15(u[i + 3], v[i + 1])
			+ MUL15(u[i + 4], v[i + 0]);
		w[(i << 1) + 5] = MUL15(u[i + 1], v[i + 4])
			+ MUL15(u[i + 2], v[i + 3])
			+ MUL15(u[i + 3], v[i + 2])
			+ MUL15(u[i + 4], v[i + 1]);
		w[(i << 1) + 6] = MUL15(u[i + 2], v[i + 4])
			+ MUL15(u[i + 3], v[i + 3])
			+ MUL15(u[i + 4], v[i + 2]);
		w[(i << 1) + 7] = MUL15(u[i + 3], v[i + 4])
			+ MUL15(u[i + 4], v[i + 3]);
		w[(i << 1) + 8] = MUL15(u[i + 4], v[i + 4]);
		w[(i << 1) + 9] = 0;
	}

	/*
	 * For the 9th multiplication, source words are up to 32764,
	 * so we must do some carry propagation. If we add up to
	 * 4 products and the carry is no more than 524224, then the
	 * result fits in 32 bits, and the next carry will be no more
	 * than 524224 (because 4*(32764^2)+524224 < 8192*524225).
	 *
	 * We thus just skip one of the products in the middle word,
	 * then do a carry propagation (this reduces words to 13 bits
	 * each, except possibly the last, which may use up to 17 bits
	 * or so), then add the missing product.
	 */
	w[80 + 0] = MUL15(u[40 + 0], v[40 + 0]);
	w[80 + 1] = MUL15(u[40 + 0], v[40 + 1])
		+ MUL15(u[40 + 1], v[40 + 0]);
	w[80 + 2] = MUL15(u[40 + 0], v[40 + 2])
		+ MUL15(u[40 + 1], v[40 + 1])
		+ MUL15(u[40 + 2], v[40 + 0]);
	w[80 + 3] = MUL15(u[40 + 0], v[40 + 3])
		+ MUL15(u[40 + 1], v[40 + 2])
		+ MUL15(u[40 + 2], v[40 + 1])
		+ MUL15(u[40 + 3], v[40 + 0]);
	w[80 + 4] = MUL15(u[40 + 0], v[40 + 4])
		+ MUL15(u[40 + 1], v[40 + 3])
		+ MUL15(u[40 + 2], v[40 + 2])
		+ MUL15(u[40 + 3], v[40 + 1]);
		/* + MUL15(u[40 + 4], v[40 + 0]) */
	w[80 + 5] = MUL15(u[40 + 1], v[40 + 4])
		+ MUL15(u[40 + 2], v[40 + 3])
		+ MUL15(u[40 + 3], v[40 + 2])
		+ MUL15(u[40 + 4], v[40 + 1]);
	w[80 + 6] = MUL15(u[40 + 2], v[40 + 4])
		+ MUL15(u[40 + 3], v[40 + 3])
		+ MUL15(u[40 + 4], v[40 + 2]);
	w[80 + 7] = MUL15(u[40 + 3], v[40 + 4])
		+ MUL15(u[40 + 4], v[40 + 3]);
	w[80 + 8] = MUL15(u[40 + 4], v[40 + 4]);

	CPR(w, 80);

	/* The product that was left out of the middle word above. */
	w[80 + 4] += MUL15(u[40 + 4], v[40 + 0]);

	/*
	 * The products on 14-bit words in slots 6 and 7 yield values
	 * up to 5*(16382^2) each, and we need to subtract two such
	 * values from the higher word. We need the subtraction to fit
	 * in a _signed_ 32-bit integer, i.e. 31 bits + a sign bit.
	 * However, 10*(16382^2) does not fit. So we must perform a
	 * bit of reduction here.
	 */
	CPR(w, 60);
	CPR(w, 70);

	/*
	 * Recompose results.
	 */

	/* 0..1*0..1 into 0..3 */
	ZSUB2F(w, 8, w, 0, w, 2);
	ZSUB2F(w, 9, w, 1, w, 3);
	ZADDT(w, 1, w, 8);
	ZADDT(w, 2, w, 9);

	/* 2..3*2..3 into 4..7 */
	ZSUB2F(w, 10, w, 4, w, 6);
	ZSUB2F(w, 11, w, 5, w, 7);
	ZADDT(w, 5, w, 10);
	ZADDT(w, 6, w, 11);

	/* (0..1+2..3)*(0..1+2..3) into 12..15 */
	ZSUB2F(w, 16, w, 12, w, 14);
	ZSUB2F(w, 17, w, 13, w, 15);
	ZADDT(w, 13, w, 16);
	ZADDT(w, 14, w, 17);

	/* first-level recomposition */
	ZSUB2F(w, 12, w, 0, w, 4);
	ZSUB2F(w, 13, w, 1, w, 5);
	ZSUB2F(w, 14, w, 2, w, 6);
	ZSUB2F(w, 15, w, 3, w, 7);
	ZADDT(w, 2, w, 12);
	ZADDT(w, 3, w, 13);
	ZADDT(w, 4, w, 14);
	ZADDT(w, 5, w, 15);

	/*
	 * Perform carry propagation to bring all words down to 13 bits.
	 * The carry returned for the 40 words is folded back into the
	 * top word, above its low 13 bits.
	 */
	cc = norm13(d, w, 40);
	d[39] += (cc << 13);

#undef ZADD
#undef ZADDT
#undef ZSUB2F
#undef CPR1
#undef CPR
}

/*
 * BR_SLOW_MUL15 variant of the squaring: simply multiplies a[] by
 * itself with mul20().
 */
static inline void
square20(uint32_t *d, const uint32_t *a)
{
	mul20(d, a, a);
}
/*
 * Schoolbook multiplication of two 20-word operands (13-bit words)
 * into a 40-word result. The 39 column sums are accumulated in the
 * local array t[] before anything is written to d[], so d may overlap
 * a or b. norm13() then brings the columns down to 13 bits each and
 * its final carry becomes the top word d[39].
 */
static void
mul20(uint32_t *d, const uint32_t *a, const uint32_t *b)
{
	uint32_t t[39];
	int k;

	/*
	 * Column k gathers every product a[i] * b[j] with i + j == k and
	 * both indices in 0..19. Loop bounds depend on k only, never on
	 * operand values.
	 */
	for (k = 0; k < 39; k ++) {
		uint32_t col;
		int i, first, last;

		first = (k > 19) ? (k - 19) : 0;
		last = (k < 19) ? k : 19;
		col = 0;
		for (i = first; i <= last; i ++) {
			col += MUL15(a[i], b[k - i]);
		}
		t[k] = col;
	}
	d[39] = norm13(d, t, 39);
}
9]) + + MUL15(a[ 2], a[ 8]) + + MUL15(a[ 3], a[ 7]) + + MUL15(a[ 4], a[ 6])) << 1); + t[11] = ((MUL15(a[ 0], a[11]) + + MUL15(a[ 1], a[10]) + + MUL15(a[ 2], a[ 9]) + + MUL15(a[ 3], a[ 8]) + + MUL15(a[ 4], a[ 7]) + + MUL15(a[ 5], a[ 6])) << 1); + t[12] = MUL15(a[ 6], a[ 6]) + + ((MUL15(a[ 0], a[12]) + + MUL15(a[ 1], a[11]) + + MUL15(a[ 2], a[10]) + + MUL15(a[ 3], a[ 9]) + + MUL15(a[ 4], a[ 8]) + + MUL15(a[ 5], a[ 7])) << 1); + t[13] = ((MUL15(a[ 0], a[13]) + + MUL15(a[ 1], a[12]) + + MUL15(a[ 2], a[11]) + + MUL15(a[ 3], a[10]) + + MUL15(a[ 4], a[ 9]) + + MUL15(a[ 5], a[ 8]) + + MUL15(a[ 6], a[ 7])) << 1); + t[14] = MUL15(a[ 7], a[ 7]) + + ((MUL15(a[ 0], a[14]) + + MUL15(a[ 1], a[13]) + + MUL15(a[ 2], a[12]) + + MUL15(a[ 3], a[11]) + + MUL15(a[ 4], a[10]) + + MUL15(a[ 5], a[ 9]) + + MUL15(a[ 6], a[ 8])) << 1); + t[15] = ((MUL15(a[ 0], a[15]) + + MUL15(a[ 1], a[14]) + + MUL15(a[ 2], a[13]) + + MUL15(a[ 3], a[12]) + + MUL15(a[ 4], a[11]) + + MUL15(a[ 5], a[10]) + + MUL15(a[ 6], a[ 9]) + + MUL15(a[ 7], a[ 8])) << 1); + t[16] = MUL15(a[ 8], a[ 8]) + + ((MUL15(a[ 0], a[16]) + + MUL15(a[ 1], a[15]) + + MUL15(a[ 2], a[14]) + + MUL15(a[ 3], a[13]) + + MUL15(a[ 4], a[12]) + + MUL15(a[ 5], a[11]) + + MUL15(a[ 6], a[10]) + + MUL15(a[ 7], a[ 9])) << 1); + t[17] = ((MUL15(a[ 0], a[17]) + + MUL15(a[ 1], a[16]) + + MUL15(a[ 2], a[15]) + + MUL15(a[ 3], a[14]) + + MUL15(a[ 4], a[13]) + + MUL15(a[ 5], a[12]) + + MUL15(a[ 6], a[11]) + + MUL15(a[ 7], a[10]) + + MUL15(a[ 8], a[ 9])) << 1); + t[18] = MUL15(a[ 9], a[ 9]) + + ((MUL15(a[ 0], a[18]) + + MUL15(a[ 1], a[17]) + + MUL15(a[ 2], a[16]) + + MUL15(a[ 3], a[15]) + + MUL15(a[ 4], a[14]) + + MUL15(a[ 5], a[13]) + + MUL15(a[ 6], a[12]) + + MUL15(a[ 7], a[11]) + + MUL15(a[ 8], a[10])) << 1); + t[19] = ((MUL15(a[ 0], a[19]) + + MUL15(a[ 1], a[18]) + + MUL15(a[ 2], a[17]) + + MUL15(a[ 3], a[16]) + + MUL15(a[ 4], a[15]) + + MUL15(a[ 5], a[14]) + + MUL15(a[ 6], a[13]) + + MUL15(a[ 7], a[12]) + + MUL15(a[ 8], a[11]) + + MUL15(a[ 9], a[10])) << 
1); + t[20] = MUL15(a[10], a[10]) + + ((MUL15(a[ 1], a[19]) + + MUL15(a[ 2], a[18]) + + MUL15(a[ 3], a[17]) + + MUL15(a[ 4], a[16]) + + MUL15(a[ 5], a[15]) + + MUL15(a[ 6], a[14]) + + MUL15(a[ 7], a[13]) + + MUL15(a[ 8], a[12]) + + MUL15(a[ 9], a[11])) << 1); + t[21] = ((MUL15(a[ 2], a[19]) + + MUL15(a[ 3], a[18]) + + MUL15(a[ 4], a[17]) + + MUL15(a[ 5], a[16]) + + MUL15(a[ 6], a[15]) + + MUL15(a[ 7], a[14]) + + MUL15(a[ 8], a[13]) + + MUL15(a[ 9], a[12]) + + MUL15(a[10], a[11])) << 1); + t[22] = MUL15(a[11], a[11]) + + ((MUL15(a[ 3], a[19]) + + MUL15(a[ 4], a[18]) + + MUL15(a[ 5], a[17]) + + MUL15(a[ 6], a[16]) + + MUL15(a[ 7], a[15]) + + MUL15(a[ 8], a[14]) + + MUL15(a[ 9], a[13]) + + MUL15(a[10], a[12])) << 1); + t[23] = ((MUL15(a[ 4], a[19]) + + MUL15(a[ 5], a[18]) + + MUL15(a[ 6], a[17]) + + MUL15(a[ 7], a[16]) + + MUL15(a[ 8], a[15]) + + MUL15(a[ 9], a[14]) + + MUL15(a[10], a[13]) + + MUL15(a[11], a[12])) << 1); + t[24] = MUL15(a[12], a[12]) + + ((MUL15(a[ 5], a[19]) + + MUL15(a[ 6], a[18]) + + MUL15(a[ 7], a[17]) + + MUL15(a[ 8], a[16]) + + MUL15(a[ 9], a[15]) + + MUL15(a[10], a[14]) + + MUL15(a[11], a[13])) << 1); + t[25] = ((MUL15(a[ 6], a[19]) + + MUL15(a[ 7], a[18]) + + MUL15(a[ 8], a[17]) + + MUL15(a[ 9], a[16]) + + MUL15(a[10], a[15]) + + MUL15(a[11], a[14]) + + MUL15(a[12], a[13])) << 1); + t[26] = MUL15(a[13], a[13]) + + ((MUL15(a[ 7], a[19]) + + MUL15(a[ 8], a[18]) + + MUL15(a[ 9], a[17]) + + MUL15(a[10], a[16]) + + MUL15(a[11], a[15]) + + MUL15(a[12], a[14])) << 1); + t[27] = ((MUL15(a[ 8], a[19]) + + MUL15(a[ 9], a[18]) + + MUL15(a[10], a[17]) + + MUL15(a[11], a[16]) + + MUL15(a[12], a[15]) + + MUL15(a[13], a[14])) << 1); + t[28] = MUL15(a[14], a[14]) + + ((MUL15(a[ 9], a[19]) + + MUL15(a[10], a[18]) + + MUL15(a[11], a[17]) + + MUL15(a[12], a[16]) + + MUL15(a[13], a[15])) << 1); + t[29] = ((MUL15(a[10], a[19]) + + MUL15(a[11], a[18]) + + MUL15(a[12], a[17]) + + MUL15(a[13], a[16]) + + MUL15(a[14], a[15])) << 1); + t[30] = MUL15(a[15], a[15]) + + 
((MUL15(a[11], a[19]) + + MUL15(a[12], a[18]) + + MUL15(a[13], a[17]) + + MUL15(a[14], a[16])) << 1); + t[31] = ((MUL15(a[12], a[19]) + + MUL15(a[13], a[18]) + + MUL15(a[14], a[17]) + + MUL15(a[15], a[16])) << 1); + t[32] = MUL15(a[16], a[16]) + + ((MUL15(a[13], a[19]) + + MUL15(a[14], a[18]) + + MUL15(a[15], a[17])) << 1); + t[33] = ((MUL15(a[14], a[19]) + + MUL15(a[15], a[18]) + + MUL15(a[16], a[17])) << 1); + t[34] = MUL15(a[17], a[17]) + + ((MUL15(a[15], a[19]) + + MUL15(a[16], a[18])) << 1); + t[35] = ((MUL15(a[16], a[19]) + + MUL15(a[17], a[18])) << 1); + t[36] = MUL15(a[18], a[18]) + + ((MUL15(a[17], a[19])) << 1); + t[37] = ((MUL15(a[18], a[19])) << 1); + t[38] = MUL15(a[19], a[19]); + d[39] = norm13(d, t, 39); +} + +#endif + +/* + * Modulus for field F256 (field for point coordinates in curve P-256). + */ +static const uint32_t F256[] = { + 0x1FFF, 0x1FFF, 0x1FFF, 0x1FFF, 0x1FFF, 0x1FFF, 0x1FFF, 0x001F, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0400, 0x0000, + 0x0000, 0x1FF8, 0x1FFF, 0x01FF +}; + +/* + * The 'b' curve equation coefficient for P-256. + */ +static const uint32_t P256_B[] = { + 0x004B, 0x1E93, 0x0F89, 0x1C78, 0x03BC, 0x187B, 0x114E, 0x1619, + 0x1D06, 0x0328, 0x01AF, 0x0D31, 0x1557, 0x15DE, 0x1ECF, 0x127C, + 0x0A3A, 0x0EC5, 0x118D, 0x00B5 +}; + +/* + * Perform a "short reduction" in field F256 (field for curve P-256). + * The source value should be less than 262 bits; on output, it will + * be at most 257 bits, and less than twice the modulus. + */ +static void +reduce_f256(uint32_t *d) +{ + uint32_t x; + + x = d[19] >> 9; + d[19] &= 0x01FF; + d[17] += x << 3; + d[14] -= x << 10; + d[7] -= x << 5; + d[0] += x; + norm13(d, d, 20); +} + +/* + * Perform a "final reduction" in field F256 (field for curve P-256). + * The source value must be less than twice the modulus. If the value + * is not lower than the modulus, then the modulus is subtracted and + * this function returns 1; otherwise, it leaves it untouched and it + * returns 0. 
+ */ +static uint32_t +reduce_final_f256(uint32_t *d) +{ + uint32_t t[20]; + uint32_t cc; + int i; + + memcpy(t, d, sizeof t); + cc = 0; + for (i = 0; i < 20; i ++) { + uint32_t w; + + w = t[i] - F256[i] - cc; + cc = w >> 31; /* borrow: 1 if this word's subtraction went negative */ + t[i] = w & 0x1FFF; + } + cc ^= 1; /* now 1 when there was no final borrow, i.e. d >= modulus */ + CCOPY(cc, d, t, sizeof t); + return cc; +} + +/* + * Perform a multiplication of two integers modulo + * 2^256-2^224+2^192+2^96-1 (for NIST curve P-256). Operands are arrays + * of 20 words, each containing 13 bits of data, in little-endian order. + * On input, upper word may be up to 13 bits (hence value up to 2^260-1); + * on output, value fits on 257 bits and is lower than twice the modulus. + */ +static void +mul_f256(uint32_t *d, const uint32_t *a, const uint32_t *b) +{ + uint32_t t[40], cc; + int i; + + /* + * Compute raw multiplication. All result words fit in 13 bits + * each. + */ + mul20(t, a, b); + + /* + * Modular reduction: each high word is added/subtracted where + * necessary. + * + * The modulus is: + * p = 2^256 - 2^224 + 2^192 + 2^96 - 1 + * Therefore: + * 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p + * + * For a word x at bit offset n (n >= 256), we have: + * x*2^n = x*2^(n-32) - x*2^(n-64) + * - x*2^(n - 160) + x*2^(n-256) mod p + * + * Thus, we can nullify the high word if we reinject it at the + * proper positions. + */ + for (i = 39; i >= 20; i --) { + uint32_t x; + + x = t[i]; + t[i - 2] += ARSH(x, 6); + t[i - 3] += (x << 7) & 0x1FFF; + t[i - 4] -= ARSH(x, 12); + t[i - 5] -= (x << 1) & 0x1FFF; + t[i - 12] -= ARSH(x, 4); + t[i - 13] -= (x << 9) & 0x1FFF; + t[i - 19] += ARSH(x, 9); + t[i - 20] += (x << 4) & 0x1FFF; + } + + /* + * Propagate carries. This is a signed propagation, and the + * result may be negative. The loop above may enlarge values, + * but not too much: worst case is the chain involving t[i - 3], + * in which a value may be added to itself up to 7 times. Since + * starting values are 13-bit each, all words fit on 20 bits + * (21 to account for the sign bit). 
+ */ + cc = norm13(t, t, 20); + + /* + * Perform modular reduction again for the bits beyond 256 (the carry + * and the bits 256..259). Since the largest shift below is by 10 + * bits, and the values fit on 21 bits, values fit in 32-bit words, + * thereby allowing injecting full word values. + */ + cc = (cc << 4) | (t[19] >> 9); + t[19] &= 0x01FF; + t[17] += cc << 3; + t[14] -= cc << 10; + t[7] -= cc << 5; + t[0] += cc; + + /* + * If the carry is negative, then after carry propagation, we may + * end up with a value which is negative, and we don't want that. + * Thus, in that case, we add the modulus. Note that the subtraction + * result, when the carry is negative, is always smaller than the + * modulus, so the extra addition will not make the value exceed + * twice the modulus. + */ + cc >>= 31; + t[0] -= cc; + t[7] += cc << 5; + t[14] += cc << 10; + t[17] -= cc << 3; + t[19] += cc << 9; + + norm13(d, t, 20); +} + +/* + * Square an integer modulo 2^256-2^224+2^192+2^96-1 (for NIST curve + * P-256). Operand is an array of 20 words, each containing 13 bits of + * data, in little-endian order. On input, upper word may be up to 13 + * bits (hence value up to 2^260-1); on output, value fits on 257 bits + * and is lower than twice the modulus. + */ +static void +square_f256(uint32_t *d, const uint32_t *a) +{ + uint32_t t[40], cc; + int i; + + /* + * Compute raw square. All result words fit in 13 bits each. + */ + square20(t, a); + + /* + * Modular reduction: each high word is added/subtracted where + * necessary. + * + * The modulus is: + * p = 2^256 - 2^224 + 2^192 + 2^96 - 1 + * Therefore: + * 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p + * + * For a word x at bit offset n (n >= 256), we have: + * x*2^n = x*2^(n-32) - x*2^(n-64) + * - x*2^(n - 160) + x*2^(n-256) mod p + * + * Thus, we can nullify the high word if we reinject it at the + * proper positions. 
+ */ + for (i = 39; i >= 20; i --) { + uint32_t x; + + x = t[i]; + t[i - 2] += ARSH(x, 6); + t[i - 3] += (x << 7) & 0x1FFF; + t[i - 4] -= ARSH(x, 12); + t[i - 5] -= (x << 1) & 0x1FFF; + t[i - 12] -= ARSH(x, 4); + t[i - 13] -= (x << 9) & 0x1FFF; + t[i - 19] += ARSH(x, 9); + t[i - 20] += (x << 4) & 0x1FFF; + } + + /* + * Propagate carries. This is a signed propagation, and the + * result may be negative. The loop above may enlarge values, + * but not too much: worst case is the chain involving t[i - 3], + * in which a value may be added to itself up to 7 times. Since + * starting values are 13-bit each, all words fit on 20 bits + * (21 to account for the sign bit). + */ + cc = norm13(t, t, 20); + + /* + * Perform modular reduction again for the bits beyond 256 (the carry + * and the bits 256..259). Since the largest shift below is by 10 + * bits, and the values fit on 21 bits, values fit in 32-bit words, + * thereby allowing injecting full word values. + */ + cc = (cc << 4) | (t[19] >> 9); + t[19] &= 0x01FF; + t[17] += cc << 3; + t[14] -= cc << 10; + t[7] -= cc << 5; + t[0] += cc; + + /* + * If the carry is negative, then after carry propagation, we may + * end up with a value which is negative, and we don't want that. + * Thus, in that case, we add the modulus. Note that the subtraction + * result, when the carry is negative, is always smaller than the + * modulus, so the extra addition will not make the value exceed + * twice the modulus. + */ + cc >>= 31; + t[0] -= cc; + t[7] += cc << 5; + t[14] += cc << 10; + t[17] -= cc << 3; + t[19] += cc << 9; + + norm13(d, t, 20); +} + +/* + * Jacobian coordinates for a point in P-256: affine coordinates (X,Y) + * are such that: + * X = x / z^2 + * Y = y / z^3 + * For the point at infinity, z = 0. + * Each point thus admits many possible representations. + * + * Coordinates are represented in arrays of 32-bit integers, each holding + * 13 bits of data. 
Values may also be slightly greater than the modulus, + * but they will always be lower than twice the modulus. + */ +typedef struct { + uint32_t x[20]; + uint32_t y[20]; + uint32_t z[20]; +} p256_jacobian; + +/* + * Convert a point to affine coordinates: + * - If the point is the point at infinity, then all three coordinates + * are set to 0. + * - Otherwise, the 'z' coordinate is set to 1, and the 'x' and 'y' + * coordinates are the 'X' and 'Y' affine coordinates. + * The coordinates are guaranteed to be lower than the modulus. + */ +static void +p256_to_affine(p256_jacobian *P) +{ + uint32_t t1[20], t2[20]; + int i; + + /* + * Invert z with a modular exponentiation: the modulus is + * p = 2^256 - 2^224 + 2^192 + 2^96 - 1, and the exponent is + * p-2. Exponent bit pattern (from high to low) is: + * - 32 bits of value 1 + * - 31 bits of value 0 + * - 1 bit of value 1 + * - 96 bits of value 0 + * - 94 bits of value 1 + * - 1 bit of value 0 + * - 1 bit of value 1 + * Thus, we precompute z^(2^31-1) to speed things up. + * + * If z = 0 (point at infinity) then the modular exponentiation + * will yield 0, which leads to the expected result (all three + * coordinates set to 0). + */ + + /* + * A simple square-and-multiply for z^(2^31-1). We could save about + * two dozen multiplications here with an addition chain, but + * this would require a bit more code, and extra stack buffers. + */ + memcpy(t1, P->z, sizeof P->z); + for (i = 0; i < 30; i ++) { + square_f256(t1, t1); + mul_f256(t1, t1, P->z); + } + + /* + * Square-and-multiply. Apart from the squarings, we have a few + * multiplications to set bits to 1; we multiply by the original z + * for setting 1 bit, and by t1 for setting 31 bits. 
+ */ + memcpy(t2, P->z, sizeof P->z); + for (i = 1; i < 256; i ++) { + square_f256(t2, t2); + switch (i) { + case 31: + case 190: + case 221: + case 252: + mul_f256(t2, t2, t1); + break; + case 63: + case 253: + case 255: + mul_f256(t2, t2, P->z); + break; + } + } + + /* + * Now that we have 1/z, multiply x by 1/z^2 and y by 1/z^3. + */ + mul_f256(t1, t2, t2); + mul_f256(P->x, t1, P->x); + mul_f256(t1, t1, t2); + mul_f256(P->y, t1, P->y); + reduce_final_f256(P->x); + reduce_final_f256(P->y); + + /* + * Multiply z by 1/z. If z = 0, then this will yield 0, otherwise + * this will set z to 1. + */ + mul_f256(P->z, P->z, t2); + reduce_final_f256(P->z); +} + +/* + * Double a point in P-256. This function works for all valid points, + * including the point at infinity. + */ +static void +p256_double(p256_jacobian *Q) +{ + /* + * Doubling formulas are: + * + * s = 4*x*y^2 + * m = 3*(x + z^2)*(x - z^2) + * x' = m^2 - 2*s + * y' = m*(s - x') - 8*y^4 + * z' = 2*y*z + * + * These formulas work for all points, including points of order 2 + * and points at infinity: + * - If y = 0 then z' = 0. But there is no such point in P-256 + * anyway. + * - If z = 0 then z' = 0. + */ + uint32_t t1[20], t2[20], t3[20], t4[20]; + int i; + + /* + * Compute z^2 in t1. + */ + square_f256(t1, Q->z); + + /* + * Compute x-z^2 in t2 and x+z^2 in t1. + */ + for (i = 0; i < 20; i ++) { + t2[i] = (F256[i] << 1) + Q->x[i] - t1[i]; + t1[i] += Q->x[i]; + } + norm13(t1, t1, 20); + norm13(t2, t2, 20); + + /* + * Compute 3*(x+z^2)*(x-z^2) in t1. + */ + mul_f256(t3, t1, t2); + for (i = 0; i < 20; i ++) { + t1[i] = MUL15(3, t3[i]); + } + norm13(t1, t1, 20); + + /* + * Compute 4*x*y^2 (in t2) and 2*y^2 (in t3). + */ + square_f256(t3, Q->y); + for (i = 0; i < 20; i ++) { + t3[i] <<= 1; + } + norm13(t3, t3, 20); + mul_f256(t2, Q->x, t3); + for (i = 0; i < 20; i ++) { + t2[i] <<= 1; + } + norm13(t2, t2, 20); + reduce_f256(t2); + + /* + * Compute x' = m^2 - 2*s. 
+ */ + square_f256(Q->x, t1); + for (i = 0; i < 20; i ++) { + Q->x[i] += (F256[i] << 2) - (t2[i] << 1); + } + norm13(Q->x, Q->x, 20); + reduce_f256(Q->x); + + /* + * Compute z' = 2*y*z. + */ + mul_f256(t4, Q->y, Q->z); + for (i = 0; i < 20; i ++) { + Q->z[i] = t4[i] << 1; + } + norm13(Q->z, Q->z, 20); + reduce_f256(Q->z); + + /* + * Compute y' = m*(s - x') - 8*y^4. Note that we already have + * 2*y^2 in t3. + */ + for (i = 0; i < 20; i ++) { + t2[i] += (F256[i] << 1) - Q->x[i]; + } + norm13(t2, t2, 20); + mul_f256(Q->y, t1, t2); + square_f256(t4, t3); + for (i = 0; i < 20; i ++) { + Q->y[i] += (F256[i] << 2) - (t4[i] << 1); + } + norm13(Q->y, Q->y, 20); + reduce_f256(Q->y); +} + +/* + * Add point P2 to point P1. + * + * This function computes the wrong result in the following cases: + * + * - If P1 == 0 but P2 != 0 + * - If P1 != 0 but P2 == 0 + * - If P1 == P2 + * + * In all three cases, P1 is set to the point at infinity. + * + * Returned value is 0 if one of the following occurs: + * + * - P1 and P2 have the same Y coordinate + * - P1 == 0 and P2 == 0 + * - The Y coordinate of one of the points is 0 and the other point is + * the point at infinity. + * + * The third case cannot actually happen with valid points, since a point + * with Y == 0 is a point of order 2, and there is no point of order 2 on + * curve P-256. + * + * Therefore, assuming that P1 != 0 and P2 != 0 on input, then the caller + * can apply the following: + * + * - If the result is not the point at infinity, then it is correct. + * - Otherwise, if the returned value is 1, then this is a case of + * P1+P2 == 0, so the result is indeed the point at infinity. + * - Otherwise, P1 == P2, so a "double" operation should have been + * performed. 
+ */ +static uint32_t +p256_add(p256_jacobian *P1, const p256_jacobian *P2) +{ + /* + * Addition formulas are: + * + * u1 = x1 * z2^2 + * u2 = x2 * z1^2 + * s1 = y1 * z2^3 + * s2 = y2 * z1^3 + * h = u2 - u1 + * r = s2 - s1 + * x3 = r^2 - h^3 - 2 * u1 * h^2 + * y3 = r * (u1 * h^2 - x3) - s1 * h^3 + * z3 = h * z1 * z2 + */ + uint32_t t1[20], t2[20], t3[20], t4[20], t5[20], t6[20], t7[20]; + uint32_t ret; + int i; + + /* + * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3). + */ + square_f256(t3, P2->z); + mul_f256(t1, P1->x, t3); + mul_f256(t4, P2->z, t3); + mul_f256(t3, P1->y, t4); + + /* + * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). + */ + square_f256(t4, P1->z); + mul_f256(t2, P2->x, t4); + mul_f256(t5, P1->z, t4); + mul_f256(t4, P2->y, t5); + + /* + * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4). + * We need to test whether r is zero, so we fully reduce r + * before inspecting its words. + */ + for (i = 0; i < 20; i ++) { + t2[i] += (F256[i] << 1) - t1[i]; + t4[i] += (F256[i] << 1) - t3[i]; + } + norm13(t2, t2, 20); + norm13(t4, t4, 20); + reduce_f256(t4); + reduce_final_f256(t4); + ret = 0; + for (i = 0; i < 20; i ++) { + ret |= t4[i]; + } + ret = (ret | -ret) >> 31; /* 1 if r != 0, 0 if r == 0 */ + + /* + * Compute h^2 (in t7), u1*h^2 (in t6) and h^3 (in t5). + */ + square_f256(t7, t2); + mul_f256(t6, t1, t7); + mul_f256(t5, t7, t2); + + /* + * Compute x3 = r^2 - h^3 - 2*u1*h^2. + */ + square_f256(P1->x, t4); + for (i = 0; i < 20; i ++) { + P1->x[i] += (F256[i] << 3) - t5[i] - (t6[i] << 1); + } + norm13(P1->x, P1->x, 20); + reduce_f256(P1->x); + + /* + * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. + */ + for (i = 0; i < 20; i ++) { + t6[i] += (F256[i] << 1) - P1->x[i]; + } + norm13(t6, t6, 20); + mul_f256(P1->y, t4, t6); + mul_f256(t1, t5, t3); + for (i = 0; i < 20; i ++) { + P1->y[i] += (F256[i] << 1) - t1[i]; + } + norm13(P1->y, P1->y, 20); + reduce_f256(P1->y); + + /* + * Compute z3 = h*z1*z2. 
+ */ + mul_f256(t1, P1->z, P2->z); + mul_f256(P1->z, t1, t2); + + return ret; +} + +/* + * Add point P2 to point P1. This is a specialised function for the + * case when P2 is a non-zero point in affine coordinates. + * + * This function computes the wrong result in the following cases: + * + * - If P1 == 0 + * - If P1 == P2 + * + * In both cases, P1 is set to the point at infinity. + * + * Returned value is 0 if one of the following occurs: + * + * - P1 and P2 have the same Y coordinate + * - The Y coordinate of P2 is 0 and P1 is the point at infinity. + * + * The second case cannot actually happen with valid points, since a point + * with Y == 0 is a point of order 2, and there is no point of order 2 on + * curve P-256. + * + * Therefore, assuming that P1 != 0 on input, then the caller + * can apply the following: + * + * - If the result is not the point at infinity, then it is correct. + * - Otherwise, if the returned value is 1, then this is a case of + * P1+P2 == 0, so the result is indeed the point at infinity. + * - Otherwise, P1 == P2, so a "double" operation should have been + * performed. + */ +static uint32_t +p256_add_mixed(p256_jacobian *P1, const p256_jacobian *P2) +{ + /* + * Addition formulas are: + * + * u1 = x1 + * u2 = x2 * z1^2 + * s1 = y1 + * s2 = y2 * z1^3 + * h = u2 - u1 + * r = s2 - s1 + * x3 = r^2 - h^3 - 2 * u1 * h^2 + * y3 = r * (u1 * h^2 - x3) - s1 * h^3 + * z3 = h * z1 + */ + uint32_t t1[20], t2[20], t3[20], t4[20], t5[20], t6[20], t7[20]; + uint32_t ret; + int i; + + /* + * Compute u1 = x1 (in t1) and s1 = y1 (in t3). + */ + memcpy(t1, P1->x, sizeof t1); + memcpy(t3, P1->y, sizeof t3); + + /* + * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). + */ + square_f256(t4, P1->z); + mul_f256(t2, P2->x, t4); + mul_f256(t5, P1->z, t4); + mul_f256(t4, P2->y, t5); + + /* + * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4). + * We need to test whether r is zero, so we fully reduce r + * before inspecting its words. 
+ */ + for (i = 0; i < 20; i ++) { + t2[i] += (F256[i] << 1) - t1[i]; + t4[i] += (F256[i] << 1) - t3[i]; + } + norm13(t2, t2, 20); + norm13(t4, t4, 20); + reduce_f256(t4); + reduce_final_f256(t4); + ret = 0; + for (i = 0; i < 20; i ++) { + ret |= t4[i]; + } + ret = (ret | -ret) >> 31; /* 1 if r != 0, 0 if r == 0 */ + + /* + * Compute h^2 (in t7), u1*h^2 (in t6) and h^3 (in t5). + */ + square_f256(t7, t2); + mul_f256(t6, t1, t7); + mul_f256(t5, t7, t2); + + /* + * Compute x3 = r^2 - h^3 - 2*u1*h^2. + */ + square_f256(P1->x, t4); + for (i = 0; i < 20; i ++) { + P1->x[i] += (F256[i] << 3) - t5[i] - (t6[i] << 1); + } + norm13(P1->x, P1->x, 20); + reduce_f256(P1->x); + + /* + * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. + */ + for (i = 0; i < 20; i ++) { + t6[i] += (F256[i] << 1) - P1->x[i]; + } + norm13(t6, t6, 20); + mul_f256(P1->y, t4, t6); + mul_f256(t1, t5, t3); + for (i = 0; i < 20; i ++) { + P1->y[i] += (F256[i] << 1) - t1[i]; + } + norm13(P1->y, P1->y, 20); + reduce_f256(P1->y); + + /* + * Compute z3 = h*z1 (P2 is in affine coordinates, so z2 = 1). + */ + mul_f256(P1->z, P1->z, t2); + + return ret; +} + +/* + * Decode a P-256 point. This function does not support the point at + * infinity. Returned value is 0 if the point is invalid, 1 otherwise. + */ +static uint32_t +p256_decode(p256_jacobian *P, const void *src, size_t len) +{ + const unsigned char *buf; + uint32_t tx[20], ty[20], t1[20], t2[20]; + uint32_t bad; + int i; + + if (len != 65) { + return 0; + } + buf = src; + + /* + * First byte must be 0x04 (uncompressed format). We could support + * "hybrid format" (first byte is 0x06 or 0x07, and encodes the + * least significant bit of the Y coordinate), but it is explicitly + * forbidden by RFC 5480 (section 2.2). + */ + bad = NEQ(buf[0], 0x04); + + /* + * Decode the coordinates, and check that they are both lower + * than the modulus. 
+ */ + tx[19] = be8_to_le13(tx, buf + 1, 32); + ty[19] = be8_to_le13(ty, buf + 33, 32); + bad |= reduce_final_f256(tx); + bad |= reduce_final_f256(ty); + + /* + * Check curve equation. 
+ */ + square_f256(t1, tx); + mul_f256(t1, tx, t1); + square_f256(t2, ty); + for (i = 0; i < 20; i ++) { + t1[i] += (F256[i] << 3) - MUL15(3, tx[i]) + P256_B[i] - t2[i]; + } + norm13(t1, t1, 20); + reduce_f256(t1); + reduce_final_f256(t1); + for (i = 0; i < 20; i ++) { + bad |= t1[i]; + } + + /* + * Copy coordinates to the point structure. + */ + memcpy(P->x, tx, sizeof tx); + memcpy(P->y, ty, sizeof ty); + memset(P->z, 0, sizeof P->z); + P->z[0] = 1; + return EQ(bad, 0); +} + +/* + * Encode a point into a buffer. This function assumes that the point is + * valid, in affine coordinates, and not the point at infinity. + */ +static void +p256_encode(void *dst, const p256_jacobian *P) +{ + unsigned char *buf; + + buf = dst; + buf[0] = 0x04; + le13_to_be8(buf + 1, 32, P->x); + le13_to_be8(buf + 33, 32, P->y); +} + +/* + * Multiply a curve point by an integer. The integer is assumed to be + * lower than the curve order, and the base point must not be the point + * at infinity. + */ +static void +p256_mul(p256_jacobian *P, const unsigned char *x, size_t xlen) +{ + /* + * qz is a flag that is initially 1, and remains equal to 1 + * as long as the point is the point at infinity. + * + * We use a 2-bit window to handle multiplier bits by pairs. + * The precomputed window really is the points P2 and P3. + */ + uint32_t qz; + p256_jacobian P2, P3, Q, T, U; + + /* + * Compute window values. + */ + P2 = *P; + p256_double(&P2); + P3 = *P; + p256_add(&P3, &P2); + + /* + * We start with Q = 0. We process multiplier bits 2 by 2. 
+ */ + memset(&Q, 0, sizeof Q); + qz = 1; + while (xlen -- > 0) { + int k; + + for (k = 6; k >= 0; k -= 2) { + uint32_t bits; + uint32_t bnz; + + p256_double(&Q); + p256_double(&Q); + T = *P; + U = Q; + bits = (*x >> k) & (uint32_t)3; + bnz = NEQ(bits, 0); + CCOPY(EQ(bits, 2), &T, &P2, sizeof T); + CCOPY(EQ(bits, 3), &T, &P3, sizeof T); + p256_add(&U, &T); + CCOPY(bnz & qz, &Q, &T, sizeof Q); + CCOPY(bnz & ~qz, &Q, &U, sizeof Q); + qz &= ~bnz; + } + x ++; + } + *P = Q; +} + +/* + * Precomputed window: k*G points, where G is the curve generator, and k + * is an integer from 1 to 15 (inclusive). The X and Y coordinates of + * the point are encoded as 20 words of 13 bits each (little-endian + * order); 13-bit words are then grouped 2-by-2 into 32-bit words + * (little-endian order within each word). + */ +static const uint32_t Gwin[15][20] = { + + { 0x04C60296, 0x02721176, 0x19D00F4A, 0x102517AC, + 0x13B8037D, 0x0748103C, 0x1E730E56, 0x08481FE2, + 0x0F97012C, 0x00D605F4, 0x1DFA11F5, 0x0C801A0D, + 0x0F670CBB, 0x0AED0CC5, 0x115E0E33, 0x181F0785, + 0x13F514A7, 0x0FF30E3B, 0x17171E1A, 0x009F18D0 }, + + { 0x1B341978, 0x16911F11, 0x0D9A1A60, 0x1C4E1FC8, + 0x1E040969, 0x096A06B0, 0x091C0030, 0x09EF1A29, + 0x18C40D03, 0x00F91C9E, 0x13C313D1, 0x096F0748, + 0x011419E0, 0x1CC713A6, 0x1DD31DAD, 0x1EE80C36, + 0x1ECD0C69, 0x1A0800A4, 0x08861B8E, 0x000E1DD5 }, + + { 0x173F1D6C, 0x02CC06F1, 0x14C21FB4, 0x043D1EB6, + 0x0F3606B7, 0x1A971C59, 0x1BF71951, 0x01481323, + 0x068D0633, 0x00BD12F9, 0x13EA1032, 0x136209E8, + 0x1C1E19A7, 0x06C7013E, 0x06C10AB0, 0x14C908BB, + 0x05830CE1, 0x1FEF18DD, 0x00620998, 0x010E0D19 }, + + { 0x18180852, 0x0604111A, 0x0B771509, 0x1B6F0156, + 0x00181FE2, 0x1DCC0AF4, 0x16EF0659, 0x11F70E80, + 0x11A912D0, 0x01C414D2, 0x027618C6, 0x05840FC6, + 0x100215C4, 0x187E0C3B, 0x12771C96, 0x150C0B5D, + 0x0FF705FD, 0x07981C67, 0x1AD20C63, 0x01C11C55 }, + + { 0x1E8113ED, 0x0A940370, 0x12920215, 0x1FA31D6F, + 0x1F7C0C82, 0x10CD03F7, 0x02640560, 0x081A0B5E, + 0x1BD21151, 
0x00A21642, 0x0D0B0DA4, 0x0176113F, + 0x04440D1D, 0x001A1360, 0x1068012F, 0x1F141E49, + 0x10DF136B, 0x0E4F162B, 0x0D44104A, 0x01C1105F }, + + { 0x011411A9, 0x01551A4F, 0x0ADA0C6B, 0x01BD0EC8, + 0x18120C74, 0x112F1778, 0x099202CB, 0x0C05124B, + 0x195316A4, 0x01600685, 0x1E3B1FE2, 0x189014E3, + 0x0B5E1FD7, 0x0E0311F8, 0x08E000F7, 0x174E00DE, + 0x160702DF, 0x1B5A15BF, 0x03A11237, 0x01D01704 }, + + { 0x0C3D12A3, 0x0C501C0C, 0x17AD1300, 0x1715003F, + 0x03F719F8, 0x18031ED8, 0x1D980667, 0x0F681896, + 0x1B7D00BF, 0x011C14CE, 0x0FA000B4, 0x1C3501B0, + 0x0D901C55, 0x06790C10, 0x029E0736, 0x0DEB0400, + 0x034F183A, 0x030619B4, 0x0DEF0033, 0x00E71AC7 }, + + { 0x1B7D1393, 0x1B3B1076, 0x0BED1B4D, 0x13011F3A, + 0x0E0E1238, 0x156A132B, 0x013A02D3, 0x160A0D01, + 0x1CED1EE9, 0x00C5165D, 0x184C157E, 0x08141A83, + 0x153C0DA5, 0x1ED70F9D, 0x05170D51, 0x02CF13B8, + 0x18AE1771, 0x1B04113F, 0x05EC11E9, 0x015A16B3 }, + + { 0x04A41EE0, 0x1D1412E4, 0x1C591D79, 0x118511B7, + 0x14F00ACB, 0x1AE31E1C, 0x049C0D51, 0x016E061E, + 0x1DB71EDF, 0x01D41A35, 0x0E8208FA, 0x14441293, + 0x011F1E85, 0x1D54137A, 0x026B114F, 0x151D0832, + 0x00A50964, 0x1F9C1E1C, 0x064B12C9, 0x005409D1 }, + + { 0x062B123F, 0x0C0D0501, 0x183704C3, 0x08E31120, + 0x0A2E0A6C, 0x14440FED, 0x090A0D1E, 0x13271964, + 0x0B590A3A, 0x019D1D9B, 0x05780773, 0x09770A91, + 0x0F770CA3, 0x053F19D4, 0x02C80DED, 0x1A761304, + 0x091E0DD9, 0x15D201B8, 0x151109AA, 0x010F0198 }, + + { 0x05E101D1, 0x072314DD, 0x045F1433, 0x1A041541, + 0x10B3142E, 0x01840736, 0x1C1B19DB, 0x098B0418, + 0x1DBC083B, 0x007D1444, 0x01511740, 0x11DD1F3A, + 0x04ED0E2F, 0x1B4B1A62, 0x10480D04, 0x09E911A2, + 0x04211AFA, 0x19140893, 0x04D60CC4, 0x01210648 }, + + { 0x112703C4, 0x018B1BA1, 0x164C1D50, 0x05160BE0, + 0x0BCC1830, 0x01CB1554, 0x13291732, 0x1B2B1918, + 0x0DED0817, 0x00E80775, 0x0A2401D3, 0x0BFE08B3, + 0x0E531199, 0x058616E9, 0x04770B91, 0x110F0C55, + 0x19C11554, 0x0BFB1159, 0x03541C38, 0x000E1C2D }, + + { 0x10390C01, 0x02BB0751, 0x0AC5098E, 0x096C17AB, + 0x03C90E28, 
0x10BD18BF, 0x002E1F2D, 0x092B0986, + 0x1BD700AC, 0x002E1F20, 0x1E3D1FD8, 0x077718BB, + 0x06F919C4, 0x187407ED, 0x11370E14, 0x081E139C, + 0x00481ADB, 0x14AB0289, 0x066A0EBE, 0x00C70ED6 }, + + { 0x0694120B, 0x124E1CC9, 0x0E2F0570, 0x17CF081A, + 0x078906AC, 0x066D17CF, 0x1B3207F4, 0x0C5705E9, + 0x10001C38, 0x00A919DE, 0x06851375, 0x0F900BD8, + 0x080401BA, 0x0EEE0D42, 0x1B8B11EA, 0x0B4519F0, + 0x090F18C0, 0x062E1508, 0x0DD909F4, 0x01EB067C }, + + { 0x0CDC1D5F, 0x0D1818F9, 0x07781636, 0x125B18E8, + 0x0D7003AF, 0x13110099, 0x1D9B1899, 0x175C1EB7, + 0x0E34171A, 0x01E01153, 0x081A0F36, 0x0B391783, + 0x1D1F147E, 0x19CE16D7, 0x11511B21, 0x1F2C10F9, + 0x12CA0E51, 0x05A31D39, 0x171A192E, 0x016B0E4F } +}; + +/* + * Lookup one of the Gwin[] values, by index. This is constant-time. + */ +static void +lookup_Gwin(p256_jacobian *T, uint32_t idx) +{ + uint32_t xy[20]; + uint32_t k; + size_t u; + + memset(xy, 0, sizeof xy); + for (k = 0; k < 15; k ++) { + uint32_t m; + + m = -EQ(idx, k + 1); + for (u = 0; u < 20; u ++) { + xy[u] |= m & Gwin[k][u]; + } + } + for (u = 0; u < 10; u ++) { + T->x[(u << 1) + 0] = xy[u] & 0xFFFF; + T->x[(u << 1) + 1] = xy[u] >> 16; + T->y[(u << 1) + 0] = xy[u + 10] & 0xFFFF; + T->y[(u << 1) + 1] = xy[u + 10] >> 16; + } + memset(T->z, 0, sizeof T->z); + T->z[0] = 1; +} + +/* + * Multiply the generator by an integer. The integer is assumed non-zero + * and lower than the curve order. + */ +static void +p256_mulgen(p256_jacobian *P, const unsigned char *x, size_t xlen) +{ + /* + * qz is a flag that is initially 1, and remains equal to 1 + * as long as the point is the point at infinity. + * + * We use a 4-bit window to handle multiplier bits by groups + * of 4. The precomputed window is constant static data, with + * points in affine coordinates; we use a constant-time lookup. 
+ */ + p256_jacobian Q; + uint32_t qz; + + memset(&Q, 0, sizeof Q); + qz = 1; + while (xlen -- > 0) { + int k; + unsigned bx; + + bx = *x ++; + for (k = 0; k < 2; k ++) { + uint32_t bits; + uint32_t bnz; + p256_jacobian T, U; + + p256_double(&Q); + p256_double(&Q); + p256_double(&Q); + p256_double(&Q); + bits = (bx >> 4) & 0x0F; + bnz = NEQ(bits, 0); + lookup_Gwin(&T, bits); + U = Q; + p256_add_mixed(&U, &T); + CCOPY(bnz & qz, &Q, &T, sizeof Q); + CCOPY(bnz & ~qz, &Q, &U, sizeof Q); + qz &= ~bnz; + bx <<= 4; + } + } + *P = Q; +} + +static const unsigned char P256_G[] = { + 0x04, 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8, + 0xBC, 0xE6, 0xE5, 0x63, 0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D, + 0x81, 0x2D, 0xEB, 0x33, 0xA0, 0xF4, 0xA1, 0x39, 0x45, 0xD8, + 0x98, 0xC2, 0x96, 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F, + 0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C, 0x0F, 0x9E, 0x16, 0x2B, + 0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, 0xCB, 0xB6, 0x40, + 0x68, 0x37, 0xBF, 0x51, 0xF5 +}; + +static const unsigned char P256_N[] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xBC, 0xE6, 0xFA, 0xAD, + 0xA7, 0x17, 0x9E, 0x84, 0xF3, 0xB9, 0xCA, 0xC2, 0xFC, 0x63, + 0x25, 0x51 +}; + +static const unsigned char * +api_generator(int curve, size_t *len) +{ + (void)curve; + *len = sizeof P256_G; + return P256_G; +} + +static const unsigned char * +api_order(int curve, size_t *len) +{ + (void)curve; + *len = sizeof P256_N; + return P256_N; +} + +static size_t +api_xoff(int curve, size_t *len) +{ + (void)curve; + *len = 32; + return 1; +} + +static uint32_t +api_mul(unsigned char *G, size_t Glen, + const unsigned char *x, size_t xlen, int curve) +{ + uint32_t r; + p256_jacobian P; + + (void)curve; + if (Glen != 65) { + return 0; + } + r = p256_decode(&P, G, Glen); + p256_mul(&P, x, xlen); + p256_to_affine(&P); + p256_encode(G, &P); + return r; +} + +static size_t +api_mulgen(unsigned char *R, + const unsigned char *x, size_t xlen, int 
curve) +{ + p256_jacobian P; + + (void)curve; + p256_mulgen(&P, x, xlen); + p256_to_affine(&P); + p256_encode(R, &P); + return 65; +} + +static uint32_t +api_muladd(unsigned char *A, const unsigned char *B, size_t len, + const unsigned char *x, size_t xlen, + const unsigned char *y, size_t ylen, int curve) +{ + p256_jacobian P, Q; + uint32_t r, t, z; + int i; + + (void)curve; + if (len != 65) { + return 0; + } + r = p256_decode(&P, A, len); + p256_mul(&P, x, xlen); + if (B == NULL) { + p256_mulgen(&Q, y, ylen); + } else { + r &= p256_decode(&Q, B, len); + p256_mul(&Q, y, ylen); + } + + /* + * The final addition may fail in case both points are equal. + */ + t = p256_add(&P, &Q); + reduce_final_f256(P.z); + z = 0; + for (i = 0; i < 20; i ++) { + z |= P.z[i]; + } + z = EQ(z, 0); + p256_double(&Q); + + /* + * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we + * have the following: + * + * z = 0, t = 0 return P (normal addition) + * z = 0, t = 1 return P (normal addition) + * z = 1, t = 0 return Q (a 'double' case) + * z = 1, t = 1 report an error (P+Q = 0) + */ + CCOPY(z & ~t, &P, &Q, sizeof Q); + p256_to_affine(&P); + p256_encode(A, &P); + r &= ~(z & t); + return r; +} + +/* see bearssl_ec.h */ +const br_ec_impl br_ec_p256_m15 = { + (uint32_t)0x00800000, + &api_generator, + &api_order, + &api_xoff, + &api_mul, + &api_mulgen, + &api_muladd +}; diff --git a/third_party/bearssl/src/ec_p256_m31.c b/third_party/bearssl/src/ec_p256_m31.c new file mode 100644 index 0000000..b185937 --- /dev/null +++ b/third_party/bearssl/src/ec_p256_m31.c @@ -0,0 +1,1469 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the 
Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * If BR_NO_ARITH_SHIFT is undefined, or defined to 0, then we _assume_ + * that right-shifting a signed negative integer copies the sign bit + * (arithmetic right-shift). This is "implementation-defined behaviour", + * i.e. it is not undefined, but it may differ between compilers. Each + * compiler is supposed to document its behaviour in that respect. GCC + * explicitly defines that an arithmetic right shift is used. We expect + * all other compilers to do the same, because the underlying CPUs offer + * an arithmetic right shift opcode that could not be used otherwise. + * + * ARSH() operates on 32-bit values and ARSHW() on 64-bit values. In the + * default variant the macros take the address of 'x', so the argument + * must be an lvalue of the matching width. + */ +#if BR_NO_ARITH_SHIFT +#define ARSH(x, n) (((uint32_t)(x) >> (n)) \ + | ((-((uint32_t)(x) >> 31)) << (32 - (n)))) +#define ARSHW(x, n) (((uint64_t)(x) >> (n)) \ + | ((-((uint64_t)(x) >> 63)) << (64 - (n)))) +#else +#define ARSH(x, n) ((*(int32_t *)&(x)) >> (n)) +#define ARSHW(x, n) ((*(int64_t *)&(x)) >> (n)) +#endif + +/* + * Convert an integer from unsigned big-endian encoding to a sequence of + * 30-bit words in little-endian order. The final "partial" word is + * returned. 
+ */ +static uint32_t +be8_to_le30(uint32_t *dst, const unsigned char *src, size_t len) +{ + uint32_t acc; + int acc_len; + + /* + * Bytes are read from the last one (least significant) to the + * first; acc holds the acc_len low-order bits that have not yet + * been written out as a complete 30-bit word. + */ + acc = 0; + acc_len = 0; + while (len -- > 0) { + uint32_t b; + + b = src[len]; + if (acc_len < 22) { + acc |= b << acc_len; + acc_len += 8; + } else { + *dst ++ = (acc | (b << acc_len)) & 0x3FFFFFFF; + acc = b >> (30 - acc_len); + acc_len -= 22; + } + } + return acc; +} + +/* + * Convert an integer (30-bit words, little-endian) to unsigned + * big-endian encoding. The total encoding length is provided; all + * the destination bytes will be filled. + */ +static void +le30_to_be8(unsigned char *dst, size_t len, const uint32_t *src) +{ + uint32_t acc; + int acc_len; + + acc = 0; + acc_len = 0; + while (len -- > 0) { + if (acc_len < 8) { + uint32_t w; + + w = *src ++; + dst[len] = (unsigned char)(acc | (w << acc_len)); + acc = w >> (8 - acc_len); + acc_len += 22; + } else { + dst[len] = (unsigned char)acc; + acc >>= 8; + acc_len -= 8; + } + } +} + +/* + * Multiply two integers. Source integers are represented as arrays of + * nine 30-bit words, for values up to 2^270-1. Result is encoded over + * 18 words of 30 bits each. + */ +static void +mul9(uint32_t *d, const uint32_t *a, const uint32_t *b) +{ + /* + * Maximum intermediate result is no more than + * 10376293531797946367, which fits in 64 bits. Reason: + * + * 10376293531797946367 = 9 * (2^30-1)^2 + 9663676406 + * 10376293531797946367 < 9663676407 * 2^30 + * + * Thus, adding together 9 products of 30-bit integers, with + * a carry of at most 9663676406, yields an integer that fits + * on 64 bits and generates a carry of at most 9663676406. 
+ */ + uint64_t t[17]; + uint64_t cc; + int i; + + t[ 0] = MUL31(a[0], b[0]); + t[ 1] = MUL31(a[0], b[1]) + + MUL31(a[1], b[0]); + t[ 2] = MUL31(a[0], b[2]) + + MUL31(a[1], b[1]) + + MUL31(a[2], b[0]); + t[ 3] = MUL31(a[0], b[3]) + + MUL31(a[1], b[2]) + + MUL31(a[2], b[1]) + + MUL31(a[3], b[0]); + t[ 4] = MUL31(a[0], b[4]) + + MUL31(a[1], b[3]) + + MUL31(a[2], b[2]) + + MUL31(a[3], b[1]) + + MUL31(a[4], b[0]); + t[ 5] = MUL31(a[0], b[5]) + + MUL31(a[1], b[4]) + + MUL31(a[2], b[3]) + + MUL31(a[3], b[2]) + + MUL31(a[4], b[1]) + + MUL31(a[5], b[0]); + t[ 6] = MUL31(a[0], b[6]) + + MUL31(a[1], b[5]) + + MUL31(a[2], b[4]) + + MUL31(a[3], b[3]) + + MUL31(a[4], b[2]) + + MUL31(a[5], b[1]) + + MUL31(a[6], b[0]); + t[ 7] = MUL31(a[0], b[7]) + + MUL31(a[1], b[6]) + + MUL31(a[2], b[5]) + + MUL31(a[3], b[4]) + + MUL31(a[4], b[3]) + + MUL31(a[5], b[2]) + + MUL31(a[6], b[1]) + + MUL31(a[7], b[0]); + t[ 8] = MUL31(a[0], b[8]) + + MUL31(a[1], b[7]) + + MUL31(a[2], b[6]) + + MUL31(a[3], b[5]) + + MUL31(a[4], b[4]) + + MUL31(a[5], b[3]) + + MUL31(a[6], b[2]) + + MUL31(a[7], b[1]) + + MUL31(a[8], b[0]); + t[ 9] = MUL31(a[1], b[8]) + + MUL31(a[2], b[7]) + + MUL31(a[3], b[6]) + + MUL31(a[4], b[5]) + + MUL31(a[5], b[4]) + + MUL31(a[6], b[3]) + + MUL31(a[7], b[2]) + + MUL31(a[8], b[1]); + t[10] = MUL31(a[2], b[8]) + + MUL31(a[3], b[7]) + + MUL31(a[4], b[6]) + + MUL31(a[5], b[5]) + + MUL31(a[6], b[4]) + + MUL31(a[7], b[3]) + + MUL31(a[8], b[2]); + t[11] = MUL31(a[3], b[8]) + + MUL31(a[4], b[7]) + + MUL31(a[5], b[6]) + + MUL31(a[6], b[5]) + + MUL31(a[7], b[4]) + + MUL31(a[8], b[3]); + t[12] = MUL31(a[4], b[8]) + + MUL31(a[5], b[7]) + + MUL31(a[6], b[6]) + + MUL31(a[7], b[5]) + + MUL31(a[8], b[4]); + t[13] = MUL31(a[5], b[8]) + + MUL31(a[6], b[7]) + + MUL31(a[7], b[6]) + + MUL31(a[8], b[5]); + t[14] = MUL31(a[6], b[8]) + + MUL31(a[7], b[7]) + + MUL31(a[8], b[6]); + t[15] = MUL31(a[7], b[8]) + + MUL31(a[8], b[7]); + t[16] = MUL31(a[8], b[8]); + + /* + * Propagate carries. 
+ */ + cc = 0; + for (i = 0; i < 17; i ++) { + uint64_t w; + + w = t[i] + cc; + d[i] = (uint32_t)w & 0x3FFFFFFF; + cc = w >> 30; + } + d[17] = (uint32_t)cc; +} + +/* + * Square a 270-bit integer, represented as an array of nine 30-bit words. + * Result uses 18 words of 30 bits each. + */ +static void +square9(uint32_t *d, const uint32_t *a) +{ + uint64_t t[17]; + uint64_t cc; + int i; + + /* + * Each cross product a[i]*a[j] with i != j occurs twice in the + * square, hence the doubling (left shift by 1); the diagonal + * products a[i]*a[i] occur only once. + */ + t[ 0] = MUL31(a[0], a[0]); + t[ 1] = ((MUL31(a[0], a[1])) << 1); + t[ 2] = MUL31(a[1], a[1]) + + ((MUL31(a[0], a[2])) << 1); + t[ 3] = ((MUL31(a[0], a[3]) + + MUL31(a[1], a[2])) << 1); + t[ 4] = MUL31(a[2], a[2]) + + ((MUL31(a[0], a[4]) + + MUL31(a[1], a[3])) << 1); + t[ 5] = ((MUL31(a[0], a[5]) + + MUL31(a[1], a[4]) + + MUL31(a[2], a[3])) << 1); + t[ 6] = MUL31(a[3], a[3]) + + ((MUL31(a[0], a[6]) + + MUL31(a[1], a[5]) + + MUL31(a[2], a[4])) << 1); + t[ 7] = ((MUL31(a[0], a[7]) + + MUL31(a[1], a[6]) + + MUL31(a[2], a[5]) + + MUL31(a[3], a[4])) << 1); + t[ 8] = MUL31(a[4], a[4]) + + ((MUL31(a[0], a[8]) + + MUL31(a[1], a[7]) + + MUL31(a[2], a[6]) + + MUL31(a[3], a[5])) << 1); + t[ 9] = ((MUL31(a[1], a[8]) + + MUL31(a[2], a[7]) + + MUL31(a[3], a[6]) + + MUL31(a[4], a[5])) << 1); + t[10] = MUL31(a[5], a[5]) + + ((MUL31(a[2], a[8]) + + MUL31(a[3], a[7]) + + MUL31(a[4], a[6])) << 1); + t[11] = ((MUL31(a[3], a[8]) + + MUL31(a[4], a[7]) + + MUL31(a[5], a[6])) << 1); + t[12] = MUL31(a[6], a[6]) + + ((MUL31(a[4], a[8]) + + MUL31(a[5], a[7])) << 1); + t[13] = ((MUL31(a[5], a[8]) + + MUL31(a[6], a[7])) << 1); + t[14] = MUL31(a[7], a[7]) + + ((MUL31(a[6], a[8])) << 1); + t[15] = ((MUL31(a[7], a[8])) << 1); + t[16] = MUL31(a[8], a[8]); + + /* + * Propagate carries. 
+ */ + cc = 0; + for (i = 0; i < 17; i ++) { + uint64_t w; + + w = t[i] + cc; + d[i] = (uint32_t)w & 0x3FFFFFFF; + cc = w >> 30; + } + d[17] = (uint32_t)cc; +} + +/* + * Base field modulus for P-256. 
+ */ +static const uint32_t F256[] = { + + 0x3FFFFFFF, 0x3FFFFFFF, 0x3FFFFFFF, 0x0000003F, 0x00000000, + 0x00000000, 0x00001000, 0x3FFFC000, 0x0000FFFF +}; + +/* + * The 'b' curve equation coefficient for P-256. + */ +static const uint32_t P256_B[] = { + + 0x27D2604B, 0x2F38F0F8, 0x053B0F63, 0x0741AC33, 0x1886BC65, + 0x2EF555DA, 0x293E7B3E, 0x0D762A8E, 0x00005AC6 +}; + +/* + * Addition in the field. Source operands shall fit on 257 bits; output + * will be lower than twice the modulus. + */ +static void +add_f256(uint32_t *d, const uint32_t *a, const uint32_t *b) +{ + uint32_t w, cc; + int i; + + cc = 0; + for (i = 0; i < 9; i ++) { + w = a[i] + b[i] + cc; + d[i] = w & 0x3FFFFFFF; + cc = w >> 30; + } + /* + * w still holds the unmasked top word; its bits from bit 16 + * upwards are the excess beyond 2^256. That excess is folded + * back using 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p. + */ + w >>= 16; + d[8] &= 0xFFFF; + d[3] -= w << 6; + d[6] -= w << 12; + d[7] += w << 14; + cc = w; + for (i = 0; i < 9; i ++) { + w = d[i] + cc; + d[i] = w & 0x3FFFFFFF; + cc = ARSH(w, 30); + } +} + +/* + * Subtraction in the field. Source operands shall be smaller than twice + * the modulus; the result will fulfil the same property. + */ +static void +sub_f256(uint32_t *d, const uint32_t *a, const uint32_t *b) +{ + uint32_t w, cc; + int i; + + /* + * We really compute a - b + 2*p to make sure that the result is + * positive. 
+ */ + w = a[0] - b[0] - 0x00002; + d[0] = w & 0x3FFFFFFF; + w = a[1] - b[1] + ARSH(w, 30); + d[1] = w & 0x3FFFFFFF; + w = a[2] - b[2] + ARSH(w, 30); + d[2] = w & 0x3FFFFFFF; + w = a[3] - b[3] + ARSH(w, 30) + 0x00080; + d[3] = w & 0x3FFFFFFF; + w = a[4] - b[4] + ARSH(w, 30); + d[4] = w & 0x3FFFFFFF; + w = a[5] - b[5] + ARSH(w, 30); + d[5] = w & 0x3FFFFFFF; + w = a[6] - b[6] + ARSH(w, 30) + 0x02000; + d[6] = w & 0x3FFFFFFF; + w = a[7] - b[7] + ARSH(w, 30) - 0x08000; + d[7] = w & 0x3FFFFFFF; + w = a[8] - b[8] + ARSH(w, 30) + 0x20000; + d[8] = w & 0xFFFF; + w >>= 16; + d[8] &= 0xFFFF; + d[3] -= w << 6; + d[6] -= w << 12; + d[7] += w << 14; + cc = w; + for (i = 0; i < 9; i ++) { + w = d[i] + cc; + d[i] = w & 0x3FFFFFFF; + cc = ARSH(w, 30); + } +} + +/* + * Compute a multiplication in F256. Source operands shall be less than + * twice the modulus. + */ +static void +mul_f256(uint32_t *d, const uint32_t *a, const uint32_t *b) +{ + uint32_t t[18]; + uint64_t s[18]; + uint64_t cc, x; + uint32_t z, c; + int i; + + mul9(t, a, b); + + /* + * Modular reduction: each high word is added/subtracted where + * necessary. + * + * The modulus is: + * p = 2^256 - 2^224 + 2^192 + 2^96 - 1 + * Therefore: + * 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p + * + * For a word x at bit offset n (n >= 256), we have: + * x*2^n = x*2^(n-32) - x*2^(n-64) + * - x*2^(n - 160) + x*2^(n-256) mod p + * + * Thus, we can nullify the high word if we reinject it at some + * proper emplacements. + * + * We use 64-bit intermediate words to allow for carries to + * accumulate easily, before performing the final propagation. 
+ */ + for (i = 0; i < 18; i ++) { + s[i] = t[i]; + } + + for (i = 17; i >= 9; i --) { + uint64_t y; + + y = s[i]; + s[i - 1] += ARSHW(y, 2); + s[i - 2] += (y << 28) & 0x3FFFFFFF; + s[i - 2] -= ARSHW(y, 4); + s[i - 3] -= (y << 26) & 0x3FFFFFFF; + s[i - 5] -= ARSHW(y, 10); + s[i - 6] -= (y << 20) & 0x3FFFFFFF; + s[i - 8] += ARSHW(y, 16); + s[i - 9] += (y << 14) & 0x3FFFFFFF; + } + + /* + * Carry propagation must be signed. Moreover, we may have overdone + * it a bit, and obtain a negative result. + * + * The loop above ran 9 times; each time, each word was augmented + * by at most one extra word (in absolute value). Thus, the top + * word must in fine fit in 39 bits, so the carry below will fit + * on 9 bits. + */ + cc = 0; + for (i = 0; i < 9; i ++) { + x = s[i] + cc; + d[i] = (uint32_t)x & 0x3FFFFFFF; + cc = ARSHW(x, 30); + } + + /* + * All nine words fit on 30 bits, but there may be an extra + * carry for a few bits (at most 9), and that carry may be + * negative. Moreover, we want the result to fit on 257 bits. + * The two lines below ensure that the word in d[] has length + * 256 bits, and the (signed) carry (beyond 2^256) is in cc. The + * significant length of cc is less than 24 bits, so we will be + * able to switch to 32-bit operations. + */ + cc = ARSHW(x, 16); + d[8] &= 0xFFFF; + + /* + * One extra round of reduction, for cc*2^256, which means + * adding cc*(2^224-2^192-2^96+1) to a 256-bit (nonnegative) + * value. If cc is negative, then it may happen (rarely, but + * not negligibly so) that the result would be negative. In + * order to avoid that, if cc is negative, then we add the + * modulus once. Note that if cc is negative, then propagating + * that carry must yield a value lower than the modulus, so + * adding the modulus once will keep the final result under + * twice the modulus. 
+ */ + z = (uint32_t)cc; + d[3] -= z << 6; + d[6] -= (z << 12) & 0x3FFFFFFF; + d[7] -= ARSH(z, 18); + d[7] += (z << 14) & 0x3FFFFFFF; + d[8] += ARSH(z, 16); + c = z >> 31; + d[0] -= c; + d[3] += c << 6; + d[6] += c << 12; + d[7] -= c << 14; + d[8] += c << 16; + for (i = 0; i < 9; i ++) { + uint32_t w; + + w = d[i] + z; + d[i] = w & 0x3FFFFFFF; + z = ARSH(w, 30); + } +} + +/* + * Compute a square in F256. Source operand shall be less than + * twice the modulus. + */ +static void +square_f256(uint32_t *d, const uint32_t *a) +{ + uint32_t t[18]; + uint64_t s[18]; + uint64_t cc, x; + uint32_t z, c; + int i; + + square9(t, a); + + /* + * Modular reduction: each high word is added/subtracted where + * necessary. + * + * The modulus is: + * p = 2^256 - 2^224 + 2^192 + 2^96 - 1 + * Therefore: + * 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p + * + * For a word x at bit offset n (n >= 256), we have: + * x*2^n = x*2^(n-32) - x*2^(n-64) + * - x*2^(n - 160) + x*2^(n-256) mod p + * + * Thus, we can nullify the high word if we reinject it at some + * proper emplacements. + * + * We use 64-bit intermediate words to allow for carries to + * accumulate easily, before performing the final propagation. + */ + for (i = 0; i < 18; i ++) { + s[i] = t[i]; + } + + for (i = 17; i >= 9; i --) { + uint64_t y; + + y = s[i]; + s[i - 1] += ARSHW(y, 2); + s[i - 2] += (y << 28) & 0x3FFFFFFF; + s[i - 2] -= ARSHW(y, 4); + s[i - 3] -= (y << 26) & 0x3FFFFFFF; + s[i - 5] -= ARSHW(y, 10); + s[i - 6] -= (y << 20) & 0x3FFFFFFF; + s[i - 8] += ARSHW(y, 16); + s[i - 9] += (y << 14) & 0x3FFFFFFF; + } + + /* + * Carry propagation must be signed. Moreover, we may have overdone + * it a bit, and obtain a negative result. + * + * The loop above ran 9 times; each time, each word was augmented + * by at most one extra word (in absolute value). Thus, the top + * word must in fine fit in 39 bits, so the carry below will fit + * on 9 bits. 
+ */ + cc = 0; + for (i = 0; i < 9; i ++) { + x = s[i] + cc; + d[i] = (uint32_t)x & 0x3FFFFFFF; + cc = ARSHW(x, 30); + } + + /* + * All nine words fit on 30 bits, but there may be an extra + * carry for a few bits (at most 9), and that carry may be + * negative. Moreover, we want the result to fit on 257 bits. + * The two lines below ensure that the word in d[] has length + * 256 bits, and the (signed) carry (beyond 2^256) is in cc. The + * significant length of cc is less than 24 bits, so we will be + * able to switch to 32-bit operations. + */ + cc = ARSHW(x, 16); + d[8] &= 0xFFFF; + + /* + * One extra round of reduction, for cc*2^256, which means + * adding cc*(2^224-2^192-2^96+1) to a 256-bit (nonnegative) + * value. If cc is negative, then it may happen (rarely, but + * not negligibly so) that the result would be negative. In + * order to avoid that, if cc is negative, then we add the + * modulus once. Note that if cc is negative, then propagating + * that carry must yield a value lower than the modulus, so + * adding the modulus once will keep the final result under + * twice the modulus. + */ + z = (uint32_t)cc; + d[3] -= z << 6; + d[6] -= (z << 12) & 0x3FFFFFFF; + d[7] -= ARSH(z, 18); + d[7] += (z << 14) & 0x3FFFFFFF; + d[8] += ARSH(z, 16); + c = z >> 31; + d[0] -= c; + d[3] += c << 6; + d[6] += c << 12; + d[7] -= c << 14; + d[8] += c << 16; + for (i = 0; i < 9; i ++) { + uint32_t w; + + w = d[i] + z; + d[i] = w & 0x3FFFFFFF; + z = ARSH(w, 30); + } +} + +/* + * Perform a "final reduction" in field F256 (field for curve P-256). + * The source value must be less than twice the modulus. If the value + * is not lower than the modulus, then the modulus is subtracted and + * this function returns 1; otherwise, it leaves it untouched and it + * returns 0. 
+ */ +static uint32_t +reduce_final_f256(uint32_t *d) +{ + uint32_t t[9]; + uint32_t cc; + int i; + + cc = 0; + for (i = 0; i < 9; i ++) { + uint32_t w; + + w = d[i] - F256[i] - cc; + cc = w >> 31; + t[i] = w & 0x3FFFFFFF; + } + /* + * cc is the final borrow of d - p: it is 1 when d < p. After the + * flip below, cc is 1 when d >= p, i.e. when the subtraction + * result held in t must replace d. + */ + cc ^= 1; + CCOPY(cc, d, t, sizeof t); + return cc; +} + +/* + * Jacobian coordinates for a point in P-256: affine coordinates (X,Y) + * are such that: + * X = x / z^2 + * Y = y / z^3 + * For the point at infinity, z = 0. + * Each point thus admits many possible representations. + * + * Coordinates are represented in arrays of 32-bit integers, each holding + * 30 bits of data. Values may also be slightly greater than the modulus, + * but they will always be lower than twice the modulus. + */ +typedef struct { + uint32_t x[9]; + uint32_t y[9]; + uint32_t z[9]; +} p256_jacobian; + +/* + * Convert a point to affine coordinates: + * - If the point is the point at infinity, then all three coordinates + * are set to 0. + * - Otherwise, the 'z' coordinate is set to 1, and the 'x' and 'y' + * coordinates are the 'X' and 'Y' affine coordinates. + * The coordinates are guaranteed to be lower than the modulus. + */ +static void +p256_to_affine(p256_jacobian *P) +{ + uint32_t t1[9], t2[9]; + int i; + + /* + * Invert z with a modular exponentiation: the modulus is + * p = 2^256 - 2^224 + 2^192 + 2^96 - 1, and the exponent is + * p-2. Exponent bit pattern (from high to low) is: + * - 32 bits of value 1 + * - 31 bits of value 0 + * - 1 bit of value 1 + * - 96 bits of value 0 + * - 94 bits of value 1 + * - 1 bit of value 0 + * - 1 bit of value 1 + * Thus, we precompute z^(2^31-1) to speed things up. + * + * If z = 0 (point at infinity) then the modular exponentiation + * will yield 0, which leads to the expected result (all three + * coordinates set to 0). + */ + + /* + * A simple square-and-multiply for z^(2^31-1). 
We could save about + * two dozen multiplications here with an addition chain, but + * this would require a bit more code, and extra stack buffers. + */ + memcpy(t1, P->z, sizeof P->z); + for (i = 0; i < 30; i ++) { + square_f256(t1, t1); + mul_f256(t1, t1, P->z); + } + + /* + * Square-and-multiply. Apart from the squarings, we have a few + * multiplications to set bits to 1; we multiply by the original z + * for setting 1 bit, and by t1 for setting 31 bits. + */ + memcpy(t2, P->z, sizeof P->z); + for (i = 1; i < 256; i ++) { + square_f256(t2, t2); + switch (i) { + case 31: + case 190: + case 221: + case 252: + mul_f256(t2, t2, t1); + break; + case 63: + case 253: + case 255: + mul_f256(t2, t2, P->z); + break; + } + } + + /* + * Now that we have 1/z, multiply x by 1/z^2 and y by 1/z^3. + */ + mul_f256(t1, t2, t2); + mul_f256(P->x, t1, P->x); + mul_f256(t1, t1, t2); + mul_f256(P->y, t1, P->y); + reduce_final_f256(P->x); + reduce_final_f256(P->y); + + /* + * Multiply z by 1/z. If z = 0, then this will yield 0, otherwise + * this will set z to 1. + */ + mul_f256(P->z, P->z, t2); + reduce_final_f256(P->z); +} + +/* + * Double a point in P-256. This function works for all valid points, + * including the point at infinity. + */ +static void +p256_double(p256_jacobian *Q) +{ + /* + * Doubling formulas are: + * + * s = 4*x*y^2 + * m = 3*(x + z^2)*(x - z^2) + * x' = m^2 - 2*s + * y' = m*(s - x') - 8*y^4 + * z' = 2*y*z + * + * These formulas work for all points, including points of order 2 + * and points at infinity: + * - If y = 0 then z' = 0. But there is no such point in P-256 + * anyway. + * - If z = 0 then z' = 0. + */ + uint32_t t1[9], t2[9], t3[9], t4[9]; + + /* + * Compute z^2 in t1. + */ + square_f256(t1, Q->z); + + /* + * Compute x+z^2 in t2 and x-z^2 in t1. + */ + add_f256(t2, Q->x, t1); + sub_f256(t1, Q->x, t1); + + /* + * Compute 3*(x+z^2)*(x-z^2) in t1. 
+ */ + mul_f256(t3, t1, t2); + add_f256(t1, t3, t3); + add_f256(t1, t3, t1); + + /* + * Compute 4*x*y^2 (in t2) and 2*y^2 (in t3). + */ + square_f256(t3, Q->y); + add_f256(t3, t3, t3); + mul_f256(t2, Q->x, t3); + add_f256(t2, t2, t2); + + /* + * Compute x' = m^2 - 2*s. + */ + square_f256(Q->x, t1); + sub_f256(Q->x, Q->x, t2); + sub_f256(Q->x, Q->x, t2); + + /* + * Compute z' = 2*y*z. + */ + mul_f256(t4, Q->y, Q->z); + add_f256(Q->z, t4, t4); + + /* + * Compute y' = m*(s - x') - 8*y^4. Note that we already have + * 2*y^2 in t3. + */ + sub_f256(t2, t2, Q->x); + mul_f256(Q->y, t1, t2); + square_f256(t4, t3); + add_f256(t4, t4, t4); + sub_f256(Q->y, Q->y, t4); +} + +/* + * Add point P2 to point P1. + * + * This function computes the wrong result in the following cases: + * + * - If P1 == 0 but P2 != 0 + * - If P1 != 0 but P2 == 0 + * - If P1 == P2 + * + * In all three cases, P1 is set to the point at infinity. + * + * Returned value is 0 if one of the following occurs: + * + * - P1 and P2 have the same Y coordinate + * - P1 == 0 and P2 == 0 + * - The Y coordinate of one of the points is 0 and the other point is + * the point at infinity. + * + * The third case cannot actually happen with valid points, since a point + * with Y == 0 is a point of order 2, and there is no point of order 2 on + * curve P-256. + * + * Therefore, assuming that P1 != 0 and P2 != 0 on input, then the caller + * can apply the following: + * + * - If the result is not the point at infinity, then it is correct. + * - Otherwise, if the returned value is 1, then this is a case of + * P1+P2 == 0, so the result is indeed the point at infinity. + * - Otherwise, P1 == P2, so a "double" operation should have been + * performed. 
+ */ +static uint32_t +p256_add(p256_jacobian *P1, const p256_jacobian *P2) +{ + /* + * Addition formulas are: + * + * u1 = x1 * z2^2 + * u2 = x2 * z1^2 + * s1 = y1 * z2^3 + * s2 = y2 * z1^3 + * h = u2 - u1 + * r = s2 - s1 + * x3 = r^2 - h^3 - 2 * u1 * h^2 + * y3 = r * (u1 * h^2 - x3) - s1 * h^3 + * z3 = h * z1 * z2 + */ + uint32_t t1[9], t2[9], t3[9], t4[9], t5[9], t6[9], t7[9]; + uint32_t ret; + int i; + + /* + * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3). + */ + square_f256(t3, P2->z); + mul_f256(t1, P1->x, t3); + mul_f256(t4, P2->z, t3); + mul_f256(t3, P1->y, t4); + + /* + * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). + */ + square_f256(t4, P1->z); + mul_f256(t2, P2->x, t4); + mul_f256(t5, P1->z, t4); + mul_f256(t4, P2->y, t5); + + /* + * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4). + * We need to test whether r is zero, so we will do some extra + * reduce. + */ + sub_f256(t2, t2, t1); + sub_f256(t4, t4, t3); + reduce_final_f256(t4); + ret = 0; + for (i = 0; i < 9; i ++) { + ret |= t4[i]; + } + /* Normalise: ret = 1 if r != 0, 0 if r == 0. */ + ret = (ret | -ret) >> 31; + + /* + * Compute u1*h^2 (in t6) and h^3 (in t5); + */ + square_f256(t7, t2); + mul_f256(t6, t1, t7); + mul_f256(t5, t7, t2); + + /* + * Compute x3 = r^2 - h^3 - 2*u1*h^2. + */ + square_f256(P1->x, t4); + sub_f256(P1->x, P1->x, t5); + sub_f256(P1->x, P1->x, t6); + sub_f256(P1->x, P1->x, t6); + + /* + * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. + */ + sub_f256(t6, t6, P1->x); + mul_f256(P1->y, t4, t6); + mul_f256(t1, t5, t3); + sub_f256(P1->y, P1->y, t1); + + /* + * Compute z3 = h*z1*z2. + */ + mul_f256(t1, P1->z, P2->z); + mul_f256(P1->z, t1, t2); + + return ret; +} + +/* + * Add point P2 to point P1. This is a specialised function for the + * case when P2 is a non-zero point in affine coordinate. + * + * This function computes the wrong result in the following cases: + * + * - If P1 == 0 + * - If P1 == P2 + * + * In both cases, P1 is set to the point at infinity. 
+ * + * Returned value is 0 if one of the following occurs: + * + * - P1 and P2 have the same Y coordinate + * - The Y coordinate of P2 is 0 and P1 is the point at infinity. + * + * The second case cannot actually happen with valid points, since a point + * with Y == 0 is a point of order 2, and there is no point of order 2 on + * curve P-256. + * + * Therefore, assuming that P1 != 0 on input, then the caller + * can apply the following: + * + * - If the result is not the point at infinity, then it is correct. + * - Otherwise, if the returned value is 1, then this is a case of + * P1+P2 == 0, so the result is indeed the point at infinity. + * - Otherwise, P1 == P2, so a "double" operation should have been + * performed. + */ +static uint32_t +p256_add_mixed(p256_jacobian *P1, const p256_jacobian *P2) +{ + /* + * Addition formulas are: + * + * u1 = x1 + * u2 = x2 * z1^2 + * s1 = y1 + * s2 = y2 * z1^3 + * h = u2 - u1 + * r = s2 - s1 + * x3 = r^2 - h^3 - 2 * u1 * h^2 + * y3 = r * (u1 * h^2 - x3) - s1 * h^3 + * z3 = h * z1 + */ + uint32_t t1[9], t2[9], t3[9], t4[9], t5[9], t6[9], t7[9]; + uint32_t ret; + int i; + + /* + * Compute u1 = x1 (in t1) and s1 = y1 (in t3). + */ + memcpy(t1, P1->x, sizeof t1); + memcpy(t3, P1->y, sizeof t3); + + /* + * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). + */ + square_f256(t4, P1->z); + mul_f256(t2, P2->x, t4); + mul_f256(t5, P1->z, t4); + mul_f256(t4, P2->y, t5); + + /* + * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4). + * We need to test whether r is zero, so we will do some extra + * reduce. + */ + sub_f256(t2, t2, t1); + sub_f256(t4, t4, t3); + reduce_final_f256(t4); + ret = 0; + for (i = 0; i < 9; i ++) { + ret |= t4[i]; + } + /* Normalise: ret = 1 if r != 0, 0 if r == 0. */ + ret = (ret | -ret) >> 31; + + /* + * Compute u1*h^2 (in t6) and h^3 (in t5); + */ + square_f256(t7, t2); + mul_f256(t6, t1, t7); + mul_f256(t5, t7, t2); + + /* + * Compute x3 = r^2 - h^3 - 2*u1*h^2. 
+ */ + square_f256(P1->x, t4); + sub_f256(P1->x, P1->x, t5); + sub_f256(P1->x, P1->x, t6); + sub_f256(P1->x, P1->x, t6); + + /* + * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. + */ + sub_f256(t6, t6, P1->x); + mul_f256(P1->y, t4, t6); + mul_f256(t1, t5, t3); + sub_f256(P1->y, P1->y, t1); + + /* + * Compute z3 = h*z1 (z2 is 1 since P2 is in affine coordinates). + */ + mul_f256(P1->z, P1->z, t2); + + return ret; +} + +/* + * Decode a P-256 point. This function does not support the point at + * infinity. Returned value is 0 if the point is invalid, 1 otherwise. + */ +static uint32_t +p256_decode(p256_jacobian *P, const void *src, size_t len) +{ + const unsigned char *buf; + uint32_t tx[9], ty[9], t1[9], t2[9]; + uint32_t bad; + int i; + + if (len != 65) { + return 0; + } + buf = src; + + /* + * First byte must be 0x04 (uncompressed format). We could support + * "hybrid format" (first byte is 0x06 or 0x07, and encodes the + * least significant bit of the Y coordinate), but it is explicitly + * forbidden by RFC 5480 (section 2.2). + */ + bad = NEQ(buf[0], 0x04); + + /* + * Decode the coordinates, and check that they are both lower + * than the modulus. + */ + tx[8] = be8_to_le30(tx, buf + 1, 32); + ty[8] = be8_to_le30(ty, buf + 33, 32); + bad |= reduce_final_f256(tx); + bad |= reduce_final_f256(ty); + + /* + * Check curve equation. + */ + square_f256(t1, tx); + mul_f256(t1, tx, t1); + square_f256(t2, ty); + sub_f256(t1, t1, tx); + sub_f256(t1, t1, tx); + sub_f256(t1, t1, tx); + add_f256(t1, t1, P256_B); + sub_f256(t1, t1, t2); + reduce_final_f256(t1); + for (i = 0; i < 9; i ++) { + bad |= t1[i]; + } + + /* + * Copy coordinates to the point structure. + */ + memcpy(P->x, tx, sizeof tx); + memcpy(P->y, ty, sizeof ty); + memset(P->z, 0, sizeof P->z); + P->z[0] = 1; + return EQ(bad, 0); +} + +/* + * Encode a point into a buffer. This function assumes that the point is + * valid, in affine coordinates, and not the point at infinity. 
+ */ +static void +p256_encode(void *dst, const p256_jacobian *P) +{ + unsigned char *buf; + + buf = dst; + buf[0] = 0x04; + le30_to_be8(buf + 1, 32, P->x); + le30_to_be8(buf + 33, 32, P->y); +} + +/* + * Multiply a curve point by an integer. The integer is assumed to be + * lower than the curve order, and the base point must not be the point + * at infinity. + */ +static void +p256_mul(p256_jacobian *P, const unsigned char *x, size_t xlen) +{ + /* + * qz is a flag that is initially 1, and remains equal to 1 + * as long as the point is the point at infinity. + * + * We use a 2-bit window to handle multiplier bits by pairs. + * The precomputed window really is the points P2 and P3. + */ + uint32_t qz; + p256_jacobian P2, P3, Q, T, U; + + /* + * Compute window values. + */ + P2 = *P; + p256_double(&P2); + P3 = *P; + p256_add(&P3, &P2); + + /* + * We start with Q = 0. We process multiplier bits 2 by 2. + */ + memset(&Q, 0, sizeof Q); + qz = 1; + while (xlen -- > 0) { + int k; + + for (k = 6; k >= 0; k -= 2) { + uint32_t bits; + uint32_t bnz; + + p256_double(&Q); + p256_double(&Q); + T = *P; + U = Q; + bits = (*x >> k) & (uint32_t)3; + bnz = NEQ(bits, 0); + CCOPY(EQ(bits, 2), &T, &P2, sizeof T); + CCOPY(EQ(bits, 3), &T, &P3, sizeof T); + p256_add(&U, &T); + /* + * T is the window point selected by the two bits and U + * is Q + T. If Q was still the point at infinity (qz + * set), the addition result is not usable and T itself + * becomes Q; otherwise U becomes Q. When both bits are + * zero, Q is left unchanged. + */ + CCOPY(bnz & qz, &Q, &T, sizeof Q); + CCOPY(bnz & ~qz, &Q, &U, sizeof Q); + qz &= ~bnz; + } + x ++; + } + *P = Q; +} + +/* + * Precomputed window: k*G points, where G is the curve generator, and k + * is an integer from 1 to 15 (inclusive). The X and Y coordinates of + * the point are encoded as 9 words of 30 bits each (little-endian + * order). 
+ */ +static const uint32_t Gwin[15][18] = { + + { 0x1898C296, 0x1284E517, 0x1EB33A0F, 0x00DF604B, + 0x2440F277, 0x339B958E, 0x04247F8B, 0x347CB84B, + 0x00006B17, 0x37BF51F5, 0x2ED901A0, 0x3315ECEC, + 0x338CD5DA, 0x0F9E162B, 0x1FAD29F0, 0x27F9B8EE, + 0x10B8BF86, 0x00004FE3 }, + + { 0x07669978, 0x182D23F1, 0x3F21B35A, 0x225A789D, + 0x351AC3C0, 0x08E00C12, 0x34F7E8A5, 0x1EC62340, + 0x00007CF2, 0x227873D1, 0x3812DE74, 0x0E982299, + 0x1F6B798F, 0x3430DBBA, 0x366B1A7D, 0x2D040293, + 0x154436E3, 0x00000777 }, + + { 0x06E7FD6C, 0x2D05986F, 0x3ADA985F, 0x31ADC87B, + 0x0BF165E6, 0x1FBE5475, 0x30A44C8F, 0x3934698C, + 0x00005ECB, 0x227D5032, 0x29E6C49E, 0x04FB83D9, + 0x0AAC0D8E, 0x24A2ECD8, 0x2C1B3869, 0x0FF7E374, + 0x19031266, 0x00008734 }, + + { 0x2B030852, 0x024C0911, 0x05596EF5, 0x07F8B6DE, + 0x262BD003, 0x3779967B, 0x08FBBA02, 0x128D4CB4, + 0x0000E253, 0x184ED8C6, 0x310B08FC, 0x30EE0055, + 0x3F25B0FC, 0x062D764E, 0x3FB97F6A, 0x33CC719D, + 0x15D69318, 0x0000E0F1 }, + + { 0x03D033ED, 0x05552837, 0x35BE5242, 0x2320BF47, + 0x268FDFEF, 0x13215821, 0x140D2D78, 0x02DE9454, + 0x00005159, 0x3DA16DA4, 0x0742ED13, 0x0D80888D, + 0x004BC035, 0x0A79260D, 0x06FCDAFE, 0x2727D8AE, + 0x1F6A2412, 0x0000E0C1 }, + + { 0x3C2291A9, 0x1AC2ABA4, 0x3B215B4C, 0x131D037A, + 0x17DDE302, 0x0C90B2E2, 0x0602C92D, 0x05CA9DA9, + 0x0000B01A, 0x0FC77FE2, 0x35F1214E, 0x07E16BDF, + 0x003DDC07, 0x2703791C, 0x3038B7EE, 0x3DAD56FE, + 0x041D0C8D, 0x0000E85C }, + + { 0x3187B2A3, 0x0018A1C0, 0x00FEF5B3, 0x3E7E2E2A, + 0x01FB607E, 0x2CC199F0, 0x37B4625B, 0x0EDBE82F, + 0x00008E53, 0x01F400B4, 0x15786A1B, 0x3041B21C, + 0x31CD8CF2, 0x35900053, 0x1A7E0E9B, 0x318366D0, + 0x076F780C, 0x000073EB }, + + { 0x1B6FB393, 0x13767707, 0x3CE97DBB, 0x348E2603, + 0x354CADC1, 0x09D0B4EA, 0x1B053404, 0x1DE76FBA, + 0x000062D9, 0x0F09957E, 0x295029A8, 0x3E76A78D, + 0x3B547DAE, 0x27CEE0A2, 0x0575DC45, 0x1D8244FF, + 0x332F647A, 0x0000AD5A }, + + { 0x10949EE0, 0x1E7A292E, 0x06DF8B3D, 0x02B2E30B, + 0x31F8729E, 0x24E35475, 0x30B71878, 
0x35EDBFB7, + 0x0000EA68, 0x0DD048FA, 0x21688929, 0x0DE823FE, + 0x1C53FAA9, 0x0EA0C84D, 0x052A592A, 0x1FCE7870, + 0x11325CB2, 0x00002A27 }, + + { 0x04C5723F, 0x30D81A50, 0x048306E4, 0x329B11C7, + 0x223FB545, 0x085347A8, 0x2993E591, 0x1B5ACA8E, + 0x0000CEF6, 0x04AF0773, 0x28D2EEA9, 0x2751EEEC, + 0x037B4A7F, 0x3B4C1059, 0x08F37674, 0x2AE906E1, + 0x18A88A6A, 0x00008786 }, + + { 0x34BC21D1, 0x0CCE474D, 0x15048BF4, 0x1D0BB409, + 0x021CDA16, 0x20DE76C3, 0x34C59063, 0x04EDE20E, + 0x00003ED1, 0x282A3740, 0x0BE3BBF3, 0x29889DAE, + 0x03413697, 0x34C68A09, 0x210EBE93, 0x0C8A224C, + 0x0826B331, 0x00009099 }, + + { 0x0624E3C4, 0x140317BA, 0x2F82C99D, 0x260C0A2C, + 0x25D55179, 0x194DCC83, 0x3D95E462, 0x356F6A05, + 0x0000741D, 0x0D4481D3, 0x2657FC8B, 0x1BA5CA71, + 0x3AE44B0D, 0x07B1548E, 0x0E0D5522, 0x05FDC567, + 0x2D1AA70E, 0x00000770 }, + + { 0x06072C01, 0x23857675, 0x1EAD58A9, 0x0B8A12D9, + 0x1EE2FC79, 0x0177CB61, 0x0495A618, 0x20DEB82B, + 0x0000177C, 0x2FC7BFD8, 0x310EEF8B, 0x1FB4DF39, + 0x3B8530E8, 0x0F4E7226, 0x0246B6D0, 0x2A558A24, + 0x163353AF, 0x000063BB }, + + { 0x24D2920B, 0x1C249DCC, 0x2069C5E5, 0x09AB2F9E, + 0x36DF3CF1, 0x1991FD0C, 0x062B97A7, 0x1E80070E, + 0x000054E7, 0x20D0B375, 0x2E9F20BD, 0x35090081, + 0x1C7A9DDC, 0x22E7C371, 0x087E3016, 0x03175421, + 0x3C6ECA7D, 0x0000F599 }, + + { 0x259B9D5F, 0x0D9A318F, 0x23A0EF16, 0x00EBE4B7, + 0x088265AE, 0x2CDE2666, 0x2BAE7ADF, 0x1371A5C6, + 0x0000F045, 0x0D034F36, 0x1F967378, 0x1B5FA3F4, + 0x0EC8739D, 0x1643E62A, 0x1653947E, 0x22D1F4E6, + 0x0FB8D64B, 0x0000B5B9 } +}; + +/* + * Lookup one of the Gwin[] values, by index. This is constant-time. 
+ */ +static void +lookup_Gwin(p256_jacobian *T, uint32_t idx) +{ + uint32_t xy[18]; + uint32_t k; + size_t u; + + memset(xy, 0, sizeof xy); + for (k = 0; k < 15; k ++) { + uint32_t m; + + m = -EQ(idx, k + 1); + for (u = 0; u < 18; u ++) { + xy[u] |= m & Gwin[k][u]; + } + } + memcpy(T->x, &xy[0], sizeof T->x); + memcpy(T->y, &xy[9], sizeof T->y); + memset(T->z, 0, sizeof T->z); + T->z[0] = 1; +} + +/* + * Multiply the generator by an integer. The integer is assumed non-zero + * and lower than the curve order. + */ +static void +p256_mulgen(p256_jacobian *P, const unsigned char *x, size_t xlen) +{ + /* + * qz is a flag that is initially 1, and remains equal to 1 + * as long as the point is the point at infinity. + * + * We use a 4-bit window to handle multiplier bits by groups + * of 4. The precomputed window is constant static data, with + * points in affine coordinates; we use a constant-time lookup. + */ + p256_jacobian Q; + uint32_t qz; + + memset(&Q, 0, sizeof Q); + qz = 1; + while (xlen -- > 0) { + int k; + unsigned bx; + + bx = *x ++; + for (k = 0; k < 2; k ++) { + uint32_t bits; + uint32_t bnz; + p256_jacobian T, U; + + p256_double(&Q); + p256_double(&Q); + p256_double(&Q); + p256_double(&Q); + bits = (bx >> 4) & 0x0F; + bnz = NEQ(bits, 0); + lookup_Gwin(&T, bits); + U = Q; + p256_add_mixed(&U, &T); + CCOPY(bnz & qz, &Q, &T, sizeof Q); + CCOPY(bnz & ~qz, &Q, &U, sizeof Q); + qz &= ~bnz; + bx <<= 4; + } + } + *P = Q; +} + +static const unsigned char P256_G[] = { + 0x04, 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8, + 0xBC, 0xE6, 0xE5, 0x63, 0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D, + 0x81, 0x2D, 0xEB, 0x33, 0xA0, 0xF4, 0xA1, 0x39, 0x45, 0xD8, + 0x98, 0xC2, 0x96, 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F, + 0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C, 0x0F, 0x9E, 0x16, 0x2B, + 0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, 0xCB, 0xB6, 0x40, + 0x68, 0x37, 0xBF, 0x51, 0xF5 +}; + +static const unsigned char P256_N[] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 
0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xBC, 0xE6, 0xFA, 0xAD, + 0xA7, 0x17, 0x9E, 0x84, 0xF3, 0xB9, 0xCA, 0xC2, 0xFC, 0x63, + 0x25, 0x51 +}; + +static const unsigned char * +api_generator(int curve, size_t *len) +{ + (void)curve; + *len = sizeof P256_G; + return P256_G; +} + +static const unsigned char * +api_order(int curve, size_t *len) +{ + (void)curve; + *len = sizeof P256_N; + return P256_N; +} + +static size_t +api_xoff(int curve, size_t *len) +{ + (void)curve; + *len = 32; + return 1; +} + +static uint32_t +api_mul(unsigned char *G, size_t Glen, + const unsigned char *x, size_t xlen, int curve) +{ + uint32_t r; + p256_jacobian P; + + (void)curve; + if (Glen != 65) { + return 0; + } + r = p256_decode(&P, G, Glen); + p256_mul(&P, x, xlen); + p256_to_affine(&P); + p256_encode(G, &P); + return r; +} + +static size_t +api_mulgen(unsigned char *R, + const unsigned char *x, size_t xlen, int curve) +{ + p256_jacobian P; + + (void)curve; + p256_mulgen(&P, x, xlen); + p256_to_affine(&P); + p256_encode(R, &P); + return 65; +} + +static uint32_t +api_muladd(unsigned char *A, const unsigned char *B, size_t len, + const unsigned char *x, size_t xlen, + const unsigned char *y, size_t ylen, int curve) +{ + p256_jacobian P, Q; + uint32_t r, t, z; + int i; + + (void)curve; + if (len != 65) { + return 0; + } + r = p256_decode(&P, A, len); + p256_mul(&P, x, xlen); + if (B == NULL) { + p256_mulgen(&Q, y, ylen); + } else { + r &= p256_decode(&Q, B, len); + p256_mul(&Q, y, ylen); + } + + /* + * The final addition may fail in case both points are equal. + */ + t = p256_add(&P, &Q); + reduce_final_f256(P.z); + z = 0; + for (i = 0; i < 9; i ++) { + z |= P.z[i]; + } + z = EQ(z, 0); + p256_double(&Q); + + /* + * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). 
So we + * have the following: + * + * z = 0, t = 0 return P (normal addition) + * z = 0, t = 1 return P (normal addition) + * z = 1, t = 0 return Q (a 'double' case) + * z = 1, t = 1 report an error (P+Q = 0) + */ + CCOPY(z & ~t, &P, &Q, sizeof Q); + p256_to_affine(&P); + p256_encode(A, &P); + r &= ~(z & t); + return r; +} + +/* see bearssl_ec.h */ +const br_ec_impl br_ec_p256_m31 = { + (uint32_t)0x00800000, + &api_generator, + &api_order, + &api_xoff, + &api_mul, + &api_mulgen, + &api_muladd +}; diff --git a/third_party/bearssl/src/ec_p256_m62.c b/third_party/bearssl/src/ec_p256_m62.c new file mode 100644 index 0000000..a431790 --- /dev/null +++ b/third_party/bearssl/src/ec_p256_m62.c @@ -0,0 +1,1765 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +#if BR_INT128 || BR_UMUL128 + +#if BR_UMUL128 +#include <intrin.h> +#endif + +static const unsigned char P256_G[] = { + 0x04, 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8, + 0xBC, 0xE6, 0xE5, 0x63, 0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D, + 0x81, 0x2D, 0xEB, 0x33, 0xA0, 0xF4, 0xA1, 0x39, 0x45, 0xD8, + 0x98, 0xC2, 0x96, 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F, + 0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C, 0x0F, 0x9E, 0x16, 0x2B, + 0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, 0xCB, 0xB6, 0x40, + 0x68, 0x37, 0xBF, 0x51, 0xF5 +}; + +static const unsigned char P256_N[] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xBC, 0xE6, 0xFA, 0xAD, + 0xA7, 0x17, 0x9E, 0x84, 0xF3, 0xB9, 0xCA, 0xC2, 0xFC, 0x63, + 0x25, 0x51 +}; + +static const unsigned char * +api_generator(int curve, size_t *len) +{ + (void)curve; + *len = sizeof P256_G; + return P256_G; +} + +static const unsigned char * +api_order(int curve, size_t *len) +{ + (void)curve; + *len = sizeof P256_N; + return P256_N; +} + +static size_t +api_xoff(int curve, size_t *len) +{ + (void)curve; + *len = 32; + return 1; +} + +/* + * A field element is encoded as five 64-bit integers, in basis 2^52. + * Limbs may occasionally exceed 2^52. + * + * A _partially reduced_ value is such that the following hold: + * - top limb is less than 2^48 + 2^30 + * - the other limbs fit on 53 bits each + * In particular, such a value is less than twice the modulus p. + */ + +#define BIT(n) ((uint64_t)1 << (n)) +#define MASK48 (BIT(48) - BIT(0)) +#define MASK52 (BIT(52) - BIT(0)) + +/* R = 2^260 mod p */ +static const uint64_t F256_R[] = { + 0x0000000000010, 0xF000000000000, 0xFFFFFFFFFFFFF, + 0xFFEFFFFFFFFFF, 0x00000000FFFFF +}; + +/* Curve equation is y^2 = x^3 - 3*x + B. This constant is B*R mod p + (Montgomery representation of B). 
*/ +static const uint64_t P256_B_MONTY[] = { + 0xDF6229C4BDDFD, 0xCA8843090D89C, 0x212ED6ACF005C, + 0x83415A220ABF7, 0x0C30061DD4874 +}; + +/* + * Addition in the field. Carry propagation is not performed. + * On input, limbs may be up to 63 bits each; on output, they will + * be up to one bit more than on input. + */ +static inline void +f256_add(uint64_t *d, const uint64_t *a, const uint64_t *b) +{ + d[0] = a[0] + b[0]; + d[1] = a[1] + b[1]; + d[2] = a[2] + b[2]; + d[3] = a[3] + b[3]; + d[4] = a[4] + b[4]; +} + +/* + * Partially reduce the provided value. + * Input: limbs can go up to 61 bits each. + * Output: partially reduced. + */ +static inline void +f256_partial_reduce(uint64_t *a) +{ + uint64_t w, cc, s; + + /* + * Propagate carries. + */ + w = a[0]; + a[0] = w & MASK52; + cc = w >> 52; + w = a[1] + cc; + a[1] = w & MASK52; + cc = w >> 52; + w = a[2] + cc; + a[2] = w & MASK52; + cc = w >> 52; + w = a[3] + cc; + a[3] = w & MASK52; + cc = w >> 52; + a[4] += cc; + + s = a[4] >> 48; /* s < 2^14 */ + a[0] += s; /* a[0] < 2^52 + 2^14 */ + w = a[1] - (s << 44); + a[1] = w & MASK52; /* a[1] < 2^52 */ + cc = -(w >> 52) & 0xFFF; /* cc < 16 */ + w = a[2] - cc; + a[2] = w & MASK52; /* a[2] < 2^52 */ + cc = w >> 63; /* cc = 0 or 1 */ + w = a[3] - cc - (s << 36); + a[3] = w & MASK52; /* a[3] < 2^52 */ + cc = w >> 63; /* cc = 0 or 1 */ + w = a[4] & MASK48; + a[4] = w + (s << 16) - cc; /* a[4] < 2^48 + 2^30 */ +} + +/* + * Subtraction in the field. + * Input: limbs must fit on 60 bits each; in particular, the complete + * integer will be less than 2^268 + 2^217. + * Output: partially reduced. + */ +static inline void +f256_sub(uint64_t *d, const uint64_t *a, const uint64_t *b) +{ + uint64_t t[5], w, s, cc; + + /* + * We compute d = 2^13*p + a - b; this ensures a positive + * intermediate value. + * + * Each individual addition/subtraction may yield a positive or + * negative result; thus, we need to handle a signed carry, thus + * with sign extension. 
We prefer not to use signed types (int64_t) + * because conversion from unsigned to signed is cumbersome (a + * direct cast with the top bit set is undefined behavior; instead, + * we have to use pointer aliasing, using the guaranteed properties + * of exact-width types, but this requires the compiler to optimize + * away the writes and reads from RAM), and right-shifting a + * signed negative value is implementation-defined. Therefore, + * we use a custom sign extension. + */ + + w = a[0] - b[0] - BIT(13); + t[0] = w & MASK52; + cc = w >> 52; + cc |= -(cc & BIT(11)); + w = a[1] - b[1] + cc; + t[1] = w & MASK52; + cc = w >> 52; + cc |= -(cc & BIT(11)); + w = a[2] - b[2] + cc; + t[2] = (w & MASK52) + BIT(5); + cc = w >> 52; + cc |= -(cc & BIT(11)); + w = a[3] - b[3] + cc; + t[3] = (w & MASK52) + BIT(49); + cc = w >> 52; + cc |= -(cc & BIT(11)); + t[4] = (BIT(61) - BIT(29)) + a[4] - b[4] + cc; + + /* + * Perform partial reduction. Rule is: + * 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p + * + * At that point: + * 0 <= t[0] <= 2^52 - 1 + * 0 <= t[1] <= 2^52 - 1 + * 2^5 <= t[2] <= 2^52 + 2^5 - 1 + * 2^49 <= t[3] <= 2^52 + 2^49 - 1 + * 2^59 < t[4] <= 2^61 + 2^60 - 2^29 + * + * Thus, the value 's' (t[4] / 2^48) will be necessarily + * greater than 2048, and less than 12288. + */ + s = t[4] >> 48; + + d[0] = t[0] + s; /* d[0] <= 2^52 + 12287 */ + w = t[1] - (s << 44); + d[1] = w & MASK52; /* d[1] <= 2^52 - 1 */ + cc = -(w >> 52) & 0xFFF; /* cc <= 48 */ + w = t[2] - cc; + cc = w >> 63; /* cc = 0 or 1 */ + d[2] = w + (cc << 52); /* d[2] <= 2^52 + 31 */ + w = t[3] - cc - (s << 36); + cc = w >> 63; /* cc = 0 or 1 */ + d[3] = w + (cc << 52); /* d[3] <= 2^52 + 2^49 - 1 */ + d[4] = (t[4] & MASK48) + (s << 16) - cc; /* d[4] < 2^48 + 2^30 */ + + /* + * If s = 0, then none of the limbs is modified, and there cannot + * be an overflow; if s != 0, then (s << 16) > cc, and there is + * no overflow either. + */ +} + +/* + * Montgomery multiplication in the field. 
+ * Input: limbs must fit on 56 bits each. + * Output: partially reduced. + */ +static void +f256_montymul(uint64_t *d, const uint64_t *a, const uint64_t *b) +{ +#if BR_INT128 + + int i; + uint64_t t[5]; + + t[0] = 0; + t[1] = 0; + t[2] = 0; + t[3] = 0; + t[4] = 0; + for (i = 0; i < 5; i ++) { + uint64_t x, f, cc, w, s; + unsigned __int128 z; + + /* + * Since limbs of a[] and b[] fit on 56 bits each, + * each individual product fits on 112 bits. Also, + * the factor f fits on 52 bits, so f<<48 fits on + * 112 bits too. This guarantees that carries (cc) + * will fit on 62 bits, thus no overflow. + * + * The operations below compute: + * t <- (t + x*b + f*p) / 2^52 + */ + x = a[i]; + z = (unsigned __int128)b[0] * (unsigned __int128)x + + (unsigned __int128)t[0]; + f = (uint64_t)z & MASK52; + cc = (uint64_t)(z >> 52); + z = (unsigned __int128)b[1] * (unsigned __int128)x + + (unsigned __int128)t[1] + cc + + ((unsigned __int128)f << 44); + t[0] = (uint64_t)z & MASK52; + cc = (uint64_t)(z >> 52); + z = (unsigned __int128)b[2] * (unsigned __int128)x + + (unsigned __int128)t[2] + cc; + t[1] = (uint64_t)z & MASK52; + cc = (uint64_t)(z >> 52); + z = (unsigned __int128)b[3] * (unsigned __int128)x + + (unsigned __int128)t[3] + cc + + ((unsigned __int128)f << 36); + t[2] = (uint64_t)z & MASK52; + cc = (uint64_t)(z >> 52); + z = (unsigned __int128)b[4] * (unsigned __int128)x + + (unsigned __int128)t[4] + cc + + ((unsigned __int128)f << 48) + - ((unsigned __int128)f << 16); + t[3] = (uint64_t)z & MASK52; + t[4] = (uint64_t)(z >> 52); + + /* + * t[4] may be up to 62 bits here; we need to do a + * partial reduction. Note that limbs t[0] to t[3] + * fit on 52 bits each. 
+ */ + s = t[4] >> 48; /* s < 2^14 */ + t[0] += s; /* t[0] < 2^52 + 2^14 */ + w = t[1] - (s << 44); + t[1] = w & MASK52; /* t[1] < 2^52 */ + cc = -(w >> 52) & 0xFFF; /* cc < 16 */ + w = t[2] - cc; + t[2] = w & MASK52; /* t[2] < 2^52 */ + cc = w >> 63; /* cc = 0 or 1 */ + w = t[3] - cc - (s << 36); + t[3] = w & MASK52; /* t[3] < 2^52 */ + cc = w >> 63; /* cc = 0 or 1 */ + w = t[4] & MASK48; + t[4] = w + (s << 16) - cc; /* t[4] < 2^48 + 2^30 */ + + /* + * The final t[4] cannot overflow because cc is 0 or 1, + * and cc can be 1 only if s != 0. + */ + } + + d[0] = t[0]; + d[1] = t[1]; + d[2] = t[2]; + d[3] = t[3]; + d[4] = t[4]; + +#elif BR_UMUL128 + + int i; + uint64_t t[5]; + + t[0] = 0; + t[1] = 0; + t[2] = 0; + t[3] = 0; + t[4] = 0; + for (i = 0; i < 5; i ++) { + uint64_t x, f, cc, w, s, zh, zl; + unsigned char k; + + /* + * Since limbs of a[] and b[] fit on 56 bits each, + * each individual product fits on 112 bits. Also, + * the factor f fits on 52 bits, so f<<48 fits on + * 112 bits too. This guarantees that carries (cc) + * will fit on 62 bits, thus no overflow. 
+ * + * The operations below compute: + * t <- (t + x*b + f*p) / 2^52 + */ + x = a[i]; + zl = _umul128(b[0], x, &zh); + k = _addcarry_u64(0, t[0], zl, &zl); + (void)_addcarry_u64(k, 0, zh, &zh); + f = zl & MASK52; + cc = (zl >> 52) | (zh << 12); + + zl = _umul128(b[1], x, &zh); + k = _addcarry_u64(0, t[1], zl, &zl); + (void)_addcarry_u64(k, 0, zh, &zh); + k = _addcarry_u64(0, cc, zl, &zl); + (void)_addcarry_u64(k, 0, zh, &zh); + k = _addcarry_u64(0, f << 44, zl, &zl); + (void)_addcarry_u64(k, f >> 20, zh, &zh); + t[0] = zl & MASK52; + cc = (zl >> 52) | (zh << 12); + + zl = _umul128(b[2], x, &zh); + k = _addcarry_u64(0, t[2], zl, &zl); + (void)_addcarry_u64(k, 0, zh, &zh); + k = _addcarry_u64(0, cc, zl, &zl); + (void)_addcarry_u64(k, 0, zh, &zh); + t[1] = zl & MASK52; + cc = (zl >> 52) | (zh << 12); + + zl = _umul128(b[3], x, &zh); + k = _addcarry_u64(0, t[3], zl, &zl); + (void)_addcarry_u64(k, 0, zh, &zh); + k = _addcarry_u64(0, cc, zl, &zl); + (void)_addcarry_u64(k, 0, zh, &zh); + k = _addcarry_u64(0, f << 36, zl, &zl); + (void)_addcarry_u64(k, f >> 28, zh, &zh); + t[2] = zl & MASK52; + cc = (zl >> 52) | (zh << 12); + + zl = _umul128(b[4], x, &zh); + k = _addcarry_u64(0, t[4], zl, &zl); + (void)_addcarry_u64(k, 0, zh, &zh); + k = _addcarry_u64(0, cc, zl, &zl); + (void)_addcarry_u64(k, 0, zh, &zh); + k = _addcarry_u64(0, f << 48, zl, &zl); + (void)_addcarry_u64(k, f >> 16, zh, &zh); + k = _subborrow_u64(0, zl, f << 16, &zl); + (void)_subborrow_u64(k, zh, f >> 48, &zh); + t[3] = zl & MASK52; + t[4] = (zl >> 52) | (zh << 12); + + /* + * t[4] may be up to 62 bits here; we need to do a + * partial reduction. Note that limbs t[0] to t[3] + * fit on 52 bits each. 
+ */ + s = t[4] >> 48; /* s < 2^14 */ + t[0] += s; /* t[0] < 2^52 + 2^14 */ + w = t[1] - (s << 44); + t[1] = w & MASK52; /* t[1] < 2^52 */ + cc = -(w >> 52) & 0xFFF; /* cc < 16 */ + w = t[2] - cc; + t[2] = w & MASK52; /* t[2] < 2^52 */ + cc = w >> 63; /* cc = 0 or 1 */ + w = t[3] - cc - (s << 36); + t[3] = w & MASK52; /* t[3] < 2^52 */ + cc = w >> 63; /* cc = 0 or 1 */ + w = t[4] & MASK48; + t[4] = w + (s << 16) - cc; /* t[4] < 2^48 + 2^30 */ + + /* + * The final t[4] cannot overflow because cc is 0 or 1, + * and cc can be 1 only if s != 0. + */ + } + + d[0] = t[0]; + d[1] = t[1]; + d[2] = t[2]; + d[3] = t[3]; + d[4] = t[4]; + +#endif +} + +/* + * Montgomery squaring in the field; currently a basic wrapper around + * multiplication (inline, should be optimized away). + * TODO: see if some extra speed can be gained here. + */ +static inline void +f256_montysquare(uint64_t *d, const uint64_t *a) +{ + f256_montymul(d, a, a); +} + +/* + * Convert to Montgomery representation. + */ +static void +f256_tomonty(uint64_t *d, const uint64_t *a) +{ + /* + * R2 = 2^520 mod p. + * If R = 2^260 mod p, then R2 = R^2 mod p; and the Montgomery + * multiplication of a by R2 is: a*R2/R = a*R mod p, i.e. the + * conversion to Montgomery representation. + */ + static const uint64_t R2[] = { + 0x0000000000300, 0xFFFFFFFF00000, 0xFFFFEFFFFFFFB, + 0xFDFFFFFFFFFFF, 0x0000004FFFFFF + }; + + f256_montymul(d, a, R2); +} + +/* + * Convert from Montgomery representation. + */ +static void +f256_frommonty(uint64_t *d, const uint64_t *a) +{ + /* + * Montgomery multiplication by 1 is division by 2^260 modulo p. + */ + static const uint64_t one[] = { 1, 0, 0, 0, 0 }; + + f256_montymul(d, a, one); +} + +/* + * Inversion in the field. If the source value is 0 modulo p, then this + * returns 0 or p. This function uses Montgomery representation. + */ +static void +f256_invert(uint64_t *d, const uint64_t *a) +{ + /* + * We compute a^(p-2) mod p. 
The exponent pattern (from high to + * low) is: + * - 32 bits of value 1 + * - 31 bits of value 0 + * - 1 bit of value 1 + * - 96 bits of value 0 + * - 94 bits of value 1 + * - 1 bit of value 0 + * - 1 bit of value 1 + * To speed up the square-and-multiply algorithm, we precompute + * a^(2^31-1). + */ + + uint64_t r[5], t[5]; + int i; + + memcpy(t, a, sizeof t); + for (i = 0; i < 30; i ++) { + f256_montysquare(t, t); + f256_montymul(t, t, a); + } + + memcpy(r, t, sizeof t); + for (i = 224; i >= 0; i --) { + f256_montysquare(r, r); + switch (i) { + case 0: + case 2: + case 192: + case 224: + f256_montymul(r, r, a); + break; + case 3: + case 34: + case 65: + f256_montymul(r, r, t); + break; + } + } + memcpy(d, r, sizeof r); +} + +/* + * Finalize reduction. + * Input value should be partially reduced. + * On output, limbs a[0] to a[3] fit on 52 bits each, limb a[4] fits + * on 48 bits, and the integer is less than p. + */ +static inline void +f256_final_reduce(uint64_t *a) +{ + uint64_t r[5], t[5], w, cc; + int i; + + /* + * Propagate carries to ensure that limbs 0 to 3 fit on 52 bits. + */ + cc = 0; + for (i = 0; i < 5; i ++) { + w = a[i] + cc; + r[i] = w & MASK52; + cc = w >> 52; + } + + /* + * We compute t = r + (2^256 - p) = r + 2^224 - 2^192 - 2^96 + 1. + * If t < 2^256, then r < p, and we return r. Otherwise, we + * want to return r - p = t - 2^256. + */ + + /* + * Add 2^224 + 1, and propagate carries to ensure that limbs + * t[0] to t[3] fit in 52 bits each. + */ + w = r[0] + 1; + t[0] = w & MASK52; + cc = w >> 52; + w = r[1] + cc; + t[1] = w & MASK52; + cc = w >> 52; + w = r[2] + cc; + t[2] = w & MASK52; + cc = w >> 52; + w = r[3] + cc; + t[3] = w & MASK52; + cc = w >> 52; + t[4] = r[4] + cc + BIT(16); + + /* + * Subtract 2^192 + 2^96. Since we just added 2^224 + 1, the + * result cannot be negative. 
+ */ + w = t[1] - BIT(44); + t[1] = w & MASK52; + cc = w >> 63; + w = t[2] - cc; + t[2] = w & MASK52; + cc = w >> 63; + w = t[3] - BIT(36) - cc; + t[3] = w & MASK52; + cc = w >> 63; + t[4] -= cc; + + /* + * If the top limb t[4] fits on 48 bits, then r[] is already + * in the proper range. Otherwise, t[] is the value to return + * (truncated to 256 bits). + */ + cc = -(t[4] >> 48); + t[4] &= MASK48; + for (i = 0; i < 5; i ++) { + a[i] = r[i] ^ (cc & (r[i] ^ t[i])); + } +} + +/* + * Points in affine and Jacobian coordinates. + * + * - In affine coordinates, the point-at-infinity cannot be encoded. + * - Jacobian coordinates (X,Y,Z) correspond to affine (X/Z^2,Y/Z^3); + * if Z = 0 then this is the point-at-infinity. + */ +typedef struct { + uint64_t x[5]; + uint64_t y[5]; +} p256_affine; + +typedef struct { + uint64_t x[5]; + uint64_t y[5]; + uint64_t z[5]; +} p256_jacobian; + +/* + * Decode a field element (unsigned big endian notation). + */ +static void +f256_decode(uint64_t *a, const unsigned char *buf) +{ + uint64_t w0, w1, w2, w3; + + w3 = br_dec64be(buf + 0); + w2 = br_dec64be(buf + 8); + w1 = br_dec64be(buf + 16); + w0 = br_dec64be(buf + 24); + a[0] = w0 & MASK52; + a[1] = ((w0 >> 52) | (w1 << 12)) & MASK52; + a[2] = ((w1 >> 40) | (w2 << 24)) & MASK52; + a[3] = ((w2 >> 28) | (w3 << 36)) & MASK52; + a[4] = w3 >> 16; +} + +/* + * Encode a field element (unsigned big endian notation). The field + * element MUST be fully reduced. + */ +static void +f256_encode(unsigned char *buf, const uint64_t *a) +{ + uint64_t w0, w1, w2, w3; + + w0 = a[0] | (a[1] << 52); + w1 = (a[1] >> 12) | (a[2] << 40); + w2 = (a[2] >> 24) | (a[3] << 28); + w3 = (a[3] >> 36) | (a[4] << 16); + br_enc64be(buf + 0, w3); + br_enc64be(buf + 8, w2); + br_enc64be(buf + 16, w1); + br_enc64be(buf + 24, w0); +} + +/* + * Decode a point. The returned point is in Jacobian coordinates, but + * with z = 1. 
If the encoding is invalid, or encodes a point which is + * not on the curve, or encodes the point at infinity, then this function + * returns 0. Otherwise, 1 is returned. + * + * The buffer is assumed to have length exactly 65 bytes. + */ +static uint32_t +point_decode(p256_jacobian *P, const unsigned char *buf) +{ + uint64_t x[5], y[5], t[5], x3[5], tt; + uint32_t r; + + /* + * Header byte shall be 0x04. + */ + r = EQ(buf[0], 0x04); + + /* + * Decode X and Y coordinates, and convert them into + * Montgomery representation. + */ + f256_decode(x, buf + 1); + f256_decode(y, buf + 33); + f256_tomonty(x, x); + f256_tomonty(y, y); + + /* + * Verify y^2 = x^3 + A*x + B. In curve P-256, A = -3. + * Note that the Montgomery representation of 0 is 0. We must + * take care to apply the final reduction to make sure we have + * 0 and not p. + */ + f256_montysquare(t, y); + f256_montysquare(x3, x); + f256_montymul(x3, x3, x); + f256_sub(t, t, x3); + f256_add(t, t, x); + f256_add(t, t, x); + f256_add(t, t, x); + f256_sub(t, t, P256_B_MONTY); + f256_final_reduce(t); + tt = t[0] | t[1] | t[2] | t[3] | t[4]; + r &= EQ((uint32_t)(tt | (tt >> 32)), 0); + + /* + * Return the point in Jacobian coordinates (and Montgomery + * representation). + */ + memcpy(P->x, x, sizeof x); + memcpy(P->y, y, sizeof y); + memcpy(P->z, F256_R, sizeof F256_R); + return r; +} + +/* + * Final conversion for a point: + * - The point is converted back to affine coordinates. + * - Final reduction is performed. + * - The point is encoded into the provided buffer. + * + * If the point is the point-at-infinity, all operations are performed, + * but the buffer contents are indeterminate, and 0 is returned. Otherwise, + * the encoded point is written in the buffer, and 1 is returned. + */ +static uint32_t +point_encode(unsigned char *buf, const p256_jacobian *P) +{ + uint64_t t1[5], t2[5], z; + + /* Set t1 = 1/z^2 and t2 = 1/z^3. 
*/ + f256_invert(t2, P->z); + f256_montysquare(t1, t2); + f256_montymul(t2, t2, t1); + + /* Compute affine coordinates x (in t1) and y (in t2). */ + f256_montymul(t1, P->x, t1); + f256_montymul(t2, P->y, t2); + + /* Convert back from Montgomery representation, and finalize + reductions. */ + f256_frommonty(t1, t1); + f256_frommonty(t2, t2); + f256_final_reduce(t1); + f256_final_reduce(t2); + + /* Encode. */ + buf[0] = 0x04; + f256_encode(buf + 1, t1); + f256_encode(buf + 33, t2); + + /* Return success if and only if P->z != 0. */ + z = P->z[0] | P->z[1] | P->z[2] | P->z[3] | P->z[4]; + return NEQ((uint32_t)(z | z >> 32), 0); +} + +/* + * Point doubling in Jacobian coordinates: point P is doubled. + * Note: if the source point is the point-at-infinity, then the result is + * still the point-at-infinity, which is correct. Moreover, if the three + * coordinates were zero, then they still are zero in the returned value. + */ +static void +p256_double(p256_jacobian *P) +{ + /* + * Doubling formulas are: + * + * s = 4*x*y^2 + * m = 3*(x + z^2)*(x - z^2) + * x' = m^2 - 2*s + * y' = m*(s - x') - 8*y^4 + * z' = 2*y*z + * + * These formulas work for all points, including points of order 2 + * and points at infinity: + * - If y = 0 then z' = 0. But there is no such point in P-256 + * anyway. + * - If z = 0 then z' = 0. + */ + uint64_t t1[5], t2[5], t3[5], t4[5]; + + /* + * Compute z^2 in t1. + */ + f256_montysquare(t1, P->z); + + /* + * Compute x+z^2 in t2 and x-z^2 in t1. + */ + f256_add(t2, P->x, t1); + f256_sub(t1, P->x, t1); + + /* + * Compute 3*(x+z^2)*(x-z^2) in t1. + */ + f256_montymul(t3, t1, t2); + f256_add(t1, t3, t3); + f256_add(t1, t3, t1); + + /* + * Compute 4*x*y^2 (in t2) and 2*y^2 (in t3). + */ + f256_montysquare(t3, P->y); + f256_add(t3, t3, t3); + f256_montymul(t2, P->x, t3); + f256_add(t2, t2, t2); + + /* + * Compute x' = m^2 - 2*s. + */ + f256_montysquare(P->x, t1); + f256_sub(P->x, P->x, t2); + f256_sub(P->x, P->x, t2); + + /* + * Compute z' = 2*y*z. 
+ */ + f256_montymul(t4, P->y, P->z); + f256_add(P->z, t4, t4); + f256_partial_reduce(P->z); + + /* + * Compute y' = m*(s - x') - 8*y^4. Note that we already have + * 2*y^2 in t3. + */ + f256_sub(t2, t2, P->x); + f256_montymul(P->y, t1, t2); + f256_montysquare(t4, t3); + f256_add(t4, t4, t4); + f256_sub(P->y, P->y, t4); +} + +/* + * Point addition (Jacobian coordinates): P1 is replaced with P1+P2. + * This function computes the wrong result in the following cases: + * + * - If P1 == 0 but P2 != 0 + * - If P1 != 0 but P2 == 0 + * - If P1 == P2 + * + * In all three cases, P1 is set to the point at infinity. + * + * Returned value is 0 if one of the following occurs: + * + * - P1 and P2 have the same Y coordinate. + * - P1 == 0 and P2 == 0. + * - The Y coordinate of one of the points is 0 and the other point is + * the point at infinity. + * + * The third case cannot actually happen with valid points, since a point + * with Y == 0 is a point of order 2, and there is no point of order 2 on + * curve P-256. + * + * Therefore, assuming that P1 != 0 and P2 != 0 on input, then the caller + * can apply the following: + * + * - If the result is not the point at infinity, then it is correct. + * - Otherwise, if the returned value is 1, then this is a case of + * P1+P2 == 0, so the result is indeed the point at infinity. + * - Otherwise, P1 == P2, so a "double" operation should have been + * performed. + * + * Note that you can get a returned value of 0 with a correct result, + * e.g. if P1 and P2 have the same Y coordinate, but distinct X coordinates. 
+ */ +static uint32_t +p256_add(p256_jacobian *P1, const p256_jacobian *P2) +{ + /* + * Addition formulas are: + * + * u1 = x1 * z2^2 + * u2 = x2 * z1^2 + * s1 = y1 * z2^3 + * s2 = y2 * z1^3 + * h = u2 - u1 + * r = s2 - s1 + * x3 = r^2 - h^3 - 2 * u1 * h^2 + * y3 = r * (u1 * h^2 - x3) - s1 * h^3 + * z3 = h * z1 * z2 + */ + uint64_t t1[5], t2[5], t3[5], t4[5], t5[5], t6[5], t7[5], tt; + uint32_t ret; + + /* + * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3). + */ + f256_montysquare(t3, P2->z); + f256_montymul(t1, P1->x, t3); + f256_montymul(t4, P2->z, t3); + f256_montymul(t3, P1->y, t4); + + /* + * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). + */ + f256_montysquare(t4, P1->z); + f256_montymul(t2, P2->x, t4); + f256_montymul(t5, P1->z, t4); + f256_montymul(t4, P2->y, t5); + + /* + * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4). + * We need to test whether r is zero, so we apply an extra + * final reduction to r. + */ + f256_sub(t2, t2, t1); + f256_sub(t4, t4, t3); + f256_final_reduce(t4); + tt = t4[0] | t4[1] | t4[2] | t4[3] | t4[4]; + ret = (uint32_t)(tt | (tt >> 32)); + ret = (ret | -ret) >> 31; + + /* + * Compute u1*h^2 (in t6) and h^3 (in t5). + */ + f256_montysquare(t7, t2); + f256_montymul(t6, t1, t7); + f256_montymul(t5, t7, t2); + + /* + * Compute x3 = r^2 - h^3 - 2*u1*h^2. + */ + f256_montysquare(P1->x, t4); + f256_sub(P1->x, P1->x, t5); + f256_sub(P1->x, P1->x, t6); + f256_sub(P1->x, P1->x, t6); + + /* + * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. + */ + f256_sub(t6, t6, P1->x); + f256_montymul(P1->y, t4, t6); + f256_montymul(t1, t5, t3); + f256_sub(P1->y, P1->y, t1); + + /* + * Compute z3 = h*z1*z2. + */ + f256_montymul(t1, P1->z, P2->z); + f256_montymul(P1->z, t1, t2); + + return ret; +} + +/* + * Point addition (mixed coordinates): P1 is replaced with P1+P2. + * This is a specialised function for the case when P2 is a non-zero point + * in affine coordinates. 
+ * + * This function computes the wrong result in the following cases: + * + * - If P1 == 0 + * - If P1 == P2 + * + * In both cases, P1 is set to the point at infinity. + * + * Returned value is 0 if one of the following occurs: + * + * - P1 and P2 have the same Y (affine) coordinate. + * - The Y coordinate of P2 is 0 and P1 is the point at infinity. + * + * The second case cannot actually happen with valid points, since a point + * with Y == 0 is a point of order 2, and there is no point of order 2 on + * curve P-256. + * + * Therefore, assuming that P1 != 0 on input, then the caller + * can apply the following: + * + * - If the result is not the point at infinity, then it is correct. + * - Otherwise, if the returned value is 1, then this is a case of + * P1+P2 == 0, so the result is indeed the point at infinity. + * - Otherwise, P1 == P2, so a "double" operation should have been + * performed. + * + * Again, a value of 0 may be returned in some cases where the addition + * result is correct. + */ +static uint32_t +p256_add_mixed(p256_jacobian *P1, const p256_affine *P2) +{ + /* + * Addition formulas are: + * + * u1 = x1 + * u2 = x2 * z1^2 + * s1 = y1 + * s2 = y2 * z1^3 + * h = u2 - u1 + * r = s2 - s1 + * x3 = r^2 - h^3 - 2 * u1 * h^2 + * y3 = r * (u1 * h^2 - x3) - s1 * h^3 + * z3 = h * z1 + */ + uint64_t t1[5], t2[5], t3[5], t4[5], t5[5], t6[5], t7[5], tt; + uint32_t ret; + + /* + * Compute u1 = x1 (in t1) and s1 = y1 (in t3). + */ + memcpy(t1, P1->x, sizeof t1); + memcpy(t3, P1->y, sizeof t3); + + /* + * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). + */ + f256_montysquare(t4, P1->z); + f256_montymul(t2, P2->x, t4); + f256_montymul(t5, P1->z, t4); + f256_montymul(t4, P2->y, t5); + + /* + * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4). + * We need to test whether r is zero, so we apply an extra + * final reduction to r. 
+ */ + f256_sub(t2, t2, t1); + f256_sub(t4, t4, t3); + f256_final_reduce(t4); + tt = t4[0] | t4[1] | t4[2] | t4[3] | t4[4]; + ret = (uint32_t)(tt | (tt >> 32)); + ret = (ret | -ret) >> 31; + + /* + * Compute u1*h^2 (in t6) and h^3 (in t5). + */ + f256_montysquare(t7, t2); + f256_montymul(t6, t1, t7); + f256_montymul(t5, t7, t2); + + /* + * Compute x3 = r^2 - h^3 - 2*u1*h^2. + */ + f256_montysquare(P1->x, t4); + f256_sub(P1->x, P1->x, t5); + f256_sub(P1->x, P1->x, t6); + f256_sub(P1->x, P1->x, t6); + + /* + * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. + */ + f256_sub(t6, t6, P1->x); + f256_montymul(P1->y, t4, t6); + f256_montymul(t1, t5, t3); + f256_sub(P1->y, P1->y, t1); + + /* + * Compute z3 = h*z1 (P2 is affine, so z2 is implicitly 1). + */ + f256_montymul(P1->z, P1->z, t2); + + return ret; +} + +#if 0 +/* unused */ +/* + * Point addition (mixed coordinates, complete): P1 is replaced with P1+P2. + * This is a specialised function for the case when P2 is a non-zero point + * in affine coordinates. + * + * This function returns the correct result in all cases. + */ +static uint32_t +p256_add_complete_mixed(p256_jacobian *P1, const p256_affine *P2) +{ + /* + * Addtions formulas, in the general case, are: + * + * u1 = x1 + * u2 = x2 * z1^2 + * s1 = y1 + * s2 = y2 * z1^3 + * h = u2 - u1 + * r = s2 - s1 + * x3 = r^2 - h^3 - 2 * u1 * h^2 + * y3 = r * (u1 * h^2 - x3) - s1 * h^3 + * z3 = h * z1 + * + * These formulas mishandle the two following cases: + * + * - If P1 is the point-at-infinity (z1 = 0), then z3 is + * incorrectly set to 0. + * + * - If P1 = P2, then u1 = u2 and s1 = s2, and x3, y3 and z3 + * are all set to 0. + * + * However, if P1 + P2 = 0, then u1 = u2 but s1 != s2, and then + * we correctly get z3 = 0 (the point-at-infinity). + * + * To fix the case P1 = 0, we perform at the end a copy of P2 + * over P1, conditional to z1 = 0. + * + * For P1 = P2: in that case, both h and r are set to 0, and + * we get x3, y3 and z3 equal to 0. 
We can test for that + * occurrence to make a mask which will be all-one if P1 = P2, + * or all-zero otherwise; then we can compute the double of P2 + * and add it, combined with the mask, to (x3,y3,z3). + * + * Using the doubling formulas in p256_double() on (x2,y2), + * simplifying since P2 is affine (i.e. z2 = 1, implicitly), + * we get: + * s = 4*x2*y2^2 + * m = 3*(x2 + 1)*(x2 - 1) + * x' = m^2 - 2*s + * y' = m*(s - x') - 8*y2^4 + * z' = 2*y2 + * which requires only 6 multiplications. Added to the 11 + * multiplications of the normal mixed addition in Jacobian + * coordinates, we get a cost of 17 multiplications in total. + */ + uint64_t t1[5], t2[5], t3[5], t4[5], t5[5], t6[5], t7[5], tt, zz; + int i; + + /* + * Set zz to -1 if P1 is the point at infinity, 0 otherwise. + */ + zz = P1->z[0] | P1->z[1] | P1->z[2] | P1->z[3] | P1->z[4]; + zz = ((zz | -zz) >> 63) - (uint64_t)1; + + /* + * Compute u1 = x1 (in t1) and s1 = y1 (in t3). + */ + memcpy(t1, P1->x, sizeof t1); + memcpy(t3, P1->y, sizeof t3); + + /* + * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). + */ + f256_montysquare(t4, P1->z); + f256_montymul(t2, P2->x, t4); + f256_montymul(t5, P1->z, t4); + f256_montymul(t4, P2->y, t5); + + /* + * Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4). + * reduce. + */ + f256_sub(t2, t2, t1); + f256_sub(t4, t4, t3); + + /* + * If both h = 0 and r = 0, then P1 = P2, and we want to set + * the mask tt to -1; otherwise, the mask will be 0. + */ + f256_final_reduce(t2); + f256_final_reduce(t4); + tt = t2[0] | t2[1] | t2[2] | t2[3] | t2[4] + | t4[0] | t4[1] | t4[2] | t4[3] | t4[4]; + tt = ((tt | -tt) >> 63) - (uint64_t)1; + + /* + * Compute u1*h^2 (in t6) and h^3 (in t5); + */ + f256_montysquare(t7, t2); + f256_montymul(t6, t1, t7); + f256_montymul(t5, t7, t2); + + /* + * Compute x3 = r^2 - h^3 - 2*u1*h^2. 
+ */ + f256_montysquare(P1->x, t4); + f256_sub(P1->x, P1->x, t5); + f256_sub(P1->x, P1->x, t6); + f256_sub(P1->x, P1->x, t6); + + /* + * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. + */ + f256_sub(t6, t6, P1->x); + f256_montymul(P1->y, t4, t6); + f256_montymul(t1, t5, t3); + f256_sub(P1->y, P1->y, t1); + + /* + * Compute z3 = h*z1. + */ + f256_montymul(P1->z, P1->z, t2); + + /* + * The "double" result, in case P1 = P2. + */ + + /* + * Compute z' = 2*y2 (in t1). + */ + f256_add(t1, P2->y, P2->y); + f256_partial_reduce(t1); + + /* + * Compute 2*(y2^2) (in t2) and s = 4*x2*(y2^2) (in t3). + */ + f256_montysquare(t2, P2->y); + f256_add(t2, t2, t2); + f256_add(t3, t2, t2); + f256_montymul(t3, P2->x, t3); + + /* + * Compute m = 3*(x2^2 - 1) (in t4). + */ + f256_montysquare(t4, P2->x); + f256_sub(t4, t4, F256_R); + f256_add(t5, t4, t4); + f256_add(t4, t4, t5); + + /* + * Compute x' = m^2 - 2*s (in t5). + */ + f256_montysquare(t5, t4); + f256_sub(t5, t3); + f256_sub(t5, t3); + + /* + * Compute y' = m*(s - x') - 8*y2^4 (in t6). + */ + f256_sub(t6, t3, t5); + f256_montymul(t6, t6, t4); + f256_montysquare(t7, t2); + f256_sub(t6, t6, t7); + f256_sub(t6, t6, t7); + + /* + * We now have the alternate (doubling) coordinates in (t5,t6,t1). + * We combine them with (x3,y3,z3). + */ + for (i = 0; i < 5; i ++) { + P1->x[i] |= tt & t5[i]; + P1->y[i] |= tt & t6[i]; + P1->z[i] |= tt & t1[i]; + } + + /* + * If P1 = 0, then we get z3 = 0 (which is invalid); if z1 is 0, + * then we want to replace the result with a copy of P2. The + * test on z1 was done at the start, in the zz mask. + */ + for (i = 0; i < 5; i ++) { + P1->x[i] ^= zz & (P1->x[i] ^ P2->x[i]); + P1->y[i] ^= zz & (P1->y[i] ^ P2->y[i]); + P1->z[i] ^= zz & (P1->z[i] ^ F256_R[i]); + } +} +#endif + +/* + * Inner function for computing a point multiplication. A window is + * provided, with points 1*P to 15*P in affine coordinates. + * + * Assumptions: + * - All provided points are valid points on the curve. 
+ * - Multiplier is non-zero, and smaller than the curve order. + * - Everything is in Montgomery representation. + * + * The multiplier k is read from its first byte to its last, each byte as + * two 4-bit digits (high digit first): Q is doubled four times, then + * W[digit - 1] is added when the digit is non-zero. The final Q is + * written to *R. + */ +static void +point_mul_inner(p256_jacobian *R, const p256_affine *W, + const unsigned char *k, size_t klen) +{ + p256_jacobian Q; + uint32_t qz; + + memset(&Q, 0, sizeof Q); + qz = 1; + while (klen -- > 0) { + int i; + unsigned bk; + + bk = *k ++; + for (i = 0; i < 2; i ++) { + uint32_t bits; + uint32_t bnz; + p256_affine T; + p256_jacobian U; + uint32_t n; + int j; + uint64_t m; + + p256_double(&Q); + p256_double(&Q); + p256_double(&Q); + p256_double(&Q); + bits = (bk >> 4) & 0x0F; + bnz = NEQ(bits, 0); + + /* + * Lookup point in window. If the bits are 0, + * we get something invalid, which is not a + * problem because we will use it only if the + * bits are non-zero. All 15 entries are read + * and masked, whatever the value of the bits. + */ + memset(&T, 0, sizeof T); + for (n = 0; n < 15; n ++) { + m = -(uint64_t)EQ(bits, n + 1); + T.x[0] |= m & W[n].x[0]; + T.x[1] |= m & W[n].x[1]; + T.x[2] |= m & W[n].x[2]; + T.x[3] |= m & W[n].x[3]; + T.x[4] |= m & W[n].x[4]; + T.y[0] |= m & W[n].y[0]; + T.y[1] |= m & W[n].y[1]; + T.y[2] |= m & W[n].y[2]; + T.y[3] |= m & W[n].y[3]; + T.y[4] |= m & W[n].y[4]; + } + + U = Q; + p256_add_mixed(&U, &T); + + /* + * If qz is still 1, then Q was all-zeros, and this + * is conserved through p256_double(). In that case + * a non-zero digit loads T into Q directly (with + * Z = F256_R); otherwise, for a non-zero digit, the + * CCOPY() call below is expected to replace Q with U. 
+ */ + m = -(uint64_t)(bnz & qz); + for (j = 0; j < 5; j ++) { + Q.x[j] ^= m & (Q.x[j] ^ T.x[j]); + Q.y[j] ^= m & (Q.y[j] ^ T.y[j]); + Q.z[j] ^= m & (Q.z[j] ^ F256_R[j]); + } + CCOPY(bnz & ~qz, &Q, &U, sizeof Q); + qz &= ~bnz; + bk <<= 4; + } + } + *R = Q; +} + +/* + * Convert a window from Jacobian to affine coordinates. A single + * field inversion is used. This function works for windows up to + * 32 elements. + * + * The destination array (aff[]) and the source array (jac[]) may + * overlap, provided that the start of aff[] is not after the start of + * jac[]. Even if the arrays do _not_ overlap, the source array is + * modified. 
+ */ +static void +window_to_affine(p256_affine *aff, p256_jacobian *jac, int num) +{ + /* + * Convert the window points to affine coordinates. We use the + * following trick to mutualize the inversion computation: if + * we have z1, z2, z3, and z4, and want to invert all of them, + * we compute u = 1/(z1*z2*z3*z4), and then we have: + * 1/z1 = u*z2*z3*z4 + * 1/z2 = u*z1*z3*z4 + * 1/z3 = u*z1*z2*z4 + * 1/z4 = u*z1*z2*z3 + * + * The partial products are computed recursively: + * + * - on input (z_1,z_2), return (z_2,z_1) and z_1*z_2 + * - on input (z_1,z_2,... z_n): + * recurse on (z_1,z_2,... z_(n/2)) -> r1 and m1 + * recurse on (z_(n/2+1),z_(n/2+2)... z_n) -> r2 and m2 + * multiply elements of r1 by m2 -> s1 + * multiply elements of r2 by m1 -> s2 + * return s1||s2 and m1*m2 + * + * In the example below, we suppose that we have 14 elements. + * Let z1, z2,... zE be the 14 values to invert (index noted in + * hexadecimal, starting at 1). + * + * - Depth 1: + * swap(z1, z2); z12 = z1*z2 + * swap(z3, z4); z34 = z3*z4 + * swap(z5, z6); z56 = z5*z6 + * swap(z7, z8); z78 = z7*z8 + * swap(z9, zA); z9A = z9*zA + * swap(zB, zC); zBC = zB*zC + * swap(zD, zE); zDE = zD*zE + * + * - Depth 2: + * z1 <- z1*z34, z2 <- z2*z34, z3 <- z3*z12, z4 <- z4*z12 + * z1234 = z12*z34 + * z5 <- z5*z78, z6 <- z6*z78, z7 <- z7*z56, z8 <- z8*z56 + * z5678 = z56*z78 + * z9 <- z9*zBC, zA <- zA*zBC, zB <- zB*z9A, zC <- zC*z9A + * z9ABC = z9A*zBC + * + * - Depth 3: + * z1 <- z1*z5678, z2 <- z2*z5678, z3 <- z3*z5678, z4 <- z4*z5678 + * z5 <- z5*z1234, z6 <- z6*z1234, z7 <- z7*z1234, z8 <- z8*z1234 + * z12345678 = z1234*z5678 + * z9 <- z9*zDE, zA <- zA*zDE, zB <- zB*zDE, zC <- zC*zDE + * zD <- zD*z9ABC, zE <- zE*z9ABC + * z9ABCDE = z9ABC*zDE + * + * - Depth 4: + * multiply z1..z8 by z9ABCDE + * multiply z9..zE by z12345678 + * final z = z12345678*z9ABCDE + */ + + uint64_t z[16][5]; + int i, k, s; +#define zt (z[15]) +#define zu (z[14]) +#define zv (z[13]) + + /* + * First recursion step (pairwise swapping 
and multiplication). + * If there is an odd number of elements, then we "invent" an + * extra one with coordinate Z = 1 (in Montgomery representation). + */ + for (i = 0; (i + 1) < num; i += 2) { + memcpy(zt, jac[i].z, sizeof zt); + memcpy(jac[i].z, jac[i + 1].z, sizeof zt); + memcpy(jac[i + 1].z, zt, sizeof zt); + f256_montymul(z[i >> 1], jac[i].z, jac[i + 1].z); + } + if ((num & 1) != 0) { + memcpy(z[num >> 1], jac[num - 1].z, sizeof zt); + memcpy(jac[num - 1].z, F256_R, sizeof F256_R); + } + + /* + * Perform further recursion steps. At the entry of each step, + * the process has been done for groups of 's' points. The + * integer k is the log2 of s. + */ + for (k = 1, s = 2; s < num; k ++, s <<= 1) { + int n; + + for (i = 0; i < num; i ++) { + f256_montymul(jac[i].z, jac[i].z, z[(i >> k) ^ 1]); + } + n = (num + s - 1) >> k; + for (i = 0; i < (n >> 1); i ++) { + f256_montymul(z[i], z[i << 1], z[(i << 1) + 1]); + } + if ((n & 1) != 0) { + memmove(z[n >> 1], z[n], sizeof zt); + } + } + + /* + * Invert the final result, and convert all points. + */ + f256_invert(zt, z[0]); + for (i = 0; i < num; i ++) { + f256_montymul(zv, jac[i].z, zt); + f256_montysquare(zu, zv); + f256_montymul(zv, zv, zu); + f256_montymul(aff[i].x, jac[i].x, zu); + f256_montymul(aff[i].y, jac[i].y, zv); + } +} + +/* + * Multiply the provided point by an integer. + * Assumptions: + * - Source point is a valid curve point. + * - Source point is not the point-at-infinity. + * - Integer is not 0, and is lower than the curve order. + * If these conditions are not met, then the result is indeterminate + * (but the process is still constant-time). + */ +static void +p256_mul(p256_jacobian *P, const unsigned char *k, size_t klen) +{ + union { + p256_affine aff[15]; + p256_jacobian jac[15]; + } window; + int i; + + /* + * Compute window, in Jacobian coordinates. 
+ */ + window.jac[0] = *P; + for (i = 2; i < 16; i ++) { + window.jac[i - 1] = window.jac[(i >> 1) - 1]; + if ((i & 1) == 0) { + p256_double(&window.jac[i - 1]); + } else { + p256_add(&window.jac[i - 1], &window.jac[i >> 1]); + } + } + + /* + * Convert the window points to affine coordinates. Point + * window[0] is the source point, already in affine coordinates. + */ + window_to_affine(window.aff, window.jac, 15); + + /* + * Perform point multiplication. + */ + point_mul_inner(P, window.aff, k, klen); +} + +/* + * Precomputed window for the conventional generator: P256_Gwin[n] + * contains (n+1)*G (affine coordinates, in Montgomery representation). + */ +static const p256_affine P256_Gwin[] = { + { + { 0x30D418A9143C1, 0xC4FEDB60179E7, 0x62251075BA95F, + 0x5C669FB732B77, 0x08905F76B5375 }, + { 0x5357CE95560A8, 0x43A19E45CDDF2, 0x21F3258B4AB8E, + 0xD8552E88688DD, 0x0571FF18A5885 } + }, + { + { 0x46D410DDD64DF, 0x0B433827D8500, 0x1490D9AA6AE3C, + 0xA3A832205038D, 0x06BB32E52DCF3 }, + { 0x48D361BEE1A57, 0xB7B236FF82F36, 0x042DBE152CD7C, + 0xA3AA9A8FB0E92, 0x08C577517A5B8 } + }, + { + { 0x3F904EEBC1272, 0x9E87D81FBFFAC, 0xCBBC98B027F84, + 0x47E46AD77DD87, 0x06936A3FD6FF7 }, + { 0x5C1FC983A7EBD, 0xC3861FE1AB04C, 0x2EE98E583E47A, + 0xC06A88208311A, 0x05F06A2AB587C } + }, + { + { 0xB50D46918DCC5, 0xD7623C17374B0, 0x100AF24650A6E, + 0x76ABCDAACACE8, 0x077362F591B01 }, + { 0xF24CE4CBABA68, 0x17AD6F4472D96, 0xDDD22E1762847, + 0x862EB6C36DEE5, 0x04B14C39CC5AB } + }, + { + { 0x8AAEC45C61F5C, 0x9D4B9537DBE1B, 0x76C20C90EC649, + 0x3C7D41CB5AAD0, 0x0907960649052 }, + { 0x9B4AE7BA4F107, 0xF75EB882BEB30, 0x7A1F6873C568E, + 0x915C540A9877E, 0x03A076BB9DD1E } + }, + { + { 0x47373E77664A1, 0xF246CEE3E4039, 0x17A3AD55AE744, + 0x673C50A961A5B, 0x03074B5964213 }, + { 0x6220D377E44BA, 0x30DFF14B593D3, 0x639F11299C2B5, + 0x75F5424D44CEF, 0x04C9916DEA07F } + }, + { + { 0x354EA0173B4F1, 0x3C23C00F70746, 0x23BB082BD2021, + 0xE03E43EAAB50C, 0x03BA5119D3123 }, + { 0xD0303F5B9D4DE, 
0x17DA67BDD2847, 0xC941956742F2F, + 0x8670F933BDC77, 0x0AEDD9164E240 } + }, + { + { 0x4CD19499A78FB, 0x4BF9B345527F1, 0x2CFC6B462AB5C, + 0x30CDF90F02AF0, 0x0763891F62652 }, + { 0xA3A9532D49775, 0xD7F9EBA15F59D, 0x60BBF021E3327, + 0xF75C23C7B84BE, 0x06EC12F2C706D } + }, + { + { 0x6E8F264E20E8E, 0xC79A7A84175C9, 0xC8EB00ABE6BFE, + 0x16A4CC09C0444, 0x005B3081D0C4E }, + { 0x777AA45F33140, 0xDCE5D45E31EB7, 0xB12F1A56AF7BE, + 0xF9B2B6E019A88, 0x086659CDFD835 } + }, + { + { 0xDBD19DC21EC8C, 0x94FCF81392C18, 0x250B4998F9868, + 0x28EB37D2CD648, 0x0C61C947E4B34 }, + { 0x407880DD9E767, 0x0C83FBE080C2B, 0x9BE5D2C43A899, + 0xAB4EF7D2D6577, 0x08719A555B3B4 } + }, + { + { 0x260A6245E4043, 0x53E7FDFE0EA7D, 0xAC1AB59DE4079, + 0x072EFF3A4158D, 0x0E7090F1949C9 }, + { 0x85612B944E886, 0xE857F61C81A76, 0xAD643D250F939, + 0x88DAC0DAA891E, 0x089300244125B } + }, + { + { 0x1AA7D26977684, 0x58A345A3304B7, 0x37385EABDEDEF, + 0x155E409D29DEE, 0x0EE1DF780B83E }, + { 0x12D91CBB5B437, 0x65A8956370CAC, 0xDE6D66170ED2F, + 0xAC9B8228CFA8A, 0x0FF57C95C3238 } + }, + { + { 0x25634B2ED7097, 0x9156FD30DCCC4, 0x9E98110E35676, + 0x7594CBCD43F55, 0x038477ACC395B }, + { 0x2B90C00EE17FF, 0xF842ED2E33575, 0x1F5BC16874838, + 0x7968CD06422BD, 0x0BC0876AB9E7B } + }, + { + { 0xA35BB0CF664AF, 0x68F9707E3A242, 0x832660126E48F, + 0x72D2717BF54C6, 0x0AAE7333ED12C }, + { 0x2DB7995D586B1, 0xE732237C227B5, 0x65E7DBBE29569, + 0xBBBD8E4193E2A, 0x052706DC3EAA1 } + }, + { + { 0xD8B7BC60055BE, 0xD76E27E4B72BC, 0x81937003CC23E, + 0xA090E337424E4, 0x02AA0E43EAD3D }, + { 0x524F6383C45D2, 0x422A41B2540B8, 0x8A4797D766355, + 0xDF444EFA6DE77, 0x0042170A9079A } + }, +}; + +/* + * Multiply the conventional generator of the curve by the provided + * integer. Return is written in *P. + * + * Assumptions: + * - Integer is not 0, and is lower than the curve order. + * If this condition is not met, then the result is indeterminate + * (but the process is still constant-time). 
+ */ +static void +p256_mulgen(p256_jacobian *P, const unsigned char *k, size_t klen) +{ + point_mul_inner(P, P256_Gwin, k, klen); +} + +/* + * Return 1 if all of the following hold: + * - klen <= 32 + * - k != 0 + * - k is lower than the curve order + * Otherwise, return 0. + * + * Constant-time behaviour: only klen may be observable. + */ +static uint32_t +check_scalar(const unsigned char *k, size_t klen) +{ + uint32_t z; + int32_t c; + size_t u; + + if (klen > 32) { + return 0; + } + z = 0; + for (u = 0; u < klen; u ++) { + z |= k[u]; + } + if (klen == 32) { + c = 0; + for (u = 0; u < klen; u ++) { + c |= -(int32_t)EQ0(c) & CMP(k[u], P256_N[u]); + } + } else { + c = -1; + } + return NEQ(z, 0) & LT0(c); +} + +static uint32_t +api_mul(unsigned char *G, size_t Glen, + const unsigned char *k, size_t klen, int curve) +{ + uint32_t r; + p256_jacobian P; + + (void)curve; + if (Glen != 65) { + return 0; + } + r = check_scalar(k, klen); + r &= point_decode(&P, G); + p256_mul(&P, k, klen); + r &= point_encode(G, &P); + return r; +} + +static size_t +api_mulgen(unsigned char *R, + const unsigned char *k, size_t klen, int curve) +{ + p256_jacobian P; + + (void)curve; + p256_mulgen(&P, k, klen); + point_encode(R, &P); + return 65; +} + +static uint32_t +api_muladd(unsigned char *A, const unsigned char *B, size_t len, + const unsigned char *x, size_t xlen, + const unsigned char *y, size_t ylen, int curve) +{ + /* + * We might want to use Shamir's trick here: make a composite + * window of u*P+v*Q points, to merge the two doubling-ladders + * into one. This, however, has some complications: + * + * - During the computation, we may hit the point-at-infinity. + * Thus, we would need p256_add_complete_mixed() (complete + * formulas for point addition), with a higher cost (17 muls + * instead of 11). + * + * - A 4-bit window would be too large, since it would involve + * 16*16-1 = 255 points. 
For the same window size as in the + * p256_mul() case, we would need to reduce the window size + * to 2 bits, and thus perform twice as many non-doubling + * point additions. + * + * - The window may itself contain the point-at-infinity, and + * thus cannot in all generality be made of affine points. + * Instead, we would need to make it a window of points in + * Jacobian coordinates. Even p256_add_complete_mixed() would + * be inappropriate. + * + * For these reasons, the code below performs two separate + * point multiplications, then computes the final point addition + * (which is both a "normal" addition, and a doubling, to handle + * all cases). + */ + + p256_jacobian P, Q; + uint32_t r, t, s; + uint64_t z; + + (void)curve; + if (len != 65) { + return 0; + } + r = point_decode(&P, A); + p256_mul(&P, x, xlen); + if (B == NULL) { + p256_mulgen(&Q, y, ylen); + } else { + r &= point_decode(&Q, B); + p256_mul(&Q, y, ylen); + } + + /* + * The final addition may fail in case both points are equal. + */ + t = p256_add(&P, &Q); + f256_final_reduce(P.z); + z = P.z[0] | P.z[1] | P.z[2] | P.z[3] | P.z[4]; + s = EQ((uint32_t)(z | (z >> 32)), 0); + p256_double(&Q); + + /* + * If s is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). 
So we + * have the following: + * + * s = 0, t = 0 return P (normal addition) + * s = 0, t = 1 return P (normal addition) + * s = 1, t = 0 return Q (a 'double' case) + * s = 1, t = 1 report an error (P+Q = 0) + */ + CCOPY(s & ~t, &P, &Q, sizeof Q); + point_encode(A, &P); + r &= ~(s & t); + return r; +} + +/* see bearssl_ec.h */ +const br_ec_impl br_ec_p256_m62 = { + (uint32_t)0x00800000, + &api_generator, + &api_order, + &api_xoff, + &api_mul, + &api_mulgen, + &api_muladd +}; + +/* see bearssl_ec.h */ +const br_ec_impl * +br_ec_p256_m62_get(void) +{ + return &br_ec_p256_m62; +} + +#else + +/* see bearssl_ec.h */ +const br_ec_impl * +br_ec_p256_m62_get(void) +{ + return 0; +} + +#endif diff --git a/third_party/bearssl/src/ec_p256_m64.c b/third_party/bearssl/src/ec_p256_m64.c new file mode 100644 index 0000000..71a527c --- /dev/null +++ b/third_party/bearssl/src/ec_p256_m64.c @@ -0,0 +1,1781 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +#if BR_INT128 || BR_UMUL128 + +#if BR_UMUL128 +#include <intrin.h> +#endif + +static const unsigned char P256_G[] = { + 0x04, 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8, + 0xBC, 0xE6, 0xE5, 0x63, 0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D, + 0x81, 0x2D, 0xEB, 0x33, 0xA0, 0xF4, 0xA1, 0x39, 0x45, 0xD8, + 0x98, 0xC2, 0x96, 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F, + 0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C, 0x0F, 0x9E, 0x16, 0x2B, + 0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, 0xCB, 0xB6, 0x40, + 0x68, 0x37, 0xBF, 0x51, 0xF5 +}; + +static const unsigned char P256_N[] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xBC, 0xE6, 0xFA, 0xAD, + 0xA7, 0x17, 0x9E, 0x84, 0xF3, 0xB9, 0xCA, 0xC2, 0xFC, 0x63, + 0x25, 0x51 +}; + +static const unsigned char * +api_generator(int curve, size_t *len) +{ + (void)curve; + *len = sizeof P256_G; + return P256_G; +} + +static const unsigned char * +api_order(int curve, size_t *len) +{ + (void)curve; + *len = sizeof P256_N; + return P256_N; +} + +static size_t +api_xoff(int curve, size_t *len) +{ + (void)curve; + *len = 32; + return 1; +} + +/* + * A field element is encoded as four 64-bit integers, in basis 2^64. + * Values may reach up to 2^256-1. Montgomery multiplication is used. + */ + +/* R = 2^256 mod p, least significant limb first; this is the value 1 + in Montgomery representation. */ +static const uint64_t F256_R[] = { + 0x0000000000000001, 0xFFFFFFFF00000000, + 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFE +}; + +/* Curve equation is y^2 = x^3 - 3*x + B. This constant is B*R mod p + (Montgomery representation of B). 
*/ +static const uint64_t P256_B_MONTY[] = { + 0xD89CDF6229C4BDDF, 0xACF005CD78843090, + 0xE5A220ABF7212ED6, 0xDC30061D04874834 +}; + +/* + * Addition in the field: d receives a value congruent to a + b modulo p + * which fits on 256 bits. + */ +static inline void +f256_add(uint64_t *d, const uint64_t *a, const uint64_t *b) +{ +#if BR_INT128 + unsigned __int128 w; + uint64_t t; + + /* + * Do the addition, with an extra carry in t. + */ + w = (unsigned __int128)a[0] + b[0]; + d[0] = (uint64_t)w; + w = (unsigned __int128)a[1] + b[1] + (w >> 64); + d[1] = (uint64_t)w; + w = (unsigned __int128)a[2] + b[2] + (w >> 64); + d[2] = (uint64_t)w; + w = (unsigned __int128)a[3] + b[3] + (w >> 64); + d[3] = (uint64_t)w; + t = (uint64_t)(w >> 64); + + /* + * Fold carry t, using: 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p. + */ + w = (unsigned __int128)d[0] + t; + d[0] = (uint64_t)w; + w = (unsigned __int128)d[1] + (w >> 64) - (t << 32); + d[1] = (uint64_t)w; + /* Here, carry "w >> 64" can only be 0 or -1 */ + w = (unsigned __int128)d[2] - ((w >> 64) & 1); + d[2] = (uint64_t)w; + /* Again, carry is 0 or -1. But there can be carry only if t = 1, + in which case the addition of (t << 32) - t is positive. */ + w = (unsigned __int128)d[3] - ((w >> 64) & 1) + (t << 32) - t; + d[3] = (uint64_t)w; + t = (uint64_t)(w >> 64); + + /* + * There can be an extra carry here, which we must fold again. + */ + w = (unsigned __int128)d[0] + t; + d[0] = (uint64_t)w; + w = (unsigned __int128)d[1] + (w >> 64) - (t << 32); + d[1] = (uint64_t)w; + w = (unsigned __int128)d[2] - ((w >> 64) & 1); + d[2] = (uint64_t)w; + d[3] += (t << 32) - t - (uint64_t)((w >> 64) & 1); + +#elif BR_UMUL128 + + unsigned char cc; + uint64_t t; + + cc = _addcarry_u64(0, a[0], b[0], &d[0]); + cc = _addcarry_u64(cc, a[1], b[1], &d[1]); + cc = _addcarry_u64(cc, a[2], b[2], &d[2]); + cc = _addcarry_u64(cc, a[3], b[3], &d[3]); + + /* + * If there is a carry, then we want to subtract p, which we + * do by adding 2^256 - p. 
+ */ + t = cc; + cc = _addcarry_u64(cc, d[0], 0, &d[0]); + cc = _addcarry_u64(cc, d[1], -(t << 32), &d[1]); + cc = _addcarry_u64(cc, d[2], -t, &d[2]); + cc = _addcarry_u64(cc, d[3], (t << 32) - (t << 1), &d[3]); + + /* + * We have to do it again if there still is a carry. + */ + t = cc; + cc = _addcarry_u64(cc, d[0], 0, &d[0]); + cc = _addcarry_u64(cc, d[1], -(t << 32), &d[1]); + cc = _addcarry_u64(cc, d[2], -t, &d[2]); + (void)_addcarry_u64(cc, d[3], (t << 32) - (t << 1), &d[3]); + +#endif +} + +/* + * Subtraction in the field: d receives a value congruent to a - b modulo p + * which fits on 256 bits (p is added back when there is a borrow). + */ +static inline void +f256_sub(uint64_t *d, const uint64_t *a, const uint64_t *b) +{ +#if BR_INT128 + + unsigned __int128 w; + uint64_t t; + + w = (unsigned __int128)a[0] - b[0]; + d[0] = (uint64_t)w; + w = (unsigned __int128)a[1] - b[1] - ((w >> 64) & 1); + d[1] = (uint64_t)w; + w = (unsigned __int128)a[2] - b[2] - ((w >> 64) & 1); + d[2] = (uint64_t)w; + w = (unsigned __int128)a[3] - b[3] - ((w >> 64) & 1); + d[3] = (uint64_t)w; + t = (uint64_t)(w >> 64) & 1; + + /* + * If there is a borrow (t = 1), then we must add the modulus + * p = 2^256 - 2^224 + 2^192 + 2^96 - 1. + */ + w = (unsigned __int128)d[0] - t; + d[0] = (uint64_t)w; + w = (unsigned __int128)d[1] + (t << 32) - ((w >> 64) & 1); + d[1] = (uint64_t)w; + /* Here, carry "w >> 64" can only be 0 or +1 */ + w = (unsigned __int128)d[2] + (w >> 64); + d[2] = (uint64_t)w; + /* Again, carry is 0 or +1 */ + w = (unsigned __int128)d[3] + (w >> 64) - (t << 32) + t; + d[3] = (uint64_t)w; + t = (uint64_t)(w >> 64) & 1; + + /* + * There may be again a borrow, in which case we must add the + * modulus again. 
+ */ + w = (unsigned __int128)d[0] - t; + d[0] = (uint64_t)w; + w = (unsigned __int128)d[1] + (t << 32) - ((w >> 64) & 1); + d[1] = (uint64_t)w; + w = (unsigned __int128)d[2] + (w >> 64); + d[2] = (uint64_t)w; + d[3] += (uint64_t)(w >> 64) - (t << 32) + t; + +#elif BR_UMUL128 + + unsigned char cc; + uint64_t t; + + cc = _subborrow_u64(0, a[0], b[0], &d[0]); + cc = _subborrow_u64(cc, a[1], b[1], &d[1]); + cc = _subborrow_u64(cc, a[2], b[2], &d[2]); + cc = _subborrow_u64(cc, a[3], b[3], &d[3]); + + /* + * If there is a borrow, then we need to add p. We (virtually) + * add 2^256, then subtract 2^256 - p. + */ + t = cc; + cc = _subborrow_u64(0, d[0], t, &d[0]); + cc = _subborrow_u64(cc, d[1], -(t << 32), &d[1]); + cc = _subborrow_u64(cc, d[2], -t, &d[2]); + cc = _subborrow_u64(cc, d[3], (t << 32) - (t << 1), &d[3]); + + /* + * If there still is a borrow, then we need to add p again. + */ + t = cc; + cc = _subborrow_u64(0, d[0], t, &d[0]); + cc = _subborrow_u64(cc, d[1], -(t << 32), &d[1]); + cc = _subborrow_u64(cc, d[2], -t, &d[2]); + (void)_subborrow_u64(cc, d[3], (t << 32) - (t << 1), &d[3]); + +#endif +} + +/* + * Montgomery multiplication in the field. + */ +static void +f256_montymul(uint64_t *d, const uint64_t *a, const uint64_t *b) +{ +#if BR_INT128 + + uint64_t x, f, t0, t1, t2, t3, t4; + unsigned __int128 z, ff; + int i; + + /* + * When computing d <- d + a[u]*b, we also add f*p such + * that d + a[u]*b + f*p is a multiple of 2^64. Since + * p = -1 mod 2^64, we can compute f = d[0] + a[u]*b[0] mod 2^64. + */ + + /* + * Step 1: t <- (a[0]*b + f*p) / 2^64 + * We have f = a[0]*b[0] mod 2^64. Since p = -1 mod 2^64, this + * ensures that (a[0]*b + f*p) is a multiple of 2^64. + * + * We also have: f*p = f*2^256 - f*2^224 + f*2^192 + f*2^96 - f. 
+ */ + x = a[0]; + z = (unsigned __int128)b[0] * x; + f = (uint64_t)z; + z = (unsigned __int128)b[1] * x + (z >> 64) + (uint64_t)(f << 32); + t0 = (uint64_t)z; + z = (unsigned __int128)b[2] * x + (z >> 64) + (uint64_t)(f >> 32); + t1 = (uint64_t)z; + z = (unsigned __int128)b[3] * x + (z >> 64) + f; + t2 = (uint64_t)z; + t3 = (uint64_t)(z >> 64); + ff = ((unsigned __int128)f << 64) - ((unsigned __int128)f << 32); + z = (unsigned __int128)t2 + (uint64_t)ff; + t2 = (uint64_t)z; + z = (unsigned __int128)t3 + (z >> 64) + (ff >> 64); + t3 = (uint64_t)z; + t4 = (uint64_t)(z >> 64); + + /* + * Steps 2 to 4: t <- (t + a[i]*b + f*p) / 2^64 + */ + for (i = 1; i < 4; i ++) { + x = a[i]; + + /* t <- (t + x*b - f) / 2^64 */ + z = (unsigned __int128)b[0] * x + t0; + f = (uint64_t)z; + z = (unsigned __int128)b[1] * x + t1 + (z >> 64); + t0 = (uint64_t)z; + z = (unsigned __int128)b[2] * x + t2 + (z >> 64); + t1 = (uint64_t)z; + z = (unsigned __int128)b[3] * x + t3 + (z >> 64); + t2 = (uint64_t)z; + z = t4 + (z >> 64); + t3 = (uint64_t)z; + t4 = (uint64_t)(z >> 64); + + /* t <- t + f*2^32, carry in the upper half of z */ + z = (unsigned __int128)t0 + (uint64_t)(f << 32); + t0 = (uint64_t)z; + z = (z >> 64) + (unsigned __int128)t1 + (uint64_t)(f >> 32); + t1 = (uint64_t)z; + + /* t <- t + f*2^192 - f*2^160 + f*2^128 */ + ff = ((unsigned __int128)f << 64) + - ((unsigned __int128)f << 32) + f; + z = (z >> 64) + (unsigned __int128)t2 + (uint64_t)ff; + t2 = (uint64_t)z; + z = (unsigned __int128)t3 + (z >> 64) + (ff >> 64); + t3 = (uint64_t)z; + t4 += (uint64_t)(z >> 64); + } + + /* + * At that point, we have computed t = (a*b + F*p) / 2^256, where + * F is a 256-bit integer whose limbs are the "f" coefficients + * in the steps above. 
We have: + * a <= 2^256-1 + * b <= 2^256-1 + * F <= 2^256-1 + * Hence: + * a*b + F*p <= (2^256-1)*(2^256-1) + p*(2^256-1) + * a*b + F*p <= 2^256*(2^256 - 2 + p) + 1 - p + * Therefore: + * t < 2^256 + p - 2 + * Since p < 2^256, it follows that: + * t4 can be only 0 or 1 + * t - p < 2^256 + * We can therefore subtract p from t, conditionally on t4, to + * get a nonnegative result that fits on 256 bits. + */ + z = (unsigned __int128)t0 + t4; + t0 = (uint64_t)z; + z = (unsigned __int128)t1 - (t4 << 32) + (z >> 64); + t1 = (uint64_t)z; + z = (unsigned __int128)t2 - (z >> 127); + t2 = (uint64_t)z; + t3 = t3 - (uint64_t)(z >> 127) - t4 + (t4 << 32); + + d[0] = t0; + d[1] = t1; + d[2] = t2; + d[3] = t3; + +#elif BR_UMUL128 + + uint64_t x, f, t0, t1, t2, t3, t4; + uint64_t zl, zh, ffl, ffh; + unsigned char k, m; + int i; + + /* + * When computing d <- d + a[u]*b, we also add f*p such + * that d + a[u]*b + f*p is a multiple of 2^64. Since + * p = -1 mod 2^64, we can compute f = d[0] + a[u]*b[0] mod 2^64. + */ + + /* + * Step 1: t <- (a[0]*b + f*p) / 2^64 + * We have f = a[0]*b[0] mod 2^64. Since p = -1 mod 2^64, this + * ensures that (a[0]*b + f*p) is a multiple of 2^64. + * + * We also have: f*p = f*2^256 - f*2^224 + f*2^192 + f*2^96 - f. 
+ */ + x = a[0]; + + zl = _umul128(b[0], x, &zh); + f = zl; + t0 = zh; + + zl = _umul128(b[1], x, &zh); + k = _addcarry_u64(0, zl, t0, &zl); + (void)_addcarry_u64(k, zh, 0, &zh); + k = _addcarry_u64(0, zl, f << 32, &zl); + (void)_addcarry_u64(k, zh, 0, &zh); + t0 = zl; + t1 = zh; + + zl = _umul128(b[2], x, &zh); + k = _addcarry_u64(0, zl, t1, &zl); + (void)_addcarry_u64(k, zh, 0, &zh); + k = _addcarry_u64(0, zl, f >> 32, &zl); + (void)_addcarry_u64(k, zh, 0, &zh); + t1 = zl; + t2 = zh; + + zl = _umul128(b[3], x, &zh); + k = _addcarry_u64(0, zl, t2, &zl); + (void)_addcarry_u64(k, zh, 0, &zh); + k = _addcarry_u64(0, zl, f, &zl); + (void)_addcarry_u64(k, zh, 0, &zh); + t2 = zl; + t3 = zh; + + t4 = _addcarry_u64(0, t3, f, &t3); + k = _subborrow_u64(0, t2, f << 32, &t2); + k = _subborrow_u64(k, t3, f >> 32, &t3); + (void)_subborrow_u64(k, t4, 0, &t4); + + /* + * Steps 2 to 4: t <- (t + a[i]*b + f*p) / 2^64 + */ + for (i = 1; i < 4; i ++) { + x = a[i]; + /* f = t0 + x * b[0]; -- computed below */ + + /* t <- (t + x*b - f) / 2^64 */ + zl = _umul128(b[0], x, &zh); + k = _addcarry_u64(0, zl, t0, &f); + (void)_addcarry_u64(k, zh, 0, &t0); + + zl = _umul128(b[1], x, &zh); + k = _addcarry_u64(0, zl, t0, &zl); + (void)_addcarry_u64(k, zh, 0, &zh); + k = _addcarry_u64(0, zl, t1, &t0); + (void)_addcarry_u64(k, zh, 0, &t1); + + zl = _umul128(b[2], x, &zh); + k = _addcarry_u64(0, zl, t1, &zl); + (void)_addcarry_u64(k, zh, 0, &zh); + k = _addcarry_u64(0, zl, t2, &t1); + (void)_addcarry_u64(k, zh, 0, &t2); + + zl = _umul128(b[3], x, &zh); + k = _addcarry_u64(0, zl, t2, &zl); + (void)_addcarry_u64(k, zh, 0, &zh); + k = _addcarry_u64(0, zl, t3, &t2); + (void)_addcarry_u64(k, zh, 0, &t3); + + t4 = _addcarry_u64(0, t3, t4, &t3); + + /* t <- t + f*2^32, carry in k */ + k = _addcarry_u64(0, t0, f << 32, &t0); + k = _addcarry_u64(k, t1, f >> 32, &t1); + + /* t <- t + f*2^192 - f*2^160 + f*2^128 */ + m = _subborrow_u64(0, f, f << 32, &ffl); + (void)_subborrow_u64(m, f, f >> 32, &ffh); + k = 
_addcarry_u64(k, t2, ffl, &t2); + k = _addcarry_u64(k, t3, ffh, &t3); + (void)_addcarry_u64(k, t4, 0, &t4); + } + + /* + * At that point, we have computed t = (a*b + F*p) / 2^256, where + * F is a 256-bit integer whose limbs are the "f" coefficients + * in the steps above. We have: + * a <= 2^256-1 + * b <= 2^256-1 + * F <= 2^256-1 + * Hence: + * a*b + F*p <= (2^256-1)*(2^256-1) + p*(2^256-1) + * a*b + F*p <= 2^256*(2^256 - 2 + p) + 1 - p + * Therefore: + * t < 2^256 + p - 2 + * Since p < 2^256, it follows that: + * t4 can be only 0 or 1 + * t - p < 2^256 + * We can therefore subtract p from t, conditionally on t4, to + * get a nonnegative result that fits on 256 bits. + */ + k = _addcarry_u64(0, t0, t4, &t0); + k = _addcarry_u64(k, t1, -(t4 << 32), &t1); + k = _addcarry_u64(k, t2, -t4, &t2); + (void)_addcarry_u64(k, t3, (t4 << 32) - (t4 << 1), &t3); + + d[0] = t0; + d[1] = t1; + d[2] = t2; + d[3] = t3; + +#endif +} + +/* + * Montgomery squaring in the field; currently a basic wrapper around + * multiplication (inline, should be optimized away). + * TODO: see if some extra speed can be gained here. + */ +static inline void +f256_montysquare(uint64_t *d, const uint64_t *a) +{ + f256_montymul(d, a, a); +} + +/* + * Convert to Montgomery representation. + */ +static void +f256_tomonty(uint64_t *d, const uint64_t *a) +{ + /* + * R2 = 2^512 mod p. + * If R = 2^256 mod p, then R2 = R^2 mod p; and the Montgomery + * multiplication of a by R2 is: a*R2/R = a*R mod p, i.e. the + * conversion to Montgomery representation. + */ + static const uint64_t R2[] = { + 0x0000000000000003, + 0xFFFFFFFBFFFFFFFF, + 0xFFFFFFFFFFFFFFFE, + 0x00000004FFFFFFFD + }; + + f256_montymul(d, a, R2); +} + +/* + * Convert from Montgomery representation. + */ +static void +f256_frommonty(uint64_t *d, const uint64_t *a) +{ + /* + * Montgomery multiplication by 1 is division by 2^256 modulo p. 
+ */ + static const uint64_t one[] = { 1, 0, 0, 0 }; + + f256_montymul(d, a, one); +} + +/* + * Inversion in the field. If the source value is 0 modulo p, then this + * returns 0 or p. This function uses Montgomery representation. + */ +static void +f256_invert(uint64_t *d, const uint64_t *a) +{ + /* + * We compute a^(p-2) mod p. The exponent pattern (from high to + * low) is: + * - 32 bits of value 1 + * - 31 bits of value 0 + * - 1 bit of value 1 + * - 96 bits of value 0 + * - 94 bits of value 1 + * - 1 bit of value 0 + * - 1 bit of value 1 + * To speed up the square-and-multiply algorithm, we precompute + * a^(2^31-1) (in t); the main loop then performs 225 squarings, + * multiplying by a or by t at the iterations selected in the + * switch below. + */ + + uint64_t r[4], t[4]; + int i; + + memcpy(t, a, sizeof t); + for (i = 0; i < 30; i ++) { + f256_montysquare(t, t); + f256_montymul(t, t, a); + } + + memcpy(r, t, sizeof t); + for (i = 224; i >= 0; i --) { + f256_montysquare(r, r); + switch (i) { + case 0: + case 2: + case 192: + case 224: + f256_montymul(r, r, a); + break; + case 3: + case 34: + case 65: + f256_montymul(r, r, t); + break; + } + } + memcpy(d, r, sizeof r); +} + +/* + * Finalize reduction. + * Input value fits on 256 bits. This function subtracts p if and only + * if the input is greater than or equal to p. + */ +static inline void +f256_final_reduce(uint64_t *a) +{ +#if BR_INT128 + + uint64_t t0, t1, t2, t3, cc; + unsigned __int128 z; + + /* + * We add 2^224 - 2^192 - 2^96 + 1 to a. If there is no carry, + * then a < p; otherwise, the addition result we computed is + * the value we must return. 
+ */ + z = (unsigned __int128)a[0] + 1; + t0 = (uint64_t)z; + z = (unsigned __int128)a[1] + (z >> 64) - ((uint64_t)1 << 32); + t1 = (uint64_t)z; + z = (unsigned __int128)a[2] - (z >> 127); + t2 = (uint64_t)z; + z = (unsigned __int128)a[3] - (z >> 127) + 0xFFFFFFFF; + t3 = (uint64_t)z; + cc = -(uint64_t)(z >> 64); + + a[0] ^= cc & (a[0] ^ t0); + a[1] ^= cc & (a[1] ^ t1); + a[2] ^= cc & (a[2] ^ t2); + a[3] ^= cc & (a[3] ^ t3); + +#elif BR_UMUL128 + + uint64_t t0, t1, t2, t3, m; + unsigned char k; + + k = _addcarry_u64(0, a[0], (uint64_t)1, &t0); + k = _addcarry_u64(k, a[1], -((uint64_t)1 << 32), &t1); + k = _addcarry_u64(k, a[2], -(uint64_t)1, &t2); + k = _addcarry_u64(k, a[3], ((uint64_t)1 << 32) - 2, &t3); + m = -(uint64_t)k; + + a[0] ^= m & (a[0] ^ t0); + a[1] ^= m & (a[1] ^ t1); + a[2] ^= m & (a[2] ^ t2); + a[3] ^= m & (a[3] ^ t3); + +#endif +} + +/* + * Points in affine and Jacobian coordinates. + * + * - In affine coordinates, the point-at-infinity cannot be encoded. + * - Jacobian coordinates (X,Y,Z) correspond to affine (X/Z^2,Y/Z^3); + * if Z = 0 then this is the point-at-infinity. + */ +typedef struct { + uint64_t x[4]; + uint64_t y[4]; +} p256_affine; + +typedef struct { + uint64_t x[4]; + uint64_t y[4]; + uint64_t z[4]; +} p256_jacobian; + +/* + * Decode a point. The returned point is in Jacobian coordinates, but + * with z = 1. If the encoding is invalid, or encodes a point which is + * not on the curve, or encodes the point at infinity, then this function + * returns 0. Otherwise, 1 is returned. + * + * The buffer is assumed to have length exactly 65 bytes. + */ +static uint32_t +point_decode(p256_jacobian *P, const unsigned char *buf) +{ + uint64_t x[4], y[4], t[4], x3[4], tt; + uint32_t r; + + /* + * Header byte shall be 0x04. + */ + r = EQ(buf[0], 0x04); + + /* + * Decode X and Y coordinates, and convert them into + * Montgomery representation. 
+ */ + x[3] = br_dec64be(buf + 1); + x[2] = br_dec64be(buf + 9); + x[1] = br_dec64be(buf + 17); + x[0] = br_dec64be(buf + 25); + y[3] = br_dec64be(buf + 33); + y[2] = br_dec64be(buf + 41); + y[1] = br_dec64be(buf + 49); + y[0] = br_dec64be(buf + 57); + f256_tomonty(x, x); + f256_tomonty(y, y); + + /* + * Verify y^2 = x^3 + A*x + B. In curve P-256, A = -3. + * Note that the Montgomery representation of 0 is 0. We must + * take care to apply the final reduction to make sure we have + * 0 and not p. + */ + f256_montysquare(t, y); + f256_montysquare(x3, x); + f256_montymul(x3, x3, x); + f256_sub(t, t, x3); + f256_add(t, t, x); + f256_add(t, t, x); + f256_add(t, t, x); + f256_sub(t, t, P256_B_MONTY); + f256_final_reduce(t); + tt = t[0] | t[1] | t[2] | t[3]; + r &= EQ((uint32_t)(tt | (tt >> 32)), 0); + + /* + * Return the point in Jacobian coordinates (and Montgomery + * representation). + */ + memcpy(P->x, x, sizeof x); + memcpy(P->y, y, sizeof y); + memcpy(P->z, F256_R, sizeof F256_R); + return r; +} + +/* + * Final conversion for a point: + * - The point is converted back to affine coordinates. + * - Final reduction is performed. + * - The point is encoded into the provided buffer. + * + * If the point is the point-at-infinity, all operations are performed, + * but the buffer contents are indeterminate, and 0 is returned. Otherwise, + * the encoded point is written in the buffer, and 1 is returned. + */ +static uint32_t +point_encode(unsigned char *buf, const p256_jacobian *P) +{ + uint64_t t1[4], t2[4], z; + + /* Set t1 = 1/z^2 and t2 = 1/z^3. */ + f256_invert(t2, P->z); + f256_montysquare(t1, t2); + f256_montymul(t2, t2, t1); + + /* Compute affine coordinates x (in t1) and y (in t2). */ + f256_montymul(t1, P->x, t1); + f256_montymul(t2, P->y, t2); + + /* Convert back from Montgomery representation, and finalize + reductions. */ + f256_frommonty(t1, t1); + f256_frommonty(t2, t2); + f256_final_reduce(t1); + f256_final_reduce(t2); + + /* Encode. 
*/ + buf[0] = 0x04; + br_enc64be(buf + 1, t1[3]); + br_enc64be(buf + 9, t1[2]); + br_enc64be(buf + 17, t1[1]); + br_enc64be(buf + 25, t1[0]); + br_enc64be(buf + 33, t2[3]); + br_enc64be(buf + 41, t2[2]); + br_enc64be(buf + 49, t2[1]); + br_enc64be(buf + 57, t2[0]); + + /* Return success if and only if P->z != 0. */ + z = P->z[0] | P->z[1] | P->z[2] | P->z[3]; + return NEQ((uint32_t)(z | z >> 32), 0); +} + +/* + * Point doubling in Jacobian coordinates: point P is doubled. + * Note: if the source point is the point-at-infinity, then the result is + * still the point-at-infinity, which is correct. Moreover, if the three + * coordinates were zero, then they still are zero in the returned value. + * + * (Note: this is true even without the final reduction: if the three + * coordinates are encoded as four words of value zero each, then the + * result will also have all-zero coordinate encodings, not the alternate + * encoding as the integer p.) + */ +static void +p256_double(p256_jacobian *P) +{ + /* + * Doubling formulas are: + * + * s = 4*x*y^2 + * m = 3*(x + z^2)*(x - z^2) + * x' = m^2 - 2*s + * y' = m*(s - x') - 8*y^4 + * z' = 2*y*z + * + * These formulas work for all points, including points of order 2 + * and points at infinity: + * - If y = 0 then z' = 0. But there is no such point in P-256 + * anyway. + * - If z = 0 then z' = 0. + */ + uint64_t t1[4], t2[4], t3[4], t4[4]; + + /* + * Compute z^2 in t1. + */ + f256_montysquare(t1, P->z); + + /* + * Compute x+z^2 in t2 and x-z^2 in t1. + */ + f256_add(t2, P->x, t1); + f256_sub(t1, P->x, t1); + + /* + * Compute 3*(x+z^2)*(x-z^2) in t1. + */ + f256_montymul(t3, t1, t2); + f256_add(t1, t3, t3); + f256_add(t1, t3, t1); + + /* + * Compute 4*x*y^2 (in t2) and 2*y^2 (in t3). + */ + f256_montysquare(t3, P->y); + f256_add(t3, t3, t3); + f256_montymul(t2, P->x, t3); + f256_add(t2, t2, t2); + + /* + * Compute x' = m^2 - 2*s.
+ */ + f256_montysquare(P->x, t1); + f256_sub(P->x, P->x, t2); + f256_sub(P->x, P->x, t2); + + /* + * Compute z' = 2*y*z. + */ + f256_montymul(t4, P->y, P->z); + f256_add(P->z, t4, t4); + + /* + * Compute y' = m*(s - x') - 8*y^4. Note that we already have + * 2*y^2 in t3. + */ + f256_sub(t2, t2, P->x); + f256_montymul(P->y, t1, t2); + f256_montysquare(t4, t3); + f256_add(t4, t4, t4); + f256_sub(P->y, P->y, t4); +} + +/* + * Point addition (Jacobian coordinates): P1 is replaced with P1+P2. + * This function computes the wrong result in the following cases: + * + * - If P1 == 0 but P2 != 0 + * - If P1 != 0 but P2 == 0 + * - If P1 == P2 + * + * In all three cases, P1 is set to the point at infinity. + * + * Returned value is 0 if one of the following occurs: + * + * - P1 and P2 have the same Y coordinate. + * - P1 == 0 and P2 == 0. + * - The Y coordinate of one of the points is 0 and the other point is + * the point at infinity. + * + * The third case cannot actually happen with valid points, since a point + * with Y == 0 is a point of order 2, and there is no point of order 2 on + * curve P-256. + * + * Therefore, assuming that P1 != 0 and P2 != 0 on input, then the caller + * can apply the following: + * + * - If the result is not the point at infinity, then it is correct. + * - Otherwise, if the returned value is 1, then this is a case of + * P1+P2 == 0, so the result is indeed the point at infinity. + * - Otherwise, P1 == P2, so a "double" operation should have been + * performed. + * + * Note that you can get a returned value of 0 with a correct result, + * e.g. if P1 and P2 have the same Y coordinate, but distinct X coordinates.
+ */ +static uint32_t +p256_add(p256_jacobian *P1, const p256_jacobian *P2) +{ + /* + * Addition formulas are: + * + * u1 = x1 * z2^2 + * u2 = x2 * z1^2 + * s1 = y1 * z2^3 + * s2 = y2 * z1^3 + * h = u2 - u1 + * r = s2 - s1 + * x3 = r^2 - h^3 - 2 * u1 * h^2 + * y3 = r * (u1 * h^2 - x3) - s1 * h^3 + * z3 = h * z1 * z2 + */ + uint64_t t1[4], t2[4], t3[4], t4[4], t5[4], t6[4], t7[4], tt; + uint32_t ret; + + /* + * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3). + */ + f256_montysquare(t3, P2->z); + f256_montymul(t1, P1->x, t3); + f256_montymul(t4, P2->z, t3); + f256_montymul(t3, P1->y, t4); + + /* + * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). + */ + f256_montysquare(t4, P1->z); + f256_montymul(t2, P2->x, t4); + f256_montymul(t5, P1->z, t4); + f256_montymul(t4, P2->y, t5); + + /* + * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4). + * We need to test whether r is zero, so we will do some extra + * reduce. + */ + f256_sub(t2, t2, t1); + f256_sub(t4, t4, t3); + f256_final_reduce(t4); + tt = t4[0] | t4[1] | t4[2] | t4[3]; + ret = (uint32_t)(tt | (tt >> 32)); + ret = (ret | -ret) >> 31; + + /* + * Compute u1*h^2 (in t6) and h^3 (in t5); + */ + f256_montysquare(t7, t2); + f256_montymul(t6, t1, t7); + f256_montymul(t5, t7, t2); + + /* + * Compute x3 = r^2 - h^3 - 2*u1*h^2. + */ + f256_montysquare(P1->x, t4); + f256_sub(P1->x, P1->x, t5); + f256_sub(P1->x, P1->x, t6); + f256_sub(P1->x, P1->x, t6); + + /* + * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. + */ + f256_sub(t6, t6, P1->x); + f256_montymul(P1->y, t4, t6); + f256_montymul(t1, t5, t3); + f256_sub(P1->y, P1->y, t1); + + /* + * Compute z3 = h*z1*z2. + */ + f256_montymul(t1, P1->z, P2->z); + f256_montymul(P1->z, t1, t2); + + return ret; +} + +/* + * Point addition (mixed coordinates): P1 is replaced with P1+P2. + * This is a specialised function for the case when P2 is a non-zero point + * in affine coordinates.
+ * + * This function computes the wrong result in the following cases: + * + * - If P1 == 0 + * - If P1 == P2 + * + * In both cases, P1 is set to the point at infinity. + * + * Returned value is 0 if one of the following occurs: + * + * - P1 and P2 have the same Y (affine) coordinate. + * - The Y coordinate of P2 is 0 and P1 is the point at infinity. + * + * The second case cannot actually happen with valid points, since a point + * with Y == 0 is a point of order 2, and there is no point of order 2 on + * curve P-256. + * + * Therefore, assuming that P1 != 0 on input, then the caller + * can apply the following: + * + * - If the result is not the point at infinity, then it is correct. + * - Otherwise, if the returned value is 1, then this is a case of + * P1+P2 == 0, so the result is indeed the point at infinity. + * - Otherwise, P1 == P2, so a "double" operation should have been + * performed. + * + * Again, a value of 0 may be returned in some cases where the addition + * result is correct. + */ +static uint32_t +p256_add_mixed(p256_jacobian *P1, const p256_affine *P2) +{ + /* + * Addition formulas are: + * + * u1 = x1 + * u2 = x2 * z1^2 + * s1 = y1 + * s2 = y2 * z1^3 + * h = u2 - u1 + * r = s2 - s1 + * x3 = r^2 - h^3 - 2 * u1 * h^2 + * y3 = r * (u1 * h^2 - x3) - s1 * h^3 + * z3 = h * z1 + */ + uint64_t t1[4], t2[4], t3[4], t4[4], t5[4], t6[4], t7[4], tt; + uint32_t ret; + + /* + * Compute u1 = x1 (in t1) and s1 = y1 (in t3). + */ + memcpy(t1, P1->x, sizeof t1); + memcpy(t3, P1->y, sizeof t3); + + /* + * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). + */ + f256_montysquare(t4, P1->z); + f256_montymul(t2, P2->x, t4); + f256_montymul(t5, P1->z, t4); + f256_montymul(t4, P2->y, t5); + + /* + * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4). + * We need to test whether r is zero, so we will do some extra + * reduce.
+ */ + f256_sub(t2, t2, t1); + f256_sub(t4, t4, t3); + f256_final_reduce(t4); + tt = t4[0] | t4[1] | t4[2] | t4[3]; + ret = (uint32_t)(tt | (tt >> 32)); + ret = (ret | -ret) >> 31; + + /* + * Compute u1*h^2 (in t6) and h^3 (in t5); + */ + f256_montysquare(t7, t2); + f256_montymul(t6, t1, t7); + f256_montymul(t5, t7, t2); + + /* + * Compute x3 = r^2 - h^3 - 2*u1*h^2. + */ + f256_montysquare(P1->x, t4); + f256_sub(P1->x, P1->x, t5); + f256_sub(P1->x, P1->x, t6); + f256_sub(P1->x, P1->x, t6); + + /* + * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. + */ + f256_sub(t6, t6, P1->x); + f256_montymul(P1->y, t4, t6); + f256_montymul(t1, t5, t3); + f256_sub(P1->y, P1->y, t1); + + /* + * Compute z3 = h*z1. + */ + f256_montymul(P1->z, P1->z, t2); + + return ret; +} + +#if 0 +/* unused */ +/* + * Point addition (mixed coordinates, complete): P1 is replaced with P1+P2. + * This is a specialised function for the case when P2 is a non-zero point + * in affine coordinates. + * + * This function returns the correct result in all cases. + */ +static uint32_t +p256_add_complete_mixed(p256_jacobian *P1, const p256_affine *P2) +{ + /* + * Addtions formulas, in the general case, are: + * + * u1 = x1 + * u2 = x2 * z1^2 + * s1 = y1 + * s2 = y2 * z1^3 + * h = u2 - u1 + * r = s2 - s1 + * x3 = r^2 - h^3 - 2 * u1 * h^2 + * y3 = r * (u1 * h^2 - x3) - s1 * h^3 + * z3 = h * z1 + * + * These formulas mishandle the two following cases: + * + * - If P1 is the point-at-infinity (z1 = 0), then z3 is + * incorrectly set to 0. + * + * - If P1 = P2, then u1 = u2 and s1 = s2, and x3, y3 and z3 + * are all set to 0. + * + * However, if P1 + P2 = 0, then u1 = u2 but s1 != s2, and then + * we correctly get z3 = 0 (the point-at-infinity). + * + * To fix the case P1 = 0, we perform at the end a copy of P2 + * over P1, conditional to z1 = 0. + * + * For P1 = P2: in that case, both h and r are set to 0, and + * we get x3, y3 and z3 equal to 0.
We can test for that + * occurrence to make a mask which will be all-one if P1 = P2, + * or all-zero otherwise; then we can compute the double of P2 + * and add it, combined with the mask, to (x3,y3,z3). + * + * Using the doubling formulas in p256_double() on (x2,y2), + * simplifying since P2 is affine (i.e. z2 = 1, implicitly), + * we get: + * s = 4*x2*y2^2 + * m = 3*(x2 + 1)*(x2 - 1) + * x' = m^2 - 2*s + * y' = m*(s - x') - 8*y2^4 + * z' = 2*y2 + * which requires only 6 multiplications. Added to the 11 + * multiplications of the normal mixed addition in Jacobian + * coordinates, we get a cost of 17 multiplications in total. + */ + uint64_t t1[4], t2[4], t3[4], t4[4], t5[4], t6[4], t7[4], tt, zz; + int i; + + /* + * Set zz to -1 if P1 is the point at infinity, 0 otherwise. + */ + zz = P1->z[0] | P1->z[1] | P1->z[2] | P1->z[3]; + zz = ((zz | -zz) >> 63) - (uint64_t)1; + + /* + * Compute u1 = x1 (in t1) and s1 = y1 (in t3). + */ + memcpy(t1, P1->x, sizeof t1); + memcpy(t3, P1->y, sizeof t3); + + /* + * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). + */ + f256_montysquare(t4, P1->z); + f256_montymul(t2, P2->x, t4); + f256_montymul(t5, P1->z, t4); + f256_montymul(t4, P2->y, t5); + + /* + * Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4). + * reduce. + */ + f256_sub(t2, t2, t1); + f256_sub(t4, t4, t3); + + /* + * If both h = 0 and r = 0, then P1 = P2, and we want to set + * the mask tt to -1; otherwise, the mask will be 0. + */ + f256_final_reduce(t2); + f256_final_reduce(t4); + tt = t2[0] | t2[1] | t2[2] | t2[3] | t4[0] | t4[1] | t4[2] | t4[3]; + tt = ((tt | -tt) >> 63) - (uint64_t)1; + + /* + * Compute u1*h^2 (in t6) and h^3 (in t5); + */ + f256_montysquare(t7, t2); + f256_montymul(t6, t1, t7); + f256_montymul(t5, t7, t2); + + /* + * Compute x3 = r^2 - h^3 - 2*u1*h^2. + */ + f256_montysquare(P1->x, t4); + f256_sub(P1->x, P1->x, t5); + f256_sub(P1->x, P1->x, t6); + f256_sub(P1->x, P1->x, t6); + + /* + * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. 
+ */ + f256_sub(t6, t6, P1->x); + f256_montymul(P1->y, t4, t6); + f256_montymul(t1, t5, t3); + f256_sub(P1->y, P1->y, t1); + + /* + * Compute z3 = h*z1. + */ + f256_montymul(P1->z, P1->z, t2); + + /* + * The "double" result, in case P1 = P2. + */ + + /* + * Compute z' = 2*y2 (in t1). + */ + f256_add(t1, P2->y, P2->y); + + /* + * Compute 2*(y2^2) (in t2) and s = 4*x2*(y2^2) (in t3). + */ + f256_montysquare(t2, P2->y); + f256_add(t2, t2, t2); + f256_add(t3, t2, t2); + f256_montymul(t3, P2->x, t3); + + /* + * Compute m = 3*(x2^2 - 1) (in t4). + */ + f256_montysquare(t4, P2->x); + f256_sub(t4, t4, F256_R); + f256_add(t5, t4, t4); + f256_add(t4, t4, t5); + + /* + * Compute x' = m^2 - 2*s (in t5). + */ + f256_montysquare(t5, t4); + f256_sub(t5, t3); + f256_sub(t5, t3); + + /* + * Compute y' = m*(s - x') - 8*y2^4 (in t6). + */ + f256_sub(t6, t3, t5); + f256_montymul(t6, t6, t4); + f256_montysquare(t7, t2); + f256_sub(t6, t6, t7); + f256_sub(t6, t6, t7); + + /* + * We now have the alternate (doubling) coordinates in (t5,t6,t1). + * We combine them with (x3,y3,z3). + */ + for (i = 0; i < 4; i ++) { + P1->x[i] |= tt & t5[i]; + P1->y[i] |= tt & t6[i]; + P1->z[i] |= tt & t1[i]; + } + + /* + * If P1 = 0, then we get z3 = 0 (which is invalid); if z1 is 0, + * then we want to replace the result with a copy of P2. The + * test on z1 was done at the start, in the zz mask. + */ + for (i = 0; i < 4; i ++) { + P1->x[i] ^= zz & (P1->x[i] ^ P2->x[i]); + P1->y[i] ^= zz & (P1->y[i] ^ P2->y[i]); + P1->z[i] ^= zz & (P1->z[i] ^ F256_R[i]); + } +} +#endif + +/* + * Inner function for computing a point multiplication. A window is + * provided, with points 1*P to 15*P in affine coordinates. + * + * Assumptions: + * - All provided points are valid points on the curve. + * - Multiplier is non-zero, and smaller than the curve order. + * - Everything is in Montgomery representation. 
+ */ +static void +point_mul_inner(p256_jacobian *R, const p256_affine *W, + const unsigned char *k, size_t klen) +{ + p256_jacobian Q; + uint32_t qz; + + memset(&Q, 0, sizeof Q); + qz = 1; + while (klen -- > 0) { + int i; + unsigned bk; + + bk = *k ++; + for (i = 0; i < 2; i ++) { + uint32_t bits; + uint32_t bnz; + p256_affine T; + p256_jacobian U; + uint32_t n; + int j; + uint64_t m; + + p256_double(&Q); + p256_double(&Q); + p256_double(&Q); + p256_double(&Q); + bits = (bk >> 4) & 0x0F; + bnz = NEQ(bits, 0); + + /* + * Lookup point in window. If the bits are 0, + * we get something invalid, which is not a + * problem because we will use it only if the + * bits are non-zero. + */ + memset(&T, 0, sizeof T); + for (n = 0; n < 15; n ++) { + m = -(uint64_t)EQ(bits, n + 1); + T.x[0] |= m & W[n].x[0]; + T.x[1] |= m & W[n].x[1]; + T.x[2] |= m & W[n].x[2]; + T.x[3] |= m & W[n].x[3]; + T.y[0] |= m & W[n].y[0]; + T.y[1] |= m & W[n].y[1]; + T.y[2] |= m & W[n].y[2]; + T.y[3] |= m & W[n].y[3]; + } + + U = Q; + p256_add_mixed(&U, &T); + + /* + * If qz is still 1, then Q was all-zeros, and this + * is conserved through p256_double(). + */ + m = -(uint64_t)(bnz & qz); + for (j = 0; j < 4; j ++) { + Q.x[j] |= m & T.x[j]; + Q.y[j] |= m & T.y[j]; + Q.z[j] |= m & F256_R[j]; + } + CCOPY(bnz & ~qz, &Q, &U, sizeof Q); + qz &= ~bnz; + bk <<= 4; + } + } + *R = Q; +} + +/* + * Convert a window from Jacobian to affine coordinates. A single + * field inversion is used. This function works for windows up to + * 32 elements. + * + * The destination array (aff[]) and the source array (jac[]) may + * overlap, provided that the start of aff[] is not after the start of + * jac[]. Even if the arrays do _not_ overlap, the source array is + * modified. + */ +static void +window_to_affine(p256_affine *aff, p256_jacobian *jac, int num) +{ + /* + * Convert the window points to affine coordinates. 
We use the + * following trick to mutualize the inversion computation: if + * we have z1, z2, z3, and z4, and want to inverse all of them, + * we compute u = 1/(z1*z2*z3*z4), and then we have: + * 1/z1 = u*z2*z3*z4 + * 1/z2 = u*z1*z3*z4 + * 1/z3 = u*z1*z2*z4 + * 1/z4 = u*z1*z2*z3 + * + * The partial products are computed recursively: + * + * - on input (z_1,z_2), return (z_2,z_1) and z_1*z_2 + * - on input (z_1,z_2,... z_n): + * recurse on (z_1,z_2,... z_(n/2)) -> r1 and m1 + * recurse on (z_(n/2+1),z_(n/2+2)... z_n) -> r2 and m2 + * multiply elements of r1 by m2 -> s1 + * multiply elements of r2 by m1 -> s2 + * return r1||r2 and m1*m2 + * + * In the example below, we suppose that we have 14 elements. + * Let z1, z2,... zE be the 14 values to invert (index noted in + * hexadecimal, starting at 1). + * + * - Depth 1: + * swap(z1, z2); z12 = z1*z2 + * swap(z3, z4); z34 = z3*z4 + * swap(z5, z6); z56 = z5*z6 + * swap(z7, z8); z78 = z7*z8 + * swap(z9, zA); z9A = z9*zA + * swap(zB, zC); zBC = zB*zC + * swap(zD, zE); zDE = zD*zE + * + * - Depth 2: + * z1 <- z1*z34, z2 <- z2*z34, z3 <- z3*z12, z4 <- z4*z12 + * z1234 = z12*z34 + * z5 <- z5*z78, z6 <- z6*z78, z7 <- z7*z56, z8 <- z8*z56 + * z5678 = z56*z78 + * z9 <- z9*zBC, zA <- zA*zBC, zB <- zB*z9A, zC <- zC*z9A + * z9ABC = z9A*zBC + * + * - Depth 3: + * z1 <- z1*z5678, z2 <- z2*z5678, z3 <- z3*z5678, z4 <- z4*z5678 + * z5 <- z5*z1234, z6 <- z6*z1234, z7 <- z7*z1234, z8 <- z8*z1234 + * z12345678 = z1234*z5678 + * z9 <- z9*zDE, zA <- zA*zDE, zB <- zB*zDE, zC <- zC*zDE + * zD <- zD*z9ABC, zE*z9ABC + * z9ABCDE = z9ABC*zDE + * + * - Depth 4: + * multiply z1..z8 by z9ABCDE + * multiply z9..zE by z12345678 + * final z = z12345678*z9ABCDE + */ + + uint64_t z[16][4]; + int i, k, s; +#define zt (z[15]) +#define zu (z[14]) +#define zv (z[13]) + + /* + * First recursion step (pairwise swapping and multiplication). 
+ * If there is an odd number of elements, then we "invent" an + * extra one with coordinate Z = 1 (in Montgomery representation). + */ + for (i = 0; (i + 1) < num; i += 2) { + memcpy(zt, jac[i].z, sizeof zt); + memcpy(jac[i].z, jac[i + 1].z, sizeof zt); + memcpy(jac[i + 1].z, zt, sizeof zt); + f256_montymul(z[i >> 1], jac[i].z, jac[i + 1].z); + } + if ((num & 1) != 0) { + memcpy(z[num >> 1], jac[num - 1].z, sizeof zt); + memcpy(jac[num - 1].z, F256_R, sizeof F256_R); + } + + /* + * Perform further recursion steps. At the entry of each step, + * the process has been done for groups of 's' points. The + * integer k is the log2 of s. + */ + for (k = 1, s = 2; s < num; k ++, s <<= 1) { + int n; + + for (i = 0; i < num; i ++) { + f256_montymul(jac[i].z, jac[i].z, z[(i >> k) ^ 1]); + } + n = (num + s - 1) >> k; + for (i = 0; i < (n >> 1); i ++) { + f256_montymul(z[i], z[i << 1], z[(i << 1) + 1]); + } + if ((n & 1) != 0) { + memmove(z[n >> 1], z[n], sizeof zt); + } + } + + /* + * Invert the final result, and convert all points. + */ + f256_invert(zt, z[0]); + for (i = 0; i < num; i ++) { + f256_montymul(zv, jac[i].z, zt); + f256_montysquare(zu, zv); + f256_montymul(zv, zv, zu); + f256_montymul(aff[i].x, jac[i].x, zu); + f256_montymul(aff[i].y, jac[i].y, zv); + } +} + +/* + * Multiply the provided point by an integer. + * Assumptions: + * - Source point is a valid curve point. + * - Source point is not the point-at-infinity. + * - Integer is not 0, and is lower than the curve order. + * If these conditions are not met, then the result is indeterminate + * (but the process is still constant-time). + */ +static void +p256_mul(p256_jacobian *P, const unsigned char *k, size_t klen) +{ + union { + p256_affine aff[15]; + p256_jacobian jac[15]; + } window; + int i; + + /* + * Compute window, in Jacobian coordinates. 
+ */ + window.jac[0] = *P; + for (i = 2; i < 16; i ++) { + window.jac[i - 1] = window.jac[(i >> 1) - 1]; + if ((i & 1) == 0) { + p256_double(&window.jac[i - 1]); + } else { + p256_add(&window.jac[i - 1], &window.jac[i >> 1]); + } + } + + /* + * Convert the window points to affine coordinates. Point + * window[0] is the source point, already in affine coordinates. + */ + window_to_affine(window.aff, window.jac, 15); + + /* + * Perform point multiplication. + */ + point_mul_inner(P, window.aff, k, klen); +} + +/* + * Precomputed window for the conventional generator: P256_Gwin[n] + * contains (n+1)*G (affine coordinates, in Montgomery representation). + */ +static const p256_affine P256_Gwin[] = { + { + { 0x79E730D418A9143C, 0x75BA95FC5FEDB601, + 0x79FB732B77622510, 0x18905F76A53755C6 }, + { 0xDDF25357CE95560A, 0x8B4AB8E4BA19E45C, + 0xD2E88688DD21F325, 0x8571FF1825885D85 } + }, + { + { 0x850046D410DDD64D, 0xAA6AE3C1A433827D, + 0x732205038D1490D9, 0xF6BB32E43DCF3A3B }, + { 0x2F3648D361BEE1A5, 0x152CD7CBEB236FF8, + 0x19A8FB0E92042DBE, 0x78C577510A5B8A3B } + }, + { + { 0xFFAC3F904EEBC127, 0xB027F84A087D81FB, + 0x66AD77DD87CBBC98, 0x26936A3FB6FF747E }, + { 0xB04C5C1FC983A7EB, 0x583E47AD0861FE1A, + 0x788208311A2EE98E, 0xD5F06A29E587CC07 } + }, + { + { 0x74B0B50D46918DCC, 0x4650A6EDC623C173, + 0x0CDAACACE8100AF2, 0x577362F541B0176B }, + { 0x2D96F24CE4CBABA6, 0x17628471FAD6F447, + 0x6B6C36DEE5DDD22E, 0x84B14C394C5AB863 } + }, + { + { 0xBE1B8AAEC45C61F5, 0x90EC649A94B9537D, + 0x941CB5AAD076C20C, 0xC9079605890523C8 }, + { 0xEB309B4AE7BA4F10, 0x73C568EFE5EB882B, + 0x3540A9877E7A1F68, 0x73A076BB2DD1E916 } + }, + { + { 0x403947373E77664A, 0x55AE744F346CEE3E, + 0xD50A961A5B17A3AD, 0x13074B5954213673 }, + { 0x93D36220D377E44B, 0x299C2B53ADFF14B5, + 0xF424D44CEF639F11, 0xA4C9916D4A07F75F } + }, + { + { 0x0746354EA0173B4F, 0x2BD20213D23C00F7, + 0xF43EAAB50C23BB08, 0x13BA5119C3123E03 }, + { 0x2847D0303F5B9D4D, 0x6742F2F25DA67BDD, + 0xEF933BDC77C94195, 0xEAEDD9156E240867 } + }, + { + 
{ 0x27F14CD19499A78F, 0x462AB5C56F9B3455, + 0x8F90F02AF02CFC6B, 0xB763891EB265230D }, + { 0xF59DA3A9532D4977, 0x21E3327DCF9EBA15, + 0x123C7B84BE60BBF0, 0x56EC12F27706DF76 } + }, + { + { 0x75C96E8F264E20E8, 0xABE6BFED59A7A841, + 0x2CC09C0444C8EB00, 0xE05B3080F0C4E16B }, + { 0x1EB7777AA45F3314, 0x56AF7BEDCE5D45E3, + 0x2B6E019A88B12F1A, 0x086659CDFD835F9B } + }, + { + { 0x2C18DBD19DC21EC8, 0x98F9868A0FCF8139, + 0x737D2CD648250B49, 0xCC61C94724B3428F }, + { 0x0C2B407880DD9E76, 0xC43A8991383FBE08, + 0x5F7D2D65779BE5D2, 0x78719A54EB3B4AB5 } + }, + { + { 0xEA7D260A6245E404, 0x9DE407956E7FDFE0, + 0x1FF3A4158DAC1AB5, 0x3E7090F1649C9073 }, + { 0x1A7685612B944E88, 0x250F939EE57F61C8, + 0x0C0DAA891EAD643D, 0x68930023E125B88E } + }, + { + { 0x04B71AA7D2697768, 0xABDEDEF5CA345A33, + 0x2409D29DEE37385E, 0x4EE1DF77CB83E156 }, + { 0x0CAC12D91CBB5B43, 0x170ED2F6CA895637, + 0x28228CFA8ADE6D66, 0x7FF57C9553238ACA } + }, + { + { 0xCCC425634B2ED709, 0x0E356769856FD30D, + 0xBCBCD43F559E9811, 0x738477AC5395B759 }, + { 0x35752B90C00EE17F, 0x68748390742ED2E3, + 0x7CD06422BD1F5BC1, 0xFBC08769C9E7B797 } + }, + { + { 0xA242A35BB0CF664A, 0x126E48F77F9707E3, + 0x1717BF54C6832660, 0xFAAE7332FD12C72E }, + { 0x27B52DB7995D586B, 0xBE29569E832237C2, + 0xE8E4193E2A65E7DB, 0x152706DC2EAA1BBB } + }, + { + { 0x72BCD8B7BC60055B, 0x03CC23EE56E27E4B, + 0xEE337424E4819370, 0xE2AA0E430AD3DA09 }, + { 0x40B8524F6383C45D, 0xD766355442A41B25, + 0x64EFA6DE778A4797, 0x2042170A7079ADF4 } + } +}; + +/* + * Multiply the conventional generator of the curve by the provided + * integer. Return is written in *P. + * + * Assumptions: + * - Integer is not 0, and is lower than the curve order. + * If this conditions is not met, then the result is indeterminate + * (but the process is still constant-time). 
+ */ +static void +p256_mulgen(p256_jacobian *P, const unsigned char *k, size_t klen) +{ + point_mul_inner(P, P256_Gwin, k, klen); +} + +/* + * Return 1 if all of the following hold: + * - klen <= 32 + * - k != 0 + * - k is lower than the curve order + * Otherwise, return 0. + * + * Constant-time behaviour: only klen may be observable. + */ +static uint32_t +check_scalar(const unsigned char *k, size_t klen) +{ + uint32_t z; + int32_t c; + size_t u; + + if (klen > 32) { + return 0; + } + z = 0; + for (u = 0; u < klen; u ++) { + z |= k[u]; + } + if (klen == 32) { + c = 0; + for (u = 0; u < klen; u ++) { + c |= -(int32_t)EQ0(c) & CMP(k[u], P256_N[u]); + } + } else { + c = -1; + } + return NEQ(z, 0) & LT0(c); +} + +static uint32_t +api_mul(unsigned char *G, size_t Glen, + const unsigned char *k, size_t klen, int curve) +{ + uint32_t r; + p256_jacobian P; + + (void)curve; + if (Glen != 65) { + return 0; + } + r = check_scalar(k, klen); + r &= point_decode(&P, G); + p256_mul(&P, k, klen); + r &= point_encode(G, &P); + return r; +} + +static size_t +api_mulgen(unsigned char *R, + const unsigned char *k, size_t klen, int curve) +{ + p256_jacobian P; + + (void)curve; + p256_mulgen(&P, k, klen); + point_encode(R, &P); + return 65; +} + +static uint32_t +api_muladd(unsigned char *A, const unsigned char *B, size_t len, + const unsigned char *x, size_t xlen, + const unsigned char *y, size_t ylen, int curve) +{ + /* + * We might want to use Shamir's trick here: make a composite + * window of u*P+v*Q points, to merge the two doubling-ladders + * into one. This, however, has some complications: + * + * - During the computation, we may hit the point-at-infinity. + * Thus, we would need p256_add_complete_mixed() (complete + * formulas for point addition), with a higher cost (17 muls + * instead of 11). + * + * - A 4-bit window would be too large, since it would involve + * 16*16-1 = 255 points. 
For the same window size as in the + * p256_mul() case, we would need to reduce the window size + * to 2 bits, and thus perform twice as many non-doubling + * point additions. + * + * - The window may itself contain the point-at-infinity, and + * thus cannot be in all generality be made of affine points. + * Instead, we would need to make it a window of points in + * Jacobian coordinates. Even p256_add_complete_mixed() would + * be inappropriate. + * + * For these reasons, the code below performs two separate + * point multiplications, then computes the final point addition + * (which is both a "normal" addition, and a doubling, to handle + * all cases). + */ + + p256_jacobian P, Q; + uint32_t r, t, s; + uint64_t z; + + (void)curve; + if (len != 65) { + return 0; + } + r = point_decode(&P, A); + p256_mul(&P, x, xlen); + if (B == NULL) { + p256_mulgen(&Q, y, ylen); + } else { + r &= point_decode(&Q, B); + p256_mul(&Q, y, ylen); + } + + /* + * The final addition may fail in case both points are equal. + */ + t = p256_add(&P, &Q); + f256_final_reduce(P.z); + z = P.z[0] | P.z[1] | P.z[2] | P.z[3]; + s = EQ((uint32_t)(z | (z >> 32)), 0); + p256_double(&Q); + + /* + * If s is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). 
So we + * have the following: + * + * s = 0, t = 0 return P (normal addition) + * s = 0, t = 1 return P (normal addition) + * s = 1, t = 0 return Q (a 'double' case) + * s = 1, t = 1 report an error (P+Q = 0) + */ + CCOPY(s & ~t, &P, &Q, sizeof Q); + point_encode(A, &P); + r &= ~(s & t); + return r; +} + +/* see bearssl_ec.h */ +const br_ec_impl br_ec_p256_m64 = { + (uint32_t)0x00800000, + &api_generator, + &api_order, + &api_xoff, + &api_mul, + &api_mulgen, + &api_muladd +}; + +/* see bearssl_ec.h */ +const br_ec_impl * +br_ec_p256_m64_get(void) +{ + return &br_ec_p256_m64; +} + +#else + +/* see bearssl_ec.h */ +const br_ec_impl * +br_ec_p256_m64_get(void) +{ + return 0; +} + +#endif diff --git a/third_party/bearssl/src/ec_prime_i15.c b/third_party/bearssl/src/ec_prime_i15.c new file mode 100644 index 0000000..f86dbe6 --- /dev/null +++ b/third_party/bearssl/src/ec_prime_i15.c @@ -0,0 +1,824 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Parameters for supported curves: + * - field modulus p + * - R^2 mod p (R = 2^(15k) for the smallest k such that R >= p) + * - b*R mod p (b is the second curve equation parameter) + */ + +static const uint16_t P256_P[] = { + 0x0111, + 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x003F, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x4000, 0x7FFF, + 0x7FFF, 0x0001 +}; + +static const uint16_t P256_R2[] = { + 0x0111, + 0x0000, 0x6000, 0x0000, 0x0000, 0x0000, 0x0000, 0x7FFC, 0x7FFF, + 0x7FBF, 0x7FFF, 0x7FBF, 0x7FFF, 0x7FFF, 0x7FFF, 0x77FF, 0x7FFF, + 0x4FFF, 0x0000 +}; + +static const uint16_t P256_B[] = { + 0x0111, + 0x770C, 0x5EEF, 0x29C4, 0x3EC4, 0x6273, 0x0486, 0x4543, 0x3993, + 0x3C01, 0x6B56, 0x212E, 0x57EE, 0x4882, 0x204B, 0x7483, 0x3C16, + 0x0187, 0x0000 +}; + +static const uint16_t P384_P[] = { + 0x0199, + 0x7FFF, 0x7FFF, 0x0003, 0x0000, 0x0000, 0x0000, 0x7FC0, 0x7FFF, + 0x7EFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, + 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, + 0x7FFF, 0x01FF +}; + +static const uint16_t P384_R2[] = { + 0x0199, + 0x1000, 0x0000, 0x0000, 0x7FFF, 0x7FFF, 0x0001, 0x0000, 0x0010, + 0x0000, 0x0000, 0x0000, 0x7F00, 0x7FFF, 0x01FF, 0x0000, 0x1000, + 0x0000, 0x2000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000 +}; + +static const uint16_t P384_B[] = { + 0x0199, + 0x7333, 0x2096, 0x70D1, 0x2310, 0x3020, 0x6197, 0x1464, 0x35BB, + 0x70CA, 0x0117, 0x1920, 0x4136, 0x5FC8, 0x5713, 0x4938, 0x7DD2, + 0x4DD2, 0x4A71, 0x0220, 0x683E, 0x2C87, 0x4DB1, 0x7BFF, 0x6C09, + 0x0452, 0x0084 +}; + +static const uint16_t P521_P[] = { + 0x022B, + 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 
0x7FFF, 0x7FFF, 0x7FFF, + 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, + 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, + 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, + 0x7FFF, 0x7FFF, 0x07FF +}; + +static const uint16_t P521_R2[] = { + 0x022B, + 0x0100, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000 +}; + +static const uint16_t P521_B[] = { + 0x022B, + 0x7002, 0x6A07, 0x751A, 0x228F, 0x71EF, 0x5869, 0x20F4, 0x1EFC, + 0x7357, 0x37E0, 0x4EEC, 0x605E, 0x1652, 0x26F6, 0x31FA, 0x4A8F, + 0x6193, 0x3C2A, 0x3C42, 0x48C7, 0x3489, 0x6771, 0x4C57, 0x5CCD, + 0x2725, 0x545B, 0x503B, 0x5B42, 0x21A0, 0x2534, 0x687E, 0x70E4, + 0x1618, 0x27D7, 0x0465 +}; + +typedef struct { + const uint16_t *p; + const uint16_t *b; + const uint16_t *R2; + uint16_t p0i; + size_t point_len; +} curve_params; + +static inline const curve_params * +id_to_curve(int curve) +{ + static const curve_params pp[] = { + { P256_P, P256_B, P256_R2, 0x0001, 65 }, + { P384_P, P384_B, P384_R2, 0x0001, 97 }, + { P521_P, P521_B, P521_R2, 0x0001, 133 } + }; + + return &pp[curve - BR_EC_secp256r1]; +} + +#define I15_LEN ((BR_MAX_EC_SIZE + 29) / 15) + +/* + * Type for a point in Jacobian coordinates: + * -- three values, x, y and z, in Montgomery representation + * -- affine coordinates are X = x / z^2 and Y = y / z^3 + * -- for the point at infinity, z = 0 + */ +typedef struct { + uint16_t c[3][I15_LEN]; +} jacobian; + +/* + * We use a custom interpreter that uses a dozen registers, and + * only six operations: + * MSET(d, a) copy a into d + * MADD(d, a) d = d+a (modular) + * MSUB(d, a) d = d-a (modular) + * MMUL(d, a, b) d = a*b (Montgomery multiplication) + * MINV(d, a, b) invert d modulo p; a and b are used as scratch registers + * 
MTZ(d) clear return value if d = 0 + * Destination of MMUL (d) must be distinct from operands (a and b). + * There is no such constraint for MSUB and MADD. + * + * Registers include the operand coordinates, and temporaries. + */ +#define MSET(d, a) (0x0000 + ((d) << 8) + ((a) << 4)) +#define MADD(d, a) (0x1000 + ((d) << 8) + ((a) << 4)) +#define MSUB(d, a) (0x2000 + ((d) << 8) + ((a) << 4)) +#define MMUL(d, a, b) (0x3000 + ((d) << 8) + ((a) << 4) + (b)) +#define MINV(d, a, b) (0x4000 + ((d) << 8) + ((a) << 4) + (b)) +#define MTZ(d) (0x5000 + ((d) << 8)) +#define ENDCODE 0 + +/* + * Registers for the input operands. + */ +#define P1x 0 +#define P1y 1 +#define P1z 2 +#define P2x 3 +#define P2y 4 +#define P2z 5 + +/* + * Alternate names for the first input operand. + */ +#define Px 0 +#define Py 1 +#define Pz 2 + +/* + * Temporaries. + */ +#define t1 6 +#define t2 7 +#define t3 8 +#define t4 9 +#define t5 10 +#define t6 11 +#define t7 12 + +/* + * Extra scratch registers available when there is no second operand (e.g. + * for "double" and "affine"). + */ +#define t8 3 +#define t9 4 +#define t10 5 + +/* + * Doubling formulas are: + * + * s = 4*x*y^2 + * m = 3*(x + z^2)*(x - z^2) + * x' = m^2 - 2*s + * y' = m*(s - x') - 8*y^4 + * z' = 2*y*z + * + * If y = 0 (P has order 2) then this yields infinity (z' = 0), as it + * should. This case should not happen anyway, because our curves have + * prime order, and thus do not contain any point of order 2. + * + * If P is infinity (z = 0), then again the formulas yield infinity, + * which is correct. Thus, this code works for all points. + * + * Cost: 8 multiplications + */ +static const uint16_t code_double[] = { + /* + * Compute z^2 (in t1). + */ + MMUL(t1, Pz, Pz), + + /* + * Compute x-z^2 (in t2) and then x+z^2 (in t1). + */ + MSET(t2, Px), + MSUB(t2, t1), + MADD(t1, Px), + + /* + * Compute m = 3*(x+z^2)*(x-z^2) (in t1). 
+ */ + MMUL(t3, t1, t2), + MSET(t1, t3), + MADD(t1, t3), + MADD(t1, t3), + + /* + * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3). + */ + MMUL(t3, Py, Py), + MADD(t3, t3), + MMUL(t2, Px, t3), + MADD(t2, t2), + + /* + * Compute x' = m^2 - 2*s. + */ + MMUL(Px, t1, t1), + MSUB(Px, t2), + MSUB(Px, t2), + + /* + * Compute z' = 2*y*z. + */ + MMUL(t4, Py, Pz), + MSET(Pz, t4), + MADD(Pz, t4), + + /* + * Compute y' = m*(s - x') - 8*y^4. Note that we already have + * 2*y^2 in t3. + */ + MSUB(t2, Px), + MMUL(Py, t1, t2), + MMUL(t4, t3, t3), + MSUB(Py, t4), + MSUB(Py, t4), + + ENDCODE +}; + +/* + * Addition formulas are: + * + * u1 = x1 * z2^2 + * u2 = x2 * z1^2 + * s1 = y1 * z2^3 + * s2 = y2 * z1^3 + * h = u2 - u1 + * r = s2 - s1 + * x3 = r^2 - h^3 - 2 * u1 * h^2 + * y3 = r * (u1 * h^2 - x3) - s1 * h^3 + * z3 = h * z1 * z2 + * + * If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that + * z3 == 0, so the result is correct. + * If either of P1 or P2 is infinity, but not both, then z3 == 0, which is + * not correct. + * h == 0 only if u1 == u2; this happens in two cases: + * -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2 + * -- if s1 != s2 then P1 + P2 == infinity (but neither P1 nor P2 is infinity) + * + * Thus, the following situations are not handled correctly: + * -- P1 = 0 and P2 != 0 + * -- P1 != 0 and P2 = 0 + * -- P1 = P2 + * All other cases are properly computed. However, even in "incorrect" + * situations, the three coordinates still are properly formed field + * elements. + * + * The returned flag is cleared if r == 0. This happens in the following + * cases: + * -- Both points are on the same horizontal line (same Y coordinate). + * -- Both points are infinity. + * -- One point is infinity and the other is on line Y = 0. + * The third case cannot happen with our curves (there is no valid point + * on line Y = 0 since that would be a point of order 2). 
If the two + * source points are non-infinity, then there remains only the case where the + * two points are on the same horizontal line. + * + * This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and + * P2 != 0: + * -- If the returned value is not the point at infinity, then it was properly + * computed. + * -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result + * is indeed the point at infinity. + * -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should + * use the 'double' code. + * + * Cost: 16 multiplications + */ +static const uint16_t code_add[] = { + /* + * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3). + */ + MMUL(t3, P2z, P2z), + MMUL(t1, P1x, t3), + MMUL(t4, P2z, t3), + MMUL(t3, P1y, t4), + + /* + * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). + */ + MMUL(t4, P1z, P1z), + MMUL(t2, P2x, t4), + MMUL(t5, P1z, t4), + MMUL(t4, P2y, t5), + + /* + * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4). + */ + MSUB(t2, t1), + MSUB(t4, t3), + + /* + * Report cases where r = 0 through the returned flag. + */ + MTZ(t4), + + /* + * Compute u1*h^2 (in t6) and h^3 (in t5). + */ + MMUL(t7, t2, t2), + MMUL(t6, t1, t7), + MMUL(t5, t7, t2), + + /* + * Compute x3 = r^2 - h^3 - 2*u1*h^2. + * t1 and t7 can be used as scratch registers. + */ + MMUL(P1x, t4, t4), + MSUB(P1x, t5), + MSUB(P1x, t6), + MSUB(P1x, t6), + + /* + * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. + */ + MSUB(t6, P1x), + MMUL(P1y, t4, t6), + MMUL(t1, t5, t3), + MSUB(P1y, t1), + + /* + * Compute z3 = h*z1*z2. + */ + MMUL(t1, P1z, P2z), + MMUL(P1z, t1, t2), + + ENDCODE +}; + +/* + * Check that the point is on the curve. This code snippet assumes the + * following conventions: + * -- Coordinates x and y have been freshly decoded in P1 (but not + * converted to Montgomery coordinates yet). + * -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1. + */ +static const uint16_t code_check[] = { + + /* Convert x and y to Montgomery representation. 
*/ + MMUL(t1, P1x, P2x), + MMUL(t2, P1y, P2x), + MSET(P1x, t1), + MSET(P1y, t2), + + /* Compute x^3 in t1. */ + MMUL(t2, P1x, P1x), + MMUL(t1, P1x, t2), + + /* Subtract 3*x from t1. */ + MSUB(t1, P1x), + MSUB(t1, P1x), + MSUB(t1, P1x), + + /* Add b. */ + MADD(t1, P2y), + + /* Compute y^2 in t2. */ + MMUL(t2, P1y, P1y), + + /* Compare y^2 with x^3 - 3*x + b; they must match. */ + MSUB(t1, t2), + MTZ(t1), + + /* Set z to 1 (in Montgomery representation). */ + MMUL(P1z, P2x, P2z), + + ENDCODE +}; + +/* + * Conversion back to affine coordinates. This code snippet assumes that + * the z coordinate of P2 is set to 1 (not in Montgomery representation). + */ +static const uint16_t code_affine[] = { + + /* Save z*R in t1. */ + MSET(t1, P1z), + + /* Compute z^3 in t2. */ + MMUL(t2, P1z, P1z), + MMUL(t3, P1z, t2), + MMUL(t2, t3, P2z), + + /* Invert to (1/z^3) in t2. */ + MINV(t2, t3, t4), + + /* Compute y. */ + MSET(t3, P1y), + MMUL(P1y, t2, t3), + + /* Compute (1/z^2) in t3. */ + MMUL(t3, t2, t1), + + /* Compute x. */ + MSET(t2, P1x), + MMUL(P1x, t2, t3), + + ENDCODE +}; + +static uint32_t +run_code(jacobian *P1, const jacobian *P2, + const curve_params *cc, const uint16_t *code) +{ + uint32_t r; + uint16_t t[13][I15_LEN]; + size_t u; + + r = 1; + + /* + * Copy the two operands in the dedicated registers. + */ + memcpy(t[P1x], P1->c, 3 * I15_LEN * sizeof(uint16_t)); + memcpy(t[P2x], P2->c, 3 * I15_LEN * sizeof(uint16_t)); + + /* + * Run formulas. 
+ */ + for (u = 0;; u ++) { + unsigned op, d, a, b; + + op = code[u]; + if (op == 0) { + break; + } + d = (op >> 8) & 0x0F; + a = (op >> 4) & 0x0F; + b = op & 0x0F; + op >>= 12; + switch (op) { + uint32_t ctl; + size_t plen; + unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3]; + + case 0: + memcpy(t[d], t[a], I15_LEN * sizeof(uint16_t)); + break; + case 1: + ctl = br_i15_add(t[d], t[a], 1); + ctl |= NOT(br_i15_sub(t[d], cc->p, 0)); + br_i15_sub(t[d], cc->p, ctl); + break; + case 2: + br_i15_add(t[d], cc->p, br_i15_sub(t[d], t[a], 1)); + break; + case 3: + br_i15_montymul(t[d], t[a], t[b], cc->p, cc->p0i); + break; + case 4: + plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3; + br_i15_encode(tp, plen, cc->p); + tp[plen - 1] -= 2; + br_i15_modpow(t[d], tp, plen, + cc->p, cc->p0i, t[a], t[b]); + break; + default: + r &= ~br_i15_iszero(t[d]); + break; + } + } + + /* + * Copy back result. + */ + memcpy(P1->c, t[P1x], 3 * I15_LEN * sizeof(uint16_t)); + return r; +} + +static void +set_one(uint16_t *x, const uint16_t *p) +{ + size_t plen; + + plen = (p[0] + 31) >> 4; + memset(x, 0, plen * sizeof *x); + x[0] = p[0]; + x[1] = 0x0001; +} + +static void +point_zero(jacobian *P, const curve_params *cc) +{ + memset(P, 0, sizeof *P); + P->c[0][0] = P->c[1][0] = P->c[2][0] = cc->p[0]; +} + +static inline void +point_double(jacobian *P, const curve_params *cc) +{ + run_code(P, P, cc, code_double); +} + +static inline uint32_t +point_add(jacobian *P1, const jacobian *P2, const curve_params *cc) +{ + return run_code(P1, P2, cc, code_add); +} + +static void +point_mul(jacobian *P, const unsigned char *x, size_t xlen, + const curve_params *cc) +{ + /* + * We do a simple double-and-add ladder with a 2-bit window + * to make only one add every two doublings. We thus first + * precompute 2P and 3P in some local buffers. + * + * We always perform two doublings and one addition; the + * addition is with P, 2P and 3P and is done in a temporary + * array. 
+ * + * The addition code cannot handle cases where one of the + * operands is infinity, which is the case at the start of the + * ladder. We therefore need to maintain a flag that controls + * this situation. + */ + uint32_t qz; + jacobian P2, P3, Q, T, U; + + memcpy(&P2, P, sizeof P2); + point_double(&P2, cc); + memcpy(&P3, P, sizeof P3); + point_add(&P3, &P2, cc); + + point_zero(&Q, cc); + qz = 1; + while (xlen -- > 0) { + int k; + + for (k = 6; k >= 0; k -= 2) { + uint32_t bits; + uint32_t bnz; + + point_double(&Q, cc); + point_double(&Q, cc); + memcpy(&T, P, sizeof T); + memcpy(&U, &Q, sizeof U); + bits = (*x >> k) & (uint32_t)3; + bnz = NEQ(bits, 0); + CCOPY(EQ(bits, 2), &T, &P2, sizeof T); + CCOPY(EQ(bits, 3), &T, &P3, sizeof T); + point_add(&U, &T, cc); + CCOPY(bnz & qz, &Q, &T, sizeof Q); + CCOPY(bnz & ~qz, &Q, &U, sizeof Q); + qz &= ~bnz; + } + x ++; + } + memcpy(P, &Q, sizeof Q); +} + +/* + * Decode point into Jacobian coordinates. This function does not support + * the point at infinity. If the point is invalid then this returns 0, but + * the coordinates are still set to properly formed field elements. + */ +static uint32_t +point_decode(jacobian *P, const void *src, size_t len, const curve_params *cc) +{ + /* + * Points must use uncompressed format: + * -- first byte is 0x04; + * -- coordinates X and Y use unsigned big-endian, with the same + * length as the field modulus. + * + * We don't support hybrid format (uncompressed, but first byte + * has value 0x06 or 0x07, depending on the least significant bit + * of Y) because it is rather useless, and explicitly forbidden + * by PKIX (RFC 5480, section 2.2). + * + * We don't support compressed format either, because it is not + * much used in practice (there are or were patent-related + * concerns about point compression, which explains the lack of + * generalised support). Also, point compression support would + * need a bit more code. 
+ */ + const unsigned char *buf; + size_t plen, zlen; + uint32_t r; + jacobian Q; + + buf = src; + point_zero(P, cc); + plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3; + if (len != 1 + (plen << 1)) { + return 0; + } + r = br_i15_decode_mod(P->c[0], buf + 1, plen, cc->p); + r &= br_i15_decode_mod(P->c[1], buf + 1 + plen, plen, cc->p); + + /* + * Check first byte. + */ + r &= EQ(buf[0], 0x04); + /* obsolete + r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06) + & ~(uint32_t)(buf[0] ^ buf[plen << 1])); + */ + + /* + * Convert coordinates and check that the point is valid. + */ + zlen = ((cc->p[0] + 31) >> 4) * sizeof(uint16_t); + memcpy(Q.c[0], cc->R2, zlen); + memcpy(Q.c[1], cc->b, zlen); + set_one(Q.c[2], cc->p); + r &= ~run_code(P, &Q, cc, code_check); + return r; +} + +/* + * Encode a point. This method assumes that the point is correct and is + * not the point at infinity. Encoded size is always 1+2*plen, where + * plen is the field modulus length, in bytes. + */ +static void +point_encode(void *dst, const jacobian *P, const curve_params *cc) +{ + unsigned char *buf; + size_t plen; + jacobian Q, T; + + buf = dst; + plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3; + buf[0] = 0x04; + memcpy(&Q, P, sizeof *P); + set_one(T.c[2], cc->p); + run_code(&Q, &T, cc, code_affine); + br_i15_encode(buf + 1, plen, Q.c[0]); + br_i15_encode(buf + 1 + plen, plen, Q.c[1]); +} + +static const br_ec_curve_def * +id_to_curve_def(int curve) +{ + switch (curve) { + case BR_EC_secp256r1: + return &br_secp256r1; + case BR_EC_secp384r1: + return &br_secp384r1; + case BR_EC_secp521r1: + return &br_secp521r1; + } + return NULL; +} + +static const unsigned char * +api_generator(int curve, size_t *len) +{ + const br_ec_curve_def *cd; + + cd = id_to_curve_def(curve); + *len = cd->generator_len; + return cd->generator; +} + +static const unsigned char * +api_order(int curve, size_t *len) +{ + const br_ec_curve_def *cd; + + cd = id_to_curve_def(curve); + *len = cd->order_len; + return cd->order; +} + 
+static size_t +api_xoff(int curve, size_t *len) +{ + api_generator(curve, len); + *len >>= 1; + return 1; +} + +static uint32_t +api_mul(unsigned char *G, size_t Glen, + const unsigned char *x, size_t xlen, int curve) +{ + uint32_t r; + const curve_params *cc; + jacobian P; + + cc = id_to_curve(curve); + if (Glen != cc->point_len) { + return 0; + } + r = point_decode(&P, G, Glen, cc); + point_mul(&P, x, xlen, cc); + point_encode(G, &P, cc); + return r; +} + +static size_t +api_mulgen(unsigned char *R, + const unsigned char *x, size_t xlen, int curve) +{ + const unsigned char *G; + size_t Glen; + + G = api_generator(curve, &Glen); + memcpy(R, G, Glen); + api_mul(R, Glen, x, xlen, curve); + return Glen; +} + +static uint32_t +api_muladd(unsigned char *A, const unsigned char *B, size_t len, + const unsigned char *x, size_t xlen, + const unsigned char *y, size_t ylen, int curve) +{ + uint32_t r, t, z; + const curve_params *cc; + jacobian P, Q; + + /* + * TODO: see about merging the two ladders. Right now, we do + * two independent point multiplications, which is a bit + * wasteful of CPU resources (but yields short code). + */ + + cc = id_to_curve(curve); + if (len != cc->point_len) { + return 0; + } + r = point_decode(&P, A, len, cc); + if (B == NULL) { + size_t Glen; + + B = api_generator(curve, &Glen); + } + r &= point_decode(&Q, B, len, cc); + point_mul(&P, x, xlen, cc); + point_mul(&Q, y, ylen, cc); + + /* + * We want to compute P+Q. Since the base points A and B are distinct + * from infinity, and the multipliers are non-zero and lower than the + * curve order, then we know that P and Q are non-infinity. This + * leaves two special situations to test for: + * -- If P = Q then we must use point_double(). + * -- If P+Q = 0 then we must report an error. + */ + t = point_add(&P, &Q, cc); + point_double(&Q, cc); + z = br_i15_iszero(P.c[2]); + + /* + * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). 
So we + * have the following: + * + * z = 0, t = 0 return P (normal addition) + * z = 0, t = 1 return P (normal addition) + * z = 1, t = 0 return Q (a 'double' case) + * z = 1, t = 1 report an error (P+Q = 0) + */ + CCOPY(z & ~t, &P, &Q, sizeof Q); + point_encode(A, &P, cc); + r &= ~(z & t); + + return r; +} + +/* see bearssl_ec.h */ +const br_ec_impl br_ec_prime_i15 = { + (uint32_t)0x03800000, + &api_generator, + &api_order, + &api_xoff, + &api_mul, + &api_mulgen, + &api_muladd +}; diff --git a/third_party/bearssl/src/ec_prime_i31.c b/third_party/bearssl/src/ec_prime_i31.c new file mode 100644 index 0000000..b205f36 --- /dev/null +++ b/third_party/bearssl/src/ec_prime_i31.c @@ -0,0 +1,826 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* + * Parameters for supported curves (field modulus, and 'b' equation + * parameter; both values use the 'i31' format, and 'b' is in Montgomery + * representation). + */ + +static const uint32_t P256_P[] = { + 0x00000108, + 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x00000007, + 0x00000000, 0x00000000, 0x00000040, 0x7FFFFF80, + 0x000000FF +}; + +static const uint32_t P256_R2[] = { + 0x00000108, + 0x00014000, 0x00018000, 0x00000000, 0x7FF40000, + 0x7FEFFFFF, 0x7FF7FFFF, 0x7FAFFFFF, 0x005FFFFF, + 0x00000000 +}; + +static const uint32_t P256_B[] = { + 0x00000108, + 0x6FEE1803, 0x6229C4BD, 0x21B139BE, 0x327150AA, + 0x3567802E, 0x3F7212ED, 0x012E4355, 0x782DD38D, + 0x0000000E +}; + +static const uint32_t P384_P[] = { + 0x0000018C, + 0x7FFFFFFF, 0x00000001, 0x00000000, 0x7FFFFFF8, + 0x7FFFFFEF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, + 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, + 0x00000FFF +}; + +static const uint32_t P384_R2[] = { + 0x0000018C, + 0x00000000, 0x00000080, 0x7FFFFE00, 0x000001FF, + 0x00000800, 0x00000000, 0x7FFFE000, 0x00001FFF, + 0x00008000, 0x00008000, 0x00000000, 0x00000000, + 0x00000000 +}; + +static const uint32_t P384_B[] = { + 0x0000018C, + 0x6E666840, 0x070D0392, 0x5D810231, 0x7651D50C, + 0x17E218D6, 0x1B192002, 0x44EFE441, 0x3A524E2B, + 0x2719BA5F, 0x41F02209, 0x36C5643E, 0x5813EFFE, + 0x000008A5 +}; + +static const uint32_t P521_P[] = { + 0x00000219, + 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, + 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, + 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, + 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, + 0x01FFFFFF +}; + +static const uint32_t P521_R2[] = { + 0x00000219, + 0x00001000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000 +}; + +static const uint32_t P521_B[] = { + 0x00000219, + 0x540FC00A, 0x228FEA35, 0x2C34F1EF, 
0x67BF107A, + 0x46FC1CD5, 0x1605E9DD, 0x6937B165, 0x272A3D8F, + 0x42785586, 0x44C8C778, 0x15F3B8B4, 0x64B73366, + 0x03BA8B69, 0x0D05B42A, 0x21F929A2, 0x2C31C393, + 0x00654FAE +}; + +typedef struct { + const uint32_t *p; + const uint32_t *b; + const uint32_t *R2; + uint32_t p0i; + size_t point_len; +} curve_params; + +static inline const curve_params * +id_to_curve(int curve) +{ + static const curve_params pp[] = { + { P256_P, P256_B, P256_R2, 0x00000001, 65 }, + { P384_P, P384_B, P384_R2, 0x00000001, 97 }, + { P521_P, P521_B, P521_R2, 0x00000001, 133 } + }; + + return &pp[curve - BR_EC_secp256r1]; +} + +#define I31_LEN ((BR_MAX_EC_SIZE + 61) / 31) + +/* + * Type for a point in Jacobian coordinates: + * -- three values, x, y and z, in Montgomery representation + * -- affine coordinates are X = x / z^2 and Y = y / z^3 + * -- for the point at infinity, z = 0 + */ +typedef struct { + uint32_t c[3][I31_LEN]; +} jacobian; + +/* + * We use a custom interpreter that uses a dozen registers, and + * only six operations: + * MSET(d, a) copy a into d + * MADD(d, a) d = d+a (modular) + * MSUB(d, a) d = d-a (modular) + * MMUL(d, a, b) d = a*b (Montgomery multiplication) + * MINV(d, a, b) invert d modulo p; a and b are used as scratch registers + * MTZ(d) clear return value if d = 0 + * Destination of MMUL (d) must be distinct from operands (a and b). + * There is no such constraint for MSUB and MADD. + * + * Registers include the operand coordinates, and temporaries. + */ +#define MSET(d, a) (0x0000 + ((d) << 8) + ((a) << 4)) +#define MADD(d, a) (0x1000 + ((d) << 8) + ((a) << 4)) +#define MSUB(d, a) (0x2000 + ((d) << 8) + ((a) << 4)) +#define MMUL(d, a, b) (0x3000 + ((d) << 8) + ((a) << 4) + (b)) +#define MINV(d, a, b) (0x4000 + ((d) << 8) + ((a) << 4) + (b)) +#define MTZ(d) (0x5000 + ((d) << 8)) +#define ENDCODE 0 + +/* + * Registers for the input operands. 
+ */ +#define P1x 0 +#define P1y 1 +#define P1z 2 +#define P2x 3 +#define P2y 4 +#define P2z 5 + +/* + * Alternate names for the first input operand. + */ +#define Px 0 +#define Py 1 +#define Pz 2 + +/* + * Temporaries. + */ +#define t1 6 +#define t2 7 +#define t3 8 +#define t4 9 +#define t5 10 +#define t6 11 +#define t7 12 + +/* + * Extra scratch registers available when there is no second operand (e.g. + * for "double" and "affine"). + */ +#define t8 3 +#define t9 4 +#define t10 5 + +/* + * Doubling formulas are: + * + * s = 4*x*y^2 + * m = 3*(x + z^2)*(x - z^2) + * x' = m^2 - 2*s + * y' = m*(s - x') - 8*y^4 + * z' = 2*y*z + * + * If y = 0 (P has order 2) then this yields infinity (z' = 0), as it + * should. This case should not happen anyway, because our curves have + * prime order, and thus do not contain any point of order 2. + * + * If P is infinity (z = 0), then again the formulas yield infinity, + * which is correct. Thus, this code works for all points. + * + * Cost: 8 multiplications + */ +static const uint16_t code_double[] = { + /* + * Compute z^2 (in t1). + */ + MMUL(t1, Pz, Pz), + + /* + * Compute x-z^2 (in t2) and then x+z^2 (in t1). + */ + MSET(t2, Px), + MSUB(t2, t1), + MADD(t1, Px), + + /* + * Compute m = 3*(x+z^2)*(x-z^2) (in t1). + */ + MMUL(t3, t1, t2), + MSET(t1, t3), + MADD(t1, t3), + MADD(t1, t3), + + /* + * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3). + */ + MMUL(t3, Py, Py), + MADD(t3, t3), + MMUL(t2, Px, t3), + MADD(t2, t2), + + /* + * Compute x' = m^2 - 2*s. + */ + MMUL(Px, t1, t1), + MSUB(Px, t2), + MSUB(Px, t2), + + /* + * Compute z' = 2*y*z. + */ + MMUL(t4, Py, Pz), + MSET(Pz, t4), + MADD(Pz, t4), + + /* + * Compute y' = m*(s - x') - 8*y^4. Note that we already have + * 2*y^2 in t3. 
+ */ + MSUB(t2, Px), + MMUL(Py, t1, t2), + MMUL(t4, t3, t3), + MSUB(Py, t4), + MSUB(Py, t4), + + ENDCODE +}; + +/* + * Addition formulas are: + * + * u1 = x1 * z2^2 + * u2 = x2 * z1^2 + * s1 = y1 * z2^3 + * s2 = y2 * z1^3 + * h = u2 - u1 + * r = s2 - s1 + * x3 = r^2 - h^3 - 2 * u1 * h^2 + * y3 = r * (u1 * h^2 - x3) - s1 * h^3 + * z3 = h * z1 * z2 + * + * If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that + * z3 == 0, so the result is correct. + * If either of P1 or P2 is infinity, but not both, then z3 == 0, which is + * not correct. + * h == 0 only if u1 == u2; this happens in two cases: + * -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2 + * -- if s1 != s2 then P1 + P2 == infinity (but neither P1 nor P2 is infinity) + * + * Thus, the following situations are not handled correctly: + * -- P1 = 0 and P2 != 0 + * -- P1 != 0 and P2 = 0 + * -- P1 = P2 + * All other cases are properly computed. However, even in "incorrect" + * situations, the three coordinates still are properly formed field + * elements. + * + * The returned flag is cleared if r == 0. This happens in the following + * cases: + * -- Both points are on the same horizontal line (same Y coordinate). + * -- Both points are infinity. + * -- One point is infinity and the other is on line Y = 0. + * The third case cannot happen with our curves (there is no valid point + * on line Y = 0 since that would be a point of order 2). If the two + * source points are non-infinity, then there remains only the case where the + * two points are on the same horizontal line. + * + * This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and + * P2 != 0: + * -- If the returned value is not the point at infinity, then it was properly + * computed. + * -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result + * is indeed the point at infinity. + * -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should + * use the 'double' code. 
+ * + * Cost: 16 multiplications + */ +static const uint16_t code_add[] = { + /* + * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3). + */ + MMUL(t3, P2z, P2z), + MMUL(t1, P1x, t3), + MMUL(t4, P2z, t3), + MMUL(t3, P1y, t4), + + /* + * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). + */ + MMUL(t4, P1z, P1z), + MMUL(t2, P2x, t4), + MMUL(t5, P1z, t4), + MMUL(t4, P2y, t5), + + /* + * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4). + */ + MSUB(t2, t1), + MSUB(t4, t3), + + /* + * Report cases where r = 0 through the returned flag. + */ + MTZ(t4), + + /* + * Compute u1*h^2 (in t6) and h^3 (in t5). + */ + MMUL(t7, t2, t2), + MMUL(t6, t1, t7), + MMUL(t5, t7, t2), + + /* + * Compute x3 = r^2 - h^3 - 2*u1*h^2. + * t1 and t7 can be used as scratch registers. + */ + MMUL(P1x, t4, t4), + MSUB(P1x, t5), + MSUB(P1x, t6), + MSUB(P1x, t6), + + /* + * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. + */ + MSUB(t6, P1x), + MMUL(P1y, t4, t6), + MMUL(t1, t5, t3), + MSUB(P1y, t1), + + /* + * Compute z3 = h*z1*z2. + */ + MMUL(t1, P1z, P2z), + MMUL(P1z, t1, t2), + + ENDCODE +}; + +/* + * Check that the point is on the curve. This code snippet assumes the + * following conventions: + * -- Coordinates x and y have been freshly decoded in P1 (but not + * converted to Montgomery coordinates yet). + * -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1. + */ +static const uint16_t code_check[] = { + + /* Convert x and y to Montgomery representation. */ + MMUL(t1, P1x, P2x), + MMUL(t2, P1y, P2x), + MSET(P1x, t1), + MSET(P1y, t2), + + /* Compute x^3 in t1. */ + MMUL(t2, P1x, P1x), + MMUL(t1, P1x, t2), + + /* Subtract 3*x from t1. */ + MSUB(t1, P1x), + MSUB(t1, P1x), + MSUB(t1, P1x), + + /* Add b. */ + MADD(t1, P2y), + + /* Compute y^2 in t2. */ + MMUL(t2, P1y, P1y), + + /* Compare y^2 with x^3 - 3*x + b; they must match. */ + MSUB(t1, t2), + MTZ(t1), + + /* Set z to 1 (in Montgomery representation). 
*/ + MMUL(P1z, P2x, P2z), + + ENDCODE +}; + +/* + * Conversion back to affine coordinates. This code snippet assumes that + * the z coordinate of P2 is set to 1 (not in Montgomery representation). + */ +static const uint16_t code_affine[] = { + + /* Save z*R in t1. */ + MSET(t1, P1z), + + /* Compute z^3 in t2. */ + MMUL(t2, P1z, P1z), + MMUL(t3, P1z, t2), + MMUL(t2, t3, P2z), + + /* Invert to (1/z^3) in t2. */ + MINV(t2, t3, t4), + + /* Compute y. */ + MSET(t3, P1y), + MMUL(P1y, t2, t3), + + /* Compute (1/z^2) in t3. */ + MMUL(t3, t2, t1), + + /* Compute x. */ + MSET(t2, P1x), + MMUL(P1x, t2, t3), + + ENDCODE +}; + +static uint32_t +run_code(jacobian *P1, const jacobian *P2, + const curve_params *cc, const uint16_t *code) +{ + uint32_t r; + uint32_t t[13][I31_LEN]; + size_t u; + + r = 1; + + /* + * Copy the two operands in the dedicated registers. + */ + memcpy(t[P1x], P1->c, 3 * I31_LEN * sizeof(uint32_t)); + memcpy(t[P2x], P2->c, 3 * I31_LEN * sizeof(uint32_t)); + + /* + * Run formulas. + */ + for (u = 0;; u ++) { + unsigned op, d, a, b; + + op = code[u]; + if (op == 0) { + break; + } + d = (op >> 8) & 0x0F; + a = (op >> 4) & 0x0F; + b = op & 0x0F; + op >>= 12; + switch (op) { + uint32_t ctl; + size_t plen; + unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3]; + + case 0: + memcpy(t[d], t[a], I31_LEN * sizeof(uint32_t)); + break; + case 1: + ctl = br_i31_add(t[d], t[a], 1); + ctl |= NOT(br_i31_sub(t[d], cc->p, 0)); + br_i31_sub(t[d], cc->p, ctl); + break; + case 2: + br_i31_add(t[d], cc->p, br_i31_sub(t[d], t[a], 1)); + break; + case 3: + br_i31_montymul(t[d], t[a], t[b], cc->p, cc->p0i); + break; + case 4: + plen = (cc->p[0] - (cc->p[0] >> 5) + 7) >> 3; + br_i31_encode(tp, plen, cc->p); + tp[plen - 1] -= 2; + br_i31_modpow(t[d], tp, plen, + cc->p, cc->p0i, t[a], t[b]); + break; + default: + r &= ~br_i31_iszero(t[d]); + break; + } + } + + /* + * Copy back result. 
+ */ + memcpy(P1->c, t[P1x], 3 * I31_LEN * sizeof(uint32_t)); + return r; +} + +static void +set_one(uint32_t *x, const uint32_t *p) +{ + size_t plen; + + plen = (p[0] + 63) >> 5; + memset(x, 0, plen * sizeof *x); + x[0] = p[0]; + x[1] = 0x00000001; +} + +static void +point_zero(jacobian *P, const curve_params *cc) +{ + memset(P, 0, sizeof *P); + P->c[0][0] = P->c[1][0] = P->c[2][0] = cc->p[0]; +} + +static inline void +point_double(jacobian *P, const curve_params *cc) +{ + run_code(P, P, cc, code_double); +} + +static inline uint32_t +point_add(jacobian *P1, const jacobian *P2, const curve_params *cc) +{ + return run_code(P1, P2, cc, code_add); +} + +static void +point_mul(jacobian *P, const unsigned char *x, size_t xlen, + const curve_params *cc) +{ + /* + * We do a simple double-and-add ladder with a 2-bit window + * to make only one add every two doublings. We thus first + * precompute 2P and 3P in some local buffers. + * + * We always perform two doublings and one addition; the + * addition is with P, 2P and 3P and is done in a temporary + * array. + * + * The addition code cannot handle cases where one of the + * operands is infinity, which is the case at the start of the + * ladder. We therefore need to maintain a flag that controls + * this situation. 
+ */ + uint32_t qz; + jacobian P2, P3, Q, T, U; + + memcpy(&P2, P, sizeof P2); + point_double(&P2, cc); + memcpy(&P3, P, sizeof P3); + point_add(&P3, &P2, cc); + + point_zero(&Q, cc); + qz = 1; + while (xlen -- > 0) { + int k; + + for (k = 6; k >= 0; k -= 2) { + uint32_t bits; + uint32_t bnz; + + point_double(&Q, cc); + point_double(&Q, cc); + memcpy(&T, P, sizeof T); + memcpy(&U, &Q, sizeof U); + bits = (*x >> k) & (uint32_t)3; + bnz = NEQ(bits, 0); + CCOPY(EQ(bits, 2), &T, &P2, sizeof T); + CCOPY(EQ(bits, 3), &T, &P3, sizeof T); + point_add(&U, &T, cc); + CCOPY(bnz & qz, &Q, &T, sizeof Q); + CCOPY(bnz & ~qz, &Q, &U, sizeof Q); + qz &= ~bnz; + } + x ++; + } + memcpy(P, &Q, sizeof Q); +} + +/* + * Decode point into Jacobian coordinates. This function does not support + * the point at infinity. If the point is invalid then this returns 0, but + * the coordinates are still set to properly formed field elements. + */ +static uint32_t +point_decode(jacobian *P, const void *src, size_t len, const curve_params *cc) +{ + /* + * Points must use uncompressed format: + * -- first byte is 0x04; + * -- coordinates X and Y use unsigned big-endian, with the same + * length as the field modulus. + * + * We don't support hybrid format (uncompressed, but first byte + * has value 0x06 or 0x07, depending on the least significant bit + * of Y) because it is rather useless, and explicitly forbidden + * by PKIX (RFC 5480, section 2.2). + * + * We don't support compressed format either, because it is not + * much used in practice (there are or were patent-related + * concerns about point compression, which explains the lack of + * generalised support). Also, point compression support would + * need a bit more code. 
+ */ + const unsigned char *buf; + size_t plen, zlen; + uint32_t r; + jacobian Q; + + buf = src; + point_zero(P, cc); + plen = (cc->p[0] - (cc->p[0] >> 5) + 7) >> 3; + if (len != 1 + (plen << 1)) { + return 0; + } + r = br_i31_decode_mod(P->c[0], buf + 1, plen, cc->p); + r &= br_i31_decode_mod(P->c[1], buf + 1 + plen, plen, cc->p); + + /* + * Check first byte. + */ + r &= EQ(buf[0], 0x04); + /* obsolete + r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06) + & ~(uint32_t)(buf[0] ^ buf[plen << 1])); + */ + + /* + * Convert coordinates and check that the point is valid. + */ + zlen = ((cc->p[0] + 63) >> 5) * sizeof(uint32_t); + memcpy(Q.c[0], cc->R2, zlen); + memcpy(Q.c[1], cc->b, zlen); + set_one(Q.c[2], cc->p); + r &= ~run_code(P, &Q, cc, code_check); + return r; +} + +/* + * Encode a point. This method assumes that the point is correct and is + * not the point at infinity. Encoded size is always 1+2*plen, where + * plen is the field modulus length, in bytes. + */ +static void +point_encode(void *dst, const jacobian *P, const curve_params *cc) +{ + unsigned char *buf; + uint32_t xbl; + size_t plen; + jacobian Q, T; + + buf = dst; + xbl = cc->p[0]; + xbl -= (xbl >> 5); + plen = (xbl + 7) >> 3; + buf[0] = 0x04; + memcpy(&Q, P, sizeof *P); + set_one(T.c[2], cc->p); + run_code(&Q, &T, cc, code_affine); + br_i31_encode(buf + 1, plen, Q.c[0]); + br_i31_encode(buf + 1 + plen, plen, Q.c[1]); +} + +static const br_ec_curve_def * +id_to_curve_def(int curve) +{ + switch (curve) { + case BR_EC_secp256r1: + return &br_secp256r1; + case BR_EC_secp384r1: + return &br_secp384r1; + case BR_EC_secp521r1: + return &br_secp521r1; + } + return NULL; +} + +static const unsigned char * +api_generator(int curve, size_t *len) +{ + const br_ec_curve_def *cd; + + cd = id_to_curve_def(curve); + *len = cd->generator_len; + return cd->generator; +} + +static const unsigned char * +api_order(int curve, size_t *len) +{ + const br_ec_curve_def *cd; + + cd = id_to_curve_def(curve); + *len = 
cd->order_len; + return cd->order; +} + +static size_t +api_xoff(int curve, size_t *len) +{ + api_generator(curve, len); + *len >>= 1; + return 1; +} + +static uint32_t +api_mul(unsigned char *G, size_t Glen, + const unsigned char *x, size_t xlen, int curve) +{ + uint32_t r; + const curve_params *cc; + jacobian P; + + cc = id_to_curve(curve); + if (Glen != cc->point_len) { + return 0; + } + r = point_decode(&P, G, Glen, cc); + point_mul(&P, x, xlen, cc); + point_encode(G, &P, cc); + return r; +} + +static size_t +api_mulgen(unsigned char *R, + const unsigned char *x, size_t xlen, int curve) +{ + const unsigned char *G; + size_t Glen; + + G = api_generator(curve, &Glen); + memcpy(R, G, Glen); + api_mul(R, Glen, x, xlen, curve); + return Glen; +} + +static uint32_t +api_muladd(unsigned char *A, const unsigned char *B, size_t len, + const unsigned char *x, size_t xlen, + const unsigned char *y, size_t ylen, int curve) +{ + uint32_t r, t, z; + const curve_params *cc; + jacobian P, Q; + + /* + * TODO: see about merging the two ladders. Right now, we do + * two independent point multiplications, which is a bit + * wasteful of CPU resources (but yields short code). + */ + + cc = id_to_curve(curve); + if (len != cc->point_len) { + return 0; + } + r = point_decode(&P, A, len, cc); + if (B == NULL) { + size_t Glen; + + B = api_generator(curve, &Glen); + } + r &= point_decode(&Q, B, len, cc); + point_mul(&P, x, xlen, cc); + point_mul(&Q, y, ylen, cc); + + /* + * We want to compute P+Q. Since the base points A and B are distinct + * from infinity, and the multipliers are non-zero and lower than the + * curve order, then we know that P and Q are non-infinity. This + * leaves two special situations to test for: + * -- If P = Q then we must use point_double(). + * -- If P+Q = 0 then we must report an error. + */ + t = point_add(&P, &Q, cc); + point_double(&Q, cc); + z = br_i31_iszero(P.c[2]); + + /* + * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). 
So we + * have the following: + * + * z = 0, t = 0 return P (normal addition) + * z = 0, t = 1 return P (normal addition) + * z = 1, t = 0 return Q (a 'double' case) + * z = 1, t = 1 report an error (P+Q = 0) + */ + CCOPY(z & ~t, &P, &Q, sizeof Q); + point_encode(A, &P, cc); + r &= ~(z & t); + + return r; +} + +/* see bearssl_ec.h */ +const br_ec_impl br_ec_prime_i31 = { + (uint32_t)0x03800000, + &api_generator, + &api_order, + &api_xoff, + &api_mul, + &api_mulgen, + &api_muladd +}; diff --git a/third_party/bearssl/src/ec_pubkey.c b/third_party/bearssl/src/ec_pubkey.c new file mode 100644 index 0000000..383ff28 --- /dev/null +++ b/third_party/bearssl/src/ec_pubkey.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +static const unsigned char POINT_LEN[] = { + 0, /* 0: not a valid curve ID */ + 43, /* sect163k1 */ + 43, /* sect163r1 */ + 43, /* sect163r2 */ + 51, /* sect193r1 */ + 51, /* sect193r2 */ + 61, /* sect233k1 */ + 61, /* sect233r1 */ + 61, /* sect239k1 */ + 73, /* sect283k1 */ + 73, /* sect283r1 */ + 105, /* sect409k1 */ + 105, /* sect409r1 */ + 145, /* sect571k1 */ + 145, /* sect571r1 */ + 41, /* secp160k1 */ + 41, /* secp160r1 */ + 41, /* secp160r2 */ + 49, /* secp192k1 */ + 49, /* secp192r1 */ + 57, /* secp224k1 */ + 57, /* secp224r1 */ + 65, /* secp256k1 */ + 65, /* secp256r1 */ + 97, /* secp384r1 */ + 133, /* secp521r1 */ + 65, /* brainpoolP256r1 */ + 97, /* brainpoolP384r1 */ + 129, /* brainpoolP512r1 */ + 32, /* curve25519 */ + 56, /* curve448 */ +}; + +/* see bearssl_ec.h */ +size_t +br_ec_compute_pub(const br_ec_impl *impl, br_ec_public_key *pk, + void *kbuf, const br_ec_private_key *sk) +{ + int curve; + size_t len; + + curve = sk->curve; + if (curve < 0 || curve >= 32 || curve >= (int)(sizeof POINT_LEN) + || ((impl->supported_curves >> curve) & 1) == 0) + { + return 0; + } + if (kbuf == NULL) { + return POINT_LEN[curve]; + } + len = impl->mulgen(kbuf, sk->x, sk->xlen, curve); + if (pk != NULL) { + pk->curve = curve; + pk->q = kbuf; + pk->qlen = len; + } + return len; +} diff --git a/third_party/bearssl/src/ec_secp256r1.c b/third_party/bearssl/src/ec_secp256r1.c new file mode 100644 index 0000000..a9d6c45 --- /dev/null +++ b/third_party/bearssl/src/ec_secp256r1.c @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the 
/*
 * Constant byte string named after the subgroup order of the curve
 * (32 bytes); presumably unsigned big-endian, as for the other encoded
 * integers in this library.
 */
static const unsigned char P256_N[] = {
	0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	0xBC, 0xE6, 0xFA, 0xAD, 0xA7, 0x17, 0x9E, 0x84,
	0xF3, 0xB9, 0xCA, 0xC2, 0xFC, 0x63, 0x25, 0x51
};

/*
 * Constant byte string named after the curve generator (65 bytes):
 * a leading 0x04 byte followed by two 32-byte values, which matches the
 * uncompressed point format (0x04 || X || Y).
 */
static const unsigned char P256_G[] = {
	0x04, 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42,
	0x47, 0xF8, 0xBC, 0xE6, 0xE5, 0x63, 0xA4, 0x40,
	0xF2, 0x77, 0x03, 0x7D, 0x81, 0x2D, 0xEB, 0x33,
	0xA0, 0xF4, 0xA1, 0x39, 0x45, 0xD8, 0x98, 0xC2,
	0x96, 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F,
	0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C, 0x0F, 0x9E,
	0x16, 0x2B, 0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E,
	0xCE, 0xCB, 0xB6, 0x40, 0x68, 0x37, 0xBF, 0x51,
	0xF5
};

/*
 * Curve definition: curve identifier, then the two tables above, each
 * with its length (field layout is declared in inner.h -- presumably
 * order first, then generator; confirm there).
 *
 * see inner.h
 */
const br_ec_curve_def br_secp256r1 = {
	BR_EC_secp256r1,
	P256_N, sizeof P256_N,
	P256_G, sizeof P256_G
};
"Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +static const unsigned char P384_N[] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xC7, 0x63, 0x4D, 0x81, 0xF4, 0x37, 0x2D, 0xDF, + 0x58, 0x1A, 0x0D, 0xB2, 0x48, 0xB0, 0xA7, 0x7A, + 0xEC, 0xEC, 0x19, 0x6A, 0xCC, 0xC5, 0x29, 0x73 +}; + +static const unsigned char P384_G[] = { + 0x04, 0xAA, 0x87, 0xCA, 0x22, 0xBE, 0x8B, 0x05, + 0x37, 0x8E, 0xB1, 0xC7, 0x1E, 0xF3, 0x20, 0xAD, + 0x74, 0x6E, 0x1D, 0x3B, 0x62, 0x8B, 0xA7, 0x9B, + 0x98, 0x59, 0xF7, 0x41, 0xE0, 0x82, 0x54, 0x2A, + 0x38, 0x55, 0x02, 0xF2, 0x5D, 0xBF, 0x55, 0x29, + 0x6C, 0x3A, 0x54, 0x5E, 0x38, 0x72, 0x76, 0x0A, + 0xB7, 0x36, 0x17, 0xDE, 0x4A, 0x96, 0x26, 0x2C, + 0x6F, 0x5D, 0x9E, 0x98, 0xBF, 0x92, 0x92, 0xDC, + 0x29, 0xF8, 0xF4, 0x1D, 0xBD, 0x28, 0x9A, 0x14, + 0x7C, 0xE9, 0xDA, 0x31, 0x13, 0xB5, 0xF0, 0xB8, + 0xC0, 0x0A, 0x60, 0xB1, 0xCE, 0x1D, 0x7E, 0x81, + 0x9D, 0x7A, 0x43, 0x1D, 0x7C, 0x90, 0xEA, 0x0E, + 0x5F +}; + +/* see inner.h */ 
/*
 * Curve definition: curve identifier, then the P384_N and P384_G
 * tables, each with its length (field layout is declared in inner.h --
 * presumably order first, then generator; confirm there).
 */
const br_ec_curve_def br_secp384r1 = {
	BR_EC_secp384r1,
	P384_N, sizeof P384_N,
	P384_G, sizeof P384_G
};
/*
 * Constant byte string named after the subgroup order of the curve
 * (66 bytes); presumably unsigned big-endian, as for the other encoded
 * integers in this library.
 */
static const unsigned char P521_N[] = {
	0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFA, 0x51, 0x86, 0x87, 0x83, 0xBF, 0x2F,
	0x96, 0x6B, 0x7F, 0xCC, 0x01, 0x48, 0xF7, 0x09,
	0xA5, 0xD0, 0x3B, 0xB5, 0xC9, 0xB8, 0x89, 0x9C,
	0x47, 0xAE, 0xBB, 0x6F, 0xB7, 0x1E, 0x91, 0x38,
	0x64, 0x09
};

/*
 * Constant byte string named after the curve generator (133 bytes):
 * a leading 0x04 byte followed by two 66-byte values, which matches the
 * uncompressed point format (0x04 || X || Y).
 */
static const unsigned char P521_G[] = {
	0x04, 0x00, 0xC6, 0x85, 0x8E, 0x06, 0xB7, 0x04,
	0x04, 0xE9, 0xCD, 0x9E, 0x3E, 0xCB, 0x66, 0x23,
	0x95, 0xB4, 0x42, 0x9C, 0x64, 0x81, 0x39, 0x05,
	0x3F, 0xB5, 0x21, 0xF8, 0x28, 0xAF, 0x60, 0x6B,
	0x4D, 0x3D, 0xBA, 0xA1, 0x4B, 0x5E, 0x77, 0xEF,
	0xE7, 0x59, 0x28, 0xFE, 0x1D, 0xC1, 0x27, 0xA2,
	0xFF, 0xA8, 0xDE, 0x33, 0x48, 0xB3, 0xC1, 0x85,
	0x6A, 0x42, 0x9B, 0xF9, 0x7E, 0x7E, 0x31, 0xC2,
	0xE5, 0xBD, 0x66, 0x01, 0x18, 0x39, 0x29, 0x6A,
	0x78, 0x9A, 0x3B, 0xC0, 0x04, 0x5C, 0x8A, 0x5F,
	0xB4, 0x2C, 0x7D, 0x1B, 0xD9, 0x98, 0xF5, 0x44,
	0x49, 0x57, 0x9B, 0x44, 0x68, 0x17, 0xAF, 0xBD,
	0x17, 0x27, 0x3E, 0x66, 0x2C, 0x97, 0xEE, 0x72,
	0x99, 0x5E, 0xF4, 0x26, 0x40, 0xC5, 0x50, 0xB9,
	0x01, 0x3F, 0xAD, 0x07, 0x61, 0x35, 0x3C, 0x70,
	0x86, 0xA2, 0x72, 0xC2, 0x40, 0x88, 0xBE, 0x94,
	0x76, 0x9F, 0xD1, 0x66, 0x50
};

/*
 * Curve definition: curve identifier, then the two tables above, each
 * with its length (field layout is declared in inner.h -- presumably
 * order first, then generator; confirm there).
 *
 * see inner.h
 */
const br_ec_curve_def br_secp521r1 = {
	BR_EC_secp521r1,
	P521_N, sizeof P521_N,
	P521_G, sizeof P521_G
};
/*
 * Convert an ECDSA signature, in place, from its ASN.1 encoding
 * (SEQUENCE of two INTEGER values r and s) to the "raw" format (r and s
 * as two unsigned big-endian integers of identical length, concatenated).
 *
 *   sig       buffer holding the ASN.1 signature; receives the raw one
 *   sig_len   length (in bytes) of the ASN.1 signature
 *
 * Returned value is the raw signature length (at most 254), or 0 if the
 * input is not acceptable.
 *
 * The raw length is twice the length of the longer of r and s (after
 * removal of leading zeros). With very unbalanced integers this can be
 * larger than the input length, so the buffer must have room for the
 * returned length, not merely for sig_len bytes.
 *
 * see bearssl_ec.h
 */
size_t
br_ecdsa_asn1_to_raw(void *sig, size_t sig_len)
{
	/*
	 * Note: this code is a bit lenient in that it accepts a few
	 * deviations to DER with regards to minimality of encoding of
	 * lengths and integer values. These deviations are still
	 * unambiguous.
	 *
	 * Signature format is a SEQUENCE of two INTEGER values. We
	 * support only integers of less than 127 bytes each (signed
	 * encoding) so the resulting raw signature will have length
	 * at most 254 bytes.
	 */

	unsigned char *buf, *r, *s;
	size_t zlen, rlen, slen, off;
	unsigned char tmp[254];

	buf = sig;
	if (sig_len < 8) {
		return 0;
	}

	/*
	 * First byte is SEQUENCE tag.
	 */
	if (buf[0] != 0x30) {
		return 0;
	}

	/*
	 * The SEQUENCE length will be encoded over one or two bytes. We
	 * limit the total SEQUENCE contents to 255 bytes, because it
	 * makes things simpler; this is enough for subgroup orders up
	 * to 999 bits.
	 *
	 * A first length byte of 0x80 is the "indefinite length" marker,
	 * not a short-form length of 128 bytes, hence it is rejected (it
	 * falls into the "not 0x81" case). This is the same rule as the
	 * one applied to the two INTEGER lengths below.
	 */
	zlen = buf[1];
	if (zlen >= 0x80) {
		if (zlen != 0x81) {
			return 0;
		}
		zlen = buf[2];
		if (zlen != sig_len - 3) {
			return 0;
		}
		off = 3;
	} else {
		if (zlen != sig_len - 2) {
			return 0;
		}
		off = 2;
	}

	/*
	 * First INTEGER (r). Since sig_len >= 8, the tag and length bytes
	 * are within the buffer.
	 */
	if (buf[off ++] != 0x02) {
		return 0;
	}
	rlen = buf[off ++];
	if (rlen >= 0x80) {
		return 0;
	}
	r = buf + off;
	off += rlen;

	/*
	 * Second INTEGER (s). The length test below also guarantees that
	 * r did not extend beyond the end of the signature.
	 */
	if (off + 2 > sig_len) {
		return 0;
	}
	if (buf[off ++] != 0x02) {
		return 0;
	}
	slen = buf[off ++];
	if (slen >= 0x80 || slen != sig_len - off) {
		return 0;
	}
	s = buf + off;

	/*
	 * Removing leading zeros from r and s.
	 */
	while (rlen > 0 && *r == 0) {
		rlen --;
		r ++;
	}
	while (slen > 0 && *s == 0) {
		slen --;
		s ++;
	}

	/*
	 * Compute common length for the two integers, then copy integers
	 * into the temporary buffer, and finally copy it back over the
	 * signature buffer. Each integer is right-aligned in its half, the
	 * memset() providing the left padding with zeros.
	 */
	zlen = rlen > slen ? rlen : slen;
	sig_len = zlen << 1;
	memset(tmp, 0, sig_len);
	memcpy(tmp + zlen - rlen, r, rlen);
	memcpy(tmp + sig_len - slen, s, slen);
	memcpy(sig, tmp, sig_len);
	return sig_len;
}
notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_ec.h */ +br_ecdsa_sign +br_ecdsa_sign_asn1_get_default(void) +{ +#if BR_LOMUL + return &br_ecdsa_i15_sign_asn1; +#else + return &br_ecdsa_i31_sign_asn1; +#endif +} diff --git a/third_party/bearssl/src/ecdsa_default_sign_raw.c b/third_party/bearssl/src/ecdsa_default_sign_raw.c new file mode 100644 index 0000000..287c970 --- /dev/null +++ b/third_party/bearssl/src/ecdsa_default_sign_raw.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_ec.h */ +br_ecdsa_sign +br_ecdsa_sign_raw_get_default(void) +{ +#if BR_LOMUL + return &br_ecdsa_i15_sign_raw; +#else + return &br_ecdsa_i31_sign_raw; +#endif +} diff --git a/third_party/bearssl/src/ecdsa_default_vrfy_asn1.c b/third_party/bearssl/src/ecdsa_default_vrfy_asn1.c new file mode 100644 index 0000000..fe0996e --- /dev/null +++ b/third_party/bearssl/src/ecdsa_default_vrfy_asn1.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see bearssl_ec.h */ +br_ecdsa_vrfy +br_ecdsa_vrfy_asn1_get_default(void) +{ +#if BR_LOMUL + return &br_ecdsa_i15_vrfy_asn1; +#else + return &br_ecdsa_i31_vrfy_asn1; +#endif +} diff --git a/third_party/bearssl/src/ecdsa_default_vrfy_raw.c b/third_party/bearssl/src/ecdsa_default_vrfy_raw.c new file mode 100644 index 0000000..e564a10 --- /dev/null +++ b/third_party/bearssl/src/ecdsa_default_vrfy_raw.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see bearssl_ec.h */ +br_ecdsa_vrfy +br_ecdsa_vrfy_raw_get_default(void) +{ +#if BR_LOMUL + return &br_ecdsa_i15_vrfy_raw; +#else + return &br_ecdsa_i31_vrfy_raw; +#endif +} diff --git a/third_party/bearssl/src/ecdsa_i15_bits.c b/third_party/bearssl/src/ecdsa_i15_bits.c new file mode 100644 index 0000000..402d14a --- /dev/null +++ b/third_party/bearssl/src/ecdsa_i15_bits.c @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
/*
 * Decode a byte string (typically a hash value) into the integer x.
 *
 *   x         destination integer
 *   src       source bytes
 *   len       source length (in bytes)
 *   ebitlen   value stored in the header word x[0] on output
 *
 * A bit length is derived from ebitlen as ebitlen - (ebitlen >> 4). If
 * the source has more bits than that, then only the first
 * ceil(bitlen / 8) bytes are decoded and the result is shifted right so
 * that only the leftmost bitlen bits of the source are kept.
 *
 * see inner.h
 */
void
br_ecdsa_i15_bits2int(uint16_t *x,
	const void *src, size_t len, uint32_t ebitlen)
{
	uint32_t mod_bits, in_bits;
	int shift;

	mod_bits = ebitlen - (ebitlen >> 4);
	in_bits = (uint32_t)len << 3;

	/*
	 * No truncation by default; when the input is too long, keep
	 * only the needed leading bytes and drop the extra low bits of
	 * the last kept byte.
	 */
	shift = 0;
	if (in_bits > mod_bits) {
		len = (mod_bits + 7) >> 3;
		shift = (int)((in_bits - mod_bits) & 7);
	}

	br_i15_zero(x, ebitlen);
	br_i15_decode(x, src, len);
	br_i15_rshift(x, shift);

	/* Header word is set to the requested value. */
	x[0] = ebitlen;
}
+ */ + +#include "inner.h" + +#define ORDER_LEN ((BR_MAX_EC_SIZE + 7) >> 3) + +/* see bearssl_ec.h */ +size_t +br_ecdsa_i15_sign_asn1(const br_ec_impl *impl, + const br_hash_class *hf, const void *hash_value, + const br_ec_private_key *sk, void *sig) +{ + unsigned char rsig[(ORDER_LEN << 1) + 12]; + size_t sig_len; + + sig_len = br_ecdsa_i15_sign_raw(impl, hf, hash_value, sk, rsig); + if (sig_len == 0) { + return 0; + } + sig_len = br_ecdsa_raw_to_asn1(rsig, sig_len); + memcpy(sig, rsig, sig_len); + return sig_len; +} diff --git a/third_party/bearssl/src/ecdsa_i15_sign_raw.c b/third_party/bearssl/src/ecdsa_i15_sign_raw.c new file mode 100644 index 0000000..39b2e1d --- /dev/null +++ b/third_party/bearssl/src/ecdsa_i15_sign_raw.c @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +#define I15_LEN ((BR_MAX_EC_SIZE + 29) / 15) +#define POINT_LEN (1 + (((BR_MAX_EC_SIZE + 7) >> 3) << 1)) +#define ORDER_LEN ((BR_MAX_EC_SIZE + 7) >> 3) + +/* see bearssl_ec.h */ +size_t +br_ecdsa_i15_sign_raw(const br_ec_impl *impl, + const br_hash_class *hf, const void *hash_value, + const br_ec_private_key *sk, void *sig) +{ + /* + * IMPORTANT: this code is fit only for curves with a prime + * order. This is needed so that modular reduction of the X + * coordinate of a point can be done with a simple subtraction. + * We also rely on the last byte of the curve order to be distinct + * from 0 and 1. + */ + const br_ec_curve_def *cd; + uint16_t n[I15_LEN], r[I15_LEN], s[I15_LEN], x[I15_LEN]; + uint16_t m[I15_LEN], k[I15_LEN], t1[I15_LEN], t2[I15_LEN]; + unsigned char tt[ORDER_LEN << 1]; + unsigned char eU[POINT_LEN]; + size_t hash_len, nlen, ulen; + uint16_t n0i; + uint32_t ctl; + br_hmac_drbg_context drbg; + + /* + * If the curve is not supported, then exit with an error. + */ + if (((impl->supported_curves >> sk->curve) & 1) == 0) { + return 0; + } + + /* + * Get the curve parameters (generator and order). + */ + switch (sk->curve) { + case BR_EC_secp256r1: + cd = &br_secp256r1; + break; + case BR_EC_secp384r1: + cd = &br_secp384r1; + break; + case BR_EC_secp521r1: + cd = &br_secp521r1; + break; + default: + return 0; + } + + /* + * Get modulus. + */ + nlen = cd->order_len; + br_i15_decode(n, cd->order, nlen); + n0i = br_i15_ninv15(n[1]); + + /* + * Get private key as an i15 integer. This also checks that the + * private key is well-defined (not zero, and less than the + * curve order). + */ + if (!br_i15_decode_mod(x, sk->x, sk->xlen, n)) { + return 0; + } + if (br_i15_iszero(x)) { + return 0; + } + + /* + * Get hash length. + */ + hash_len = (hf->desc >> BR_HASHDESC_OUT_OFF) & BR_HASHDESC_OUT_MASK; + + /* + * Truncate and reduce the hash value modulo the curve order. 
+ */ + br_ecdsa_i15_bits2int(m, hash_value, hash_len, n[0]); + br_i15_sub(m, n, br_i15_sub(m, n, 0) ^ 1); + + /* + * RFC 6979 generation of the "k" value. + * + * The process uses HMAC_DRBG (with the hash function used to + * process the message that is to be signed). The seed is the + * concatenation of the encodings of the private key and + * the hash value (after truncation and modular reduction). + */ + br_i15_encode(tt, nlen, x); + br_i15_encode(tt + nlen, nlen, m); + br_hmac_drbg_init(&drbg, hf, tt, nlen << 1); + for (;;) { + br_hmac_drbg_generate(&drbg, tt, nlen); + br_ecdsa_i15_bits2int(k, tt, nlen, n[0]); + if (br_i15_iszero(k)) { + continue; + } + if (br_i15_sub(k, n, 0)) { + break; + } + } + + /* + * Compute k*G and extract the X coordinate, then reduce it + * modulo the curve order. Since we support only curves with + * prime order, that reduction is only a matter of computing + * a subtraction. + */ + br_i15_encode(tt, nlen, k); + ulen = impl->mulgen(eU, tt, nlen, sk->curve); + br_i15_zero(r, n[0]); + br_i15_decode(r, &eU[1], ulen >> 1); + r[0] = n[0]; + br_i15_sub(r, n, br_i15_sub(r, n, 0) ^ 1); + + /* + * Compute 1/k in double-Montgomery representation. We do so by + * first converting _from_ Montgomery representation (twice), + * then using a modular exponentiation. + */ + br_i15_from_monty(k, n, n0i); + br_i15_from_monty(k, n, n0i); + memcpy(tt, cd->order, nlen); + tt[nlen - 1] -= 2; + br_i15_modpow(k, tt, nlen, n, n0i, t1, t2); + + /* + * Compute s = (m+xr)/k (mod n). + * The k[] array contains R^2/k (double-Montgomery representation); + * we thus can use direct Montgomery multiplications and conversions + * from Montgomery, avoiding any call to br_i15_to_monty() (which + * is slower). + */ + br_i15_from_monty(m, n, n0i); + br_i15_montymul(t1, x, r, n, n0i); + ctl = br_i15_add(t1, m, 1); + ctl |= br_i15_sub(t1, n, 0) ^ 1; + br_i15_sub(t1, n, ctl); + br_i15_montymul(s, t1, k, n, n0i); + + /* + * Encode r and s in the signature. 
+ */ + br_i15_encode(sig, nlen, r); + br_i15_encode((unsigned char *)sig + nlen, nlen, s); + return nlen << 1; +} diff --git a/third_party/bearssl/src/ecdsa_i15_vrfy_asn1.c b/third_party/bearssl/src/ecdsa_i15_vrfy_asn1.c new file mode 100644 index 0000000..f4bef99 --- /dev/null +++ b/third_party/bearssl/src/ecdsa_i15_vrfy_asn1.c @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +#define FIELD_LEN ((BR_MAX_EC_SIZE + 7) >> 3) + +/* see bearssl_ec.h */ +uint32_t +br_ecdsa_i15_vrfy_asn1(const br_ec_impl *impl, + const void *hash, size_t hash_len, + const br_ec_public_key *pk, + const void *sig, size_t sig_len) +{ + /* + * We use a double-sized buffer because a malformed ASN.1 signature + * may trigger a size expansion when converting to "raw" format. 
+ */ + unsigned char rsig[(FIELD_LEN << 2) + 24]; + + if (sig_len > ((sizeof rsig) >> 1)) { + return 0; + } + memcpy(rsig, sig, sig_len); + sig_len = br_ecdsa_asn1_to_raw(rsig, sig_len); + return br_ecdsa_i15_vrfy_raw(impl, hash, hash_len, pk, rsig, sig_len); +} diff --git a/third_party/bearssl/src/ecdsa_i15_vrfy_raw.c b/third_party/bearssl/src/ecdsa_i15_vrfy_raw.c new file mode 100644 index 0000000..14dd5e4 --- /dev/null +++ b/third_party/bearssl/src/ecdsa_i15_vrfy_raw.c @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +#define I15_LEN ((BR_MAX_EC_SIZE + 29) / 15) +#define POINT_LEN (1 + (((BR_MAX_EC_SIZE + 7) >> 3) << 1)) + +/* see bearssl_ec.h */ +uint32_t +br_ecdsa_i15_vrfy_raw(const br_ec_impl *impl, + const void *hash, size_t hash_len, + const br_ec_public_key *pk, + const void *sig, size_t sig_len) +{ + /* + * IMPORTANT: this code is fit only for curves with a prime + * order. This is needed so that modular reduction of the X + * coordinate of a point can be done with a simple subtraction. + */ + const br_ec_curve_def *cd; + uint16_t n[I15_LEN], r[I15_LEN], s[I15_LEN], t1[I15_LEN], t2[I15_LEN]; + unsigned char tx[(BR_MAX_EC_SIZE + 7) >> 3]; + unsigned char ty[(BR_MAX_EC_SIZE + 7) >> 3]; + unsigned char eU[POINT_LEN]; + size_t nlen, rlen, ulen; + uint16_t n0i; + uint32_t res; + + /* + * If the curve is not supported, then report an error. + */ + if (((impl->supported_curves >> pk->curve) & 1) == 0) { + return 0; + } + + /* + * Get the curve parameters (generator and order). + */ + switch (pk->curve) { + case BR_EC_secp256r1: + cd = &br_secp256r1; + break; + case BR_EC_secp384r1: + cd = &br_secp384r1; + break; + case BR_EC_secp521r1: + cd = &br_secp521r1; + break; + default: + return 0; + } + + /* + * Signature length must be even. + */ + if (sig_len & 1) { + return 0; + } + rlen = sig_len >> 1; + + /* + * Public key point must have the proper size for this curve. + */ + if (pk->qlen != cd->generator_len) { + return 0; + } + + /* + * Get modulus; then decode the r and s values. They must be + * lower than the modulus, and s must not be null. + */ + nlen = cd->order_len; + br_i15_decode(n, cd->order, nlen); + n0i = br_i15_ninv15(n[1]); + if (!br_i15_decode_mod(r, sig, rlen, n)) { + return 0; + } + if (!br_i15_decode_mod(s, (const unsigned char *)sig + rlen, rlen, n)) { + return 0; + } + if (br_i15_iszero(s)) { + return 0; + } + + /* + * Invert s. 
We do that with a modular exponentiation; we use + * the fact that for all the curves we support, the least + * significant byte is not 0 or 1, so we can subtract 2 without + * any carry to process. + * We also want 1/s in Montgomery representation, which can be + * done by converting _from_ Montgomery representation before + * the inversion (because (1/s)*R = 1/(s/R)). + */ + br_i15_from_monty(s, n, n0i); + memcpy(tx, cd->order, nlen); + tx[nlen - 1] -= 2; + br_i15_modpow(s, tx, nlen, n, n0i, t1, t2); + + /* + * Truncate the hash to the modulus length (in bits) and reduce + * it modulo the curve order. The modular reduction can be done + * with a subtraction since the truncation already reduced the + * value to the modulus bit length. + */ + br_ecdsa_i15_bits2int(t1, hash, hash_len, n[0]); + br_i15_sub(t1, n, br_i15_sub(t1, n, 0) ^ 1); + + /* + * Multiply the (truncated, reduced) hash value with 1/s, result in + * t2, encoded in ty. + */ + br_i15_montymul(t2, t1, s, n, n0i); + br_i15_encode(ty, nlen, t2); + + /* + * Multiply r with 1/s, result in t1, encoded in tx. + */ + br_i15_montymul(t1, r, s, n, n0i); + br_i15_encode(tx, nlen, t1); + + /* + * Compute the point x*Q + y*G. + */ + ulen = cd->generator_len; + memcpy(eU, pk->q, ulen); + res = impl->muladd(eU, NULL, ulen, + tx, nlen, ty, nlen, cd->curve); + + /* + * Get the X coordinate, reduce modulo the curve order, and + * compare with the 'r' value. + * + * The modular reduction can be done with subtractions because + * we work with curves of prime order, so the curve order is + * close to the field order (Hasse's theorem). 
+ */ + br_i15_zero(t1, n[0]); + br_i15_decode(t1, &eU[1], ulen >> 1); + t1[0] = n[0]; + br_i15_sub(t1, n, br_i15_sub(t1, n, 0) ^ 1); + res &= ~br_i15_sub(t1, r, 1); + res &= br_i15_iszero(t1); + return res; +} diff --git a/third_party/bearssl/src/ecdsa_i31_bits.c b/third_party/bearssl/src/ecdsa_i31_bits.c new file mode 100644 index 0000000..9a8d673 --- /dev/null +++ b/third_party/bearssl/src/ecdsa_i31_bits.c @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see inner.h */ +void +br_ecdsa_i31_bits2int(uint32_t *x, + const void *src, size_t len, uint32_t ebitlen) +{ + uint32_t bitlen, hbitlen; + int sc; + + bitlen = ebitlen - (ebitlen >> 5); + hbitlen = (uint32_t)len << 3; + if (hbitlen > bitlen) { + len = (bitlen + 7) >> 3; + sc = (int)((hbitlen - bitlen) & 7); + } else { + sc = 0; + } + br_i31_zero(x, ebitlen); + br_i31_decode(x, src, len); + br_i31_rshift(x, sc); + x[0] = ebitlen; +} diff --git a/third_party/bearssl/src/ecdsa_i31_sign_asn1.c b/third_party/bearssl/src/ecdsa_i31_sign_asn1.c new file mode 100644 index 0000000..cf0d351 --- /dev/null +++ b/third_party/bearssl/src/ecdsa_i31_sign_asn1.c @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +#define ORDER_LEN ((BR_MAX_EC_SIZE + 7) >> 3) + +/* see bearssl_ec.h */ +size_t +br_ecdsa_i31_sign_asn1(const br_ec_impl *impl, + const br_hash_class *hf, const void *hash_value, + const br_ec_private_key *sk, void *sig) +{ + unsigned char rsig[(ORDER_LEN << 1) + 12]; + size_t sig_len; + + sig_len = br_ecdsa_i31_sign_raw(impl, hf, hash_value, sk, rsig); + if (sig_len == 0) { + return 0; + } + sig_len = br_ecdsa_raw_to_asn1(rsig, sig_len); + memcpy(sig, rsig, sig_len); + return sig_len; +} diff --git a/third_party/bearssl/src/ecdsa_i31_sign_raw.c b/third_party/bearssl/src/ecdsa_i31_sign_raw.c new file mode 100644 index 0000000..1df98fe --- /dev/null +++ b/third_party/bearssl/src/ecdsa_i31_sign_raw.c @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +#define I31_LEN ((BR_MAX_EC_SIZE + 61) / 31) +#define POINT_LEN (1 + (((BR_MAX_EC_SIZE + 7) >> 3) << 1)) +#define ORDER_LEN ((BR_MAX_EC_SIZE + 7) >> 3) + +/* see bearssl_ec.h */ +size_t +br_ecdsa_i31_sign_raw(const br_ec_impl *impl, + const br_hash_class *hf, const void *hash_value, + const br_ec_private_key *sk, void *sig) +{ + /* + * IMPORTANT: this code is fit only for curves with a prime + * order. This is needed so that modular reduction of the X + * coordinate of a point can be done with a simple subtraction. + * We also rely on the last byte of the curve order to be distinct + * from 0 and 1. + */ + const br_ec_curve_def *cd; + uint32_t n[I31_LEN], r[I31_LEN], s[I31_LEN], x[I31_LEN]; + uint32_t m[I31_LEN], k[I31_LEN], t1[I31_LEN], t2[I31_LEN]; + unsigned char tt[ORDER_LEN << 1]; + unsigned char eU[POINT_LEN]; + size_t hash_len, nlen, ulen; + uint32_t n0i, ctl; + br_hmac_drbg_context drbg; + + /* + * If the curve is not supported, then exit with an error. + */ + if (((impl->supported_curves >> sk->curve) & 1) == 0) { + return 0; + } + + /* + * Get the curve parameters (generator and order). + */ + switch (sk->curve) { + case BR_EC_secp256r1: + cd = &br_secp256r1; + break; + case BR_EC_secp384r1: + cd = &br_secp384r1; + break; + case BR_EC_secp521r1: + cd = &br_secp521r1; + break; + default: + return 0; + } + + /* + * Get modulus. + */ + nlen = cd->order_len; + br_i31_decode(n, cd->order, nlen); + n0i = br_i31_ninv31(n[1]); + + /* + * Get private key as an i31 integer. This also checks that the + * private key is well-defined (not zero, and less than the + * curve order). + */ + if (!br_i31_decode_mod(x, sk->x, sk->xlen, n)) { + return 0; + } + if (br_i31_iszero(x)) { + return 0; + } + + /* + * Get hash length. + */ + hash_len = (hf->desc >> BR_HASHDESC_OUT_OFF) & BR_HASHDESC_OUT_MASK; + + /* + * Truncate and reduce the hash value modulo the curve order. 
+ */ + br_ecdsa_i31_bits2int(m, hash_value, hash_len, n[0]); + br_i31_sub(m, n, br_i31_sub(m, n, 0) ^ 1); + + /* + * RFC 6979 generation of the "k" value. + * + * The process uses HMAC_DRBG (with the hash function used to + * process the message that is to be signed). The seed is the + * concatenation of the encodings of the private key and + * the hash value (after truncation and modular reduction). + */ + br_i31_encode(tt, nlen, x); + br_i31_encode(tt + nlen, nlen, m); + br_hmac_drbg_init(&drbg, hf, tt, nlen << 1); + for (;;) { + br_hmac_drbg_generate(&drbg, tt, nlen); + br_ecdsa_i31_bits2int(k, tt, nlen, n[0]); + if (br_i31_iszero(k)) { + continue; + } + if (br_i31_sub(k, n, 0)) { + break; + } + } + + /* + * Compute k*G and extract the X coordinate, then reduce it + * modulo the curve order. Since we support only curves with + * prime order, that reduction is only a matter of computing + * a subtraction. + */ + br_i31_encode(tt, nlen, k); + ulen = impl->mulgen(eU, tt, nlen, sk->curve); + br_i31_zero(r, n[0]); + br_i31_decode(r, &eU[1], ulen >> 1); + r[0] = n[0]; + br_i31_sub(r, n, br_i31_sub(r, n, 0) ^ 1); + + /* + * Compute 1/k in double-Montgomery representation. We do so by + * first converting _from_ Montgomery representation (twice), + * then using a modular exponentiation. + */ + br_i31_from_monty(k, n, n0i); + br_i31_from_monty(k, n, n0i); + memcpy(tt, cd->order, nlen); + tt[nlen - 1] -= 2; + br_i31_modpow(k, tt, nlen, n, n0i, t1, t2); + + /* + * Compute s = (m+xr)/k (mod n). + * The k[] array contains R^2/k (double-Montgomery representation); + * we thus can use direct Montgomery multiplications and conversions + * from Montgomery, avoiding any call to br_i31_to_monty() (which + * is slower). + */ + br_i31_from_monty(m, n, n0i); + br_i31_montymul(t1, x, r, n, n0i); + ctl = br_i31_add(t1, m, 1); + ctl |= br_i31_sub(t1, n, 0) ^ 1; + br_i31_sub(t1, n, ctl); + br_i31_montymul(s, t1, k, n, n0i); + + /* + * Encode r and s in the signature. 
+ */ + br_i31_encode(sig, nlen, r); + br_i31_encode((unsigned char *)sig + nlen, nlen, s); + return nlen << 1; +} diff --git a/third_party/bearssl/src/ecdsa_i31_vrfy_asn1.c b/third_party/bearssl/src/ecdsa_i31_vrfy_asn1.c new file mode 100644 index 0000000..4161aaa --- /dev/null +++ b/third_party/bearssl/src/ecdsa_i31_vrfy_asn1.c @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +#define FIELD_LEN ((BR_MAX_EC_SIZE + 7) >> 3) + +/* see bearssl_ec.h */ +uint32_t +br_ecdsa_i31_vrfy_asn1(const br_ec_impl *impl, + const void *hash, size_t hash_len, + const br_ec_public_key *pk, + const void *sig, size_t sig_len) +{ + /* + * We use a double-sized buffer because a malformed ASN.1 signature + * may trigger a size expansion when converting to "raw" format. 
+ */ + unsigned char rsig[(FIELD_LEN << 2) + 24]; + + if (sig_len > ((sizeof rsig) >> 1)) { + return 0; + } + memcpy(rsig, sig, sig_len); + sig_len = br_ecdsa_asn1_to_raw(rsig, sig_len); + return br_ecdsa_i31_vrfy_raw(impl, hash, hash_len, pk, rsig, sig_len); +} diff --git a/third_party/bearssl/src/ecdsa_i31_vrfy_raw.c b/third_party/bearssl/src/ecdsa_i31_vrfy_raw.c new file mode 100644 index 0000000..259477f --- /dev/null +++ b/third_party/bearssl/src/ecdsa_i31_vrfy_raw.c @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +#define I31_LEN ((BR_MAX_EC_SIZE + 61) / 31) +#define POINT_LEN (1 + (((BR_MAX_EC_SIZE + 7) >> 3) << 1)) + +/* see bearssl_ec.h */ +uint32_t +br_ecdsa_i31_vrfy_raw(const br_ec_impl *impl, + const void *hash, size_t hash_len, + const br_ec_public_key *pk, + const void *sig, size_t sig_len) +{ + /* + * IMPORTANT: this code is fit only for curves with a prime + * order. This is needed so that modular reduction of the X + * coordinate of a point can be done with a simple subtraction. + */ + const br_ec_curve_def *cd; + uint32_t n[I31_LEN], r[I31_LEN], s[I31_LEN], t1[I31_LEN], t2[I31_LEN]; + unsigned char tx[(BR_MAX_EC_SIZE + 7) >> 3]; + unsigned char ty[(BR_MAX_EC_SIZE + 7) >> 3]; + unsigned char eU[POINT_LEN]; + size_t nlen, rlen, ulen; + uint32_t n0i, res; + + /* + * If the curve is not supported, then report an error. + */ + if (((impl->supported_curves >> pk->curve) & 1) == 0) { + return 0; + } + + /* + * Get the curve parameters (generator and order). + */ + switch (pk->curve) { + case BR_EC_secp256r1: + cd = &br_secp256r1; + break; + case BR_EC_secp384r1: + cd = &br_secp384r1; + break; + case BR_EC_secp521r1: + cd = &br_secp521r1; + break; + default: + return 0; + } + + /* + * Signature length must be even. + */ + if (sig_len & 1) { + return 0; + } + rlen = sig_len >> 1; + + /* + * Public key point must have the proper size for this curve. + */ + if (pk->qlen != cd->generator_len) { + return 0; + } + + /* + * Get modulus; then decode the r and s values. They must be + * lower than the modulus, and s must not be null. + */ + nlen = cd->order_len; + br_i31_decode(n, cd->order, nlen); + n0i = br_i31_ninv31(n[1]); + if (!br_i31_decode_mod(r, sig, rlen, n)) { + return 0; + } + if (!br_i31_decode_mod(s, (const unsigned char *)sig + rlen, rlen, n)) { + return 0; + } + if (br_i31_iszero(s)) { + return 0; + } + + /* + * Invert s. 
We do that with a modular exponentiation; we use + * the fact that for all the curves we support, the least + * significant byte is not 0 or 1, so we can subtract 2 without + * any carry to process. + * We also want 1/s in Montgomery representation, which can be + * done by converting _from_ Montgomery representation before + * the inversion (because (1/s)*R = 1/(s/R)). + */ + br_i31_from_monty(s, n, n0i); + memcpy(tx, cd->order, nlen); + tx[nlen - 1] -= 2; + br_i31_modpow(s, tx, nlen, n, n0i, t1, t2); + + /* + * Truncate the hash to the modulus length (in bits) and reduce + * it modulo the curve order. The modular reduction can be done + * with a subtraction since the truncation already reduced the + * value to the modulus bit length. + */ + br_ecdsa_i31_bits2int(t1, hash, hash_len, n[0]); + br_i31_sub(t1, n, br_i31_sub(t1, n, 0) ^ 1); + + /* + * Multiply the (truncated, reduced) hash value with 1/s, result in + * t2, encoded in ty. + */ + br_i31_montymul(t2, t1, s, n, n0i); + br_i31_encode(ty, nlen, t2); + + /* + * Multiply r with 1/s, result in t1, encoded in tx. + */ + br_i31_montymul(t1, r, s, n, n0i); + br_i31_encode(tx, nlen, t1); + + /* + * Compute the point x*Q + y*G. + */ + ulen = cd->generator_len; + memcpy(eU, pk->q, ulen); + res = impl->muladd(eU, NULL, ulen, + tx, nlen, ty, nlen, cd->curve); + + /* + * Get the X coordinate, reduce modulo the curve order, and + * compare with the 'r' value. + * + * The modular reduction can be done with subtractions because + * we work with curves of prime order, so the curve order is + * close to the field order (Hasse's theorem). 
+ */ + br_i31_zero(t1, n[0]); + br_i31_decode(t1, &eU[1], ulen >> 1); + t1[0] = n[0]; + br_i31_sub(t1, n, br_i31_sub(t1, n, 0) ^ 1); + res &= ~br_i31_sub(t1, r, 1); + res &= br_i31_iszero(t1); + return res; +} diff --git a/third_party/bearssl/src/ecdsa_rta.c b/third_party/bearssl/src/ecdsa_rta.c new file mode 100644 index 0000000..005c62c --- /dev/null +++ b/third_party/bearssl/src/ecdsa_rta.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Compute ASN.1 encoded length for the provided integer. The ASN.1 + * encoding is signed, so its leading bit must have value 0; it must + * also be of minimal length (so leading bytes of value 0 must be + * removed, except if that would contradict the rule about the sign + * bit). 
/*
 * asn1_int_length(): x[] holds xlen bytes of an unsigned big-endian
 * integer; the returned value is the number of content bytes of its
 * minimal signed (ASN.1 INTEGER) encoding. Leading zero bytes are
 * skipped, and one byte is added back when nothing remains or when the
 * first remaining byte has its top bit set.
 */
static size_t
asn1_int_length(const unsigned char *x, size_t xlen)
{
	size_t skip, remaining;

	skip = 0;
	while (skip < xlen && x[skip] == 0) {
		skip ++;
	}
	remaining = xlen - skip;
	if (remaining == 0 || x[skip] >= 0x80) {
		remaining ++;
	}
	return remaining;
}

/*
 * Write one ASN.1 INTEGER (tag 0x02, one-byte length, content) at dst.
 * x[] is the source value over xlen bytes, and ilen is the content
 * length as computed by asn1_int_length() for that value. When ilen
 * exceeds xlen, a single 0x00 byte is emitted before the whole source;
 * otherwise the last ilen source bytes are copied.
 * Returns the number of bytes written (ilen + 2).
 */
static size_t
asn1_put_int(unsigned char *dst,
	const unsigned char *x, size_t xlen, size_t ilen)
{
	dst[0] = 0x02;
	dst[1] = (unsigned char)ilen;
	if (ilen > xlen) {
		dst[2] = 0x00;
		memcpy(dst + 3, x, xlen);
	} else {
		memcpy(dst + 2, x + xlen - ilen, ilen);
	}
	return ilen + 2;
}

/*
 * see bearssl_ec.h
 *
 * Convert, in place, a "raw" signature (r followed by s, each over
 * sig_len/2 bytes) into an ASN.1 SEQUENCE of two INTEGER values.
 *
 * Returns the length of the ASN.1 signature written back to sig, or 0
 * if sig_len is odd or if one of the integers would need more than 125
 * content bytes. The output may be longer than the input; the buffer
 * behind sig must be able to hold it.
 */
size_t
br_ecdsa_raw_to_asn1(void *sig, size_t sig_len)
{
	/*
	 * The staging buffer accommodates r and s of up to 125 bytes each
	 * (signed encoding), i.e. a curve order of up to 999 bits. This
	 * is the limit that keeps the length encodings "simple".
	 */
	unsigned char der[257];
	unsigned char *raw;
	size_t half, rlen, slen, clen, pos;

	if ((sig_len & 1) != 0) {
		return 0;
	}

	/* Content lengths of the two integers. */
	raw = sig;
	half = sig_len >> 1;
	rlen = asn1_int_length(raw, half);
	slen = asn1_int_length(raw + half, half);
	if (rlen > 125 || slen > 125) {
		return 0;
	}

	/*
	 * SEQUENCE header; the content length uses the two-byte form
	 * (0x81 prefix) once it reaches 0x80.
	 */
	clen = rlen + slen + 4;
	pos = 0;
	der[pos ++] = 0x30;
	if (clen >= 0x80) {
		der[pos ++] = 0x81;
	}
	der[pos ++] = (unsigned char)clen;

	/* The two INTEGER values: r, then s. */
	pos += asn1_put_int(der + pos, raw, half, rlen);
	pos += asn1_put_int(der + pos, raw + half, half, slen);

	/* Hand the ASN.1 signature back in the caller's buffer. */
	memcpy(sig, der, pos);
	return pos;
}

/*
 * diff --git a/third_party/bearssl/src/enc16be.c b/third_party/bearssl/src/enc16be.c
 * new file mode 100644
 * index 0000000..6e06652
 * --- /dev/null
 * +++ b/third_party/bearssl/src/enc16be.c
 * @@ -0,0 +1,38 @@
 */
/*
 * Copyright (c) 2016 Thomas Pornin <[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
+ */ + +#include "inner.h" + +/* see inner.h; writes each of the num values of v[], in order, into dst through br_enc16be(), advancing the output by 2 bytes per value */ +void +br_range_enc16be(void *dst, const uint16_t *v, size_t num) +{ + unsigned char *buf; + + buf = dst; + while (num -- > 0) { + br_enc16be(buf, *v ++); + buf += 2; + } +} diff --git a/third_party/bearssl/src/enc16le.c b/third_party/bearssl/src/enc16le.c new file mode 100644 index 0000000..3e5049a --- /dev/null +++ b/third_party/bearssl/src/enc16le.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see inner.h; writes each of the num values of v[], in order, into dst through br_enc16le(), advancing the output by 2 bytes per value */ +void +br_range_enc16le(void *dst, const uint16_t *v, size_t num) +{ + unsigned char *buf; + + buf = dst; + while (num -- > 0) { + br_enc16le(buf, *v ++); + buf += 2; + } +} diff --git a/third_party/bearssl/src/enc32be.c b/third_party/bearssl/src/enc32be.c new file mode 100644 index 0000000..97298b5 --- /dev/null +++ b/third_party/bearssl/src/enc32be.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see inner.h; writes each of the num values of v[], in order, into dst through br_enc32be(), advancing the output by 4 bytes per value */ +void +br_range_enc32be(void *dst, const uint32_t *v, size_t num) +{ + unsigned char *buf; + + buf = dst; + while (num -- > 0) { + br_enc32be(buf, *v ++); + buf += 4; + } +} diff --git a/third_party/bearssl/src/enc32le.c b/third_party/bearssl/src/enc32le.c new file mode 100644 index 0000000..9e9c856 --- /dev/null +++ b/third_party/bearssl/src/enc32le.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see inner.h; writes each of the num values of v[], in order, into dst through br_enc32le(), advancing the output by 4 bytes per value */ +void +br_range_enc32le(void *dst, const uint32_t *v, size_t num) +{ + unsigned char *buf; + + buf = dst; + while (num -- > 0) { + br_enc32le(buf, *v ++); + buf += 4; + } +} diff --git a/third_party/bearssl/src/enc64be.c b/third_party/bearssl/src/enc64be.c new file mode 100644 index 0000000..d548944 --- /dev/null +++ b/third_party/bearssl/src/enc64be.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see inner.h; writes each of the num values of v[], in order, into dst through br_enc64be(), advancing the output by 8 bytes per value */ +void +br_range_enc64be(void *dst, const uint64_t *v, size_t num) +{ + unsigned char *buf; + + buf = dst; + while (num -- > 0) { + br_enc64be(buf, *v ++); + buf += 8; + } +} diff --git a/third_party/bearssl/src/enc64le.c b/third_party/bearssl/src/enc64le.c new file mode 100644 index 0000000..1f1d68e --- /dev/null +++ b/third_party/bearssl/src/enc64le.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see inner.h; writes each of the num values of v[], in order, into dst through br_enc64le(), advancing the output by 8 bytes per value */ +void +br_range_enc64le(void *dst, const uint64_t *v, size_t num) +{ + unsigned char *buf; + + buf = dst; + while (num -- > 0) { + br_enc64le(buf, *v ++); + buf += 8; + } +} diff --git a/third_party/bearssl/src/encode_ec_pk8der.c b/third_party/bearssl/src/encode_ec_pk8der.c new file mode 100644 index 0000000..53717ce --- /dev/null +++ b/third_party/bearssl/src/encode_ec_pk8der.c @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see bearssl_x509.h; returns the total encoded length. When dest is NULL only the length is computed and nothing is written; otherwise the encoding is written to dest. Returns 0 when br_get_curve_OID() has no OID for sk->curve. */ +size_t +br_encode_ec_pkcs8_der(void *dest, + const br_ec_private_key *sk, const br_ec_public_key *pk) +{ + /* + * ASN.1 format: + * + * OneAsymmetricKey ::= SEQUENCE { + * version Version, + * privateKeyAlgorithm PrivateKeyAlgorithmIdentifier, + * privateKey PrivateKey, + * attributes [0] Attributes OPTIONAL, + * ..., + * [[2: publicKey [1] PublicKey OPTIONAL ]], + * ... + * } + * + * We don't include attributes or public key (the public key + * is included in the private key value instead). The + * 'version' field is an INTEGER that we will set to 0 + * (meaning 'v1', compatible with previous versions of PKCS#8). + * The 'privateKeyAlgorithm' structure is an AlgorithmIdentifier + * whose OID should be id-ecPublicKey, with, as parameters, the + * curve OID. The 'privateKey' is an OCTET STRING, whose value + * is the "raw DER" encoding of the key pair. + */ + + /* + * OID id-ecPublicKey (1.2.840.10045.2.1), DER-encoded (with + * the tag). 
+ */ + static const unsigned char OID_ECPUBKEY[] = { + 0x06, 0x07, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x02, 0x01 + }; + + size_t len_version, len_privateKeyAlgorithm, len_privateKeyValue; + size_t len_privateKey, len_seq; + const unsigned char *oid; + + oid = br_get_curve_OID(sk->curve); /* oid[0] is used below as the number of OID value bytes that follow it */ + if (oid == NULL) { + return 0; + } + len_version = 3; /* the three bytes 0x02 0x01 0x00 written below */ + len_privateKeyAlgorithm = 2 + sizeof OID_ECPUBKEY + 2 + oid[0]; /* SEQUENCE tag + length, id-ecPublicKey, then curve OID tag + length + value */ + len_privateKeyValue = br_encode_ec_raw_der_inner(NULL, sk, pk, 0); /* length-only call (dest is NULL), curve OID not included */ + len_privateKey = 1 + len_of_len(len_privateKeyValue) + + len_privateKeyValue; + len_seq = len_version + len_privateKeyAlgorithm + len_privateKey; + + if (dest == NULL) { + return 1 + len_of_len(len_seq) + len_seq; + } else { + unsigned char *buf; + size_t lenlen; + + buf = dest; + *buf ++ = 0x30; /* SEQUENCE tag */ + lenlen = br_asn1_encode_length(buf, len_seq); + buf += lenlen; + + /* version */ + *buf ++ = 0x02; + *buf ++ = 0x01; + *buf ++ = 0x00; + + /* privateKeyAlgorithm */ + *buf ++ = 0x30; + *buf ++ = (sizeof OID_ECPUBKEY) + 2 + oid[0]; + memcpy(buf, OID_ECPUBKEY, sizeof OID_ECPUBKEY); + buf += sizeof OID_ECPUBKEY; + *buf ++ = 0x06; /* OBJECT IDENTIFIER tag for the curve OID */ + memcpy(buf, oid, 1 + oid[0]); /* length byte followed by the OID value bytes */ + buf += 1 + oid[0]; + + /* privateKey */ + *buf ++ = 0x04; + buf += br_asn1_encode_length(buf, len_privateKeyValue); + br_encode_ec_raw_der_inner(buf, sk, pk, 0); + + return 1 + lenlen + len_seq; + } +} diff --git a/third_party/bearssl/src/encode_ec_rawder.c b/third_party/bearssl/src/encode_ec_rawder.c new file mode 100644 index 0000000..5985909 --- /dev/null +++ b/third_party/bearssl/src/encode_ec_rawder.c @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the 
Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see inner.h; returns a pointer to a static table for the given curve, whose first byte is the number of OID value bytes that follow it, or NULL when the curve is not one of the three listed below */ +const unsigned char * +br_get_curve_OID(int curve) +{ + static const unsigned char OID_secp256r1[] = { + 0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x03, 0x01, 0x07 + }; + static const unsigned char OID_secp384r1[] = { + 0x05, 0x2b, 0x81, 0x04, 0x00, 0x22 + }; + static const unsigned char OID_secp521r1[] = { + 0x05, 0x2b, 0x81, 0x04, 0x00, 0x23 + }; + + switch (curve) { + case BR_EC_secp256r1: return OID_secp256r1; + case BR_EC_secp384r1: return OID_secp384r1; + case BR_EC_secp521r1: return OID_secp521r1; + default: + return NULL; + } +} + +/* see inner.h; returns the total encoded length. When dest is NULL only the length is computed; otherwise the encoding is written to dest. The curve OID is emitted only when include_curve_oid is non-zero (and 0 is returned if no OID is known for sk->curve); the public key is emitted only when pk is not NULL. */ +size_t +br_encode_ec_raw_der_inner(void *dest, + const br_ec_private_key *sk, const br_ec_public_key *pk, + int include_curve_oid) +{ + /* + * ASN.1 format: + * + * ECPrivateKey ::= SEQUENCE { + * version INTEGER { ecPrivkeyVer1(1) } (ecPrivkeyVer1), + * privateKey OCTET STRING, + * parameters [0] ECParameters {{ NamedCurve }} OPTIONAL, + * publicKey [1] BIT STRING OPTIONAL + * } + * + * The tags '[0]' and '[1]' are explicit. The 'ECParameters' + * is a CHOICE; in our case, it will always be an OBJECT IDENTIFIER + * that identifies the curve. 
+ * + * The value of the 'privateKey' field is the raw unsigned big-endian + * encoding of the private key (integer modulo the curve subgroup + * order); there is no INTEGER tag, and the leading bit may be 1. + * Also, leading bytes of value 0x00 are _not_ removed. + * + * The 'publicKey' contents are the raw encoded public key point, + * normally uncompressed (leading byte of value 0x04, followed + * by the unsigned big-endian encodings of the X and Y coordinates, + * padded to the full field length if necessary). + */ + + size_t len_version, len_privateKey, len_parameters, len_publicKey; + size_t len_publicKey_bits, len_seq; + const unsigned char *oid; + + if (include_curve_oid) { + oid = br_get_curve_OID(sk->curve); + if (oid == NULL) { + return 0; + } + } else { + oid = NULL; + } + len_version = 3; /* the three bytes 0x02 0x01 0x01 written below */ + len_privateKey = 1 + len_of_len(sk->xlen) + sk->xlen; + if (include_curve_oid) { + len_parameters = 4 + oid[0]; /* 0xA0 tag + length, 0x06 tag + length, then oid[0] value bytes */ + } else { + len_parameters = 0; + } + if (pk == NULL) { + len_publicKey = 0; + len_publicKey_bits = 0; + } else { + len_publicKey_bits = 2 + len_of_len(pk->qlen) + pk->qlen; /* 0x03 tag, length, the 0x00 byte written below, then the point */ + len_publicKey = 1 + len_of_len(len_publicKey_bits) + + len_publicKey_bits; + } + len_seq = len_version + len_privateKey + len_parameters + len_publicKey; + if (dest == NULL) { + return 1 + len_of_len(len_seq) + len_seq; + } else { + unsigned char *buf; + size_t lenlen; + + buf = dest; + *buf ++ = 0x30; /* SEQUENCE tag */ + lenlen = br_asn1_encode_length(buf, len_seq); + buf += lenlen; + + /* version */ + *buf ++ = 0x02; + *buf ++ = 0x01; + *buf ++ = 0x01; + + /* privateKey */ + *buf ++ = 0x04; + buf += br_asn1_encode_length(buf, sk->xlen); + memcpy(buf, sk->x, sk->xlen); + buf += sk->xlen; + + /* parameters */ + if (include_curve_oid) { + *buf ++ = 0xA0; + *buf ++ = oid[0] + 2; + *buf ++ = 0x06; + memcpy(buf, oid, oid[0] + 1); + buf += oid[0] + 1; + } + + /* publicKey */ + if (pk != NULL) { + *buf ++ = 0xA1; + buf += br_asn1_encode_length(buf, len_publicKey_bits); + *buf ++ = 0x03; 
+ buf += br_asn1_encode_length(buf, pk->qlen + 1); + *buf ++ = 0x00; + memcpy(buf, pk->q, pk->qlen); + /* buf += pk->qlen; -- not needed: nothing is written after the public key */ + } + + return 1 + lenlen + len_seq; + } +} + +/* see bearssl_x509.h; same as br_encode_ec_raw_der_inner() with the curve OID included */ +size_t +br_encode_ec_raw_der(void *dest, + const br_ec_private_key *sk, const br_ec_public_key *pk) +{ + return br_encode_ec_raw_der_inner(dest, sk, pk, 1); +} diff --git a/third_party/bearssl/src/encode_rsa_pk8der.c b/third_party/bearssl/src/encode_rsa_pk8der.c new file mode 100644 index 0000000..c053503 --- /dev/null +++ b/third_party/bearssl/src/encode_rsa_pk8der.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see bearssl_x509.h; returns the total encoded length. When dest is NULL only the length is computed; otherwise the fixed header and then the output of br_encode_rsa_raw_der() are written to dest. */ +size_t +br_encode_rsa_pkcs8_der(void *dest, const br_rsa_private_key *sk, + const br_rsa_public_key *pk, const void *d, size_t dlen) +{ + /* + * ASN.1 format: + * + * OneAsymmetricKey ::= SEQUENCE { + * version Version, + * privateKeyAlgorithm PrivateKeyAlgorithmIdentifier, + * privateKey PrivateKey, + * attributes [0] Attributes OPTIONAL, + * ..., + * [[2: publicKey [1] PublicKey OPTIONAL ]], + * ... + * } + * + * We don't include attributes or public key. The 'version' field + * is an INTEGER that we will set to 0 (meaning 'v1', compatible + * with previous versions of PKCS#8). The 'privateKeyAlgorithm' + * structure is an AlgorithmIdentifier whose OID should be + * rsaEncryption, with NULL parameters. The 'privateKey' is an + * OCTET STRING, whose value is the "raw DER" encoding of the + * key pair. + * + * Since the private key value comes last, this function really + * adds a header, which is mostly fixed (only some lengths have + * to be modified). 
+ */ + + /* + * Concatenation of: + * - DER encoding of an INTEGER of value 0 (the 'version' field) + * - DER encoding of a PrivateKeyAlgorithmIdentifier that uses + * the rsaEncryption OID, and NULL parameters + * - An OCTET STRING tag + */ + static const unsigned char PK8_HEAD[] = { + 0x02, 0x01, 0x00, + 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, + 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05, 0x00, + 0x04 + }; + + size_t len_raw, len_seq; + + len_raw = br_encode_rsa_raw_der(NULL, sk, pk, d, dlen); /* length-only call (dest is NULL) */ + len_seq = (sizeof PK8_HEAD) + len_of_len(len_raw) + len_raw; /* PK8_HEAD already ends with the OCTET STRING tag */ + if (dest == NULL) { + return 1 + len_of_len(len_seq) + len_seq; + } else { + unsigned char *buf; + size_t lenlen; + + buf = dest; + *buf ++ = 0x30; /* SEQUENCE tag */ + lenlen = br_asn1_encode_length(buf, len_seq); + buf += lenlen; + + /* version, privateKeyAlgorithm, privateKey tag */ + memcpy(buf, PK8_HEAD, sizeof PK8_HEAD); + buf += sizeof PK8_HEAD; + + /* privateKey */ + buf += br_asn1_encode_length(buf, len_raw); + br_encode_rsa_raw_der(buf, sk, pk, d, dlen); + + return 1 + lenlen + len_seq; + } +} diff --git a/third_party/bearssl/src/encode_rsa_rawder.c b/third_party/bearssl/src/encode_rsa_rawder.c new file mode 100644 index 0000000..1a8052b --- /dev/null +++ b/third_party/bearssl/src/encode_rsa_rawder.c @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_x509.h; returns the total encoded length. When dest is NULL only the length is computed; otherwise the SEQUENCE of nine INTEGERs is written to dest. The private exponent is supplied separately as d[] of dlen bytes. */ +size_t +br_encode_rsa_raw_der(void *dest, const br_rsa_private_key *sk, + const br_rsa_public_key *pk, const void *d, size_t dlen) +{ + /* + * ASN.1 format: + * + * RSAPrivateKey ::= SEQUENCE { + * version Version, + * modulus INTEGER, -- n + * publicExponent INTEGER, -- e + * privateExponent INTEGER, -- d + * prime1 INTEGER, -- p + * prime2 INTEGER, -- q + * exponent1 INTEGER, -- d mod (p-1) + * exponent2 INTEGER, -- d mod (q-1) + * coefficient INTEGER, -- (inverse of q) mod p + * otherPrimeInfos OtherPrimeInfos OPTIONAL + * } + * + * The 'version' field is an INTEGER of value 0 (meaning: there + * are exactly two prime factors), and 'otherPrimeInfos' will + * be absent (because there are exactly two prime factors). + */ + + br_asn1_uint num[9]; + size_t u, slen; + + /* + * For all INTEGER values, get the pointer and length for the + * data bytes. + */ + num[0] = br_asn1_uint_prepare(NULL, 0); /* version field, described above as an INTEGER of value 0 */ + num[1] = br_asn1_uint_prepare(pk->n, pk->nlen); + num[2] = br_asn1_uint_prepare(pk->e, pk->elen); + num[3] = br_asn1_uint_prepare(d, dlen); + num[4] = br_asn1_uint_prepare(sk->p, sk->plen); + num[5] = br_asn1_uint_prepare(sk->q, sk->qlen); + num[6] = br_asn1_uint_prepare(sk->dp, sk->dplen); + num[7] = br_asn1_uint_prepare(sk->dq, sk->dqlen); + num[8] = br_asn1_uint_prepare(sk->iq, sk->iqlen); + + /* + * Get the length of the SEQUENCE contents. 
+ */ + slen = 0; + for (u = 0; u < 9; u ++) { + uint32_t ilen; + + ilen = num[u].asn1len; + slen += 1 + len_of_len(ilen) + ilen; /* tag byte, length bytes, then the value bytes */ + } + + if (dest == NULL) { + return 1 + len_of_len(slen) + slen; + } else { + unsigned char *buf; + size_t lenlen; + + buf = dest; + *buf ++ = 0x30; /* SEQUENCE tag */ + lenlen = br_asn1_encode_length(buf, slen); + buf += lenlen; + for (u = 0; u < 9; u ++) { + buf += br_asn1_encode_uint(buf, num[u]); + } + return 1 + lenlen + slen; + } +} diff --git a/third_party/bearssl/src/gcm.c b/third_party/bearssl/src/gcm.c new file mode 100644 index 0000000..ede5f08 --- /dev/null +++ b/third_party/bearssl/src/gcm.c @@ -0,0 +1,318 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* + * Implementation Notes + * ==================== + * + * Since CTR and GHASH implementations can handle only full blocks, a + * 16-byte buffer (buf[]) is maintained in the context: + * + * - When processing AAD, buf[] contains the 0-15 unprocessed bytes. + * + * - When doing CTR encryption / decryption, buf[] contains the AES output + * for the last partial block, to be used with the next few bytes of + * data, as well as the already encrypted bytes. For instance, if the + * processed data length so far is 21 bytes, then buf[0..4] contains + * the five last encrypted bytes, and buf[5..15] contains the next 11 + * AES output bytes to be XORed with the next 11 bytes of input. + * + * The recorded AES output bytes are used to complete the block when + * the corresponding bytes are obtained. Note that buf[] always + * contains the _encrypted_ bytes, whether we apply encryption or + * decryption: these bytes are used as input to GHASH when the block + * is complete. + * + * In both cases, the low bits of the data length counters (count_aad, + * count_ctr) are used to work out the current situation. + */ + +/* see bearssl_aead.h; binds the context to the CTR implementation bctx and the GHASH function gh, and computes the GHASH key h[] */ +void +br_gcm_init(br_gcm_context *ctx, const br_block_ctr_class **bctx, br_ghash gh) +{ + unsigned char iv[12]; + + ctx->vtable = &br_gcm_vtable; + ctx->bctx = bctx; + ctx->gh = gh; + + /* + * The GHASH key h[] is the raw encryption of the all-zero + * block. Since we only have a CTR implementation, we use it + * with an all-zero IV and a zero counter, to CTR-encrypt an + * all-zero block. + */ + memset(ctx->h, 0, sizeof ctx->h); + memset(iv, 0, sizeof iv); + (*bctx)->run(bctx, iv, 0, ctx->h, sizeof ctx->h); +} + +/* see bearssl_aead.h; starts a new run with the nonce iv[] of len bytes: sets the CTR IV (j0_1), the initial counter (j0_2) and the running counter (jc), then clears the GHASH accumulator y[] and both length counters */ +void +br_gcm_reset(br_gcm_context *ctx, const void *iv, size_t len) +{ + /* + * If the provided nonce is 12 bytes, then this is the initial + * IV for CTR mode; it will be used with a counter that starts + * at 2 (value 1 is for encrypting the GHASH output into the tag). 
+ * + * If the provided nonce has any other length, then it is hashed + * (with GHASH) into a 16-byte value that will be the IV for CTR + * (both 12-byte IV and 32-bit counter). + */ + if (len == 12) { + memcpy(ctx->j0_1, iv, 12); + ctx->j0_2 = 1; + } else { + unsigned char ty[16], tmp[16]; + + memset(ty, 0, sizeof ty); + ctx->gh(ty, ctx->h, iv, len); + memset(tmp, 0, 8); + br_enc64be(tmp + 8, (uint64_t)len << 3); /* nonce length in bits, in the last 8 bytes of the block */ + ctx->gh(ty, ctx->h, tmp, 16); + memcpy(ctx->j0_1, ty, 12); + ctx->j0_2 = br_dec32be(ty + 12); + } + ctx->jc = ctx->j0_2 + 1; /* data blocks start one counter value after the one kept for the tag */ + memset(ctx->y, 0, sizeof ctx->y); + ctx->count_aad = 0; + ctx->count_ctr = 0; +} + +/* see bearssl_aead.h; feeds len bytes of additional authenticated data into GHASH, keeping any trailing partial block in buf[] */ +void +br_gcm_aad_inject(br_gcm_context *ctx, const void *data, size_t len) +{ + size_t ptr, dlen; + + ptr = (size_t)ctx->count_aad & (size_t)15; /* number of bytes already buffered in buf[] */ + if (ptr != 0) { + /* + * If there is a partial block, then we first try to + * complete it. + */ + size_t clen; + + clen = 16 - ptr; + if (len < clen) { + memcpy(ctx->buf + ptr, data, len); + ctx->count_aad += (uint64_t)len; + return; + } + memcpy(ctx->buf + ptr, data, clen); + ctx->gh(ctx->y, ctx->h, ctx->buf, 16); + data = (const unsigned char *)data + clen; + len -= clen; + ctx->count_aad += (uint64_t)clen; + } + + /* + * Now AAD is aligned on a 16-byte block (with regards to GHASH). + * We process all complete blocks, and save the last partial + * block. + */ + dlen = len & ~(size_t)15; + ctx->gh(ctx->y, ctx->h, data, dlen); + memcpy(ctx->buf, (const unsigned char *)data + dlen, len - dlen); + ctx->count_aad += (uint64_t)len; +} + +/* see bearssl_aead.h; ends the AAD phase by hashing any buffered partial AAD block */ +void +br_gcm_flip(br_gcm_context *ctx) +{ + /* + * We complete the GHASH computation if there is a partial block. + * The GHASH implementation automatically applies padding with + * zeros. 
+ */ + size_t ptr; + + ptr = (size_t)ctx->count_aad & (size_t)15; + if (ptr != 0) { + ctx->gh(ctx->y, ctx->h, ctx->buf, ptr); + } +} + +/* see bearssl_aead.h; processes len bytes of data[] in place (encryption when encrypt is non-zero, decryption otherwise) and feeds the encrypted bytes to GHASH, block by block */ +void +br_gcm_run(br_gcm_context *ctx, int encrypt, void *data, size_t len) +{ + unsigned char *buf; + size_t ptr, dlen; + + buf = data; + ptr = (size_t)ctx->count_ctr & (size_t)15; + if (ptr != 0) { + /* + * If we have a partial block, then we try to complete it. + */ + size_t u, clen; + + clen = 16 - ptr; + if (len < clen) { + clen = len; + } + for (u = 0; u < clen; u ++) { + unsigned x, y; + + x = buf[u]; + y = x ^ ctx->buf[ptr + u]; + ctx->buf[ptr + u] = encrypt ? y : x; /* keep the encrypted byte for GHASH in both directions */ + buf[u] = y; + } + ctx->count_ctr += (uint64_t)clen; + buf += clen; + len -= clen; + if (ptr + clen < 16) { + return; + } + ctx->gh(ctx->y, ctx->h, ctx->buf, 16); + } + + /* + * Process full blocks. + */ + dlen = len & ~(size_t)15; + if (!encrypt) { /* decryption: hash the ciphertext before it is overwritten */ + ctx->gh(ctx->y, ctx->h, buf, dlen); + } + ctx->jc = (*ctx->bctx)->run(ctx->bctx, ctx->j0_1, ctx->jc, buf, dlen); + if (encrypt) { /* encryption: hash the ciphertext just produced */ + ctx->gh(ctx->y, ctx->h, buf, dlen); + } + buf += dlen; + len -= dlen; + ctx->count_ctr += (uint64_t)dlen; + + if (len > 0) { + /* + * There is a partial block. + */ + size_t u; + + memset(ctx->buf, 0, sizeof ctx->buf); + ctx->jc = (*ctx->bctx)->run(ctx->bctx, ctx->j0_1, + ctx->jc, ctx->buf, 16); + for (u = 0; u < len; u ++) { + unsigned x, y; + + x = buf[u]; + y = x ^ ctx->buf[u]; + ctx->buf[u] = encrypt ? y : x; /* keep the encrypted byte for GHASH in both directions */ + buf[u] = y; + } + ctx->count_ctr += (uint64_t)len; + } +} + +/* see bearssl_aead.h; finishes GHASH and writes the 16-byte tag into tag[] */ +void +br_gcm_get_tag(br_gcm_context *ctx, void *tag) +{ + size_t ptr; + unsigned char tmp[16]; + + ptr = (size_t)ctx->count_ctr & (size_t)15; + if (ptr > 0) { + /* + * There is a partial block: encrypted/decrypted data has + * been produced, but the encrypted bytes must still be + * processed by GHASH. + */ + ctx->gh(ctx->y, ctx->h, ctx->buf, ptr); + } + + /* + * Final block for GHASH: the AAD and plaintext lengths (in bits). 
+ */ + br_enc64be(tmp, ctx->count_aad << 3); + br_enc64be(tmp + 8, ctx->count_ctr << 3); + ctx->gh(ctx->y, ctx->h, tmp, 16); + + /* + * Tag is the GHASH output XORed with the encryption of the + * nonce with the initial counter value. + */ + memcpy(tag, ctx->y, 16); + (*ctx->bctx)->run(ctx->bctx, ctx->j0_1, ctx->j0_2, tag, 16); +} + +/* see bearssl_aead.h; computes the full tag and copies its first len bytes into tag[] */ +void +br_gcm_get_tag_trunc(br_gcm_context *ctx, void *tag, size_t len) +{ + unsigned char tmp[16]; + + br_gcm_get_tag(ctx, tmp); + memcpy(tag, tmp, len); +} + +/* see bearssl_aead.h; computes the tag and compares its first len bytes with tag[]. All len bytes are always compared (no early exit); the differences are ORed into x and the result is EQ0(x). */ +uint32_t +br_gcm_check_tag_trunc(br_gcm_context *ctx, const void *tag, size_t len) +{ + unsigned char tmp[16]; + size_t u; + int x; + + br_gcm_get_tag(ctx, tmp); + x = 0; + for (u = 0; u < len; u ++) { + x |= tmp[u] ^ ((const unsigned char *)tag)[u]; + } + return EQ0(x); +} + +/* see bearssl_aead.h; full-length (16-byte) variant of br_gcm_check_tag_trunc() */ +uint32_t +br_gcm_check_tag(br_gcm_context *ctx, const void *tag) +{ + return br_gcm_check_tag_trunc(ctx, tag, 16); +} + +/* see bearssl_aead.h; the function pointers are cast because the functions above take a br_gcm_context pointer instead of the generic br_aead_class pointer */ +const br_aead_class br_gcm_vtable = { + 16, + (void (*)(const br_aead_class **, const void *, size_t)) + &br_gcm_reset, + (void (*)(const br_aead_class **, const void *, size_t)) + &br_gcm_aad_inject, + (void (*)(const br_aead_class **)) + &br_gcm_flip, + (void (*)(const br_aead_class **, int, void *, size_t)) + &br_gcm_run, + (void (*)(const br_aead_class **, void *)) + &br_gcm_get_tag, + (uint32_t (*)(const br_aead_class **, const void *)) + &br_gcm_check_tag, + (void (*)(const br_aead_class **, void *, size_t)) + &br_gcm_get_tag_trunc, + (uint32_t (*)(const br_aead_class **, const void *, size_t)) + &br_gcm_check_tag_trunc +}; diff --git a/third_party/bearssl/src/ghash_ctmul.c b/third_party/bearssl/src/ghash_ctmul.c new file mode 100644 index 0000000..3623202 --- /dev/null +++ b/third_party/bearssl/src/ghash_ctmul.c @@ -0,0 +1,345 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy 
of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * We compute "carryless multiplications" through normal integer + * multiplications, masking out enough bits to create "holes" in which + * carries may expand without altering our bits; we really use 8 data + * bits per 32-bit word, spaced every fourth bit. Accumulated carries + * may not exceed 8 in total, which fits in 4 bits. + * + * It would be possible to use a 3-bit spacing, allowing two operands, + * one with 7 non-zero data bits, the other one with 10 or 11 non-zero + * data bits; this asymmetric splitting makes the overall code more + * complex with thresholds and exceptions, and does not appear to be + * worth the effort. + */ + +/* + * We cannot really autodetect whether multiplications are "slow" or + * not. A typical example is the ARM Cortex M0+, which exists in two + * versions: one with a 1-cycle multiplication opcode, the other with + * a 32-cycle multiplication opcode. 
#if BR_SLOW_MUL

/*
 * Carryless 32x32->64 multiplication with a Karatsuba-like split: nine
 * integer multiplications instead of sixteen, at the cost of extra
 * XORs and shifts. Each operand is split into four "lanes" (one data
 * bit every fourth bit); the lanes are combined Karatsuba-style into
 * nine operand pairs.
 */
static inline void
bmul(uint32_t *hi, uint32_t *lo, uint32_t x, uint32_t y)
{
	const uint32_t lane = (uint32_t)0x11111111;
	uint32_t p[9], q[9];
	int i, g;

	/*
	 * Lanes of x go to p[0], p[1], p[3], p[4]; the other slots are
	 * the Karatsuba sums. Same layout for y in q[].
	 */
	p[0] = x & lane;
	p[1] = (x >> 1) & lane;
	p[3] = (x >> 2) & lane;
	p[4] = (x >> 3) & lane;
	q[0] = y & lane;
	q[1] = (y >> 1) & lane;
	q[3] = (y >> 2) & lane;
	q[4] = (y >> 3) & lane;
	p[2] = p[0] ^ p[1];
	q[2] = q[0] ^ q[1];
	p[5] = p[3] ^ p[4];
	q[5] = q[3] ^ q[4];
	p[6] = p[0] ^ p[3];
	q[6] = q[0] ^ q[3];
	p[7] = p[1] ^ p[4];
	q[7] = q[1] ^ q[4];
	p[8] = p[6] ^ p[7];
	q[8] = q[6] ^ q[7];

	/*
	 * Nine integer products; low word goes back to p[], high word
	 * to q[]. Masking removes the carries that spilled into the
	 * holes between data bits.
	 */
	for (i = 0; i < 9; i ++) {
		uint64_t t;

		t = MUL(q[i], p[i]);
		q[i] = (uint32_t)(t >> 32) & lane;
		p[i] = (uint32_t)t & lane;
	}

	/*
	 * Inner Karatsuba recombination, once per group of three
	 * products (0..2, 3..5, 6..8).
	 */
	for (g = 0; g < 9; g += 3) {
		p[g + 2] ^= p[g] ^ p[g + 1];
		q[g + 2] ^= q[g] ^ q[g + 1];
		p[g] ^= (p[g + 2] << 1) ^ (p[g + 1] << 2);
		q[g] ^= (q[g + 2] << 1) ^ (q[g + 1] << 2);
	}

	/*
	 * Outer Karatsuba recombination and final assembly.
	 */
	p[6] ^= p[0] ^ p[3];
	q[6] ^= q[0] ^ q[3];
	*lo = p[0] ^ (p[6] << 2) ^ (p[3] << 4);
	*hi = q[0] ^ (q[6] << 2) ^ (q[3] << 4) ^ (p[6] >> 30) ^ (p[3] >> 28);
}

#else

/*
 * Simple multiplication in GF(2)[X], using 16 integer multiplications:
 * every lane of x is multiplied with every lane of y, and products are
 * grouped by the lane in which their data bits land.
 */
static inline void
bmul(uint32_t *hi, uint32_t *lo, uint32_t x, uint32_t y)
{
	uint32_t xs[4], ys[4];
	uint64_t acc;
	unsigned i, k;

	for (i = 0; i < 4; i ++) {
		uint32_t lane;

		lane = (uint32_t)0x11111111 << i;
		xs[i] = x & lane;
		ys[i] = y & lane;
	}
	acc = 0;
	for (k = 0; k < 4; k ++) {
		uint64_t z;

		/*
		 * Lane i of x times lane j of y lands in lane
		 * (i + j) mod 4 of the result.
		 */
		z = 0;
		for (i = 0; i < 4; i ++) {
			z ^= MUL(xs[i], ys[(k - i) & 3u]);
		}
		acc |= z & ((uint64_t)0x1111111111111111 << k);
	}
	*lo = (uint32_t)acc;
	*hi = (uint32_t)(acc >> 32);
}

#endif

/* see bearssl_hash.h */
void
br_ghash_ctmul(void *y, const void *h, const void *data, size_t len)
{
	const unsigned char *cur, *hb;
	unsigned char *yb;
	uint32_t yw[4], hw[4], hk[9];
	int i;

	/*
	 * y and h are handled as arrays of 32-bit words, index 0 being
	 * the last four bytes of the big-endian encoding.
	 */
	cur = data;
	yb = y;
	hb = h;
	for (i = 0; i < 4; i ++) {
		yw[3 - i] = br_dec32be(yb + 4 * i);
		hw[3 - i] = br_dec32be(hb + 4 * i);
	}

	/*
	 * Karatsuba operands derived from h; they are the same for
	 * every block.
	 *   h[0,1]            -> 0..2
	 *   h[2,3]            -> 3..5
	 *   h[0,1] + h[2,3]   -> 6..8
	 */
	hk[0] = hw[0];
	hk[1] = hw[1];
	hk[2] = hk[0] ^ hk[1];
	hk[3] = hw[2];
	hk[4] = hw[3];
	hk[5] = hk[3] ^ hk[4];
	hk[6] = hk[0] ^ hk[3];
	hk[7] = hk[1] ^ hk[4];
	hk[8] = hk[6] ^ hk[7];

	while (len > 0) {
		unsigned char pad[16];
		const unsigned char *blk;
		uint32_t pl[9], ph[9], w[3][4], r[8], zw[8];
		int g;

		/*
		 * Next 16-byte block; a short final block is padded
		 * with zeros.
		 */
		if (len >= 16) {
			blk = cur;
			cur += 16;
			len -= 16;
		} else {
			memcpy(pad, cur, len);
			memset(pad + len, 0, (sizeof pad) - len);
			blk = pad;
			len = 0;
		}

		/*
		 * XOR the block (big-endian, as mandated by GHASH) into y.
		 */
		for (i = 0; i < 4; i ++) {
			yw[3 - i] ^= br_dec32be(blk + 4 * i);
		}

		/*
		 * 128x128 multiplication as three 64x64 Karatsuba
		 * multiplications, i.e. nine 32x32 products. pl[] first
		 * holds the operands derived from y, then the low product
		 * words; ph[] receives the high product words.
		 */
		pl[0] = yw[0];
		pl[1] = yw[1];
		pl[2] = pl[0] ^ pl[1];
		pl[3] = yw[2];
		pl[4] = yw[3];
		pl[5] = pl[3] ^ pl[4];
		pl[6] = pl[0] ^ pl[3];
		pl[7] = pl[1] ^ pl[4];
		pl[8] = pl[6] ^ pl[7];
		for (i = 0; i < 9; i ++) {
			bmul(&ph[i], &pl[i], hk[i], pl[i]);
		}

		/*
		 * Assemble each 64x64 product (four words) from its three
		 * 32x32 products.
		 */
		for (g = 0; g < 3; g ++) {
			int b;

			b = 3 * g;
			w[g][0] = pl[b];
			w[g][1] = ph[b] ^ pl[b + 2] ^ pl[b] ^ pl[b + 1];
			w[g][2] = pl[b + 1] ^ ph[b + 2] ^ ph[b] ^ ph[b + 1];
			w[g][3] = ph[b + 1];
		}

		/*
		 * Outer Karatsuba step: the middle term is added at a
		 * 64-bit offset into the 256-bit result.
		 */
		for (i = 0; i < 4; i ++) {
			w[2][i] ^= w[0][i] ^ w[1][i];
		}
		r[0] = w[0][0];
		r[1] = w[0][1];
		r[2] = w[0][2] ^ w[2][0];
		r[3] = w[0][3] ^ w[2][1];
		r[4] = w[1][0] ^ w[2][2];
		r[5] = w[1][1] ^ w[2][3];
		r[6] = w[1][2];
		r[7] = w[1][3];

		/*
		 * GHASH uses "reversed" bit ordering, which is harmless
		 * for a carryless multiplication except that the 255-bit
		 * result must be shifted left by one bit.
		 */
		zw[0] = r[0] << 1;
		for (i = 1; i < 8; i ++) {
			zw[i] = (r[i] << 1) | (r[i - 1] >> 31);
		}

		/*
		 * Reduction modulo the field polynomial, back to 128 bits.
		 */
		for (i = 0; i < 4; i ++) {
			uint32_t lw;

			lw = zw[i];
			zw[i + 4] ^= lw ^ (lw >> 1) ^ (lw >> 2) ^ (lw >> 7);
			zw[i + 3] ^= (lw << 31) ^ (lw << 30) ^ (lw << 25);
		}
		for (i = 0; i < 4; i ++) {
			yw[i] = zw[i + 4];
		}
	}

	/*
	 * Encode the result back into y.
	 */
	for (i = 0; i < 4; i ++) {
		br_enc32be(yb + 4 * i, yw[3 - i]);
	}
}
/*
 * Multiplication in GF(2)[X], truncated to its low 32 bits.
 *
 * Each operand is split into four "lanes" (one data bit every fourth
 * bit). Plain integer multiplications of lanes leave room for carries
 * in the three unused bits between data bits; masking the products
 * afterwards removes those carries, which yields a carryless product.
 */
static inline uint32_t
bmul32(uint32_t x, uint32_t y)
{
	uint32_t xs[4], ys[4];
	uint32_t acc;
	unsigned i, k;

	for (i = 0; i < 4; i ++) {
		uint32_t lane;

		lane = (uint32_t)0x11111111 << i;
		xs[i] = x & lane;
		ys[i] = y & lane;
	}
	acc = 0;
	for (k = 0; k < 4; k ++) {
		uint32_t z;

		/*
		 * Lane i of x times lane j of y lands in lane
		 * (i + j) mod 4 of the result.
		 */
		z = 0;
		for (i = 0; i < 4; i ++) {
			z ^= xs[i] * ys[(k - i) & 3u];
		}
		acc |= z & ((uint32_t)0x11111111 << k);
	}
	return acc;
}
/*
 * Bit-reverse a 32-bit word: adjacent groups of 1, 2, 4 and 8 bits are
 * swapped in turn, then the two 16-bit halves.
 */
static uint32_t
rev32(uint32_t x)
{
	static const uint32_t masks[4] = {
		(uint32_t)0x55555555,
		(uint32_t)0x33333333,
		(uint32_t)0x0F0F0F0F,
		(uint32_t)0x00FF00FF
	};
	unsigned k;

	for (k = 0; k < 4; k ++) {
		unsigned s;

		s = 1u << k;
		x = ((x & masks[k]) << s) | ((x >> s) & masks[k]);
	}
	return (x << 16) | (x >> 16);
}

/*
 * Expand four 32-bit words w[0..3] into the nine Karatsuba operands:
 *   0..3   the words themselves
 *   4, 5   w[0]+w[1] and w[2]+w[3]
 *   6..8   same pattern applied to (w[0]+w[2], w[1]+w[3])
 */
static void
ghash32_karatsuba_operands(uint32_t *dst, const uint32_t *w)
{
	dst[0] = w[0];
	dst[1] = w[1];
	dst[2] = w[2];
	dst[3] = w[3];
	dst[4] = dst[0] ^ dst[1];
	dst[5] = dst[2] ^ dst[3];
	dst[6] = dst[0] ^ dst[2];
	dst[7] = dst[1] ^ dst[3];
	dst[8] = dst[6] ^ dst[7];
}

/* see bearssl_hash.h */
void
br_ghash_ctmul32(void *y, const void *h, const void *data, size_t len)
{
	/*
	 * Same structure as br_ghash_ctmul(), except that bmul32() only
	 * yields the low 32 bits of each product. The high halves are
	 * obtained by multiplying the bit-reversed operands, hence
	 * eighteen 32-bit multiplications per block instead of nine.
	 */
	const unsigned char *cur, *hb;
	unsigned char *yb;
	uint32_t yw[4], hw[4], rv[4];
	uint32_t b[18];
	int i;

	cur = data;
	yb = y;
	hb = h;
	for (i = 0; i < 4; i ++) {
		yw[3 - i] = br_dec32be(yb + 4 * i);
		hw[3 - i] = br_dec32be(hb + 4 * i);
	}

	/*
	 * Operands derived from h (normal in 0..8, bit-reversed in
	 * 9..17) do not change from one block to the next.
	 */
	for (i = 0; i < 4; i ++) {
		rv[i] = rev32(hw[i]);
	}
	ghash32_karatsuba_operands(b, hw);
	ghash32_karatsuba_operands(b + 9, rv);

	while (len > 0) {
		unsigned char pad[16];
		const unsigned char *blk;
		uint32_t a[18], c[18], d[8], zw[8];

		/*
		 * Next block, zero-padded if it is short.
		 */
		if (len >= 16) {
			blk = cur;
			cur += 16;
			len -= 16;
		} else {
			memcpy(pad, cur, len);
			memset(pad + len, 0, (sizeof pad) - len);
			blk = pad;
			len = 0;
		}
		for (i = 0; i < 4; i ++) {
			yw[3 - i] ^= br_dec32be(blk + 4 * i);
		}

		/*
		 * Operands derived from y, normal then bit-reversed.
		 *   y[0,1]*h[0,1]                     -> 0,1,4
		 *   y[2,3]*h[2,3]                     -> 2,3,5
		 *   (y[0,1]+y[2,3])*(h[0,1]+h[2,3])   -> 6,7,8
		 */
		for (i = 0; i < 4; i ++) {
			rv[i] = rev32(yw[i]);
		}
		ghash32_karatsuba_operands(a, yw);
		ghash32_karatsuba_operands(a + 9, rv);

		for (i = 0; i < 18; i ++) {
			c[i] = bmul32(a[i], b[i]);
		}

		/*
		 * Inner Karatsuba recombination, for the low halves
		 * (0..8) and the reversed high halves (9..17).
		 */
		c[4] ^= c[0] ^ c[1];
		c[5] ^= c[2] ^ c[3];
		c[8] ^= c[6] ^ c[7];
		c[13] ^= c[9] ^ c[10];
		c[14] ^= c[11] ^ c[12];
		c[17] ^= c[15] ^ c[16];

		/*
		 * Assemble the 256-bit product. Reversed products are
		 * reversed back and shifted right by one bit to become
		 * the high halves.
		 *   y[0,1]*h[0,1]                     -> 0,9^4,1^13,10
		 *   y[2,3]*h[2,3]                     -> 2,11^5,3^14,12
		 *   (y[0,1]+y[2,3])*(h[0,1]+h[2,3])   -> 6,15^8,7^17,16
		 */
		d[0] = c[0];
		d[1] = c[4] ^ (rev32(c[9]) >> 1);
		d[2] = c[1] ^ c[0] ^ c[2] ^ c[6] ^ (rev32(c[13]) >> 1);
		d[3] = c[4] ^ c[5] ^ c[8]
			^ (rev32(c[10] ^ c[9] ^ c[11] ^ c[15]) >> 1);
		d[4] = c[2] ^ c[1] ^ c[3] ^ c[7]
			^ (rev32(c[13] ^ c[14] ^ c[17]) >> 1);
		d[5] = c[5] ^ (rev32(c[11] ^ c[10] ^ c[12] ^ c[16]) >> 1);
		d[6] = c[3] ^ (rev32(c[14]) >> 1);
		d[7] = rev32(c[12]) >> 1;

		/*
		 * Left-shift the whole value by one bit.
		 */
		zw[0] = d[0] << 1;
		for (i = 1; i < 8; i ++) {
			zw[i] = (d[i] << 1) | (d[i - 1] >> 31);
		}

		/*
		 * Reduction modulo the field polynomial.
		 */
		for (i = 0; i < 4; i ++) {
			uint32_t lw;

			lw = zw[i];
			zw[i + 4] ^= lw ^ (lw >> 1) ^ (lw >> 2) ^ (lw >> 7);
			zw[i + 3] ^= (lw << 31) ^ (lw << 30) ^ (lw << 25);
		}
		for (i = 0; i < 4; i ++) {
			yw[i] = zw[i + 4];
		}
	}

	for (i = 0; i < 4; i ++) {
		br_enc32be(yb + 4 * i, yw[3 - i]);
	}
}
/*
 * Multiplication in GF(2)[X], truncated to its low 64 bits. Operands
 * are split into four "lanes" (one data bit every fourth bit) so that
 * integer multiplication carries fall into unused bits, which are then
 * masked out.
 */
static inline uint64_t
bmul64(uint64_t x, uint64_t y)
{
	uint64_t xs[4], ys[4];
	uint64_t acc;
	unsigned i, k;

	for (i = 0; i < 4; i ++) {
		uint64_t lane;

		lane = (uint64_t)0x1111111111111111 << i;
		xs[i] = x & lane;
		ys[i] = y & lane;
	}
	acc = 0;
	for (k = 0; k < 4; k ++) {
		uint64_t z;

		/*
		 * Lane i of x times lane j of y lands in lane
		 * (i + j) mod 4 of the result.
		 */
		z = 0;
		for (i = 0; i < 4; i ++) {
			z ^= xs[i] * ys[(k - i) & 3u];
		}
		acc |= z & ((uint64_t)0x1111111111111111 << k);
	}
	return acc;
}

/*
 * Bit-reverse a 64-bit word: adjacent groups of 1, 2, 4, 8 and 16 bits
 * are swapped in turn, then the two 32-bit halves.
 */
static uint64_t
rev64(uint64_t x)
{
	static const uint64_t masks[5] = {
		(uint64_t)0x5555555555555555,
		(uint64_t)0x3333333333333333,
		(uint64_t)0x0F0F0F0F0F0F0F0F,
		(uint64_t)0x00FF00FF00FF00FF,
		(uint64_t)0x0000FFFF0000FFFF
	};
	unsigned k;

	for (k = 0; k < 5; k ++) {
		unsigned s;

		s = 1u << k;
		x = ((x & masks[k]) << s) | ((x >> s) & masks[k]);
	}
	return (x << 32) | (x >> 32);
}
/* see bearssl_hash.h */
void
br_ghash_ctmul64(void *y, const void *h, const void *data, size_t len)
{
	const unsigned char *cur, *hb;
	unsigned char *yb;
	uint64_t ylo, yhi;
	uint64_t hlo, hhi, hmid, hlo_r, hhi_r, hmid_r;

	cur = data;
	yb = y;
	hb = h;

	/*
	 * Both y and h are 128-bit big-endian values; "hi" is the first
	 * eight bytes, "lo" the last eight.
	 */
	yhi = br_dec64be(yb);
	ylo = br_dec64be(yb + 8);
	hhi = br_dec64be(hb);
	hlo = br_dec64be(hb + 8);

	/*
	 * Karatsuba operands for h, in normal and bit-reversed form
	 * (the reversed form gives access to the high product halves).
	 */
	hlo_r = rev64(hlo);
	hhi_r = rev64(hhi);
	hmid = hlo ^ hhi;
	hmid_r = hlo_r ^ hhi_r;

	while (len > 0) {
		unsigned char pad[16];
		const unsigned char *blk;
		uint64_t ylo_r, yhi_r, ymid, ymid_r;
		uint64_t p0, p1, p2, p0h, p1h, p2h;
		uint64_t w0, w1, w2, w3;

		/*
		 * Next block, zero-padded if it is short.
		 */
		if (len >= 16) {
			blk = cur;
			cur += 16;
			len -= 16;
		} else {
			memcpy(pad, cur, len);
			memset(pad + len, 0, (sizeof pad) - len);
			blk = pad;
			len = 0;
		}
		yhi ^= br_dec64be(blk);
		ylo ^= br_dec64be(blk + 8);

		ylo_r = rev64(ylo);
		yhi_r = rev64(yhi);
		ymid = ylo ^ yhi;
		ymid_r = ylo_r ^ yhi_r;

		/*
		 * Three Karatsuba products; each one is computed twice
		 * (normal operands for the low 64 bits, reversed operands
		 * for the high 64 bits).
		 */
		p0 = bmul64(ylo, hlo);
		p1 = bmul64(yhi, hhi);
		p2 = bmul64(ymid, hmid);
		p0h = bmul64(ylo_r, hlo_r);
		p1h = bmul64(yhi_r, hhi_r);
		p2h = bmul64(ymid_r, hmid_r);
		p2 ^= p0 ^ p1;
		p2h ^= p0h ^ p1h;
		p0h = rev64(p0h) >> 1;
		p1h = rev64(p1h) >> 1;
		p2h = rev64(p2h) >> 1;

		/*
		 * 256-bit product in w0 (least significant) .. w3.
		 */
		w0 = p0;
		w1 = p0h ^ p2;
		w2 = p1 ^ p2h;
		w3 = p1h;

		/*
		 * Left-shift the whole value by one bit.
		 */
		w3 = (w3 << 1) | (w2 >> 63);
		w2 = (w2 << 1) | (w1 >> 63);
		w1 = (w1 << 1) | (w0 >> 63);
		w0 = (w0 << 1);

		/*
		 * Reduction modulo the field polynomial: fold w0, then
		 * the (updated) w1, into the upper half.
		 */
		w2 ^= w0 ^ (w0 >> 1) ^ (w0 >> 2) ^ (w0 >> 7);
		w1 ^= (w0 << 63) ^ (w0 << 62) ^ (w0 << 57);
		w3 ^= w1 ^ (w1 >> 1) ^ (w1 >> 2) ^ (w1 >> 7);
		w2 ^= (w1 << 63) ^ (w1 << 62) ^ (w1 << 57);

		ylo = w2;
		yhi = w3;
	}

	br_enc64be(yb, yhi);
	br_enc64be(yb + 8, ylo);
}
#if BR_AES_X86NI

/*
 * Test CPU support for PCLMULQDQ.
 *
 * Returns the result of br_cpuid() with only the ECX feature mask set;
 * presumably non-zero when the feature is available (the caller below
 * uses it as a boolean).
 */
static inline int
pclmul_supported(void)
{
	/*
	 * Bit mask for features in ECX:
	 *    1   PCLMULQDQ support
	 */
	return br_cpuid(0, 0, 0x00000002, 0);
}

/*
 * see bearssl_hash.h
 *
 * Returns a pointer to br_ghash_pclmul() if pclmul_supported() reports
 * the opcode as available, or 0 otherwise.
 */
br_ghash
br_ghash_pclmul_get(void)
{
	return pclmul_supported() ? &br_ghash_pclmul : 0;
}

BR_TARGETS_X86_UP

/*
 * GHASH is defined over elements of GF(2^128) with "full little-endian"
 * representation: leftmost byte is least significant, and, within each
 * byte, leftmost _bit_ is least significant. The natural ordering in
 * x86 is "mixed little-endian": bytes are ordered from least to most
 * significant, but bits within a byte are in most-to-least significant
 * order. Going to full little-endian representation would require
 * reversing bits within each byte, which is doable but expensive.
 *
 * Instead, we go to full big-endian representation, by swapping bytes
 * around, which is done with a single _mm_shuffle_epi8() opcode (it
 * comes with SSSE3; all CPU that offer pclmulqdq also have SSSE3). We
 * can use a full big-endian representation because in a carryless
 * multiplication, we have a nice bit reversal property:
 *
 *    rev_128(x) * rev_128(y) = rev_255(x * y)
 *
 * So by using full big-endian, we still get the right result, except
 * that it is right-shifted by 1 bit. The left-shift is relatively
 * inexpensive, and it can be mutualised.
 *
 *
 * Since SSE2 opcodes do not have facilities for shifting full 128-bit
 * values with bit precision, we have to break down values into 64-bit
 * chunks. We number chunks from 0 to 3 in left to right order.
 */

/*
 * Byte-swap a complete 128-bit value. This normally uses
 * _mm_shuffle_epi8(), which gets translated to pshufb (an SSSE3 opcode).
 * However, this crashes old Clang versions, so, for Clang before 3.8,
 * we use an alternate (and less efficient) version.
 *
 * Usage: BYTESWAP_DECL goes among the local declarations, BYTESWAP_PREP
 * is executed once before the first BYTESWAP(), and BYTESWAP(x) swaps
 * x in place. In the Clang fallback the first two are no-ops.
 */
#if BR_CLANG && !BR_CLANG_3_8
#define BYTESWAP_DECL
#define BYTESWAP_PREP   (void)0
#define BYTESWAP(x)   do { \
		__m128i byteswap1, byteswap2; \
		byteswap1 = (x); \
		byteswap2 = _mm_srli_epi16(byteswap1, 8); \
		byteswap1 = _mm_slli_epi16(byteswap1, 8); \
		byteswap1 = _mm_or_si128(byteswap1, byteswap2); \
		byteswap1 = _mm_shufflelo_epi16(byteswap1, 0x1B); \
		byteswap1 = _mm_shufflehi_epi16(byteswap1, 0x1B); \
		(x) = _mm_shuffle_epi32(byteswap1, 0x4E); \
	} while (0)
#else
#define BYTESWAP_DECL   __m128i byteswap_index;
#define BYTESWAP_PREP   do { \
		byteswap_index = _mm_set_epi8( \
			0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
	} while (0)
#define BYTESWAP(x)   do { \
		(x) = _mm_shuffle_epi8((x), byteswap_index); \
	} while (0)
#endif

/*
 * Call pclmulqdq. Clang appears to have trouble with the intrinsic, so,
 * for that compiler, we use inline assembly. Inline assembly is
 * potentially a bit slower because the compiler does not understand
 * what the opcode does, and thus cannot optimize instruction
 * scheduling.
 *
 * We use a target of "sse2" only, so that Clang may still handle the
 * '__m128i' type and allocate SSE2 registers.
 *
 * pclmulqdq00() uses selector 0x00 and pclmulqdq11() uses selector
 * 0x11, in both the inline assembly and the intrinsic variants.
 */
#if BR_CLANG
BR_TARGET("sse2")
static inline __m128i
pclmulqdq00(__m128i x, __m128i y)
{
	__asm__ ("pclmulqdq $0x00, %1, %0" : "+x" (x) : "x" (y));
	return x;
}
BR_TARGET("sse2")
static inline __m128i
pclmulqdq11(__m128i x, __m128i y)
{
	__asm__ ("pclmulqdq $0x11, %1, %0" : "+x" (x) : "x" (y));
	return x;
}
#else
#define pclmulqdq00(x, y)   _mm_clmulepi64_si128(x, y, 0x00)
#define pclmulqdq11(x, y)   _mm_clmulepi64_si128(x, y, 0x11)
#endif

/*
 * From a 128-bit value kw, compute kx as the XOR of the two 64-bit
 * halves of kw (into the right half of kx; left half is unspecified).
 */
#define BK(kw, kx)   do { \
		kx = _mm_xor_si128(kw, _mm_shuffle_epi32(kw, 0x0E)); \
	} while (0)

/*
 * Combine two 64-bit values (k0:k1) into a 128-bit (kw) value and
 * the XOR of the two values (kx).
 */
#define PBK(k0, k1, kw, kx)   do { \
		kw = _mm_unpacklo_epi64(k1, k0); \
		kx = _mm_xor_si128(k0, k1); \
	} while (0)

/*
 * Left-shift by 1 bit a 256-bit value (in four 64-bit words).
 * x0 receives the top bit of x1, x1 the top bit of x2, and so on; the
 * bit shifted out of x0 is dropped.
 */
#define SL_256(x0, x1, x2, x3)   do { \
		x0 = _mm_or_si128( \
			_mm_slli_epi64(x0, 1), \
			_mm_srli_epi64(x1, 63)); \
		x1 = _mm_or_si128( \
			_mm_slli_epi64(x1, 1), \
			_mm_srli_epi64(x2, 63)); \
		x2 = _mm_or_si128( \
			_mm_slli_epi64(x2, 1), \
			_mm_srli_epi64(x3, 63)); \
		x3 = _mm_slli_epi64(x3, 1); \
	} while (0)

/*
 * Perform reduction in GF(2^128). The 256-bit value is in x0..x3;
 * result is written in x0..x1.
 * The word x3 is folded into x1 and x2 first, then the updated x2 is
 * folded into x0 and x1; x2 is modified along the way.
 */
#define REDUCE_F128(x0, x1, x2, x3)   do { \
		x1 = _mm_xor_si128( \
			x1, \
			_mm_xor_si128( \
				_mm_xor_si128( \
					x3, \
					_mm_srli_epi64(x3, 1)), \
				_mm_xor_si128( \
					_mm_srli_epi64(x3, 2), \
					_mm_srli_epi64(x3, 7)))); \
		x2 = _mm_xor_si128( \
			_mm_xor_si128( \
				x2, \
				_mm_slli_epi64(x3, 63)), \
			_mm_xor_si128( \
				_mm_slli_epi64(x3, 62), \
				_mm_slli_epi64(x3, 57))); \
		x0 = _mm_xor_si128( \
			x0, \
			_mm_xor_si128( \
				_mm_xor_si128( \
					x2, \
					_mm_srli_epi64(x2, 1)), \
				_mm_xor_si128( \
					_mm_srli_epi64(x2, 2), \
					_mm_srli_epi64(x2, 7)))); \
		x1 = _mm_xor_si128( \
			_mm_xor_si128( \
				x1, \
				_mm_slli_epi64(x2, 63)), \
			_mm_xor_si128( \
				_mm_slli_epi64(x2, 62), \
				_mm_slli_epi64(x2, 57))); \
	} while (0)

/*
 * Square value kw into (dw,dx).
 * Only two carryless multiplications are used (one per 64-bit half of
 * kw); dw is the reduced square and dx the XOR of its two halves, as
 * produced by PBK().
 */
#define SQUARE_F128(kw, dw, dx)   do { \
		__m128i z0, z1, z2, z3; \
		z1 = pclmulqdq11(kw, kw); \
		z3 = pclmulqdq00(kw, kw); \
		z0 = _mm_shuffle_epi32(z1, 0x0E); \
		z2 = _mm_shuffle_epi32(z3, 0x0E); \
		SL_256(z0, z1, z2, z3); \
		REDUCE_F128(z0, z1, z2, z3); \
		PBK(z0, z1, dw, dx); \
	} while (0)

/*
 * see bearssl_hash.h
 *
 * GHASH using pclmulqdq: y (16 bytes, updated in place) is combined
 * with len bytes from data under key h (16 bytes). Data is consumed in
 * groups of four blocks (using h, h^2, h^3 and h^4 so that a single
 * reduction serves four blocks), then block by block for the remainder;
 * a trailing partial block is padded with zeros.
 */
BR_TARGET("ssse3,pclmul")
void
br_ghash_pclmul(void *y, const void *h, const void *data, size_t len)
{
	const unsigned char *buf1, *buf2;
	unsigned char tmp[64];
	size_t num4, num1;
	__m128i yw, h1w, h1x;
	BYTESWAP_DECL

	/*
	 * We split data into two chunks. First chunk starts at buf1
	 * and contains num4 blocks of 64-byte values. Second chunk
	 * starts at buf2 and contains num1 blocks of 16-byte values.
	 * We want the first chunk to be as large as possible.
	 *
	 * If the data length is not a multiple of 16, the whole second
	 * chunk (at most 63 bytes) is copied into tmp[] and padded with
	 * zeros up to the next multiple of 16.
	 */
	buf1 = data;
	num4 = len >> 6;
	len &= 63;
	buf2 = buf1 + (num4 << 6);
	num1 = (len + 15) >> 4;
	if ((len & 15) != 0) {
		memcpy(tmp, buf2, len);
		memset(tmp + len, 0, (num1 << 4) - len);
		buf2 = tmp;
	}

	/*
	 * Preparatory step for endian conversions.
	 */
	BYTESWAP_PREP;

	/*
	 * Load y and h. h1x receives the XOR of the two halves of h,
	 * used for the Karatsuba middle product.
	 */
	yw = _mm_loadu_si128(y);
	h1w = _mm_loadu_si128(h);
	BYTESWAP(yw);
	BYTESWAP(h1w);
	BK(h1w, h1x);

	if (num4 > 0) {
		__m128i h2w, h2x, h3w, h3x, h4w, h4x;
		__m128i t0, t1, t2, t3;

		/*
		 * Compute h2 = h^2.
		 */
		SQUARE_F128(h1w, h2w, h2x);

		/*
		 * Compute h3 = h^3 = h*(h^2).
		 */
		t1 = pclmulqdq11(h1w, h2w);
		t3 = pclmulqdq00(h1w, h2w);
		t2 = _mm_xor_si128(pclmulqdq00(h1x, h2x),
			_mm_xor_si128(t1, t3));
		t0 = _mm_shuffle_epi32(t1, 0x0E);
		t1 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E));
		t2 = _mm_xor_si128(t2, _mm_shuffle_epi32(t3, 0x0E));
		SL_256(t0, t1, t2, t3);
		REDUCE_F128(t0, t1, t2, t3);
		PBK(t0, t1, h3w, h3x);

		/*
		 * Compute h4 = h^4 = (h^2)^2.
		 */
		SQUARE_F128(h2w, h4w, h4x);

		while (num4 -- > 0) {
			__m128i aw0, aw1, aw2, aw3;
			__m128i ax0, ax1, ax2, ax3;

			aw0 = _mm_loadu_si128((void *)(buf1 +  0));
			aw1 = _mm_loadu_si128((void *)(buf1 + 16));
			aw2 = _mm_loadu_si128((void *)(buf1 + 32));
			aw3 = _mm_loadu_si128((void *)(buf1 + 48));
			BYTESWAP(aw0);
			BYTESWAP(aw1);
			BYTESWAP(aw2);
			BYTESWAP(aw3);
			buf1 += 64;

			/*
			 * The running value y is added to the first
			 * block only; that block is the one multiplied
			 * by h^4.
			 */
			aw0 = _mm_xor_si128(aw0, yw);
			BK(aw1, ax1);
			BK(aw2, ax2);
			BK(aw3, ax3);
			BK(aw0, ax0);

			/*
			 * Karatsuba products (high halves in t1, low
			 * halves in t3, middle terms in t2), each summed
			 * over the four blocks before the single shift
			 * and reduction.
			 */
			t1 = _mm_xor_si128(
				_mm_xor_si128(
					pclmulqdq11(aw0, h4w),
					pclmulqdq11(aw1, h3w)),
				_mm_xor_si128(
					pclmulqdq11(aw2, h2w),
					pclmulqdq11(aw3, h1w)));
			t3 = _mm_xor_si128(
				_mm_xor_si128(
					pclmulqdq00(aw0, h4w),
					pclmulqdq00(aw1, h3w)),
				_mm_xor_si128(
					pclmulqdq00(aw2, h2w),
					pclmulqdq00(aw3, h1w)));
			t2 = _mm_xor_si128(
				_mm_xor_si128(
					pclmulqdq00(ax0, h4x),
					pclmulqdq00(ax1, h3x)),
				_mm_xor_si128(
					pclmulqdq00(ax2, h2x),
					pclmulqdq00(ax3, h1x)));
			t2 = _mm_xor_si128(t2, _mm_xor_si128(t1, t3));
			t0 = _mm_shuffle_epi32(t1, 0x0E);
			t1 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E));
			t2 = _mm_xor_si128(t2, _mm_shuffle_epi32(t3, 0x0E));
			SL_256(t0, t1, t2, t3);
			REDUCE_F128(t0, t1, t2, t3);
			yw = _mm_unpacklo_epi64(t1, t0);
		}
	}

	/*
	 * Remaining blocks (at most four), one multiplication by h each.
	 */
	while (num1 -- > 0) {
		__m128i aw, ax;
		__m128i t0, t1, t2, t3;

		aw = _mm_loadu_si128((void *)buf2);
		BYTESWAP(aw);
		buf2 += 16;

		aw = _mm_xor_si128(aw, yw);
		BK(aw, ax);

		t1 = pclmulqdq11(aw, h1w);
		t3 = pclmulqdq00(aw, h1w);
		t2 = pclmulqdq00(ax, h1x);
		t2 = _mm_xor_si128(t2, _mm_xor_si128(t1, t3));
		t0 = _mm_shuffle_epi32(t1, 0x0E);
		t1 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E));
		t2 = _mm_xor_si128(t2, _mm_shuffle_epi32(t3, 0x0E));
		SL_256(t0, t1, t2, t3);
		REDUCE_F128(t0, t1, t2, t3);
		yw = _mm_unpacklo_epi64(t1, t0);
	}

	/*
	 * Convert back to the external byte order and store the result.
	 */
	BYTESWAP(yw);
	_mm_storeu_si128(y, yw);
}

BR_TARGETS_X86_DOWN

#else

/*
 * see bearssl_hash.h
 *
 * Built without BR_AES_X86NI: the pclmul implementation is not
 * compiled in, so no function pointer is available.
 */
br_ghash
br_ghash_pclmul_get(void)
{
	return 0;
}

#endif
/* third_party/bearssl/src/ghash_pwr8.c */

#define BR_POWER_ASM_MACROS 1
#include "inner.h"

/*
 * This is the GHASH implementation that leverages the POWER8 opcodes.
 * When BR_POWER8 is not set, only a stub br_ghash_pwr8_get() that
 * returns 0 is compiled.
 */

#if BR_POWER8

/*
 * Some symbolic names for registers.
 *   HB0 = 16 bytes of value 0
 *   HB1 = 16 bytes of value 1
 *   HB2 = 16 bytes of value 2
 *   HB6 = 16 bytes of value 6
 *   HB7 = 16 bytes of value 7
 *   TT0, TT1 and TT2 are temporaries
 *
 * BSW holds the pattern for byteswapping 32-bit words; this is set only
 * on little-endian systems. XBSW is the same register with the +32 offset
 * for access with the VSX opcodes.
 */
#define HB0       0
#define HB1       1
#define HB2       2
#define HB6       3
#define HB7       4
#define TT0       5
#define TT1       6
#define TT2       7

#define BSW       8
#define XBSW     40

/*
 * Macro to initialise the constants. It must be the first thing
 * emitted in the assembly block, since every other macro below relies
 * on HB0..HB7 (and on BSW on little-endian systems).
 */
#define INIT \
		vxor(HB0, HB0, HB0) \
		vspltisb(HB1, 1) \
		vspltisb(HB2, 2) \
		vspltisb(HB6, 6) \
		vspltisb(HB7, 7) \
		INIT_BSW

/*
 * Fix endianness of a value after reading it or before writing it, if
 * necessary. On big-endian builds both macros expand to nothing.
 */
#if BR_POWER8_LE
#define INIT_BSW         lxvw4x(XBSW, 0, %[idx2be])
#define FIX_ENDIAN(xx)   vperm(xx, xx, xx, BSW)
#else
#define INIT_BSW
#define FIX_ENDIAN(xx)
#endif

/*
 * Shift the 256-bit value x0:x1 left by one bit. This is a corrective
 * action needed because GHASH is defined in full little-endian
 * specification, while the opcodes use full big-endian convention, so
 * the 255-bit product ends up one bit to the right.
 *
 * TT0 is used as scratch: it receives the bit that crosses from x1
 * into x0.
 */
#define SL_256(x0, x1) \
		vsldoi(TT0, HB0, x1, 1) \
		vsl(x0, x0, HB1) \
		vsr(TT0, TT0, HB7) \
		vsl(x1, x1, HB1) \
		vxor(x0, x0, TT0)

/*
 * Reduce x0:x1 in GF(2^128), result in xd (register xd may be the same as
 * x0 or x1, or a different register). x0 and x1 are modified; TT0, TT1
 * and TT2 are used as scratch.
 */
#define REDUCE_F128(xd, x0, x1) \
		vxor(x0, x0, x1) \
		vsr(TT0, x1, HB1) \
		vsr(TT1, x1, HB2) \
		vsr(TT2, x1, HB7) \
		vxor(x0, x0, TT0) \
		vxor(TT1, TT1, TT2) \
		vxor(x0, x0, TT1) \
		vsldoi(x1, x1, HB0, 15) \
		vsl(TT1, x1, HB6) \
		vsl(TT2, x1, HB1) \
		vxor(x1, TT1, TT2) \
		vsr(TT0, x1, HB1) \
		vsr(TT1, x1, HB2) \
		vsr(TT2, x1, HB7) \
		vxor(x0, x0, x1) \
		vxor(x0, x0, TT0) \
		vxor(TT1, TT1, TT2) \
		vxor(xd, x0, TT1)

/* see bearssl_hash.h */
/*
 * GHASH update: the running value at 'y' is combined with the 'len'
 * bytes at 'data' using the key at 'h', and the new value is written
 * back to 'y'. 'y' and 'h' are each accessed as one 16-byte vector.
 *
 * The input is consumed in two phases: groups of four 16-byte blocks
 * (using precomputed h, h^2, h^3 and h^4), then the remaining blocks
 * one at a time. A trailing partial block is zero-padded in a local
 * buffer; the caller's data is never written to.
 */
void
br_ghash_pwr8(void *y, const void *h, const void *data, size_t len)
{
	const unsigned char *buf1, *buf2;
	size_t num4, num1;
	unsigned char tmp[64];
	long cc0, cc1, cc2, cc3;

#if BR_POWER8_LE
	/* Permutation pattern loaded into BSW by INIT_BSW. */
	static const uint32_t idx2be[] = {
		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
	};
#endif

	buf1 = data;

	/*
	 * Assembly code requires data into two chunks; first chunk
	 * must contain a number of blocks which is a multiple of 4.
	 * Since the processing for the first chunk is faster, we want
	 * to make it as big as possible.
	 *
	 * For the remainder, there are two possibilities:
	 *  -- if the remainder size is a multiple of 16, then use it
	 *     in place;
	 *  -- otherwise, copy it to the tmp[] array and pad it with
	 *     zeros.
	 *
	 * After this block: num4 is the number of 64-byte groups, num1
	 * the number of 16-byte blocks in the remainder (0 to 4, the
	 * last one possibly padded), and buf2 points to the remainder.
	 */
	num4 = len >> 6;
	buf2 = buf1 + (num4 << 6);
	len &= 63;
	num1 = (len + 15) >> 4;
	if ((len & 15) != 0) {
		memcpy(tmp, buf2, len);
		memset(tmp + len, 0, (num1 << 4) - len);
		buf2 = tmp;
	}

	/*
	 * Byte offsets of the four blocks within a 64-byte group; they
	 * are passed in registers for use as index operands of lxvw4x.
	 */
	cc0 = 0;
	cc1 = 16;
	cc2 = 32;
	cc3 = 48;
	asm volatile (
		INIT

		/*
		 * Load current h (denoted hereafter h1) in v9.
		 */
		lxvw4x(41, 0, %[h])
		FIX_ENDIAN(9)

		/*
		 * Load current y into v28.
		 */
		lxvw4x(60, 0, %[y])
		FIX_ENDIAN(28)

		/*
		 * Split h1 into three registers:
		 *   v17 = h1_1:h1_0
		 *   v18 =    0:h1_0
		 *   v19 = h1_1:0
		 */
		xxpermdi(49, 41, 41, 2)
		vsldoi(18, HB0, 9, 8)
		vsldoi(19, 9, HB0, 8)

		/*
		 * If num4 is 0, skip directly to the second chunk.
		 */
		cmpldi(%[num4], 0)
		beq(chunk1)

		/*
		 * Compute h2 = h*h in v10.
		 */
		vpmsumd(10, 18, 18)
		vpmsumd(11, 19, 19)
		SL_256(10, 11)
		REDUCE_F128(10, 10, 11)

		/*
		 * Compute h3 = h*h*h in v11.
		 * We first split h2 into:
		 *   v10 = h2_0:h2_1
		 *   v11 =    0:h2_0
		 *   v12 = h2_1:0
		 * Then we do the product with h1, and reduce into v11.
		 */
		vsldoi(11, HB0, 10, 8)
		vsldoi(12, 10, HB0, 8)
		vpmsumd(13, 10, 17)
		vpmsumd(11, 11, 18)
		vpmsumd(12, 12, 19)
		vsldoi(14, HB0, 13, 8)
		vsldoi(15, 13, HB0, 8)
		vxor(11, 11, 14)
		vxor(12, 12, 15)
		SL_256(11, 12)
		REDUCE_F128(11, 11, 12)

		/*
		 * Compute h4 = h*h*h*h in v12. This is done by squaring h2.
		 */
		vsldoi(12, HB0, 10, 8)
		vsldoi(13, 10, HB0, 8)
		vpmsumd(12, 12, 12)
		vpmsumd(13, 13, 13)
		SL_256(12, 13)
		REDUCE_F128(12, 12, 13)

		/*
		 * Repack h1, h2, h3 and h4:
		 *   v13 = h4_0:h3_0
		 *   v14 = h4_1:h3_1
		 *   v15 = h2_0:h1_0
		 *   v16 = h2_1:h1_1
		 */
		xxpermdi(45, 44, 43, 0)
		xxpermdi(46, 44, 43, 3)
		xxpermdi(47, 42, 41, 0)
		xxpermdi(48, 42, 41, 3)

		/*
		 * Loop for each group of four blocks.
		 */
		mtctr(%[num4])
	label(loop4)
		/*
		 * Read the four next blocks.
		 *   v20 = y + a0 = b0
		 *   v21 = a1     = b1
		 *   v22 = a2     = b2
		 *   v23 = a3     = b3
		 */
		lxvw4x(52, %[cc0], %[buf1])
		lxvw4x(53, %[cc1], %[buf1])
		lxvw4x(54, %[cc2], %[buf1])
		lxvw4x(55, %[cc3], %[buf1])
		FIX_ENDIAN(20)
		FIX_ENDIAN(21)
		FIX_ENDIAN(22)
		FIX_ENDIAN(23)
		addi(%[buf1], %[buf1], 64)
		vxor(20, 20, 28)

		/*
		 * Repack the blocks into v9, v10, v11 and v12.
		 *   v9  = b0_0:b1_0
		 *   v10 = b0_1:b1_1
		 *   v11 = b2_0:b3_0
		 *   v12 = b2_1:b3_1
		 */
		xxpermdi(41, 52, 53, 0)
		xxpermdi(42, 52, 53, 3)
		xxpermdi(43, 54, 55, 0)
		xxpermdi(44, 54, 55, 3)

		/*
		 * Compute the products.
		 *   v20 = b0_0*h4_0 + b1_0*h3_0
		 *   v21 = b0_1*h4_0 + b1_1*h3_0
		 *   v22 = b0_0*h4_1 + b1_0*h3_1
		 *   v23 = b0_1*h4_1 + b1_1*h3_1
		 *   v24 = b2_0*h2_0 + b3_0*h1_0
		 *   v25 = b2_1*h2_0 + b3_1*h1_0
		 *   v26 = b2_0*h2_1 + b3_0*h1_1
		 *   v27 = b2_1*h2_1 + b3_1*h1_1
		 */
		vpmsumd(20, 13,  9)
		vpmsumd(21, 13, 10)
		vpmsumd(22, 14,  9)
		vpmsumd(23, 14, 10)
		vpmsumd(24, 15, 11)
		vpmsumd(25, 15, 12)
		vpmsumd(26, 16, 11)
		vpmsumd(27, 16, 12)

		/*
		 * Sum products into a single 256-bit result in v11:v12.
		 */
		vxor(11, 20, 24)
		vxor(12, 23, 27)
		vxor( 9, 21, 22)
		vxor(10, 25, 26)
		vxor(20,  9, 10)
		vsldoi( 9, HB0, 20, 8)
		vsldoi(10, 20, HB0, 8)
		vxor(11, 11, 9)
		vxor(12, 12, 10)

		/*
		 * Fix and reduce in GF(2^128); this is the new y (in v28).
		 */
		SL_256(11, 12)
		REDUCE_F128(28, 11, 12)

		/*
		 * Loop for next group of four blocks.
		 */
		bdnz(loop4)

		/*
		 * Process second chunk, one block at a time.
		 */
	label(chunk1)
		cmpldi(%[num1], 0)
		beq(done)

		mtctr(%[num1])
	label(loop1)
		/*
		 * Load next data block and XOR it into y.
		 */
		lxvw4x(41, 0, %[buf2])
#if BR_POWER8_LE
		FIX_ENDIAN(9)
#endif
		addi(%[buf2], %[buf2], 16)
		vxor(9, 28, 9)

		/*
		 * Split y into doublewords:
		 *   v9  = y_0:y_1
		 *   v10 =   0:y_0
		 *   v11 = y_1:0
		 */
		vsldoi(10, HB0, 9, 8)
		vsldoi(11, 9, HB0, 8)

		/*
		 * Compute products with h:
		 *   v12 = y_0 * h_0
		 *   v13 = y_1 * h_1
		 *   v14 = y_1 * h_0 + y_0 * h_1
		 */
		vpmsumd(14, 9, 17)
		vpmsumd(12, 10, 18)
		vpmsumd(13, 11, 19)

		/*
		 * Propagate v14 into v12:v13 to finalise product.
		 */
		vsldoi(10, HB0, 14, 8)
		vsldoi(11, 14, HB0, 8)
		vxor(12, 12, 10)
		vxor(13, 13, 11)

		/*
		 * Fix result and reduce into v28 (next value for y).
		 */
		SL_256(12, 13)
		REDUCE_F128(28, 12, 13)
		bdnz(loop1)

	label(done)
		/*
		 * Write back the new y.
		 */
		FIX_ENDIAN(28)
		stxvw4x(60, 0, %[y])

/*
 * Operands: buf1 and buf2 are advanced by the assembly code, hence
 * read-write; everything else is input only.
 *
 * NOTE(review): cmpldi presumably updates a condition-register field
 * (cr0), which does not appear in the clobber list below -- confirm
 * against the macro definitions in inner.h and the compiler's rules
 * for inline assembly.
 */
: [buf1] "+b" (buf1), [buf2] "+b" (buf2)
: [y] "b" (y), [h] "b" (h), [num4] "b" (num4), [num1] "b" (num1),
  [cc0] "b" (cc0), [cc1] "b" (cc1), [cc2] "b" (cc2), [cc3] "b" (cc3)
#if BR_POWER8_LE
	, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
  "ctr", "memory"
	);
}

/* see bearssl_hash.h */
/*
 * Return a pointer to the POWER8 GHASH implementation (this variant
 * is compiled only when BR_POWER8 is set).
 */
br_ghash
br_ghash_pwr8_get(void)
{
	return &br_ghash_pwr8;
}

#else

/* see bearssl_hash.h */
/*
 * POWER8 support is not compiled in: report "not available" with a
 * null function pointer.
 */
br_ghash
br_ghash_pwr8_get(void)
{
	return 0;
}

#endif
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +const unsigned char br_hkdf_no_salt = 0; + +/* see bearssl_kdf.h */ +void +br_hkdf_init(br_hkdf_context *hc, const br_hash_class *digest_vtable, + const void *salt, size_t salt_len) +{ + br_hmac_key_context kc; + unsigned char tmp[64]; + + if (salt == BR_HKDF_NO_SALT) { + salt = tmp; + salt_len = br_digest_size(digest_vtable); + memset(tmp, 0, salt_len); + } + br_hmac_key_init(&kc, digest_vtable, salt, salt_len); + br_hmac_init(&hc->u.hmac_ctx, &kc, 0); + hc->dig_len = br_hmac_size(&hc->u.hmac_ctx); +} + +/* see bearssl_kdf.h */ +void +br_hkdf_inject(br_hkdf_context *hc, const void *ikm, size_t ikm_len) +{ + br_hmac_update(&hc->u.hmac_ctx, ikm, ikm_len); +} + +/* see bearssl_kdf.h */ +void +br_hkdf_flip(br_hkdf_context *hc) +{ + unsigned char tmp[64]; + + br_hmac_out(&hc->u.hmac_ctx, tmp); + br_hmac_key_init(&hc->u.prk_ctx, + br_hmac_get_digest(&hc->u.hmac_ctx), tmp, hc->dig_len); + hc->ptr = hc->dig_len; + hc->chunk_num = 0; +} + +/* see bearssl_kdf.h */ +size_t +br_hkdf_produce(br_hkdf_context *hc, + const void *info, size_t info_len, void *out, size_t out_len) +{ + size_t tlen; + + tlen = 0; + while (out_len > 0) { + size_t clen; + + if (hc->ptr == hc->dig_len) { + br_hmac_context hmac_ctx; + unsigned char x; + + hc->chunk_num ++; + if (hc->chunk_num == 256) { + return tlen; + } + x = hc->chunk_num; + br_hmac_init(&hmac_ctx, &hc->u.prk_ctx, 0); + if (x != 1) { + br_hmac_update(&hmac_ctx, hc->buf, hc->dig_len); + } + br_hmac_update(&hmac_ctx, info, info_len); + br_hmac_update(&hmac_ctx, &x, 1); + br_hmac_out(&hmac_ctx, hc->buf); + hc->ptr = 0; + } + clen = hc->dig_len - hc->ptr; + if (clen > out_len) { + clen = out_len; + } + memcpy(out, hc->buf + hc->ptr, clen); 
+ out = (unsigned char *)out + clen; + out_len -= clen; + hc->ptr += clen; + tlen += clen; + } + return tlen; +} diff --git a/third_party/bearssl/src/hmac.c b/third_party/bearssl/src/hmac.c new file mode 100644 index 0000000..b438798 --- /dev/null +++ b/third_party/bearssl/src/hmac.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +static inline size_t +block_size(const br_hash_class *dig) +{ + unsigned ls; + + ls = (unsigned)(dig->desc >> BR_HASHDESC_LBLEN_OFF) + & BR_HASHDESC_LBLEN_MASK; + return (size_t)1 << ls; +} + +static void +process_key(const br_hash_class **hc, void *ks, + const void *key, size_t key_len, unsigned bb) +{ + unsigned char tmp[256]; + size_t blen, u; + + blen = block_size(*hc); + memcpy(tmp, key, key_len); + for (u = 0; u < key_len; u ++) { + tmp[u] ^= (unsigned char)bb; + } + memset(tmp + key_len, bb, blen - key_len); + (*hc)->init(hc); + (*hc)->update(hc, tmp, blen); + (*hc)->state(hc, ks); +} + +/* see bearssl.h */ +void +br_hmac_key_init(br_hmac_key_context *kc, + const br_hash_class *dig, const void *key, size_t key_len) +{ + br_hash_compat_context hc; + unsigned char kbuf[64]; + + kc->dig_vtable = dig; + hc.vtable = dig; + if (key_len > block_size(dig)) { + dig->init(&hc.vtable); + dig->update(&hc.vtable, key, key_len); + dig->out(&hc.vtable, kbuf); + key = kbuf; + key_len = br_digest_size(dig); + } + process_key(&hc.vtable, kc->ksi, key, key_len, 0x36); + process_key(&hc.vtable, kc->kso, key, key_len, 0x5C); +} + +/* see bearssl.h */ +void +br_hmac_init(br_hmac_context *ctx, + const br_hmac_key_context *kc, size_t out_len) +{ + const br_hash_class *dig; + size_t blen, hlen; + + dig = kc->dig_vtable; + blen = block_size(dig); + dig->init(&ctx->dig.vtable); + dig->set_state(&ctx->dig.vtable, kc->ksi, (uint64_t)blen); + memcpy(ctx->kso, kc->kso, sizeof kc->kso); + hlen = br_digest_size(dig); + if (out_len > 0 && out_len < hlen) { + hlen = out_len; + } + ctx->out_len = hlen; +} + +/* see bearssl.h */ +void +br_hmac_update(br_hmac_context *ctx, const void *data, size_t len) +{ + ctx->dig.vtable->update(&ctx->dig.vtable, data, len); +} + +/* see bearssl.h */ +size_t +br_hmac_out(const br_hmac_context *ctx, void *out) +{ + const br_hash_class *dig; + br_hash_compat_context hc; + unsigned char tmp[64]; + size_t blen, hlen; + + dig = 
ctx->dig.vtable; + dig->out(&ctx->dig.vtable, tmp); + blen = block_size(dig); + dig->init(&hc.vtable); + dig->set_state(&hc.vtable, ctx->kso, (uint64_t)blen); + hlen = br_digest_size(dig); + dig->update(&hc.vtable, tmp, hlen); + dig->out(&hc.vtable, tmp); + memcpy(out, tmp, ctx->out_len); + return ctx->out_len; +} diff --git a/third_party/bearssl/src/hmac_ct.c b/third_party/bearssl/src/hmac_ct.c new file mode 100644 index 0000000..e1c1d80 --- /dev/null +++ b/third_party/bearssl/src/hmac_ct.c @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
/* third_party/bearssl/src/hmac_ct.c */

#include "inner.h"

/*
 * Get the output size (in bytes) of a hash function, from its
 * descriptor.
 */
static inline size_t
hash_size(const br_hash_class *dig)
{
	return (unsigned)(dig->desc >> BR_HASHDESC_OUT_OFF)
		& BR_HASHDESC_OUT_MASK;
}

/*
 * Get the input block size (in bytes) of a hash function; the
 * descriptor stores its base-2 logarithm.
 */
static inline size_t
block_size(const br_hash_class *dig)
{
	unsigned ls;

	ls = (unsigned)(dig->desc >> BR_HASHDESC_LBLEN_OFF)
		& BR_HASHDESC_LBLEN_MASK;
	return (size_t)1 << ls;
}

/* see bearssl.h */
/*
 * Compute the HMAC output over the data already injected in ctx,
 * followed by the 'len' bytes in data[], such that the number of
 * processed bytes depends on min_len and max_len but not on len.
 *
 *   ctx       HMAC context (not modified; a copy of its hash state
 *             is used)
 *   data      extra input bytes; up to max_len bytes are read from it
 *   len       actual length of the extra input
 *   min_len   lower bound on len; used to process whole blocks directly
 *   max_len   upper bound on len; sets the number of loop iterations
 *   out       receives ctx->out_len bytes
 *
 * Returned value is ctx->out_len.
 */
size_t
br_hmac_outCT(const br_hmac_context *ctx,
	const void *data, size_t len, size_t min_len, size_t max_len,
	void *out)
{
	/*
	 * Method implemented here is inspired from the descriptions on:
	 *    https://www.imperialviolet.org/2013/02/04/luckythirteen.html
	 *
	 * Principle: we input bytes one by one. We use a MUX to push
	 * padding bytes instead of data bytes when appropriate. At each
	 * block limit, we get the current hash function state: this is
	 * a potential output, since we handle MD padding ourselves.
	 *
	 * be     1 for big-endian, 0 for little-endian
	 * po     minimal MD padding length
	 * bs     block size (always a power of 2)
	 * hlen   hash output size
	 */

	const br_hash_class *dig;
	br_hash_compat_context hc;
	int be;
	uint32_t po, bs;
	uint32_t kr, km, kl, kz, u;
	uint64_t count, ncount, bit_len;
	unsigned char tmp1[64], tmp2[64];
	size_t hlen;

	/*
	 * Copy the current hash context.
	 */
	hc = ctx->dig;

	/*
	 * Get function-specific information. The minimal padding is one
	 * 0x80 byte plus an 8-byte encoded length; 8 more bytes are
	 * needed when the hash function uses a 128-bit length field.
	 */
	dig = hc.vtable;
	be = (dig->desc & BR_HASHDESC_MD_PADDING_BE) != 0;
	po = 9;
	if (dig->desc & BR_HASHDESC_MD_PADDING_128) {
		po += 8;
	}
	bs = block_size(dig);
	hlen = hash_size(dig);

	/*
	 * Get current input length and compute total bit length.
	 */
	count = dig->state(&hc.vtable, tmp1);
	bit_len = (count + (uint64_t)len) << 3;

	/*
	 * We can input the blocks that we are sure we will use.
	 * This offers better performance (no MUX for these blocks)
	 * and also ensures that the remaining lengths fit on 32 bits.
	 */
	ncount = (count + (uint64_t)min_len) & ~(uint64_t)(bs - 1);
	if (ncount > count) {
		size_t zlen;

		zlen = (size_t)(ncount - count);
		dig->update(&hc.vtable, data, zlen);
		data = (const unsigned char *)data + zlen;
		len -= zlen;
		max_len -= zlen;
		count = ncount;
	}

	/*
	 * At that point:
	 * -- 'count' contains the number of bytes already processed
	 * (in total).
	 * -- We must input 'len' bytes. 'min_len' is unimportant: we
	 * used it to know how many full blocks we could process
	 * directly. Now only len and max_len matter.
	 *
	 * We compute kr, kl, kz and km.
	 *  kr   number of input bytes already in the current block
	 *  km   index of the first byte after the end of the last padding
	 *       block, if length is max_len
	 *  kz   index of the last byte of the actual last padding block
	 *  kl   index of the start of the encoded length
	 *
	 * km, kz and kl are counted from the current offset in the
	 * input data.
	 */
	kr = (uint32_t)count & (bs - 1);
	kz = ((kr + (uint32_t)len + po + bs - 1) & ~(bs - 1)) - 1 - kr;
	kl = kz - 7;
	km = ((kr + (uint32_t)max_len + po + bs - 1) & ~(bs - 1)) - kr;

	/*
	 * We must now process km bytes. For index u from 0 to km-1:
	 *   d is from data[] if u < max_len, 0x00 otherwise
	 *   e is an encoded length byte or 0x00, depending on u
	 * The tests for d and e need not be constant-time, since
	 * they relate only to u and max_len, not to the actual length.
	 *
	 * Actual input byte is then:
	 *   d      if u < len
	 *   0x80   if u == len
	 *   0x00   if u > len and u < kl
	 *   e      if u >= kl
	 *
	 * Hash state is obtained whenever we reach a full block. This
	 * is the result we want if and only if u == kz.
	 */
	memset(tmp2, 0, sizeof tmp2);
	for (u = 0; u < km; u ++) {
		uint32_t v;
		uint32_t d, e, x0, x1;
		unsigned char x[1];

		d = (u < max_len) ? ((const unsigned char *)data)[u] : 0x00;

		/*
		 * v is the offset of this byte within its block. Only
		 * the last eight bytes of a block may hold a byte of
		 * the encoded bit length.
		 */
		v = (kr + u) & (bs - 1);
		if (v >= (bs - 8)) {
			unsigned j;

			j = (v - (bs - 8)) << 3;
			if (be) {
				e = (uint32_t)(bit_len >> (56 - j));
			} else {
				e = (uint32_t)(bit_len >> j);
			}
			e &= 0xFF;
		} else {
			e = 0x00;
		}

		/*
		 * Selection of the byte to input depends on len, hence
		 * it is done with MUX() and not with branches.
		 */
		x0 = MUX(EQ(u, (uint32_t)len), 0x80, d);
		x1 = MUX(LT(u, kl), 0x00, e);
		x[0] = MUX(LE(u, (uint32_t)len), x0, x1);
		dig->update(&hc.vtable, x, 1);

		/*
		 * End of block: keep the hash state if (and only if)
		 * this block is the actual last one.
		 */
		if (v == (bs - 1)) {
			dig->state(&hc.vtable, tmp1);
			CCOPY(EQ(u, kz), tmp2, tmp1, hlen);
		}
	}

	/*
	 * Inner hash output is in tmp2[]; we finish processing.
	 */
	dig->init(&hc.vtable);
	dig->set_state(&hc.vtable, ctx->kso, (uint64_t)bs);
	dig->update(&hc.vtable, tmp2, hlen);
	dig->out(&hc.vtable, tmp2);
	memcpy(out, tmp2, ctx->out_len);
	return ctx->out_len;
}
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl.h */ +void +br_hmac_drbg_init(br_hmac_drbg_context *ctx, + const br_hash_class *digest_class, const void *seed, size_t len) +{ + size_t hlen; + + ctx->vtable = &br_hmac_drbg_vtable; + hlen = br_digest_size(digest_class); + memset(ctx->K, 0x00, hlen); + memset(ctx->V, 0x01, hlen); + ctx->digest_class = digest_class; + br_hmac_drbg_update(ctx, seed, len); +} + +/* see bearssl.h */ +void +br_hmac_drbg_generate(br_hmac_drbg_context *ctx, void *out, size_t len) +{ + const br_hash_class *dig; + br_hmac_key_context kc; + br_hmac_context hc; + size_t hlen; + unsigned char *buf; + unsigned char x; + + dig = ctx->digest_class; + hlen = br_digest_size(dig); + br_hmac_key_init(&kc, dig, ctx->K, hlen); + buf = out; + while (len > 0) { + size_t clen; + + br_hmac_init(&hc, &kc, 0); + br_hmac_update(&hc, ctx->V, hlen); + br_hmac_out(&hc, ctx->V); + clen = hlen; + if (clen > len) { + clen = len; + } + memcpy(buf, ctx->V, clen); + buf += clen; + len -= clen; + } + + /* + * To prepare the state for the next request, we should call + * br_hmac_drbg_update() with an empty additional seed. However, + * we already have an initialized HMAC context with the right + * initial key, and we don't want to push another one on the + * stack, so we inline that update() call here. 
+ */ + br_hmac_init(&hc, &kc, 0); + br_hmac_update(&hc, ctx->V, hlen); + x = 0x00; + br_hmac_update(&hc, &x, 1); + br_hmac_out(&hc, ctx->K); + br_hmac_key_init(&kc, dig, ctx->K, hlen); + br_hmac_init(&hc, &kc, 0); + br_hmac_update(&hc, ctx->V, hlen); + br_hmac_out(&hc, ctx->V); +} + +/* see bearssl.h */ +void +br_hmac_drbg_update(br_hmac_drbg_context *ctx, const void *seed, size_t len) +{ + const br_hash_class *dig; + br_hmac_key_context kc; + br_hmac_context hc; + size_t hlen; + unsigned char x; + + dig = ctx->digest_class; + hlen = br_digest_size(dig); + + /* + * 1. K = HMAC(K, V || 0x00 || seed) + */ + br_hmac_key_init(&kc, dig, ctx->K, hlen); + br_hmac_init(&hc, &kc, 0); + br_hmac_update(&hc, ctx->V, hlen); + x = 0x00; + br_hmac_update(&hc, &x, 1); + br_hmac_update(&hc, seed, len); + br_hmac_out(&hc, ctx->K); + br_hmac_key_init(&kc, dig, ctx->K, hlen); + + /* + * 2. V = HMAC(K, V) + */ + br_hmac_init(&hc, &kc, 0); + br_hmac_update(&hc, ctx->V, hlen); + br_hmac_out(&hc, ctx->V); + + /* + * 3. If the additional seed is empty, then stop here. + */ + if (len == 0) { + return; + } + + /* + * 4. K = HMAC(K, V || 0x01 || seed) + */ + br_hmac_init(&hc, &kc, 0); + br_hmac_update(&hc, ctx->V, hlen); + x = 0x01; + br_hmac_update(&hc, &x, 1); + br_hmac_update(&hc, seed, len); + br_hmac_out(&hc, ctx->K); + br_hmac_key_init(&kc, dig, ctx->K, hlen); + + /* + * 5. 
V = HMAC(K, V) + */ + br_hmac_init(&hc, &kc, 0); + br_hmac_update(&hc, ctx->V, hlen); + br_hmac_out(&hc, ctx->V); +} + +/* see bearssl.h */ +const br_prng_class br_hmac_drbg_vtable = { + sizeof(br_hmac_drbg_context), + (void (*)(const br_prng_class **, const void *, const void *, size_t)) + &br_hmac_drbg_init, + (void (*)(const br_prng_class **, void *, size_t)) + &br_hmac_drbg_generate, + (void (*)(const br_prng_class **, const void *, size_t)) + &br_hmac_drbg_update +}; diff --git a/third_party/bearssl/src/i15_add.c b/third_party/bearssl/src/i15_add.c new file mode 100644 index 0000000..97e29b8 --- /dev/null +++ b/third_party/bearssl/src/i15_add.c @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see inner.h */ +uint32_t +br_i15_add(uint16_t *a, const uint16_t *b, uint32_t ctl) +{ + uint32_t cc; + size_t u, m; + + cc = 0; + m = (a[0] + 31) >> 4; + for (u = 1; u < m; u ++) { + uint32_t aw, bw, naw; + + aw = a[u]; + bw = b[u]; + naw = aw + bw + cc; + cc = naw >> 15; + a[u] = MUX(ctl, naw & 0x7FFF, aw); + } + return cc; +} diff --git a/third_party/bearssl/src/i15_bitlen.c b/third_party/bearssl/src/i15_bitlen.c new file mode 100644 index 0000000..ad74467 --- /dev/null +++ b/third_party/bearssl/src/i15_bitlen.c @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include "inner.h"
+
+/*
+ * Compute the encoded bit length of the value held in the xlen words
+ * of x[] (x points at the first word to scan; no header word is read
+ * here).
+ *
+ * Words are scanned from index xlen-1 down to index 0, and the loop
+ * always runs over all xlen words. As long as tw is still zero, the
+ * current word and its index are selected into tw and twk. EQ, MUX and
+ * BIT_LENGTH come from inner.h (not visible here) and are presumably
+ * branch-free helpers (to be confirmed there).
+ *
+ * The returned value is (twk << 4) + BIT_LENGTH(tw): the index of the
+ * selected word in the upper bits, and the bit length of that word in
+ * the low bits.
+ *
+ * See inner.h.
+ */
+uint32_t
+br_i15_bit_length(uint16_t *x, size_t xlen)
+{
+	uint32_t tw, twk;
+
+	tw = 0;
+	twk = 0;
+	while (xlen -- > 0) {
+		uint32_t w, c;
+
+		/* c is the selector: set while no non-zero word was seen yet. */
+		c = EQ(tw, 0);
+		w = x[xlen];
+		tw = MUX(c, w, tw);
+		twk = MUX(c, (uint32_t)xlen, twk);
+	}
+	return (twk << 4) + BIT_LENGTH(tw);
+}
diff --git a/third_party/bearssl/src/i15_decmod.c b/third_party/bearssl/src/i15_decmod.c
new file mode 100644
index 0000000..6076c57
--- /dev/null
+++ b/third_party/bearssl/src/i15_decmod.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Decode the len bytes at src (big-endian: the last byte is the least
+ * significant one) into x[], provided that the value is strictly lower
+ * than the modulus m.
+ *
+ * Returned value is 1 if the source value was lower than m, 0
+ * otherwise. In the latter case every value word written into x[] is
+ * zero, since each word is masked with r during the second pass. The
+ * header word x[0] is set to m[0] in all cases.
+ *
+ * Both passes run over the same tlen bytes whatever the outcome of the
+ * comparison.
+ *
+ * See inner.h.
+ */
+uint32_t
+br_i15_decode_mod(uint16_t *x, const void *src, size_t len, const uint16_t *m)
+{
+	/*
+	 * Two-pass algorithm: in the first pass, we determine whether the
+	 * value fits; in the second pass, we do the actual write.
+	 *
+	 * During the first pass, 'r' contains the comparison result so
+	 * far:
+	 *  0x00000000   value is equal to the modulus
+	 *  0x00000001   value is greater than the modulus
+	 *  0xFFFFFFFF   value is lower than the modulus
+	 *
+	 * Since we iterate starting with the least significant bytes (at
+	 * the end of src[]), each new comparison overrides the previous
+	 * except when the comparison yields 0 (equal).
+	 *
+	 * During the second pass, 'r' is either 0xFFFFFFFF (value fits)
+	 * or 0x00000000 (value does not fit).
+	 *
+	 * We must iterate over all bytes of the source, _and_ possibly
+	 * some extra virtual bytes (with value 0) so as to cover the
+	 * complete modulus as well. We also add 4 such extra bytes beyond
+	 * the modulus length because it then guarantees that no accumulated
+	 * partial word remains to be processed.
+	 */
+	const unsigned char *buf;
+	size_t mlen, tlen;
+	int pass;
+	uint32_t r;
+
+	buf = src;
+	mlen = (m[0] + 15) >> 4;
+	/* Each 16-bit slot of the modulus accounts for two bytes here. */
+	tlen = (mlen << 1);
+	if (tlen < len) {
+		tlen = len;
+	}
+	tlen += 4;
+	r = 0;
+	for (pass = 0; pass < 2; pass ++) {
+		size_t u, v;
+		uint32_t acc;
+		int acc_len;
+
+		v = 1;
+		acc = 0;
+		acc_len = 0;
+		for (u = 0; u < tlen; u ++) {
+			uint32_t b;
+
+			if (u < len) {
+				b = buf[len - 1 - u];
+			} else {
+				b = 0;
+			}
+			acc |= (b << acc_len);
+			acc_len += 8;
+			if (acc_len >= 15) {
+				uint32_t xw;
+
+				xw = acc & (uint32_t)0x7FFF;
+				acc_len -= 15;
+				/*
+				 * Keep only the upper bits of the current
+				 * byte that did not fit in the word just
+				 * extracted.
+				 */
+				acc = b >> (8 - acc_len);
+				if (v <= mlen) {
+					if (pass) {
+						x[v] = r & xw;
+					} else {
+						uint32_t cc;
+
+						cc = (uint32_t)CMP(xw, m[v]);
+						r = MUX(EQ(cc, 0), r, cc);
+					}
+				} else {
+					/*
+					 * Words beyond the modulus length:
+					 * any non-zero word makes the value
+					 * greater than the modulus.
+					 */
+					if (!pass) {
+						r = MUX(EQ(xw, 0), r, 1);
+					}
+				}
+				v ++;
+			}
+		}
+
+		/*
+		 * When we reach this point at the end of the first pass:
+		 * r is either 0, 1 or -1; we want to set r to 0 if it
+		 * is equal to 0 or 1, and leave it to -1 otherwise.
+		 *
+		 * When we reach this point at the end of the second pass:
+		 * r is either 0 or -1; we want to leave that value
+		 * untouched. This is a subcase of the previous.
+		 */
+		r >>= 1;
+		r |= (r << 1);
+	}
+
+	x[0] = m[0];
+	return r & (uint32_t)1;
+}
diff --git a/third_party/bearssl/src/i15_decode.c b/third_party/bearssl/src/i15_decode.c
new file mode 100644
index 0000000..fc2c0be
--- /dev/null
+++ b/third_party/bearssl/src/i15_decode.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Decode the len bytes at src into x[], without any modulus.
+ *
+ * Bytes are consumed from the last one to the first one, i.e. src is
+ * read as a big-endian integer. Bits are accumulated in acc and
+ * flushed as 15-bit words into x[1], x[2]... in that order; if some
+ * bits remain at the end, they are stored as one extra word.
+ *
+ * The header word x[0] is then set to the value returned by
+ * br_i15_bit_length() over the words that were written.
+ *
+ * No bound is applied on the number of words written: the caller must
+ * provide an x[] array large enough for len bytes.
+ *
+ * See inner.h.
+ */
+void
+br_i15_decode(uint16_t *x, const void *src, size_t len)
+{
+	const unsigned char *buf;
+	size_t v;
+	uint32_t acc;
+	int acc_len;
+
+	buf = src;
+	v = 1;
+	acc = 0;
+	acc_len = 0;
+	while (len -- > 0) {
+		uint32_t b;
+
+		b = buf[len];
+		acc |= (b << acc_len);
+		acc_len += 8;
+		if (acc_len >= 15) {
+			x[v ++] = acc & 0x7FFF;
+			acc_len -= 15;
+			acc >>= 15;
+		}
+	}
+	if (acc_len != 0) {
+		x[v ++] = acc;
+	}
+	/* v - 1 is the number of value words written above. */
+	x[0] = br_i15_bit_length(x + 1, v - 1);
+}
diff --git a/third_party/bearssl/src/i15_decred.c b/third_party/bearssl/src/i15_decred.c
new file mode 100644
index 0000000..81e7dd1
--- /dev/null
+++ b/third_party/bearssl/src/i15_decred.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Decode the len bytes at src (big-endian) into x[], reducing the
+ * value modulo m along the way. The header word x[0] is set to m[0].
+ *
+ * If m[0] is zero, x[0] is set to zero and nothing else is done.
+ *
+ * The leading bytes (one byte less than the byte length of m) are
+ * decoded directly with br_i15_decode(). The remaining bytes are then
+ * injected 15 bits at a time through br_i15_muladd_small(), which is
+ * declared in inner.h; presumably it computes x*2^15 + z modulo m (to
+ * be confirmed there).
+ *
+ * See inner.h.
+ */
+void
+br_i15_decode_reduce(uint16_t *x,
+	const void *src, size_t len, const uint16_t *m)
+{
+	uint32_t m_ebitlen, m_rbitlen;
+	size_t mblen, k;
+	const unsigned char *buf;
+	uint32_t acc;
+	int acc_len;
+
+	/*
+	 * Get the encoded bit length.
+	 */
+	m_ebitlen = m[0];
+
+	/*
+	 * Special case for an invalid (null) modulus.
+	 */
+	if (m_ebitlen == 0) {
+		x[0] = 0;
+		return;
+	}
+
+	/*
+	 * Clear the destination.
+	 */
+	br_i15_zero(x, m_ebitlen);
+
+	/*
+	 * First decode directly as many bytes as possible. This requires
+	 * computing the actual bit length.
+	 */
+	/*
+	 * The encoded length counts 16 per full word while a word holds
+	 * only 15 bits, hence the subtraction of the word count.
+	 */
+	m_rbitlen = m_ebitlen >> 4;
+	m_rbitlen = (m_ebitlen & 15) + (m_rbitlen << 4) - m_rbitlen;
+	mblen = (m_rbitlen + 7) >> 3;
+	k = mblen - 1;
+	if (k >= len) {
+		br_i15_decode(x, src, len);
+		/* br_i15_decode() set its own header; force the one of m. */
+		x[0] = m_ebitlen;
+		return;
+	}
+	buf = src;
+	br_i15_decode(x, buf, k);
+	x[0] = m_ebitlen;
+
+	/*
+	 * Input remaining bytes, using 15-bit words.
+	 */
+	acc = 0;
+	acc_len = 0;
+	while (k < len) {
+		uint32_t v;
+
+		v = buf[k ++];
+		acc = (acc << 8) | v;
+		acc_len += 8;
+		if (acc_len >= 15) {
+			/* Inject the top 15 accumulated bits, keep the rest. */
+			br_i15_muladd_small(x, acc >> (acc_len - 15), m);
+			acc_len -= 15;
+			acc &= ~((uint32_t)-1 << acc_len);
+		}
+	}
+
+	/*
+	 * We may have some bits accumulated. We then perform a shift to
+	 * be able to inject these bits as a full 15-bit word.
+	 */
+	if (acc_len != 0) {
+		acc = (acc | (x[1] << acc_len)) & 0x7FFF;
+		br_i15_rshift(x, 15 - acc_len);
+		br_i15_muladd_small(x, acc, m);
+	}
+}
diff --git a/third_party/bearssl/src/i15_encode.c b/third_party/bearssl/src/i15_encode.c
new file mode 100644
index 0000000..50668f4
--- /dev/null
+++ b/third_party/bearssl/src/i15_encode.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Encode the integer x into exactly len bytes at dst.
+ *
+ * The output is filled from its last byte to its first byte, i.e. the
+ * encoding is big-endian. Words are consumed from x[1] upwards, 15
+ * bits at a time; once all the words announced by the header x[0] have
+ * been used, the remaining (most significant) bytes come out as zero.
+ * Exactly len bytes are produced: if the value needs more than len
+ * bytes, its upper part is simply not written.
+ *
+ * If the header announces zero words, dst is filled with zeros.
+ *
+ * See inner.h.
+ */
+void
+br_i15_encode(void *dst, size_t len, const uint16_t *x)
+{
+	unsigned char *buf;
+	size_t u, xlen;
+	uint32_t acc;
+	int acc_len;
+
+	xlen = (x[0] + 15) >> 4;
+	if (xlen == 0) {
+		memset(dst, 0, len);
+		return;
+	}
+	u = 1;
+	acc = 0;
+	acc_len = 0;
+	buf = dst;
+	while (len -- > 0) {
+		/* Refill when fewer than 8 bits are available for this byte. */
+		if (acc_len < 8) {
+			if (u <= xlen) {
+				acc += (uint32_t)x[u ++] << acc_len;
+			}
+			acc_len += 15;
+		}
+		buf[len] = (unsigned char)acc;
+		acc >>= 8;
+		acc_len -= 8;
+	}
+}
diff --git a/third_party/bearssl/src/i15_fmont.c b/third_party/bearssl/src/i15_fmont.c
new file mode 100644
index 0000000..3450b72
--- /dev/null
+++ b/third_party/bearssl/src/i15_fmont.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Convert x[] out of Montgomery representation, in place, with
+ * modulus m. The word count len is taken from the header of m.
+ *
+ * The outer loop runs len times. Each round derives a factor f from
+ * the low word x[1] and from m0i, adds f*m to x, and stores every
+ * resulting word one slot lower (the low word is dropped), the final
+ * carry becoming the new top word. Presumably m0i is -1/m[1] mod 2^15,
+ * which makes the dropped low word zero (to be confirmed against
+ * inner.h). MUL15 and NOT also come from inner.h.
+ *
+ * See inner.h.
+ */
+void
+br_i15_from_monty(uint16_t *x, const uint16_t *m, uint16_t m0i)
+{
+	size_t len, u, v;
+
+	len = (m[0] + 15) >> 4;
+	for (u = 0; u < len; u ++) {
+		uint32_t f, cc;
+
+		f = MUL15(x[1], m0i) & 0x7FFF;
+		cc = 0;
+		for (v = 0; v < len; v ++) {
+			uint32_t z;
+
+			z = (uint32_t)x[v + 1] + MUL15(f, m[v + 1]) + cc;
+			cc = z >> 15;
+			/* Word v+1 of the sum lands in slot v: one-word right shift. */
+			if (v != 0) {
+				x[v] = z & 0x7FFF;
+			}
+		}
+		x[len] = cc;
+	}
+
+	/*
+	 * We may have to do an extra subtraction, but only if the
+	 * value in x[] is indeed greater than or equal to that of m[],
+	 * which is why we must do two calls (first call computes the
+	 * carry, second call performs the subtraction only if the carry
+	 * is 0).
+	 */
+	br_i15_sub(x, m, NOT(br_i15_sub(x, m, 0)));
+}
diff --git a/third_party/bearssl/src/i15_iszero.c b/third_party/bearssl/src/i15_iszero.c
new file mode 100644
index 0000000..d4b6f10
--- /dev/null
+++ b/third_party/bearssl/src/i15_iszero.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Test whether the integer x is zero: returned value is 1 if all the
+ * value words announced by the header x[0] are zero, 0 otherwise.
+ *
+ * All words are ORed together without any early exit. The final
+ * expression is branch-free: (z | -z) has its top bit set exactly when
+ * z is not zero, so inverting it and keeping bit 31 yields the result.
+ *
+ * See inner.h.
+ */
+uint32_t
+br_i15_iszero(const uint16_t *x)
+{
+	uint32_t z;
+	size_t u;
+
+	z = 0;
+	for (u = (x[0] + 15) >> 4; u > 0; u --) {
+		z |= x[u];
+	}
+	return ~(z | -z) >> 31;
+}
diff --git a/third_party/bearssl/src/i15_moddiv.c b/third_party/bearssl/src/i15_moddiv.c
new file mode 100644
index 0000000..45af756
--- /dev/null
+++ b/third_party/bearssl/src/i15_moddiv.c
@@ -0,0 +1,465 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * In this file, we handle big integers with a custom format, i.e.
+ * without the usual one-word header. Value is split into 15-bit words,
+ * each stored in a 16-bit slot (top bit is zero) in little-endian
+ * order. The length (in words) is provided explicitly. In some cases,
+ * the value can be negative (using two's complement representation). In
+ * some cases, the top word is allowed to have a 16th bit.
+ */
+
+/*
+ * Negate big integer conditionally. The value consists of 'len' words,
+ * with 15 bits in each word (the top bit of each word should be 0,
+ * except possibly for the last word). If 'ctl' is 1, the negation is
+ * computed; otherwise, if 'ctl' is 0, then the value is unchanged.
+ */
+static void
+cond_negate(uint16_t *a, size_t len, uint32_t ctl)
+{
+	size_t k;
+	uint32_t cc, xm;
+
+	/*
+	 * Negation is computed as (complement of a) + 1. With ctl = 1,
+	 * xm flips the 15 bits of each word and cc provides the initial
+	 * +1; with ctl = 0, both xm and cc are zero and every word is
+	 * written back as it was (masked to 15 bits).
+	 */
+	cc = ctl;
+	xm = 0x7FFF & -ctl;
+	for (k = 0; k < len; k ++) {
+		uint32_t aw;
+
+		aw = a[k];
+		aw = (aw ^ xm) + cc;
+		a[k] = aw & 0x7FFF;
+		cc = (aw >> 15) & 1;
+	}
+}
+
+/*
+ * Finish modular reduction. Rules on input parameters:
+ *
+ *   if neg = 1, then -m <= a < 0
+ *   if neg = 0, then 0 <= a < 2*m
+ *
+ * If neg = 0, then the top word of a[] may use 16 bits.
+ *
+ * Also, modulus m must be odd.
+ */
+static void
+finish_mod(uint16_t *a, size_t len, const uint16_t *m, uint32_t neg)
+{
+	size_t k;
+	uint32_t cc, xm, ym;
+
+	/*
+	 * First pass: compare a (assumed nonnegative) with m.
+	 */
+	cc = 0;
+	for (k = 0; k < len; k ++) {
+		uint32_t aw, mw;
+
+		aw = a[k];
+		mw = m[k];
+		/* Only the borrow is kept; a[] is not modified in this pass. */
+		cc = (aw - mw - cc) >> 31;
+	}
+
+	/*
+	 * At this point:
+	 *   if neg = 1, then we must add m (regardless of cc)
+	 *   if neg = 0 and cc = 0, then we must subtract m
+	 *   if neg = 0 and cc = 1, then we must do nothing
+	 */
+	/*
+	 * A single subtraction loop covers the three cases:
+	 *  - ym is all-ones when an operation is needed, zero otherwise
+	 *    (the subtracted words are then all zero);
+	 *  - with neg = 1, xm complements each word of m and the initial
+	 *    borrow is 1, and subtracting (complement of m) + 1 is the
+	 *    same as adding m.
+	 */
+	xm = 0x7FFF & -neg;
+	ym = -(neg | (1 - cc));
+	cc = neg;
+	for (k = 0; k < len; k ++) {
+		uint32_t aw, mw;
+
+		aw = a[k];
+		mw = (m[k] ^ xm) & ym;
+		aw = aw - mw - cc;
+		a[k] = aw & 0x7FFF;
+		cc = aw >> 31;
+	}
+}
+
+/*
+ * Compute:
+ *   a <- (a*pa+b*pb)/(2^15)
+ *   b <- (a*qa+b*qb)/(2^15)
+ * The division is assumed to be exact (i.e.
the low word is dropped).
+ * If the final a is negative, then it is negated. Similarly for b.
+ * Returned value is the combination of two bits:
+ *   bit 0: 1 if a had to be negated, 0 otherwise
+ *   bit 1: 1 if b had to be negated, 0 otherwise
+ *
+ * Factors pa, pb, qa and qb must be at most 2^15 in absolute value.
+ * Source integers a and b must be nonnegative; top word is not allowed
+ * to contain an extra 16th bit.
+ */
+static uint32_t
+co_reduce(uint16_t *a, uint16_t *b, size_t len,
+	int32_t pa, int32_t pb, int32_t qa, int32_t qb)
+{
+	size_t k;
+	int32_t cca, ccb;
+	uint32_t nega, negb;
+
+	cca = 0;
+	ccb = 0;
+	for (k = 0; k < len; k ++) {
+		uint32_t wa, wb, za, zb;
+		uint16_t tta, ttb;
+
+		/*
+		 * Since:
+		 *   |pa| <= 2^15
+		 *   |pb| <= 2^15
+		 *   0 <= wa <= 2^15 - 1
+		 *   0 <= wb <= 2^15 - 1
+		 *   |cca| <= 2^16 - 1
+		 * Then:
+		 *   |za| <= (2^15-1)*(2^16) + (2^16-1) = 2^31 - 1
+		 *
+		 * Thus, the new value of cca is such that |cca| <= 2^16 - 1.
+		 * The same applies to ccb.
+		 */
+		wa = a[k];
+		wb = b[k];
+		za = wa * (uint32_t)pa + wb * (uint32_t)pb + (uint32_t)cca;
+		zb = wa * (uint32_t)qa + wb * (uint32_t)qb + (uint32_t)ccb;
+		/*
+		 * Results are stored one slot lower: the low word (k = 0)
+		 * is dropped, which is the division by 2^15.
+		 */
+		if (k > 0) {
+			a[k - 1] = za & 0x7FFF;
+			b[k - 1] = zb & 0x7FFF;
+		}
+		/*
+		 * The carry is narrowed to 16 bits and then read back as a
+		 * signed 16-bit value (sign extension into cca / ccb).
+		 * NOTE(review): this keeps 16 bits of carry while the bound
+		 * stated above allows 17 signed bits; presumably the factors
+		 * actually produced by br_i15_moddiv keep the carry within
+		 * 16 bits -- to be confirmed.
+		 */
+		tta = za >> 15;
+		ttb = zb >> 15;
+		cca = *(int16_t *)&tta;
+		ccb = *(int16_t *)&ttb;
+	}
+	/*
+	 * The final carries become the new top words; their sign bits
+	 * tell whether each result is negative and must be negated.
+	 */
+	a[len - 1] = (uint16_t)cca;
+	b[len - 1] = (uint16_t)ccb;
+	nega = (uint32_t)cca >> 31;
+	negb = (uint32_t)ccb >> 31;
+	cond_negate(a, len, nega);
+	cond_negate(b, len, negb);
+	return nega | (negb << 1);
+}
+
+/*
+ * Compute:
+ *   a <- (a*pa+b*pb)/(2^15) mod m
+ *   b <- (a*qa+b*qb)/(2^15) mod m
+ *
+ * m0i is equal to -1/m[0] mod 2^15.
+ *
+ * Factors pa, pb, qa and qb must be at most 2^15 in absolute value.
+ * Source integers a and b must be nonnegative; top word is not allowed
+ * to contain an extra 16th bit.
+ */
+static void
+co_reduce_mod(uint16_t *a, uint16_t *b, size_t len,
+	int32_t pa, int32_t pb, int32_t qa, int32_t qb,
+	const uint16_t *m, uint16_t m0i)
+{
+	size_t k;
+	int32_t cca, ccb, fa, fb;
+
+	cca = 0;
+	ccb = 0;
+	/*
+	 * fa and fb are computed from the low words only. Since m0i is
+	 * -1/m[0] mod 2^15, adding m*fa (resp. m*fb) cancels the low word
+	 * of the combination, so dropping that word below is an exact
+	 * division by 2^15 modulo m.
+	 */
+	fa = ((a[0] * (uint32_t)pa + b[0] * (uint32_t)pb) * m0i) & 0x7FFF;
+	fb = ((a[0] * (uint32_t)qa + b[0] * (uint32_t)qb) * m0i) & 0x7FFF;
+	for (k = 0; k < len; k ++) {
+		uint32_t wa, wb, za, zb;
+		uint32_t tta, ttb;
+
+		/*
+		 * In this loop, carries 'cca' and 'ccb' always fit on
+		 * 17 bits (in absolute value).
+		 */
+		wa = a[k];
+		wb = b[k];
+		za = wa * (uint32_t)pa + wb * (uint32_t)pb
+			+ m[k] * (uint32_t)fa + (uint32_t)cca;
+		zb = wa * (uint32_t)qa + wb * (uint32_t)qb
+			+ m[k] * (uint32_t)fb + (uint32_t)ccb;
+		if (k > 0) {
+			a[k - 1] = za & 0x7FFF;
+			b[k - 1] = zb & 0x7FFF;
+		}
+
+		/*
+		 * The XOR-and-sub construction below does an arithmetic
+		 * right shift in a portable way (technically, right-shifting
+		 * a negative signed value is implementation-defined in C).
+		 */
+#define M   ((uint32_t)1 << 16)
+		tta = za >> 15;
+		ttb = zb >> 15;
+		tta = (tta ^ M) - M;
+		ttb = (ttb ^ M) - M;
+		cca = *(int32_t *)&tta;
+		ccb = *(int32_t *)&ttb;
+#undef M
+	}
+	a[len - 1] = (uint32_t)cca;
+	b[len - 1] = (uint32_t)ccb;
+
+	/*
+	 * At this point:
+	 *   -m <= a < 2*m
+	 *   -m <= b < 2*m
+	 * (this is a case of Montgomery reduction)
+	 * The top word of 'a' and 'b' may have a 16-th bit set.
+	 * We may have to add or subtract the modulus.
+	 */
+	finish_mod(a, len, m, (uint32_t)cca >> 31);
+	finish_mod(b, len, m, (uint32_t)ccb >> 31);
+}
+
+/*
+ * Modular division of x by y modulo m, done in place in x (see the
+ * invariants in the function body: the final u, stored in the value
+ * words of x, is such that x = y*u mod m when the GCD is 1).
+ *
+ * Returned value is EQ0(r), where r is zero exactly when the final
+ * values of a and b OR together to 1, i.e. when GCD(y,m) = 1. EQ0
+ * comes from inner.h; presumably it returns 1 for a zero argument and
+ * 0 otherwise (to be confirmed there).
+ *
+ * The temporary area t is split into three consecutive arrays of len
+ * words each (a, b and v), so it must hold at least 3*len 16-bit
+ * words, where len is the word count from the header of m. The header
+ * word x[0] is not modified.
+ *
+ * See inner.h.
+ */
+uint32_t
+br_i15_moddiv(uint16_t *x, const uint16_t *y, const uint16_t *m, uint16_t m0i,
+	uint16_t *t)
+{
+	/*
+	 * Algorithm is an extended binary GCD. We maintain four values
+	 * a, b, u and v, with the following invariants:
+	 *
+	 *   a * x = y * u mod m
+	 *   b * x = y * v mod m
+	 *
+	 * Starting values are:
+	 *
+	 *   a = y
+	 *   b = m
+	 *   u = x
+	 *   v = 0
+	 *
+	 * The formal definition of the algorithm is a sequence of steps:
+	 *
+	 *   - If a is even, then a <- a/2 and u <- u/2 mod m.
+	 *   - Otherwise, if b is even, then b <- b/2 and v <- v/2 mod m.
+	 *   - Otherwise, if a > b, then a <- (a-b)/2 and u <- (u-v)/2 mod m.
+	 *   - Otherwise, b <- (b-a)/2 and v <- (v-u)/2 mod m.
+	 *
+	 * Algorithm stops when a = b. At that point, they both are equal
+	 * to GCD(y,m); the modular division succeeds if that value is 1.
+	 * The result of the modular division is then u (or v: both are
+	 * equal at that point).
+	 *
+	 * Each step makes either a or b shrink by at least one bit; hence,
+	 * if m has bit length k bits, then 2k-2 steps are sufficient.
+	 *
+	 *
+	 * Though complexity is quadratic in the size of m, the bit-by-bit
+	 * processing is not very efficient. We can speed up processing by
+	 * remarking that the decisions are taken based only on observation
+	 * of the top and low bits of a and b.
+	 *
+	 * In the loop below, at each iteration, we use the two top words
+	 * of a and b, and the low words of a and b, to compute reduction
+	 * parameters pa, pb, qa and qb such that the new values for a
+	 * and b are:
+	 *
+	 *   a' = (a*pa + b*pb) / (2^15)
+	 *   b' = (a*qa + b*qb) / (2^15)
+	 *
+	 * the division being exact.
+	 *
+	 * Since the choices are based on the top words, they may be slightly
+	 * off, requiring an optional correction: if a' < 0, then we replace
+	 * pa with -pa, and pb with -pb. The total length of a and b is
+	 * thus reduced by at least 14 bits at each iteration.
+	 *
+	 * The stopping conditions are still the same, though: when a
+	 * and b become equal, they must be both odd (since m is odd,
+	 * the GCD cannot be even), therefore the next operation is a
+	 * subtraction, and one of the values becomes 0. At that point,
+	 * nothing else happens, i.e. one value is stuck at 0, and the
+	 * other one is the GCD.
+	 */
+	size_t len, k;
+	uint16_t *a, *b, *u, *v;
+	uint32_t num, r;
+
+	len = (m[0] + 15) >> 4;
+	/* a, b and v live in t[]; u is the value part of x itself. */
+	a = t;
+	b = a + len;
+	u = x + 1;
+	v = b + len;
+	memcpy(a, y + 1, len * sizeof *y);
+	memcpy(b, m + 1, len * sizeof *m);
+	memset(v, 0, len * sizeof *v);
+
+	/*
+	 * Loop below ensures that a and b are reduced by some bits each,
+	 * for a total of at least 14 bits.
+	 */
+	/*
+	 * The iteration count depends only on the header word m[0], not
+	 * on the values of x and y.
+	 */
+	for (num = ((m[0] - (m[0] >> 4)) << 1) + 14; num >= 14; num -= 14) {
+		size_t j;
+		uint32_t c0, c1;
+		uint32_t a0, a1, b0, b1;
+		uint32_t a_hi, b_hi, a_lo, b_lo;
+		int32_t pa, pb, qa, qb;
+		int i;
+
+		/*
+		 * Extract top words of a and b. If j is the highest
+		 * index >= 1 such that a[j] != 0 or b[j] != 0, then we want
+		 * (a[j] << 15) + a[j - 1], and (b[j] << 15) + b[j - 1].
+		 * If a and b are down to one word each, then we use a[0]
+		 * and b[0].
+		 */
+		c0 = (uint32_t)-1;
+		c1 = (uint32_t)-1;
+		a0 = 0;
+		a1 = 0;
+		b0 = 0;
+		b1 = 0;
+		j = len;
+		while (j -- > 0) {
+			uint32_t aw, bw;
+
+			aw = a[j];
+			bw = b[j];
+			a0 ^= (a0 ^ aw) & c0;
+			a1 ^= (a1 ^ aw) & c1;
+			b0 ^= (b0 ^ bw) & c0;
+			b1 ^= (b1 ^ bw) & c1;
+			c1 = c0;
+			/* c0 stays all-ones only while both words are zero. */
+			c0 &= (((aw | bw) + 0xFFFF) >> 16) - (uint32_t)1;
+		}
+
+		/*
+		 * If c1 = 0, then we grabbed two words for a and b.
+		 * If c1 != 0 but c0 = 0, then we grabbed one word. It
+		 * is not possible that c1 != 0 and c0 != 0, because that
+		 * would mean that both integers are zero.
+		 */
+		a1 |= a0 & c1;
+		a0 &= ~c1;
+		b1 |= b0 & c1;
+		b0 &= ~c1;
+		a_hi = (a0 << 15) + a1;
+		b_hi = (b0 << 15) + b1;
+		a_lo = a[0];
+		b_lo = b[0];
+
+		/*
+		 * Compute reduction factors:
+		 *
+		 *   a' = a*pa + b*pb
+		 *   b' = a*qa + b*qb
+		 *
+		 * such that a' and b' are both multiple of 2^15, but are
+		 * only marginally larger than a and b.
+		 */
+		pa = 1;
+		pb = 0;
+		qa = 0;
+		qb = 1;
+		for (i = 0; i < 15; i ++) {
+			/*
+			 * At each iteration:
+			 *
+			 *   a <- (a-b)/2 if: a is odd, b is odd, a_hi > b_hi
+			 *   b <- (b-a)/2 if: a is odd, b is odd, a_hi <= b_hi
+			 *   a <- a/2 if: a is even
+			 *   b <- b/2 if: a is odd, b is even
+			 *
+			 * We multiply a_lo and b_lo by 2 at each
+			 * iteration, thus a division by 2 really is a
+			 * non-multiplication by 2.
+			 */
+			/*
+			 * NOTE(review): this 'r' shadows the function-level
+			 * 'r'; the outer one is only assigned after this
+			 * inner loop, so the two uses do not interfere.
+			 */
+			uint32_t r, oa, ob, cAB, cBA, cA;
+
+			/*
+			 * cAB = 1 if b must be subtracted from a
+			 * cBA = 1 if a must be subtracted from b
+			 * cA = 1 if a is divided by 2, 0 otherwise
+			 *
+			 * Rules:
+			 *
+			 *   cAB and cBA cannot be both 1.
+			 *   if a is not divided by 2, b is.
+			 */
+			r = GT(a_hi, b_hi);
+			oa = (a_lo >> i) & 1;
+			ob = (b_lo >> i) & 1;
+			cAB = oa & ob & r;
+			cBA = oa & ob & NOT(r);
+			cA = cAB | NOT(oa);
+
+			/*
+			 * Conditional subtractions.
+			 */
+			a_lo -= b_lo & -cAB;
+			a_hi -= b_hi & -cAB;
+			pa -= qa & -(int32_t)cAB;
+			pb -= qb & -(int32_t)cAB;
+			b_lo -= a_lo & -cBA;
+			b_hi -= a_hi & -cBA;
+			qa -= pa & -(int32_t)cBA;
+			qb -= pb & -(int32_t)cBA;
+
+			/*
+			 * Shifting.
+			 */
+			a_lo += a_lo & (cA - 1);
+			pa += pa & ((int32_t)cA - 1);
+			pb += pb & ((int32_t)cA - 1);
+			a_hi ^= (a_hi ^ (a_hi >> 1)) & -cA;
+			b_lo += b_lo & -cA;
+			qa += qa & -(int32_t)cA;
+			qb += qb & -(int32_t)cA;
+			b_hi ^= (b_hi ^ (b_hi >> 1)) & (cA - 1);
+		}
+
+		/*
+		 * Replace a and b with new values a' and b'.
+		 */
+		r = co_reduce(a, b, len, pa, pb, qa, qb);
+		/*
+		 * If co_reduce() negated a (bit 0 of r), flip the signs of
+		 * pa and pb (p - 2*p = -p) so that u follows the same
+		 * transformation; same for qa and qb with bit 1, for which
+		 * (r & 2) is already 0 or 2.
+		 */
+		pa -= pa * ((r & 1) << 1);
+		pb -= pb * ((r & 1) << 1);
+		qa -= qa * (r & 2);
+		qb -= qb * (r & 2);
+		co_reduce_mod(u, v, len, pa, pb, qa, qb, m + 1, m0i);
+	}
+
+	/*
+	 * Now one of the arrays should be 0, and the other contains
+	 * the GCD. If a is 0, then u is 0 as well, and v contains
+	 * the division result.
+	 * Result is correct if and only if GCD is 1.
+	 */
+	/*
+	 * r accumulates (a | b) XOR 1 over all words: it is zero only if
+	 * the non-zero array is exactly 1. u and v are merged into u (one
+	 * of them being zero), which is the value part of x.
+	 */
+	r = (a[0] | b[0]) ^ 1;
+	u[0] |= v[0];
+	for (k = 1; k < len; k ++) {
+		r |= a[k] | b[k];
+		u[k] |= v[k];
+	}
+	return EQ0(r);
+}
diff --git a/third_party/bearssl/src/i15_modpow.c b/third_party/bearssl/src/i15_modpow.c
new file mode 100644
index 0000000..9bf304e
--- /dev/null
+++ b/third_party/bearssl/src/i15_modpow.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Modular exponentiation by square-and-multiply, one exponent bit per
+ * iteration; presumably this computes x <- x^e mod m (to be confirmed
+ * against inner.h, where the Montgomery helpers are declared).
+ *
+ * The exponent is the elen bytes at e, big-endian; bits are consumed
+ * from the least significant one (low bit of the last byte) upwards.
+ *
+ * t1 and t2 are temporaries; each one receives mlen bytes, i.e. as
+ * many 16-bit slots as m (header slot included).
+ *
+ * Every iteration performs both Montgomery multiplications; the
+ * exponent bit only selects, through CCOPY, whether the product is
+ * copied into x.
+ *
+ * NOTE(review): the loop bound is ((unsigned)elen << 3), so elen is
+ * narrowed to unsigned before the shift; exponents whose bit count
+ * does not fit in an unsigned are not handled.
+ *
+ * See inner.h.
+ */
+void
+br_i15_modpow(uint16_t *x,
+	const unsigned char *e, size_t elen,
+	const uint16_t *m, uint16_t m0i, uint16_t *t1, uint16_t *t2)
+{
+	size_t mlen;
+	unsigned k;
+
+	mlen = ((m[0] + 31) >> 4) * sizeof m[0];
+	/* t1 holds the running square, kept in Montgomery representation. */
+	memcpy(t1, x, mlen);
+	br_i15_to_monty(t1, m);
+	/* x is the accumulator, starting at 1. */
+	br_i15_zero(x, m[0]);
+	x[1] = 1;
+	for (k = 0; k < ((unsigned)elen << 3); k ++) {
+		uint32_t ctl;
+
+		ctl = (e[elen - 1 - (k >> 3)] >> (k & 7)) & 1;
+		br_i15_montymul(t2, x, t1, m, m0i);
+		CCOPY(ctl, x, t2, mlen);
+		br_i15_montymul(t2, t1, t1, m, m0i);
+		memcpy(t1, t2, mlen);
+	}
+}
diff --git a/third_party/bearssl/src/i15_modpow2.c b/third_party/bearssl/src/i15_modpow2.c
new file mode 100644
index 0000000..4b32118
--- /dev/null
+++ b/third_party/bearssl/src/i15_modpow2.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Windowed modular exponentiation, done in place in x; presumably it
+ * computes x <- x^e mod m (to be confirmed against inner.h).
+ *
+ * The exponent is the elen bytes at e, big-endian, processed from the
+ * most significant bits down.
+ *
+ * tmp is a temporary area of twlen 16-bit words. It is cut into
+ * slices of mwlen words each, where mwlen is the slot count of m
+ * (header included) rounded up to an even value: t1 is the first
+ * slice, t2 the second one, and the window entries follow t2. The
+ * window size (1 to 5 bits) is the largest one for which tmp is big
+ * enough.
+ *
+ * Returned value is 0 if tmp cannot hold two slices (nothing is
+ * computed and x is untouched in that case), 1 otherwise.
+ *
+ * See inner.h.
+ */
+uint32_t
+br_i15_modpow_opt(uint16_t *x,
+	const unsigned char *e, size_t elen,
+	const uint16_t *m, uint16_t m0i, uint16_t *tmp, size_t twlen)
+{
+	size_t mlen, mwlen;
+	uint16_t *t1, *t2, *base;
+	size_t u, v;
+	uint32_t acc;
+	int acc_len, win_len;
+
+	/*
+	 * Get modulus size.
+	 */
+	mwlen = (m[0] + 31) >> 4;
+	mlen = mwlen * sizeof m[0];
+	/* Round the slice size up to an even number of 16-bit words. */
+	mwlen += (mwlen & 1);
+	t1 = tmp;
+	t2 = tmp + mwlen;
+
+	/*
+	 * Compute possible window size, with a maximum of 5 bits.
+	 * When the window has size 1 bit, we use a specific code
+	 * that requires only two temporaries. Otherwise, for a
+	 * window of k bits, we need 2^k+1 temporaries.
+	 */
+	if (twlen < (mwlen << 1)) {
+		return 0;
+	}
+	for (win_len = 5; win_len > 1; win_len --) {
+		if ((((uint32_t)1 << win_len) + 1) * mwlen <= twlen) {
+			break;
+		}
+	}
+
+	/*
+	 * Everything is done in Montgomery representation.
+	 */
+	br_i15_to_monty(x, m);
+
+	/*
+	 * Compute window contents. If the window has size one bit only,
+	 * then t2 is set to x; otherwise, t2[0] is left untouched, and
+	 * t2[k] is set to x^k (for k >= 1).
+	 */
+	if (win_len == 1) {
+		memcpy(t2, x, mlen);
+	} else {
+		memcpy(t2 + mwlen, x, mlen);
+		base = t2 + mwlen;
+		for (u = 2; u < ((unsigned)1 << win_len); u ++) {
+			br_i15_montymul(base + mwlen, base, x, m, m0i);
+			base += mwlen;
+		}
+	}
+
+	/*
+	 * We need to set x to 1, in Montgomery representation. This can
+	 * be done efficiently by setting the high word to 1, then doing
+	 * one word-sized shift.
+	 */
+	br_i15_zero(x, m[0]);
+	x[(m[0] + 15) >> 4] = 1;
+	br_i15_muladd_small(x, 0, m);
+
+	/*
+	 * We process bits from most to least significant. At each
+	 * loop iteration, we have acc_len bits in acc.
+	 */
+	acc = 0;
+	acc_len = 0;
+	while (acc_len > 0 || elen > 0) {
+		int i, k;
+		uint32_t bits;
+
+		/*
+		 * Get the next bits.
+		 */
+		k = win_len;
+		if (acc_len < win_len) {
+			if (elen > 0) {
+				acc = (acc << 8) | *e ++;
+				elen --;
+				acc_len += 8;
+			} else {
+				/* Last, shorter window: use whatever bits are left. */
+				k = acc_len;
+			}
+		}
+		bits = (acc >> (acc_len - k)) & (((uint32_t)1 << k) - 1);
+		acc_len -= k;
+
+		/*
+		 * We could get exactly k bits. Compute k squarings.
+		 */
+		for (i = 0; i < k; i ++) {
+			br_i15_montymul(t1, x, x, m, m0i);
+			memcpy(x, t1, mlen);
+		}
+
+		/*
+		 * Window lookup: we want to set t2 to the window
+		 * lookup value, assuming the bits are non-zero. If
+		 * the window length is 1 bit only, then t2 is
+		 * already set; otherwise, we do a constant-time lookup.
+		 */
+		if (win_len > 1) {
+			br_i15_zero(t2, m[0]);
+			base = t2 + mwlen;
+			for (u = 1; u < ((uint32_t)1 << k); u ++) {
+				uint32_t mask;
+
+				/*
+				 * Every entry is read; the mask keeps only
+				 * the entry whose index equals bits.
+				 */
+				mask = -EQ(u, bits);
+				for (v = 1; v < mwlen; v ++) {
+					t2[v] |= mask & base[v];
+				}
+				base += mwlen;
+			}
+		}
+
+		/*
+		 * Multiply with the looked-up value. We keep the
+		 * product only if the exponent bits are not all-zero.
+		 */
+		br_i15_montymul(t1, x, t2, m, m0i);
+		CCOPY(NEQ(bits, 0), x, t1, mlen);
+	}
+
+	/*
+	 * Convert back from Montgomery representation, and exit.
+	 */
+	br_i15_from_monty(x, m, m0i);
+	return 1;
+}
diff --git a/third_party/bearssl/src/i15_montmul.c b/third_party/bearssl/src/i15_montmul.c
new file mode 100644
index 0000000..e98bc32
--- /dev/null
+++ b/third_party/bearssl/src/i15_montmul.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
/*
 * Montgomery multiplication on 15-bit-limb integers: d <- x*y/R mod m,
 * where the word count is taken from the header word m[0]. The header
 * of d[] is temporarily overwritten during the computation and restored
 * at the end.
 * (see inner.h for the authoritative contract)
 */
void
br_i15_montymul(uint16_t *d, const uint16_t *x, const uint16_t *y,
	const uint16_t *m, uint16_t m0i)
{
	size_t nw, len4, i, v;
	uint32_t top;

	/*
	 * nw is the number of value words announced by m[0]; len4 is
	 * that count rounded down to a multiple of four, which is the
	 * part handled by the four-way unrolled code below.
	 */
	nw = (m[0] + 15) >> 4;
	len4 = nw & ~(size_t)3;
	br_i15_zero(d, m[0]);
	top = 0;
	for (i = 0; i < nw; i ++) {
		uint32_t f, xu, r, sum;

		/*
		 * xu is the current word of x[]; f is the multiplier
		 * applied to m[] for this round, derived from the low
		 * words and m0i.
		 */
		xu = x[i + 1];
		f = MUL15((d[1] + MUL15(xu, y[1])) & 0x7FFF, m0i)
			& 0x7FFF;
#if BR_ARMEL_CORTEXM_GCC
		/*
		 * Assembly version of the unrolled loop; it leaves the
		 * running carry in r.
		 */
		if (len4 != 0) {
			uint16_t *limit;

			limit = d + len4;
			asm volatile (
"\n\
	@ carry: r=r2                              \n\
	@ multipliers: xu=r3 f=r4                  \n\
	@ base registers: d+v=r5 y+v=r6 m+v=r7     \n\
	@ r8 contains 0x7FFF                       \n\
	@ r9 contains d+len4                       \n\
	ldr	r0, %[limit]                       \n\
	ldr	r3, %[xu]                          \n\
	mov	r9, r0                             \n\
	ldr	r4, %[f]                           \n\
	eor	r2, r2                             \n\
	ldr	r5, %[d]                           \n\
	sub	r1, r2, #1                         \n\
	ldr	r6, %[y]                           \n\
	lsr	r1, r1, #17                        \n\
	ldr	r7, %[m]                           \n\
	mov	r8, r1                             \n\
loop%=:                                            \n\
	ldrh	r0, [r6, #2]                       \n\
	ldrh	r1, [r7, #2]                       \n\
	mul	r0, r3                             \n\
	mul	r1, r4                             \n\
	add	r2, r0, r2                         \n\
	ldrh	r0, [r5, #2]                       \n\
	add	r2, r1, r2                         \n\
	mov	r1, r8                             \n\
	add	r2, r0, r2                         \n\
	and	r1, r2                             \n\
	lsr	r2, r2, #15                        \n\
	strh	r1, [r5, #0]                       \n\
		                                   \n\
	ldrh	r0, [r6, #4]                       \n\
	ldrh	r1, [r7, #4]                       \n\
	mul	r0, r3                             \n\
	mul	r1, r4                             \n\
	add	r2, r0, r2                         \n\
	ldrh	r0, [r5, #4]                       \n\
	add	r2, r1, r2                         \n\
	mov	r1, r8                             \n\
	add	r2, r0, r2                         \n\
	and	r1, r2                             \n\
	lsr	r2, r2, #15                        \n\
	strh	r1, [r5, #2]                       \n\
		                                   \n\
	ldrh	r0, [r6, #6]                       \n\
	ldrh	r1, [r7, #6]                       \n\
	mul	r0, r3                             \n\
	mul	r1, r4                             \n\
	add	r2, r0, r2                         \n\
	ldrh	r0, [r5, #6]                       \n\
	add	r2, r1, r2                         \n\
	mov	r1, r8                             \n\
	add	r2, r0, r2                         \n\
	and	r1, r2                             \n\
	lsr	r2, r2, #15                        \n\
	strh	r1, [r5, #4]                       \n\
		                                   \n\
	ldrh	r0, [r6, #8]                       \n\
	ldrh	r1, [r7, #8]                       \n\
	mul	r0, r3                             \n\
	mul	r1, r4                             \n\
	add	r2, r0, r2                         \n\
	ldrh	r0, [r5, #8]                       \n\
	add	r2, r1, r2                         \n\
	mov	r1, r8                             \n\
	add	r2, r0, r2                         \n\
	and	r1, r2                             \n\
	lsr	r2, r2, #15                        \n\
	strh	r1, [r5, #6]                       \n\
		                                   \n\
	add	r5, r5, #8                         \n\
	add	r6, r6, #8                         \n\
	add	r7, r7, #8                         \n\
	cmp	r5, r9                             \n\
	bne	loop%=                             \n\
		                                   \n\
	str	r2, %[carry]                       \n\
"
: [carry] "=m" (r)
: [xu] "m" (xu), [f] "m" (f), [d] "m" (d), [y] "m" (y),
	[m] "m" (m), [limit] "m" (limit)
: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" );
		} else {
			r = 0;
		}
		v = len4;
#else
		/*
		 * Portable four-way unrolled loop. Each result word is
		 * computed from slot v+k+1 and stored in slot v+k, so
		 * the accumulator moves down by one word per round.
		 */
		r = 0;
		v = 0;
		while (v < len4) {
			uint32_t t;

			t = d[v + 1] + MUL15(xu, y[v + 1])
				+ MUL15(f, m[v + 1]) + r;
			r = t >> 15;
			d[v + 0] = t & 0x7FFF;

			t = d[v + 2] + MUL15(xu, y[v + 2])
				+ MUL15(f, m[v + 2]) + r;
			r = t >> 15;
			d[v + 1] = t & 0x7FFF;

			t = d[v + 3] + MUL15(xu, y[v + 3])
				+ MUL15(f, m[v + 3]) + r;
			r = t >> 15;
			d[v + 2] = t & 0x7FFF;

			t = d[v + 4] + MUL15(xu, y[v + 4])
				+ MUL15(f, m[v + 4]) + r;
			r = t >> 15;
			d[v + 3] = t & 0x7FFF;

			v += 4;
		}
#endif
		/*
		 * Remaining zero to three words, one at a time.
		 */
		while (v < nw) {
			uint32_t t;

			t = d[v + 1] + MUL15(xu, y[v + 1])
				+ MUL15(f, m[v + 1]) + r;
			r = t >> 15;
			d[v + 0] = t & 0x7FFF;
			v ++;
		}

		/*
		 * Fold the final carry and the extra bit kept from the
		 * previous round into the top word.
		 */
		sum = top + r;
		d[nw] = sum & 0x7FFF;
		top = sum >> 15;
	}

	/*
	 * The loop above wrote into d[0]; put the header word back.
	 */
	d[0] = m[0];

	/*
	 * d[] is lower than twice the modulus but may still exceed it:
	 * first call only computes the borrow (ctl = 0), second call
	 * performs the subtraction if there was an extra top bit or no
	 * borrow.
	 */
	br_i15_sub(d, m, NEQ(top, 0) | NOT(br_i15_sub(d, m, 0)));
}

/*
 * Multiply-accumulate: adds a*b into d[], starting at word 1, and sets
 * the header word d[0] from the headers of a[] and b[].
 * (see inner.h for the authoritative contract)
 */
void
br_i15_mulacc(uint16_t *d, const uint16_t *a, const uint16_t *b)
{
	size_t na, nb, row;
	unsigned lo_bits, hi_words;

	na = (a[0] + 15) >> 4;
	nb = (b[0] + 15) >> 4;

	/*
	 * The header word keeps the word count in its upper part and a
	 * bit count in its low four bits; the two parts are summed
	 * separately and an extra unit is added when the low part
	 * reaches 15.
	 */
	lo_bits = (a[0] & 15) + (b[0] & 15);
	hi_words = (a[0] >> 4) + (b[0] >> 4);
	d[0] = (hi_words << 4) + lo_bits
		+ (~(uint32_t)(lo_bits - 15) >> 31);

	/*
	 * Schoolbook product: one pass over a[] per word of b[].
	 */
	for (row = 0; row < nb; row ++) {
		uint32_t factor, carry;
		size_t col;

		factor = b[1 + row];
		carry = 0;
		for (col = 0; col < na; col ++) {
			uint32_t t;

			t = (uint32_t)d[1 + row + col]
				+ MUL15(factor, a[1 + col]) + carry;
			carry = t >> 15;
			d[1 + row + col] = t & 0x7FFF;
		}
		d[1 + row + na] = carry;
	}
}

/*
 * Constant-time division. The divisor must not be larger than 16 bits,
 * and the quotient must fit on 17 bits. Returns the quotient; if r is
 * not NULL, the remainder is written to *r.
 */
static uint32_t
divrem16(uint32_t x, uint32_t d, uint32_t *r)
{
	uint32_t quo;
	int bit;

	quo = 0;
	d <<= 16;
	bit = 16;
	while (bit >= 0) {
		uint32_t take;

		/*
		 * take is the 0/1 result of the comparison; -take is
		 * then an all-zeros or all-ones mask, so the
		 * subtraction is applied without branching on data.
		 */
		take = LE(d, x);
		quo |= take << bit;
		x -= (-take) & d;
		d >>= 1;
		bit --;
	}
	if (r != NULL) {
		*r = x;
	}
	return quo;
}

/*
 * Computes x <- (x*2^15 + z) mod m, in place.
 * Constant-time except for the exact bit length of the modulus m,
 * which is allowed to leak.
 * (see inner.h for the authoritative contract)
 */
void
br_i15_muladd_small(uint16_t *x, uint16_t z, const uint16_t *m)
{
	unsigned bits, topbits;
	size_t nw, i;
	uint32_t hi, num_hi, num, den, q;
	uint32_t carry, ge, over, under;

	/*
	 * Empty modulus: nothing to do. One-word modulus: a single
	 * small division yields the remainder directly.
	 */
	bits = m[0];
	if (bits == 0) {
		return;
	}
	if (bits <= 15) {
		uint32_t rem;

		divrem16(((uint32_t)x[1] << 15) | z, m[1], &rem);
		x[1] = rem;
		return;
	}
	nw = (bits + 15) >> 4;
	topbits = bits & 15;

	/*
	 * General case: the quotient (x*2^15+z)/m is estimated with a
	 * 30/15 division on the top words.
	 *
	 * With w = 2^15, write
	 *    a = (w*a0 + a1) * w^N + a2
	 *    b = b0 * w^N + b2
	 * where 0 <= a0 < w, 0 <= a1 < w, 0 <= a2 < w^N,
	 * w/2 <= b0 < w, 0 <= b2 < w^N and a < w*b. So a0:a1 are the
	 * two top words of a, b0 is the (normalized) top word of b,
	 * and the quotient q = a/b fits on one word.
	 *
	 * If a = b*q + r (with 0 <= r < b) and the top-word division
	 * gives a0*w + a1 = b0*u + v (with 0 <= v < b0), then
	 *    0 <= u <= w
	 *    u-2 <= q <= u
	 *
	 * The top words are extracted with a shift so that the divisor
	 * word has its high bit set; num_hi must be read before the
	 * words of x[] are moved up.
	 */
	hi = x[nw];
	if (topbits == 0) {
		num_hi = x[nw];
	} else {
		num_hi = (x[nw] << (15 - topbits))
			| (x[nw - 1] >> topbits);
	}

	/*
	 * Shift x[] up by one word and insert z as the new low word.
	 */
	memmove(x + 2, x + 1, (nw - 1) * sizeof *x);
	x[1] = z;

	if (topbits == 0) {
		num = (num_hi << 15) + x[nw];
		den = m[nw];
	} else {
		num = (num_hi << 15) | (((x[nw] << (15 - topbits))
			| (x[nw - 1] >> topbits)) & 0x7FFF);
		den = (m[nw] << (15 - topbits)) | (m[nw - 1] >> topbits);
	}
	q = divrem16(num, den, NULL);

	/*
	 * The estimate may be the true quotient, or exceed it by 1 or
	 * 2; the division may also have produced 8000 or 8001 when the
	 * two high words were identical. Adjust so that q lies in
	 * 0000..7FFF and the true multiplier is q-1, q or q+1.
	 */
	q = MUX(EQ(den, num_hi), 0x7FFF, q - 1 + ((q - 1) >> 31));

	/*
	 * Subtract q*m from x (which has an extra high word of value
	 * 'hi'). 'ge' ends up true (1) if the resulting words, ignoring
	 * 'hi' and the carry, are greater than or equal to the modulus.
	 */
	carry = 0;
	ge = 1;
	for (i = 1; i <= nw; i ++) {
		uint32_t mw, prod, cur, diff;

		mw = m[i];
		prod = MUL15(mw, q) + carry;
		carry = prod >> 15;
		prod &= 0x7FFF;
		cur = x[i];
		diff = cur - prod;
		carry += diff >> 31;
		diff &= 0x7FFF;
		x[i] = diff;
		ge = MUX(EQ(diff, mw), ge, GT(diff, mw));
	}

	/*
	 * q was too large if carry > hi. It was too small if
	 * carry < hi (an extra bit remains above the top word), or if
	 * carry == hi and the result is not lower than the modulus.
	 */
	over = GT(carry, hi);
	under = ~over & (ge | LT(carry, hi));
	br_i15_add(x, m, over);
	br_i15_sub(x, m, under);
}
/*
 * Returns the low 15 bits of the negated inverse of x modulo 2^15 when
 * x is odd; when x is even the MUX call picks 0 (assuming MUX selects
 * its second argument on a control value of 1 -- see inner.h).
 * (see inner.h for the authoritative contract)
 */
uint16_t
br_i15_ninv15(uint16_t x)
{
	uint32_t inv, result;
	int step;

	/*
	 * For odd x, 2 - x is an inverse of x modulo 4. Each step
	 * inv <- inv * (2 - x*inv) doubles the number of valid low bits
	 * (2 -> 4 -> 8 -> 16), which covers the 15 bits needed.
	 */
	inv = 2 - x;
	for (step = 0; step < 3; step ++) {
		inv = MUL15(inv, 2 - MUL15(x, inv));
	}
	result = MUX(x & 1, -inv, 0);
	return result & 0x7FFF;
}
/*
 * Sets x to a reduced modulo m. x receives the header word of m.
 * (see inner.h for the authoritative contract)
 */
void
br_i15_reduce(uint16_t *x, const uint16_t *a, const uint16_t *m)
{
	uint32_t mbits, abits;
	size_t nm, na, i;

	mbits = m[0];
	nm = (mbits + 15) >> 4;

	x[0] = mbits;
	if (mbits == 0) {
		return;
	}

	/*
	 * Source announced as shorter than the modulus: copy its words
	 * as they are and clear the remaining upper words of x[].
	 */
	abits = a[0];
	na = (abits + 15) >> 4;
	if (abits < mbits) {
		memcpy(x + 1, a + 1, na * sizeof *a);
		i = na;
		while (i < nm) {
			x[i + 1] = 0;
			i ++;
		}
		return;
	}

	/*
	 * Source at least as long as the modulus: start from the top
	 * nm-1 words of a[] (top word of x[] cleared), then feed the
	 * remaining words one by one, most significant first.
	 */
	memcpy(x + 1, a + 2 + (na - nm), (nm - 1) * sizeof *a);
	x[nm] = 0;
	i = 1 + na - nm;
	while (i > 0) {
		br_i15_muladd_small(x, a[i], m);
		i --;
	}
}
/*
 * Shifts the value in x[] right by 'count' bits, in place. The header
 * word x[0] is left unchanged. Each value word holds 15 bits.
 * (see inner.h for the authoritative contract)
 */
void
br_i15_rshift(uint16_t *x, int count)
{
	size_t nw, i;
	unsigned carry;

	nw = (x[0] + 15) >> 4;
	if (nw == 0) {
		return;
	}

	/*
	 * carry holds the surviving high bits of the current word; the
	 * low bits of the next word are placed above them.
	 */
	carry = x[1] >> count;
	i = 1;
	while (i < nw) {
		unsigned next;

		next = x[i + 1];
		x[i] = ((next << (15 - count)) | carry) & 0x7FFF;
		carry = next >> count;
		i ++;
	}
	x[nw] = carry;
}
/*
 * Computes a - b over the words announced by a[0] and returns the
 * final borrow (0 or 1). The borrow is always computed; each word of
 * a[] is rewritten through MUX(ctl, new, old), so the stored value
 * depends on ctl (presumably: ctl = 1 stores the difference, ctl = 0
 * keeps a[] unchanged -- confirm against MUX in inner.h).
 * (see inner.h for the authoritative contract)
 */
uint32_t
br_i15_sub(uint16_t *a, const uint16_t *b, uint32_t ctl)
{
	uint32_t borrow;
	size_t end, i;

	borrow = 0;
	end = (a[0] + 31) >> 4;
	for (i = 1; i < end; i ++) {
		uint32_t cur, diff;

		cur = a[i];
		diff = cur - b[i] - borrow;
		/* Bit 31 is set exactly when the subtraction wrapped. */
		borrow = diff >> 31;
		a[i] = MUX(ctl, diff & 0x7FFF, cur);
	}
	return borrow;
}
/*
 * Converts x to Montgomery representation modulo m by calling
 * br_i15_muladd_small() with a zero extra word once per word of the
 * modulus.
 * (see inner.h for the authoritative contract)
 */
void
br_i15_to_monty(uint16_t *x, const uint16_t *m)
{
	unsigned remaining;

	remaining = (m[0] + 15) >> 4;
	while (remaining > 0) {
		br_i15_muladd_small(x, 0, m);
		remaining --;
	}
}
/*
 * Computes a + b over the words announced by a[0] and returns the
 * final carry (0 or 1). The carry is always computed; each word of
 * a[] is rewritten through MUX(ctl, new, old), so the stored value
 * depends on ctl (presumably: ctl = 1 stores the sum, ctl = 0 keeps
 * a[] unchanged -- confirm against MUX in inner.h).
 * (see inner.h for the authoritative contract)
 */
uint32_t
br_i31_add(uint32_t *a, const uint32_t *b, uint32_t ctl)
{
	uint32_t carry;
	size_t end, i;

	carry = 0;
	end = (a[0] + 63) >> 5;
	for (i = 1; i < end; i ++) {
		uint32_t cur, sum;

		cur = a[i];
		sum = cur + b[i] + carry;
		/* Words hold 31 bits, so bit 31 of the sum is the carry. */
		carry = sum >> 31;
		a[i] = MUX(ctl, sum & (uint32_t)0x7FFFFFFF, cur);
	}
	return carry;
}
/*
 * Computes the header word for the xlen words in x[]: the index of the
 * topmost non-zero word, shifted left by 5, plus BIT_LENGTH() of that
 * word. All words are scanned, from the top one down.
 * (see inner.h for the authoritative contract)
 */
uint32_t
br_i31_bit_length(uint32_t *x, size_t xlen)
{
	uint32_t top_word, top_index;
	size_t pos;

	top_word = 0;
	top_index = 0;
	for (pos = xlen; pos > 0; pos --) {
		uint32_t still_zero;

		/*
		 * As long as no non-zero word has been seen, keep
		 * replacing the candidate with the current word.
		 */
		still_zero = EQ(top_word, 0);
		top_word = MUX(still_zero, x[pos - 1], top_word);
		top_index = MUX(still_zero, (uint32_t)(pos - 1), top_index);
	}
	return (top_index << 5) + BIT_LENGTH(top_word);
}

/*
 * Decodes the big-endian bytes src[0..len-1] into x[], provided the
 * value is strictly lower than the modulus m. Returns 1 if the value
 * fits, 0 otherwise; in the latter case the words written to x[] are
 * all zero. x[0] is set to m[0] in both cases.
 * (see inner.h for the authoritative contract)
 */
uint32_t
br_i31_decode_mod(uint32_t *x, const void *src, size_t len, const uint32_t *m)
{
	/*
	 * Two passes over the same data. Pass 0 only compares the
	 * value with the modulus; 'r' then tracks the result so far:
	 *    0x00000000   equal to the modulus
	 *    0x00000001   greater than the modulus
	 *    0xFFFFFFFF   lower than the modulus
	 * Bytes are consumed from the least significant end, so a
	 * later (more significant) comparison overrides an earlier one
	 * unless it yields 0 (equal).
	 *
	 * Pass 1 writes the words, ANDed with 'r', which by then is
	 * 0xFFFFFFFF (fits) or 0x00000000 (does not fit).
	 *
	 * The iteration covers every source byte and, with virtual
	 * zero bytes, the whole modulus; four further zero bytes
	 * guarantee that no partial word is left in the accumulator.
	 */
	const unsigned char *bytes;
	size_t nw, total;
	int pass;
	uint32_t r;

	bytes = src;
	nw = (m[0] + 31) >> 5;
	total = nw << 2;
	if (total < len) {
		total = len;
	}
	total += 4;
	r = 0;
	for (pass = 0; pass < 2; pass ++) {
		size_t i, w;
		uint32_t acc;
		int nbits;

		w = 1;
		acc = 0;
		nbits = 0;
		for (i = 0; i < total; i ++) {
			uint32_t b, word;

			b = (i < len) ? bytes[len - 1 - i] : 0;
			acc |= (b << nbits);
			nbits += 8;
			if (nbits < 31) {
				continue;
			}

			/*
			 * A full 31-bit word is available; the bits of
			 * the last byte that did not fit start the
			 * next word.
			 */
			word = acc & (uint32_t)0x7FFFFFFF;
			nbits -= 31;
			acc = b >> (8 - nbits);
			if (pass == 0) {
				if (w <= nw) {
					uint32_t cc;

					cc = (uint32_t)CMP(word, m[w]);
					r = MUX(EQ(cc, 0), r, cc);
				} else {
					/*
					 * Non-zero word beyond the
					 * modulus length: the value is
					 * greater.
					 */
					r = MUX(EQ(word, 0), r, 1);
				}
			} else if (w <= nw) {
				x[w] = r & word;
			}
			w ++;
		}

		/*
		 * After pass 0, r is 0, 1 or -1: map 0 and 1 to 0 and
		 * keep -1. After pass 1, r is 0 or -1 and this leaves
		 * it unchanged.
		 */
		r >>= 1;
		r |= (r << 1);
	}

	x[0] = m[0];
	return r & (uint32_t)1;
}
/*
 * Decodes the big-endian bytes src[0..len-1] into 31-bit words
 * x[1], x[2], ... (least significant word first), then sets x[0] with
 * br_i31_bit_length() over the words written.
 * (see inner.h for the authoritative contract)
 */
void
br_i31_decode(uint32_t *x, const void *src, size_t len)
{
	const unsigned char *bytes;
	size_t pos, w;
	uint32_t acc;
	int nbits;

	bytes = src;
	w = 1;
	acc = 0;
	nbits = 0;
	for (pos = len; pos > 0; pos --) {
		uint32_t b;

		b = bytes[pos - 1];
		acc |= (b << nbits);
		nbits += 8;
		if (nbits >= 31) {
			/*
			 * Emit a full word; the bits of this byte that
			 * did not fit start the next word.
			 */
			x[w] = acc & (uint32_t)0x7FFFFFFF;
			w ++;
			nbits -= 31;
			acc = b >> (8 - nbits);
		}
	}
	if (nbits != 0) {
		x[w] = acc;
		w ++;
	}
	x[0] = br_i31_bit_length(x + 1, w - 1);
}
/*
 * Decodes the big-endian bytes src[0..len-1] and reduces the value
 * modulo m, writing the result into x[]. If m[0] is 0 (invalid, null
 * modulus), only x[0] is set to 0.
 * (see inner.h for the authoritative contract)
 */
void
br_i31_decode_reduce(uint32_t *x,
	const void *src, size_t len, const uint32_t *m)
{
	uint32_t ebits, rbits;
	size_t mbytes, pos, head;
	const unsigned char *bytes;
	uint32_t acc;
	int nbits;

	ebits = m[0];
	if (ebits == 0) {
		x[0] = 0;
		return;
	}

	br_i31_zero(x, ebits);

	/*
	 * Turn the encoded bit length into the actual bit length (each
	 * full word counts for 31 bits, not 32), then into a byte
	 * length. One byte less than that can be decoded directly.
	 */
	rbits = ebits >> 5;
	rbits = (ebits & 31) + (rbits << 5) - rbits;
	mbytes = (rbits + 7) >> 3;
	pos = mbytes - 1;

	/*
	 * Decode the leading bytes directly; if that already covers
	 * the whole input, there is nothing left to do.
	 */
	head = (pos >= len) ? len : pos;
	br_i31_decode(x, src, head);
	x[0] = ebits;
	if (pos >= len) {
		return;
	}
	bytes = src;

	/*
	 * Feed the remaining bytes, grouped into 31-bit words.
	 */
	acc = 0;
	nbits = 0;
	for (; pos < len; pos ++) {
		uint32_t b;

		b = bytes[pos];
		if (nbits < 23) {
			acc = (acc << 8) | b;
			nbits += 8;
			continue;
		}

		/*
		 * This byte completes a 31-bit word: use its top bits
		 * and keep the rest for the next word.
		 */
		nbits -= 23;
		acc <<= (8 - nbits);
		acc |= b >> nbits;
		br_i31_muladd_small(x, acc, m);
		acc = b & (0xFF >> (8 - nbits));
	}

	/*
	 * Leftover bits: shift x[] right so that these bits, together
	 * with the low bits of x[], can be injected as a full 31-bit
	 * word.
	 */
	if (nbits != 0) {
		acc = (acc | (x[1] << nbits)) & 0x7FFFFFFF;
		br_i31_rshift(x, 31 - nbits);
		br_i31_muladd_small(x, acc, m);
	}
}

/*
 * Encodes x[] as exactly len big-endian bytes into dst. If x[0]
 * announces no word, dst is filled with zeros. Words beyond the
 * announced length are treated as zero.
 * (see inner.h for the authoritative contract)
 */
void
br_i31_encode(void *dst, size_t len, const uint32_t *x)
{
	unsigned char *out;
	size_t idx, nw;
	uint32_t acc;
	int nbits;

	nw = (x[0] + 31) >> 5;
	if (nw == 0) {
		memset(dst, 0, len);
		return;
	}

	/*
	 * out starts one past the end of the buffer and moves down,
	 * since the least significant bytes are produced first.
	 */
	out = (unsigned char *)dst + len;
	idx = 1;
	acc = 0;
	nbits = 0;
	while (len != 0) {
		uint32_t w, full;

		w = 0;
		if (idx <= nw) {
			w = x[idx];
		}
		idx ++;

		/*
		 * Empty accumulator: just load the 31-bit word.
		 */
		if (nbits == 0) {
			acc = w;
			nbits = 31;
			continue;
		}

		/*
		 * Complete the accumulator to 32 bits with the low
		 * bits of w; the remaining bits of w start the next
		 * accumulator.
		 */
		full = acc | (w << nbits);
		nbits --;
		acc = w >> (31 - nbits);

		if (len < 4) {
			/*
			 * Final one to three bytes: low bytes of 'full'.
			 */
			if (len >= 3) {
				out[-3] = (unsigned char)(full >> 16);
			}
			if (len >= 2) {
				out[-2] = (unsigned char)(full >> 8);
			}
			out[-1] = (unsigned char)full;
			return;
		}
		out -= 4;
		len -= 4;
		br_enc32be(out, full);
	}
}
+ */ + +#include "inner.h" + +/* + * Convert x, in place, out of Montgomery representation modulo m (see + * inner.h). m0i is the per-modulus factor also handed to + * br_i31_montymul(); presumably -1/m[1] mod 2^31 (TODO confirm in + * inner.h). The loop performs len = (m[0] + 31) >> 5 word-sized + * reduction passes, then one conditional subtraction of m. + */ +void +br_i31_from_monty(uint32_t *x, const uint32_t *m, uint32_t m0i) +{ + size_t len, u, v; + + len = (m[0] + 31) >> 5; + for (u = 0; u < len; u ++) { + uint32_t f; + uint64_t cc; + + /* + * One pass computes x <- (x + f*m) / 2^31: the low word of + * the sum is not stored (v = 0 is skipped) and every other + * word moves down by one slot. With m0i as assumed above, f + * makes that dropped low word zero. + */ + f = MUL31_lo(x[1], m0i); + cc = 0; + for (v = 0; v < len; v ++) { + uint64_t z; + + z = (uint64_t)x[v + 1] + MUL31(f, m[v + 1]) + cc; + cc = z >> 31; + if (v != 0) { + x[v] = (uint32_t)z & 0x7FFFFFFF; + } + } + x[len] = (uint32_t)cc; + } + + /* + * We may have to do an extra subtraction, but only if the + * value in x[] is indeed greater than or equal to that of m[], + * which is why we must do two calls (first call computes the + * carry, second call performs the subtraction only if the carry + * is 0). + */ + br_i31_sub(x, m, NOT(br_i31_sub(x, m, 0))); +} diff --git a/third_party/bearssl/src/i31_iszero.c b/third_party/bearssl/src/i31_iszero.c new file mode 100644 index 0000000..8a7ea44 --- /dev/null +++ b/third_party/bearssl/src/i31_iszero.c @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Return 1 if every value word x[1..n] is zero, 0 otherwise, where + * n = (x[0] + 31) >> 5 (see inner.h). All words are ORed together + * without any data-dependent branch. + */ +uint32_t +br_i31_iszero(const uint32_t *x) +{ + uint32_t z; + size_t u; + + z = 0; + for (u = (x[0] + 31) >> 5; u > 0; u --) { + z |= x[u]; + } + /* + * z | -z has its top bit set exactly when z != 0. + */ + return ~(z | -z) >> 31; +} diff --git a/third_party/bearssl/src/i31_moddiv.c b/third_party/bearssl/src/i31_moddiv.c new file mode 100644 index 0000000..9950591 --- /dev/null +++ b/third_party/bearssl/src/i31_moddiv.c @@ -0,0 +1,488 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * In this file, we handle big integers with a custom format, i.e. 
+ * without the usual one-word header. Value is split into 31-bit words, + * each stored in a 32-bit slot (top bit is zero) in little-endian + * order. The length (in words) is provided explicitly. In some cases, + * the value can be negative (using two's complement representation). In + * some cases, the top word is allowed to have a 32nd bit. + */ + +/* + * Negate big integer conditionally. The value consists of 'len' words, + * with 31 bits in each word (the top bit of each word should be 0, + * except possibly for the last word). If 'ctl' is 1, the negation is + * computed; otherwise, if 'ctl' is 0, then the value is unchanged. + */ +static void +cond_negate(uint32_t *a, size_t len, uint32_t ctl) +{ + size_t k; + uint32_t cc, xm; + + /* + * xm is 0x7FFFFFFF when ctl = 1 and 0 when ctl = 0, so each word + * is complemented over 31 bits and the initial carry cc = ctl + * adds the 1 that completes the negation. + */ + cc = ctl; + xm = -ctl >> 1; + for (k = 0; k < len; k ++) { + uint32_t aw; + + aw = a[k]; + aw = (aw ^ xm) + cc; + a[k] = aw & 0x7FFFFFFF; + cc = aw >> 31; + } +} + +/* + * Finish modular reduction. Rules on input parameters: + * + * if neg = 1, then -m <= a < 0 + * if neg = 0, then 0 <= a < 2*m + * + * If neg = 0, then the top word of a[] may use 32 bits. + * + * Also, modulus m must be odd. + */ +static void +finish_mod(uint32_t *a, size_t len, const uint32_t *m, uint32_t neg) +{ + size_t k; + uint32_t cc, xm, ym; + + /* + * First pass: compare a (assumed nonnegative) with m. + * Note that if the final word uses the top extra bit, then + * subtracting m must yield a value less than 2^31, since we + * assumed that a < 2*m. 
+ */ + cc = 0; + for (k = 0; k < len; k ++) { + uint32_t aw, mw; + + aw = a[k]; + mw = m[k]; + cc = (aw - mw - cc) >> 31; + } + + /* + * At this point: + * if neg = 1, then we must add m (regardless of cc) + * if neg = 0 and cc = 0, then we must subtract m + * if neg = 0 and cc = 1, then we must do nothing + * + * xm is 0x7FFFFFFF when neg = 1 (0 otherwise): subtracting the + * complemented words of m with an initial borrow of 1 amounts to + * adding m. ym is all-ones when an operation is needed, and 0 + * when a must be left unchanged. + */ + xm = -neg >> 1; + ym = -(neg | (1 - cc)); + cc = neg; + for (k = 0; k < len; k ++) { + uint32_t aw, mw; + + aw = a[k]; + mw = (m[k] ^ xm) & ym; + aw = aw - mw - cc; + a[k] = aw & 0x7FFFFFFF; + cc = aw >> 31; + } +} + +/* + * Compute: + * a <- (a*pa+b*pb)/(2^31) + * b <- (a*qa+b*qb)/(2^31) + * The division is assumed to be exact (i.e. the low word is dropped). + * If the final a is negative, then it is negated. Similarly for b. + * Returned value is the combination of two bits: + * bit 0: 1 if a had to be negated, 0 otherwise + * bit 1: 1 if b had to be negated, 0 otherwise + * + * Factors pa, pb, qa and qb must be at most 2^31 in absolute value. + * Source integers a and b must be nonnegative; top word is not allowed + * to contain an extra 32nd bit. + */ +static uint32_t +co_reduce(uint32_t *a, uint32_t *b, size_t len, + int64_t pa, int64_t pb, int64_t qa, int64_t qb) +{ + size_t k; + int64_t cca, ccb; + uint32_t nega, negb; + + cca = 0; + ccb = 0; + for (k = 0; k < len; k ++) { + uint32_t wa, wb; + uint64_t za, zb; + uint64_t tta, ttb; + + /* + * Since: + * |pa| <= 2^31 + * |pb| <= 2^31 + * 0 <= wa <= 2^31 - 1 + * 0 <= wb <= 2^31 - 1 + * |cca| <= 2^32 - 1 + * Then: + * |za| <= (2^31-1)*(2^32) + (2^32-1) = 2^63 - 1 + * + * Thus, the new value of cca is such that |cca| <= 2^32 - 1. + * The same applies to ccb. 
+ */ + wa = a[k]; + wb = b[k]; + za = wa * (uint64_t)pa + wb * (uint64_t)pb + (uint64_t)cca; + zb = wa * (uint64_t)qa + wb * (uint64_t)qb + (uint64_t)ccb; + if (k > 0) { + a[k - 1] = za & 0x7FFFFFFF; + b[k - 1] = zb & 0x7FFFFFFF; + } + + /* + * For the new values of cca and ccb, we need a signed + * right-shift; since, in C, right-shifting a signed + * negative value is implementation-defined, we use a + * custom portable sign extension expression. + */ +#define M ((uint64_t)1 << 32) + tta = za >> 31; + ttb = zb >> 31; + tta = (tta ^ M) - M; + ttb = (ttb ^ M) - M; + cca = *(int64_t *)&tta; + ccb = *(int64_t *)&ttb; +#undef M + } + a[len - 1] = (uint32_t)cca; + b[len - 1] = (uint32_t)ccb; + + nega = (uint32_t)((uint64_t)cca >> 63); + negb = (uint32_t)((uint64_t)ccb >> 63); + cond_negate(a, len, nega); + cond_negate(b, len, negb); + return nega | (negb << 1); +} + +/* + * Compute: + * a <- (a*pa+b*pb)/(2^31) mod m + * b <- (a*qa+b*qb)/(2^31) mod m + * + * m0i is equal to -1/m[0] mod 2^31. + * + * Factors pa, pb, qa and qb must be at most 2^31 in absolute value. + * Source integers a and b must be nonnegative; top word is not allowed + * to contain an extra 32nd bit. + */ +static void +co_reduce_mod(uint32_t *a, uint32_t *b, size_t len, + int64_t pa, int64_t pb, int64_t qa, int64_t qb, + const uint32_t *m, uint32_t m0i) +{ + size_t k; + int64_t cca, ccb; + uint32_t fa, fb; + + cca = 0; + ccb = 0; + /* + * fa and fb are the Montgomery factors: adding fa*m (resp. fb*m) + * makes the low word of each sum a multiple of 2^31, so that the + * division by 2^31 is exact. + */ + fa = ((a[0] * (uint32_t)pa + b[0] * (uint32_t)pb) * m0i) & 0x7FFFFFFF; + fb = ((a[0] * (uint32_t)qa + b[0] * (uint32_t)qb) * m0i) & 0x7FFFFFFF; + for (k = 0; k < len; k ++) { + uint32_t wa, wb; + uint64_t za, zb; + uint64_t tta, ttb; + + /* + * In this loop, carries 'cca' and 'ccb' always fit on + * 33 bits (in absolute value). 
+ */ + wa = a[k]; + wb = b[k]; + za = wa * (uint64_t)pa + wb * (uint64_t)pb + + m[k] * (uint64_t)fa + (uint64_t)cca; + zb = wa * (uint64_t)qa + wb * (uint64_t)qb + + m[k] * (uint64_t)fb + (uint64_t)ccb; + if (k > 0) { + a[k - 1] = (uint32_t)za & 0x7FFFFFFF; + b[k - 1] = (uint32_t)zb & 0x7FFFFFFF; + } + +#define M ((uint64_t)1 << 32) + tta = za >> 31; + ttb = zb >> 31; + tta = (tta ^ M) - M; + ttb = (ttb ^ M) - M; + cca = *(int64_t *)&tta; + ccb = *(int64_t *)&ttb; +#undef M + } + a[len - 1] = (uint32_t)cca; + b[len - 1] = (uint32_t)ccb; + + /* + * At this point: + * -m <= a < 2*m + * -m <= b < 2*m + * (this is a case of Montgomery reduction) + * The top word of 'a' and 'b' may have a 32nd bit set. + * We may have to add or subtract the modulus. + */ + finish_mod(a, len, m, (uint32_t)((uint64_t)cca >> 63)); + finish_mod(b, len, m, (uint32_t)((uint64_t)ccb >> 63)); +} + +/* + * Modular division (see inner.h): x is replaced with x/y mod m. The + * returned value is 1 when GCD(y,m) is 1, in which case the result is + * valid, and 0 otherwise. m must be odd, and m0i is -1/m[1] mod 2^31 + * (co_reduce_mod() is given m + 1 and documents m0i relative to that + * pointer). t is scratch space for at least 3*len words, where + * len = (m[0] + 31) >> 5: it holds a, b and v. + */ +uint32_t +br_i31_moddiv(uint32_t *x, const uint32_t *y, const uint32_t *m, uint32_t m0i, + uint32_t *t) +{ + /* + * Algorithm is an extended binary GCD. We maintain four values + * a, b, u and v, with the following invariants: + * + * a * x = y * u mod m + * b * x = y * v mod m + * + * Starting values are: + * + * a = y + * b = m + * u = x + * v = 0 + * + * The formal definition of the algorithm is a sequence of steps: + * + * - If a is even, then a <- a/2 and u <- u/2 mod m. + * - Otherwise, if b is even, then b <- b/2 and v <- v/2 mod m. + * - Otherwise, if a > b, then a <- (a-b)/2 and u <- (u-v)/2 mod m. + * - Otherwise, b <- (b-a)/2 and v <- (v-u)/2 mod m. + * + * Algorithm stops when a = b. At that point, they both are equal + * to GCD(y,m); the modular division succeeds if that value is 1. + * The result of the modular division is then u (or v: both are + * equal at that point). + * + * Each step makes either a or b shrink by at least one bit; hence, + * if m has bit length k bits, then 2k-2 steps are sufficient. 
+ * + * + * Though complexity is quadratic in the size of m, the bit-by-bit + * processing is not very efficient. We can speed up processing by + * remarking that the decisions are taken based only on observation + * of the top and low bits of a and b. + * + * In the loop below, at each iteration, we use the two top words + * of a and b, and the low words of a and b, to compute reduction + * parameters pa, pb, qa and qb such that the new values for a + * and b are: + * + * a' = (a*pa + b*pb) / (2^31) + * b' = (a*qa + b*qb) / (2^31) + * + * the division being exact. + * + * Since the choices are based on the top words, they may be slightly + * off, requiring an optional correction: if a' < 0, then we replace + * pa with -pa, and pb with -pb. The total length of a and b is + * thus reduced by at least 30 bits at each iteration. + * + * The stopping conditions are still the same, though: when a + * and b become equal, they must be both odd (since m is odd, + * the GCD cannot be even), therefore the next operation is a + * subtraction, and one of the values becomes 0. At that point, + * nothing else happens, i.e. one value is stuck at 0, and the + * other one is the GCD. + */ + size_t len, k; + uint32_t *a, *b, *u, *v; + uint32_t num, r; + + len = (m[0] + 31) >> 5; + a = t; + b = a + len; + u = x + 1; + v = b + len; + memcpy(a, y + 1, len * sizeof *y); + memcpy(b, m + 1, len * sizeof *m); + memset(v, 0, len * sizeof *v); + + /* + * Loop below ensures that a and b are reduced by some bits each, + * for a total of at least 30 bits. The counter num starts at + * twice (m[0] - (m[0] >> 5)) plus 30 and drops by 30 per pass. + */ + for (num = ((m[0] - (m[0] >> 5)) << 1) + 30; num >= 30; num -= 30) { + size_t j; + uint32_t c0, c1; + uint32_t a0, a1, b0, b1; + uint64_t a_hi, b_hi; + uint32_t a_lo, b_lo; + int64_t pa, pb, qa, qb; + int i; + + /* + * Extract top words of a and b. If j is the highest + * index >= 1 such that a[j] != 0 or b[j] != 0, then we want + * (a[j] << 31) + a[j - 1], and (b[j] << 31) + b[j - 1]. 
+ * If a and b are down to one word each, then we use a[0] + * and b[0]. + */ + c0 = (uint32_t)-1; + c1 = (uint32_t)-1; + a0 = 0; + a1 = 0; + b0 = 0; + b1 = 0; + j = len; + while (j -- > 0) { + uint32_t aw, bw; + + aw = a[j]; + bw = b[j]; + /* + * c0 stays all-ones only while every word seen so far + * is zero in both a and b; c1 is the value c0 had one + * word earlier. + */ + a0 ^= (a0 ^ aw) & c0; + a1 ^= (a1 ^ aw) & c1; + b0 ^= (b0 ^ bw) & c0; + b1 ^= (b1 ^ bw) & c1; + c1 = c0; + c0 &= (((aw | bw) + 0x7FFFFFFF) >> 31) - (uint32_t)1; + } + + /* + * If c1 = 0, then we grabbed two words for a and b. + * If c1 != 0 but c0 = 0, then we grabbed one word. It + * is not possible that c1 != 0 and c0 != 0, because that + * would mean that both integers are zero. + */ + a1 |= a0 & c1; + a0 &= ~c1; + b1 |= b0 & c1; + b0 &= ~c1; + a_hi = ((uint64_t)a0 << 31) + a1; + b_hi = ((uint64_t)b0 << 31) + b1; + a_lo = a[0]; + b_lo = b[0]; + + /* + * Compute reduction factors: + * + * a' = a*pa + b*pb + * b' = a*qa + b*qb + * + * such that a' and b' are both multiple of 2^31, but are + * only marginally larger than a and b. + */ + pa = 1; + pb = 0; + qa = 0; + qb = 1; + for (i = 0; i < 31; i ++) { + /* + * At each iteration: + * + * a <- (a-b)/2 if: a is odd, b is odd, a_hi > b_hi + * b <- (b-a)/2 if: a is odd, b is odd, a_hi <= b_hi + * a <- a/2 if: a is even + * b <- b/2 if: a is odd, b is even + * + * We multiply a_lo and b_lo by 2 at each + * iteration, thus a division by 2 really is a + * non-multiplication by 2. + */ + uint32_t r, oa, ob, cAB, cBA, cA; + uint64_t rz; + + /* + * r = GT(a_hi, b_hi) + * But the GT() function works on uint32_t operands, + * so we inline a 64-bit version here. + */ + rz = b_hi - a_hi; + r = (uint32_t)((rz ^ ((a_hi ^ b_hi) + & (a_hi ^ rz))) >> 63); + + /* + * cAB = 1 if b must be subtracted from a + * cBA = 1 if a must be subtracted from b + * cA = 1 if a is divided by 2, 0 otherwise + * + * Rules: + * + * cAB and cBA cannot be both 1. + * if a is not divided by 2, b is. 
+ */ + oa = (a_lo >> i) & 1; + ob = (b_lo >> i) & 1; + cAB = oa & ob & r; + cBA = oa & ob & NOT(r); + cA = cAB | NOT(oa); + + /* + * Conditional subtractions. + */ + a_lo -= b_lo & -cAB; + a_hi -= b_hi & -(uint64_t)cAB; + pa -= qa & -(int64_t)cAB; + pb -= qb & -(int64_t)cAB; + b_lo -= a_lo & -cBA; + b_hi -= a_hi & -(uint64_t)cBA; + qa -= pa & -(int64_t)cBA; + qb -= pb & -(int64_t)cBA; + + /* + * Shifting. When cA = 1, a_hi is halved and b_lo, qa + * and qb are doubled; when cA = 0, b_hi is halved and + * a_lo, pa and pb are doubled. + */ + a_lo += a_lo & (cA - 1); + pa += pa & ((int64_t)cA - 1); + pb += pb & ((int64_t)cA - 1); + a_hi ^= (a_hi ^ (a_hi >> 1)) & -(uint64_t)cA; + b_lo += b_lo & -cA; + qa += qa & -(int64_t)cA; + qb += qb & -(int64_t)cA; + b_hi ^= (b_hi ^ (b_hi >> 1)) & ((uint64_t)cA - 1); + } + + /* + * Replace a and b with new values a' and b'. The factors + * of a value that co_reduce() had to negate are negated as + * well before they are applied to u and v. + */ + r = co_reduce(a, b, len, pa, pb, qa, qb); + pa -= pa * ((r & 1) << 1); + pb -= pb * ((r & 1) << 1); + qa -= qa * (r & 2); + qb -= qb * (r & 2); + co_reduce_mod(u, v, len, pa, pb, qa, qb, m + 1, m0i); + } + + /* + * Now one of the arrays should be 0, and the other contains + * the GCD. If a is 0, then u is 0 as well, and v contains + * the division result. + * Result is correct if and only if GCD is 1. 
+ * Since one of a and b is zero, a | b is the GCD, and r ends up + * 0 exactly when that value is 1. ORing v into u leaves the + * result in x (presumably the symmetric case holds when b is 0). + */ + r = (a[0] | b[0]) ^ 1; + u[0] |= v[0]; + for (k = 1; k < len; k ++) { + r |= a[k] | b[k]; + u[k] |= v[k]; + } + return EQ0(r); +} diff --git a/third_party/bearssl/src/i31_modpow.c b/third_party/bearssl/src/i31_modpow.c new file mode 100644 index 0000000..4ef3f5d --- /dev/null +++ b/third_party/bearssl/src/i31_modpow.c @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Modular exponentiation (see inner.h): x is replaced with x^e mod m. + * The exponent is the elen bytes at e, most significant byte first + * (bit k of the exponent is read from e[elen - 1 - (k >> 3)]). t1 and + * t2 are scratch buffers, each used for mlen bytes. Every exponent bit + * goes through the same two br_i31_montymul() calls; the bit value is + * only passed to CCOPY() as its control argument. + */ +void +br_i31_modpow(uint32_t *x, + const unsigned char *e, size_t elen, + const uint32_t *m, uint32_t m0i, uint32_t *t1, uint32_t *t2) +{ + size_t mlen; + uint32_t k; + + /* + * 'mlen' is the length of m[] expressed in bytes (including + * the "bit length" first field). + */ + mlen = ((m[0] + 63) >> 5) * sizeof m[0]; + + /* + * Throughout the algorithm: + * -- t1[] is in Montgomery representation; it contains x, x^2, + * x^4, x^8... 
+ * -- The result is accumulated, in normal representation, in + * the x[] array. + * -- t2[] is used as destination buffer for each multiplication. + * + * Note that there is no need to call br_i31_from_monty(). + */ + memcpy(t1, x, mlen); + br_i31_to_monty(t1, m); + br_i31_zero(x, m[0]); + x[1] = 1; + for (k = 0; k < ((uint32_t)elen << 3); k ++) { + uint32_t ctl; + + ctl = (e[elen - 1 - (k >> 3)] >> (k & 7)) & 1; + br_i31_montymul(t2, x, t1, m, m0i); + CCOPY(ctl, x, t2, mlen); + br_i31_montymul(t2, t1, t1, m, m0i); + memcpy(t1, t2, mlen); + } +} diff --git a/third_party/bearssl/src/i31_modpow2.c b/third_party/bearssl/src/i31_modpow2.c new file mode 100644 index 0000000..0b8f8cf --- /dev/null +++ b/third_party/bearssl/src/i31_modpow2.c @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* + * Windowed modular exponentiation (see inner.h): x is replaced with + * x^e mod m, e being elen bytes with the most significant byte first. + * tmp is scratch space of twlen words. Returns 0, leaving x untouched, + * when twlen is below 2*mwlen (mwlen being the word length of m with + * its header word, rounded up to an even count); returns 1 on success. + * Larger tmp buffers allow a window of up to 5 bits. + */ +uint32_t +br_i31_modpow_opt(uint32_t *x, + const unsigned char *e, size_t elen, + const uint32_t *m, uint32_t m0i, uint32_t *tmp, size_t twlen) +{ + size_t mlen, mwlen; + uint32_t *t1, *t2, *base; + size_t u, v; + uint32_t acc; + int acc_len, win_len; + + /* + * Get modulus size. mlen is the size in bytes (header word + * included); mwlen is then rounded up to an even word count. + */ + mwlen = (m[0] + 63) >> 5; + mlen = mwlen * sizeof m[0]; + mwlen += (mwlen & 1); + t1 = tmp; + t2 = tmp + mwlen; + + /* + * Compute possible window size, with a maximum of 5 bits. + * When the window has size 1 bit, we use a specific code + * that requires only two temporaries. Otherwise, for a + * window of k bits, we need 2^k+1 temporaries. + */ + if (twlen < (mwlen << 1)) { + return 0; + } + for (win_len = 5; win_len > 1; win_len --) { + if ((((uint32_t)1 << win_len) + 1) * mwlen <= twlen) { + break; + } + } + + /* + * Everything is done in Montgomery representation. + */ + br_i31_to_monty(x, m); + + /* + * Compute window contents. If the window has size one bit only, + * then t2 is set to x; otherwise, t2[0] is left untouched, and + * t2[k] is set to x^k (for k >= 1). + */ + if (win_len == 1) { + memcpy(t2, x, mlen); + } else { + memcpy(t2 + mwlen, x, mlen); + base = t2 + mwlen; + for (u = 2; u < ((unsigned)1 << win_len); u ++) { + br_i31_montymul(base + mwlen, base, x, m, m0i); + base += mwlen; + } + } + + /* + * We need to set x to 1, in Montgomery representation. This can + * be done efficiently by setting the high word to 1, then doing + * one word-sized shift. + */ + br_i31_zero(x, m[0]); + x[(m[0] + 31) >> 5] = 1; + br_i31_muladd_small(x, 0, m); + + /* + * We process bits from most to least significant. At each + * loop iteration, we have acc_len bits in acc. + */ + acc = 0; + acc_len = 0; + while (acc_len > 0 || elen > 0) { + int i, k; + uint32_t bits; + + /* + * Get the next bits. 
+ */ + k = win_len; + if (acc_len < win_len) { + if (elen > 0) { + acc = (acc << 8) | *e ++; + elen --; + acc_len += 8; + } else { + k = acc_len; + } + } + bits = (acc >> (acc_len - k)) & (((uint32_t)1 << k) - 1); + acc_len -= k; + + /* + * We could get exactly k bits. Compute k squarings. + */ + for (i = 0; i < k; i ++) { + br_i31_montymul(t1, x, x, m, m0i); + memcpy(x, t1, mlen); + } + + /* + * Window lookup: we want to set t2 to the window + * lookup value, assuming the bits are non-zero. If + * the window length is 1 bit only, then t2 is + * already set; otherwise, we do a constant-time lookup. + * Every table entry is read; mask is all-ones only for + * the entry whose index equals bits. The inner loop + * starts at v = 1, the header word t2[0] having been + * handled (presumably set) by br_i31_zero(). + */ + if (win_len > 1) { + br_i31_zero(t2, m[0]); + base = t2 + mwlen; + for (u = 1; u < ((uint32_t)1 << k); u ++) { + uint32_t mask; + + mask = -EQ(u, bits); + for (v = 1; v < mwlen; v ++) { + t2[v] |= mask & base[v]; + } + base += mwlen; + } + } + + /* + * Multiply with the looked-up value. We keep the + * product only if the exponent bits are not all-zero. + */ + br_i31_montymul(t1, x, t2, m, m0i); + CCOPY(NEQ(bits, 0), x, t1, mlen); + } + + /* + * Convert back from Montgomery representation, and exit. 
+ */ + br_i31_from_monty(x, m, m0i); + return 1; +} diff --git a/third_party/bearssl/src/i31_montmul.c b/third_party/bearssl/src/i31_montmul.c new file mode 100644 index 0000000..758f8f4 --- /dev/null +++ b/third_party/bearssl/src/i31_montmul.c @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Montgomery multiplication (see inner.h): d receives x*y/R mod m, + * where R = 2^(31*len) and len = (m[0] + 31) >> 5, one division by + * 2^31 being performed per outer loop iteration. d is passed to + * br_i31_zero() before x and y are read, so it presumably must not + * alias either operand. d[0] is set to m[0] on return. + */ +void +br_i31_montymul(uint32_t *d, const uint32_t *x, const uint32_t *y, + const uint32_t *m, uint32_t m0i) +{ + /* + * Each outer loop iteration computes: + * d <- (d + xu*y + f*m) / 2^31 + * We have xu <= 2^31-1 and f <= 2^31-1. + * Thus, if d <= 2*m-1 on input, then: + * 2*m-1 + 2*(2^31-1)*m <= (2^32)*m-1 + * and the new d value is less than 2*m. + * + * We represent d over 31-bit words, with an extra word 'dh' + * which can thus be only 0 or 1. 
+ */ + size_t len, len4, u, v; + uint32_t dh; + + /* + * len4 is len rounded down to a multiple of 4; it bounds the + * four-way unrolled inner loop below. + */ + len = (m[0] + 31) >> 5; + len4 = len & ~(size_t)3; + br_i31_zero(d, m[0]); + dh = 0; + for (u = 0; u < len; u ++) { + /* + * The carry for each operation fits on 32 bits: + * d[v+1] <= 2^31-1 + * xu*y[v+1] <= (2^31-1)*(2^31-1) + * f*m[v+1] <= (2^31-1)*(2^31-1) + * r <= 2^32-1 + * (2^31-1) + 2*(2^31-1)*(2^31-1) + (2^32-1) = 2^63 - 2^31 + * After division by 2^31, the new r is then at most 2^32-1 + * + * Using a 32-bit carry has performance benefits on 32-bit + * systems; however, on 64-bit architectures, we prefer to + * keep the carry (r) in a 64-bit register, thus avoiding some + * "clear high bits" operations. + */ + uint32_t f, xu; +#if BR_64 + uint64_t r; +#else + uint32_t r; +#endif + + xu = x[u + 1]; + f = MUL31_lo((d[1] + MUL31_lo(x[u + 1], y[1])), m0i); + + r = 0; + for (v = 0; v < len4; v += 4) { + uint64_t z; + + z = (uint64_t)d[v + 1] + MUL31(xu, y[v + 1]) + + MUL31(f, m[v + 1]) + r; + r = z >> 31; + d[v + 0] = (uint32_t)z & 0x7FFFFFFF; + z = (uint64_t)d[v + 2] + MUL31(xu, y[v + 2]) + + MUL31(f, m[v + 2]) + r; + r = z >> 31; + d[v + 1] = (uint32_t)z & 0x7FFFFFFF; + z = (uint64_t)d[v + 3] + MUL31(xu, y[v + 3]) + + MUL31(f, m[v + 3]) + r; + r = z >> 31; + d[v + 2] = (uint32_t)z & 0x7FFFFFFF; + z = (uint64_t)d[v + 4] + MUL31(xu, y[v + 4]) + + MUL31(f, m[v + 4]) + r; + r = z >> 31; + d[v + 3] = (uint32_t)z & 0x7FFFFFFF; + } + for (; v < len; v ++) { + uint64_t z; + + z = (uint64_t)d[v + 1] + MUL31(xu, y[v + 1]) + + MUL31(f, m[v + 1]) + r; + r = z >> 31; + d[v] = (uint32_t)z & 0x7FFFFFFF; + } + + /* + * Since the new dh can only be 0 or 1, the addition of + * the old dh with the carry MUST fit on 32 bits, and + * thus can be done into dh itself. + */ + dh += r; + d[len] = dh & 0x7FFFFFFF; + dh >>= 31; + } + + /* + * We must write back the bit length because it was overwritten in + * the loop (not overwriting it would require a test in the loop, + * which would yield bigger and slower code). 
+ */ + d[0] = m[0]; + + /* + * d[] may still be greater than m[] at that point; notably, the + * 'dh' word may be non-zero. + */ + br_i31_sub(d, m, NEQ(dh, 0) | NOT(br_i31_sub(d, m, 0))); +} diff --git a/third_party/bearssl/src/i31_mulacc.c b/third_party/bearssl/src/i31_mulacc.c new file mode 100644 index 0000000..7410e54 --- /dev/null +++ b/third_party/bearssl/src/i31_mulacc.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Multiply-accumulate (see inner.h): the product a*b is added into the + * value words of d, and d[0] is set to the encoded sum of the bit + * lengths of a and b. NOTE(review): in each row the top word + * d[1 + u + alen] is stored rather than added, so d presumably must not + * hold significant data above its low alen words on entry; confirm + * against inner.h. + */ +void +br_i31_mulacc(uint32_t *d, const uint32_t *a, const uint32_t *b) +{ + size_t alen, blen, u; + uint32_t dl, dh; + + alen = (a[0] + 31) >> 5; + blen = (b[0] + 31) >> 5; + + /* + * We want to add the two bit lengths, but these are encoded, + * which requires some extra care. 
+ * The last term of the d[0] expression is 1 exactly when + * dl >= 31, and 0 otherwise. + */ + dl = (a[0] & 31) + (b[0] & 31); + dh = (a[0] >> 5) + (b[0] >> 5); + d[0] = (dh << 5) + dl + (~(uint32_t)(dl - 31) >> 31); + + for (u = 0; u < blen; u ++) { + uint32_t f; + size_t v; + + /* + * Carry always fits on 31 bits; we want to keep it in a + * 32-bit register on 32-bit architectures (on a 64-bit + * architecture, cast down from 64 to 32 bits means + * clearing the high bits, which is not free; on a 32-bit + * architecture, the same operation really means ignoring + * the top register, which has negative or zero cost). + */ +#if BR_64 + uint64_t cc; +#else + uint32_t cc; +#endif + + f = b[1 + u]; + cc = 0; + for (v = 0; v < alen; v ++) { + uint64_t z; + + z = (uint64_t)d[1 + u + v] + MUL31(f, a[1 + v]) + cc; + cc = z >> 31; + d[1 + u + v] = (uint32_t)z & 0x7FFFFFFF; + } + d[1 + u + alen] = (uint32_t)cc; + } +} diff --git a/third_party/bearssl/src/i31_muladd.c b/third_party/bearssl/src/i31_muladd.c new file mode 100644 index 0000000..eecd9e2 --- /dev/null +++ b/third_party/bearssl/src/i31_muladd.c @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
/*
 * Replace x with (x * 2^31 + z) mod m, where x[] and m[] use 31-bit
 * words with the encoded bit length in word 0.
 *
 * Branches depend only on the modulus bit length, which is accepted
 * as leakable; the remaining work uses the MUX/EQ/GT/LT primitives.
 *
 * see inner.h
 */
void
br_i31_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m)
{
	uint32_t mod_bits, hi, a0, a1, b0, g, q;
	uint32_t carry, ge_mod, over, under;
	unsigned top_bits;
	size_t nwords, k;

	mod_bits = m[0];
	if (mod_bits == 0) {
		return;
	}

	/*
	 * Single-word modulus: x[1]*2^31 + z is split into a 32-bit
	 * high/low pair and reduced directly with br_rem().
	 */
	if (mod_bits <= 31) {
		uint32_t top, low;

		top = x[1] >> 1;
		low = (x[1] << 31) | z;
		x[1] = br_rem(top, low, m[1]);
		return;
	}
	nwords = (mod_bits + 31) >> 5;
	top_bits = (unsigned)mod_bits & 31;

	/*
	 * Quotient estimation. With w = 2^31, write
	 *    a = (w*a0 + a1) * w^N + a2
	 *    b = b0 * w^N + b2
	 * with 0 <= a0, a1 < w, w/2 <= b0 < w, 0 <= a2, b2 < w^N and
	 * a < w*b, so that q = a/b fits on one word. If a = b*q + r
	 * (0 <= r < b) and a0*w + a1 = b0*u + v (0 <= v < b0), then
	 * 0 <= u <= w and u-2 <= q <= u.
	 *
	 * a0 is taken from the top of x[] before the shift; a1 and b0
	 * are taken after x[] has been moved up by one word and z has
	 * been inserted at the bottom. When the top modulus word is not
	 * full, the words are realigned so that b0 has its high bit set.
	 */
	hi = x[nwords];
	if (top_bits == 0) {
		a0 = hi;
	} else {
		a0 = ((hi << (31 - top_bits)) | (x[nwords - 1] >> top_bits))
			& 0x7FFFFFFF;
	}
	memmove(x + 2, x + 1, (nwords - 1) * sizeof *x);
	x[1] = z;
	if (top_bits == 0) {
		a1 = x[nwords];
		b0 = m[nwords];
	} else {
		a1 = ((x[nwords] << (31 - top_bits))
			| (x[nwords - 1] >> top_bits)) & 0x7FFFFFFF;
		b0 = ((m[nwords] << (31 - top_bits))
			| (m[nwords - 1] >> top_bits)) & 0x7FFFFFFF;
	}

	/*
	 * Estimated quotient q from the value g returned by br_div():
	 *  -- a0 == b0: q = 0x7FFFFFFF;
	 *  -- otherwise q = 0 if g == 0, else q = g - 1.
	 * The true quotient is then q-1, q or q+1. a0, a1 and b0 are
	 * 31-bit words, so the dividend is repacked into a 32-bit pair
	 * before the call.
	 */
	g = br_div(a0 >> 1, a1 | (a0 << 31), b0);
	q = MUX(EQ(a0, b0), 0x7FFFFFFF, MUX(EQ(g, 0), 0, g - 1));

	/*
	 * Subtract q*m from x (whose extra top word is 'hi'). 'ge_mod'
	 * ends up 1 if the low words of the result are greater than or
	 * equal to the modulus (ignoring 'hi' and the carry).
	 */
	carry = 0;
	ge_mod = 1;
	for (k = 1; k <= nwords; k ++) {
		uint32_t mw, pw, xw, dw;
		uint64_t prod;

		mw = m[k];
		prod = MUL31(mw, q) + carry;
		carry = (uint32_t)(prod >> 31);
		pw = (uint32_t)prod & (uint32_t)0x7FFFFFFF;
		xw = x[k];
		dw = xw - pw;
		carry += dw >> 31;
		dw &= 0x7FFFFFFF;
		x[k] = dw;
		ge_mod = MUX(EQ(dw, mw), ge_mod, GT(dw, mw));
	}

	/*
	 * carry > hi: the result went negative (q too large), add m once.
	 * carry < hi, or carry == hi with the low words >= m: q was too
	 * small, subtract m once.
	 */
	over = GT(carry, hi);
	under = ~over & (ge_mod | LT(carry, hi));
	br_i31_add(x, m, over);
	br_i31_sub(x, m, under);
}
/*
 * Compute -(1/x) modulo 2^31 for an odd x; the result is 0 when x is
 * even.
 *
 * see inner.h
 */
uint32_t
br_i31_ninv31(uint32_t x)
{
	uint32_t inv;
	int step;

	/*
	 * 2 - x is an inverse of x modulo 4 when x is odd; each
	 * refinement step below doubles the number of valid low bits.
	 */
	inv = 2 - x;
	for (step = 0; step < 4; step ++) {
		inv *= 2 - inv * x;
	}

	/* Negate, force 0 for even x, and keep 31 bits. */
	return MUX(x & 1, -inv, 0) & 0x7FFFFFFF;
}
/*
 * Reduce a[] modulo m[] and store the result in x[]. x[0] is set to
 * the encoded bit length of m. If that length is 0, nothing else is
 * written.
 *
 * see inner.h
 */
void
br_i31_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m)
{
	uint32_t mod_bits, src_bits;
	size_t mod_words, src_words, k;

	mod_bits = m[0];
	x[0] = mod_bits;
	if (mod_bits == 0) {
		return;
	}
	mod_words = (mod_bits + 31) >> 5;

	src_bits = a[0];
	src_words = (src_bits + 31) >> 5;

	/*
	 * Source announced as shorter than the modulus: copy its words
	 * unchanged and clear the remaining upper words of x[].
	 */
	if (src_bits < mod_bits) {
		memcpy(x + 1, a + 1, src_words * sizeof *a);
		for (k = src_words + 1; k <= mod_words; k ++) {
			x[k] = 0;
		}
		return;
	}

	/*
	 * Otherwise start from the top (mod_words - 1) words of a[],
	 * with a zero top word in x[], and feed the remaining words of
	 * a[] one at a time, most significant first.
	 */
	memcpy(x + 1, a + 2 + (src_words - mod_words),
		(mod_words - 1) * sizeof *a);
	x[mod_words] = 0;
	k = 1 + src_words - mod_words;
	while (k > 0) {
		br_i31_muladd_small(x, a[k], m);
		k --;
	}
}
/*
 * Shift the integer x[] (31-bit words, encoded bit length in x[0])
 * to the right by 'count' bits, in place. x[0] is left unchanged.
 * NOTE(review): the shift expressions are only well-defined for a
 * count between 0 and 31; confirm the documented range in inner.h.
 *
 * see inner.h
 */
void
br_i31_rshift(uint32_t *x, int count)
{
	size_t nwords, i;
	uint32_t low;

	nwords = (x[0] + 31) >> 5;
	if (nwords == 0) {
		return;
	}

	/*
	 * 'low' holds the surviving bits of the current word; the bits
	 * dropped from the next word are merged on top of it.
	 */
	low = x[1] >> count;
	for (i = 1; i < nwords; i ++) {
		uint32_t next;

		next = x[i + 1];
		x[i] = ((next << (31 - count)) | low) & 0x7FFFFFFF;
		low = next >> count;
	}
	x[nwords] = low;
}
/*
 * Compute a - b over 31-bit words and return the final borrow (0 or
 * 1). The word count comes from a[0]. The result is written back to
 * a[] only where MUX selects it under 'ctl'; otherwise each word of
 * a[] is rewritten with its previous value. The borrow is computed
 * in both cases.
 *
 * see inner.h
 */
uint32_t
br_i31_sub(uint32_t *a, const uint32_t *b, uint32_t ctl)
{
	uint32_t borrow;
	size_t i, end;

	borrow = 0;
	end = (a[0] + 63) >> 5;
	for (i = 1; i < end; i ++) {
		uint32_t old, diff;

		old = a[i];
		diff = old - b[i] - borrow;

		/* Bit 31 of the raw difference is the outgoing borrow. */
		borrow = diff >> 31;
		a[i] = MUX(ctl, diff & 0x7FFFFFFF, old);
	}
	return borrow;
}
/*
 * Convert x[] to Montgomery representation modulo m[]: one call to
 * br_i31_muladd_small() with a zero extra word is made per word of
 * the modulus.
 *
 * see inner.h
 */
void
br_i31_to_monty(uint32_t *x, const uint32_t *m)
{
	uint32_t remaining;

	remaining = (m[0] + 31) >> 5;
	while (remaining > 0) {
		br_i31_muladd_small(x, 0, m);
		remaining --;
	}
}
/*
 * Compute a + b over 32-bit words and return the final carry (0 or
 * 1). The word count comes from a[0]. The sum is written back to a[]
 * only where MUX selects it under 'ctl'; otherwise each word of a[]
 * is rewritten with its previous value. The carry is computed in
 * both cases.
 *
 * see inner.h
 */
uint32_t
br_i32_add(uint32_t *a, const uint32_t *b, uint32_t ctl)
{
	uint32_t carry;
	size_t i, end;

	carry = 0;
	end = (a[0] + 63) >> 5;
	for (i = 1; i < end; i ++) {
		uint32_t old, sum;

		old = a[i];
		sum = old + b[i] + carry;

		/*
		 * The addition wrapped if the sum is below the old word,
		 * or if it equals the old word while a carry came in.
		 */
		carry = LT(sum, old) | (carry & EQ(sum, old));
		a[i] = MUX(ctl, sum, old);
	}
	return carry;
}
/*
 * Return the bit length of the integer held in the xlen words of x[]
 * (least significant word first). All words are visited; the
 * selection of the top non-zero word goes through EQ/MUX.
 *
 * see inner.h
 */
uint32_t
br_i32_bit_length(uint32_t *x, size_t xlen)
{
	uint32_t top, top_idx;
	size_t k;

	top = 0;
	top_idx = 0;
	for (k = xlen; k > 0; k --) {
		uint32_t unset;

		/*
		 * Scanning from the most significant word down, latch the
		 * first non-zero word and its index; later words leave
		 * the latched values untouched.
		 */
		unset = EQ(top, 0);
		top = MUX(unset, x[k - 1], top);
		top_idx = MUX(unset, (uint32_t)(k - 1), top_idx);
	}
	return (top_idx << 5) + BIT_LENGTH(top);
}

/*
 * Decode the big-endian integer src[0..len-1] into x[], provided it
 * is strictly lower than the modulus m[]. Returns 1 on success. If
 * the value is not lower than m, x[] is left at zero and 0 is
 * returned.
 *
 * see inner.h
 */
uint32_t
br_i32_decode_mod(uint32_t *x, const void *src, size_t len, const uint32_t *m)
{
	const unsigned char *bytes;
	uint32_t cmp;
	size_t pos, idx, mod_bytes;

	bytes = src;

	/*
	 * Pass 1: compare source and modulus byte by byte, most
	 * significant first, over the longer of the two lengths (the
	 * shorter one is treated as zero-extended). 'cmp' keeps the
	 * first non-zero comparison: 0x00000000 if equal, 0x00000001 if
	 * the source is greater, 0xFFFFFFFF if it is lower.
	 */
	mod_bytes = (m[0] + 7) >> 3;
	cmp = 0;
	for (pos = (mod_bytes > len) ? mod_bytes : len; pos > 0; pos --) {
		uint32_t mb, xb;

		idx = pos - 1;
		mb = (idx < mod_bytes)
			? ((m[1 + (idx >> 2)] >> ((idx & 3) << 3)) & 0xFF)
			: 0;
		xb = (idx < len) ? bytes[len - pos] : 0;
		cmp = MUX(EQ(cmp, 0), (uint32_t)CMP(xb, mb), cmp);
	}

	/*
	 * Only 0xFFFFFFFF is acceptable. Turn it into a 0xFF byte mask
	 * (0x00 for the other two outcomes).
	 */
	cmp >>= 24;

	/*
	 * Pass 2: clear x[], then OR in the source bytes masked by
	 * 'cmp', so a rejected value leaves x[] at zero.
	 */
	br_i32_zero(x, m[0]);
	pos = (mod_bytes > len) ? len : mod_bytes;
	while (pos > 0) {
		uint32_t xb;

		xb = bytes[len - pos] & cmp;
		pos --;
		x[1 + (pos >> 2)] |= xb << ((pos & 3) << 3);
	}
	return cmp >> 7;
}
/*
 * Decode the big-endian integer src[0..len-1] into x[] as 32-bit
 * words (least significant word in x[1]), then set x[0] to the bit
 * length reported by br_i32_bit_length().
 *
 * see inner.h
 */
void
br_i32_decode(uint32_t *x, const void *src, size_t len)
{
	const unsigned char *bytes;
	size_t left, widx;

	bytes = src;
	left = len;
	widx = 1;

	/* Complete words, taken from the end of the buffer. */
	while (left >= 4) {
		left -= 4;
		x[widx ++] = br_dec32be(bytes + left);
	}

	/* One to three leading bytes form a final, partial word. */
	switch (left) {
	case 3:
		x[widx ++] = ((uint32_t)bytes[0] << 16)
			| br_dec16be(bytes + 1);
		break;
	case 2:
		x[widx ++] = br_dec16be(bytes);
		break;
	case 1:
		x[widx ++] = bytes[0];
		break;
	default:
		break;
	}

	x[0] = br_i32_bit_length(x + 1, widx - 1);
}
/*
 * Decode the big-endian integer src[0..len-1] and reduce it modulo
 * m[], storing the result in x[]. If the modulus bit length is 0,
 * x[0] is set to 0 and nothing else is done.
 *
 * see inner.h
 */
void
br_i32_decode_reduce(uint32_t *x,
	const void *src, size_t len, const uint32_t *m)
{
	uint32_t mod_bits;
	size_t mod_bytes, off, tail;
	const unsigned char *bytes;

	mod_bits = m[0];

	/* Invalid modulus. */
	if (mod_bits == 0) {
		x[0] = 0;
		return;
	}

	/* Start from a zero value with the modulus bit length. */
	br_i32_zero(x, mod_bits);

	/*
	 * Up to (mod_bytes - 1) bytes can be decoded directly without
	 * any reduction. If that covers the whole input, decoding is
	 * all there is to do.
	 */
	mod_bytes = (mod_bits + 7) >> 3;
	off = mod_bytes - 1;
	if (off >= len) {
		br_i32_decode(x, src, len);
		x[0] = mod_bits;
		return;
	}

	/*
	 * The rest is injected by whole 32-bit words; 'tail' is the
	 * number of bytes to inject that way, rounded up to a multiple
	 * of 4.
	 */
	bytes = src;
	tail = (len - off + 3) & ~(size_t)3;

	if (tail > len) {
		/*
		 * Rounding up asked for more bytes than are available (by
		 * at most 3; only possible with a very short modulus).
		 * Build the first word by hand, using zero for the
		 * missing leading bytes.
		 */
		int n;
		uint32_t w;

		w = 0;
		for (n = 0; n < 4; n ++) {
			w <<= 8;
			if (tail <= len) {
				w |= bytes[len - tail];
			}
			tail --;
		}
		br_i32_muladd_small(x, w, m);
	} else {
		br_i32_decode(x, bytes, len - tail);
		x[0] = mod_bits;
	}

	/*
	 * Exactly 'tail' bytes remain, a multiple of 4: inject them one
	 * word at a time.
	 */
	for (off = len - tail; off < len; off += 4) {
		br_i32_muladd_small(x, br_dec32be(bytes + off), m);
	}
}
/*
 * Divide the 64-bit value hi:lo by d, one quotient bit per step.
 * The quotient is returned and the remainder is stored in *r. If hi
 * equals d on entry, hi is replaced by 0 before the division.
 * NOTE(review): presumably callers must ensure hi <= d and d != 0;
 * confirm against the declaration in inner.h.
 *
 * see inner.h
 */
uint32_t
br_divrem(uint32_t hi, uint32_t lo, uint32_t d, uint32_t *r)
{
	/* TODO: optimize this */
	uint32_t quo, last;
	int sh;

	quo = 0;
	hi = MUX(EQ(hi, d), 0, hi);

	for (sh = 31; sh > 0; sh --) {
		int up;
		uint32_t win, take;

		/*
		 * 'win' is the 32-bit window of hi:lo aligned with
		 * d << sh. The divisor is subtracted when it fits in the
		 * window, or when hi still has bits above the window.
		 */
		up = 32 - sh;
		win = (hi << up) | (lo >> sh);
		take = GE(win, d) | (hi >> sh);
		hi = MUX(take, (win - d) >> up, hi);
		lo = MUX(take, lo - (d << sh), lo);
		quo |= take << sh;
	}

	/* Last quotient bit and final remainder. */
	last = GE(lo, d) | hi;
	quo |= last;
	*r = MUX(last, lo - d, lo);
	return quo;
}
/*
 * Encode x[] (32-bit words, bit length in x[0]) as exactly len
 * big-endian bytes into dst. Bytes beyond the announced size of x
 * are written as zeros.
 *
 * see inner.h
 */
void
br_i32_encode(void *dst, size_t len, const uint32_t *x)
{
	unsigned char *out;
	size_t widx, partial;

	out = dst;

	/*
	 * Announced size of x in bytes; anything longer than that in
	 * the output is zero padding at the front.
	 */
	widx = (x[0] + 7) >> 3;
	for (; len > widx; len --) {
		*out ++ = 0;
	}

	/*
	 * From here on, widx indexes x[] (starting at 1). The topmost
	 * word contributes only (len mod 4) bytes when that count is
	 * not zero.
	 */
	widx = (len + 3) >> 2;
	partial = len & 3;
	if (partial != 0) {
		while (partial > 0) {
			partial --;
			*out ++ = (unsigned char)(x[widx] >> (partial << 3));
		}
		widx --;
	}

	/* Remaining complete words, most significant first. */
	for (; widx > 0; widx --) {
		br_enc32be(out, x[widx]);
		out += 4;
	}
}
/*
 * Convert x[] back from Montgomery representation modulo m[]. m0i is
 * the per-word multiplier applied to x[1] to obtain the reduction
 * factor of each round.
 * NOTE(review): m0i is presumably -1/m[1] mod 2^32; confirm in
 * inner.h.
 *
 * see inner.h
 */
void
br_i32_from_monty(uint32_t *x, const uint32_t *m, uint32_t m0i)
{
	size_t nwords, round, j;

	nwords = (m[0] + 31) >> 5;
	for (round = 0; round < nwords; round ++) {
		uint32_t f;
		uint64_t carry;

		/*
		 * Add f*m to x and shift down by one word. The lowest
		 * result word is never stored, so only its carry is kept.
		 */
		f = x[1] * m0i;
		carry = ((uint64_t)x[1] + MUL(f, m[1])) >> 32;
		for (j = 1; j < nwords; j ++) {
			uint64_t z;

			z = (uint64_t)x[j + 1] + MUL(f, m[j + 1]) + carry;
			carry = z >> 32;
			x[j] = (uint32_t)z;
		}
		x[nwords] = (uint32_t)carry;
	}

	/*
	 * A final subtraction of m is needed only if x >= m. The inner
	 * call computes the borrow without modifying x[]; the outer call
	 * performs the subtraction when that borrow is 0.
	 */
	br_i32_sub(x, m, NOT(br_i32_sub(x, m, 0)));
}
/*
 * Return 1 if every value word of x[] is zero (including the case
 * where x[0] announces no words at all), 0 otherwise. All words are
 * read regardless of their contents.
 *
 * see inner.h
 */
uint32_t
br_i32_iszero(const uint32_t *x)
{
	uint32_t bits;
	size_t i, nwords;

	nwords = (x[0] + 31) >> 5;
	bits = 0;
	for (i = 1; i <= nwords; i ++) {
		bits |= x[i];
	}

	/*
	 * (bits | -bits) has its top bit set exactly when bits is
	 * non-zero; invert and move that bit down to position 0.
	 */
	return ~(bits | -bits) >> 31;
}
/*
 * Modular exponentiation: x[] is raised to the power e modulo m[], in
 * place (see inner.h for the declared contract).
 *
 * x     base on input, result on output
 * e     exponent bytes; e[elen - 1] supplies the lowest-order bits
 * elen  exponent length in bytes
 * m     modulus (m[0] is its bit length)
 * m0i   forwarded unchanged to br_i32_montymul()
 * t1    scratch buffer, at least as large as m[] (header word included)
 * t2    scratch buffer of the same size, receives each product
 *
 * Two Montgomery multiplications are performed for every exponent bit,
 * whether that bit is set or not.
 */
void
br_i32_modpow(uint32_t *x,
	const unsigned char *e, size_t elen,
	const uint32_t *m, uint32_t m0i, uint32_t *t1, uint32_t *t2)
{
	size_t nbytes;
	uint32_t nbits, bit;

	/*
	 * Size of m[] in bytes, counting the leading "bit length" word.
	 */
	nbytes = ((m[0] + 63) >> 5) * sizeof m[0];

	/*
	 * Invariants of the loop below:
	 * -- t1[] holds x, x^2, x^4, x^8... in Montgomery representation;
	 * -- x[] accumulates the result in normal representation;
	 * -- t2[] is the destination of every multiplication.
	 *
	 * Multiplying a normal value by a Montgomery value yields a
	 * normal value, hence no br_i32_from_monty() call at the end.
	 */
	memcpy(t1, x, nbytes);
	br_i32_to_monty(t1, m);
	br_i32_zero(x, m[0]);
	x[1] = 1;

	nbits = (uint32_t)elen << 3;
	for (bit = 0; bit < nbits; bit ++) {
		const unsigned char *src;
		uint32_t ctl;

		/* Exponent bits are consumed from least to most significant. */
		src = e + (elen - 1 - (bit >> 3));
		ctl = (*src >> (bit & 7)) & 1;

		/* The product is always computed; CCOPY is driven by ctl. */
		br_i32_montymul(t2, x, t1, m, m0i);
		CCOPY(ctl, x, t2, nbytes);

		/* Square the running power for the next bit. */
		br_i32_montymul(t2, t1, t1, m, m0i);
		memcpy(t1, t2, nbytes);
	}
}
/*
 * Montgomery multiplication of x[] by y[] modulo m[], result in d[]
 * (see inner.h for the declared contract).
 *
 * d    destination; it is cleared before x[] and y[] are read, so it
 *      must not overlap either operand
 * x,y  operands, same word count as m[]
 * m    modulus; m[0] is its bit length
 * m0i  per-round multiplier; presumably -1/m[1] mod 2^32 as returned by
 *      br_i32_ninv32() -- confirm against inner.h
 */
void
br_i32_montymul(uint32_t *d, const uint32_t *x, const uint32_t *y,
	const uint32_t *m, uint32_t m0i)
{
	size_t len, u, v;
	uint64_t dh;

	len = (m[0] + 31) >> 5;
	br_i32_zero(d, m[0]);
	dh = 0;
	for (u = 0; u < len; u ++) {
		uint32_t f, xu;
		uint64_t r1, r2, zh;

		xu = x[u + 1];
		f = (d[1] + x[u + 1] * y[1]) * m0i;

		/*
		 * Two carry chains run side by side: r1 for d + xu*y,
		 * r2 for the addition of f*m on top of that.
		 */
		r1 = 0;
		r2 = 0;
		for (v = 0; v < len; v ++) {
			uint64_t z;
			uint32_t t;

			z = (uint64_t)d[v + 1] + MUL(xu, y[v + 1]) + r1;
			r1 = z >> 32;
			t = (uint32_t)z;
			z = (uint64_t)t + MUL(f, m[v + 1]) + r2;
			r2 = z >> 32;
			/*
			 * The lowest result word is dropped; all other
			 * words are stored one position lower.
			 */
			if (v != 0) {
				d[v] = (uint32_t)z;
			}
		}
		zh = dh + r1 + r2;
		d[len] = (uint32_t)zh;
		dh = zh >> 32;
	}

	/*
	 * d[] may still be greater than m[] at that point; notably, the
	 * 'dh' word may be non-zero.
	 */
	br_i32_sub(d, m, NEQ(dh, 0) | NOT(br_i32_sub(d, m, 0)));
}
/*
 * Multiply a[] by b[] and accumulate the partial products into d[]
 * (see inner.h for the declared contract).
 *
 * d[0] is set to a[0] + b[0]. For each word of b[], the products with
 * the words of a[] are added to the words already in d[], and the carry
 * out of that row is stored (not added) in the next higher word of d[].
 * The highest word written is d[alen + blen].
 */
void
br_i32_mulacc(uint32_t *d, const uint32_t *a, const uint32_t *b)
{
	size_t awords, bwords, row;

	awords = (a[0] + 31) >> 5;
	bwords = (b[0] + 31) >> 5;
	d[0] = a[0] + b[0];
	for (row = 0; row < bwords; row ++) {
		uint32_t *dst;
		uint32_t bw;
		size_t col;
#if BR_64
		uint64_t carry;
#else
		uint32_t carry;
#endif

		/* dst[] is the window of d[] this row accumulates into. */
		dst = d + 1 + row;
		bw = b[1 + row];
		carry = 0;
		for (col = 0; col < awords; col ++) {
			uint64_t sum;

			sum = (uint64_t)dst[col] + MUL(bw, a[1 + col]) + carry;
			carry = sum >> 32;
			dst[col] = (uint32_t)sum;
		}
		dst[awords] = (uint32_t)carry;
	}
}
/*
 * Replace x[] with (x * 2^32 + z) mod m[], in place (see inner.h for
 * the declared contract).
 *
 * x  value to update; it has no length check of its own and is indexed
 *    with the word count of m[]
 * z  new low word injected below the current value
 * m  modulus; m[0] is its bit length. A zero-length modulus leaves x[]
 *    untouched, and a modulus of at most 32 bits is handled with a
 *    single br_rem() call.
 */
void
br_i32_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m)
{
	uint32_t m_bitlen;
	size_t u, mlen;
	uint32_t a0, a1, b0, hi, g, q, tb;
	uint32_t chf, clow, under, over;
	uint64_t cc;

	/*
	 * We can test on the modulus bit length since we accept to
	 * leak that length.
	 */
	m_bitlen = m[0];
	if (m_bitlen == 0) {
		return;
	}
	if (m_bitlen <= 32) {
		x[1] = br_rem(x[1], z, m[1]);
		return;
	}
	mlen = (m_bitlen + 31) >> 5;

	/*
	 * Principle: we estimate the quotient (x*2^32+z)/m by
	 * doing a 64/32 division with the high words.
	 *
	 * Let:
	 *   w = 2^32
	 *   a = (w*a0 + a1) * w^N + a2
	 *   b = b0 * w^N + b2
	 * such that:
	 *   0 <= a0 < w
	 *   0 <= a1 < w
	 *   0 <= a2 < w^N
	 *   w/2 <= b0 < w
	 *   0 <= b2 < w^N
	 *   a < w*b
	 * I.e. the two top words of a are a0:a1, the top word of b is
	 * b0, we ensured that b0 is "full" (high bit set), and a is
	 * such that the quotient q = a/b fits on one word (0 <= q < w).
	 *
	 * If a = b*q + r (with 0 <= r < b), we can estimate q by
	 * doing an Euclidean division on the top words:
	 *   a0*w+a1 = b0*u + v  (with 0 <= v < w)
	 * Then the following holds:
	 *   0 <= u <= w
	 *   u-2 <= q <= u
	 */
	a0 = br_i32_word(x, m_bitlen - 32);
	/* Save the top word: the shift below overwrites it. */
	hi = x[mlen];
	memmove(x + 2, x + 1, (mlen - 1) * sizeof *x);
	x[1] = z;
	a1 = br_i32_word(x, m_bitlen - 32);
	b0 = br_i32_word(m, m_bitlen - 32);

	/*
	 * We estimate a quotient q. If the quotient returned by br_div()
	 * is g:
	 * -- If a0 == b0 then g == 0; we want q = 0xFFFFFFFF.
	 * -- Otherwise:
	 *    -- if g == 0 then we set q = 0;
	 *    -- otherwise, we set q = g - 1.
	 * The properties described above then ensure that the true
	 * quotient is q-1, q or q+1.
	 */
	g = br_div(a0, a1, b0);
	q = MUX(EQ(a0, b0), 0xFFFFFFFF, MUX(EQ(g, 0), 0, g - 1));

	/*
	 * We subtract q*m from x (with the extra high word of value 'hi').
	 * Since q may be off by 1 (in either direction), we may have to
	 * add or subtract m afterwards.
	 *
	 * The 'tb' flag will be true (1) at the end of the loop if the
	 * result is greater than or equal to the modulus (not counting
	 * 'hi' or the carry).
	 */
	cc = 0;
	tb = 1;
	for (u = 1; u <= mlen; u ++) {
		uint32_t mw, zw, xw, nxw;
		uint64_t zl;

		mw = m[u];
		zl = MUL(mw, q) + cc;
		cc = (uint32_t)(zl >> 32);
		zw = (uint32_t)zl;
		xw = x[u];
		nxw = xw - zw;
		/* A wrap-around of the word subtraction is a borrow. */
		cc += (uint64_t)GT(nxw, xw);
		x[u] = nxw;
		tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw));
	}

	/*
	 * If we underestimated q, then either cc < hi (one extra bit
	 * beyond the top array word), or cc == hi and tb is true (no
	 * extra bit, but the result is not lower than the modulus). In
	 * these cases we must subtract m once.
	 *
	 * Otherwise, we may have overestimated, which will show as
	 * cc > hi (thus a negative result). Correction is adding m once.
	 */
	chf = (uint32_t)(cc >> 32);
	clow = (uint32_t)cc;
	over = chf | GT(clow, hi);
	under = ~over & (tb | (~chf & LT(clow, hi)));
	br_i32_add(x, m, over);
	br_i32_sub(x, m, under);
}
/*
 * Compute -(1/x) mod 2^32 for an odd x (see inner.h).
 *
 * For odd x, 2 - x is an inverse of x modulo 4; each refinement step
 * doubles the number of correct low bits, so four steps reach 32 bits.
 * The result is selected with MUX() on the low bit of x: the negated
 * inverse when x is odd, 0 when x is even (assuming the usual
 * MUX(ctl, a, b) selector from inner.h).
 */
uint32_t
br_i32_ninv32(uint32_t x)
{
	uint32_t inv;
	int step;

	inv = 2 - x;
	for (step = 0; step < 4; step ++) {
		inv *= 2 - inv * x;
	}
	return MUX(x & 1, -inv, 0);
}
/*
 * Reduce a[] modulo m[] and write the result into x[] (see inner.h for
 * the declared contract).
 *
 * x[0] is set to the bit length of m[]. With a zero-length modulus
 * nothing else is written. When a[] has fewer bits than m[] its words
 * are copied as they are and the remaining words of x[] are cleared.
 */
void
br_i32_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m)
{
	uint32_t mbits, abits;
	size_t mwords, awords, k;

	mbits = m[0];
	x[0] = mbits;
	if (mbits == 0) {
		return;
	}
	mwords = (mbits + 31) >> 5;

	/*
	 * Shorter source: plain copy, then zero-fill the upper words.
	 */
	abits = a[0];
	awords = (abits + 31) >> 5;
	if (abits < mbits) {
		memcpy(x + 1, a + 1, awords * sizeof *a);
		for (k = awords + 1; k <= mwords; k ++) {
			x[k] = 0;
		}
		return;
	}

	/*
	 * Source at least as long as the modulus: start from its top
	 * mwords - 1 words, then feed the remaining words one at a time,
	 * from the highest down to a[1].
	 */
	memcpy(x + 1, a + 2 + (awords - mwords), (mwords - 1) * sizeof *a);
	x[mwords] = 0;
	k = 1 + awords - mwords;
	while (k > 0) {
		br_i32_muladd_small(x, a[k], m);
		k --;
	}
}

/*
 * Subtract b[] from a[] and return the final borrow (see inner.h for
 * the declared contract).
 *
 * The word count comes from a[0]; b[] is read over the same range. The
 * borrow is always computed. Each result word is passed through
 * MUX(ctl, new, old), so with the usual selector semantics a[] keeps
 * its old contents when ctl is 0.
 */
uint32_t
br_i32_sub(uint32_t *a, const uint32_t *b, uint32_t ctl)
{
	uint32_t borrow;
	size_t end, i;

	borrow = 0;
	end = (a[0] + 63) >> 5;
	for (i = 1; i < end; i ++) {
		uint32_t old, diff;

		old = a[i];
		diff = old - b[i] - borrow;

		/*
		 * A borrow occurs when the difference wrapped above the
		 * old word, or when it equals the old word while a
		 * borrow was already pending.
		 */
		borrow = GT(diff, old) | (borrow & EQ(diff, old));
		a[i] = MUX(ctl, diff, old);
	}
	return borrow;
}
/*
 * Convert x[] to Montgomery representation modulo m[], in place (see
 * inner.h for the declared contract).
 *
 * br_i32_muladd_small(x, 0, m) is called once per word of the modulus;
 * each call injects a zero low word, i.e. multiplies x by 2^32 modulo m.
 */
void
br_i32_to_monty(uint32_t *x, const uint32_t *m)
{
	uint32_t k;

	for (k = (m[0] + 31) >> 5; k > 0; k --) {
		br_i32_muladd_small(x, 0, m);
	}
}

/*
 * Start of i62_modpow2.c. The 62-bit code that follows is compiled
 * only when a 64x64->128 multiplication is available, either through
 * the 'unsigned __int128' type (BR_INT128) or through the _umul128()
 * intrinsic (BR_UMUL128).
 */
#include "inner.h"

#if BR_INT128 || BR_UMUL128

#if BR_INT128
/*
 * Compute x*y+v1+v2. Operands are 64-bit, and result is 128-bit, with
 * high word in "hi" and low word in "lo".
 *
 * This variant relies on the 'unsigned __int128' type; 'hi' and 'lo'
 * must be assignable uint64_t lvalues.
 */
#define FMA1(hi, lo, x, y, v1, v2)   do { \
		unsigned __int128 fmaz; \
		fmaz = (unsigned __int128)(x) * (unsigned __int128)(y) \
			+ (unsigned __int128)(v1) + (unsigned __int128)(v2); \
		(hi) = (uint64_t)(fmaz >> 64); \
		(lo) = (uint64_t)fmaz; \
	} while (0)

/*
 * Compute x1*y1+x2*y2+v1+v2. Operands are 64-bit, and result is 128-bit,
 * with high word in "hi" and low word in "lo".
 *
 * Callers should ensure that the two inner products, and the v1 and v2
 * operands, are multiples of 4 (this is not used by this specific
 * definition but may help other implementations).
 */
#define FMA2(hi, lo, x1, y1, x2, y2, v1, v2)   do { \
		unsigned __int128 fmaz; \
		fmaz = (unsigned __int128)(x1) * (unsigned __int128)(y1) \
			+ (unsigned __int128)(x2) * (unsigned __int128)(y2) \
			+ (unsigned __int128)(v1) + (unsigned __int128)(v2); \
		(hi) = (uint64_t)(fmaz >> 64); \
		(lo) = (uint64_t)fmaz; \
	} while (0)

#elif BR_UMUL128

#include <intrin.h>

/*
 * Same contract as the __int128 version of FMA1, built on the
 * _umul128() and _addcarry_u64() intrinsics: v1 and then v2 are added
 * to the low product word, and each carry is propagated into the high
 * word.
 */
#define FMA1(hi, lo, x, y, v1, v2)   do { \
		uint64_t fmahi, fmalo; \
		unsigned char fmacc; \
		fmalo = _umul128((x), (y), &fmahi); \
		fmacc = _addcarry_u64(0, fmalo, (v1), &fmalo); \
		_addcarry_u64(fmacc, fmahi, 0, &fmahi); \
		fmacc = _addcarry_u64(0, fmalo, (v2), &(lo)); \
		_addcarry_u64(fmacc, fmahi, 0, &(hi)); \
	} while (0)
/*
 * Normally we should use _addcarry_u64() for FMA2 too, but it makes
 * Visual Studio crash. Instead we use this version, which leverages
 * the fact that the vx operands, and the products, are multiples of 4.
 * This is unfortunately slower.
 *
 * Every term is shifted right by two bits before the four-way addition
 * (nothing is lost as long as the terms are multiples of 4); the bits
 * of the sum above position 62 are the carry into the high word.
 */
#define FMA2(hi, lo, x1, y1, x2, y2, v1, v2)   do { \
		uint64_t fma1hi, fma1lo; \
		uint64_t fma2hi, fma2lo; \
		uint64_t fmatt; \
		fma1lo = _umul128((x1), (y1), &fma1hi); \
		fma2lo = _umul128((x2), (y2), &fma2hi); \
		fmatt = (fma1lo >> 2) + (fma2lo >> 2) \
			+ ((v1) >> 2) + ((v2) >> 2); \
		(lo) = fmatt << 2; \
		(hi) = fma1hi + fma2hi + (fmatt >> 62); \
	} while (0)

/*
 * The FMA2 macro definition we would prefer to use, but it triggers
 * an internal compiler error in Visual Studio 2015.
 *
#define FMA2(hi, lo, x1, y1, x2, y2, v1, v2)   do { \
		uint64_t fma1hi, fma1lo; \
		uint64_t fma2hi, fma2lo; \
		unsigned char fmacc; \
		fma1lo = _umul128((x1), (y1), &fma1hi); \
		fma2lo = _umul128((x2), (y2), &fma2hi); \
		fmacc = _addcarry_u64(0, fma1lo, (v1), &fma1lo); \
		_addcarry_u64(fmacc, fma1hi, 0, &fma1hi); \
		fmacc = _addcarry_u64(0, fma2lo, (v2), &fma2lo); \
		_addcarry_u64(fmacc, fma2hi, 0, &fma2hi); \
		fmacc = _addcarry_u64(0, fma1lo, fma2lo, &(lo)); \
		_addcarry_u64(fmacc, fma1hi, fma2hi, &(hi)); \
	} while (0)
 */

#endif

/* Mask selecting the low 62 bits of a 64-bit word. */
#define MASK62           ((uint64_t)0x3FFFFFFFFFFFFFFF)
/* Low 62 bits of the product x*y (the product itself is taken modulo 2^64). */
#define MUL62_lo(x, y)   (((uint64_t)(x) * (uint64_t)(y)) & MASK62)

/*
 * Subtract b from a, and return the final carry. If 'ctl32' is 0, then
 * a[] is kept unmodified, but the final carry is still computed and
 * returned.
 *
 * Both arrays hold 'num' words of 62 bits each, with no header word.
 * 'ctl32' is expected to be 0 or 1.
 */
static uint32_t
i62_sub(uint64_t *a, const uint64_t *b, size_t num, uint32_t ctl32)
{
	uint64_t cc, mask;
	size_t u;

	cc = 0;
	/* Expand the 0/1 control value into an all-zeros/all-ones mask. */
	ctl32 = -ctl32;
	mask = (uint64_t)ctl32 | ((uint64_t)ctl32 << 32);
	for (u = 0; u < num; u ++) {
		uint64_t aw, bw, dw;

		aw = a[u];
		bw = b[u];
		dw = aw - bw - cc;
		/* Operands fit on 62 bits, so bit 63 is set only on a borrow. */
		cc = dw >> 63;
		dw &= MASK62;
		/* Keeps aw when mask is 0, stores dw when mask is all-ones. */
		a[u] = aw ^ (mask & (dw ^ aw));
	}
	return (uint32_t)cc;
}
/*
 * Montgomery multiplication, over arrays of 62-bit values. The
 * destination array (d) must be distinct from the other operands
 * (x, y and m). All arrays are in little-endian format (least
 * significant word comes first) over 'num' words.
 *
 * 'm0i' is the 62-bit counterpart of the value described in the caller
 * (-(1/m0) mod 2^62). The code reads y[0], m[0] and d[num - 1]
 * unconditionally inside the loop, so it is meant for num >= 1.
 */
static void
montymul(uint64_t *d, const uint64_t *x, const uint64_t *y,
	const uint64_t *m, size_t num, uint64_t m0i)
{
	uint64_t dh;
	size_t u, num4;

	/*
	 * The inner loop over words 1..num-1 is unrolled four times;
	 * num4 is the index at which the unrolled part stops.
	 */
	num4 = 1 + ((num - 1) & ~(size_t)3);
	memset(d, 0, num * sizeof *d);
	dh = 0;
	for (u = 0; u < num; u ++) {
		size_t v;
		uint64_t f, xu;
		uint64_t r, zh;
		uint64_t hi, lo;

		/*
		 * Operands handed to FMA2 are pre-shifted by two bits so
		 * that products and addends are multiples of 4, as that
		 * macro requests.
		 */
		xu = x[u] << 2;
		f = MUL62_lo(d[0] + MUL62_lo(x[u], y[0]), m0i) << 2;

		/* Word 0: only the carry is kept, the low word is dropped. */
		FMA2(hi, lo, xu, y[0], f, m[0], d[0] << 2, 0);
		r = hi;

		for (v = 1; v < num4; v += 4) {
			FMA2(hi, lo, xu, y[v + 0],
				f, m[v + 0], d[v + 0] << 2, r << 2);
			r = hi + (r >> 62);
			d[v - 1] = lo >> 2;
			FMA2(hi, lo, xu, y[v + 1],
				f, m[v + 1], d[v + 1] << 2, r << 2);
			r = hi + (r >> 62);
			d[v + 0] = lo >> 2;
			FMA2(hi, lo, xu, y[v + 2],
				f, m[v + 2], d[v + 2] << 2, r << 2);
			r = hi + (r >> 62);
			d[v + 1] = lo >> 2;
			FMA2(hi, lo, xu, y[v + 3],
				f, m[v + 3], d[v + 3] << 2, r << 2);
			r = hi + (r >> 62);
			d[v + 2] = lo >> 2;
		}
		/* Remaining words (fewer than four). */
		for (; v < num; v ++) {
			FMA2(hi, lo, xu, y[v], f, m[v], d[v] << 2, r << 2);
			r = hi + (r >> 62);
			d[v - 1] = lo >> 2;
		}

		zh = dh + r;
		d[num - 1] = zh & MASK62;
		dh = zh >> 62;
	}
	/*
	 * Final conditional subtraction: done when the extra top word
	 * 'dh' is non-zero, or when subtracting m would not borrow.
	 */
	i62_sub(d, m, num, (uint32_t)dh | NOT(i62_sub(d, m, num, 0)));
}
/*
 * Conversion back from Montgomery representation.
 *
 * x[] and m[] hold 'num' words of 62 bits each (no header word); x[] is
 * converted in place.
 */
static void
frommonty(uint64_t *x, const uint64_t *m, size_t num, uint64_t m0i)
{
	size_t u, v;

	for (u = 0; u < num; u ++) {
		uint64_t f, cc;

		f = MUL62_lo(x[0], m0i) << 2;
		cc = 0;
		for (v = 0; v < num; v ++) {
			uint64_t hi, lo;

			FMA1(hi, lo, f, m[v], x[v] << 2, cc);
			cc = hi << 2;
			/* The lowest word is dropped; others move down by one. */
			if (v != 0) {
				x[v - 1] = lo >> 2;
			}
		}
		x[num - 1] = cc >> 2;
	}
	/* Subtract m once if, and only if, doing so does not borrow. */
	i62_sub(x, m, num, NOT(i62_sub(x, m, num, 0)));
}

/*
 * Modular exponentiation with 62-bit words and a window (see inner.h
 * for the declared contract).
 *
 * x31    base on input, result on output, as 31-bit words after a
 *        header word
 * e      exponent bytes, consumed from e[0] onwards, most significant
 *        bits first
 * elen   exponent length in bytes
 * m31    modulus, same 31-bit layout (m31[0] is its bit length)
 * m0i31  -(1/m0) mod 2^31, extended below to 62 bits
 * tmp    scratch area
 * twlen  size of the scratch area, in 64-bit words
 *
 * Returns 1 on success, or 0 if the scratch area is too small even for
 * the br_i31_modpow() fallback.
 */
uint32_t
br_i62_modpow_opt(uint32_t *x31, const unsigned char *e, size_t elen,
	const uint32_t *m31, uint32_t m0i31, uint64_t *tmp, size_t twlen)
{
	size_t u, mw31num, mw62num;
	uint64_t *x, *m, *t1, *t2;
	uint64_t m0i;
	uint32_t acc;
	int win_len, acc_len;

	/*
	 * Get modulus size, in words.
	 */
	mw31num = (m31[0] + 31) >> 5;
	mw62num = (mw31num + 1) >> 1;

	/*
	 * In order to apply this function, we must have enough room to
	 * copy the operand and modulus into the temporary array, along
	 * with at least two temporaries. If there is not enough room,
	 * switch to br_i31_modpow(). We also use br_i31_modpow() if the
	 * modulus length is not at least four words (94 bits or more).
	 */
	if (mw31num < 4 || (mw62num << 2) > twlen) {
		/*
		 * We assume here that we can split an aligned uint64_t
		 * into two properly aligned uint32_t. Since both types
		 * are supposed to have an exact width with no padding,
		 * then this property must hold.
		 */
		size_t txlen;

		txlen = mw31num + 1;
		if (twlen < txlen) {
			return 0;
		}
		br_i31_modpow(x31, e, elen, m31, m0i31,
			(uint32_t *)tmp, (uint32_t *)tmp + txlen);
		return 1;
	}

	/*
	 * Convert x to Montgomery representation: this means that
	 * we replace x with x*2^z mod m, where z is the smallest multiple
	 * of the word size such that 2^z >= m. We want to reuse the 31-bit
	 * functions here (for constant-time operation), but we need z
	 * for a 62-bit word size.
	 */
	for (u = 0; u < mw62num; u ++) {
		br_i31_muladd_small(x31, 0, m31);
		br_i31_muladd_small(x31, 0, m31);
	}

	/*
	 * Assemble operands into arrays of 62-bit words. Note that
	 * all the arrays of 62-bit words that we will handle here
	 * are without any leading size word.
	 *
	 * We also adjust tmp and twlen to account for the words used
	 * for these extra arrays.
	 */
	m = tmp;
	x = tmp + mw62num;
	tmp += (mw62num << 1);
	twlen -= (mw62num << 1);
	for (u = 0; u < mw31num; u += 2) {
		size_t v;

		v = u >> 1;
		/* An odd word count leaves the last 62-bit word half-filled. */
		if ((u + 1) == mw31num) {
			m[v] = (uint64_t)m31[u + 1];
			x[v] = (uint64_t)x31[u + 1];
		} else {
			m[v] = (uint64_t)m31[u + 1]
				+ ((uint64_t)m31[u + 2] << 31);
			x[v] = (uint64_t)x31[u + 1]
				+ ((uint64_t)x31[u + 2] << 31);
		}
	}

	/*
	 * Compute window size. We support windows up to 5 bits; for a
	 * window of size k bits, we need 2^k+1 temporaries (for k = 1,
	 * we use special code that uses only 2 temporaries).
	 */
	for (win_len = 5; win_len > 1; win_len --) {
		if ((((uint32_t)1 << win_len) + 1) * mw62num <= twlen) {
			break;
		}
	}

	t1 = tmp;
	t2 = tmp + mw62num;

	/*
	 * Compute m0i, which is equal to -(1/m0) mod 2^62. We were
	 * provided with m0i31, which already fulfills this property
	 * modulo 2^31; the single expression below is then sufficient.
	 */
	m0i = (uint64_t)m0i31;
	m0i = MUL62_lo(m0i, (uint64_t)2 + MUL62_lo(m0i, m[0]));

	/*
	 * Compute window contents. If the window has size one bit only,
	 * then t2 is set to x; otherwise, t2[0] is left untouched, and
	 * t2[k] is set to x^k (for k >= 1).
	 */
	if (win_len == 1) {
		memcpy(t2, x, mw62num * sizeof *x);
	} else {
		uint64_t *base;

		memcpy(t2 + mw62num, x, mw62num * sizeof *x);
		base = t2 + mw62num;
		for (u = 2; u < ((unsigned)1 << win_len); u ++) {
			montymul(base + mw62num, base, x, m, mw62num, m0i);
			base += mw62num;
		}
	}

	/*
	 * Set x to 1, in Montgomery representation. We again use the
	 * 31-bit code.
	 */
	br_i31_zero(x31, m31[0]);
	x31[(m31[0] + 31) >> 5] = 1;
	br_i31_muladd_small(x31, 0, m31);
	if (mw31num & 1) {
		br_i31_muladd_small(x31, 0, m31);
	}
	for (u = 0; u < mw31num; u += 2) {
		size_t v;

		v = u >> 1;
		if ((u + 1) == mw31num) {
			x[v] = (uint64_t)x31[u + 1];
		} else {
			x[v] = (uint64_t)x31[u + 1]
				+ ((uint64_t)x31[u + 2] << 31);
		}
	}

	/*
	 * We process bits from most to least significant. At each
	 * loop iteration, we have acc_len bits in acc.
	 */
	acc = 0;
	acc_len = 0;
	while (acc_len > 0 || elen > 0) {
		int i, k;
		uint32_t bits;
		uint64_t mask1, mask2;

		/*
		 * Get the next bits.
		 */
		k = win_len;
		if (acc_len < win_len) {
			if (elen > 0) {
				acc = (acc << 8) | *e ++;
				elen --;
				acc_len += 8;
			} else {
				/* Exponent exhausted: use the bits that are left. */
				k = acc_len;
			}
		}
		bits = (acc >> (acc_len - k)) & (((uint32_t)1 << k) - 1);
		acc_len -= k;

		/*
		 * We could get exactly k bits. Compute k squarings.
		 */
		for (i = 0; i < k; i ++) {
			montymul(t1, x, x, m, mw62num, m0i);
			memcpy(x, t1, mw62num * sizeof *x);
		}

		/*
		 * Window lookup: we want to set t2 to the window
		 * lookup value, assuming the bits are non-zero. If
		 * the window length is 1 bit only, then t2 is
		 * already set; otherwise, we do a constant-time lookup.
		 */
		if (win_len > 1) {
			uint64_t *base;

			memset(t2, 0, mw62num * sizeof *t2);
			base = t2 + mw62num;
			for (u = 1; u < ((uint32_t)1 << k); u ++) {
				uint64_t mask;
				size_t v;

				/* All-ones for the wanted entry, zero otherwise. */
				mask = -(uint64_t)EQ(u, bits);
				for (v = 0; v < mw62num; v ++) {
					t2[v] |= mask & base[v];
				}
				base += mw62num;
			}
		}

		/*
		 * Multiply with the looked-up value. We keep the product
		 * only if the exponent bits are not all-zero.
		 */
		montymul(t1, x, t2, m, mw62num, m0i);
		mask1 = -(uint64_t)EQ(bits, 0);
		mask2 = ~mask1;
		for (u = 0; u < mw62num; u ++) {
			x[u] = (mask1 & x[u]) | (mask2 & t1[u]);
		}
	}

	/*
	 * Convert back from Montgomery representation.
	 */
	frommonty(x, m, mw62num, m0i);

	/*
	 * Convert result into 31-bit words.
	 */
	for (u = 0; u < mw31num; u += 2) {
		uint64_t zw;

		zw = x[u >> 1];
		x31[u + 1] = (uint32_t)zw & 0x7FFFFFFF;
		if ((u + 1) < mw31num) {
			x31[u + 2] = (uint32_t)(zw >> 31);
		}
	}
	return 1;
}

#else

/*
 * Variant used when neither BR_INT128 nor BR_UMUL128 is set: the work
 * is delegated to br_i31_modpow_opt(), with the scratch area viewed as
 * twice as many 32-bit words. Returns 0 without computing anything if
 * 'twlen' is smaller than (m31[0] + 63) >> 5.
 */
uint32_t
br_i62_modpow_opt(uint32_t *x31, const unsigned char *e, size_t elen,
	const uint32_t *m31, uint32_t m0i31, uint64_t *tmp, size_t twlen)
{
	size_t mwlen;

	mwlen = (m31[0] + 63) >> 5;
	if (twlen < mwlen) {
		return 0;
	}
	return br_i31_modpow_opt(x31, e, elen, m31, m0i31,
		(uint32_t *)tmp, twlen << 1);
}

#endif

/*
 * Wrapper around br_i62_modpow_opt() that takes its scratch area as
 * 32-bit words; 'twlen' is halved to count 64-bit words (see inner.h).
 */
uint32_t
br_i62_modpow_opt_as_i31(uint32_t *x31, const unsigned char *e, size_t elen,
	const uint32_t *m31, uint32_t m0i31, uint32_t *tmp, size_t twlen)
{
	/*
	 * As documented, this function expects the 'tmp' argument to be
	 * 64-bit aligned. This is OK since this function is internal (it
	 * is not part of BearSSL's public API).
	 */
	return br_i62_modpow_opt(x31, e, elen, m31, m0i31,
		(uint64_t *)tmp, twlen >> 1);
}
/*
 * inner.h: internal declarations shared by the library sources. The
 * include guard opened here is closed at the end of the header.
 */
#ifndef INNER_H__
#define INNER_H__

#include <string.h>
#include <limits.h>

#include "config.h"
#include "bearssl.h"

/*
 * On MSVC, disable the warning about applying unary minus on an
 * unsigned type: it is standard, we do it all the time, and for
 * good reasons.
 */
#if _MSC_VER
#pragma warning( disable : 4146 )
#endif

/*
 * Maximum size for a RSA modulus (in bits). Allocated stack buffers
 * depend on that size, so this value should be kept small. Currently,
 * 2048-bit RSA keys offer adequate security, and should still do so for
 * the next few decades; however, a number of widespread PKI have
 * already set their root keys to RSA-4096, so we should be able to
 * process such keys.
 *
 * This value MUST be a multiple of 64. This value MUST NOT exceed 47666
 * (some computations in RSA key generation rely on the factor size being
 * no more than 23833 bits). RSA key sizes beyond 3072 bits don't make a
 * lot of sense anyway.
 */
#define BR_MAX_RSA_SIZE   4096

/*
 * Minimum size for a RSA modulus (in bits); this value is used only to
 * filter out invalid parameters for key pair generation. Normally,
 * applications should not use RSA keys smaller than 2048 bits; but some
 * specific cases might need shorter keys, for legacy or research
 * purposes.
 */
#define BR_MIN_RSA_SIZE   512
Default is to support factors up to a bit more than half + * the maximum modulus size. + * + * This value MUST be a multiple of 32. + */ +#define BR_MAX_RSA_FACTOR ((BR_MAX_RSA_SIZE + 64) >> 1) + +/* + * Maximum size for an EC curve (modulus or order), in bits. Size of + * stack buffers depends on that parameter. This size MUST be a multiple + * of 8 (so that decoding an integer with that many bytes does not + * overflow). + */ +#define BR_MAX_EC_SIZE 528 + +/* + * Some macros to recognize the current architecture. Right now, we are + * interested in automatically recognizing architectures with efficient + * 64-bit types so that we may automatically use implementations that + * use 64-bit registers in that case. Future versions may detect, e.g., + * availability of SSE2 intrinsics. + * + * If 'unsigned long' is a 64-bit type, then we assume that 64-bit types + * are efficient. Otherwise, we rely on macros that depend on compiler, + * OS and architecture. In any case, failure to detect the architecture + * as 64-bit means that the 32-bit code will be used, and that code + * works also on 64-bit architectures (the 64-bit code may simply be + * more efficient). + * + * The test on 'unsigned long' should already catch most cases, the one + * notable exception being Windows code where 'unsigned long' is kept to + * 32-bit for compatibility with all the legacy code that liberally uses + * the 'DWORD' type for 32-bit values. 
+ * + * Macro names are taken from: http://nadeausoftware.com/articles/2012/02/c_c_tip_how_detect_processor_type_using_compiler_predefined_macros + */ +#ifndef BR_64 +#if ((ULONG_MAX >> 31) >> 31) == 3 +#define BR_64 1 +#elif defined(__ia64) || defined(__itanium__) || defined(_M_IA64) +#define BR_64 1 +#elif defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) \ + || defined(__64BIT__) || defined(_LP64) || defined(__LP64__) +#define BR_64 1 +#elif defined(__sparc64__) +#define BR_64 1 +#elif defined(__x86_64__) || defined(_M_X64) +#define BR_64 1 +#elif defined(__aarch64__) || defined(_M_ARM64) +#define BR_64 1 +#elif defined(__mips64) +#define BR_64 1 +#endif +#endif + +/* + * Set BR_LOMUL on platforms where it makes sense. + */ +#ifndef BR_LOMUL +#if BR_ARMEL_CORTEXM_GCC +#define BR_LOMUL 1 +#endif +#endif + +/* + * Architecture detection. + */ +#ifndef BR_i386 +#if __i386__ || _M_IX86 +#define BR_i386 1 +#endif +#endif + +#ifndef BR_amd64 +#if __x86_64__ || _M_X64 +#define BR_amd64 1 +#endif +#endif + +/* + * Compiler brand and version. + * + * Implementations that use intrinsics need to detect the compiler type + * and version because some specific actions may be needed to activate + * the corresponding opcodes, both for header inclusion, and when using + * them in a function. + * + * BR_GCC, BR_CLANG and BR_MSC will be set to 1 for, respectively, GCC, + * Clang and MS Visual C. For each of them, sub-macros will be defined + * for versions; each sub-macro is set whenever the compiler version is + * at least as recent as the one corresponding to the macro. + */ + +/* + * GCC thresholds are on versions 4.4 to 4.9 and 5.0. 
+ */ +#ifndef BR_GCC +#if __GNUC__ && !__clang__ +#define BR_GCC 1 + +#if __GNUC__ > 4 +#define BR_GCC_5_0 1 +#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9 +#define BR_GCC_4_9 1 +#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 8 +#define BR_GCC_4_8 1 +#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 7 +#define BR_GCC_4_7 1 +#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 6 +#define BR_GCC_4_6 1 +#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 5 +#define BR_GCC_4_5 1 +#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 4 +#define BR_GCC_4_4 1 +#endif + +#if BR_GCC_5_0 +#define BR_GCC_4_9 1 +#endif +#if BR_GCC_4_9 +#define BR_GCC_4_8 1 +#endif +#if BR_GCC_4_8 +#define BR_GCC_4_7 1 +#endif +#if BR_GCC_4_7 +#define BR_GCC_4_6 1 +#endif +#if BR_GCC_4_6 +#define BR_GCC_4_5 1 +#endif +#if BR_GCC_4_5 +#define BR_GCC_4_4 1 +#endif + +#endif +#endif + +/* + * Clang thresholds are on versions 3.7.0 and 3.8.0. + */ +#ifndef BR_CLANG +#if __clang__ +#define BR_CLANG 1 + +#if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 8) +#define BR_CLANG_3_8 1 +#elif __clang_major__ == 3 && __clang_minor__ >= 7 +#define BR_CLANG_3_7 1 +#endif + +#if BR_CLANG_3_8 +#define BR_CLANG_3_7 1 +#endif + +#endif +#endif + +/* + * MS Visual C thresholds are on Visual Studio 2005 to 2015. + */ +#ifndef BR_MSC +#if _MSC_VER +#define BR_MSC 1 + +#if _MSC_VER >= 1900 +#define BR_MSC_2015 1 +#elif _MSC_VER >= 1800 +#define BR_MSC_2013 1 +#elif _MSC_VER >= 1700 +#define BR_MSC_2012 1 +#elif _MSC_VER >= 1600 +#define BR_MSC_2010 1 +#elif _MSC_VER >= 1500 +#define BR_MSC_2008 1 +#elif _MSC_VER >= 1400 +#define BR_MSC_2005 1 +#endif + +#if BR_MSC_2015 +#define BR_MSC_2013 1 +#endif +#if BR_MSC_2013 +#define BR_MSC_2012 1 +#endif +#if BR_MSC_2012 +#define BR_MSC_2010 1 +#endif +#if BR_MSC_2010 +#define BR_MSC_2008 1 +#endif +#if BR_MSC_2008 +#define BR_MSC_2005 1 +#endif + +#endif +#endif + +/* + * GCC 4.4+ and Clang 3.7+ allow tagging specific functions with a + * 'target' attribute that activates support for specific opcodes. 
+ */ +#if BR_GCC_4_4 || BR_CLANG_3_7 +#define BR_TARGET(x) __attribute__((target(x))) +#else +#define BR_TARGET(x) +#endif + +/* + * AES-NI intrinsics are available on x86 (32-bit and 64-bit) with + * GCC 4.8+, Clang 3.7+ and MSC 2012+. + */ +#ifndef BR_AES_X86NI +#if (BR_i386 || BR_amd64) && (BR_GCC_4_8 || BR_CLANG_3_7 || BR_MSC_2012) +#define BR_AES_X86NI 1 +#endif +#endif + +/* + * SSE2 intrinsics are available on x86 (32-bit and 64-bit) with + * GCC 4.4+, Clang 3.7+ and MSC 2005+. + */ +#ifndef BR_SSE2 +#if (BR_i386 || BR_amd64) && (BR_GCC_4_4 || BR_CLANG_3_7 || BR_MSC_2005) +#define BR_SSE2 1 +#endif +#endif + +/* + * RDRAND intrinsics are available on x86 (32-bit and 64-bit) with + * GCC 4.6+, Clang 3.7+ and MSC 2012+. + */ +#ifndef BR_RDRAND +#if (BR_i386 || BR_amd64) && (BR_GCC_4_6 || BR_CLANG_3_7 || BR_MSC_2012) +#define BR_RDRAND 1 +#endif +#endif + +/* + * Determine type of OS for random number generation. Macro names and + * values are documented on: + * https://sourceforge.net/p/predef/wiki/OperatingSystems/ + * + * Win32's CryptGenRandom() should be available on Windows systems. + * + * /dev/urandom should work on all Unix-like systems (including macOS X). + * + * getentropy() is present on Linux (Glibc 2.25+), FreeBSD (12.0+) and + * OpenBSD (5.6+). For OpenBSD, there does not seem to be easy to use + * macros to test the minimum version, so we just assume that it is + * recent enough (last version without getentropy() has gone out of + * support in May 2015). + * + * Ideally we should use getentropy() on macOS (10.12+) too, but I don't + * know how to test the exact OS version with preprocessor macros. + * + * TODO: enrich the list of detected systems. 
+ */ + +#ifndef BR_USE_URANDOM +#if defined _AIX \ + || defined __ANDROID__ \ + || defined __FreeBSD__ \ + || defined __NetBSD__ \ + || defined __OpenBSD__ \ + || defined __DragonFly__ \ + || defined __linux__ \ + || (defined __sun && (defined __SVR4 || defined __svr4__)) \ + || (defined __APPLE__ && defined __MACH__) +#define BR_USE_URANDOM 1 +#endif +#endif + +#ifndef BR_USE_GETENTROPY +#if (defined __linux__ \ + && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 25))) \ + || (defined __FreeBSD__ && __FreeBSD__ >= 12) \ + || defined __OpenBSD__ +#define BR_USE_GETENTROPY 1 +#endif +#endif + +#ifndef BR_USE_WIN32_RAND +#if defined _WIN32 || defined _WIN64 +#define BR_USE_WIN32_RAND 1 +#endif +#endif + +/* + * POWER8 crypto support. We rely on compiler macros for the + * architecture, since we do not have a reliable, simple way to detect + * the required support at runtime (we could try running an opcode, and + * trapping the exception or signal on illegal instruction, but this + * induces some non-trivial OS dependencies that we would prefer to + * avoid if possible). + */ +#ifndef BR_POWER8 +#if __GNUC__ && ((_ARCH_PWR8 || _ARCH_PPC) && __CRYPTO__) +#define BR_POWER8 1 +#endif +#endif + +/* + * Detect endianness on POWER8. + */ +#if BR_POWER8 +#if defined BR_POWER8_LE +#undef BR_POWER8_BE +#if BR_POWER8_LE +#define BR_POWER8_BE 0 +#else +#define BR_POWER8_BE 1 +#endif +#elif defined BR_POWER8_BE +#undef BR_POWER8_LE +#if BR_POWER8_BE +#define BR_POWER8_LE 0 +#else +#define BR_POWER8_LE 1 +#endif +#else +#if __LITTLE_ENDIAN__ +#define BR_POWER8_LE 1 +#define BR_POWER8_BE 0 +#else +#define BR_POWER8_LE 0 +#define BR_POWER8_BE 1 +#endif +#endif +#endif + +/* + * Detect support for 128-bit integers. + */ +#if !defined BR_INT128 && !defined BR_UMUL128 +#ifdef __SIZEOF_INT128__ +#define BR_INT128 1 +#elif _M_X64 +#define BR_UMUL128 1 +#endif +#endif + +/* + * Detect support for unaligned accesses with known endianness. 
+ * + * x86 (both 32-bit and 64-bit) is little-endian and allows unaligned + * accesses. + * + * POWER/PowerPC allows unaligned accesses when big-endian. POWER8 and + * later also allow unaligned accesses when little-endian. + */ +#if !defined BR_LE_UNALIGNED && !defined BR_BE_UNALIGNED + +#if __i386 || __i386__ || __x86_64__ || _M_IX86 || _M_X64 +#define BR_LE_UNALIGNED 1 +#elif BR_POWER8_BE +#define BR_BE_UNALIGNED 1 +#elif BR_POWER8_LE +#define BR_LE_UNALIGNED 1 +#elif (__powerpc__ || __powerpc64__ || _M_PPC || _ARCH_PPC || _ARCH_PPC64) \ + && __BIG_ENDIAN__ +#define BR_BE_UNALIGNED 1 +#endif + +#endif + +/* ==================================================================== */ +/* + * Encoding/decoding functions. + * + * 32-bit and 64-bit decoding, both little-endian and big-endian, is + * implemented with the inline functions below. + * + * When allowed by some compile-time options (autodetected or provided), + * optimised code is used, to perform direct memory access when the + * underlying architecture supports it, both for endianness and + * alignment. This, however, may trigger strict aliasing issues; the + * code below uses unions to perform (supposedly) safe type punning. + * Since the C aliasing rules are relatively complex and were amended, + * or at least re-explained with different phrasing, in all successive + * versions of the C standard, it is always a bit risky to bet that any + * specific version of a C compiler got it right, for some notion of + * "right". 
+ */ + +typedef union { + uint16_t u; + unsigned char b[sizeof(uint16_t)]; +} br_union_u16; + +typedef union { + uint32_t u; + unsigned char b[sizeof(uint32_t)]; +} br_union_u32; + +typedef union { + uint64_t u; + unsigned char b[sizeof(uint64_t)]; +} br_union_u64; + +static inline void +br_enc16le(void *dst, unsigned x) +{ +#if BR_LE_UNALIGNED + ((br_union_u16 *)dst)->u = x; +#else + unsigned char *buf; + + buf = dst; + buf[0] = (unsigned char)x; + buf[1] = (unsigned char)(x >> 8); +#endif +} + +static inline void +br_enc16be(void *dst, unsigned x) +{ +#if BR_BE_UNALIGNED + ((br_union_u16 *)dst)->u = x; +#else + unsigned char *buf; + + buf = dst; + buf[0] = (unsigned char)(x >> 8); + buf[1] = (unsigned char)x; +#endif +} + +static inline unsigned +br_dec16le(const void *src) +{ +#if BR_LE_UNALIGNED + return ((const br_union_u16 *)src)->u; +#else + const unsigned char *buf; + + buf = src; + return (unsigned)buf[0] | ((unsigned)buf[1] << 8); +#endif +} + +static inline unsigned +br_dec16be(const void *src) +{ +#if BR_BE_UNALIGNED + return ((const br_union_u16 *)src)->u; +#else + const unsigned char *buf; + + buf = src; + return ((unsigned)buf[0] << 8) | (unsigned)buf[1]; +#endif +} + +static inline void +br_enc32le(void *dst, uint32_t x) +{ +#if BR_LE_UNALIGNED + ((br_union_u32 *)dst)->u = x; +#else + unsigned char *buf; + + buf = dst; + buf[0] = (unsigned char)x; + buf[1] = (unsigned char)(x >> 8); + buf[2] = (unsigned char)(x >> 16); + buf[3] = (unsigned char)(x >> 24); +#endif +} + +static inline void +br_enc32be(void *dst, uint32_t x) +{ +#if BR_BE_UNALIGNED + ((br_union_u32 *)dst)->u = x; +#else + unsigned char *buf; + + buf = dst; + buf[0] = (unsigned char)(x >> 24); + buf[1] = (unsigned char)(x >> 16); + buf[2] = (unsigned char)(x >> 8); + buf[3] = (unsigned char)x; +#endif +} + +static inline uint32_t +br_dec32le(const void *src) +{ +#if BR_LE_UNALIGNED + return ((const br_union_u32 *)src)->u; +#else + const unsigned char *buf; + + buf = src; + return 
(uint32_t)buf[0] + | ((uint32_t)buf[1] << 8) + | ((uint32_t)buf[2] << 16) + | ((uint32_t)buf[3] << 24); +#endif +} + +static inline uint32_t +br_dec32be(const void *src) +{ +#if BR_BE_UNALIGNED + return ((const br_union_u32 *)src)->u; +#else + const unsigned char *buf; + + buf = src; + return ((uint32_t)buf[0] << 24) + | ((uint32_t)buf[1] << 16) + | ((uint32_t)buf[2] << 8) + | (uint32_t)buf[3]; +#endif +} + +static inline void +br_enc64le(void *dst, uint64_t x) +{ +#if BR_LE_UNALIGNED + ((br_union_u64 *)dst)->u = x; +#else + unsigned char *buf; + + buf = dst; + br_enc32le(buf, (uint32_t)x); + br_enc32le(buf + 4, (uint32_t)(x >> 32)); +#endif +} + +static inline void +br_enc64be(void *dst, uint64_t x) +{ +#if BR_BE_UNALIGNED + ((br_union_u64 *)dst)->u = x; +#else + unsigned char *buf; + + buf = dst; + br_enc32be(buf, (uint32_t)(x >> 32)); + br_enc32be(buf + 4, (uint32_t)x); +#endif +} + +static inline uint64_t +br_dec64le(const void *src) +{ +#if BR_LE_UNALIGNED + return ((const br_union_u64 *)src)->u; +#else + const unsigned char *buf; + + buf = src; + return (uint64_t)br_dec32le(buf) + | ((uint64_t)br_dec32le(buf + 4) << 32); +#endif +} + +static inline uint64_t +br_dec64be(const void *src) +{ +#if BR_BE_UNALIGNED + return ((const br_union_u64 *)src)->u; +#else + const unsigned char *buf; + + buf = src; + return ((uint64_t)br_dec32be(buf) << 32) + | (uint64_t)br_dec32be(buf + 4); +#endif +} + +/* + * Range decoding and encoding (for several successive values). 
+ */ +void br_range_dec16le(uint16_t *v, size_t num, const void *src); +void br_range_dec16be(uint16_t *v, size_t num, const void *src); +void br_range_enc16le(void *dst, const uint16_t *v, size_t num); +void br_range_enc16be(void *dst, const uint16_t *v, size_t num); + +void br_range_dec32le(uint32_t *v, size_t num, const void *src); +void br_range_dec32be(uint32_t *v, size_t num, const void *src); +void br_range_enc32le(void *dst, const uint32_t *v, size_t num); +void br_range_enc32be(void *dst, const uint32_t *v, size_t num); + +void br_range_dec64le(uint64_t *v, size_t num, const void *src); +void br_range_dec64be(uint64_t *v, size_t num, const void *src); +void br_range_enc64le(void *dst, const uint64_t *v, size_t num); +void br_range_enc64be(void *dst, const uint64_t *v, size_t num); + +/* + * Byte-swap a 32-bit integer. + */ +static inline uint32_t +br_swap32(uint32_t x) +{ + x = ((x & (uint32_t)0x00FF00FF) << 8) + | ((x >> 8) & (uint32_t)0x00FF00FF); + return (x << 16) | (x >> 16); +} + +/* ==================================================================== */ +/* + * Support code for hash functions. + */ + +/* + * IV for MD5, SHA-1, SHA-224 and SHA-256. + */ +extern const uint32_t br_md5_IV[]; +extern const uint32_t br_sha1_IV[]; +extern const uint32_t br_sha224_IV[]; +extern const uint32_t br_sha256_IV[]; + +/* + * Round functions for MD5, SHA-1, SHA-224 and SHA-256 (SHA-224 and + * SHA-256 use the same round function). + */ +void br_md5_round(const unsigned char *buf, uint32_t *val); +void br_sha1_round(const unsigned char *buf, uint32_t *val); +void br_sha2small_round(const unsigned char *buf, uint32_t *val); + +/* + * The core function for the TLS PRF. It computes + * P_hash(secret, label + seed), and XORs the result into the dst buffer. 
+ */ +void br_tls_phash(void *dst, size_t len, + const br_hash_class *dig, + const void *secret, size_t secret_len, const char *label, + size_t seed_num, const br_tls_prf_seed_chunk *seed); + +/* + * Copy all configured hash implementations from a multihash context + * to another. + */ +static inline void +br_multihash_copyimpl(br_multihash_context *dst, + const br_multihash_context *src) +{ + memcpy((void *)dst->impl, src->impl, sizeof src->impl); +} + +/* ==================================================================== */ +/* + * Constant-time primitives. These functions manipulate 32-bit values in + * order to provide constant-time comparisons and multiplexers. + * + * Boolean values (the "ctl" bits) MUST have value 0 or 1. + * + * Implementation notes: + * ===================== + * + * The uintN_t types are unsigned and with width exactly N bits; the C + * standard guarantees that computations are performed modulo 2^N, and + * there can be no overflow. Negation (unary '-') works on unsigned types + * as well. + * + * The intN_t types are guaranteed to have width exactly N bits, with no + * padding bit, and using two's complement representation. Casting + * intN_t to uintN_t really is conversion modulo 2^N. Beware that intN_t + * types, being signed, trigger implementation-defined behaviour on + * overflow (including raising some signal): with GCC, while modular + * arithmetics are usually applied, the optimizer may assume that + * overflows don't occur (unless the -fwrapv command-line option is + * added); Clang has the additional -ftrapv option to explicitly trap on + * integer overflow or underflow. + */ + +/* + * Negate a boolean. + */ +static inline uint32_t +NOT(uint32_t ctl) +{ + return ctl ^ 1; +} + +/* + * Multiplexer: returns x if ctl == 1, y if ctl == 0. + */ +static inline uint32_t +MUX(uint32_t ctl, uint32_t x, uint32_t y) +{ + return y ^ (-ctl & (x ^ y)); +} + +/* + * Equality check: returns 1 if x == y, 0 otherwise. 
+ */ +static inline uint32_t +EQ(uint32_t x, uint32_t y) +{ + uint32_t q; + + q = x ^ y; + return NOT((q | -q) >> 31); +} + +/* + * Inequality check: returns 1 if x != y, 0 otherwise. + */ +static inline uint32_t +NEQ(uint32_t x, uint32_t y) +{ + uint32_t q; + + q = x ^ y; + return (q | -q) >> 31; +} + +/* + * Comparison: returns 1 if x > y, 0 otherwise. + */ +static inline uint32_t +GT(uint32_t x, uint32_t y) +{ + /* + * If both x < 2^31 and y < 2^31, then y-x will have its high + * bit set if x > y, cleared otherwise. + * + * If either x >= 2^31 or y >= 2^31 (but not both), then the + * result is the high bit of x. + * + * If both x >= 2^31 and y >= 2^31, then we can virtually + * subtract 2^31 from both, and we are back to the first case. + * Since (y-2^31)-(x-2^31) = y-x, the subtraction is already + * fine. + */ + uint32_t z; + + z = y - x; + return (z ^ ((x ^ y) & (x ^ z))) >> 31; +} + +/* + * Other comparisons (greater-or-equal, lower-than, lower-or-equal). + */ +#define GE(x, y) NOT(GT(y, x)) +#define LT(x, y) GT(y, x) +#define LE(x, y) NOT(GT(x, y)) + +/* + * General comparison: returned value is -1, 0 or 1, depending on + * whether x is lower than, equal to, or greater than y. + */ +static inline int32_t +CMP(uint32_t x, uint32_t y) +{ + return (int32_t)GT(x, y) | -(int32_t)GT(y, x); +} + +/* + * Returns 1 if x == 0, 0 otherwise. Take care that the operand is signed. + */ +static inline uint32_t +EQ0(int32_t x) +{ + uint32_t q; + + q = (uint32_t)x; + return ~(q | -q) >> 31; +} + +/* + * Returns 1 if x > 0, 0 otherwise. Take care that the operand is signed. + */ +static inline uint32_t +GT0(int32_t x) +{ + /* + * High bit of -x is 0 if x == 0, but 1 if x > 0. + */ + uint32_t q; + + q = (uint32_t)x; + return (~q & -q) >> 31; +} + +/* + * Returns 1 if x >= 0, 0 otherwise. Take care that the operand is signed. + */ +static inline uint32_t +GE0(int32_t x) +{ + return ~(uint32_t)x >> 31; +} + +/* + * Returns 1 if x < 0, 0 otherwise. 
Take care that the operand is signed. + */ +static inline uint32_t +LT0(int32_t x) +{ + return (uint32_t)x >> 31; +} + +/* + * Returns 1 if x <= 0, 0 otherwise. Take care that the operand is signed. + */ +static inline uint32_t +LE0(int32_t x) +{ + uint32_t q; + + /* + * ~-x has its high bit set if and only if -x is nonnegative (as + * a signed int), i.e. x is in the -(2^31-1) to 0 range. We must + * do an OR with x itself to account for x = -2^31. + */ + q = (uint32_t)x; + return (q | ~-q) >> 31; +} + +/* + * Conditional copy: src[] is copied into dst[] if and only if ctl is 1. + * dst[] and src[] may overlap completely (but not partially). + */ +void br_ccopy(uint32_t ctl, void *dst, const void *src, size_t len); + +#define CCOPY br_ccopy + +/* + * Compute the bit length of a 32-bit integer. Returned value is between 0 + * and 32 (inclusive). + */ +static inline uint32_t +BIT_LENGTH(uint32_t x) +{ + uint32_t k, c; + + k = NEQ(x, 0); + c = GT(x, 0xFFFF); x = MUX(c, x >> 16, x); k += c << 4; + c = GT(x, 0x00FF); x = MUX(c, x >> 8, x); k += c << 3; + c = GT(x, 0x000F); x = MUX(c, x >> 4, x); k += c << 2; + c = GT(x, 0x0003); x = MUX(c, x >> 2, x); k += c << 1; + k += GT(x, 0x0001); + return k; +} + +/* + * Compute the minimum of x and y. + */ +static inline uint32_t +MIN(uint32_t x, uint32_t y) +{ + return MUX(GT(x, y), y, x); +} + +/* + * Compute the maximum of x and y. + */ +static inline uint32_t +MAX(uint32_t x, uint32_t y) +{ + return MUX(GT(x, y), x, y); +} + +/* + * Multiply two 32-bit integers, with a 64-bit result. This default + * implementation assumes that the basic multiplication operator + * yields constant-time code. + */ +#define MUL(x, y) ((uint64_t)(x) * (uint64_t)(y)) + +#if BR_CT_MUL31 + +/* + * Alternate implementation of MUL31, that will be constant-time on some + * (old) platforms where the default MUL31 is not. 
Unfortunately, it is + * also substantially slower, and yields larger code, on more modern + * platforms, which is why it is deactivated by default. + * + * MUL31_lo() must do some extra work because on some platforms, the + * _signed_ multiplication may return early if the top bits are 1. + * Simply truncating (casting) the output of MUL31() would not be + * sufficient, because the compiler may notice that we keep only the low + * word, and then replace automatically the unsigned multiplication with + * a signed multiplication opcode. + */ +#define MUL31(x, y) ((uint64_t)((x) | (uint32_t)0x80000000) \ + * (uint64_t)((y) | (uint32_t)0x80000000) \ + - ((uint64_t)(x) << 31) - ((uint64_t)(y) << 31) \ + - ((uint64_t)1 << 62)) +static inline uint32_t +MUL31_lo(uint32_t x, uint32_t y) +{ + uint32_t xl, xh; + uint32_t yl, yh; + + xl = (x & 0xFFFF) | (uint32_t)0x80000000; + xh = (x >> 16) | (uint32_t)0x80000000; + yl = (y & 0xFFFF) | (uint32_t)0x80000000; + yh = (y >> 16) | (uint32_t)0x80000000; + return (xl * yl + ((xl * yh + xh * yl) << 16)) & (uint32_t)0x7FFFFFFF; +} + +#else + +/* + * Multiply two 31-bit integers, with a 62-bit result. This default + * implementation assumes that the basic multiplication operator + * yields constant-time code. + * The MUL31_lo() macro returns only the low 31 bits of the product. + */ +#define MUL31(x, y) ((uint64_t)(x) * (uint64_t)(y)) +#define MUL31_lo(x, y) (((uint32_t)(x) * (uint32_t)(y)) & (uint32_t)0x7FFFFFFF) + +#endif + +/* + * Multiply two words together; the sum of the lengths of the two + * operands must not exceed 31 (for instance, one operand may use 16 + * bits if the other fits on 15). If BR_CT_MUL15 is non-zero, then the + * macro will contain some extra operations that help in making the + * operation constant-time on some platforms, where the basic 32-bit + * multiplication is not constant-time. 
+ */ +#if BR_CT_MUL15 +#define MUL15(x, y) (((uint32_t)(x) | (uint32_t)0x80000000) \ + * ((uint32_t)(y) | (uint32_t)0x80000000) \ + & (uint32_t)0x7FFFFFFF) +#else +#define MUL15(x, y) ((uint32_t)(x) * (uint32_t)(y)) +#endif + +/* + * Arithmetic right shift (sign bit is copied). What happens when + * right-shifting a negative value is _implementation-defined_, so it + * does not trigger undefined behaviour, but it is still up to each + * compiler to define (and document) what it does. Most/all compilers + * will do an arithmetic shift, the sign bit being used to fill the + * holes; this is a native operation on the underlying CPU, and it would + * make little sense for the compiler to do otherwise. GCC explicitly + * documents that it follows that convention. + * + * Still, if BR_NO_ARITH_SHIFT is defined (and non-zero), then an + * alternate version will be used, that does not rely on such + * implementation-defined behaviour. Unfortunately, it is also slower + * and yields bigger code, which is why it is deactivated by default. + */ +#if BR_NO_ARITH_SHIFT +#define ARSH(x, n) (((uint32_t)(x) >> (n)) \ + | ((-((uint32_t)(x) >> 31)) << (32 - (n)))) +#else +#define ARSH(x, n) ((*(int32_t *)&(x)) >> (n)) +#endif + +/* + * Constant-time division. The dividend hi:lo is divided by the + * divisor d; the quotient is returned and the remainder is written + * in *r. If hi == d, then the quotient does not fit on 32 bits; + * returned value is thus truncated. If hi > d, returned values are + * indeterminate. + */ +uint32_t br_divrem(uint32_t hi, uint32_t lo, uint32_t d, uint32_t *r); + +/* + * Wrapper for br_divrem(); the remainder is returned, and the quotient + * is discarded. + */ +static inline uint32_t +br_rem(uint32_t hi, uint32_t lo, uint32_t d) +{ + uint32_t r; + + br_divrem(hi, lo, d, &r); + return r; +} + +/* + * Wrapper for br_divrem(); the quotient is returned, and the remainder + * is discarded. 
+ */ +static inline uint32_t +br_div(uint32_t hi, uint32_t lo, uint32_t d) +{ + uint32_t r; + + return br_divrem(hi, lo, d, &r); +} + +/* ==================================================================== */ + +/* + * Integers 'i32' + * -------------- + * + * The 'i32' functions implement computations on big integers using + * an internal representation as an array of 32-bit integers. For + * an array x[]: + * -- x[0] contains the "announced bit length" of the integer + * -- x[1], x[2]... contain the value in little-endian order (x[1] + * contains the least significant 32 bits) + * + * Multiplications rely on the elementary 32x32->64 multiplication. + * + * The announced bit length specifies the number of bits that are + * significant in the subsequent 32-bit words. Unused bits in the + * last (most significant) word are set to 0; subsequent words are + * uninitialized and need not exist at all. + * + * The execution time and memory access patterns of all computations + * depend on the announced bit length, but not on the actual word + * values. For modular integers, the announced bit length of any integer + * modulo n is equal to the actual bit length of n; thus, computations + * on modular integers are "constant-time" (only the modulus length may + * leak). + */ + +/* + * Compute the actual bit length of an integer. The argument x should + * point to the first (least significant) value word of the integer. + * The len 'xlen' contains the number of 32-bit words to access. + * + * CT: value or length of x does not leak. + */ +uint32_t br_i32_bit_length(uint32_t *x, size_t xlen); + +/* + * Decode an integer from its big-endian unsigned representation. The + * "true" bit length of the integer is computed, but all words of x[] + * corresponding to the full 'len' bytes of the source are set. + * + * CT: value or length of x does not leak. 
+ */ +void br_i32_decode(uint32_t *x, const void *src, size_t len); + +/* + * Decode an integer from its big-endian unsigned representation. The + * integer MUST be lower than m[]; the announced bit length written in + * x[] will be equal to that of m[]. All 'len' bytes from the source are + * read. + * + * Returned value is 1 if the decoded value fits within the modulus, 0 + * otherwise. In the latter case, the x[] buffer will be set to 0 (but + * still with the announced bit length of m[]). + * + * CT: value or length of x does not leak. Memory access pattern depends + * only on 'len' and the announced bit length of m. Whether x fits or + * not does not leak either. + */ +uint32_t br_i32_decode_mod(uint32_t *x, + const void *src, size_t len, const uint32_t *m); + +/* + * Reduce an integer (a[]) modulo another (m[]). The result is written + * in x[] and its announced bit length is set to be equal to that of m[]. + * + * x[] MUST be distinct from a[] and m[]. + * + * CT: only announced bit lengths leak, not values of x, a or m. + */ +void br_i32_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m); + +/* + * Decode an integer from its big-endian unsigned representation, and + * reduce it modulo the provided modulus m[]. The announced bit length + * of the result is set to be equal to that of the modulus. + * + * x[] MUST be distinct from m[]. + */ +void br_i32_decode_reduce(uint32_t *x, + const void *src, size_t len, const uint32_t *m); + +/* + * Encode an integer into its big-endian unsigned representation. The + * output length in bytes is provided (parameter 'len'); if the length + * is too short then the integer is appropriately truncated; if it is + * too long then the extra bytes are set to 0. + */ +void br_i32_encode(void *dst, size_t len, const uint32_t *x); + +/* + * Multiply x[] by 2^32 and then add integer z, modulo m[]. 
This + * function assumes that x[] and m[] have the same announced bit + * length, and the announced bit length of m[] matches its true + * bit length. + * + * x[] and m[] MUST be distinct arrays. + * + * CT: only the common announced bit length of x and m leaks, not + * the values of x, z or m. + */ +void br_i32_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m); + +/* + * Extract one word from an integer. The offset is counted in bits. + * The word MUST entirely fit within the word elements corresponding + * to the announced bit length of a[]. + */ +static inline uint32_t +br_i32_word(const uint32_t *a, uint32_t off) +{ + size_t u; + unsigned j; + + u = (size_t)(off >> 5) + 1; + j = (unsigned)off & 31; + if (j == 0) { + return a[u]; + } else { + return (a[u] >> j) | (a[u + 1] << (32 - j)); + } +} + +/* + * Test whether an integer is zero. + */ +uint32_t br_i32_iszero(const uint32_t *x); + +/* + * Add b[] to a[] and return the carry (0 or 1). If ctl is 0, then a[] + * is unmodified, but the carry is still computed and returned. The + * arrays a[] and b[] MUST have the same announced bit length. + * + * a[] and b[] MAY be the same array, but partial overlap is not allowed. + */ +uint32_t br_i32_add(uint32_t *a, const uint32_t *b, uint32_t ctl); + +/* + * Subtract b[] from a[] and return the carry (0 or 1). If ctl is 0, + * then a[] is unmodified, but the carry is still computed and returned. + * The arrays a[] and b[] MUST have the same announced bit length. + * + * a[] and b[] MAY be the same array, but partial overlap is not allowed. + */ +uint32_t br_i32_sub(uint32_t *a, const uint32_t *b, uint32_t ctl); + +/* + * Compute d+a*b, result in d. The initial announced bit length of d[] + * MUST match that of a[]. The d[] array MUST be large enough to + * accommodate the full result, plus (possibly) an extra word. 
The + * resulting announced bit length of d[] will be the sum of the announced + * bit lengths of a[] and b[] (therefore, it may be larger than the actual + * bit length of the numerical result). + * + * a[] and b[] may be the same array. d[] must be disjoint from both a[] + * and b[]. + */ +void br_i32_mulacc(uint32_t *d, const uint32_t *a, const uint32_t *b); + +/* + * Zeroize an integer. The announced bit length is set to the provided + * value, and the corresponding words are set to 0. + */ +static inline void +br_i32_zero(uint32_t *x, uint32_t bit_len) +{ + *x ++ = bit_len; + memset(x, 0, ((bit_len + 31) >> 5) * sizeof *x); +} + +/* + * Compute -(1/x) mod 2^32. If x is even, then this function returns 0. + */ +uint32_t br_i32_ninv32(uint32_t x); + +/* + * Convert a modular integer to Montgomery representation. The integer x[] + * MUST be lower than m[], but with the same announced bit length. + */ +void br_i32_to_monty(uint32_t *x, const uint32_t *m); + +/* + * Convert a modular integer back from Montgomery representation. The + * integer x[] MUST be lower than m[], but with the same announced bit + * length. The "m0i" parameter is equal to -(1/m0) mod 2^32, where m0 is + * the least significant value word of m[] (this works only if m[] is + * an odd integer). + */ +void br_i32_from_monty(uint32_t *x, const uint32_t *m, uint32_t m0i); + +/* + * Compute a modular Montgomery multiplication. d[] is filled with the + * value of x*y/R modulo m[] (where R is the Montgomery factor). The + * array d[] MUST be distinct from x[], y[] and m[]. x[] and y[] MUST be + * numerically lower than m[]. x[] and y[] MAY be the same array. The + * "m0i" parameter is equal to -(1/m0) mod 2^32, where m0 is the least + * significant value word of m[] (this works only if m[] is an odd + * integer). + */ +void br_i32_montymul(uint32_t *d, const uint32_t *x, const uint32_t *y, + const uint32_t *m, uint32_t m0i); + +/* + * Compute a modular exponentiation. 
x[] MUST be an integer modulo m[] + * (same announced bit length, lower value). m[] MUST be odd. The + * exponent is in big-endian unsigned notation, over 'elen' bytes. The + * "m0i" parameter is equal to -(1/m0) mod 2^32, where m0 is the least + * significant value word of m[] (this works only if m[] is an odd + * integer). The t1[] and t2[] parameters must be temporary arrays, + * each large enough to accommodate an integer with the same size as m[]. + */ +void br_i32_modpow(uint32_t *x, const unsigned char *e, size_t elen, + const uint32_t *m, uint32_t m0i, uint32_t *t1, uint32_t *t2); + +/* ==================================================================== */ + +/* + * Integers 'i31' + * -------------- + * + * The 'i31' functions implement computations on big integers using + * an internal representation as an array of 32-bit integers. For + * an array x[]: + * -- x[0] encodes the array length and the "announced bit length" + * of the integer: namely, if the announced bit length is k, + * then x[0] = ((k / 31) << 5) + (k % 31). + * -- x[1], x[2]... contain the value in little-endian order, 31 + * bits per word (x[1] contains the least significant 31 bits). + * The upper bit of each word is 0. + * + * Multiplications rely on the elementary 32x32->64 multiplication. + * + * The announced bit length specifies the number of bits that are + * significant in the subsequent 32-bit words. Unused bits in the + * last (most significant) word are set to 0; subsequent words are + * uninitialized and need not exist at all. + * + * The execution time and memory access patterns of all computations + * depend on the announced bit length, but not on the actual word + * values. For modular integers, the announced bit length of any integer + * modulo n is equal to the actual bit length of n; thus, computations + * on modular integers are "constant-time" (only the modulus length may + * leak). + */ + +/* + * Test whether an integer is zero. 
+ */ +uint32_t br_i31_iszero(const uint32_t *x); + +/* + * Add b[] to a[] and return the carry (0 or 1). If ctl is 0, then a[] + * is unmodified, but the carry is still computed and returned. The + * arrays a[] and b[] MUST have the same announced bit length. + * + * a[] and b[] MAY be the same array, but partial overlap is not allowed. + */ +uint32_t br_i31_add(uint32_t *a, const uint32_t *b, uint32_t ctl); + +/* + * Subtract b[] from a[] and return the carry (0 or 1). If ctl is 0, + * then a[] is unmodified, but the carry is still computed and returned. + * The arrays a[] and b[] MUST have the same announced bit length. + * + * a[] and b[] MAY be the same array, but partial overlap is not allowed. + */ +uint32_t br_i31_sub(uint32_t *a, const uint32_t *b, uint32_t ctl); + +/* + * Compute the ENCODED actual bit length of an integer. The argument x + * should point to the first (least significant) value word of the + * integer. The len 'xlen' contains the number of 32-bit words to + * access. The upper bit of each value word MUST be 0. + * Returned value is ((k / 31) << 5) + (k % 31) if the bit length is k. + * + * CT: value or length of x does not leak. + */ +uint32_t br_i31_bit_length(uint32_t *x, size_t xlen); + +/* + * Decode an integer from its big-endian unsigned representation. The + * "true" bit length of the integer is computed and set in the encoded + * announced bit length (x[0]), but all words of x[] corresponding to + * the full 'len' bytes of the source are set. + * + * CT: value or length of x does not leak. + */ +void br_i31_decode(uint32_t *x, const void *src, size_t len); + +/* + * Decode an integer from its big-endian unsigned representation. The + * integer MUST be lower than m[]; the (encoded) announced bit length + * written in x[] will be equal to that of m[]. All 'len' bytes from the + * source are read. + * + * Returned value is 1 if the decode value fits within the modulus, 0 + * otherwise. 
In the latter case, the x[] buffer will be set to 0 (but + * still with the announced bit length of m[]). + * + * CT: value or length of x does not leak. Memory access pattern depends + * only on 'len' and the announced bit length of m. Whether x fits or + * not does not leak either. + */ +uint32_t br_i31_decode_mod(uint32_t *x, + const void *src, size_t len, const uint32_t *m); + +/* + * Zeroize an integer. The announced bit length is set to the provided + * value, and the corresponding words are set to 0. The ENCODED bit length + * is expected here. + * + * For a bit length k encoded as ((k / 31) << 5) + (k % 31), the + * expression (bit_len + 31) >> 5 yields (k / 31), plus one if (k % 31) + * is not zero, i.e. the number of 31-bit value words needed for k bits. + */ +static inline void +br_i31_zero(uint32_t *x, uint32_t bit_len) +{ + *x ++ = bit_len; + memset(x, 0, ((bit_len + 31) >> 5) * sizeof *x); +} + +/* + * Right-shift an integer. The shift amount must be lower than 31 + * bits. + */ +void br_i31_rshift(uint32_t *x, int count); + +/* + * Reduce an integer (a[]) modulo another (m[]). The result is written + * in x[] and its announced bit length is set to be equal to that of m[]. + * + * x[] MUST be distinct from a[] and m[]. + * + * CT: only announced bit lengths leak, not values of x, a or m. + */ +void br_i31_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m); + +/* + * Decode an integer from its big-endian unsigned representation, and + * reduce it modulo the provided modulus m[]. The announced bit length + * of the result is set to be equal to that of the modulus. + * + * x[] MUST be distinct from m[]. + */ +void br_i31_decode_reduce(uint32_t *x, + const void *src, size_t len, const uint32_t *m); + +/* + * Multiply x[] by 2^31 and then add integer z, modulo m[]. This + * function assumes that x[] and m[] have the same announced bit + * length, and the announced bit length of m[] matches its true + * bit length. + * + * x[] and m[] MUST be distinct arrays. z MUST fit in 31 bits (upper + * bit set to 0). + * + * CT: only the common announced bit length of x and m leaks, not + * the values of x, z or m.
+ */ +void br_i31_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m); + +/* + * Encode an integer into its big-endian unsigned representation. The + * output length in bytes is provided (parameter 'len'); if the length + * is too short then the integer is appropriately truncated; if it is + * too long then the extra bytes are set to 0. + */ +void br_i31_encode(void *dst, size_t len, const uint32_t *x); + +/* + * Compute -(1/x) mod 2^31. If x is even, then this function returns 0. + */ +uint32_t br_i31_ninv31(uint32_t x); + +/* + * Compute a modular Montgomery multiplication. d[] is filled with the + * value of x*y/R modulo m[] (where R is the Montgomery factor). The + * array d[] MUST be distinct from x[], y[] and m[]. x[] and y[] MUST be + * numerically lower than m[]. x[] and y[] MAY be the same array. The + * "m0i" parameter is equal to -(1/m0) mod 2^31, where m0 is the least + * significant value word of m[] (this works only if m[] is an odd + * integer). + */ +void br_i31_montymul(uint32_t *d, const uint32_t *x, const uint32_t *y, + const uint32_t *m, uint32_t m0i); + +/* + * Convert a modular integer to Montgomery representation. The integer x[] + * MUST be lower than m[], but with the same announced bit length. + */ +void br_i31_to_monty(uint32_t *x, const uint32_t *m); + +/* + * Convert a modular integer back from Montgomery representation. The + * integer x[] MUST be lower than m[], but with the same announced bit + * length. The "m0i" parameter is equal to -(1/m0) mod 2^31, where m0 is + * the least significant value word of m[] (this works only if m[] is + * an odd integer). + */ +void br_i31_from_monty(uint32_t *x, const uint32_t *m, uint32_t m0i); + +/* + * Compute a modular exponentiation. x[] MUST be an integer modulo m[] + * (same announced bit length, lower value). m[] MUST be odd. The + * exponent is in big-endian unsigned notation, over 'elen' bytes.
The + * "m0i" parameter is equal to -(1/m0) mod 2^31, where m0 is the least + * significant value word of m[] (this works only if m[] is an odd + * integer). The t1[] and t2[] parameters must be temporary arrays, + * each large enough to accommodate an integer with the same size as m[]. + */ +void br_i31_modpow(uint32_t *x, const unsigned char *e, size_t elen, + const uint32_t *m, uint32_t m0i, uint32_t *t1, uint32_t *t2); + +/* + * Compute a modular exponentiation. x[] MUST be an integer modulo m[] + * (same announced bit length, lower value). m[] MUST be odd. The + * exponent is in big-endian unsigned notation, over 'elen' bytes. The + * "m0i" parameter is equal to -(1/m0) mod 2^31, where m0 is the least + * significant value word of m[] (this works only if m[] is an odd + * integer). The tmp[] array is used for temporaries, and has size + * 'twlen' words; it must be large enough to accommodate at least two + * temporary values with the same size as m[] (including the leading + * "bit length" word). If there is room for more temporaries, then this + * function may use the extra room for window-based optimisation, + * resulting in faster computations. + * + * Returned value is 1 on success, 0 on error. An error is reported if + * the provided tmp[] array is too short. + */ +uint32_t br_i31_modpow_opt(uint32_t *x, const unsigned char *e, size_t elen, + const uint32_t *m, uint32_t m0i, uint32_t *tmp, size_t twlen); + +/* + * Compute d+a*b, result in d. The initial announced bit length of d[] + * MUST match that of a[]. The d[] array MUST be large enough to + * accommodate the full result, plus (possibly) an extra word. The + * resulting announced bit length of d[] will be the sum of the announced + * bit lengths of a[] and b[] (therefore, it may be larger than the actual + * bit length of the numerical result). + * + * a[] and b[] may be the same array. d[] must be disjoint from both a[] + * and b[]. 
+ */ +void br_i31_mulacc(uint32_t *d, const uint32_t *a, const uint32_t *b); + +/* + * Compute x/y mod m, result in x. Values x and y must be between 0 and + * m-1, and have the same announced bit length as m. Modulus m must be + * odd. The "m0i" parameter is equal to -1/m mod 2^31. The array 't' + * must point to a temporary area that can hold at least three integers + * of the size of m. + * + * m may not overlap x and y. x and y may overlap each other (this can + * be useful to test whether a value is invertible modulo m). t must be + * disjoint from all other arrays. + * + * Returned value is 1 on success, 0 otherwise. Success is attained if + * y is invertible modulo m. + */ +uint32_t br_i31_moddiv(uint32_t *x, const uint32_t *y, + const uint32_t *m, uint32_t m0i, uint32_t *t); + +/* ==================================================================== */ + +/* + * FIXME: document "i15" functions. + */ + +/* + * Zeroize an integer: bit_len is stored in the header word x[0], and + * the (bit_len + 15) >> 4 16-bit words that follow it are set to 0. + * NOTE(review): bit_len is presumably the ENCODED bit length, by + * analogy with br_i31_zero() -- confirm against the i15 representation. + */ +static inline void +br_i15_zero(uint16_t *x, uint16_t bit_len) +{ + *x ++ = bit_len; + memset(x, 0, ((bit_len + 15) >> 4) * sizeof *x); +} + +uint32_t br_i15_iszero(const uint16_t *x); + +uint16_t br_i15_ninv15(uint16_t x); + +uint32_t br_i15_add(uint16_t *a, const uint16_t *b, uint32_t ctl); + +uint32_t br_i15_sub(uint16_t *a, const uint16_t *b, uint32_t ctl); + +void br_i15_muladd_small(uint16_t *x, uint16_t z, const uint16_t *m); + +void br_i15_montymul(uint16_t *d, const uint16_t *x, const uint16_t *y, + const uint16_t *m, uint16_t m0i); + +void br_i15_to_monty(uint16_t *x, const uint16_t *m); + +void br_i15_modpow(uint16_t *x, const unsigned char *e, size_t elen, + const uint16_t *m, uint16_t m0i, uint16_t *t1, uint16_t *t2); + +uint32_t br_i15_modpow_opt(uint16_t *x, const unsigned char *e, size_t elen, + const uint16_t *m, uint16_t m0i, uint16_t *tmp, size_t twlen); + +void br_i15_encode(void *dst, size_t len, const uint16_t *x); + +uint32_t br_i15_decode_mod(uint16_t *x, + const void *src, size_t len, const uint16_t *m); + +void br_i15_rshift(uint16_t
*x, int count); + +uint32_t br_i15_bit_length(uint16_t *x, size_t xlen); + +void br_i15_decode(uint16_t *x, const void *src, size_t len); + +void br_i15_from_monty(uint16_t *x, const uint16_t *m, uint16_t m0i); + +void br_i15_decode_reduce(uint16_t *x, + const void *src, size_t len, const uint16_t *m); + +void br_i15_reduce(uint16_t *x, const uint16_t *a, const uint16_t *m); + +void br_i15_mulacc(uint16_t *d, const uint16_t *a, const uint16_t *b); + +uint32_t br_i15_moddiv(uint16_t *x, const uint16_t *y, + const uint16_t *m, uint16_t m0i, uint16_t *t); + +/* + * Variant of br_i31_modpow_opt() that internally uses 64x64->128 + * multiplications. It expects the same parameters as br_i31_modpow_opt(), + * except that the temporaries should be 64-bit integers, not 32-bit + * integers. + */ +uint32_t br_i62_modpow_opt(uint32_t *x31, const unsigned char *e, size_t elen, + const uint32_t *m31, uint32_t m0i31, uint64_t *tmp, size_t twlen); + +/* + * Type for a function with the same API as br_i31_modpow_opt() (some + * implementations of this type may have stricter alignment requirements + * on the temporaries). + */ +typedef uint32_t (*br_i31_modpow_opt_type)(uint32_t *x, + const unsigned char *e, size_t elen, + const uint32_t *m, uint32_t m0i, uint32_t *tmp, size_t twlen); + +/* + * Wrapper for br_i62_modpow_opt() that uses the same type as + * br_i31_modpow_opt(); however, it requires its 'tmp' argument to be + * 64-bit aligned. + */ +uint32_t br_i62_modpow_opt_as_i31(uint32_t *x, + const unsigned char *e, size_t elen, + const uint32_t *m, uint32_t m0i, uint32_t *tmp, size_t twlen); + +/* ==================================================================== */ + +/* + * Get the output size of a hash function from its class descriptor: + * the 'desc' field is shifted right by BR_HASHDESC_OUT_OFF, then masked + * with BR_HASHDESC_OUT_MASK. + */ +static inline size_t +br_digest_size(const br_hash_class *digest_class) +{ + return (size_t)(digest_class->desc >> BR_HASHDESC_OUT_OFF) + & BR_HASHDESC_OUT_MASK; +} + +/* + * Get the output size (in bytes) of a hash function.
+ */ +size_t br_digest_size_by_ID(int digest_id); + +/* + * Get the OID (encoded OBJECT IDENTIFIER value, without tag and length) + * for a hash function. If digest_id is not a supported digest identifier + * (in particular if it is equal to 0, i.e. br_md5sha1_ID), then NULL is + * returned and *len is set to 0. + */ +const unsigned char *br_digest_OID(int digest_id, size_t *len); + +/* ==================================================================== */ +/* + * DES support functions. + */ + +/* + * Apply DES Initial Permutation. + */ +void br_des_do_IP(uint32_t *xl, uint32_t *xr); + +/* + * Apply DES Final Permutation (inverse of IP). + */ +void br_des_do_invIP(uint32_t *xl, uint32_t *xr); + +/* + * Key schedule unit: for a DES key (8 bytes), compute 16 subkeys. Each + * subkey is two 28-bit words represented as two 32-bit words; the PC-2 + * bit extraction is NOT applied. + */ +void br_des_keysched_unit(uint32_t *skey, const void *key); + +/* + * Reversal of 16 DES sub-keys (for decryption). + */ +void br_des_rev_skey(uint32_t *skey); + +/* + * DES/3DES key schedule for 'des_tab' (encryption direction). Returned + * value is the number of rounds. + */ +unsigned br_des_tab_keysched(uint32_t *skey, const void *key, size_t key_len); + +/* + * DES/3DES key schedule for 'des_ct' (encryption direction). Returned + * value is the number of rounds. + */ +unsigned br_des_ct_keysched(uint32_t *skey, const void *key, size_t key_len); + +/* + * DES/3DES subkey decompression (from the compressed bitsliced subkeys). + */ +void br_des_ct_skey_expand(uint32_t *sk_exp, + unsigned num_rounds, const uint32_t *skey); + +/* + * DES/3DES block encryption/decryption ('des_tab'). + */ +void br_des_tab_process_block(unsigned num_rounds, + const uint32_t *skey, void *block); + +/* + * DES/3DES block encryption/decryption ('des_ct').
+ */ +void br_des_ct_process_block(unsigned num_rounds, + const uint32_t *skey, void *block); + +/* ==================================================================== */ +/* + * AES support functions. + */ + +/* + * The AES S-box (256-byte table). + */ +extern const unsigned char br_aes_S[]; + +/* + * AES key schedule. skey[] is filled with n+1 128-bit subkeys, where n + * is the number of rounds (10 to 14, depending on key size). The number + * of rounds is returned. If the key size is invalid (not 16, 24 or 32), + * then 0 is returned. + * + * This implementation uses a 256-byte table and is NOT constant-time. + */ +unsigned br_aes_keysched(uint32_t *skey, const void *key, size_t key_len); + +/* + * AES key schedule for decryption ('aes_big' implementation). + */ +unsigned br_aes_big_keysched_inv(uint32_t *skey, + const void *key, size_t key_len); + +/* + * AES block encryption with the 'aes_big' implementation (fast, but + * not constant-time). This function encrypts a single block "in place". + */ +void br_aes_big_encrypt(unsigned num_rounds, const uint32_t *skey, void *data); + +/* + * AES block decryption with the 'aes_big' implementation (fast, but + * not constant-time). This function decrypts a single block "in place". + */ +void br_aes_big_decrypt(unsigned num_rounds, const uint32_t *skey, void *data); + +/* + * AES block encryption with the 'aes_small' implementation (small, but + * slow and not constant-time). This function encrypts a single block + * "in place". + */ +void br_aes_small_encrypt(unsigned num_rounds, + const uint32_t *skey, void *data); + +/* + * AES block decryption with the 'aes_small' implementation (small, but + * slow and not constant-time). This function decrypts a single block + * "in place". 
+ */ +void br_aes_small_decrypt(unsigned num_rounds, + const uint32_t *skey, void *data); + +/* + * The constant-time implementation is "bitsliced": the 128-bit state is + * split over eight 32-bit words q* in the following way: + * + * -- Input block consists in 16 bytes: + * a00 a10 a20 a30 a01 a11 a21 a31 a02 a12 a22 a32 a03 a13 a23 a33 + * In the terminology of FIPS 197, this is a 4x4 matrix which is read + * column by column. + * + * -- Each byte is split into eight bits which are distributed over the + * eight words, at the same rank. Thus, for a byte x at rank k, bit 0 + * (least significant) of x will be at rank k in q0 (if that bit is b, + * then it contributes "b << k" to the value of q0), bit 1 of x will be + * at rank k in q1, and so on. + * + * -- Ranks given to bits are in "row order" and are either all even, or + * all odd. Two independent AES states are thus interleaved, one using + * the even ranks, the other the odd ranks. Row order means: + * a00 a01 a02 a03 a10 a11 a12 a13 a20 a21 a22 a23 a30 a31 a32 a33 + * + * Converting input bytes from two AES blocks to bitslice representation + * is done in the following way: + * -- Decode first block into the four words q0 q2 q4 q6, in that order, + * using little-endian convention. + * -- Decode second block into the four words q1 q3 q5 q7, in that order, + * using little-endian convention. + * -- Call br_aes_ct_ortho(). + * + * Converting back to bytes is done by using the reverse operations. Note + * that br_aes_ct_ortho() is its own inverse. + */ + +/* + * Perform bytewise orthogonalization of eight 32-bit words. Bytes + * of q0..q7 are spread over all words: for a byte x that occurs + * at rank i in q[j] (byte x uses bits 8*i to 8*i+7 in q[j]), the bit + * of rank k in x (0 <= k <= 7) goes to q[k] at rank 8*i+j. + * + * This operation is an involution. + */ +void br_aes_ct_ortho(uint32_t *q); + +/* + * The AES S-box, as a bitsliced constant-time version. 
The input array + * consists in eight 32-bit words; 32 S-box instances are computed in + * parallel. Bits 0 to 7 of each S-box input (bit 0 is least significant) + * are spread over the words 0 to 7, at the same rank. + */ +void br_aes_ct_bitslice_Sbox(uint32_t *q); + +/* + * Like br_aes_ct_bitslice_Sbox(), but for the inverse S-box. + */ +void br_aes_ct_bitslice_invSbox(uint32_t *q); + +/* + * Compute AES encryption on bitsliced data. Since input is stored on + * eight 32-bit words, two block encryptions are actually performed + * in parallel. + */ +void br_aes_ct_bitslice_encrypt(unsigned num_rounds, + const uint32_t *skey, uint32_t *q); + +/* + * Compute AES decryption on bitsliced data. Since input is stored on + * eight 32-bit words, two block decryptions are actually performed + * in parallel. + */ +void br_aes_ct_bitslice_decrypt(unsigned num_rounds, + const uint32_t *skey, uint32_t *q); + +/* + * AES key schedule, constant-time version. skey[] is filled with n+1 + * 128-bit subkeys, where n is the number of rounds (10 to 14, depending + * on key size). The number of rounds is returned. If the key size is + * invalid (not 16, 24 or 32), then 0 is returned. + */ +unsigned br_aes_ct_keysched(uint32_t *comp_skey, + const void *key, size_t key_len); + +/* + * Expand AES subkeys as produced by br_aes_ct_keysched(), into + * a larger array suitable for br_aes_ct_bitslice_encrypt() and + * br_aes_ct_bitslice_decrypt(). + */ +void br_aes_ct_skey_expand(uint32_t *skey, + unsigned num_rounds, const uint32_t *comp_skey); + +/* + * For the ct64 implementation, the same bitslicing technique is used, + * but four instances are interleaved. First instance uses bits 0, 4, + * 8, 12,... of each word; second instance uses bits 1, 5, 9, 13,... + * and so on. + */ + +/* + * Perform bytewise orthogonalization of eight 64-bit words.
Bytes + * of q0..q7 are spread over all words: for a byte x that occurs + * at rank i in q[j] (byte x uses bits 8*i to 8*i+7 in q[j]), the bit + * of rank k in x (0 <= k <= 7) goes to q[k] at rank 8*i+j. + * + * This operation is an involution. + */ +void br_aes_ct64_ortho(uint64_t *q); + +/* + * Interleave bytes for an AES input block. If input bytes are + * denoted 0123456789ABCDEF, and have been decoded with little-endian + * convention (w[0] contains 0123, with '3' being most significant; + * w[1] contains 4567, and so on), then output word q0 will be + * set to 08192A3B (again little-endian convention) and q1 will + * be set to 4C5D6E7F. + */ +void br_aes_ct64_interleave_in(uint64_t *q0, uint64_t *q1, const uint32_t *w); + +/* + * Perform the opposite of br_aes_ct64_interleave_in(). + */ +void br_aes_ct64_interleave_out(uint32_t *w, uint64_t q0, uint64_t q1); + +/* + * The AES S-box, as a bitsliced constant-time version. The input array + * consists in eight 64-bit words; 64 S-box instances are computed in + * parallel. Bits 0 to 7 of each S-box input (bit 0 is least significant) + * are spread over the words 0 to 7, at the same rank. + */ +void br_aes_ct64_bitslice_Sbox(uint64_t *q); + +/* + * Like br_aes_ct64_bitslice_Sbox(), but for the inverse S-box. + */ +void br_aes_ct64_bitslice_invSbox(uint64_t *q); + +/* + * Compute AES encryption on bitsliced data. Since input is stored on + * eight 64-bit words, four block encryptions are actually performed + * in parallel. + */ +void br_aes_ct64_bitslice_encrypt(unsigned num_rounds, + const uint64_t *skey, uint64_t *q); + +/* + * Compute AES decryption on bitsliced data. Since input is stored on + * eight 64-bit words, four block decryptions are actually performed + * in parallel. + */ +void br_aes_ct64_bitslice_decrypt(unsigned num_rounds, + const uint64_t *skey, uint64_t *q); + +/* + * AES key schedule, constant-time version.
skey[] is filled with n+1 + * 128-bit subkeys, where n is the number of rounds (10 to 14, depending + * on key size). The number of rounds is returned. If the key size is + * invalid (not 16, 24 or 32), then 0 is returned. + */ +unsigned br_aes_ct64_keysched(uint64_t *comp_skey, + const void *key, size_t key_len); + +/* + * Expand AES subkeys as produced by br_aes_ct64_keysched(), into + * a larger array suitable for br_aes_ct64_bitslice_encrypt() and + * br_aes_ct64_bitslice_decrypt(). + */ +void br_aes_ct64_skey_expand(uint64_t *skey, + unsigned num_rounds, const uint64_t *comp_skey); + +/* + * Test support for AES-NI opcodes. + */ +int br_aes_x86ni_supported(void); + +/* + * AES key schedule, using x86 AES-NI instructions. This yields the + * subkeys in the encryption direction. Number of rounds is returned. + * Key size MUST be 16, 24 or 32 bytes; otherwise, 0 is returned. + */ +unsigned br_aes_x86ni_keysched_enc(unsigned char *skni, + const void *key, size_t len); + +/* + * AES key schedule, using x86 AES-NI instructions. This yields the + * subkeys in the decryption direction. Number of rounds is returned. + * Key size MUST be 16, 24 or 32 bytes; otherwise, 0 is returned. + */ +unsigned br_aes_x86ni_keysched_dec(unsigned char *skni, + const void *key, size_t len); + +/* + * Test support for AES POWER8 opcodes. + */ +int br_aes_pwr8_supported(void); + +/* + * AES key schedule, using POWER8 instructions. This yields the + * subkeys in the encryption direction. Number of rounds is returned. + * Key size MUST be 16, 24 or 32 bytes; otherwise, 0 is returned. + */ +unsigned br_aes_pwr8_keysched(unsigned char *skni, + const void *key, size_t len); + +/* ==================================================================== */ +/* + * RSA. + */ + +/* + * Apply proper PKCS#1 v1.5 padding (for signatures). 'hash_oid' is + * the encoded hash function OID, or NULL. 
+ */ +uint32_t br_rsa_pkcs1_sig_pad(const unsigned char *hash_oid, + const unsigned char *hash, size_t hash_len, + uint32_t n_bitlen, unsigned char *x); + +/* + * Check PKCS#1 v1.5 padding (for signatures). 'hash_oid' is the encoded + * hash function OID, or NULL. The provided 'sig' value is _after_ the + * modular exponentiation, i.e. it should be the padded hash. On + * success, the hashed message is extracted. + */ +uint32_t br_rsa_pkcs1_sig_unpad(const unsigned char *sig, size_t sig_len, + const unsigned char *hash_oid, size_t hash_len, + unsigned char *hash_out); + +/* + * Apply proper PSS padding. The 'x' buffer is output only: it + * receives the value that is to be exponentiated. + */ +uint32_t br_rsa_pss_sig_pad(const br_prng_class **rng, + const br_hash_class *hf_data, const br_hash_class *hf_mgf1, + const unsigned char *hash, size_t salt_len, + uint32_t n_bitlen, unsigned char *x); + +/* + * Check PSS padding. The provided value is the one _after_ + * the modular exponentiation; it is modified by this function. + * This function infers the signature length from the public key + * size, i.e. it assumes that this has already been verified (as + * part of the exponentiation). + */ +uint32_t br_rsa_pss_sig_unpad( + const br_hash_class *hf_data, const br_hash_class *hf_mgf1, + const unsigned char *hash, size_t salt_len, + const br_rsa_public_key *pk, unsigned char *x); + +/* + * Apply OAEP padding. Returned value is the actual padded string length, + * or zero on error. + */ +size_t br_rsa_oaep_pad(const br_prng_class **rnd, const br_hash_class *dig, + const void *label, size_t label_len, const br_rsa_public_key *pk, + void *dst, size_t dst_nax_len, const void *src, size_t src_len); + +/* + * Unravel and check OAEP padding. If the padding is correct, then 1 is + * returned, '*len' is adjusted to the length of the message, and the + * data is moved to the start of the 'data' buffer. If the padding is + * incorrect, then 0 is returned and '*len' is untouched. 
Either way, + * the complete buffer contents are altered. + */ +uint32_t br_rsa_oaep_unpad(const br_hash_class *dig, + const void *label, size_t label_len, void *data, size_t *len); + +/* + * Compute MGF1 for a given seed, and XOR the output into the provided + * buffer. + */ +void br_mgf1_xor(void *data, size_t len, + const br_hash_class *dig, const void *seed, size_t seed_len); + +/* + * Inner function for RSA key generation; used by the "i31" and "i62" + * implementations. + */ +uint32_t br_rsa_i31_keygen_inner(const br_prng_class **rng, + br_rsa_private_key *sk, void *kbuf_priv, + br_rsa_public_key *pk, void *kbuf_pub, + unsigned size, uint32_t pubexp, br_i31_modpow_opt_type mp31); + +/* ==================================================================== */ +/* + * Elliptic curves. + */ + +/* + * Type for generic EC parameters: curve order (unsigned big-endian + * encoding) and encoded conventional generator. + */ +typedef struct { + int curve; + const unsigned char *order; + size_t order_len; + const unsigned char *generator; + size_t generator_len; +} br_ec_curve_def; + +extern const br_ec_curve_def br_secp256r1; +extern const br_ec_curve_def br_secp384r1; +extern const br_ec_curve_def br_secp521r1; + +/* + * For Curve25519, the advertised "order" really is 2^255-1, since the + * point multiplication function really works over arbitrary 255-bit + * scalars. This value is only meant as a hint for ECDH key generation; + * only ECDSA uses the exact curve order, and ECDSA is not used with + * that specific curve. + */ +extern const br_ec_curve_def br_curve25519; + +/* + * Decode some bytes as an i31 integer, with truncation (corresponding + * to the 'bits2int' operation in RFC 6979). The target ENCODED bit + * length is provided as last parameter. The resulting value will have + * this declared bit length, and consists of the big-endian unsigned decoding + * of exactly that many bits in the source (capped at the source length).
+ */ +void br_ecdsa_i31_bits2int(uint32_t *x, + const void *src, size_t len, uint32_t ebitlen); + +/* + * Decode some bytes as an i15 integer, with truncation (corresponding + * to the 'bits2int' operation in RFC 6979). The target ENCODED bit + * length is provided as last parameter. The resulting value will have + * this declared bit length, and consists of the big-endian unsigned decoding + * of exactly that many bits in the source (capped at the source length). + */ +void br_ecdsa_i15_bits2int(uint16_t *x, + const void *src, size_t len, uint32_t ebitlen); + +/* ==================================================================== */ +/* + * ASN.1 support functions. + */ + +/* + * A br_asn1_uint structure contains encoding information about an + * INTEGER nonnegative value: pointer to the integer contents (unsigned + * big-endian representation), length of the integer contents, + * and length of the encoded value. The data shall have minimal length: + * - If the integer value is zero, then 'len' must be zero. + * - If the integer value is not zero, then data[0] must be non-zero. + * + * Under these conditions, 'asn1len' is necessarily equal to either len + * or len+1. + */ +typedef struct { + const unsigned char *data; + size_t len; + size_t asn1len; +} br_asn1_uint; + +/* + * Given an encoded integer (unsigned big-endian, with possible leading + * bytes of value 0), return the "prepared INTEGER" structure. + */ +br_asn1_uint br_asn1_uint_prepare(const void *xdata, size_t xlen); + +/* + * Encode an ASN.1 length. The length of the encoded length is returned. + * If 'dest' is NULL, then no encoding is performed, but the length of + * the encoded length is still computed and returned. + */ +size_t br_asn1_encode_length(void *dest, size_t len); + +/* + * Convenient macro for computing lengths of lengths. + */ +#define len_of_len(len) br_asn1_encode_length(NULL, len) + +/* + * Encode a (prepared) ASN.1 INTEGER. The encoded length is returned.
+ * If 'dest' is NULL, then no encoding is performed, but the length of + * the encoded integer is still computed and returned. + */ +size_t br_asn1_encode_uint(void *dest, br_asn1_uint pp); + +/* + * Get the OID that identifies an elliptic curve. Returned value is + * the DER-encoded OID, with the length (always one byte) but without + * the tag. Thus, the first byte of the returned buffer contains the + * number of subsequent bytes in the value. If the curve is not + * recognised, NULL is returned. + */ +const unsigned char *br_get_curve_OID(int curve); + +/* + * Inner function for EC private key encoding. This is equivalent to + * the API function br_encode_ec_raw_der(), except for an extra + * parameter: if 'include_curve_oid' is zero, then the curve OID is + * _not_ included in the output blob (this is for PKCS#8 support). + */ +size_t br_encode_ec_raw_der_inner(void *dest, + const br_ec_private_key *sk, const br_ec_public_key *pk, + int include_curve_oid); + +/* ==================================================================== */ +/* + * SSL/TLS support functions. + */ + +/* + * Record types. + */ +#define BR_SSL_CHANGE_CIPHER_SPEC 20 +#define BR_SSL_ALERT 21 +#define BR_SSL_HANDSHAKE 22 +#define BR_SSL_APPLICATION_DATA 23 + +/* + * Handshake message types. + */ +#define BR_SSL_HELLO_REQUEST 0 +#define BR_SSL_CLIENT_HELLO 1 +#define BR_SSL_SERVER_HELLO 2 +#define BR_SSL_CERTIFICATE 11 +#define BR_SSL_SERVER_KEY_EXCHANGE 12 +#define BR_SSL_CERTIFICATE_REQUEST 13 +#define BR_SSL_SERVER_HELLO_DONE 14 +#define BR_SSL_CERTIFICATE_VERIFY 15 +#define BR_SSL_CLIENT_KEY_EXCHANGE 16 +#define BR_SSL_FINISHED 20 + +/* + * Alert levels. + */ +#define BR_LEVEL_WARNING 1 +#define BR_LEVEL_FATAL 2 + +/* + * Low-level I/O state. + */ +#define BR_IO_FAILED 0 +#define BR_IO_IN 1 +#define BR_IO_OUT 2 +#define BR_IO_INOUT 3 + +/* + * Mark a SSL engine as failed. The provided error code is recorded if + * the engine was not already marked as failed. 
If 'err' is 0, then the + * engine is marked as closed (without error). + */ +void br_ssl_engine_fail(br_ssl_engine_context *cc, int err); + +/* + * Test whether the engine is closed (normally or as a failure). + */ +static inline int +br_ssl_engine_closed(const br_ssl_engine_context *cc) +{ + return cc->iomode == BR_IO_FAILED; +} + +/* + * Configure a new maximum fragment length. If possible, the maximum + * length for outgoing records is immediately adjusted (if there are + * not already too many buffered bytes for that). + */ +void br_ssl_engine_new_max_frag_len( + br_ssl_engine_context *rc, unsigned max_frag_len); + +/* + * Test whether the current incoming record has been fully received + * or not. This functions returns 0 only if a complete record header + * has been received, but some of the (possibly encrypted) payload + * has not yet been obtained. + */ +int br_ssl_engine_recvrec_finished(const br_ssl_engine_context *rc); + +/* + * Flush the current record (if not empty). This is meant to be called + * from the handshake processor only. + */ +void br_ssl_engine_flush_record(br_ssl_engine_context *cc); + +/* + * Test whether there is some accumulated payload to send. + */ +static inline int +br_ssl_engine_has_pld_to_send(const br_ssl_engine_context *rc) +{ + return rc->oxa != rc->oxb && rc->oxa != rc->oxc; +} + +/* + * Initialize RNG in engine. Returned value is 1 on success, 0 on error. + * This function will try to use the OS-provided RNG, if available. If + * there is no OS-provided RNG, or if it failed, and no entropy was + * injected by the caller, then a failure will be reported. On error, + * the context error code is set. + */ +int br_ssl_engine_init_rand(br_ssl_engine_context *cc); + +/* + * Reset the handshake-related parts of the engine. + */ +void br_ssl_engine_hs_reset(br_ssl_engine_context *cc, + void (*hsinit)(void *), void (*hsrun)(void *)); + +/* + * Get the PRF to use for this context, for the provided PRF hash + * function ID. 
+ */ +br_tls_prf_impl br_ssl_engine_get_PRF(br_ssl_engine_context *cc, int prf_id); + +/* + * Consume the provided pre-master secret and compute the corresponding + * master secret. The 'prf_id' is the ID of the hash function to use + * with the TLS 1.2 PRF (ignored if the version is TLS 1.0 or 1.1). + */ +void br_ssl_engine_compute_master(br_ssl_engine_context *cc, + int prf_id, const void *pms, size_t len); + +/* + * Switch to CBC decryption for incoming records. + * cc the engine context + * is_client non-zero for a client, zero for a server + * prf_id id of hash function for PRF (ignored if not TLS 1.2+) + * mac_id id of hash function for HMAC + * bc_impl block cipher implementation (CBC decryption) + * cipher_key_len block cipher key length (in bytes) + */ +void br_ssl_engine_switch_cbc_in(br_ssl_engine_context *cc, + int is_client, int prf_id, int mac_id, + const br_block_cbcdec_class *bc_impl, size_t cipher_key_len); + +/* + * Switch to CBC encryption for outgoing records. + * cc the engine context + * is_client non-zero for a client, zero for a server + * prf_id id of hash function for PRF (ignored if not TLS 1.2+) + * mac_id id of hash function for HMAC + * bc_impl block cipher implementation (CBC encryption) + * cipher_key_len block cipher key length (in bytes) + */ +void br_ssl_engine_switch_cbc_out(br_ssl_engine_context *cc, + int is_client, int prf_id, int mac_id, + const br_block_cbcenc_class *bc_impl, size_t cipher_key_len); + +/* + * Switch to GCM decryption for incoming records. + * cc the engine context + * is_client non-zero for a client, zero for a server + * prf_id id of hash function for PRF + * bc_impl block cipher implementation (CTR) + * cipher_key_len block cipher key length (in bytes) + */ +void br_ssl_engine_switch_gcm_in(br_ssl_engine_context *cc, + int is_client, int prf_id, + const br_block_ctr_class *bc_impl, size_t cipher_key_len); + +/* + * Switch to GCM encryption for outgoing records. 
/*
 * Switch to CCM encryption for outgoing records.
 *    cc               the engine context
 *    is_client        non-zero for a client, zero for a server
 *    prf_id           id of hash function for PRF
 *    bc_impl          block cipher implementation (CTR+CBC)
 *    cipher_key_len   block cipher key length (in bytes)
 *    tag_len          tag length (in bytes)
 */
void br_ssl_engine_switch_ccm_out(br_ssl_engine_context *cc,
	int is_client, int prf_id,
	const br_block_ctrcbc_class *bc_impl,
	size_t cipher_key_len, size_t tag_len);
/*
 * Get the hash function to use for signatures, given a bit mask of
 * supported hash functions. This implements a strict choice order
 * (namely SHA-256, SHA-384, SHA-512, SHA-224, SHA-1). If the mask
 * does not document support of any of these hash functions, then this
 * function returns 0.
 *
 *   bf   bit mask of supported hash functions
 */
int br_ssl_choose_hash(unsigned bf);
vsubuwm_(vrt, vra, vrb) +#define vsrw(vrt, vra, vrb) vsrw_(vrt, vra, vrb) +#define vcipher(vt, va, vb) vcipher_(vt, va, vb) +#define vcipherlast(vt, va, vb) vcipherlast_(vt, va, vb) +#define vncipher(vt, va, vb) vncipher_(vt, va, vb) +#define vncipherlast(vt, va, vb) vncipherlast_(vt, va, vb) +#define vperm(vt, va, vb, vc) vperm_(vt, va, vb, vc) +#define vpmsumd(vt, va, vb) vpmsumd_(vt, va, vb) +#define xxpermdi(vt, va, vb, d) xxpermdi_(vt, va, vb, d) + +#define lxvw4x_(xt, ra, rb) "\tlxvw4x\t" #xt "," #ra "," #rb "\n" +#define stxvw4x_(xt, ra, rb) "\tstxvw4x\t" #xt "," #ra "," #rb "\n" + +#define label(foo) #foo "%=:\n" +#define bdnz_(foo) "\tbdnz\t" #foo "%=\n" +#define bdz_(foo) "\tbdz\t" #foo "%=\n" +#define beq_(foo) "\tbeq\t" #foo "%=\n" + +#define li_(rx, value) "\tli\t" #rx "," #value "\n" +#define addi_(rx, ra, imm) "\taddi\t" #rx "," #ra "," #imm "\n" +#define cmpldi_(rx, imm) "\tcmpldi\t" #rx "," #imm "\n" +#define mtctr_(rx) "\tmtctr\t" #rx "\n" +#define vspltb_(vrt, vrb, uim) "\tvspltb\t" #vrt "," #vrb "," #uim "\n" +#define vspltw_(vrt, vrb, uim) "\tvspltw\t" #vrt "," #vrb "," #uim "\n" +#define vspltisb_(vrt, imm) "\tvspltisb\t" #vrt "," #imm "\n" +#define vspltisw_(vrt, imm) "\tvspltisw\t" #vrt "," #imm "\n" +#define vrlw_(vrt, vra, vrb) "\tvrlw\t" #vrt "," #vra "," #vrb "\n" +#define vsbox_(vrt, vra) "\tvsbox\t" #vrt "," #vra "\n" +#define vxor_(vrt, vra, vrb) "\tvxor\t" #vrt "," #vra "," #vrb "\n" +#define vand_(vrt, vra, vrb) "\tvand\t" #vrt "," #vra "," #vrb "\n" +#define vsro_(vrt, vra, vrb) "\tvsro\t" #vrt "," #vra "," #vrb "\n" +#define vsl_(vrt, vra, vrb) "\tvsl\t" #vrt "," #vra "," #vrb "\n" +#define vsldoi_(vt, va, vb, sh) "\tvsldoi\t" #vt "," #va "," #vb "," #sh "\n" +#define vsr_(vrt, vra, vrb) "\tvsr\t" #vrt "," #vra "," #vrb "\n" +#define vaddcuw_(vrt, vra, vrb) "\tvaddcuw\t" #vrt "," #vra "," #vrb "\n" +#define vadduwm_(vrt, vra, vrb) "\tvadduwm\t" #vrt "," #vra "," #vrb "\n" +#define vsububm_(vrt, vra, vrb) "\tvsububm\t" #vrt "," 
#vra "," #vrb "\n" +#define vsubuwm_(vrt, vra, vrb) "\tvsubuwm\t" #vrt "," #vra "," #vrb "\n" +#define vsrw_(vrt, vra, vrb) "\tvsrw\t" #vrt "," #vra "," #vrb "\n" +#define vcipher_(vt, va, vb) "\tvcipher\t" #vt "," #va "," #vb "\n" +#define vcipherlast_(vt, va, vb) "\tvcipherlast\t" #vt "," #va "," #vb "\n" +#define vncipher_(vt, va, vb) "\tvncipher\t" #vt "," #va "," #vb "\n" +#define vncipherlast_(vt, va, vb) "\tvncipherlast\t" #vt "," #va "," #vb "\n" +#define vperm_(vt, va, vb, vc) "\tvperm\t" #vt "," #va "," #vb "," #vc "\n" +#define vpmsumd_(vt, va, vb) "\tvpmsumd\t" #vt "," #va "," #vb "\n" +#define xxpermdi_(vt, va, vb, d) "\txxpermdi\t" #vt "," #va "," #vb "," #d "\n" + +#endif + +/* ==================================================================== */ +/* + * Special "activate intrinsics" code, needed for some compiler versions. + * This is defined at the end of this file, so that it won't impact any + * of the inline functions defined previously; and it is controlled by + * a specific macro defined in the caller code. + * + * Calling code conventions: + * + * - Caller must define BR_ENABLE_INTRINSICS before including "inner.h". + * - Functions that use intrinsics must be enclosed in an "enabled" + * region (between BR_TARGETS_X86_UP and BR_TARGETS_X86_DOWN). + * - Functions that use intrinsics must be tagged with the appropriate + * BR_TARGET(). + */ + +#if BR_ENABLE_INTRINSICS && (BR_GCC_4_4 || BR_CLANG_3_7 || BR_MSC_2005) + +/* + * x86 intrinsics (both 32-bit and 64-bit). + */ +#if BR_i386 || BR_amd64 + +/* + * On GCC before version 5.0, we need to use the pragma to enable the + * target options globally, because the 'target' function attribute + * appears to be unreliable. Before 4.6 we must also avoid the + * push_options / pop_options mechanism, because it tends to trigger + * some internal compiler errors. 
/*
 * Query CPUID leaf 1 and check the reported feature bits against the
 * four provided masks (one per output register: EAX, EBX, ECX, EDX).
 *
 * Returned value is 1 if, for every register, all the bits set in the
 * corresponding mask are also set in the CPUID output. Returned value
 * is 0 if any requested bit is missing, if the GCC/Clang __get_cpuid()
 * helper reports failure, or if the compiler is neither GCC/Clang nor
 * MSVC (in which case no query is performed at all).
 */
static inline int
br_cpuid(uint32_t mask_eax, uint32_t mask_ebx,
	uint32_t mask_ecx, uint32_t mask_edx)
{
#if BR_GCC || BR_CLANG
	unsigned ra, rb, rc, rd;

	if (!__get_cpuid(1, &ra, &rb, &rc, &rd)) {
		return 0;
	}
	return (ra & mask_eax) == mask_eax
		&& (rb & mask_ebx) == mask_ebx
		&& (rc & mask_ecx) == mask_ecx
		&& (rd & mask_edx) == mask_edx;
#elif BR_MSC
	int regs[4];

	__cpuid(regs, 1);
	return ((uint32_t)regs[0] & mask_eax) == mask_eax
		&& ((uint32_t)regs[1] & mask_ebx) == mask_ebx
		&& ((uint32_t)regs[2] & mask_ecx) == mask_ecx
		&& ((uint32_t)regs[3] & mask_edx) == mask_edx;
#else
	return 0;
#endif
}
100644 index 0000000..0df7abe --- /dev/null +++ b/third_party/bearssl/src/md5.c @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +#define F(B, C, D) ((((C) ^ (D)) & (B)) ^ (D)) +#define G(B, C, D) ((((C) ^ (B)) & (D)) ^ (C)) +#define H(B, C, D) ((B) ^ (C) ^ (D)) +#define I(B, C, D) ((C) ^ ((B) | ~(D))) + +#define ROTL(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) + +/* see inner.h */ +const uint32_t br_md5_IV[4] = { + 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476 +}; + +static const uint32_t K[64] = { + 0xD76AA478, 0xE8C7B756, 0x242070DB, 0xC1BDCEEE, + 0xF57C0FAF, 0x4787C62A, 0xA8304613, 0xFD469501, + 0x698098D8, 0x8B44F7AF, 0xFFFF5BB1, 0x895CD7BE, + 0x6B901122, 0xFD987193, 0xA679438E, 0x49B40821, + + 0xF61E2562, 0xC040B340, 0x265E5A51, 0xE9B6C7AA, + 0xD62F105D, 0x02441453, 0xD8A1E681, 0xE7D3FBC8, + 0x21E1CDE6, 0xC33707D6, 0xF4D50D87, 0x455A14ED, + 0xA9E3E905, 0xFCEFA3F8, 0x676F02D9, 0x8D2A4C8A, + + 0xFFFA3942, 0x8771F681, 0x6D9D6122, 0xFDE5380C, + 0xA4BEEA44, 0x4BDECFA9, 0xF6BB4B60, 0xBEBFBC70, + 0x289B7EC6, 0xEAA127FA, 0xD4EF3085, 0x04881D05, + 0xD9D4D039, 0xE6DB99E5, 0x1FA27CF8, 0xC4AC5665, + + 0xF4292244, 0x432AFF97, 0xAB9423A7, 0xFC93A039, + 0x655B59C3, 0x8F0CCC92, 0xFFEFF47D, 0x85845DD1, + 0x6FA87E4F, 0xFE2CE6E0, 0xA3014314, 0x4E0811A1, + 0xF7537E82, 0xBD3AF235, 0x2AD7D2BB, 0xEB86D391 +}; + +static const unsigned char MP[48] = { + 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, + 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, + 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 +}; + +/* see inner.h */ +void +br_md5_round(const unsigned char *buf, uint32_t *val) +{ + uint32_t m[16]; + uint32_t a, b, c, d; + int i; + + a = val[0]; + b = val[1]; + c = val[2]; + d = val[3]; + /* obsolete + for (i = 0; i < 16; i ++) { + m[i] = br_dec32le(buf + (i << 2)); + } + */ + br_range_dec32le(m, 16, buf); + + for (i = 0; i < 16; i += 4) { + a = b + ROTL(a + F(b, c, d) + m[i + 0] + K[i + 0], 7); + d = a + ROTL(d + F(a, b, c) + m[i + 1] + K[i + 1], 12); + c = d + ROTL(c + F(d, a, b) + m[i + 2] + K[i + 2], 17); + b = c + ROTL(b + F(c, d, a) + m[i + 3] + K[i + 
3], 22); + } + for (i = 16; i < 32; i += 4) { + a = b + ROTL(a + G(b, c, d) + m[MP[i - 16]] + K[i + 0], 5); + d = a + ROTL(d + G(a, b, c) + m[MP[i - 15]] + K[i + 1], 9); + c = d + ROTL(c + G(d, a, b) + m[MP[i - 14]] + K[i + 2], 14); + b = c + ROTL(b + G(c, d, a) + m[MP[i - 13]] + K[i + 3], 20); + } + for (i = 32; i < 48; i += 4) { + a = b + ROTL(a + H(b, c, d) + m[MP[i - 16]] + K[i + 0], 4); + d = a + ROTL(d + H(a, b, c) + m[MP[i - 15]] + K[i + 1], 11); + c = d + ROTL(c + H(d, a, b) + m[MP[i - 14]] + K[i + 2], 16); + b = c + ROTL(b + H(c, d, a) + m[MP[i - 13]] + K[i + 3], 23); + } + for (i = 48; i < 64; i += 4) { + a = b + ROTL(a + I(b, c, d) + m[MP[i - 16]] + K[i + 0], 6); + d = a + ROTL(d + I(a, b, c) + m[MP[i - 15]] + K[i + 1], 10); + c = d + ROTL(c + I(d, a, b) + m[MP[i - 14]] + K[i + 2], 15); + b = c + ROTL(b + I(c, d, a) + m[MP[i - 13]] + K[i + 3], 21); + } + + val[0] += a; + val[1] += b; + val[2] += c; + val[3] += d; +} + +/* see bearssl.h */ +void +br_md5_init(br_md5_context *cc) +{ + cc->vtable = &br_md5_vtable; + memcpy(cc->val, br_md5_IV, sizeof cc->val); + cc->count = 0; +} + +/* see bearssl.h */ +void +br_md5_update(br_md5_context *cc, const void *data, size_t len) +{ + const unsigned char *buf; + size_t ptr; + + buf = data; + ptr = (size_t)cc->count & 63; + while (len > 0) { + size_t clen; + + clen = 64 - ptr; + if (clen > len) { + clen = len; + } + memcpy(cc->buf + ptr, buf, clen); + ptr += clen; + buf += clen; + len -= clen; + cc->count += (uint64_t)clen; + if (ptr == 64) { + br_md5_round(cc->buf, cc->val); + ptr = 0; + } + } +} + +/* see bearssl.h */ +void +br_md5_out(const br_md5_context *cc, void *dst) +{ + unsigned char buf[64]; + uint32_t val[4]; + size_t ptr; + + ptr = (size_t)cc->count & 63; + memcpy(buf, cc->buf, ptr); + memcpy(val, cc->val, sizeof val); + buf[ptr ++] = 0x80; + if (ptr > 56) { + memset(buf + ptr, 0, 64 - ptr); + br_md5_round(buf, val); + memset(buf, 0, 56); + } else { + memset(buf + ptr, 0, 56 - ptr); + } + br_enc64le(buf + 
56, cc->count << 3); + br_md5_round(buf, val); + br_range_enc32le(dst, val, 4); +} + +/* see bearssl.h */ +uint64_t +br_md5_state(const br_md5_context *cc, void *dst) +{ + br_range_enc32le(dst, cc->val, 4); + return cc->count; +} + +/* see bearssl.h */ +void +br_md5_set_state(br_md5_context *cc, const void *stb, uint64_t count) +{ + br_range_dec32le(cc->val, 4, stb); + cc->count = count; +} + +/* see bearssl.h */ +const br_hash_class br_md5_vtable = { + sizeof(br_md5_context), + BR_HASHDESC_ID(br_md5_ID) + | BR_HASHDESC_OUT(16) + | BR_HASHDESC_STATE(16) + | BR_HASHDESC_LBLEN(6) + | BR_HASHDESC_MD_PADDING, + (void (*)(const br_hash_class **))&br_md5_init, + (void (*)(const br_hash_class **, const void *, size_t))&br_md5_update, + (void (*)(const br_hash_class *const *, void *))&br_md5_out, + (uint64_t (*)(const br_hash_class *const *, void *))&br_md5_state, + (void (*)(const br_hash_class **, const void *, uint64_t)) + &br_md5_set_state +}; diff --git a/third_party/bearssl/src/md5sha1.c b/third_party/bearssl/src/md5sha1.c new file mode 100644 index 0000000..f701aee --- /dev/null +++ b/third_party/bearssl/src/md5sha1.c @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl.h */ +void +br_md5sha1_init(br_md5sha1_context *cc) +{ + cc->vtable = &br_md5sha1_vtable; + memcpy(cc->val_md5, br_md5_IV, sizeof cc->val_md5); + memcpy(cc->val_sha1, br_sha1_IV, sizeof cc->val_sha1); + cc->count = 0; +} + +/* see bearssl.h */ +void +br_md5sha1_update(br_md5sha1_context *cc, const void *data, size_t len) +{ + const unsigned char *buf; + size_t ptr; + + buf = data; + ptr = (size_t)cc->count & 63; + while (len > 0) { + size_t clen; + + clen = 64 - ptr; + if (clen > len) { + clen = len; + } + memcpy(cc->buf + ptr, buf, clen); + ptr += clen; + buf += clen; + len -= clen; + cc->count += (uint64_t)clen; + if (ptr == 64) { + br_md5_round(cc->buf, cc->val_md5); + br_sha1_round(cc->buf, cc->val_sha1); + ptr = 0; + } + } +} + +/* see bearssl.h */ +void +br_md5sha1_out(const br_md5sha1_context *cc, void *dst) +{ + unsigned char buf[64]; + uint32_t val_md5[4]; + uint32_t val_sha1[5]; + size_t ptr; + unsigned char *out; + uint64_t count; + + count = cc->count; + ptr = (size_t)count & 63; + memcpy(buf, cc->buf, ptr); + memcpy(val_md5, cc->val_md5, sizeof val_md5); + memcpy(val_sha1, cc->val_sha1, sizeof val_sha1); + buf[ptr ++] = 0x80; + if (ptr > 56) { + memset(buf + ptr, 0, 64 - ptr); + br_md5_round(buf, val_md5); + br_sha1_round(buf, val_sha1); + memset(buf, 0, 56); + } else { + memset(buf + ptr, 0, 56 - ptr); + } + count <<= 3; + br_enc64le(buf + 56, count); + br_md5_round(buf, val_md5); + br_enc64be(buf + 56, count); + 
br_sha1_round(buf, val_sha1); + out = dst; + br_range_enc32le(out, val_md5, 4); + br_range_enc32be(out + 16, val_sha1, 5); +} + +/* see bearssl.h */ +uint64_t +br_md5sha1_state(const br_md5sha1_context *cc, void *dst) +{ + unsigned char *out; + + out = dst; + br_range_enc32le(out, cc->val_md5, 4); + br_range_enc32be(out + 16, cc->val_sha1, 5); + return cc->count; +} + +/* see bearssl.h */ +void +br_md5sha1_set_state(br_md5sha1_context *cc, const void *stb, uint64_t count) +{ + const unsigned char *buf; + + buf = stb; + br_range_dec32le(cc->val_md5, 4, buf); + br_range_dec32be(cc->val_sha1, 5, buf + 16); + cc->count = count; +} + +/* see bearssl.h */ +const br_hash_class br_md5sha1_vtable = { + sizeof(br_md5sha1_context), + BR_HASHDESC_ID(br_md5sha1_ID) + | BR_HASHDESC_OUT(36) + | BR_HASHDESC_STATE(36) + | BR_HASHDESC_LBLEN(6), + (void (*)(const br_hash_class **))&br_md5sha1_init, + (void (*)(const br_hash_class **, const void *, size_t)) + &br_md5sha1_update, + (void (*)(const br_hash_class *const *, void *)) + &br_md5sha1_out, + (uint64_t (*)(const br_hash_class *const *, void *)) + &br_md5sha1_state, + (void (*)(const br_hash_class **, const void *, uint64_t)) + &br_md5sha1_set_state +}; diff --git a/third_party/bearssl/src/mgf1.c b/third_party/bearssl/src/mgf1.c new file mode 100644 index 0000000..7a23588 --- /dev/null +++ b/third_party/bearssl/src/mgf1.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * 
included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see inner.h */ +void +br_mgf1_xor(void *data, size_t len, + const br_hash_class *dig, const void *seed, size_t seed_len) +{ + unsigned char *buf; + size_t u, hlen; + uint32_t c; + + buf = data; + hlen = br_digest_size(dig); + for (u = 0, c = 0; u < len; u += hlen, c ++) { + br_hash_compat_context hc; + unsigned char tmp[64]; + size_t v; + + hc.vtable = dig; + dig->init(&hc.vtable); + dig->update(&hc.vtable, seed, seed_len); + br_enc32be(tmp, c); + dig->update(&hc.vtable, tmp, 4); + dig->out(&hc.vtable, tmp); + for (v = 0; v < hlen; v ++) { + if ((u + v) >= len) { + break; + } + buf[u + v] ^= tmp[v]; + } + } +} diff --git a/third_party/bearssl/src/multihash.c b/third_party/bearssl/src/multihash.c new file mode 100644 index 0000000..b6df2e0 --- /dev/null +++ b/third_party/bearssl/src/multihash.c @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission 
notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * An aggregate context that is large enough for all supported hash + * functions. + */ +typedef union { + const br_hash_class *vtable; + br_md5_context md5; + br_sha1_context sha1; + br_sha224_context sha224; + br_sha256_context sha256; + br_sha384_context sha384; + br_sha512_context sha512; +} gen_hash_context; + +/* + * Get the offset to the state for a specific hash function within the + * context structure. This shall be called only for the supported hash + * functions, + */ +static size_t +get_state_offset(int id) +{ + if (id >= 5) { + /* + * SHA-384 has id 5, and SHA-512 has id 6. Both use + * eight 64-bit words for their state. + */ + return offsetof(br_multihash_context, val_64) + + ((size_t)(id - 5) * (8 * sizeof(uint64_t))); + } else { + /* + * MD5 has id 1, SHA-1 has id 2, SHA-224 has id 3 and + * SHA-256 has id 4. They use 32-bit words for their + * states (4 words for MD5, 5 for SHA-1, 8 for SHA-224 + * and 8 for SHA-256). + */ + unsigned x; + + x = id - 1; + x = ((x + (x & (x >> 1))) << 2) + (x >> 1); + return offsetof(br_multihash_context, val_32) + + x * sizeof(uint32_t); + } +} + +/* see bearssl_hash.h */ +void +br_multihash_zero(br_multihash_context *ctx) +{ + /* + * This is not standard, but yields very short and efficient code, + * and it works "everywhere". 
+ */ + memset(ctx, 0, sizeof *ctx); +} + +/* see bearssl_hash.h */ +void +br_multihash_init(br_multihash_context *ctx) +{ + int i; + + ctx->count = 0; + for (i = 1; i <= 6; i ++) { + const br_hash_class *hc; + + hc = ctx->impl[i - 1]; + if (hc != NULL) { + gen_hash_context g; + + hc->init(&g.vtable); + hc->state(&g.vtable, + (unsigned char *)ctx + get_state_offset(i)); + } + } +} + +/* see bearssl_hash.h */ +void +br_multihash_update(br_multihash_context *ctx, const void *data, size_t len) +{ + const unsigned char *buf; + size_t ptr; + + buf = data; + ptr = (size_t)ctx->count & 127; + while (len > 0) { + size_t clen; + + clen = 128 - ptr; + if (clen > len) { + clen = len; + } + memcpy(ctx->buf + ptr, buf, clen); + ptr += clen; + buf += clen; + len -= clen; + ctx->count += (uint64_t)clen; + if (ptr == 128) { + int i; + + for (i = 1; i <= 6; i ++) { + const br_hash_class *hc; + + hc = ctx->impl[i - 1]; + if (hc != NULL) { + gen_hash_context g; + unsigned char *state; + + state = (unsigned char *)ctx + + get_state_offset(i); + hc->set_state(&g.vtable, + state, ctx->count - 128); + hc->update(&g.vtable, ctx->buf, 128); + hc->state(&g.vtable, state); + } + } + ptr = 0; + } + } +} + +/* see bearssl_hash.h */ +size_t +br_multihash_out(const br_multihash_context *ctx, int id, void *dst) +{ + const br_hash_class *hc; + gen_hash_context g; + const unsigned char *state; + + hc = ctx->impl[id - 1]; + if (hc == NULL) { + return 0; + } + state = (const unsigned char *)ctx + get_state_offset(id); + hc->set_state(&g.vtable, state, ctx->count & ~(uint64_t)127); + hc->update(&g.vtable, ctx->buf, ctx->count & (uint64_t)127); + hc->out(&g.vtable, dst); + return (hc->desc >> BR_HASHDESC_OUT_OFF) & BR_HASHDESC_OUT_MASK; +} diff --git a/third_party/bearssl/src/poly1305_ctmul.c b/third_party/bearssl/src/poly1305_ctmul.c new file mode 100644 index 0000000..150e610 --- /dev/null +++ b/third_party/bearssl/src/poly1305_ctmul.c @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email 
protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Perform the inner processing of blocks for Poly1305. The accumulator + * and the r key are provided as arrays of 26-bit words (these words + * are allowed to have an extra bit, i.e. use 27 bits). + * + * On output, all accumulator words fit on 26 bits, except acc[1], which + * may be slightly larger (but by a very small amount only). + */ +static void +poly1305_inner(uint32_t *acc, const uint32_t *r, const void *data, size_t len) +{ + /* + * Implementation notes: we split the 130-bit values into five + * 26-bit words. This gives us some space for carries. + * + * This code is inspired from the public-domain code available + * on: + * https://github.com/floodyberry/poly1305-donna + * + * Since we compute modulo 2^130-5, the "upper words" become + * low words with a factor of 5; that is, x*2^130 = x*5 mod p. 
+ */ + const unsigned char *buf; + uint32_t a0, a1, a2, a3, a4; + uint32_t r0, r1, r2, r3, r4; + uint32_t u1, u2, u3, u4; + + r0 = r[0]; + r1 = r[1]; + r2 = r[2]; + r3 = r[3]; + r4 = r[4]; + + u1 = r1 * 5; + u2 = r2 * 5; + u3 = r3 * 5; + u4 = r4 * 5; + + a0 = acc[0]; + a1 = acc[1]; + a2 = acc[2]; + a3 = acc[3]; + a4 = acc[4]; + + buf = data; + while (len > 0) { + uint64_t w0, w1, w2, w3, w4; + uint64_t c; + unsigned char tmp[16]; + + /* + * If there is a partial block, right-pad it with zeros. + */ + if (len < 16) { + memset(tmp, 0, sizeof tmp); + memcpy(tmp, buf, len); + buf = tmp; + len = 16; + } + + /* + * Decode next block and apply the "high bit"; that value + * is added to the accumulator. + */ + a0 += br_dec32le(buf) & 0x03FFFFFF; + a1 += (br_dec32le(buf + 3) >> 2) & 0x03FFFFFF; + a2 += (br_dec32le(buf + 6) >> 4) & 0x03FFFFFF; + a3 += (br_dec32le(buf + 9) >> 6) & 0x03FFFFFF; + a4 += (br_dec32le(buf + 12) >> 8) | 0x01000000; + + /* + * Compute multiplication. + */ +#define M(x, y) ((uint64_t)(x) * (uint64_t)(y)) + + w0 = M(a0, r0) + M(a1, u4) + M(a2, u3) + M(a3, u2) + M(a4, u1); + w1 = M(a0, r1) + M(a1, r0) + M(a2, u4) + M(a3, u3) + M(a4, u2); + w2 = M(a0, r2) + M(a1, r1) + M(a2, r0) + M(a3, u4) + M(a4, u3); + w3 = M(a0, r3) + M(a1, r2) + M(a2, r1) + M(a3, r0) + M(a4, u4); + w4 = M(a0, r4) + M(a1, r3) + M(a2, r2) + M(a3, r1) + M(a4, r0); + +#undef M + /* + * Perform some (partial) modular reduction. This step is + * enough to keep values in ranges such that there won't + * be carry overflows. Most of the reduction was done in + * the multiplication step (by using the 'u*' values, and + * using the fact that 2^130 = 5 mod p); here we perform + * some carry propagation.
+ */ + c = w0 >> 26; + a0 = (uint32_t)w0 & 0x3FFFFFF; + w1 += c; + c = w1 >> 26; + a1 = (uint32_t)w1 & 0x3FFFFFF; + w2 += c; + c = w2 >> 26; + a2 = (uint32_t)w2 & 0x3FFFFFF; + w3 += c; + c = w3 >> 26; + a3 = (uint32_t)w3 & 0x3FFFFFF; + w4 += c; + c = w4 >> 26; + a4 = (uint32_t)w4 & 0x3FFFFFF; + a0 += (uint32_t)c * 5; + a1 += a0 >> 26; + a0 &= 0x3FFFFFF; + + buf += 16; + len -= 16; + } + + acc[0] = a0; + acc[1] = a1; + acc[2] = a2; + acc[3] = a3; + acc[4] = a4; +} + +/* see bearssl_block.h */ +void +br_poly1305_ctmul_run(const void *key, const void *iv, + void *data, size_t len, const void *aad, size_t aad_len, + void *tag, br_chacha20_run ichacha, int encrypt) +{ + unsigned char pkey[32], foot[16]; + uint32_t r[5], acc[5], cc, ctl, hi; + uint64_t w; + int i; + + /* + * Compute the MAC key. The 'r' value is the first 16 bytes of + * pkey[]. + */ + memset(pkey, 0, sizeof pkey); + ichacha(key, iv, 0, pkey, sizeof pkey); + + /* + * If encrypting, ChaCha20 must run first, followed by Poly1305. + * When decrypting, the operations are reversed. + */ + if (encrypt) { + ichacha(key, iv, 1, data, len); + } + + /* + * Run Poly1305. We must process the AAD, then ciphertext, then + * the footer (with the lengths). Note that the AAD and ciphertext + * are meant to be padded with zeros up to the next multiple of 16, + * and the length of the footer is 16 bytes as well. + */ + + /* + * Decode the 'r' value into 26-bit words, with the "clamping" + * operation applied. + */ + r[0] = br_dec32le(pkey) & 0x03FFFFFF; + r[1] = (br_dec32le(pkey + 3) >> 2) & 0x03FFFF03; + r[2] = (br_dec32le(pkey + 6) >> 4) & 0x03FFC0FF; + r[3] = (br_dec32le(pkey + 9) >> 6) & 0x03F03FFF; + r[4] = (br_dec32le(pkey + 12) >> 8) & 0x000FFFFF; + + /* + * Accumulator is 0. + */ + memset(acc, 0, sizeof acc); + + /* + * Process the additional authenticated data, ciphertext, and + * footer in due order. 
+ */ + br_enc64le(foot, (uint64_t)aad_len); + br_enc64le(foot + 8, (uint64_t)len); + poly1305_inner(acc, r, aad, aad_len); + poly1305_inner(acc, r, data, len); + poly1305_inner(acc, r, foot, sizeof foot); + + /* + * Finalise modular reduction. This is done with carry propagation + * and applying the '2^130 = 5 mod p' rule. Note that the output + * of poly1305_inner() is already mostly reduced, since only + * acc[1] may be (very slightly) above 2^26. A single loop back + * to acc[1] will be enough to make the value fit in 130 bits. + * + * The carry that leaves acc[4] (step i == 5) has weight 2^130, + * so it must re-enter acc[0] multiplied by 5; every other carry + * simply moves up by one 26-bit word. The loop counter is not + * secret, so selecting on it keeps the code constant-time with + * respect to the data. + */ + cc = 0; + for (i = 1; i <= 6; i ++) { + int j; + + j = (i >= 5) ? i - 5 : i; + acc[j] += (i == 5) ? cc * 5 : cc; + cc = acc[j] >> 26; + acc[j] &= 0x03FFFFFF; + } + + /* + * We may still have a value in the 2^130-5..2^130-1 range, in + * which case we must reduce it again. The code below selects, + * in constant-time, between 'acc' and 'acc-p'. + */ + ctl = GT(acc[0], 0x03FFFFFA); + for (i = 1; i < 5; i ++) { + ctl &= EQ(acc[i], 0x03FFFFFF); + } + cc = 5; + for (i = 0; i < 5; i ++) { + uint32_t t; + + t = (acc[i] + cc); + cc = t >> 26; + t &= 0x03FFFFFF; + acc[i] = MUX(ctl, t, acc[i]); + } + + /* + * Convert back the accumulator to 32-bit words, and add the + * 's' value (second half of pkey[]). That addition is done + * modulo 2^128. + */ + w = (uint64_t)acc[0] + ((uint64_t)acc[1] << 26) + br_dec32le(pkey + 16); + br_enc32le((unsigned char *)tag, (uint32_t)w); + w = (w >> 32) + ((uint64_t)acc[2] << 20) + br_dec32le(pkey + 20); + br_enc32le((unsigned char *)tag + 4, (uint32_t)w); + w = (w >> 32) + ((uint64_t)acc[3] << 14) + br_dec32le(pkey + 24); + br_enc32le((unsigned char *)tag + 8, (uint32_t)w); + hi = (uint32_t)(w >> 32) + (acc[4] << 8) + br_dec32le(pkey + 28); + br_enc32le((unsigned char *)tag + 12, hi); + + /* + * If decrypting, then ChaCha20 runs _after_ Poly1305.
+ */ + if (!encrypt) { + ichacha(key, iv, 1, data, len); + } +} diff --git a/third_party/bearssl/src/poly1305_ctmul32.c b/third_party/bearssl/src/poly1305_ctmul32.c new file mode 100644 index 0000000..15d9635 --- /dev/null +++ b/third_party/bearssl/src/poly1305_ctmul32.c @@ -0,0 +1,297 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Perform the inner processing of blocks for Poly1305. + */ +static void +poly1305_inner(uint32_t *a, const uint32_t *r, const void *data, size_t len) +{ + /* + * Implementation notes: we split the 130-bit values into ten + * 13-bit words. This gives us some space for carries and allows + * using only 32x32->32 multiplications, which are way faster than + * 32x32->64 multiplications on the ARM Cortex-M0/M0+, and also + * help in making constant-time code on the Cortex-M3. 
+ * + * Since we compute modulo 2^130-5, the "upper words" become + * low words with a factor of 5; that is, x*2^130 = x*5 mod p. + * This has already been integrated in the r[] array, which + * is extended to the 0..18 range. + * + * In each loop iteration, a[] and r[] words are 13-bit each, + * except a[1] which may use 14 bits. + */ + const unsigned char *buf; + + buf = data; + while (len > 0) { + unsigned char tmp[16]; + uint32_t b[10]; + unsigned u, v; + uint32_t z, cc1, cc2; + + /* + * If there is a partial block, right-pad it with zeros. + */ + if (len < 16) { + memset(tmp, 0, sizeof tmp); + memcpy(tmp, buf, len); + buf = tmp; + len = 16; + } + + /* + * Decode next block and apply the "high bit"; that value + * is added to the accumulator. + */ + v = br_dec16le(buf); + a[0] += v & 0x01FFF; + v >>= 13; + v |= buf[2] << 3; + v |= buf[3] << 11; + a[1] += v & 0x01FFF; + v >>= 13; + v |= buf[4] << 6; + a[2] += v & 0x01FFF; + v >>= 13; + v |= buf[5] << 1; + v |= buf[6] << 9; + a[3] += v & 0x01FFF; + v >>= 13; + v |= buf[7] << 4; + v |= buf[8] << 12; + a[4] += v & 0x01FFF; + v >>= 13; + v |= buf[9] << 7; + a[5] += v & 0x01FFF; + v >>= 13; + v |= buf[10] << 2; + v |= buf[11] << 10; + a[6] += v & 0x01FFF; + v >>= 13; + v |= buf[12] << 5; + a[7] += v & 0x01FFF; + v = br_dec16le(buf + 13); + a[8] += v & 0x01FFF; + v >>= 13; + v |= buf[15] << 3; + a[9] += v | 0x00800; + + /* + * At that point, all a[] values fit on 14 bits, while + * all r[] values fit on 13 bits. Thus products fit on + * 27 bits, and we can accumulate up to 31 of them in + * a 32-bit word and still have some room for carries. + */ + + /* + * Now a[] contains words with values up to 14 bits each. + * We perform the multiplication with r[]. + * + * The extended words of r[] may be larger than 13 bits + * (they are 5 times a 13-bit word) so the full summation + * may yield values up to 46 times a 27-bit word, which + * does not fit on a 32-bit word. 
To avoid that issue, we + * must split the loop below in two, with a carry + * propagation operation in the middle. + */ + cc1 = 0; + for (u = 0; u < 10; u ++) { + uint32_t s; + + s = cc1 + + MUL15(a[0], r[u + 9 - 0]) + + MUL15(a[1], r[u + 9 - 1]) + + MUL15(a[2], r[u + 9 - 2]) + + MUL15(a[3], r[u + 9 - 3]) + + MUL15(a[4], r[u + 9 - 4]); + b[u] = s & 0x1FFF; + cc1 = s >> 13; + } + cc2 = 0; + for (u = 0; u < 10; u ++) { + uint32_t s; + + s = b[u] + cc2 + + MUL15(a[5], r[u + 9 - 5]) + + MUL15(a[6], r[u + 9 - 6]) + + MUL15(a[7], r[u + 9 - 7]) + + MUL15(a[8], r[u + 9 - 8]) + + MUL15(a[9], r[u + 9 - 9]); + b[u] = s & 0x1FFF; + cc2 = s >> 13; + } + memcpy(a, b, sizeof b); + + /* + * The two carries "loop back" with a factor of 5. We + * propagate them into a[0] and a[1]. + */ + z = cc1 + cc2; + z += (z << 2) + a[0]; + a[0] = z & 0x1FFF; + a[1] += z >> 13; + + buf += 16; + len -= 16; + } +} + +/* see bearssl_block.h */ +void +br_poly1305_ctmul32_run(const void *key, const void *iv, + void *data, size_t len, const void *aad, size_t aad_len, + void *tag, br_chacha20_run ichacha, int encrypt) +{ + unsigned char pkey[32], foot[16]; + uint32_t z, r[19], acc[10], cc, ctl; + int i; + + /* + * Compute the MAC key. The 'r' value is the first 16 bytes of + * pkey[]. + */ + memset(pkey, 0, sizeof pkey); + ichacha(key, iv, 0, pkey, sizeof pkey); + + /* + * If encrypting, ChaCha20 must run first, followed by Poly1305. + * When decrypting, the operations are reversed. + */ + if (encrypt) { + ichacha(key, iv, 1, data, len); + } + + /* + * Run Poly1305. We must process the AAD, then ciphertext, then + * the footer (with the lengths). Note that the AAD and ciphertext + * are meant to be padded with zeros up to the next multiple of 16, + * and the length of the footer is 16 bytes as well. + */ + + /* + * Decode the 'r' value into 13-bit words, with the "clamping" + * operation applied. 
+ */ + z = br_dec32le(pkey) & 0x03FFFFFF; + r[9] = z & 0x1FFF; + r[10] = z >> 13; + z = (br_dec32le(pkey + 3) >> 2) & 0x03FFFF03; + r[11] = z & 0x1FFF; + r[12] = z >> 13; + z = (br_dec32le(pkey + 6) >> 4) & 0x03FFC0FF; + r[13] = z & 0x1FFF; + r[14] = z >> 13; + z = (br_dec32le(pkey + 9) >> 6) & 0x03F03FFF; + r[15] = z & 0x1FFF; + r[16] = z >> 13; + z = (br_dec32le(pkey + 12) >> 8) & 0x000FFFFF; + r[17] = z & 0x1FFF; + r[18] = z >> 13; + + /* + * Extend r[] with the 5x factor pre-applied. + */ + for (i = 0; i < 9; i ++) { + r[i] = MUL15(5, r[i + 10]); + } + + /* + * Accumulator is 0. + */ + memset(acc, 0, sizeof acc); + + /* + * Process the additional authenticated data, ciphertext, and + * footer in due order. + */ + br_enc64le(foot, (uint64_t)aad_len); + br_enc64le(foot + 8, (uint64_t)len); + poly1305_inner(acc, r, aad, aad_len); + poly1305_inner(acc, r, data, len); + poly1305_inner(acc, r, foot, sizeof foot); + + /* + * Finalise modular reduction. This is done with carry propagation + * and applying the '2^130 = 5 mod p' rule. Note that the output + * of poly1305_inner() is already mostly reduced, since only + * acc[1] may be (very slightly) above 2^13. A single loop back + * to acc[1] will be enough to make the value fit in 130 bits. + */ + cc = 0; + for (i = 1; i < 10; i ++) { + z = acc[i] + cc; + acc[i] = z & 0x1FFF; + cc = z >> 13; + } + z = acc[0] + cc + (cc << 2); + acc[0] = z & 0x1FFF; + acc[1] += z >> 13; + + /* + * We may still have a value in the 2^130-5..2^130-1 range, in + * which case we must reduce it again. The code below selects, + * in constant-time, between 'acc' and 'acc-p'. + */ + ctl = GT(acc[0], 0x1FFA); + for (i = 1; i < 10; i ++) { + ctl &= EQ(acc[i], 0x1FFF); + } + acc[0] = MUX(ctl, acc[0] - 0x1FFB, acc[0]); + for (i = 1; i < 10; i ++) { + acc[i] &= ~(-ctl); + } + + /* + * Convert back the accumulator to 32-bit words, and add the + * 's' value (second half of pkey[]). That addition is done + * modulo 2^128.
+ */ + z = acc[0] + (acc[1] << 13) + br_dec16le(pkey + 16); + br_enc16le((unsigned char *)tag, z & 0xFFFF); + z = (z >> 16) + (acc[2] << 10) + br_dec16le(pkey + 18); + br_enc16le((unsigned char *)tag + 2, z & 0xFFFF); + z = (z >> 16) + (acc[3] << 7) + br_dec16le(pkey + 20); + br_enc16le((unsigned char *)tag + 4, z & 0xFFFF); + z = (z >> 16) + (acc[4] << 4) + br_dec16le(pkey + 22); + br_enc16le((unsigned char *)tag + 6, z & 0xFFFF); + z = (z >> 16) + (acc[5] << 1) + (acc[6] << 14) + br_dec16le(pkey + 24); + br_enc16le((unsigned char *)tag + 8, z & 0xFFFF); + z = (z >> 16) + (acc[7] << 11) + br_dec16le(pkey + 26); + br_enc16le((unsigned char *)tag + 10, z & 0xFFFF); + z = (z >> 16) + (acc[8] << 8) + br_dec16le(pkey + 28); + br_enc16le((unsigned char *)tag + 12, z & 0xFFFF); + z = (z >> 16) + (acc[9] << 5) + br_dec16le(pkey + 30); + br_enc16le((unsigned char *)tag + 14, z & 0xFFFF); + + /* + * If decrypting, then ChaCha20 runs _after_ Poly1305. + */ + if (!encrypt) { + ichacha(key, iv, 1, data, len); + } +} diff --git a/third_party/bearssl/src/poly1305_ctmulq.c b/third_party/bearssl/src/poly1305_ctmulq.c new file mode 100644 index 0000000..b00683a --- /dev/null +++ b/third_party/bearssl/src/poly1305_ctmulq.c @@ -0,0 +1,475 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +#if BR_INT128 || BR_UMUL128 + +#if BR_INT128 + +#define MUL128(hi, lo, x, y) do { \ + unsigned __int128 mul128tmp; \ + mul128tmp = (unsigned __int128)(x) * (unsigned __int128)(y); \ + (hi) = (uint64_t)(mul128tmp >> 64); \ + (lo) = (uint64_t)mul128tmp; \ + } while (0) + +#elif BR_UMUL128 + +#include <intrin.h> + +#define MUL128(hi, lo, x, y) do { \ + (lo) = _umul128((x), (y), &(hi)); \ + } while (0) + +#endif + +#define MASK42 ((uint64_t)0x000003FFFFFFFFFF) +#define MASK44 ((uint64_t)0x00000FFFFFFFFFFF) + +/* + * The "accumulator" word is nominally a 130-bit value. We split it into + * words of 44 bits, each held in a 64-bit variable. + * + * If the current accumulator is a = a0 + a1*W + a2*W^2 (where W = 2^44) + * and r = r0 + r1*W + r2*W^2, then: + * + * a*r = (a0*r0) + * + (a0*r1 + a1*r0) * W + * + (a0*r2 + a1*r1 + a2*r0) * W^2 + * + (a1*r2 + a2*r1) * W^3 + * + (a2*r2) * W^4 + * + * We want to reduce that value modulo p = 2^130-5, so W^3 = 20 mod p, + * and W^4 = 20*W mod p. Thus, if we define u1 = 20*r1 and u2 = 20*r2, + * then the equations above become: + * + * b0 = a0*r0 + a1*u2 + a2*u1 + * b1 = a0*r1 + a1*r0 + a2*u2 + * b2 = a0*r2 + a1*r1 + a2*r0 + * + * In order to make u1 fit in 44 bits, we can change these equations + * into: + * + * b0 = a0*r0 + a1*u2 + a2*t1 + * b1 = a0*r1 + a1*r0 + a2*t2 + * b2 = a0*r2 + a1*r1 + a2*r0 + * + * Where t1 is u1 truncated to 44 bits, and t2 is u2 added to the extra + * bits of u1. 
Note that since r is clamped down to a 124-bit value, the + * values u2 and t2 fit on 44 bits too. + * + * The bx values are larger than 44 bits, so we may split them into a + * lower half (cx, 44 bits) and an upper half (dx). The new values for + * the accumulator are then: + * + * e0 = c0 + 20*d2 + * e1 = c1 + d0 + * e2 = c2 + d1 + * + * The equations allow for some room, i.e. the ax values may be larger + * than 44 bits. Similarly, the ex values will usually be larger than + * the ax. Thus, some sort of carry propagation must be done regularly, + * though not necessarily at each iteration. In particular, we do not + * need to compute the additions (for the bx values) over 128-bit + * quantities; we can stick to 64-bit computations. + * + * + * Since the 128-bit result of a 64x64 multiplication is actually + * represented over two 64-bit registers, it is cheaper to arrange for + * any split that happens between the "high" and "low" halves to be on + * that 64-bit boundary. This is done by left shifting the rx, ux and tx + * by 20 bits (since they all fit on 44 bits each, this shift is + * always possible). 
+ */ + +static void +poly1305_inner_big(uint64_t *acc, uint64_t *r, const void *data, size_t len) +{ + +#define MX(hi, lo, m0, m1, m2) do { \ + uint64_t mxhi, mxlo; \ + MUL128(mxhi, mxlo, a0, m0); \ + (hi) = mxhi; \ + (lo) = mxlo >> 20; \ + MUL128(mxhi, mxlo, a1, m1); \ + (hi) += mxhi; \ + (lo) += mxlo >> 20; \ + MUL128(mxhi, mxlo, a2, m2); \ + (hi) += mxhi; \ + (lo) += mxlo >> 20; \ + } while (0) + + const unsigned char *buf; + uint64_t a0, a1, a2; + uint64_t r0, r1, r2, t1, t2, u2; + + r0 = r[0]; + r1 = r[1]; + r2 = r[2]; + t1 = r[3]; + t2 = r[4]; + u2 = r[5]; + a0 = acc[0]; + a1 = acc[1]; + a2 = acc[2]; + buf = data; + + while (len > 0) { + uint64_t v0, v1, v2; + uint64_t c0, c1, c2, d0, d1, d2; + + v0 = br_dec64le(buf + 0); + v1 = br_dec64le(buf + 8); + v2 = v1 >> 24; + v1 = ((v0 >> 44) | (v1 << 20)) & MASK44; + v0 &= MASK44; + a0 += v0; + a1 += v1; + a2 += v2 + ((uint64_t)1 << 40); + MX(d0, c0, r0, u2, t1); + MX(d1, c1, r1, r0, t2); + MX(d2, c2, r2, r1, r0); + a0 = c0 + 20 * d2; + a1 = c1 + d0; + a2 = c2 + d1; + + v0 = br_dec64le(buf + 16); + v1 = br_dec64le(buf + 24); + v2 = v1 >> 24; + v1 = ((v0 >> 44) | (v1 << 20)) & MASK44; + v0 &= MASK44; + a0 += v0; + a1 += v1; + a2 += v2 + ((uint64_t)1 << 40); + MX(d0, c0, r0, u2, t1); + MX(d1, c1, r1, r0, t2); + MX(d2, c2, r2, r1, r0); + a0 = c0 + 20 * d2; + a1 = c1 + d0; + a2 = c2 + d1; + + v0 = br_dec64le(buf + 32); + v1 = br_dec64le(buf + 40); + v2 = v1 >> 24; + v1 = ((v0 >> 44) | (v1 << 20)) & MASK44; + v0 &= MASK44; + a0 += v0; + a1 += v1; + a2 += v2 + ((uint64_t)1 << 40); + MX(d0, c0, r0, u2, t1); + MX(d1, c1, r1, r0, t2); + MX(d2, c2, r2, r1, r0); + a0 = c0 + 20 * d2; + a1 = c1 + d0; + a2 = c2 + d1; + + v0 = br_dec64le(buf + 48); + v1 = br_dec64le(buf + 56); + v2 = v1 >> 24; + v1 = ((v0 >> 44) | (v1 << 20)) & MASK44; + v0 &= MASK44; + a0 += v0; + a1 += v1; + a2 += v2 + ((uint64_t)1 << 40); + MX(d0, c0, r0, u2, t1); + MX(d1, c1, r1, r0, t2); + MX(d2, c2, r2, r1, r0); + a0 = c0 + 20 * d2; + a1 = c1 + d0; + a2 = c2 
+ d1; + + a1 += a0 >> 44; + a0 &= MASK44; + a2 += a1 >> 44; + a1 &= MASK44; + a0 += 20 * (a2 >> 44); + a2 &= MASK44; + + buf += 64; + len -= 64; + } + acc[0] = a0; + acc[1] = a1; + acc[2] = a2; + +#undef MX +} + +static void +poly1305_inner_small(uint64_t *acc, uint64_t *r, const void *data, size_t len) +{ + const unsigned char *buf; + uint64_t a0, a1, a2; + uint64_t r0, r1, r2, t1, t2, u2; + + r0 = r[0]; + r1 = r[1]; + r2 = r[2]; + t1 = r[3]; + t2 = r[4]; + u2 = r[5]; + a0 = acc[0]; + a1 = acc[1]; + a2 = acc[2]; + buf = data; + + while (len > 0) { + uint64_t v0, v1, v2; + uint64_t c0, c1, c2, d0, d1, d2; + unsigned char tmp[16]; + + if (len < 16) { + memcpy(tmp, buf, len); + memset(tmp + len, 0, (sizeof tmp) - len); + buf = tmp; + len = 16; + } + v0 = br_dec64le(buf + 0); + v1 = br_dec64le(buf + 8); + + v2 = v1 >> 24; + v1 = ((v0 >> 44) | (v1 << 20)) & MASK44; + v0 &= MASK44; + + a0 += v0; + a1 += v1; + a2 += v2 + ((uint64_t)1 << 40); + +#define MX(hi, lo, m0, m1, m2) do { \ + uint64_t mxhi, mxlo; \ + MUL128(mxhi, mxlo, a0, m0); \ + (hi) = mxhi; \ + (lo) = mxlo >> 20; \ + MUL128(mxhi, mxlo, a1, m1); \ + (hi) += mxhi; \ + (lo) += mxlo >> 20; \ + MUL128(mxhi, mxlo, a2, m2); \ + (hi) += mxhi; \ + (lo) += mxlo >> 20; \ + } while (0) + + MX(d0, c0, r0, u2, t1); + MX(d1, c1, r1, r0, t2); + MX(d2, c2, r2, r1, r0); + +#undef MX + + a0 = c0 + 20 * d2; + a1 = c1 + d0; + a2 = c2 + d1; + + a1 += a0 >> 44; + a0 &= MASK44; + a2 += a1 >> 44; + a1 &= MASK44; + a0 += 20 * (a2 >> 44); + a2 &= MASK44; + + buf += 16; + len -= 16; + } + acc[0] = a0; + acc[1] = a1; + acc[2] = a2; +} + +static inline void +poly1305_inner(uint64_t *acc, uint64_t *r, const void *data, size_t len) +{ + if (len >= 64) { + size_t len2; + + len2 = len & ~(size_t)63; + poly1305_inner_big(acc, r, data, len2); + data = (const unsigned char *)data + len2; + len -= len2; + } + if (len > 0) { + poly1305_inner_small(acc, r, data, len); + } +} + +/* see bearssl_block.h */ +void +br_poly1305_ctmulq_run(const void 
*key, const void *iv, + void *data, size_t len, const void *aad, size_t aad_len, + void *tag, br_chacha20_run ichacha, int encrypt) +{ + unsigned char pkey[32], foot[16]; + uint64_t r[6], acc[3], r0, r1; + uint32_t v0, v1, v2, v3, v4; + uint64_t w0, w1, w2, w3; + uint32_t ctl; + + /* + * Compute the MAC key. The 'r' value is the first 16 bytes of + * pkey[]. + */ + memset(pkey, 0, sizeof pkey); + ichacha(key, iv, 0, pkey, sizeof pkey); + + /* + * If encrypting, ChaCha20 must run first, followed by Poly1305. + * When decrypting, the operations are reversed. + */ + if (encrypt) { + ichacha(key, iv, 1, data, len); + } + + /* + * Run Poly1305. We must process the AAD, then ciphertext, then + * the footer (with the lengths). Note that the AAD and ciphertext + * are meant to be padded with zeros up to the next multiple of 16, + * and the length of the footer is 16 bytes as well. + */ + + /* + * Apply the "clamping" on r. + */ + pkey[ 3] &= 0x0F; + pkey[ 4] &= 0xFC; + pkey[ 7] &= 0x0F; + pkey[ 8] &= 0xFC; + pkey[11] &= 0x0F; + pkey[12] &= 0xFC; + pkey[15] &= 0x0F; + + /* + * Decode the 'r' value into 44-bit words, left-shifted by 20 bits. + * Also compute the u1 and u2 values. + */ + r0 = br_dec64le(pkey + 0); + r1 = br_dec64le(pkey + 8); + r[0] = r0 << 20; + r[1] = ((r0 >> 24) | (r1 << 40)) & ~(uint64_t)0xFFFFF; + r[2] = (r1 >> 4) & ~(uint64_t)0xFFFFF; + r1 = 20 * (r[1] >> 20); + r[3] = r1 << 20; + r[5] = 20 * r[2]; + r[4] = (r[5] + (r1 >> 24)) & ~(uint64_t)0xFFFFF; + + /* + * Accumulator is 0. + */ + acc[0] = 0; + acc[1] = 0; + acc[2] = 0; + + /* + * Process the additional authenticated data, ciphertext, and + * footer in due order. + */ + br_enc64le(foot, (uint64_t)aad_len); + br_enc64le(foot + 8, (uint64_t)len); + poly1305_inner(acc, r, aad, aad_len); + poly1305_inner(acc, r, data, len); + poly1305_inner_small(acc, r, foot, sizeof foot); + + /* + * Finalise modular reduction. 
At that point, the value consists + * in three 44-bit values (the lowest one might be slightly above + * 2^44). Two loops shall be sufficient. + */ + acc[1] += (acc[0] >> 44); + acc[0] &= MASK44; + acc[2] += (acc[1] >> 44); + acc[1] &= MASK44; + acc[0] += 5 * (acc[2] >> 42); + acc[2] &= MASK42; + acc[1] += (acc[0] >> 44); + acc[0] &= MASK44; + acc[2] += (acc[1] >> 44); + acc[1] &= MASK44; + acc[0] += 5 * (acc[2] >> 42); + acc[2] &= MASK42; + + /* + * The value may still fall in the 2^130-5..2^130-1 range, in + * which case we must reduce it again. The code below selects, + * in constant-time, between 'acc' and 'acc-p'. We encode the + * value over four 32-bit integers to finish the operation. + */ + v0 = (uint32_t)acc[0]; + v1 = (uint32_t)(acc[0] >> 32) | ((uint32_t)acc[1] << 12); + v2 = (uint32_t)(acc[1] >> 20) | ((uint32_t)acc[2] << 24); + v3 = (uint32_t)(acc[2] >> 8); + v4 = (uint32_t)(acc[2] >> 40); + + ctl = GT(v0, 0xFFFFFFFA); + ctl &= EQ(v1, 0xFFFFFFFF); + ctl &= EQ(v2, 0xFFFFFFFF); + ctl &= EQ(v3, 0xFFFFFFFF); + ctl &= EQ(v4, 0x00000003); + v0 = MUX(ctl, v0 + 5, v0); + v1 = MUX(ctl, 0, v1); + v2 = MUX(ctl, 0, v2); + v3 = MUX(ctl, 0, v3); + + /* + * Add the "s" value. This is done modulo 2^128. Don't forget + * carry propagation... + */ + w0 = (uint64_t)v0 + (uint64_t)br_dec32le(pkey + 16); + w1 = (uint64_t)v1 + (uint64_t)br_dec32le(pkey + 20) + (w0 >> 32); + w2 = (uint64_t)v2 + (uint64_t)br_dec32le(pkey + 24) + (w1 >> 32); + w3 = (uint64_t)v3 + (uint64_t)br_dec32le(pkey + 28) + (w2 >> 32); + v0 = (uint32_t)w0; + v1 = (uint32_t)w1; + v2 = (uint32_t)w2; + v3 = (uint32_t)w3; + + /* + * Encode the tag. + */ + br_enc32le((unsigned char *)tag + 0, v0); + br_enc32le((unsigned char *)tag + 4, v1); + br_enc32le((unsigned char *)tag + 8, v2); + br_enc32le((unsigned char *)tag + 12, v3); + + /* + * If decrypting, then ChaCha20 runs _after_ Poly1305. 
+ */ + if (!encrypt) { + ichacha(key, iv, 1, data, len); + } +} + +/* see bearssl_block.h */ +br_poly1305_run +br_poly1305_ctmulq_get(void) +{ + return &br_poly1305_ctmulq_run; +} + +#else + +/* see bearssl_block.h */ +br_poly1305_run +br_poly1305_ctmulq_get(void) +{ + return 0; +} + +#endif diff --git a/third_party/bearssl/src/poly1305_i15.c b/third_party/bearssl/src/poly1305_i15.c new file mode 100644 index 0000000..6f89212 --- /dev/null +++ b/third_party/bearssl/src/poly1305_i15.c @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * This is a "reference" implementation of Poly1305 that uses the + * generic "i15" code for big integers. It is slow, but it handles all + * big-integer operations with generic code, thereby avoiding most + * tricky situations with carry propagation and modular reduction. 
+ */ + +/* + * Modulus: 2^130-5. + */ +static const uint16_t P1305[] = { + 0x008A, + 0x7FFB, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x03FF +}; + +/* + * -p mod 2^15. + */ +#define P0I 0x4CCD + +/* + * R^2 mod p, for conversion to Montgomery representation (R = 2^135, + * since we use 9 words of 15 bits each, and 15*9 = 135). + */ +static const uint16_t R2[] = { + 0x008A, + 0x6400, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 +}; + +/* + * Perform the inner processing of blocks for Poly1305. The "r" array + * is in Montgomery representation, while the "a" array is not. + */ +static void +poly1305_inner(uint16_t *a, const uint16_t *r, const void *data, size_t len) +{ + const unsigned char *buf; + + buf = data; + while (len > 0) { + unsigned char tmp[16], rev[16]; + uint16_t b[10]; + uint32_t ctl; + int i; + + /* + * If there is a partial block, right-pad it with zeros. + */ + if (len < 16) { + memset(tmp, 0, sizeof tmp); + memcpy(tmp, buf, len); + buf = tmp; + len = 16; + } + + /* + * Decode next block and apply the "high bit". Since + * decoding is little-endian, we must byte-swap the buffer. + */ + for (i = 0; i < 16; i ++) { + rev[i] = buf[15 - i]; + } + br_i15_decode_mod(b, rev, sizeof rev, P1305); + b[9] |= 0x0100; + + /* + * Add the accumulator to the decoded block (modular + * addition). + */ + ctl = br_i15_add(b, a, 1); + ctl |= NOT(br_i15_sub(b, P1305, 0)); + br_i15_sub(b, P1305, ctl); + + /* + * Multiply by r, result is the new accumulator value. + */ + br_i15_montymul(a, b, r, P1305, P0I); + + buf += 16; + len -= 16; + } +} + +/* + * Byteswap a 16-byte value. 
+ */ +static void +byteswap16(unsigned char *buf) +{ + int i; + + for (i = 0; i < 8; i ++) { + unsigned x; + + x = buf[i]; + buf[i] = buf[15 - i]; + buf[15 - i] = x; + } +} + +/* see bearssl_block.h */ +void +br_poly1305_i15_run(const void *key, const void *iv, + void *data, size_t len, const void *aad, size_t aad_len, + void *tag, br_chacha20_run ichacha, int encrypt) +{ + unsigned char pkey[32], foot[16]; + uint16_t t[10], r[10], acc[10]; + + /* + * Compute the MAC key. The 'r' value is the first 16 bytes of + * pkey[]. + */ + memset(pkey, 0, sizeof pkey); + ichacha(key, iv, 0, pkey, sizeof pkey); + + /* + * If encrypting, ChaCha20 must run first, followed by Poly1305. + * When decrypting, the operations are reversed. + */ + if (encrypt) { + ichacha(key, iv, 1, data, len); + } + + /* + * Run Poly1305. We must process the AAD, then ciphertext, then + * the footer (with the lengths). Note that the AAD and ciphertext + * are meant to be padded with zeros up to the next multiple of 16, + * and the length of the footer is 16 bytes as well. + */ + + /* + * Apply the "clamping" operation on the encoded 'r' value. + */ + pkey[ 3] &= 0x0F; + pkey[ 7] &= 0x0F; + pkey[11] &= 0x0F; + pkey[15] &= 0x0F; + pkey[ 4] &= 0xFC; + pkey[ 8] &= 0xFC; + pkey[12] &= 0xFC; + + /* + * Decode the clamped 'r' value. Decoding should use little-endian + * so we must byteswap the value first. + */ + byteswap16(pkey); + br_i15_decode_mod(t, pkey, 16, P1305); + + /* + * Convert 'r' to Montgomery representation. + */ + br_i15_montymul(r, t, R2, P1305, P0I); + + /* + * Accumulator is 0. + */ + br_i15_zero(acc, 0x8A); + + /* + * Process the additional authenticated data, ciphertext, and + * footer in due order. + */ + br_enc64le(foot, (uint64_t)aad_len); + br_enc64le(foot + 8, (uint64_t)len); + poly1305_inner(acc, r, aad, aad_len); + poly1305_inner(acc, r, data, len); + poly1305_inner(acc, r, foot, sizeof foot); + + /* + * Decode the value 's'. Again, a byteswap is needed. 
+ */ + byteswap16(pkey + 16); + br_i15_decode_mod(t, pkey + 16, 16, P1305); + + /* + * Add the value 's' to the accumulator. That addition is done + * modulo 2^128, so we just ignore the carry. + */ + br_i15_add(acc, t, 1); + + /* + * Encode the result (128 low bits) to the tag. Encoding should + * be little-endian. + */ + br_i15_encode(tag, 16, acc); + byteswap16(tag); + + /* + * If decrypting, then ChaCha20 runs _after_ Poly1305. + */ + if (!encrypt) { + ichacha(key, iv, 1, data, len); + } +} diff --git a/third_party/bearssl/src/prf.c b/third_party/bearssl/src/prf.c new file mode 100644 index 0000000..f04a5fb --- /dev/null +++ b/third_party/bearssl/src/prf.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* + * see inner.h + * + * XORs HMAC-based output into dst (dst is never plainly overwritten, + * so the caller must initialize it). With H = HMAC keyed by 'secret' + * and S = label followed by all seed chunks: a = H(S); then each round + * XORs up to hlen bytes of H(a || S) into dst and replaces a with H(a), + * until 'len' bytes have been produced. 'label' is read up to its + * terminating zero byte. Returns immediately when len is 0. + * + * NOTE(review): tmp[] and a[] are 64 bytes, so this presumably relies + * on br_digest_size(dig) never exceeding 64 - confirm. + */ +void +br_tls_phash(void *dst, size_t len, + const br_hash_class *dig, + const void *secret, size_t secret_len, const char *label, + size_t seed_num, const br_tls_prf_seed_chunk *seed) +{ + unsigned char *buf; + unsigned char tmp[64], a[64]; + br_hmac_key_context kc; + br_hmac_context hc; + size_t label_len, hlen, u; + + if (len == 0) { + return; + } + buf = dst; + for (label_len = 0; label[label_len]; label_len ++); + hlen = br_digest_size(dig); + br_hmac_key_init(&kc, dig, secret, secret_len); + br_hmac_init(&hc, &kc, 0); + br_hmac_update(&hc, label, label_len); + for (u = 0; u < seed_num; u ++) { + br_hmac_update(&hc, seed[u].data, seed[u].len); + } + br_hmac_out(&hc, a); + for (;;) { + br_hmac_init(&hc, &kc, 0); + br_hmac_update(&hc, a, hlen); + br_hmac_update(&hc, label, label_len); + for (u = 0; u < seed_num; u ++) { + br_hmac_update(&hc, seed[u].data, seed[u].len); + } + br_hmac_out(&hc, tmp); + for (u = 0; u < hlen && u < len; u ++) { + buf[u] ^= tmp[u]; + } + buf += u; + len -= u; + if (len == 0) { + return; + } + br_hmac_init(&hc, &kc, 0); + br_hmac_update(&hc, a, hlen); + br_hmac_out(&hc, a); + } +} diff --git a/third_party/bearssl/src/prf_md5sha1.c b/third_party/bearssl/src/prf_md5sha1.c new file mode 100644 index 0000000..3212833 --- /dev/null +++ b/third_party/bearssl/src/prf_md5sha1.c @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in
all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * see bearssl.h + * + * Zeroes dst, then XORs into it the MD5-based br_tls_phash() output + * keyed with the first slen bytes of the secret and the SHA-1-based + * output keyed with the last slen bytes, where slen is secret_len / 2 + * rounded up (the two halves share one byte when secret_len is odd). + */ +void +br_tls10_prf(void *dst, size_t len, + const void *secret, size_t secret_len, const char *label, + size_t seed_num, const br_tls_prf_seed_chunk *seed) +{ + const unsigned char *s1; + size_t slen; + + s1 = secret; + slen = (secret_len + 1) >> 1; + memset(dst, 0, len); + br_tls_phash(dst, len, &br_md5_vtable, + s1, slen, label, seed_num, seed); + br_tls_phash(dst, len, &br_sha1_vtable, + s1 + secret_len - slen, slen, label, seed_num, seed); +} diff --git a/third_party/bearssl/src/prf_sha256.c b/third_party/bearssl/src/prf_sha256.c new file mode 100644 index 0000000..76041de --- /dev/null +++ b/third_party/bearssl/src/prf_sha256.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * see bearssl.h + * + * Zeroes dst, then lets br_tls_phash() XOR its SHA-256-based output + * into it; the zero fill is what turns that XOR into a plain write. + */ +void +br_tls12_sha256_prf(void *dst, size_t len, + const void *secret, size_t secret_len, const char *label, + size_t seed_num, const br_tls_prf_seed_chunk *seed) +{ + memset(dst, 0, len); + br_tls_phash(dst, len, &br_sha256_vtable, + secret, secret_len, label, seed_num, seed); +} diff --git a/third_party/bearssl/src/prf_sha384.c b/third_party/bearssl/src/prf_sha384.c new file mode 100644 index 0000000..c20c4e6 --- /dev/null +++ b/third_party/bearssl/src/prf_sha384.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * see bearssl.h + * + * Zeroes dst, then lets br_tls_phash() XOR its SHA-384-based output + * into it; the zero fill is what turns that XOR into a plain write. + */ +void +br_tls12_sha384_prf(void *dst, size_t len, + const void *secret, size_t secret_len, const char *label, + size_t seed_num, const br_tls_prf_seed_chunk *seed) +{ + memset(dst, 0, len); + br_tls_phash(dst, len, &br_sha384_vtable, + secret, secret_len, label, seed_num, seed); +} diff --git a/third_party/bearssl/src/rsa_default_keygen.c b/third_party/bearssl/src/rsa_default_keygen.c new file mode 100644 index 0000000..f2e83c8 --- /dev/null +++ b/third_party/bearssl/src/rsa_default_keygen.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * see bearssl_rsa.h + * + * Compile-time selection of the returned key generation function: + * i62 when BR_INT128 or BR_UMUL128 is non-zero, else i15 when BR_LOMUL + * is non-zero, else i31. + */ +br_rsa_keygen +br_rsa_keygen_get_default(void) +{ +#if BR_INT128 || BR_UMUL128 + return &br_rsa_i62_keygen; +#elif BR_LOMUL + return &br_rsa_i15_keygen; +#else + return &br_rsa_i31_keygen; +#endif +} diff --git a/third_party/bearssl/src/rsa_default_modulus.c b/third_party/bearssl/src/rsa_default_modulus.c new file mode 100644 index 0000000..57d4be5 --- /dev/null +++ b/third_party/bearssl/src/rsa_default_modulus.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#include "inner.h" + +/* + * see bearssl_rsa.h + * + * Compile-time selection of the returned modulus computation function: + * i15 when BR_LOMUL is non-zero, else i31. No i62 variant is + * referenced here. + */ +br_rsa_compute_modulus +br_rsa_compute_modulus_get_default(void) +{ +#if BR_LOMUL + return &br_rsa_i15_compute_modulus; +#else + return &br_rsa_i31_compute_modulus; +#endif +} diff --git a/third_party/bearssl/src/rsa_default_oaep_decrypt.c b/third_party/bearssl/src/rsa_default_oaep_decrypt.c new file mode 100644 index 0000000..7345d64 --- /dev/null +++ b/third_party/bearssl/src/rsa_default_oaep_decrypt.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#include "inner.h" + +/* + * see bearssl_rsa.h + * + * Compile-time selection of the returned OAEP decryption function: + * i62 when BR_INT128 or BR_UMUL128 is non-zero, else i15 when BR_LOMUL + * is non-zero, else i31. + */ +br_rsa_oaep_decrypt +br_rsa_oaep_decrypt_get_default(void) +{ +#if BR_INT128 || BR_UMUL128 + return &br_rsa_i62_oaep_decrypt; +#elif BR_LOMUL + return &br_rsa_i15_oaep_decrypt; +#else + return &br_rsa_i31_oaep_decrypt; +#endif +} diff --git a/third_party/bearssl/src/rsa_default_oaep_encrypt.c b/third_party/bearssl/src/rsa_default_oaep_encrypt.c new file mode 100644 index 0000000..ae33fcc --- /dev/null +++ b/third_party/bearssl/src/rsa_default_oaep_encrypt.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#include "inner.h" + +/* + * see bearssl_rsa.h + * + * Compile-time selection of the returned OAEP encryption function: + * i62 when BR_INT128 or BR_UMUL128 is non-zero, else i15 when BR_LOMUL + * is non-zero, else i31. + */ +br_rsa_oaep_encrypt +br_rsa_oaep_encrypt_get_default(void) +{ +#if BR_INT128 || BR_UMUL128 + return &br_rsa_i62_oaep_encrypt; +#elif BR_LOMUL + return &br_rsa_i15_oaep_encrypt; +#else + return &br_rsa_i31_oaep_encrypt; +#endif +} diff --git a/third_party/bearssl/src/rsa_default_pkcs1_sign.c b/third_party/bearssl/src/rsa_default_pkcs1_sign.c new file mode 100644 index 0000000..e926704 --- /dev/null +++ b/third_party/bearssl/src/rsa_default_pkcs1_sign.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#include "inner.h" + +/* + * see bearssl_rsa.h + * + * Compile-time selection of the returned PKCS#1 signing function: + * i62 when BR_INT128 or BR_UMUL128 is non-zero, else i15 when BR_LOMUL + * is non-zero, else i31. + */ +br_rsa_pkcs1_sign +br_rsa_pkcs1_sign_get_default(void) +{ +#if BR_INT128 || BR_UMUL128 + return &br_rsa_i62_pkcs1_sign; +#elif BR_LOMUL + return &br_rsa_i15_pkcs1_sign; +#else + return &br_rsa_i31_pkcs1_sign; +#endif +} diff --git a/third_party/bearssl/src/rsa_default_pkcs1_vrfy.c b/third_party/bearssl/src/rsa_default_pkcs1_vrfy.c new file mode 100644 index 0000000..b3dbeb7 --- /dev/null +++ b/third_party/bearssl/src/rsa_default_pkcs1_vrfy.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#include "inner.h" + +/* + * see bearssl_rsa.h + * + * Compile-time selection of the returned PKCS#1 verification function: + * i62 when BR_INT128 or BR_UMUL128 is non-zero, else i15 when BR_LOMUL + * is non-zero, else i31. + */ +br_rsa_pkcs1_vrfy +br_rsa_pkcs1_vrfy_get_default(void) +{ +#if BR_INT128 || BR_UMUL128 + return &br_rsa_i62_pkcs1_vrfy; +#elif BR_LOMUL + return &br_rsa_i15_pkcs1_vrfy; +#else + return &br_rsa_i31_pkcs1_vrfy; +#endif +} diff --git a/third_party/bearssl/src/rsa_default_priv.c b/third_party/bearssl/src/rsa_default_priv.c new file mode 100644 index 0000000..bb0b2c0 --- /dev/null +++ b/third_party/bearssl/src/rsa_default_priv.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#include "inner.h" + +/* + * see bearssl_rsa.h + * + * Compile-time selection of the returned private key operation: + * i62 when BR_INT128 or BR_UMUL128 is non-zero, else i15 when BR_LOMUL + * is non-zero, else i31. + */ +br_rsa_private +br_rsa_private_get_default(void) +{ +#if BR_INT128 || BR_UMUL128 + return &br_rsa_i62_private; +#elif BR_LOMUL + return &br_rsa_i15_private; +#else + return &br_rsa_i31_private; +#endif +} diff --git a/third_party/bearssl/src/rsa_default_privexp.c b/third_party/bearssl/src/rsa_default_privexp.c new file mode 100644 index 0000000..cda4555 --- /dev/null +++ b/third_party/bearssl/src/rsa_default_privexp.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#include "inner.h" + +/* + * see bearssl_rsa.h + * + * Compile-time selection of the returned private exponent computation + * function: i15 when BR_LOMUL is non-zero, else i31. No i62 variant + * is referenced here. + */ +br_rsa_compute_privexp +br_rsa_compute_privexp_get_default(void) +{ +#if BR_LOMUL + return &br_rsa_i15_compute_privexp; +#else + return &br_rsa_i31_compute_privexp; +#endif +} diff --git a/third_party/bearssl/src/rsa_default_pss_sign.c b/third_party/bearssl/src/rsa_default_pss_sign.c new file mode 100644 index 0000000..ce4f3e0 --- /dev/null +++ b/third_party/bearssl/src/rsa_default_pss_sign.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#include "inner.h" + +/* + * see bearssl_rsa.h + * + * Compile-time selection of the returned PSS signing function: + * i62 when BR_INT128 or BR_UMUL128 is non-zero, else i15 when BR_LOMUL + * is non-zero, else i31. + */ +br_rsa_pss_sign +br_rsa_pss_sign_get_default(void) +{ +#if BR_INT128 || BR_UMUL128 + return &br_rsa_i62_pss_sign; +#elif BR_LOMUL + return &br_rsa_i15_pss_sign; +#else + return &br_rsa_i31_pss_sign; +#endif +} diff --git a/third_party/bearssl/src/rsa_default_pss_vrfy.c b/third_party/bearssl/src/rsa_default_pss_vrfy.c new file mode 100644 index 0000000..e3a9ad9 --- /dev/null +++ b/third_party/bearssl/src/rsa_default_pss_vrfy.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#include "inner.h" + +/* + * see bearssl_rsa.h + * + * Compile-time selection of the returned PSS verification function: + * i62 when BR_INT128 or BR_UMUL128 is non-zero, else i15 when BR_LOMUL + * is non-zero, else i31. + */ +br_rsa_pss_vrfy +br_rsa_pss_vrfy_get_default(void) +{ +#if BR_INT128 || BR_UMUL128 + return &br_rsa_i62_pss_vrfy; +#elif BR_LOMUL + return &br_rsa_i15_pss_vrfy; +#else + return &br_rsa_i31_pss_vrfy; +#endif +} diff --git a/third_party/bearssl/src/rsa_default_pub.c b/third_party/bearssl/src/rsa_default_pub.c new file mode 100644 index 0000000..a1f03ef --- /dev/null +++ b/third_party/bearssl/src/rsa_default_pub.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#include "inner.h" + +/* + * see bearssl_rsa.h + * + * Compile-time selection of the returned public key operation: + * i62 when BR_INT128 or BR_UMUL128 is non-zero, else i15 when BR_LOMUL + * is non-zero, else i31. + */ +br_rsa_public +br_rsa_public_get_default(void) +{ +#if BR_INT128 || BR_UMUL128 + return &br_rsa_i62_public; +#elif BR_LOMUL + return &br_rsa_i15_public; +#else + return &br_rsa_i31_public; +#endif +} diff --git a/third_party/bearssl/src/rsa_default_pubexp.c b/third_party/bearssl/src/rsa_default_pubexp.c new file mode 100644 index 0000000..47bc000 --- /dev/null +++ b/third_party/bearssl/src/rsa_default_pubexp.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#include "inner.h" + +/* + * see bearssl_rsa.h + * + * Compile-time selection of the returned public exponent computation + * function: i15 when BR_LOMUL is non-zero, else i31. No i62 variant + * is referenced here. + */ +br_rsa_compute_pubexp +br_rsa_compute_pubexp_get_default(void) +{ +#if BR_LOMUL + return &br_rsa_i15_compute_pubexp; +#else + return &br_rsa_i31_compute_pubexp; +#endif +} diff --git a/third_party/bearssl/src/rsa_i15_keygen.c b/third_party/bearssl/src/rsa_i15_keygen.c new file mode 100644 index 0000000..e8da419 --- /dev/null +++ b/third_party/bearssl/src/rsa_i15_keygen.c @@ -0,0 +1,583 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Make a random integer of the provided size. The size is encoded. + * The header word is untouched.
+ */ +static void +mkrand(const br_prng_class **rng, uint16_t *x, uint32_t esize) +{ + size_t u, len; + unsigned m; + /* + * len is the number of value words filled at x[1..len]; as the + * '>> 4' and '& 15' split implies, the low 4 bits of esize count + * the bits of a partial top word. After the PRNG call every word + * is masked to 15 bits, and the top word is masked further to + * its m low bits when m is non-zero. + */ + len = (esize + 15) >> 4; + (*rng)->generate(rng, x + 1, len * sizeof(uint16_t)); + for (u = 1; u < len; u ++) { + x[u] &= 0x7FFF; + } + m = esize & 15; + if (m == 0) { + x[len] &= 0x7FFF; + } else { + x[len] &= 0x7FFF >> (15 - m); + } +} + +/* + * This is the big-endian unsigned representation of the product of + * all small primes from 13 to 1481. + */ +static const unsigned char SMALL_PRIMES[] = { + 0x2E, 0xAB, 0x92, 0xD1, 0x8B, 0x12, 0x47, 0x31, 0x54, 0x0A, + 0x99, 0x5D, 0x25, 0x5E, 0xE2, 0x14, 0x96, 0x29, 0x1E, 0xB7, + 0x78, 0x70, 0xCC, 0x1F, 0xA5, 0xAB, 0x8D, 0x72, 0x11, 0x37, + 0xFB, 0xD8, 0x1E, 0x3F, 0x5B, 0x34, 0x30, 0x17, 0x8B, 0xE5, + 0x26, 0x28, 0x23, 0xA1, 0x8A, 0xA4, 0x29, 0xEA, 0xFD, 0x9E, + 0x39, 0x60, 0x8A, 0xF3, 0xB5, 0xA6, 0xEB, 0x3F, 0x02, 0xB6, + 0x16, 0xC3, 0x96, 0x9D, 0x38, 0xB0, 0x7D, 0x82, 0x87, 0x0C, + 0xF7, 0xBE, 0x24, 0xE5, 0x5F, 0x41, 0x04, 0x79, 0x76, 0x40, + 0xE7, 0x00, 0x22, 0x7E, 0xB5, 0x85, 0x7F, 0x8D, 0x01, 0x50, + 0xE9, 0xD3, 0x29, 0x42, 0x08, 0xB3, 0x51, 0x40, 0x7B, 0xD7, + 0x8D, 0xCC, 0x10, 0x01, 0x64, 0x59, 0x28, 0xB6, 0x53, 0xF3, + 0x50, 0x4E, 0xB1, 0xF2, 0x58, 0xCD, 0x6E, 0xF5, 0x56, 0x3E, + 0x66, 0x2F, 0xD7, 0x07, 0x7F, 0x52, 0x4C, 0x13, 0x24, 0xDC, + 0x8E, 0x8D, 0xCC, 0xED, 0x77, 0xC4, 0x21, 0xD2, 0xFD, 0x08, + 0xEA, 0xD7, 0xC0, 0x5C, 0x13, 0x82, 0x81, 0x31, 0x2F, 0x2B, + 0x08, 0xE4, 0x80, 0x04, 0x7A, 0x0C, 0x8A, 0x3C, 0xDC, 0x22, + 0xE4, 0x5A, 0x7A, 0xB0, 0x12, 0x5E, 0x4A, 0x76, 0x94, 0x77, + 0xC2, 0x0E, 0x92, 0xBA, 0x8A, 0xA0, 0x1F, 0x14, 0x51, 0x1E, + 0x66, 0x6C, 0x38, 0x03, 0x6C, 0xC7, 0x4A, 0x4B, 0x70, 0x80, + 0xAF, 0xCA, 0x84, 0x51, 0xD8, 0xD2, 0x26, 0x49, 0xF5, 0xA8, + 0x5E, 0x35, 0x4B, 0xAC, 0xCE, 0x29, 0x92, 0x33, 0xB7, 0xA2, + 0x69, 0x7D, 0x0C, 0xE0, 0x9C, 0xDB, 0x04, 0xD6, 0xB4, 0xBC, + 0x39, 0xD7, 0x7F, 0x9E, 0x9D, 0x78, 0x38, 0x7F, 0x51, 0x54, + 0x50, 0x8B, 0x9E, 0x9C, 0x03, 0x6C, 0xF5, 0x9D,
0x2C, 0x74, + 0x57, 0xF0, 0x27, 0x2A, 0xC3, 0x47, 0xCA, 0xB9, 0xD7, 0x5C, + 0xFF, 0xC2, 0xAC, 0x65, 0x4E, 0xBD +}; + +/* + * We need temporary values for at least 7 integers of the same size + * as a factor (including header word); more space helps with performance + * (in modular exponentiations), but we much prefer to remain under + * 2 kilobytes in total, to save stack space. The macro TEMPS below + * exceeds 1024 (which is a count in 16-bit words) when BR_MAX_RSA_SIZE + * is greater than 4350 (default value is 4096, so the 2-kB limit is + * maintained unless BR_MAX_RSA_SIZE was modified). + * + * MAX() evaluates each of its arguments more than once; TEMPS only + * passes it constant expressions. + */ +#define MAX(x, y) ((x) > (y) ? (x) : (y)) +#define TEMPS MAX(1024, 7 * ((((BR_MAX_RSA_SIZE + 1) >> 1) + 29) / 15)) + +/* + * Perform trial division on a candidate prime. This computes + * y = SMALL_PRIMES mod x, then tries to compute y/y mod x. The + * br_i15_moddiv() function will report an error if y is not invertible + * modulo x. Returned value is 1 on success (none of the small primes + * divides x), 0 on error (a non-trivial GCD is obtained). + * + * This function assumes that x is odd. + */ +static uint32_t +trial_divisions(const uint16_t *x, uint16_t *t) +{ + uint16_t *y; + uint16_t x0i; + /* + * y takes the start of t[]: one header word plus + * (x[0] + 15) >> 4 value words. What remains of t[] is handed + * to br_i15_moddiv() as its temporary area. + */ + y = t; + t += 1 + ((x[0] + 15) >> 4); + x0i = br_i15_ninv15(x[1]); + br_i15_decode_reduce(y, SMALL_PRIMES, sizeof SMALL_PRIMES, x); + return br_i15_moddiv(y, y, x, x0i, t); +} + +/* + * Perform n rounds of Miller-Rabin on the candidate prime x. This + * function assumes that x = 3 mod 4. + * + * Returned value is 1 on success (all rounds completed successfully), + * 0 otherwise.
+ */ +static uint32_t +miller_rabin(const br_prng_class **rng, const uint16_t *x, int n, + uint16_t *t, size_t tlen) +{ + /* + * Since x = 3 mod 4, the Miller-Rabin test is simple: + * - get a random base a (such that 1 < a < x-1) + * - compute z = a^((x-1)/2) mod x + * - if z != 1 and z != x-1, the number x is composite + * + * We generate bases 'a' randomly with a size which is + * one bit less than x, which ensures that a < x-1. It + * is not useful to verify that a > 1 because the probability + * that we get a value a equal to 0 or 1 is much smaller + * than the probability of our Miller-Rabin tests not to + * detect a composite, which is already quite smaller than the + * probability of the hardware misbehaving and return a + * composite integer because of some glitch (e.g. bad RAM + * or ill-timed cosmic ray). + */ + unsigned char *xm1d2; + size_t xlen, xm1d2_len, xm1d2_len_u16, u; + uint32_t asize; + unsigned cc; + uint16_t x0i; + + /* + * Compute (x-1)/2 (encoded). + * + * The byte length is derived from x[0] - (x[0] >> 4), presumably + * the true bit length of x (each counted word holding 15 bits + * rather than 16). The loop then moves the low bit of every byte + * into the top bit of the following byte, i.e. it shifts the + * byte string right by one bit starting from xm1d2[0]; the cast + * to unsigned char drops the bits of cc above bit 7. Since x is + * odd, the dropped low bit is the '-1'. + */ + xm1d2 = (unsigned char *)t; + xm1d2_len = ((x[0] - (x[0] >> 4)) + 7) >> 3; + br_i15_encode(xm1d2, xm1d2_len, x); + cc = 0; + for (u = 0; u < xm1d2_len; u ++) { + unsigned w; + + w = xm1d2[u]; + xm1d2[u] = (unsigned char)((w >> 1) | cc); + cc = w << 7; + } + + /* + * We used some words of the provided buffer for (x-1)/2. + */ + xm1d2_len_u16 = (xm1d2_len + 1) >> 1; + t += xm1d2_len_u16; + tlen -= xm1d2_len_u16; + /* + * asize is the encoded size of x minus one bit. When the low + * 4 bits of x[0] are zero, one more is subtracted, presumably + * because a low nibble of 15 is not a valid encoded length + * with 15-bit words - confirm. + */ + xlen = (x[0] + 15) >> 4; + asize = x[0] - 1 - EQ0(x[0] & 15); + x0i = br_i15_ninv15(x[1]); + while (n -- > 0) { + uint16_t *a; + uint32_t eq1, eqm1; + + /* + * Generate a random base. We don't need the base to be + * really uniform modulo x, so we just get a random + * number which is one bit shorter than x. + */ + a = t; + a[0] = x[0]; + a[xlen] = 0; + mkrand(rng, a, asize); + + /* + * Compute a^((x-1)/2) mod x. We assume here that the + * function will not fail (the temporary array is large + * enough).
+ */ + br_i15_modpow_opt(a, xm1d2, xm1d2_len, + x, x0i, t + 1 + xlen, tlen - 1 - xlen); + + /* + * We must obtain either 1 or x-1. Note that x is odd, + * hence x-1 differs from x only in its low word (no + * carry). + * + * Word differences are OR-ed into eq1 / eqm1 over all + * words, so the comparison loop itself has no + * data-dependent early exit. + */ + eq1 = a[1] ^ 1; + eqm1 = a[1] ^ (x[1] - 1); + for (u = 2; u <= xlen; u ++) { + eq1 |= a[u]; + eqm1 |= a[u] ^ x[u]; + } + + if ((EQ0(eq1) | EQ0(eqm1)) == 0) { + return 0; + } + } + return 1; +} + +/* + * Create a random prime of the provided size. 'size' is the _encoded_ + * bit length. The two top bits and the two bottom bits are set to 1. + */ +static void +mkprime(const br_prng_class **rng, uint16_t *x, uint32_t esize, + uint32_t pubexp, uint16_t *t, size_t tlen) +{ + size_t len; + + x[0] = esize; + len = (esize + 15) >> 4; + for (;;) { + size_t u; + uint32_t m3, m5, m7, m11; + int rounds; + + /* + * Generate random bits. We force the two top bits and the + * two bottom bits to 1. + */ + mkrand(rng, x, esize); + if ((esize & 15) == 0) { + x[len] |= 0x6000; + } else if ((esize & 15) == 1) { + x[len] |= 0x0001; + x[len - 1] |= 0x4000; + } else { + x[len] |= 0x0003 << ((esize & 15) - 2); + } + x[1] |= 0x0003; + + /* + * Trial division with low primes (3, 5, 7 and 11). We + * use the following properties: + * + * 2^2 = 1 mod 3 + * 2^4 = 1 mod 5 + * 2^3 = 1 mod 7 + * 2^10 = 1 mod 11 + */ + m3 = 0; + m5 = 0; + m7 = 0; + m11 = 0; + for (u = 0; u < len; u ++) { + uint32_t w; + + w = x[1 + u]; + m3 += w << (u & 1); + m3 = (m3 & 0xFF) + (m3 >> 8); + m5 += w << ((4 - u) & 3); + m5 = (m5 & 0xFF) + (m5 >> 8); + m7 += w; + m7 = (m7 & 0x1FF) + (m7 >> 9); + m11 += w << (5 & -(u & 1)); + m11 = (m11 & 0x3FF) + (m11 >> 10); + } + + /* + * Maximum values of m* at this point: + * m3: 511 + * m5: 2310 + * m7: 510 + * m11: 2047 + * We use the same properties to make further reductions.
+ */ + + m3 = (m3 & 0x0F) + (m3 >> 4); /* max: 46 */ + m3 = (m3 & 0x0F) + (m3 >> 4); /* max: 16 */ + m3 = ((m3 * 43) >> 5) & 3; + + m5 = (m5 & 0xFF) + (m5 >> 8); /* max: 263 */ + m5 = (m5 & 0x0F) + (m5 >> 4); /* max: 30 */ + m5 = (m5 & 0x0F) + (m5 >> 4); /* max: 15 */ + m5 -= 10 & -GT(m5, 9); + m5 -= 5 & -GT(m5, 4); + + m7 = (m7 & 0x3F) + (m7 >> 6); /* max: 69 */ + m7 = (m7 & 7) + (m7 >> 3); /* max: 14 */ + m7 = ((m7 * 147) >> 7) & 7; + + /* + * 2^5 = 32 = -1 mod 11. + */ + m11 = (m11 & 0x1F) + 66 - (m11 >> 5); /* max: 97 */ + m11 -= 88 & -GT(m11, 87); + m11 -= 44 & -GT(m11, 43); + m11 -= 22 & -GT(m11, 21); + m11 -= 11 & -GT(m11, 10); + + /* + * If any of these modulo is 0, then the candidate is + * not prime. Also, if pubexp is 3, 5, 7 or 11, and the + * corresponding modulus is 1, then the candidate must + * be rejected, because we need e to be invertible + * modulo p-1. We can use simple comparisons here + * because they won't leak information on a candidate + * that we keep, only on one that we reject (and is thus + * not secret). + */ + if (m3 == 0 || m5 == 0 || m7 == 0 || m11 == 0) { + continue; + } + if ((pubexp == 3 && m3 == 1) + || (pubexp == 5 && m5 == 1) + || (pubexp == 7 && m7 == 1) + || (pubexp == 11 && m11 == 1)) + { + continue; + } + + /* + * More trial divisions. + */ + if (!trial_divisions(x, t)) { + continue; + } + + /* + * Miller-Rabin algorithm. Since we selected a random + * integer, not a maliciously crafted integer, we can use + * relatively few rounds to lower the risk of a false + * positive (i.e. declaring prime a non-prime) under + * 2^(-80). It is not useful to lower the probability much + * below that, since that would be substantially below + * the probability of the hardware misbehaving. Sufficient + * numbers of rounds are extracted from the Handbook of + * Applied Cryptography, note 4.49 (page 149). + * + * Since we work on the encoded size (esize), we need to + * compare with encoded thresholds. 
+ */ + if (esize < 320) { + rounds = 12; + } else if (esize < 480) { + rounds = 9; + } else if (esize < 693) { + rounds = 6; + } else if (esize < 906) { + rounds = 4; + } else if (esize < 1386) { + rounds = 3; + } else { + rounds = 2; + } + + if (miller_rabin(rng, x, rounds, t, tlen)) { + return; + } + } +} + +/* + * Let p be a prime (p > 2^33, p = 3 mod 4). Let m = (p-1)/2, provided + * as parameter (with announced bit length equal to that of p). This + * function computes d = 1/e mod p-1 (for an odd integer e). Returned + * value is 1 on success, 0 on error (an error is reported if e is not + * invertible modulo p-1). + * + * The temporary buffer (t) must have room for at least 4 integers of + * the size of p. + */ +static uint32_t +invert_pubexp(uint16_t *d, const uint16_t *m, uint32_t e, uint16_t *t) +{ + uint16_t *f; + uint32_t r; + + f = t; + t += 1 + ((m[0] + 15) >> 4); + + /* + * Compute d = 1/e mod m. Since p = 3 mod 4, m is odd. + */ + br_i15_zero(d, m[0]); + d[1] = 1; + br_i15_zero(f, m[0]); + f[1] = e & 0x7FFF; + f[2] = (e >> 15) & 0x7FFF; + f[3] = e >> 30; + r = br_i15_moddiv(d, f, m, br_i15_ninv15(m[1]), t); + + /* + * We really want d = 1/e mod p-1, with p-1 = 2m. By the CRT, + * the result is either the d we got, or d + m. + * + * Let's write e*d = 1 + k*m, for some integer k. Integers e + * and m are odd. If d is odd, then e*d is odd, which implies + * that k must be even; in that case, e*d = 1 + (k/2)*2m, and + * thus d is already fine. Conversely, if d is even, then k + * is odd, and we must add m to d in order to get the correct + * result. + */ + br_i15_add(d, m, (uint32_t)(1 - (d[1] & 1))); + + return r; +} + +/* + * Swap two buffers in RAM. They must be disjoint. 
+ */ +static void +bufswap(void *b1, void *b2, size_t len) +{ + size_t u; + unsigned char *buf1, *buf2; + + buf1 = b1; + buf2 = b2; + for (u = 0; u < len; u ++) { + unsigned w; + + w = buf1[u]; + buf1[u] = buf2[u]; + buf2[u] = w; + } +} + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i15_keygen(const br_prng_class **rng, + br_rsa_private_key *sk, void *kbuf_priv, + br_rsa_public_key *pk, void *kbuf_pub, + unsigned size, uint32_t pubexp) +{ + uint32_t esize_p, esize_q; + size_t plen, qlen, tlen; + uint16_t *p, *q, *t; + uint16_t tmp[TEMPS]; + uint32_t r; + + if (size < BR_MIN_RSA_SIZE || size > BR_MAX_RSA_SIZE) { + return 0; + } + if (pubexp == 0) { + pubexp = 3; + } else if (pubexp == 1 || (pubexp & 1) == 0) { + return 0; + } + + esize_p = (size + 1) >> 1; + esize_q = size - esize_p; + sk->n_bitlen = size; + sk->p = kbuf_priv; + sk->plen = (esize_p + 7) >> 3; + sk->q = sk->p + sk->plen; + sk->qlen = (esize_q + 7) >> 3; + sk->dp = sk->q + sk->qlen; + sk->dplen = sk->plen; + sk->dq = sk->dp + sk->dplen; + sk->dqlen = sk->qlen; + sk->iq = sk->dq + sk->dqlen; + sk->iqlen = sk->plen; + + if (pk != NULL) { + pk->n = kbuf_pub; + pk->nlen = (size + 7) >> 3; + pk->e = pk->n + pk->nlen; + pk->elen = 4; + br_enc32be(pk->e, pubexp); + while (*pk->e == 0) { + pk->e ++; + pk->elen --; + } + } + + /* + * We now switch to encoded sizes. + * + * floor((x * 17477) / (2^18)) is equal to floor(x/15) for all + * integers x from 0 to 23833. + */ + esize_p += MUL15(esize_p, 17477) >> 18; + esize_q += MUL15(esize_q, 17477) >> 18; + plen = (esize_p + 15) >> 4; + qlen = (esize_q + 15) >> 4; + p = tmp; + q = p + 1 + plen; + t = q + 1 + qlen; + tlen = ((sizeof tmp) / sizeof(uint16_t)) - (2 + plen + qlen); + + /* + * When looking for primes p and q, we temporarily divide + * candidates by 2, in order to compute the inverse of the + * public exponent. 
+ */ + + for (;;) { + mkprime(rng, p, esize_p, pubexp, t, tlen); + br_i15_rshift(p, 1); + if (invert_pubexp(t, p, pubexp, t + 1 + plen)) { + br_i15_add(p, p, 1); + p[1] |= 1; + br_i15_encode(sk->p, sk->plen, p); + br_i15_encode(sk->dp, sk->dplen, t); + break; + } + } + + for (;;) { + mkprime(rng, q, esize_q, pubexp, t, tlen); + br_i15_rshift(q, 1); + if (invert_pubexp(t, q, pubexp, t + 1 + qlen)) { + br_i15_add(q, q, 1); + q[1] |= 1; + br_i15_encode(sk->q, sk->qlen, q); + br_i15_encode(sk->dq, sk->dqlen, t); + break; + } + } + + /* + * If p and q have the same size, then it is possible that q > p + * (when the target modulus size is odd, we generate p with a + * greater bit length than q). If q > p, we want to swap p and q + * (and also dp and dq) for two reasons: + * - The final step below (inversion of q modulo p) is easier if + * p > q. + * - While BearSSL's RSA code is perfectly happy with RSA keys such + * that p < q, some other implementations have restrictions and + * require p > q. + * + * Note that we can do a simple non-constant-time swap here, + * because the only information we leak here is that we insist on + * returning p and q such that p > q, which is not a secret. + */ + if (esize_p == esize_q && br_i15_sub(p, q, 0) == 1) { + bufswap(p, q, (1 + plen) * sizeof *p); + bufswap(sk->p, sk->q, sk->plen); + bufswap(sk->dp, sk->dq, sk->dplen); + } + + /* + * We have produced p, q, dp and dq. We can now compute iq = 1/q mod p. + * + * We ensured that p >= q, so this is just a matter of updating the + * header word for q (and possibly adding an extra word). + * + * Theoretically, the call below may fail, in case we were + * extraordinarily unlucky, and p = q. Another failure case is if + * Miller-Rabin failed us _twice_, and p and q are non-prime and + * have a factor in common. We report the error mostly because it + * is cheap and we can, but in practice this never happens (or, at + * least, it happens way less often than hardware glitches). 
+ */ + q[0] = p[0]; + if (plen > qlen) { + q[plen] = 0; + t ++; + tlen --; + } + br_i15_zero(t, p[0]); + t[1] = 1; + r = br_i15_moddiv(t, q, p, br_i15_ninv15(p[1]), t + 1 + plen); + br_i15_encode(sk->iq, sk->iqlen, t); + + /* + * Compute the public modulus too, if required. + */ + if (pk != NULL) { + br_i15_zero(t, p[0]); + br_i15_mulacc(t, p, q); + br_i15_encode(pk->n, pk->nlen, t); + } + + return r; +} diff --git a/third_party/bearssl/src/rsa_i15_modulus.c b/third_party/bearssl/src/rsa_i15_modulus.c new file mode 100644 index 0000000..16458c3 --- /dev/null +++ b/third_party/bearssl/src/rsa_i15_modulus.c @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see bearssl_rsa.h */ +size_t +br_rsa_i15_compute_modulus(void *n, const br_rsa_private_key *sk) +{ + uint16_t tmp[4 * (((BR_MAX_RSA_SIZE / 2) + 14) / 15) + 5]; + uint16_t *t, *p, *q; + const unsigned char *pbuf, *qbuf; + size_t nlen, plen, qlen, tlen; + + /* + * Compute actual start pointers and byte lengths for p and q. + */ + pbuf = sk->p; + plen = sk->plen; + while (plen > 0 && *pbuf == 0) { + pbuf ++; + plen --; + } + qbuf = sk->q; + qlen = sk->qlen; + while (qlen > 0 && *qbuf == 0) { + qbuf ++; + qlen --; + } + + t = tmp; + tlen = (sizeof tmp) / (sizeof tmp[0]); + + /* + * Decode p. + */ + if ((15 * tlen) < (plen << 3) + 15) { + return 0; + } + br_i15_decode(t, pbuf, plen); + p = t; + plen = (p[0] + 31) >> 4; + t += plen; + tlen -= plen; + + /* + * Decode q. + */ + if ((15 * tlen) < (qlen << 3) + 15) { + return 0; + } + br_i15_decode(t, qbuf, qlen); + q = t; + qlen = (q[0] + 31) >> 4; + t += qlen; + tlen -= qlen; + + /* + * Computation can proceed only if we have enough room for the + * modulus. + */ + if (tlen < (plen + qlen + 1)) { + return 0; + } + + /* + * Private key already contains the modulus bit length, from which + * we can infer the output length. Even if n is NULL, we still had + * to decode p and q to make sure that the product can be computed. 
+ */ + nlen = (sk->n_bitlen + 7) >> 3; + if (n != NULL) { + br_i15_zero(t, p[0]); + br_i15_mulacc(t, p, q); + br_i15_encode(n, nlen, t); + } + return nlen; +} diff --git a/third_party/bearssl/src/rsa_i15_oaep_decrypt.c b/third_party/bearssl/src/rsa_i15_oaep_decrypt.c new file mode 100644 index 0000000..927eecd --- /dev/null +++ b/third_party/bearssl/src/rsa_i15_oaep_decrypt.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i15_oaep_decrypt(const br_hash_class *dig, + const void *label, size_t label_len, + const br_rsa_private_key *sk, void *data, size_t *len) +{ + uint32_t r; + + if (*len != ((sk->n_bitlen + 7) >> 3)) { + return 0; + } + r = br_rsa_i15_private(data, sk); + r &= br_rsa_oaep_unpad(dig, label, label_len, data, len); + return r; +} diff --git a/third_party/bearssl/src/rsa_i15_oaep_encrypt.c b/third_party/bearssl/src/rsa_i15_oaep_encrypt.c new file mode 100644 index 0000000..b9a6cfa --- /dev/null +++ b/third_party/bearssl/src/rsa_i15_oaep_encrypt.c @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see bearssl_rsa.h */ +size_t +br_rsa_i15_oaep_encrypt( + const br_prng_class **rnd, const br_hash_class *dig, + const void *label, size_t label_len, + const br_rsa_public_key *pk, + void *dst, size_t dst_max_len, + const void *src, size_t src_len) +{ + size_t dlen; + + dlen = br_rsa_oaep_pad(rnd, dig, label, label_len, + pk, dst, dst_max_len, src, src_len); + if (dlen == 0) { + return 0; + } + return dlen & -(size_t)br_rsa_i15_public(dst, dlen, pk); +} diff --git a/third_party/bearssl/src/rsa_i15_pkcs1_sign.c b/third_party/bearssl/src/rsa_i15_pkcs1_sign.c new file mode 100644 index 0000000..f519423 --- /dev/null +++ b/third_party/bearssl/src/rsa_i15_pkcs1_sign.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i15_pkcs1_sign(const unsigned char *hash_oid, + const unsigned char *hash, size_t hash_len, + const br_rsa_private_key *sk, unsigned char *x) +{ + if (!br_rsa_pkcs1_sig_pad(hash_oid, hash, hash_len, sk->n_bitlen, x)) { + return 0; + } + return br_rsa_i15_private(x, sk); +} diff --git a/third_party/bearssl/src/rsa_i15_pkcs1_vrfy.c b/third_party/bearssl/src/rsa_i15_pkcs1_vrfy.c new file mode 100644 index 0000000..2c35184 --- /dev/null +++ b/third_party/bearssl/src/rsa_i15_pkcs1_vrfy.c @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i15_pkcs1_vrfy(const unsigned char *x, size_t xlen, + const unsigned char *hash_oid, size_t hash_len, + const br_rsa_public_key *pk, unsigned char *hash_out) +{ + unsigned char sig[BR_MAX_RSA_SIZE >> 3]; + + if (xlen > (sizeof sig)) { + return 0; + } + memcpy(sig, x, xlen); + if (!br_rsa_i15_public(sig, xlen, pk)) { + return 0; + } + return br_rsa_pkcs1_sig_unpad(sig, xlen, hash_oid, hash_len, hash_out); +} diff --git a/third_party/bearssl/src/rsa_i15_priv.c b/third_party/bearssl/src/rsa_i15_priv.c new file mode 100644 index 0000000..177cc3a --- /dev/null +++ b/third_party/bearssl/src/rsa_i15_priv.c @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +#define U (2 + ((BR_MAX_RSA_FACTOR + 14) / 15)) +#define TLEN (8 * U) + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i15_private(unsigned char *x, const br_rsa_private_key *sk) +{ + const unsigned char *p, *q; + size_t plen, qlen; + size_t fwlen; + uint16_t p0i, q0i; + size_t xlen, u; + uint16_t tmp[1 + TLEN]; + long z; + uint16_t *mp, *mq, *s1, *s2, *t1, *t2, *t3; + uint32_t r; + + /* + * Compute the actual lengths of p and q, in bytes. + * These lengths are not considered secret (we cannot really hide + * them anyway in constant-time code). + */ + p = sk->p; + plen = sk->plen; + while (plen > 0 && *p == 0) { + p ++; + plen --; + } + q = sk->q; + qlen = sk->qlen; + while (qlen > 0 && *q == 0) { + q ++; + qlen --; + } + + /* + * Compute the maximum factor length, in words. + */ + z = (long)(plen > qlen ? plen : qlen) << 3; + fwlen = 1; + while (z > 0) { + z -= 15; + fwlen ++; + } + /* + * Round up the word length to an even number. + */ + fwlen += (fwlen & 1); + + /* + * We need to fit at least 6 values in the stack buffer. + */ + if (6 * fwlen > TLEN) { + return 0; + } + + /* + * Compute signature length (in bytes). + */ + xlen = (sk->n_bitlen + 7) >> 3; + + /* + * Ensure 32-bit alignment for value words. + */ + mq = tmp; + if (((uintptr_t)mq & 2) == 0) { + mq ++; + } + + /* + * Decode q. + */ + br_i15_decode(mq, q, qlen); + + /* + * Decode p. + */ + t1 = mq + fwlen; + br_i15_decode(t1, p, plen); + + /* + * Compute the modulus (product of the two factors), to compare + * it with the source value. We use br_i15_mulacc(), since it's + * already used later on. + */ + t2 = mq + 2 * fwlen; + br_i15_zero(t2, mq[0]); + br_i15_mulacc(t2, mq, t1); + + /* + * We encode the modulus into bytes, to perform the comparison + * with bytes. We know that the product length, in bytes, is + * exactly xlen. 
+ * The comparison actually computes the carry when subtracting + * the modulus from the source value; that carry must be 1 for + * a value in the correct range. We keep it in r, which is our + * accumulator for the error code. + */ + t3 = mq + 4 * fwlen; + br_i15_encode(t3, xlen, t2); + u = xlen; + r = 0; + while (u > 0) { + uint32_t wn, wx; + + u --; + wn = ((unsigned char *)t3)[u]; + wx = x[u]; + r = ((wx - (wn + r)) >> 8) & 1; + } + + /* + * Move the decoded p to another temporary buffer. + */ + mp = mq + 2 * fwlen; + memmove(mp, t1, fwlen * sizeof *t1); + + /* + * Compute s2 = x^dq mod q. + */ + q0i = br_i15_ninv15(mq[1]); + s2 = mq + fwlen; + br_i15_decode_reduce(s2, x, xlen, mq); + r &= br_i15_modpow_opt(s2, sk->dq, sk->dqlen, mq, q0i, + mq + 3 * fwlen, TLEN - 3 * fwlen); + + /* + * Compute s1 = x^dp mod p. + */ + p0i = br_i15_ninv15(mp[1]); + s1 = mq + 3 * fwlen; + br_i15_decode_reduce(s1, x, xlen, mp); + r &= br_i15_modpow_opt(s1, sk->dp, sk->dplen, mp, p0i, + mq + 4 * fwlen, TLEN - 4 * fwlen); + + /* + * Compute: + * h = (s1 - s2)*(1/q) mod p + * s1 is an integer modulo p, but s2 is modulo q. PKCS#1 is + * unclear about whether p may be lower than q (some existing, + * widely deployed implementations of RSA don't tolerate p < q), + * but we want to support that occurrence, so we need to use the + * reduction function. + * + * Since we use br_i15_decode_reduce() for iq (purportedly, the + * inverse of q modulo p), we also tolerate improperly large + * values for this parameter. + */ + t1 = mq + 4 * fwlen; + t2 = mq + 5 * fwlen; + br_i15_reduce(t2, s2, mp); + br_i15_add(s1, mp, br_i15_sub(s1, t2, 1)); + br_i15_to_monty(s1, mp); + br_i15_decode_reduce(t1, sk->iq, sk->iqlen, mp); + br_i15_montymul(t2, s1, t1, mp, p0i); + + /* + * h is now in t2. We compute the final result: + * s = s2 + q*h + * All these operations are non-modular. + * + * We need mq, s2 and t2. We use the t3 buffer as destination. 
+ * The buffers mp, s1 and t1 are no longer needed, so we can + * reuse them for t3. Moreover, the first step of the computation + * is to copy s2 into t3, after which s2 is not needed. Right + * now, mq is in slot 0, s2 is in slot 1, and t2 in slot 5. + * Therefore, we have ample room for t3 by simply using s2. + */ + t3 = s2; + br_i15_mulacc(t3, mq, t2); + + /* + * Encode the result. Since we already checked the value of xlen, + * we can just use it right away. + */ + br_i15_encode(x, xlen, t3); + + /* + * The only error conditions remaining at that point are invalid + * values for p and q (even integers). + */ + return p0i & q0i & r; +} diff --git a/third_party/bearssl/src/rsa_i15_privexp.c b/third_party/bearssl/src/rsa_i15_privexp.c new file mode 100644 index 0000000..57d6918 --- /dev/null +++ b/third_party/bearssl/src/rsa_i15_privexp.c @@ -0,0 +1,320 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_rsa.h */ +size_t +br_rsa_i15_compute_privexp(void *d, + const br_rsa_private_key *sk, uint32_t e) +{ + /* + * We want to invert e modulo phi = (p-1)(q-1). This first + * requires computing phi, which is easy since we have the factors + * p and q in the private key structure. + * + * Since p = 3 mod 4 and q = 3 mod 4, phi/4 is an odd integer. + * We could invert e modulo phi/4 then patch the result to + * modulo phi, but this would involve assembling three modulus-wide + * values (phi/4, 1 and e) and calling moddiv, that requires + * three more temporaries, for a total of six big integers, or + * slightly more than 3 kB of stack space for RSA-4096. This + * exceeds our stack requirements. + * + * Instead, we first use one step of the extended GCD: + * + * - We compute phi = k*e + r (Euclidean division of phi by e). + * If public exponent e is correct, then r != 0 (e must be + * invertible modulo phi). We also have k != 0 since we + * enforce non-ridiculously-small factors. + * + * - We find small u, v such that u*e - v*r = 1 (using a + * binary GCD; we can arrange for u < r and v < e, i.e. all + * values fit on 32 bits). + * + * - Solution is: d = u + v*k + * This last computation is exact: since u < r and v < e, + * the above implies d < r + e*((phi-r)/e) = phi + */ + + uint16_t tmp[4 * ((BR_MAX_RSA_FACTOR + 14) / 15) + 12]; + uint16_t *p, *q, *k, *m, *z, *phi; + const unsigned char *pbuf, *qbuf; + size_t plen, qlen, u, len, dlen; + uint32_t r, a, b, u0, v0, u1, v1, he, hr; + int i; + + /* + * Check that e is correct. + */ + if (e < 3 || (e & 1) == 0) { + return 0; + } + + /* + * Check lengths of p and q, and that they are both odd. 
+ */ + pbuf = sk->p; + plen = sk->plen; + while (plen > 0 && *pbuf == 0) { + pbuf ++; + plen --; + } + if (plen < 5 || plen > (BR_MAX_RSA_FACTOR / 8) + || (pbuf[plen - 1] & 1) != 1) + { + return 0; + } + qbuf = sk->q; + qlen = sk->qlen; + while (qlen > 0 && *qbuf == 0) { + qbuf ++; + qlen --; + } + if (qlen < 5 || qlen > (BR_MAX_RSA_FACTOR / 8) + || (qbuf[qlen - 1] & 1) != 1) + { + return 0; + } + + /* + * Output length is that of the modulus. + */ + dlen = (sk->n_bitlen + 7) >> 3; + if (d == NULL) { + return dlen; + } + + p = tmp; + br_i15_decode(p, pbuf, plen); + plen = (p[0] + 15) >> 4; + q = p + 1 + plen; + br_i15_decode(q, qbuf, qlen); + qlen = (q[0] + 15) >> 4; + + /* + * Compute phi = (p-1)*(q-1), then move it over p-1 and q-1 (that + * we do not need anymore). The mulacc function sets the announced + * bit length of t to be the sum of the announced bit lengths of + * p-1 and q-1, which is usually exact but may overshoot by one 1 + * bit in some cases; we readjust it to its true length. + */ + p[1] --; + q[1] --; + phi = q + 1 + qlen; + br_i15_zero(phi, p[0]); + br_i15_mulacc(phi, p, q); + len = (phi[0] + 15) >> 4; + memmove(tmp, phi, (1 + len) * sizeof *phi); + phi = tmp; + phi[0] = br_i15_bit_length(phi + 1, len); + len = (phi[0] + 15) >> 4; + + /* + * Divide phi by public exponent e. The final remainder r must be + * non-zero (otherwise, the key is invalid). The quotient is k, + * which we write over phi, since we don't need phi after that. + */ + r = 0; + for (u = len; u >= 1; u --) { + /* + * Upon entry, r < e, and phi[u] < 2^15; hence, + * hi:lo < e*2^15. Thus, the produced word k[u] + * must be lower than 2^15, and the new remainder r + * is lower than e. + */ + uint32_t hi, lo; + + hi = r >> 17; + lo = (r << 15) + phi[u]; + phi[u] = br_divrem(hi, lo, e, &r); + } + if (r == 0) { + return 0; + } + k = phi; + + /* + * Compute u and v such that u*e - v*r = GCD(e,r). We use + * a binary GCD algorithm, with 6 extra integers a, b, + * u0, u1, v0 and v1. 
Initial values are: + * a = e u0 = 1 v0 = 0 + * b = r u1 = r v1 = e-1 + * The following invariants are maintained: + * a = u0*e - v0*r + * b = u1*e - v1*r + * 0 < a <= e + * 0 < b <= r + * 0 <= u0 <= r + * 0 <= v0 <= e + * 0 <= u1 <= r + * 0 <= v1 <= e + * + * At each iteration, we reduce either a or b by one bit, and + * adjust u0, u1, v0 and v1 to maintain the invariants: + * - if a is even, then a <- a/2 + * - otherwise, if b is even, then b <- b/2 + * - otherwise, if a > b, then a <- (a-b)/2 + * - otherwise, if b > a, then b <- (b-a)/2 + * Algorithm stops when a = b. At that point, the common value + * is the GCD of e and r; it must be 1 (otherwise, the private + * key or public exponent is not valid). The (u0,v0) or (u1,v1) + * pairs are the solution we are looking for. + * + * Since either a or b is reduced by at least 1 bit at each + * iteration, 62 iterations are enough to reach the end + * condition. + * + * To maintain the invariants, we must compute the same operations + * on the u* and v* values that we do on a and b: + * - When a is divided by 2, u0 and v0 must be divided by 2. + * - When b is divided by 2, u1 and v1 must be divided by 2. + * - When b is subtracted from a, u1 and v1 are subtracted from + * u0 and v0, respectively. + * - When a is subtracted from b, u0 and v0 are subtracted from + * u1 and v1, respectively. + * + * However, we want to keep the u* and v* values in their proper + * ranges. The following remarks apply: + * + * - When a is divided by 2, then a is even. Therefore: + * + * * If r is odd, then u0 and v0 must have the same parity; + * if they are both odd, then adding r to u0 and e to v0 + * makes them both even, and the division by 2 brings them + * back to the proper range. + * + * * If r is even, then u0 must be even; if v0 is odd, then + * adding r to u0 and e to v0 makes them both even, and the + * division by 2 brings them back to the proper range. 
+ * + * Thus, all we need to do is to look at the parity of v0, + * and add (r,e) to (u0,v0) when v0 is odd. In order to avoid + * a 32-bit overflow, we can add ((r+1)/2,(e/2)+1) after the + * division (r+1 does not overflow since r < e; and (e/2)+1 + * is equal to (e+1)/2 since e is odd). + * + * - When we subtract b from a, three cases may occur: + * + * * u1 <= u0 and v1 <= v0: just do the subtractions + * + * * u1 > u0 and v1 > v0: compute: + * (u0, v0) <- (u0 + r - u1, v0 + e - v1) + * + * * u1 <= u0 and v1 > v0: compute: + * (u0, v0) <- (u0 + r - u1, v0 + e - v1) + * + * The fourth case (u1 > u0 and v1 <= v0) is not possible + * because it would contradict "b < a" (which is the reason + * why we subtract b from a). + * + * The tricky case is the third one: from the equations, it + * seems that u0 may go out of range. However, the invariants + * and ranges of other values imply that, in that case, the + * new u0 does not actually exceed the range. + * + * We can thus handle the subtraction by adding (r,e) based + * solely on the comparison between v0 and v1. 
+ */ + a = e; + b = r; + u0 = 1; + v0 = 0; + u1 = r; + v1 = e - 1; + hr = (r + 1) >> 1; + he = (e >> 1) + 1; + for (i = 0; i < 62; i ++) { + uint32_t oa, ob, agtb, bgta; + uint32_t sab, sba, da, db; + uint32_t ctl; + + oa = a & 1; /* 1 if a is odd */ + ob = b & 1; /* 1 if b is odd */ + agtb = GT(a, b); /* 1 if a > b */ + bgta = GT(b, a); /* 1 if b > a */ + + sab = oa & ob & agtb; /* 1 if a <- a-b */ + sba = oa & ob & bgta; /* 1 if b <- b-a */ + + /* a <- a-b, u0 <- u0-u1, v0 <- v0-v1 */ + ctl = GT(v1, v0); + a -= b & -sab; + u0 -= (u1 - (r & -ctl)) & -sab; + v0 -= (v1 - (e & -ctl)) & -sab; + + /* b <- b-a, u1 <- u1-u0 mod r, v1 <- v1-v0 mod e */ + ctl = GT(v0, v1); + b -= a & -sba; + u1 -= (u0 - (r & -ctl)) & -sba; + v1 -= (v0 - (e & -ctl)) & -sba; + + da = NOT(oa) | sab; /* 1 if a <- a/2 */ + db = (oa & NOT(ob)) | sba; /* 1 if b <- b/2 */ + + /* a <- a/2, u0 <- u0/2, v0 <- v0/2 */ + ctl = v0 & 1; + a ^= (a ^ (a >> 1)) & -da; + u0 ^= (u0 ^ ((u0 >> 1) + (hr & -ctl))) & -da; + v0 ^= (v0 ^ ((v0 >> 1) + (he & -ctl))) & -da; + + /* b <- b/2, u1 <- u1/2 mod r, v1 <- v1/2 mod e */ + ctl = v1 & 1; + b ^= (b ^ (b >> 1)) & -db; + u1 ^= (u1 ^ ((u1 >> 1) + (hr & -ctl))) & -db; + v1 ^= (v1 ^ ((v1 >> 1) + (he & -ctl))) & -db; + } + + /* + * Check that the GCD is indeed 1. If not, then the key is invalid + * (and there's no harm in leaking that piece of information). + */ + if (a != 1) { + return 0; + } + + /* + * Now we have u0*e - v0*r = 1. Let's compute the result as: + * d = u0 + v0*k + * We still have k in the tmp[] array, and its announced bit + * length is that of phi. + */ + m = k + 1 + len; + m[0] = (2 << 4) + 2; /* bit length is 32 bits, encoded */ + m[1] = v0 & 0x7FFF; + m[2] = (v0 >> 15) & 0x7FFF; + m[3] = v0 >> 30; + z = m + 4; + br_i15_zero(z, k[0]); + z[1] = u0 & 0x7FFF; + z[2] = (u0 >> 15) & 0x7FFF; + z[3] = u0 >> 30; + br_i15_mulacc(z, k, m); + + /* + * Encode the result. 
+ */ + br_i15_encode(d, dlen, z); + return dlen; +} diff --git a/third_party/bearssl/src/rsa_i15_pss_sign.c b/third_party/bearssl/src/rsa_i15_pss_sign.c new file mode 100644 index 0000000..dd9385b --- /dev/null +++ b/third_party/bearssl/src/rsa_i15_pss_sign.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* + * RSA/PSS signature generation. This function first calls + * br_rsa_pss_sig_pad() with the hash value, the salt length, the + * modulus bit length (sk->n_bitlen) and the buffer x[]; if that call + * returns 0, then this function returns 0 as well. Otherwise, the + * returned value is that of br_rsa_i15_private(x, sk). + * + * see bearssl_rsa.h + */ +uint32_t +br_rsa_i15_pss_sign(const br_prng_class **rng, + const br_hash_class *hf_data, const br_hash_class *hf_mgf1, + const unsigned char *hash, size_t salt_len, + const br_rsa_private_key *sk, unsigned char *x) +{ + if (!br_rsa_pss_sig_pad(rng, hf_data, hf_mgf1, hash, + salt_len, sk->n_bitlen, x)) + { + return 0; + } + return br_rsa_i15_private(x, sk); +} diff --git a/third_party/bearssl/src/rsa_i15_pss_vrfy.c b/third_party/bearssl/src/rsa_i15_pss_vrfy.c new file mode 100644 index 0000000..7d9f2cb --- /dev/null +++ b/third_party/bearssl/src/rsa_i15_pss_vrfy.c @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* + * RSA/PSS signature verification. The signature x[] (xlen bytes) is + * copied into a local buffer of BR_MAX_RSA_SIZE/8 bytes, and + * br_rsa_i15_public() is applied on that copy. Returned value is 0 if + * the signature is longer than the local buffer, or if + * br_rsa_i15_public() returns 0; otherwise, it is the value returned + * by br_rsa_pss_sig_unpad(). + * + * see bearssl_rsa.h + */ +uint32_t +br_rsa_i15_pss_vrfy(const unsigned char *x, size_t xlen, + const br_hash_class *hf_data, const br_hash_class *hf_mgf1, + const void *hash, size_t salt_len, const br_rsa_public_key *pk) +{ + /* + * The public key operation works on a modifiable buffer, while + * the source signature x[] is const; hence the local copy. + */ + unsigned char sig[BR_MAX_RSA_SIZE >> 3]; + + /* + * This check guarantees that the memcpy() below cannot overflow + * sig[]. + */ + if (xlen > (sizeof sig)) { + return 0; + } + memcpy(sig, x, xlen); + if (!br_rsa_i15_public(sig, xlen, pk)) { + return 0; + } + return br_rsa_pss_sig_unpad(hf_data, hf_mgf1, + hash, salt_len, pk, sig); +} diff --git a/third_party/bearssl/src/rsa_i15_pub.c b/third_party/bearssl/src/rsa_i15_pub.c new file mode 100644 index 0000000..9eab5e8 --- /dev/null +++ b/third_party/bearssl/src/rsa_i15_pub.c @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * As a strict minimum, we need four buffers that can hold a + * modular integer. 
+ */ +#define TLEN (4 * (2 + ((BR_MAX_RSA_SIZE + 14) / 15))) + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i15_public(unsigned char *x, size_t xlen, + const br_rsa_public_key *pk) +{ + const unsigned char *n; + size_t nlen; + uint16_t tmp[1 + TLEN]; + uint16_t *m, *a, *t; + size_t fwlen; + long z; + uint16_t m0i; + uint32_t r; + + /* + * Get the actual length of the modulus, and see if it fits within + * our stack buffer. We also check that the length of x[] is valid. + */ + n = pk->n; + nlen = pk->nlen; + while (nlen > 0 && *n == 0) { + n ++; + nlen --; + } + if (nlen == 0 || nlen > (BR_MAX_RSA_SIZE >> 3) || xlen != nlen) { + return 0; + } + z = (long)nlen << 3; + fwlen = 1; + while (z > 0) { + z -= 15; + fwlen ++; + } + /* + * Round up length to an even number. + */ + fwlen += (fwlen & 1); + + /* + * The modulus gets decoded into m[]. + * The value to exponentiate goes into a[]. + * The temporaries for modular exponentiations are in t[]. + * + * We want the first value word of each integer to be aligned + * on a 32-bit boundary. + */ + m = tmp; + if (((uintptr_t)m & 2) == 0) { + m ++; + } + a = m + fwlen; + t = m + 2 * fwlen; + + /* + * Decode the modulus. + */ + br_i15_decode(m, n, nlen); + m0i = br_i15_ninv15(m[1]); + + /* + * Note: if m[] is even, then m0i == 0. Otherwise, m0i must be + * an odd integer. + */ + r = m0i & 1; + + /* + * Decode x[] into a[]; we also check that its value is proper. + */ + r &= br_i15_decode_mod(a, x, xlen, m); + + /* + * Compute the modular exponentiation. + */ + br_i15_modpow_opt(a, pk->e, pk->elen, m, m0i, t, TLEN - 2 * fwlen); + + /* + * Encode the result. 
+ */ + br_i15_encode(x, xlen, a); + return r; +} diff --git a/third_party/bearssl/src/rsa_i15_pubexp.c b/third_party/bearssl/src/rsa_i15_pubexp.c new file mode 100644 index 0000000..803bff7 --- /dev/null +++ b/third_party/bearssl/src/rsa_i15_pubexp.c @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Recompute public exponent, based on factor p and reduced private + * exponent dp. + */ +static uint32_t +get_pubexp(const unsigned char *pbuf, size_t plen, + const unsigned char *dpbuf, size_t dplen) +{ + /* + * dp is the inverse of e modulo p-1. If p = 3 mod 4, then + * p-1 = 2*((p-1)/2). Taken modulo 2, e is odd and has inverse 1; + * thus, dp must be odd. + * + * We compute the inverse of dp modulo (p-1)/2. 
This requires + * first reducing dp modulo (p-1)/2 (this can be done with a + * conditional subtract, no need to use the generic modular + * reduction function); then, we use moddiv. + */ + + uint16_t tmp[6 * ((BR_MAX_RSA_FACTOR + 29) / 15)]; + uint16_t *p, *dp, *x; + size_t len; + uint32_t e; + + /* + * Compute actual factor length (in bytes) and check that it fits + * under our size constraints. + */ + while (plen > 0 && *pbuf == 0) { + pbuf ++; + plen --; + } + if (plen == 0 || plen < 5 || plen > (BR_MAX_RSA_FACTOR / 8)) { + return 0; + } + + /* + * Compute actual reduced exponent length (in bytes) and check that + * it is not longer than p. + */ + while (dplen > 0 && *dpbuf == 0) { + dpbuf ++; + dplen --; + } + if (dplen > plen || dplen == 0 + || (dplen == plen && dpbuf[0] > pbuf[0])) + { + return 0; + } + + /* + * Verify that p = 3 mod 4 and that dp is odd. + */ + if ((pbuf[plen - 1] & 3) != 3 || (dpbuf[dplen - 1] & 1) != 1) { + return 0; + } + + /* + * Decode p and compute (p-1)/2. + */ + p = tmp; + br_i15_decode(p, pbuf, plen); + len = (p[0] + 31) >> 4; + br_i15_rshift(p, 1); + + /* + * Decode dp and make sure its announced bit length matches that of + * p (we already know that the size of dp, in bits, does not exceed + * the size of p, so we just have to copy the header word). + */ + dp = p + len; + memset(dp, 0, len * sizeof *dp); + br_i15_decode(dp, dpbuf, dplen); + dp[0] = p[0]; + + /* + * Subtract (p-1)/2 from dp if necessary. + */ + br_i15_sub(dp, p, NOT(br_i15_sub(dp, p, 0))); + + /* + * If another subtraction is needed, then this means that the + * value was invalid. We don't care to leak information about + * invalid keys. + */ + if (br_i15_sub(dp, p, 0) == 0) { + return 0; + } + + /* + * Invert dp modulo (p-1)/2. If the inversion fails, then the + * key value was invalid. + */ + x = dp + len; + br_i15_zero(x, p[0]); + x[1] = 1; + if (br_i15_moddiv(x, dp, p, br_i15_ninv15(p[1]), x + len) == 0) { + return 0; + } + + /* + * We now have an inverse. 
We must set it to zero (error) if its + * length is greater than 32 bits and/or if it is an even integer. + * Take care that the bit_length function returns an encoded + * bit length. + */ + e = (uint32_t)x[1] | ((uint32_t)x[2] << 15) | ((uint32_t)x[3] << 30); + e &= -LT(br_i15_bit_length(x + 1, len - 1), 35); + e &= -(e & 1); + return e; +} + +/* + * Recompute the public exponent from the private key elements. + * get_pubexp() is called on (p, dp) and on (q, dq); the value obtained + * from p is returned, masked with -EQ(ep, eq), so that the result is 0 + * unless both calls yielded the same value (this assumes that EQ() + * returns 1 on equality and 0 otherwise -- to be confirmed against + * inner.h). Since get_pubexp() returns 0 on invalid input, an error + * on either factor also yields 0. + * + * see bearssl_rsa.h + */ +uint32_t +br_rsa_i15_compute_pubexp(const br_rsa_private_key *sk) +{ + /* + * Get the public exponent from both p and q. This is the right + * exponent if we get twice the same value. + */ + uint32_t ep, eq; + + ep = get_pubexp(sk->p, sk->plen, sk->dp, sk->dplen); + eq = get_pubexp(sk->q, sk->qlen, sk->dq, sk->dqlen); + return ep & -EQ(ep, eq); +} diff --git a/third_party/bearssl/src/rsa_i31_keygen.c b/third_party/bearssl/src/rsa_i31_keygen.c new file mode 100644 index 0000000..77708f8 --- /dev/null +++ b/third_party/bearssl/src/rsa_i31_keygen.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * RSA key pair generation: this is a thin wrapper that forwards all + * its arguments, unchanged, to br_rsa_i31_keygen_inner(), and passes + * br_i31_modpow_opt() as the extra (last) argument of that function. + * The returned value is that of the inner function. + * + * see bearssl_rsa.h + */ +uint32_t +br_rsa_i31_keygen(const br_prng_class **rng, + br_rsa_private_key *sk, void *kbuf_priv, + br_rsa_public_key *pk, void *kbuf_pub, + unsigned size, uint32_t pubexp) +{ + return br_rsa_i31_keygen_inner(rng, + sk, kbuf_priv, pk, kbuf_pub, size, pubexp, + &br_i31_modpow_opt); +} diff --git a/third_party/bearssl/src/rsa_i31_keygen_inner.c b/third_party/bearssl/src/rsa_i31_keygen_inner.c new file mode 100644 index 0000000..98df445 --- /dev/null +++ b/third_party/bearssl/src/rsa_i31_keygen_inner.c @@ -0,0 +1,608 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Make a random integer of the provided size. The size is encoded. + * The header word is untouched. + * + * The value words x[1] to x[len] (with len = (esize + 31) >> 5) are + * filled with bytes obtained from the PRNG, then masked: bit 31 of + * each word is cleared, and only the low (esize & 31) bits of the top + * word x[len] are kept (all 31 low bits when esize & 31 is zero). + */ +static void +mkrand(const br_prng_class **rng, uint32_t *x, uint32_t esize) +{ + size_t u, len; + unsigned m; + + len = (esize + 31) >> 5; + (*rng)->generate(rng, x + 1, len * sizeof(uint32_t)); + /* All words except the top one: keep the low 31 bits. */ + for (u = 1; u < len; u ++) { + x[u] &= 0x7FFFFFFF; + } + /* Top word: m is the number of bits to keep (0 means 31). */ + m = esize & 31; + if (m == 0) { + x[len] &= 0x7FFFFFFF; + } else { + x[len] &= 0x7FFFFFFF >> (31 - m); + } +} + +/* + * This is the big-endian unsigned representation of the product of + * all small primes from 13 to 1481. + */ +static const unsigned char SMALL_PRIMES[] = { + 0x2E, 0xAB, 0x92, 0xD1, 0x8B, 0x12, 0x47, 0x31, 0x54, 0x0A, + 0x99, 0x5D, 0x25, 0x5E, 0xE2, 0x14, 0x96, 0x29, 0x1E, 0xB7, + 0x78, 0x70, 0xCC, 0x1F, 0xA5, 0xAB, 0x8D, 0x72, 0x11, 0x37, + 0xFB, 0xD8, 0x1E, 0x3F, 0x5B, 0x34, 0x30, 0x17, 0x8B, 0xE5, + 0x26, 0x28, 0x23, 0xA1, 0x8A, 0xA4, 0x29, 0xEA, 0xFD, 0x9E, + 0x39, 0x60, 0x8A, 0xF3, 0xB5, 0xA6, 0xEB, 0x3F, 0x02, 0xB6, + 0x16, 0xC3, 0x96, 0x9D, 0x38, 0xB0, 0x7D, 0x82, 0x87, 0x0C, + 0xF7, 0xBE, 0x24, 0xE5, 0x5F, 0x41, 0x04, 0x79, 0x76, 0x40, + 0xE7, 0x00, 0x22, 0x7E, 0xB5, 0x85, 0x7F, 0x8D, 0x01, 0x50, + 0xE9, 0xD3, 0x29, 0x42, 0x08, 0xB3, 0x51, 0x40, 0x7B, 0xD7, + 0x8D, 0xCC, 0x10, 0x01, 0x64, 0x59, 0x28, 0xB6, 0x53, 0xF3, + 0x50, 0x4E, 0xB1, 0xF2, 0x58, 0xCD, 0x6E, 0xF5, 0x56, 0x3E, + 0x66, 0x2F, 0xD7, 0x07, 0x7F, 0x52, 0x4C, 0x13, 0x24, 0xDC, + 0x8E, 0x8D, 0xCC, 0xED, 0x77, 0xC4, 0x21, 0xD2, 0xFD, 0x08, + 0xEA, 0xD7, 0xC0, 0x5C, 0x13, 0x82, 0x81, 0x31, 0x2F, 0x2B, + 0x08, 0xE4, 0x80, 0x04, 0x7A, 0x0C, 0x8A, 0x3C, 0xDC, 0x22, + 0xE4, 0x5A, 0x7A, 0xB0, 0x12, 0x5E, 0x4A, 0x76, 0x94, 0x77, 
+ 0xC2, 0x0E, 0x92, 0xBA, 0x8A, 0xA0, 0x1F, 0x14, 0x51, 0x1E, + 0x66, 0x6C, 0x38, 0x03, 0x6C, 0xC7, 0x4A, 0x4B, 0x70, 0x80, + 0xAF, 0xCA, 0x84, 0x51, 0xD8, 0xD2, 0x26, 0x49, 0xF5, 0xA8, + 0x5E, 0x35, 0x4B, 0xAC, 0xCE, 0x29, 0x92, 0x33, 0xB7, 0xA2, + 0x69, 0x7D, 0x0C, 0xE0, 0x9C, 0xDB, 0x04, 0xD6, 0xB4, 0xBC, + 0x39, 0xD7, 0x7F, 0x9E, 0x9D, 0x78, 0x38, 0x7F, 0x51, 0x54, + 0x50, 0x8B, 0x9E, 0x9C, 0x03, 0x6C, 0xF5, 0x9D, 0x2C, 0x74, + 0x57, 0xF0, 0x27, 0x2A, 0xC3, 0x47, 0xCA, 0xB9, 0xD7, 0x5C, + 0xFF, 0xC2, 0xAC, 0x65, 0x4E, 0xBD +}; + +/* + * We need temporary values for at least 7 integers of the same size + * as a factor (including header word); more space helps with performance + * (in modular exponentiations), but we much prefer to remain under + * 2 kilobytes in total, to save stack space. The macro TEMPS below + * exceeds 512 (which is a count in 32-bit words) when BR_MAX_RSA_SIZE + * is greater than 4464 (default value is 4096, so the 2-kB limit is + * maintained unless BR_MAX_RSA_SIZE was modified). + */ +#define MAX(x, y) ((x) > (y) ? (x) : (y)) +#define ROUND2(x) ((((x) + 1) >> 1) << 1) + +#define TEMPS MAX(512, ROUND2(7 * ((((BR_MAX_RSA_SIZE + 1) >> 1) + 61) / 31))) + +/* + * Perform trial division on a candidate prime. This computes + * y = SMALL_PRIMES mod x, then tries to compute y/y mod x. The + * br_i31_moddiv() function will report an error if y is not invertible + * modulo x. Returned value is 1 on success (none of the small primes + * divides x), 0 on error (a non-trivial GCD is obtained). + * + * This function assumes that x is odd. + */ +static uint32_t +trial_divisions(const uint32_t *x, uint32_t *t) +{ + uint32_t *y; + uint32_t x0i; + + y = t; + t += 1 + ((x[0] + 31) >> 5); + x0i = br_i31_ninv31(x[1]); + br_i31_decode_reduce(y, SMALL_PRIMES, sizeof SMALL_PRIMES, x); + return br_i31_moddiv(y, y, x, x0i, t); +} + +/* + * Perform n rounds of Miller-Rabin on the candidate prime x. This + * function assumes that x = 3 mod 4. 
+ * + * Returned value is 1 on success (all rounds completed successfully), + * 0 otherwise. + */ +static uint32_t +miller_rabin(const br_prng_class **rng, const uint32_t *x, int n, + uint32_t *t, size_t tlen, br_i31_modpow_opt_type mp31) +{ + /* + * Since x = 3 mod 4, the Miller-Rabin test is simple: + * - get a random base a (such that 1 < a < x-1) + * - compute z = a^((x-1)/2) mod x + * - if z != 1 and z != x-1, the number x is composite + * + * We generate bases 'a' randomly with a size which is + * one bit less than x, which ensures that a < x-1. It + * is not useful to verify that a > 1 because the probability + * that we get a value a equal to 0 or 1 is much smaller + * than the probability of our Miller-Rabin tests not to + * detect a composite, which is already quite smaller than the + * probability of the hardware misbehaving and return a + * composite integer because of some glitch (e.g. bad RAM + * or ill-timed cosmic ray). + */ + unsigned char *xm1d2; + size_t xlen, xm1d2_len, xm1d2_len_u32, u; + uint32_t asize; + unsigned cc; + uint32_t x0i; + + /* + * Compute (x-1)/2 (encoded). + */ + xm1d2 = (unsigned char *)t; + xm1d2_len = ((x[0] - (x[0] >> 5)) + 7) >> 3; + br_i31_encode(xm1d2, xm1d2_len, x); + cc = 0; + for (u = 0; u < xm1d2_len; u ++) { + unsigned w; + + w = xm1d2[u]; + xm1d2[u] = (unsigned char)((w >> 1) | cc); + cc = w << 7; + } + + /* + * We used some words of the provided buffer for (x-1)/2. + */ + xm1d2_len_u32 = (xm1d2_len + 3) >> 2; + t += xm1d2_len_u32; + tlen -= xm1d2_len_u32; + + xlen = (x[0] + 31) >> 5; + asize = x[0] - 1 - EQ0(x[0] & 31); + x0i = br_i31_ninv31(x[1]); + while (n -- > 0) { + uint32_t *a, *t2; + uint32_t eq1, eqm1; + size_t t2len; + + /* + * Generate a random base. We don't need the base to be + * really uniform modulo x, so we just get a random + * number which is one bit shorter than x. + */ + a = t; + a[0] = x[0]; + a[xlen] = 0; + mkrand(rng, a, asize); + + /* + * Compute a^((x-1)/2) mod x. 
We assume here that the + * function will not fail (the temporary array is large + * enough). + */ + t2 = t + 1 + xlen; + t2len = tlen - 1 - xlen; + if ((t2len & 1) != 0) { + /* + * Since the source array is 64-bit aligned and + * has an even number of elements (TEMPS), we + * can use the parity of the remaining length to + * detect and adjust alignment. + */ + t2 ++; + t2len --; + } + mp31(a, xm1d2, xm1d2_len, x, x0i, t2, t2len); + + /* + * We must obtain either 1 or x-1. Note that x is odd, + * hence x-1 differs from x only in its low word (no + * carry). + */ + eq1 = a[1] ^ 1; + eqm1 = a[1] ^ (x[1] - 1); + for (u = 2; u <= xlen; u ++) { + eq1 |= a[u]; + eqm1 |= a[u] ^ x[u]; + } + + if ((EQ0(eq1) | EQ0(eqm1)) == 0) { + return 0; + } + } + return 1; +} + +/* + * Create a random prime of the provided size. 'size' is the _encoded_ + * bit length. The two top bits and the two bottom bits are set to 1. + */ +static void +mkprime(const br_prng_class **rng, uint32_t *x, uint32_t esize, + uint32_t pubexp, uint32_t *t, size_t tlen, br_i31_modpow_opt_type mp31) +{ + size_t len; + + x[0] = esize; + len = (esize + 31) >> 5; + for (;;) { + size_t u; + uint32_t m3, m5, m7, m11; + int rounds, s7, s11; + + /* + * Generate random bits. We force the two top bits and the + * two bottom bits to 1. + */ + mkrand(rng, x, esize); + if ((esize & 31) == 0) { + x[len] |= 0x60000000; + } else if ((esize & 31) == 1) { + x[len] |= 0x00000001; + x[len - 1] |= 0x40000000; + } else { + x[len] |= 0x00000003 << ((esize & 31) - 2); + } + x[1] |= 0x00000003; + + /* + * Trial division with low primes (3, 5, 7 and 11). 
We + * use the following properties: + * + * 2^2 = 1 mod 3 + * 2^4 = 1 mod 5 + * 2^3 = 1 mod 7 + * 2^10 = 1 mod 11 + */ + m3 = 0; + m5 = 0; + m7 = 0; + m11 = 0; + s7 = 0; + s11 = 0; + for (u = 0; u < len; u ++) { + uint32_t w, w3, w5, w7, w11; + + w = x[1 + u]; + w3 = (w & 0xFFFF) + (w >> 16); /* max: 98302 */ + w5 = (w & 0xFFFF) + (w >> 16); /* max: 98302 */ + w7 = (w & 0x7FFF) + (w >> 15); /* max: 98302 */ + w11 = (w & 0xFFFFF) + (w >> 20); /* max: 1050622 */ + + m3 += w3 << (u & 1); + m3 = (m3 & 0xFF) + (m3 >> 8); /* max: 1025 */ + + m5 += w5 << ((4 - u) & 3); + m5 = (m5 & 0xFFF) + (m5 >> 12); /* max: 4479 */ + + m7 += w7 << s7; + m7 = (m7 & 0x1FF) + (m7 >> 9); /* max: 1280 */ + if (++ s7 == 3) { + s7 = 0; + } + + m11 += w11 << s11; + if (++ s11 == 10) { + s11 = 0; + } + m11 = (m11 & 0x3FF) + (m11 >> 10); /* max: 526847 */ + } + + m3 = (m3 & 0x3F) + (m3 >> 6); /* max: 78 */ + m3 = (m3 & 0x0F) + (m3 >> 4); /* max: 18 */ + m3 = ((m3 * 43) >> 5) & 3; + + m5 = (m5 & 0xFF) + (m5 >> 8); /* max: 271 */ + m5 = (m5 & 0x0F) + (m5 >> 4); /* max: 31 */ + m5 -= 20 & -GT(m5, 19); + m5 -= 10 & -GT(m5, 9); + m5 -= 5 & -GT(m5, 4); + + m7 = (m7 & 0x3F) + (m7 >> 6); /* max: 82 */ + m7 = (m7 & 0x07) + (m7 >> 3); /* max: 16 */ + m7 = ((m7 * 147) >> 7) & 7; + + /* + * 2^5 = 32 = -1 mod 11. + */ + m11 = (m11 & 0x3FF) + (m11 >> 10); /* max: 1536 */ + m11 = (m11 & 0x3FF) + (m11 >> 10); /* max: 1023 */ + m11 = (m11 & 0x1F) + 33 - (m11 >> 5); /* max: 64 */ + m11 -= 44 & -GT(m11, 43); + m11 -= 22 & -GT(m11, 21); + m11 -= 11 & -GT(m11, 10); + + /* + * If any of these modulo is 0, then the candidate is + * not prime. Also, if pubexp is 3, 5, 7 or 11, and the + * corresponding modulus is 1, then the candidate must + * be rejected, because we need e to be invertible + * modulo p-1. We can use simple comparisons here + * because they won't leak information on a candidate + * that we keep, only on one that we reject (and is thus + * not secret). 
+ */ + if (m3 == 0 || m5 == 0 || m7 == 0 || m11 == 0) { + continue; + } + if ((pubexp == 3 && m3 == 1) + || (pubexp == 5 && m5 == 1) + || (pubexp == 7 && m7 == 1) + || (pubexp == 11 && m11 == 1)) + { + continue; + } + + /* + * More trial divisions. + */ + if (!trial_divisions(x, t)) { + continue; + } + + /* + * Miller-Rabin algorithm. Since we selected a random + * integer, not a maliciously crafted integer, we can use + * relatively few rounds to lower the risk of a false + * positive (i.e. declaring prime a non-prime) under + * 2^(-80). It is not useful to lower the probability much + * below that, since that would be substantially below + * the probability of the hardware misbehaving. Sufficient + * numbers of rounds are extracted from the Handbook of + * Applied Cryptography, note 4.49 (page 149). + * + * Since we work on the encoded size (esize), we need to + * compare with encoded thresholds. + */ + if (esize < 309) { + rounds = 12; + } else if (esize < 464) { + rounds = 9; + } else if (esize < 670) { + rounds = 6; + } else if (esize < 877) { + rounds = 4; + } else if (esize < 1341) { + rounds = 3; + } else { + rounds = 2; + } + + if (miller_rabin(rng, x, rounds, t, tlen, mp31)) { + return; + } + } +} + +/* + * Let p be a prime (p > 2^33, p = 3 mod 4). Let m = (p-1)/2, provided + * as parameter (with announced bit length equal to that of p). This + * function computes d = 1/e mod p-1 (for an odd integer e). Returned + * value is 1 on success, 0 on error (an error is reported if e is not + * invertible modulo p-1). + * + * The temporary buffer (t) must have room for at least 4 integers of + * the size of p. + */ +static uint32_t +invert_pubexp(uint32_t *d, const uint32_t *m, uint32_t e, uint32_t *t) +{ + uint32_t *f; + uint32_t r; + + f = t; + t += 1 + ((m[0] + 31) >> 5); + + /* + * Compute d = 1/e mod m. Since p = 3 mod 4, m is odd. 
+ */ + br_i31_zero(d, m[0]); + d[1] = 1; + br_i31_zero(f, m[0]); + f[1] = e & 0x7FFFFFFF; + f[2] = e >> 31; + r = br_i31_moddiv(d, f, m, br_i31_ninv31(m[1]), t); + + /* + * We really want d = 1/e mod p-1, with p-1 = 2m. By the CRT, + * the result is either the d we got, or d + m. + * + * Let's write e*d = 1 + k*m, for some integer k. Integers e + * and m are odd. If d is odd, then e*d is odd, which implies + * that k must be even; in that case, e*d = 1 + (k/2)*2m, and + * thus d is already fine. Conversely, if d is even, then k + * is odd, and we must add m to d in order to get the correct + * result. + */ + br_i31_add(d, m, (uint32_t)(1 - (d[1] & 1))); + + return r; +} + +/* + * Swap two buffers in RAM. They must be disjoint. + * + * The first 'len' bytes of b1 and b2 are exchanged, one byte at a + * time. + */ +static void +bufswap(void *b1, void *b2, size_t len) +{ + size_t u; + unsigned char *buf1, *buf2; + + buf1 = b1; + buf2 = b2; + for (u = 0; u < len; u ++) { + unsigned w; + + w = buf1[u]; + buf1[u] = buf2[u]; + buf2[u] = w; + } +} + +/* see inner.h */ +uint32_t +br_rsa_i31_keygen_inner(const br_prng_class **rng, + br_rsa_private_key *sk, void *kbuf_priv, + br_rsa_public_key *pk, void *kbuf_pub, + unsigned size, uint32_t pubexp, br_i31_modpow_opt_type mp31) +{ + uint32_t esize_p, esize_q; + size_t plen, qlen, tlen; + uint32_t *p, *q, *t; + union { + uint32_t t32[TEMPS]; + uint64_t t64[TEMPS >> 1]; /* for 64-bit alignment */ + } tmp; + uint32_t r; + + if (size < BR_MIN_RSA_SIZE || size > BR_MAX_RSA_SIZE) { + return 0; + } + /* + * A public exponent of 0 selects the default value (3). Otherwise, + * the exponent must be odd and greater than 1. + */ + if (pubexp == 0) { + pubexp = 3; + } else if (pubexp == 1 || (pubexp & 1) == 0) { + return 0; + } + + /* + * When 'size' is odd, p gets the extra bit. The private key + * elements are laid out consecutively in kbuf_priv, in that + * order: p, q, dp, dq, iq. dp and iq have the same length as p, + * and dq has the same length as q. + */ + esize_p = (size + 1) >> 1; + esize_q = size - esize_p; + sk->n_bitlen = size; + sk->p = kbuf_priv; + sk->plen = (esize_p + 7) >> 3; + sk->q = sk->p + sk->plen; + sk->qlen = (esize_q + 7) >> 3; + sk->dp = sk->q + sk->qlen; + sk->dplen = sk->plen; + sk->dq = sk->dp + sk->dplen; + sk->dqlen = sk->qlen; + sk->iq = sk->dq + sk->dqlen; + sk->iqlen = sk->plen; + + if (pk != NULL) { + pk->n = kbuf_pub; + pk->nlen = (size + 7) >> 
3; + pk->e = pk->n + pk->nlen; + pk->elen = 4; + br_enc32be(pk->e, pubexp); + while (*pk->e == 0) { + pk->e ++; + pk->elen --; + } + } + + /* + * We now switch to encoded sizes. + * + * floor((x * 16913) / (2^19)) is equal to floor(x/31) for all + * integers x from 0 to 34966; the intermediate product fits on + * 30 bits, thus we can use MUL31(). + */ + esize_p += MUL31(esize_p, 16913) >> 19; + esize_q += MUL31(esize_q, 16913) >> 19; + plen = (esize_p + 31) >> 5; + qlen = (esize_q + 31) >> 5; + p = tmp.t32; + q = p + 1 + plen; + t = q + 1 + qlen; + tlen = ((sizeof tmp.t32) / sizeof(uint32_t)) - (2 + plen + qlen); + + /* + * When looking for primes p and q, we temporarily divide + * candidates by 2, in order to compute the inverse of the + * public exponent. + */ + + for (;;) { + mkprime(rng, p, esize_p, pubexp, t, tlen, mp31); + br_i31_rshift(p, 1); + if (invert_pubexp(t, p, pubexp, t + 1 + plen)) { + br_i31_add(p, p, 1); + p[1] |= 1; + br_i31_encode(sk->p, sk->plen, p); + br_i31_encode(sk->dp, sk->dplen, t); + break; + } + } + + for (;;) { + mkprime(rng, q, esize_q, pubexp, t, tlen, mp31); + br_i31_rshift(q, 1); + if (invert_pubexp(t, q, pubexp, t + 1 + qlen)) { + br_i31_add(q, q, 1); + q[1] |= 1; + br_i31_encode(sk->q, sk->qlen, q); + br_i31_encode(sk->dq, sk->dqlen, t); + break; + } + } + + /* + * If p and q have the same size, then it is possible that q > p + * (when the target modulus size is odd, we generate p with a + * greater bit length than q). If q > p, we want to swap p and q + * (and also dp and dq) for two reasons: + * - The final step below (inversion of q modulo p) is easier if + * p > q. + * - While BearSSL's RSA code is perfectly happy with RSA keys such + * that p < q, some other implementations have restrictions and + * require p > q. + * + * Note that we can do a simple non-constant-time swap here, + * because the only information we leak here is that we insist on + * returning p and q such that p > q, which is not a secret. 
+ */ + if (esize_p == esize_q && br_i31_sub(p, q, 0) == 1) { + bufswap(p, q, (1 + plen) * sizeof *p); + bufswap(sk->p, sk->q, sk->plen); + bufswap(sk->dp, sk->dq, sk->dplen); + } + + /* + * We have produced p, q, dp and dq. We can now compute iq = 1/d mod p. + * + * We ensured that p >= q, so this is just a matter of updating the + * header word for q (and possibly adding an extra word). + * + * Theoretically, the call below may fail, in case we were + * extraordinarily unlucky, and p = q. Another failure case is if + * Miller-Rabin failed us _twice_, and p and q are non-prime and + * have a factor is common. We report the error mostly because it + * is cheap and we can, but in practice this never happens (or, at + * least, it happens way less often than hardware glitches). + */ + q[0] = p[0]; + if (plen > qlen) { + q[plen] = 0; + t ++; + tlen --; + } + br_i31_zero(t, p[0]); + t[1] = 1; + r = br_i31_moddiv(t, q, p, br_i31_ninv31(p[1]), t + 1 + plen); + br_i31_encode(sk->iq, sk->iqlen, t); + + /* + * Compute the public modulus too, if required. 
+ */ + if (pk != NULL) { + br_i31_zero(t, p[0]); + br_i31_mulacc(t, p, q); + br_i31_encode(pk->n, pk->nlen, t); + } + + return r; +} diff --git a/third_party/bearssl/src/rsa_i31_modulus.c b/third_party/bearssl/src/rsa_i31_modulus.c new file mode 100644 index 0000000..f5f997f --- /dev/null +++ b/third_party/bearssl/src/rsa_i31_modulus.c @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_rsa.h */ +size_t +br_rsa_i31_compute_modulus(void *n, const br_rsa_private_key *sk) +{ + uint32_t tmp[4 * (((BR_MAX_RSA_SIZE / 2) + 30) / 31) + 5]; + uint32_t *t, *p, *q; + const unsigned char *pbuf, *qbuf; + size_t nlen, plen, qlen, tlen; + + /* + * Compute actual byte and lengths for p and q. 
+ */ + pbuf = sk->p; + plen = sk->plen; + while (plen > 0 && *pbuf == 0) { + pbuf ++; + plen --; + } + qbuf = sk->q; + qlen = sk->qlen; + while (qlen > 0 && *qbuf == 0) { + qbuf ++; + qlen --; + } + + t = tmp; + tlen = (sizeof tmp) / (sizeof tmp[0]); + + /* + * Decode p. Each word holds 31 bits; give up if the remaining + * tlen words offer fewer than 8*plen + 31 bits. After decoding, + * plen is reused as a word count (derived from the header word + * p[0]) by which t and tlen are advanced. + */ + if ((31 * tlen) < (plen << 3) + 31) { + return 0; + } + br_i31_decode(t, pbuf, plen); + p = t; + plen = (p[0] + 63) >> 5; + t += plen; + tlen -= plen; + + /* + * Decode q. Same capacity check and same reuse of qlen as a + * word count as for p above. + */ + if ((31 * tlen) < (qlen << 3) + 31) { + return 0; + } + br_i31_decode(t, qbuf, qlen); + q = t; + qlen = (q[0] + 63) >> 5; + t += qlen; + tlen -= qlen; + + /* + * Computation can proceed only if we have enough room for the + * modulus. + */ + if (tlen < (plen + qlen + 1)) { + return 0; + } + + /* + * Private key already contains the modulus bit length, from which + * we can infer the output length. Even if n is NULL, we still had + * to decode p and q to make sure that the product can be computed. + */ + nlen = (sk->n_bitlen + 7) >> 3; + if (n != NULL) { + br_i31_zero(t, p[0]); + br_i31_mulacc(t, p, q); + br_i31_encode(n, nlen, t); + } + return nlen; +} diff --git a/third_party/bearssl/src/rsa_i31_oaep_decrypt.c b/third_party/bearssl/src/rsa_i31_oaep_decrypt.c new file mode 100644 index 0000000..06fdd93 --- /dev/null +++ b/third_party/bearssl/src/rsa_i31_oaep_decrypt.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial
portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * see bearssl_rsa.h + * + * Returns 0 right away if *len is not the modulus length in bytes, + * (n_bitlen + 7) >> 3. Otherwise the private key operation is applied + * in place on data, then br_rsa_oaep_unpad() is called regardless of + * that first status; both results are combined with a bitwise AND. + */ +uint32_t +br_rsa_i31_oaep_decrypt(const br_hash_class *dig, + const void *label, size_t label_len, + const br_rsa_private_key *sk, void *data, size_t *len) +{ + uint32_t r; + + if (*len != ((sk->n_bitlen + 7) >> 3)) { + return 0; + } + r = br_rsa_i31_private(data, sk); + r &= br_rsa_oaep_unpad(dig, label, label_len, data, len); + return r; +} diff --git a/third_party/bearssl/src/rsa_i31_oaep_encrypt.c b/third_party/bearssl/src/rsa_i31_oaep_encrypt.c new file mode 100644 index 0000000..367008c --- /dev/null +++ b/third_party/bearssl/src/rsa_i31_oaep_encrypt.c @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * see bearssl_rsa.h + * + * The padded message is built in dst by br_rsa_oaep_pad(); a returned + * length of 0 is reported as is. The public key operation is then + * applied in place on dst, and its status is turned into a mask on + * dlen: a status of 1 yields dlen, a status of 0 yields 0. + */ +size_t +br_rsa_i31_oaep_encrypt( + const br_prng_class **rnd, const br_hash_class *dig, + const void *label, size_t label_len, + const br_rsa_public_key *pk, + void *dst, size_t dst_max_len, + const void *src, size_t src_len) +{ + size_t dlen; + + dlen = br_rsa_oaep_pad(rnd, dig, label, label_len, + pk, dst, dst_max_len, src, src_len); + if (dlen == 0) { + return 0; + } + return dlen & -(size_t)br_rsa_i31_public(dst, dlen, pk); +} diff --git a/third_party/bearssl/src/rsa_i31_pkcs1_sign.c b/third_party/bearssl/src/rsa_i31_pkcs1_sign.c new file mode 100644 index 0000000..784d3c2 --- /dev/null +++ b/third_party/bearssl/src/rsa_i31_pkcs1_sign.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * see bearssl_rsa.h + * + * The padded value is built in x by br_rsa_pkcs1_sig_pad(); if that + * fails, 0 is returned. Otherwise the private key operation is applied + * in place on x and its status is returned. + */ +uint32_t +br_rsa_i31_pkcs1_sign(const unsigned char *hash_oid, + const unsigned char *hash, size_t hash_len, + const br_rsa_private_key *sk, unsigned char *x) +{ + if (!br_rsa_pkcs1_sig_pad(hash_oid, hash, hash_len, sk->n_bitlen, x)) { + return 0; + } + return br_rsa_i31_private(x, sk); +} diff --git a/third_party/bearssl/src/rsa_i31_pkcs1_vrfy.c b/third_party/bearssl/src/rsa_i31_pkcs1_vrfy.c new file mode 100644 index 0000000..e79a002 --- /dev/null +++ b/third_party/bearssl/src/rsa_i31_pkcs1_vrfy.c @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * see bearssl_rsa.h + * + * The signature is copied into a local buffer of BR_MAX_RSA_SIZE >> 3 + * bytes, since br_rsa_i31_public() works in place and x is const. + * Returns 0 if xlen exceeds that buffer or if the public key operation + * fails; otherwise returns the result of br_rsa_pkcs1_sig_unpad(). + */ +uint32_t +br_rsa_i31_pkcs1_vrfy(const unsigned char *x, size_t xlen, + const unsigned char *hash_oid, size_t hash_len, + const br_rsa_public_key *pk, unsigned char *hash_out) +{ + unsigned char sig[BR_MAX_RSA_SIZE >> 3]; + + if (xlen > (sizeof sig)) { + return 0; + } + memcpy(sig, x, xlen); + if (!br_rsa_i31_public(sig, xlen, pk)) { + return 0; + } + return br_rsa_pkcs1_sig_unpad(sig, xlen, hash_oid, hash_len, hash_out); +} diff --git a/third_party/bearssl/src/rsa_i31_priv.c b/third_party/bearssl/src/rsa_i31_priv.c new file mode 100644 index 0000000..b1e1244 --- /dev/null +++ b/third_party/bearssl/src/rsa_i31_priv.c @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +#define U (2 + ((BR_MAX_RSA_FACTOR + 30) / 31)) +#define TLEN (8 * U) + +/* + * see bearssl_rsa.h + * + * Private key operation (CRT form), applied in place on the + * (n_bitlen + 7) >> 3 bytes of x. Returns 0 early, leaving x untouched, + * if six values of the factor word length do not fit in tmp[]. + * Otherwise x is overwritten, and the returned value is the AND of the + * range check of x against p*q, of the two br_i31_modpow_opt() + * statuses, and of p0i and q0i. + */ +uint32_t +br_rsa_i31_private(unsigned char *x, const br_rsa_private_key *sk) +{ + const unsigned char *p, *q; + size_t plen, qlen; + size_t fwlen; + uint32_t p0i, q0i; + size_t xlen, u; + uint32_t tmp[1 + TLEN]; + long z; + uint32_t *mp, *mq, *s1, *s2, *t1, *t2, *t3; + uint32_t r; + + /* + * Compute the actual lengths of p and q, in bytes. + * These lengths are not considered secret (we cannot really hide + * them anyway in constant-time code). + */ + p = sk->p; + plen = sk->plen; + while (plen > 0 && *p == 0) { + p ++; + plen --; + } + q = sk->q; + qlen = sk->qlen; + while (qlen > 0 && *q == 0) { + q ++; + qlen --; + } + + /* + * Compute the maximum factor length, in words. + */ + z = (long)(plen > qlen ? plen : qlen) << 3; + fwlen = 1; + while (z > 0) { + z -= 31; + fwlen ++; + } + + /* + * Round up the word length to an even number. + */ + fwlen += (fwlen & 1); + + /* + * We need to fit at least 6 values in the stack buffer. + */ + if (6 * fwlen > TLEN) { + return 0; + } + + /* + * Compute modulus length (in bytes). + */ + xlen = (sk->n_bitlen + 7) >> 3; + + /* + * Decode q. + */ + mq = tmp; + br_i31_decode(mq, q, qlen); + + /* + * Decode p. + */ + t1 = mq + fwlen; + br_i31_decode(t1, p, plen); + + /* + * Compute the modulus (product of the two factors), to compare + * it with the source value. We use br_i31_mulacc(), since it's + * already used later on. + */ + t2 = mq + 2 * fwlen; + br_i31_zero(t2, mq[0]); + br_i31_mulacc(t2, mq, t1); + + /* + * We encode the modulus into bytes, to perform the comparison + * with bytes.
We know that the product length, in bytes, is + * exactly xlen. + * The comparison actually computes the carry when subtracting + * the modulus from the source value; that carry must be 1 for + * a value in the correct range. We keep it in r, which is our + * accumulator for the error code. + */ + t3 = mq + 4 * fwlen; + br_i31_encode(t3, xlen, t2); + u = xlen; + r = 0; + while (u > 0) { + uint32_t wn, wx; + + u --; + wn = ((unsigned char *)t3)[u]; + wx = x[u]; + r = ((wx - (wn + r)) >> 8) & 1; + } + + /* + * Move the decoded p to another temporary buffer. + */ + mp = mq + 2 * fwlen; + memmove(mp, t1, fwlen * sizeof *t1); + + /* + * Compute s2 = x^dq mod q. + */ + q0i = br_i31_ninv31(mq[1]); + s2 = mq + fwlen; + br_i31_decode_reduce(s2, x, xlen, mq); + r &= br_i31_modpow_opt(s2, sk->dq, sk->dqlen, mq, q0i, + mq + 3 * fwlen, TLEN - 3 * fwlen); + + /* + * Compute s1 = x^dp mod p. + */ + p0i = br_i31_ninv31(mp[1]); + s1 = mq + 3 * fwlen; + br_i31_decode_reduce(s1, x, xlen, mp); + r &= br_i31_modpow_opt(s1, sk->dp, sk->dplen, mp, p0i, + mq + 4 * fwlen, TLEN - 4 * fwlen); + + /* + * Compute: + * h = (s1 - s2)*(1/q) mod p + * s1 is an integer modulo p, but s2 is modulo q. PKCS#1 is + * unclear about whether p may be lower than q (some existing, + * widely deployed implementations of RSA don't tolerate p < q), + * but we want to support that occurrence, so we need to use the + * reduction function. + * + * Since we use br_i31_decode_reduce() for iq (purportedly, the + * inverse of q modulo p), we also tolerate improperly large + * values for this parameter. + */ + t1 = mq + 4 * fwlen; + t2 = mq + 5 * fwlen; + br_i31_reduce(t2, s2, mp); + br_i31_add(s1, mp, br_i31_sub(s1, t2, 1)); + br_i31_to_monty(s1, mp); + br_i31_decode_reduce(t1, sk->iq, sk->iqlen, mp); + br_i31_montymul(t2, s1, t1, mp, p0i); + + /* + * h is now in t2. We compute the final result: + * s = s2 + q*h + * All these operations are non-modular. + * + * We need mq, s2 and t2.
We use the t3 buffer as destination. + * The buffers mp, s1 and t1 are no longer needed, so we can + * reuse them for t3. Moreover, the first step of the computation + * is to copy s2 into t3, after which s2 is not needed. Right + * now, mq is in slot 0, s2 is in slot 1, and t2 is in slot 5. + * Therefore, we have ample room for t3 by simply using s2. + */ + t3 = s2; + br_i31_mulacc(t3, mq, t2); + + /* + * Encode the result. Since we already checked the value of xlen, + * we can just use it right away. + */ + br_i31_encode(x, xlen, t3); + + /* + * The only error conditions remaining at that point are invalid + * values for p and q (even integers). + */ + return p0i & q0i & r; +} diff --git a/third_party/bearssl/src/rsa_i31_privexp.c b/third_party/bearssl/src/rsa_i31_privexp.c new file mode 100644 index 0000000..eee62a0 --- /dev/null +++ b/third_party/bearssl/src/rsa_i31_privexp.c @@ -0,0 +1,318 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * see bearssl_rsa.h + * + * Returns 0 if e is even or lower than 3, or if p or q (leading zero + * bytes skipped) is shorter than 5 bytes, longer than + * BR_MAX_RSA_FACTOR / 8 bytes, or even. If d is NULL, the output + * length (n_bitlen + 7) >> 3 is then returned with no further work. + * Otherwise 0 is also returned when e divides phi or GCD(e,r) is not 1; + * on success d is filled and that same length is returned. + */ +size_t +br_rsa_i31_compute_privexp(void *d, + const br_rsa_private_key *sk, uint32_t e) +{ + /* + * We want to invert e modulo phi = (p-1)(q-1). This first + * requires computing phi, which is easy since we have the factors + * p and q in the private key structure. + * + * Since p = 3 mod 4 and q = 3 mod 4, phi/4 is an odd integer. + * We could invert e modulo phi/4 then patch the result to + * modulo phi, but this would involve assembling three modulus-wide + * values (phi/4, 1 and e) and calling moddiv, that requires + * three more temporaries, for a total of six big integers, or + * slightly more than 3 kB of stack space for RSA-4096. This + * exceeds our stack requirements. + * + * Instead, we first use one step of the extended GCD: + * + * - We compute phi = k*e + r (Euclidean division of phi by e). + * If public exponent e is correct, then r != 0 (e must be + * invertible modulo phi). We also have k != 0 since we + * enforce non-ridiculously-small factors. + * + * - We find small u, v such that u*e - v*r = 1 (using a + * binary GCD; we can arrange for u < r and v < e, i.e. all + * values fit on 32 bits). + * + * - Solution is: d = u + v*k + * This last computation is exact: since u < r and v < e, + * the above implies d < r + e*((phi-r)/e) = phi + */ + + uint32_t tmp[4 * ((BR_MAX_RSA_FACTOR + 30) / 31) + 12]; + uint32_t *p, *q, *k, *m, *z, *phi; + const unsigned char *pbuf, *qbuf; + size_t plen, qlen, u, len, dlen; + uint32_t r, a, b, u0, v0, u1, v1, he, hr; + int i; + + /* + * Check that e is correct. + */ + if (e < 3 || (e & 1) == 0) { + return 0; + } + + /* + * Check lengths of p and q, and that they are both odd.
+ */ + pbuf = sk->p; + plen = sk->plen; + while (plen > 0 && *pbuf == 0) { + pbuf ++; + plen --; + } + if (plen < 5 || plen > (BR_MAX_RSA_FACTOR / 8) + || (pbuf[plen - 1] & 1) != 1) + { + return 0; + } + qbuf = sk->q; + qlen = sk->qlen; + while (qlen > 0 && *qbuf == 0) { + qbuf ++; + qlen --; + } + if (qlen < 5 || qlen > (BR_MAX_RSA_FACTOR / 8) + || (qbuf[qlen - 1] & 1) != 1) + { + return 0; + } + + /* + * Output length is that of the modulus. + */ + dlen = (sk->n_bitlen + 7) >> 3; + if (d == NULL) { + return dlen; + } + + p = tmp; + br_i31_decode(p, pbuf, plen); + plen = (p[0] + 31) >> 5; + q = p + 1 + plen; + br_i31_decode(q, qbuf, qlen); + qlen = (q[0] + 31) >> 5; + + /* + * Compute phi = (p-1)*(q-1), then move it over p-1 and q-1 (that + * we do not need anymore). The mulacc function sets the announced + * bit length of t to be the sum of the announced bit lengths of + * p-1 and q-1, which is usually exact but may overshoot by one + * bit in some cases; we readjust it to its true length. + */ + p[1] --; + q[1] --; + phi = q + 1 + qlen; + br_i31_zero(phi, p[0]); + br_i31_mulacc(phi, p, q); + len = (phi[0] + 31) >> 5; + memmove(tmp, phi, (1 + len) * sizeof *phi); + phi = tmp; + phi[0] = br_i31_bit_length(phi + 1, len); + len = (phi[0] + 31) >> 5; + + /* + * Divide phi by public exponent e. The final remainder r must be + * non-zero (otherwise, the key is invalid). The quotient is k, + * which we write over phi, since we don't need phi after that. + */ + r = 0; + for (u = len; u >= 1; u --) { + /* + * Upon entry, r < e, and phi[u] < 2^31; hence, + * hi:lo < e*2^31. Thus, the produced word k[u] + * must be lower than 2^31, and the new remainder r + * is lower than e. + */ + uint32_t hi, lo; + + hi = r >> 1; + lo = (r << 31) + phi[u]; + phi[u] = br_divrem(hi, lo, e, &r); + } + if (r == 0) { + return 0; + } + k = phi; + + /* + * Compute u and v such that u*e - v*r = GCD(e,r). We use + * a binary GCD algorithm, with 6 extra integers a, b, + * u0, u1, v0 and v1.
Initial values are: + * a = e u0 = 1 v0 = 0 + * b = r u1 = r v1 = e-1 + * The following invariants are maintained: + * a = u0*e - v0*r + * b = u1*e - v1*r + * 0 < a <= e + * 0 < b <= r + * 0 <= u0 <= r + * 0 <= v0 <= e + * 0 <= u1 <= r + * 0 <= v1 <= e + * + * At each iteration, we reduce either a or b by one bit, and + * adjust u0, u1, v0 and v1 to maintain the invariants: + * - if a is even, then a <- a/2 + * - otherwise, if b is even, then b <- b/2 + * - otherwise, if a > b, then a <- (a-b)/2 + * - otherwise, if b > a, then b <- (b-a)/2 + * Algorithm stops when a = b. At that point, the common value + * is the GCD of e and r; it must be 1 (otherwise, the private + * key or public exponent is not valid). The (u0,v0) or (u1,v1) + * pairs are the solution we are looking for. + * + * Since either a or b is reduced by at least 1 bit at each + * iteration, 62 iterations are enough to reach the end + * condition. + * + * To maintain the invariants, we must compute the same operations + * on the u* and v* values that we do on a and b: + * - When a is divided by 2, u0 and v0 must be divided by 2. + * - When b is divided by 2, u1 and v1 must be divided by 2. + * - When b is subtracted from a, u1 and v1 are subtracted from + * u0 and v0, respectively. + * - When a is subtracted from b, u0 and v0 are subtracted from + * u1 and v1, respectively. + * + * However, we want to keep the u* and v* values in their proper + * ranges. The following remarks apply: + * + * - When a is divided by 2, then a is even. Therefore: + * + * * If r is odd, then u0 and v0 must have the same parity; + * if they are both odd, then adding r to u0 and e to v0 + * makes them both even, and the division by 2 brings them + * back to the proper range. + * + * * If r is even, then u0 must be even; if v0 is odd, then + * adding r to u0 and e to v0 makes them both even, and the + * division by 2 brings them back to the proper range.
+ * + * Thus, all we need to do is to look at the parity of v0, + * and add (r,e) to (u0,v0) when v0 is odd. In order to avoid + * a 32-bit overflow, we can add ((r+1)/2,(e/2)+1) after the + * division (r+1 does not overflow since r < e; and (e/2)+1 + * is equal to (e+1)/2 since e is odd). + * + * - When we subtract b from a, three cases may occur: + * + * * u1 <= u0 and v1 <= v0: just do the subtractions + * + * * u1 > u0 and v1 > v0: compute: + * (u0, v0) <- (u0 + r - u1, v0 + e - v1) + * + * * u1 <= u0 and v1 > v0: compute: + * (u0, v0) <- (u0 + r - u1, v0 + e - v1) + * + * The fourth case (u1 > u0 and v1 <= v0) is not possible + * because it would contradict "b < a" (which is the reason + * why we subtract b from a). + * + * The tricky case is the third one: from the equations, it + * seems that u0 may go out of range. However, the invariants + * and ranges of other values imply that, in that case, the + * new u0 does not actually exceed the range. + * + * We can thus handle the subtraction by adding (r,e) based + * solely on the comparison between v0 and v1.
+ */ + a = e; + b = r; + u0 = 1; + v0 = 0; + u1 = r; + v1 = e - 1; + hr = (r + 1) >> 1; + he = (e >> 1) + 1; + for (i = 0; i < 62; i ++) { + uint32_t oa, ob, agtb, bgta; + uint32_t sab, sba, da, db; + uint32_t ctl; + + oa = a & 1; /* 1 if a is odd */ + ob = b & 1; /* 1 if b is odd */ + agtb = GT(a, b); /* 1 if a > b */ + bgta = GT(b, a); /* 1 if b > a */ + + sab = oa & ob & agtb; /* 1 if a <- a-b */ + sba = oa & ob & bgta; /* 1 if b <- b-a */ + + /* a <- a-b, u0 <- u0-u1, v0 <- v0-v1 */ + ctl = GT(v1, v0); + a -= b & -sab; + u0 -= (u1 - (r & -ctl)) & -sab; + v0 -= (v1 - (e & -ctl)) & -sab; + + /* b <- b-a, u1 <- u1-u0 mod r, v1 <- v1-v0 mod e */ + ctl = GT(v0, v1); + b -= a & -sba; + u1 -= (u0 - (r & -ctl)) & -sba; + v1 -= (v0 - (e & -ctl)) & -sba; + + da = NOT(oa) | sab; /* 1 if a <- a/2 */ + db = (oa & NOT(ob)) | sba; /* 1 if b <- b/2 */ + + /* a <- a/2, u0 <- u0/2, v0 <- v0/2 */ + ctl = v0 & 1; + a ^= (a ^ (a >> 1)) & -da; + u0 ^= (u0 ^ ((u0 >> 1) + (hr & -ctl))) & -da; + v0 ^= (v0 ^ ((v0 >> 1) + (he & -ctl))) & -da; + + /* b <- b/2, u1 <- u1/2 mod r, v1 <- v1/2 mod e */ + ctl = v1 & 1; + b ^= (b ^ (b >> 1)) & -db; + u1 ^= (u1 ^ ((u1 >> 1) + (hr & -ctl))) & -db; + v1 ^= (v1 ^ ((v1 >> 1) + (he & -ctl))) & -db; + } + + /* + * Check that the GCD is indeed 1. If not, then the key is invalid + * (and there's no harm in leaking that piece of information). + */ + if (a != 1) { + return 0; + } + + /* + * Now we have u0*e - v0*r = 1. Let's compute the result as: + * d = u0 + v0*k + * We still have k in the tmp[] array, and its announced bit + * length is that of phi. + */ + m = k + 1 + len; + m[0] = (1 << 5) + 1; /* bit length is 32 bits, encoded */ + m[1] = v0 & 0x7FFFFFFF; + m[2] = v0 >> 31; + z = m + 3; + br_i31_zero(z, k[0]); + z[1] = u0 & 0x7FFFFFFF; + z[2] = u0 >> 31; + br_i31_mulacc(z, k, m); + + /* + * Encode the result.
+ */ + br_i31_encode(d, dlen, z); + return dlen; +} diff --git a/third_party/bearssl/src/rsa_i31_pss_sign.c b/third_party/bearssl/src/rsa_i31_pss_sign.c new file mode 100644 index 0000000..b06f3e2 --- /dev/null +++ b/third_party/bearssl/src/rsa_i31_pss_sign.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#include "inner.h" + +/* + * see bearssl_rsa.h + * + * The padded value is built in x by br_rsa_pss_sig_pad(); if that + * fails, 0 is returned. Otherwise the private key operation is applied + * in place on x and its status is returned. + */ +uint32_t +br_rsa_i31_pss_sign(const br_prng_class **rng, + const br_hash_class *hf_data, const br_hash_class *hf_mgf1, + const unsigned char *hash, size_t salt_len, + const br_rsa_private_key *sk, unsigned char *x) +{ + if (!br_rsa_pss_sig_pad(rng, hf_data, hf_mgf1, hash, + salt_len, sk->n_bitlen, x)) + { + return 0; + } + return br_rsa_i31_private(x, sk); +} diff --git a/third_party/bearssl/src/rsa_i31_pss_vrfy.c b/third_party/bearssl/src/rsa_i31_pss_vrfy.c new file mode 100644 index 0000000..77a9b28 --- /dev/null +++ b/third_party/bearssl/src/rsa_i31_pss_vrfy.c @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#include "inner.h" + +/* + * see bearssl_rsa.h + * + * The signature is copied into a local buffer of BR_MAX_RSA_SIZE >> 3 + * bytes, since br_rsa_i31_public() works in place and x is const. + * Returns 0 if xlen exceeds that buffer or if the public key operation + * fails; otherwise returns the result of br_rsa_pss_sig_unpad(). + */ +uint32_t +br_rsa_i31_pss_vrfy(const unsigned char *x, size_t xlen, + const br_hash_class *hf_data, const br_hash_class *hf_mgf1, + const void *hash, size_t salt_len, const br_rsa_public_key *pk) +{ + unsigned char sig[BR_MAX_RSA_SIZE >> 3]; + + if (xlen > (sizeof sig)) { + return 0; + } + memcpy(sig, x, xlen); + if (!br_rsa_i31_public(sig, xlen, pk)) { + return 0; + } + return br_rsa_pss_sig_unpad(hf_data, hf_mgf1, + hash, salt_len, pk, sig); +} diff --git a/third_party/bearssl/src/rsa_i31_pub.c b/third_party/bearssl/src/rsa_i31_pub.c new file mode 100644 index 0000000..d5f3fe2 --- /dev/null +++ b/third_party/bearssl/src/rsa_i31_pub.c @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * As a strict minimum, we need four buffers that can hold a + * modular integer.
+ */
+#define TLEN (4 * (2 + ((BR_MAX_RSA_SIZE + 30) / 31)))
+
+/*
+ * see bearssl_rsa.h
+ *
+ * RSA public-key operation (i31 engine): x[] (xlen bytes) is replaced
+ * in place with its modular exponentiation, using exponent pk->e and
+ * modulus pk->n.
+ *
+ * Returns 0 immediately, leaving x[] untouched, when the modulus (once
+ * its leading zero bytes are skipped) is empty, is longer than
+ * BR_MAX_RSA_SIZE / 8 bytes, or does not have exactly xlen bytes.
+ * Otherwise the exponentiation is always carried out and x[] is
+ * overwritten; the returned value is then 1 only if the modulus is odd
+ * and br_i31_decode_mod() accepted the value of x[], and 0 otherwise.
+ */
+uint32_t
+br_rsa_i31_public(unsigned char *x, size_t xlen,
+	const br_rsa_public_key *pk)
+{
+	const unsigned char *n;
+	size_t nlen;
+	uint32_t tmp[1 + TLEN];
+	uint32_t *m, *a, *t;
+	size_t fwlen;
+	long z;
+	uint32_t m0i, r;
+
+	/*
+	 * Get the actual length of the modulus, and see if it fits within
+	 * our stack buffer. We also check that the length of x[] is valid.
+	 */
+	n = pk->n;
+	nlen = pk->nlen;
+	while (nlen > 0 && *n == 0) {
+		n ++;
+		nlen --;
+	}
+	if (nlen == 0 || nlen > (BR_MAX_RSA_SIZE >> 3) || xlen != nlen) {
+		return 0;
+	}
+	/*
+	 * fwlen is the number of 32-bit words reserved for one integer:
+	 * one word, plus one word per started group of 31 bits in the
+	 * nlen*8 bits of the modulus.
+	 */
+	z = (long)nlen << 3;
+	fwlen = 1;
+	while (z > 0) {
+		z -= 31;
+		fwlen ++;
+	}
+	/*
+	 * Round up length to an even number.
+	 */
+	fwlen += (fwlen & 1);
+
+	/*
+	 * The modulus gets decoded into m[].
+	 * The value to exponentiate goes into a[].
+	 * The temporaries for modular exponentiation are in t[].
+	 */
+	m = tmp;
+	a = m + fwlen;
+	t = m + 2 * fwlen;
+
+	/*
+	 * Decode the modulus.
+	 */
+	br_i31_decode(m, n, nlen);
+	m0i = br_i31_ninv31(m[1]);
+
+	/*
+	 * Note: if m[] is even, then m0i == 0. Otherwise, m0i must be
+	 * an odd integer.
+	 */
+	r = m0i & 1;
+
+	/*
+	 * Decode x[] into a[]; we also check that its value is proper.
+	 */
+	r &= br_i31_decode_mod(a, x, xlen, m);
+
+	/*
+	 * Compute the modular exponentiation.
+	 * t[] starts 2*fwlen words into tmp[]; the last argument is the
+	 * TLEN budget minus those words (presumably the number of words
+	 * usable at t[] -- confirm against the br_i31_modpow_opt()
+	 * declaration). The return value of that call is not examined.
+	 */
+	br_i31_modpow_opt(a, pk->e, pk->elen, m, m0i, t, TLEN - 2 * fwlen);
+
+	/*
+	 * Encode the result.
+	 */
+	br_i31_encode(x, xlen, a);
+	return r;
+}
diff --git a/third_party/bearssl/src/rsa_i31_pubexp.c b/third_party/bearssl/src/rsa_i31_pubexp.c
new file mode 100644
index 0000000..f26537d
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_pubexp.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Recompute public exponent, based on factor p and reduced private
+ * exponent dp.
+ *
+ * pbuf[] (plen bytes) and dpbuf[] (dplen bytes) are unsigned integers
+ * whose last byte is the least significant one; leading zero bytes are
+ * skipped.
+ *
+ * Returned value is 0 on any rejected input (p shorter than 5 bytes or
+ * longer than BR_MAX_RSA_FACTOR / 8 bytes, dp empty or longer than p,
+ * p not equal to 3 modulo 4, dp even, dp not reducible with a single
+ * subtraction, inversion failure, or a result that does not pass the
+ * final size and parity filters). Otherwise the returned value is the
+ * recomputed exponent, which is then always odd.
+ */
+static uint32_t
+get_pubexp(const unsigned char *pbuf, size_t plen,
+	const unsigned char *dpbuf, size_t dplen)
+{
+	/*
+	 * dp is the inverse of e modulo p-1. If p = 3 mod 4, then
+	 * p-1 = 2*((p-1)/2). Taken modulo 2, e is odd and has inverse 1;
+	 * thus, dp must be odd.
+	 *
+	 * We compute the inverse of dp modulo (p-1)/2. This requires
+	 * first reducing dp modulo (p-1)/2 (this can be done with a
+	 * conditional subtract, no need to use the generic modular
+	 * reduction function); then, we use moddiv.
+	 */
+
+	uint32_t tmp[6 * ((BR_MAX_RSA_FACTOR + 61) / 31)];
+	uint32_t *p, *dp, *x;
+	size_t len;
+	uint32_t e;
+
+	/*
+	 * Compute actual factor length (in bytes) and check that it fits
+	 * under our size constraints.
+	 * (The plen == 0 test is subsumed by plen < 5.)
+	 */
+	while (plen > 0 && *pbuf == 0) {
+		pbuf ++;
+		plen --;
+	}
+	if (plen == 0 || plen < 5 || plen > (BR_MAX_RSA_FACTOR / 8)) {
+		return 0;
+	}
+
+	/*
+	 * Compute actual reduced exponent length (in bytes) and check that
+	 * it is not longer than p.
+	 * When both have the same length, dp is also rejected if its top
+	 * byte is greater than the top byte of p.
+	 */
+	while (dplen > 0 && *dpbuf == 0) {
+		dpbuf ++;
+		dplen --;
+	}
+	if (dplen > plen || dplen == 0
+		|| (dplen == plen && dpbuf[0] > pbuf[0]))
+	{
+		return 0;
+	}
+
+	/*
+	 * Verify that p = 3 mod 4 and that dp is odd.
+	 */
+	if ((pbuf[plen - 1] & 3) != 3 || (dpbuf[dplen - 1] & 1) != 1) {
+		return 0;
+	}
+
+	/*
+	 * Decode p and compute (p-1)/2.
+	 * len, derived from the header word p[0], is the spacing (in
+	 * words) between the successive values laid out in tmp[]: p, dp,
+	 * x, and the scratch area handed to br_i31_moddiv().
+	 */
+	p = tmp;
+	br_i31_decode(p, pbuf, plen);
+	len = (p[0] + 63) >> 5;
+	br_i31_rshift(p, 1);
+
+	/*
+	 * Decode dp and make sure its announced bit length matches that of
+	 * p (we already know that the size of dp, in bits, does not exceed
+	 * the size of p, so we just have to copy the header word).
+	 */
+	dp = p + len;
+	memset(dp, 0, len * sizeof *dp);
+	br_i31_decode(dp, dpbuf, dplen);
+	dp[0] = p[0];
+
+	/*
+	 * Subtract (p-1)/2 from dp if necessary.
+	 */
+	br_i31_sub(dp, p, NOT(br_i31_sub(dp, p, 0)));
+
+	/*
+	 * If another subtraction is needed, then this means that the
+	 * value was invalid. We don't care to leak information about
+	 * invalid keys.
+	 */
+	if (br_i31_sub(dp, p, 0) == 0) {
+		return 0;
+	}
+
+	/*
+	 * Invert dp modulo (p-1)/2. If the inversion fails, then the
+	 * key value was invalid.
+	 * x[] is first set to the integer 1.
+	 */
+	x = dp + len;
+	br_i31_zero(x, p[0]);
+	x[1] = 1;
+	if (br_i31_moddiv(x, dp, p, br_i31_ninv31(p[1]), x + len) == 0) {
+		return 0;
+	}
+
+	/*
+	 * We now have an inverse. We must set it to zero (error) if its
+	 * length is greater than 32 bits and/or if it is an even integer.
+	 * Take care that the bit_length function returns an encoded
+	 * bit length.
+	 */
+	e = (uint32_t)x[1] | ((uint32_t)x[2] << 31);
+	e &= -LT(br_i31_bit_length(x + 1, len - 1), 34);
+	e &= -(e & 1);
+	return e;
+}
+
+/*
+ * see bearssl_rsa.h
+ *
+ * Recomputes the public exponent separately from (p, dp) and from
+ * (q, dq), and returns that value only when both computations agree;
+ * the result is 0 otherwise, including when either computation
+ * yields 0.
+ * NOTE(review): EQ() is assumed to yield 1 on equality and 0 otherwise,
+ * so that -EQ() is an all-ones or all-zeros mask -- confirm in inner.h.
+ */
+uint32_t
+br_rsa_i31_compute_pubexp(const br_rsa_private_key *sk)
+{
+	/*
+	 * Get the public exponent from both p and q. This is the right
+	 * exponent if we get twice the same value.
+	 */
+	uint32_t ep, eq;
+
+	ep = get_pubexp(sk->p, sk->plen, sk->dp, sk->dplen);
+	eq = get_pubexp(sk->q, sk->qlen, sk->dq, sk->dqlen);
+	return ep & -EQ(ep, eq);
+}
diff --git a/third_party/bearssl/src/rsa_i32_oaep_decrypt.c b/third_party/bearssl/src/rsa_i32_oaep_decrypt.c
new file mode 100644
index 0000000..ecfd92b
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i32_oaep_decrypt.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i32_oaep_decrypt(const br_hash_class *dig, + const void *label, size_t label_len, + const br_rsa_private_key *sk, void *data, size_t *len) +{ + uint32_t r; + + if (*len != ((sk->n_bitlen + 7) >> 3)) { + return 0; + } + r = br_rsa_i32_private(data, sk); + r &= br_rsa_oaep_unpad(dig, label, label_len, data, len); + return r; +} diff --git a/third_party/bearssl/src/rsa_i32_oaep_encrypt.c b/third_party/bearssl/src/rsa_i32_oaep_encrypt.c new file mode 100644 index 0000000..dc17f3f --- /dev/null +++ b/third_party/bearssl/src/rsa_i32_oaep_encrypt.c @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_rsa.h */ +size_t +br_rsa_i32_oaep_encrypt( + const br_prng_class **rnd, const br_hash_class *dig, + const void *label, size_t label_len, + const br_rsa_public_key *pk, + void *dst, size_t dst_max_len, + const void *src, size_t src_len) +{ + size_t dlen; + + dlen = br_rsa_oaep_pad(rnd, dig, label, label_len, + pk, dst, dst_max_len, src, src_len); + if (dlen == 0) { + return 0; + } + return dlen & -(size_t)br_rsa_i32_public(dst, dlen, pk); +} diff --git a/third_party/bearssl/src/rsa_i32_pkcs1_sign.c b/third_party/bearssl/src/rsa_i32_pkcs1_sign.c new file mode 100644 index 0000000..44b6e6d --- /dev/null +++ b/third_party/bearssl/src/rsa_i32_pkcs1_sign.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i32_pkcs1_sign(const unsigned char *hash_oid, + const unsigned char *hash, size_t hash_len, + const br_rsa_private_key *sk, unsigned char *x) +{ + if (!br_rsa_pkcs1_sig_pad(hash_oid, hash, hash_len, sk->n_bitlen, x)) { + return 0; + } + return br_rsa_i32_private(x, sk); +} diff --git a/third_party/bearssl/src/rsa_i32_pkcs1_vrfy.c b/third_party/bearssl/src/rsa_i32_pkcs1_vrfy.c new file mode 100644 index 0000000..6ee7a19 --- /dev/null +++ b/third_party/bearssl/src/rsa_i32_pkcs1_vrfy.c @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i32_pkcs1_vrfy(const unsigned char *x, size_t xlen, + const unsigned char *hash_oid, size_t hash_len, + const br_rsa_public_key *pk, unsigned char *hash_out) +{ + unsigned char sig[BR_MAX_RSA_SIZE >> 3]; + + if (xlen > (sizeof sig)) { + return 0; + } + memcpy(sig, x, xlen); + if (!br_rsa_i32_public(sig, xlen, pk)) { + return 0; + } + return br_rsa_pkcs1_sig_unpad(sig, xlen, hash_oid, hash_len, hash_out); +} diff --git a/third_party/bearssl/src/rsa_i32_priv.c b/third_party/bearssl/src/rsa_i32_priv.c new file mode 100644 index 0000000..05c22ec --- /dev/null +++ b/third_party/bearssl/src/rsa_i32_priv.c @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Size, in 32-bit words, of each of the six sub-buffers carved out of
+ * tmp[] in br_rsa_i32_private().
+ */
+#define U (1 + (BR_MAX_RSA_FACTOR >> 5))
+
+/*
+ * see bearssl_rsa.h
+ *
+ * RSA private-key operation (i32 engine), computed from the key
+ * elements p, q, dp, dq and iq. x[] holds (sk->n_bitlen + 7) / 8 bytes
+ * and is overwritten with the result.
+ *
+ * Returns 0 without touching x[] when p or q (leading zero bytes
+ * skipped) is longer than BR_MAX_RSA_FACTOR / 8 bytes. Otherwise the
+ * whole computation is always carried out and the returned value is
+ * p0i & q0i & r, where r is 1 only if the input x[] is numerically
+ * lower than the recomputed modulus p*q, and p0i / q0i are the values
+ * returned by br_i32_ninv32() for p and q.
+ */
+uint32_t
+br_rsa_i32_private(unsigned char *x, const br_rsa_private_key *sk)
+{
+	const unsigned char *p, *q;
+	size_t plen, qlen;
+	uint32_t tmp[6 * U];
+	uint32_t *mp, *mq, *s1, *s2, *t1, *t2, *t3;
+	uint32_t p0i, q0i;
+	size_t xlen, u;
+	uint32_t r;
+
+	/*
+	 * All our temporary buffers are from the tmp[] array.
+	 *
+	 * The mp, mq, s1, s2, t1 and t2 buffers are large enough to
+	 * contain a RSA factor. The t3 buffer can contain a complete
+	 * RSA modulus. t3 shares its storage space with s2, s1 and t1,
+	 * in that order (this is important, see below).
+	 */
+	mq = tmp;
+	mp = tmp + U;
+	t2 = tmp + 2 * U;
+	s2 = tmp + 3 * U;
+	s1 = tmp + 4 * U;
+	t1 = tmp + 5 * U;
+	t3 = s2;
+
+	/*
+	 * Compute the actual lengths (in bytes) of p and q, and check
+	 * that they fit within our stack buffers.
+	 */
+	p = sk->p;
+	plen = sk->plen;
+	while (plen > 0 && *p == 0) {
+		p ++;
+		plen --;
+	}
+	q = sk->q;
+	qlen = sk->qlen;
+	while (qlen > 0 && *q == 0) {
+		q ++;
+		qlen --;
+	}
+	if (plen > (BR_MAX_RSA_FACTOR >> 3)
+		|| qlen > (BR_MAX_RSA_FACTOR >> 3))
+	{
+		return 0;
+	}
+
+	/*
+	 * Decode p and q.
+	 */
+	br_i32_decode(mp, p, plen);
+	br_i32_decode(mq, q, qlen);
+
+	/*
+	 * Recompute modulus, to compare with the source value.
+	 *
+	 * The product is encoded as xlen bytes at t2 + 2 * U, which is
+	 * the same address as s1 (tmp + 4 * U); s1 has not been computed
+	 * yet at this point. The loop then performs a byte-wise
+	 * subtraction with borrow of that encoded modulus from x[],
+	 * starting with the last byte; the final borrow r is 1 exactly
+	 * when x[] is lower than the modulus.
+	 */
+	br_i32_zero(t2, mp[0]);
+	br_i32_mulacc(t2, mp, mq);
+	xlen = (sk->n_bitlen + 7) >> 3;
+	br_i32_encode(t2 + 2 * U, xlen, t2);
+	u = xlen;
+	r = 0;
+	while (u > 0) {
+		uint32_t wn, wx;
+
+		u --;
+		wn = ((unsigned char *)(t2 + 2 * U))[u];
+		wx = x[u];
+		r = ((wx - (wn + r)) >> 8) & 1;
+	}
+
+	/*
+	 * Compute s1 = x^dp mod p.
+	 */
+	p0i = br_i32_ninv32(mp[1]);
+	br_i32_decode_reduce(s1, x, xlen, mp);
+	br_i32_modpow(s1, sk->dp, sk->dplen, mp, p0i, t1, t2);
+
+	/*
+	 * Compute s2 = x^dq mod q.
+	 */
+	q0i = br_i32_ninv32(mq[1]);
+	br_i32_decode_reduce(s2, x, xlen, mq);
+	br_i32_modpow(s2, sk->dq, sk->dqlen, mq, q0i, t1, t2);
+
+	/*
+	 * Compute:
+	 * h = (s1 - s2)*(1/q) mod p
+	 * s1 is an integer modulo p, but s2 is modulo q. PKCS#1 is
+	 * unclear about whether p may be lower than q (some existing,
+	 * widely deployed implementations of RSA don't tolerate p < q),
+	 * but we want to support that occurrence, so we need to use the
+	 * reduction function.
+	 *
+	 * Since we use br_i32_decode_reduce() for iq (purportedly, the
+	 * inverse of q modulo p), we also tolerate improperly large
+	 * values for this parameter.
+	 */
+	br_i32_reduce(t2, s2, mp);
+	br_i32_add(s1, mp, br_i32_sub(s1, t2, 1));
+	br_i32_to_monty(s1, mp);
+	br_i32_decode_reduce(t1, sk->iq, sk->iqlen, mp);
+	br_i32_montymul(t2, s1, t1, mp, p0i);
+
+	/*
+	 * h is now in t2. We compute the final result:
+	 * s = s2 + q*h
+	 * All these operations are non-modular.
+	 *
+	 * We need mq, s2 and t2. We use the t3 buffer as destination.
+	 * The buffers mp, s1 and t1 are no longer needed. Moreover,
+	 * the first step is to copy s2 into the destination buffer t3.
+	 * We thus arranged for t3 to actually share space with s2, and
+	 * to be followed by the space formerly used by s1 and t1.
+	 */
+	br_i32_mulacc(t3, mq, t2);
+
+	/*
+	 * Encode the result. Since we already checked the value of xlen,
+	 * we can just use it right away.
+	 */
+	br_i32_encode(x, xlen, t3);
+
+	/*
+	 * The only error conditions remaining at that point are invalid
+	 * values for p and q (even integers).
+	 */
+	return p0i & q0i & r;
+}
diff --git a/third_party/bearssl/src/rsa_i32_pss_sign.c b/third_party/bearssl/src/rsa_i32_pss_sign.c
new file mode 100644
index 0000000..0f72f92
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i32_pss_sign.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ + +#include "inner.h" + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i32_pss_sign(const br_prng_class **rng, + const br_hash_class *hf_data, const br_hash_class *hf_mgf1, + const unsigned char *hash, size_t salt_len, + const br_rsa_private_key *sk, unsigned char *x) +{ + if (!br_rsa_pss_sig_pad(rng, hf_data, hf_mgf1, hash, + salt_len, sk->n_bitlen, x)) + { + return 0; + } + return br_rsa_i32_private(x, sk); +} diff --git a/third_party/bearssl/src/rsa_i32_pss_vrfy.c b/third_party/bearssl/src/rsa_i32_pss_vrfy.c new file mode 100644 index 0000000..2e70d23 --- /dev/null +++ b/third_party/bearssl/src/rsa_i32_pss_vrfy.c @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i32_pss_vrfy(const unsigned char *x, size_t xlen, + const br_hash_class *hf_data, const br_hash_class *hf_mgf1, + const void *hash, size_t salt_len, const br_rsa_public_key *pk) +{ + unsigned char sig[BR_MAX_RSA_SIZE >> 3]; + + if (xlen > (sizeof sig)) { + return 0; + } + memcpy(sig, x, xlen); + if (!br_rsa_i32_public(sig, xlen, pk)) { + return 0; + } + return br_rsa_pss_sig_unpad(hf_data, hf_mgf1, + hash, salt_len, pk, sig); +} diff --git a/third_party/bearssl/src/rsa_i32_pub.c b/third_party/bearssl/src/rsa_i32_pub.c new file mode 100644 index 0000000..6e8d8e3 --- /dev/null +++ b/third_party/bearssl/src/rsa_i32_pub.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_rsa.h
+ *
+ * RSA public-key operation (i32 engine): x[] (xlen bytes) is replaced
+ * in place with its modular exponentiation, using exponent pk->e and
+ * modulus pk->n.
+ *
+ * Returns 0 immediately, leaving x[] untouched, when the modulus (once
+ * its leading zero bytes are skipped) is empty, is longer than
+ * BR_MAX_RSA_SIZE / 8 bytes, or does not have exactly xlen bytes.
+ * Otherwise the exponentiation is always carried out and x[] is
+ * overwritten; the returned value is then 1 only if the modulus is odd
+ * and br_i32_decode_mod() accepted the value of x[], and 0 otherwise.
+ */
+uint32_t
+br_rsa_i32_public(unsigned char *x, size_t xlen,
+	const br_rsa_public_key *pk)
+{
+	const unsigned char *n;
+	size_t nlen;
+	/*
+	 * Four equally sized work areas: modulus, value to exponentiate,
+	 * and two temporaries passed to br_i32_modpow().
+	 */
+	uint32_t m[1 + (BR_MAX_RSA_SIZE >> 5)];
+	uint32_t a[1 + (BR_MAX_RSA_SIZE >> 5)];
+	uint32_t t1[1 + (BR_MAX_RSA_SIZE >> 5)];
+	uint32_t t2[1 + (BR_MAX_RSA_SIZE >> 5)];
+	uint32_t m0i, r;
+
+	/*
+	 * Get the actual length of the modulus, and see if it fits within
+	 * our stack buffer. We also check that the length of x[] is valid.
+	 */
+	n = pk->n;
+	nlen = pk->nlen;
+	while (nlen > 0 && *n == 0) {
+		n ++;
+		nlen --;
+	}
+	if (nlen == 0 || nlen > (BR_MAX_RSA_SIZE >> 3) || xlen != nlen) {
+		return 0;
+	}
+	br_i32_decode(m, n, nlen);
+	m0i = br_i32_ninv32(m[1]);
+
+	/*
+	 * Note: if m[] is even, then m0i == 0. Otherwise, m0i must be
+	 * an odd integer.
+	 */
+	r = m0i & 1;
+
+	/*
+	 * Decode x[] into a[]; we also check that its value is proper.
+	 */
+	r &= br_i32_decode_mod(a, x, xlen, m);
+
+	/*
+	 * Compute the modular exponentiation.
+	 */
+	br_i32_modpow(a, pk->e, pk->elen, m, m0i, t1, t2);
+
+	/*
+	 * Encode the result.
+	 * This happens even when r is 0: failure is reported through the
+	 * return value only.
+	 */
+	br_i32_encode(x, xlen, a);
+	return r;
+}
diff --git a/third_party/bearssl/src/rsa_i62_keygen.c b/third_party/bearssl/src/rsa_i62_keygen.c
new file mode 100644
index 0000000..992fe97
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i62_keygen.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ + +#include "inner.h" + +#if BR_INT128 || BR_UMUL128 + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i62_keygen(const br_prng_class **rng, + br_rsa_private_key *sk, void *kbuf_priv, + br_rsa_public_key *pk, void *kbuf_pub, + unsigned size, uint32_t pubexp) +{ + return br_rsa_i31_keygen_inner(rng, + sk, kbuf_priv, pk, kbuf_pub, size, pubexp, + &br_i62_modpow_opt_as_i31); +} + +/* see bearssl_rsa.h */ +br_rsa_keygen +br_rsa_i62_keygen_get(void) +{ + return &br_rsa_i62_keygen; +} + +#else + +/* see bearssl_rsa.h */ +br_rsa_keygen +br_rsa_i62_keygen_get(void) +{ + return 0; +} + +#endif diff --git a/third_party/bearssl/src/rsa_i62_oaep_decrypt.c b/third_party/bearssl/src/rsa_i62_oaep_decrypt.c new file mode 100644 index 0000000..38470dd --- /dev/null +++ b/third_party/bearssl/src/rsa_i62_oaep_decrypt.c @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +#if BR_INT128 || BR_UMUL128 + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i62_oaep_decrypt(const br_hash_class *dig, + const void *label, size_t label_len, + const br_rsa_private_key *sk, void *data, size_t *len) +{ + uint32_t r; + + if (*len != ((sk->n_bitlen + 7) >> 3)) { + return 0; + } + r = br_rsa_i62_private(data, sk); + r &= br_rsa_oaep_unpad(dig, label, label_len, data, len); + return r; +} + +/* see bearssl_rsa.h */ +br_rsa_oaep_decrypt +br_rsa_i62_oaep_decrypt_get(void) +{ + return &br_rsa_i62_oaep_decrypt; +} + +#else + +/* see bearssl_rsa.h */ +br_rsa_oaep_decrypt +br_rsa_i62_oaep_decrypt_get(void) +{ + return 0; +} + +#endif diff --git a/third_party/bearssl/src/rsa_i62_oaep_encrypt.c b/third_party/bearssl/src/rsa_i62_oaep_encrypt.c new file mode 100644 index 0000000..cf41ecb --- /dev/null +++ b/third_party/bearssl/src/rsa_i62_oaep_encrypt.c @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +#if BR_INT128 || BR_UMUL128 + +/* see bearssl_rsa.h */ +size_t +br_rsa_i62_oaep_encrypt( + const br_prng_class **rnd, const br_hash_class *dig, + const void *label, size_t label_len, + const br_rsa_public_key *pk, + void *dst, size_t dst_max_len, + const void *src, size_t src_len) +{ + size_t dlen; + + dlen = br_rsa_oaep_pad(rnd, dig, label, label_len, + pk, dst, dst_max_len, src, src_len); + if (dlen == 0) { + return 0; + } + return dlen & -(size_t)br_rsa_i62_public(dst, dlen, pk); +} + +/* see bearssl_rsa.h */ +br_rsa_oaep_encrypt +br_rsa_i62_oaep_encrypt_get(void) +{ + return &br_rsa_i62_oaep_encrypt; +} + +#else + +/* see bearssl_rsa.h */ +br_rsa_oaep_encrypt +br_rsa_i62_oaep_encrypt_get(void) +{ + return 0; +} + +#endif diff --git a/third_party/bearssl/src/rsa_i62_pkcs1_sign.c b/third_party/bearssl/src/rsa_i62_pkcs1_sign.c new file mode 100644 index 0000000..a20a084 --- /dev/null +++ b/third_party/bearssl/src/rsa_i62_pkcs1_sign.c @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +#if BR_INT128 || BR_UMUL128 + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i62_pkcs1_sign(const unsigned char *hash_oid, + const unsigned char *hash, size_t hash_len, + const br_rsa_private_key *sk, unsigned char *x) +{ + if (!br_rsa_pkcs1_sig_pad(hash_oid, hash, hash_len, sk->n_bitlen, x)) { + return 0; + } + return br_rsa_i62_private(x, sk); +} + +/* see bearssl_rsa.h */ +br_rsa_pkcs1_sign +br_rsa_i62_pkcs1_sign_get(void) +{ + return &br_rsa_i62_pkcs1_sign; +} + +#else + +/* see bearssl_rsa.h */ +br_rsa_pkcs1_sign +br_rsa_i62_pkcs1_sign_get(void) +{ + return 0; +} + +#endif diff --git a/third_party/bearssl/src/rsa_i62_pkcs1_vrfy.c b/third_party/bearssl/src/rsa_i62_pkcs1_vrfy.c new file mode 100644 index 0000000..6519161 --- /dev/null +++ b/third_party/bearssl/src/rsa_i62_pkcs1_vrfy.c @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the 
Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +#if BR_INT128 || BR_UMUL128 + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i62_pkcs1_vrfy(const unsigned char *x, size_t xlen, + const unsigned char *hash_oid, size_t hash_len, + const br_rsa_public_key *pk, unsigned char *hash_out) +{ + unsigned char sig[BR_MAX_RSA_SIZE >> 3]; + + if (xlen > (sizeof sig)) { + return 0; + } + memcpy(sig, x, xlen); + if (!br_rsa_i62_public(sig, xlen, pk)) { + return 0; + } + return br_rsa_pkcs1_sig_unpad(sig, xlen, hash_oid, hash_len, hash_out); +} + +/* see bearssl_rsa.h */ +br_rsa_pkcs1_vrfy +br_rsa_i62_pkcs1_vrfy_get(void) +{ + return &br_rsa_i62_pkcs1_vrfy; +} + +#else + +/* see bearssl_rsa.h */ +br_rsa_pkcs1_vrfy +br_rsa_i62_pkcs1_vrfy_get(void) +{ + return 0; +} + +#endif diff --git a/third_party/bearssl/src/rsa_i62_priv.c b/third_party/bearssl/src/rsa_i62_priv.c new file mode 100644 index 0000000..f0da600 --- /dev/null +++ b/third_party/bearssl/src/rsa_i62_priv.c @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * 
#if BR_INT128 || BR_UMUL128

/*
 * U is a per-value size derived from BR_MAX_RSA_FACTOR; the temporary
 * array of the private-key operation is sized as four times U.
 */
#define U      (2 + ((BR_MAX_RSA_FACTOR + 30) / 31))
#define TLEN   (4 * U)  /* TLEN is counted in 64-bit words */

/*
 * RSA private-key operation with the "i62" engine, using the CRT
 * elements of the key (p, q, dp, dq, iq).
 *
 * x[] is both input and output: it is read as a value of
 * (sk->n_bitlen + 7) / 8 bytes and overwritten with the result, encoded
 * over the same length.
 *
 * All intermediate values live in the stack array tmp[], which is cut
 * into slots of fwlen 64-bit words each; slots are reused as the
 * computation progresses (see the comments below).
 *
 * Returned value is p0i & q0i & r. Since r is kept to 0 or 1, the
 * result is 1 only if:
 *  - x was found to be lower than p*q (borrow computed below);
 *  - both br_i62_modpow_opt() calls returned 1;
 *  - both p0i and q0i are odd.
 * 0 is also returned early, with x[] untouched, when the factors are
 * too large for tmp[].
 *
 * see bearssl_rsa.h
 */
uint32_t
br_rsa_i62_private(unsigned char *x, const br_rsa_private_key *sk)
{
	const unsigned char *p, *q;
	size_t plen, qlen;
	size_t fwlen;
	uint32_t p0i, q0i;
	size_t xlen, u;
	uint64_t tmp[TLEN];
	long z;
	uint32_t *mp, *mq, *s1, *s2, *t1, *t2, *t3;
	uint32_t r;

	/*
	 * Compute the actual lengths of p and q, in bytes.
	 * These lengths are not considered secret (we cannot really hide
	 * them anyway in constant-time code).
	 */
	p = sk->p;
	plen = sk->plen;
	while (plen > 0 && *p == 0) {
		p ++;
		plen --;
	}
	q = sk->q;
	qlen = sk->qlen;
	while (qlen > 0 && *q == 0) {
		q ++;
		qlen --;
	}

	/*
	 * Compute the maximum factor length, in words.
	 */
	z = (long)(plen > qlen ? plen : qlen) << 3;
	fwlen = 1;
	while (z > 0) {
		z -= 31;
		fwlen ++;
	}

	/*
	 * Convert size to 62-bit words.
	 */
	fwlen = (fwlen + 1) >> 1;

	/*
	 * We need to fit at least 6 values in the stack buffer.
	 */
	if (6 * fwlen > TLEN) {
		return 0;
	}

	/*
	 * Compute signature length (in bytes).
	 */
	xlen = (sk->n_bitlen + 7) >> 3;

	/*
	 * Decode q.
	 */
	mq = (uint32_t *)tmp;
	br_i31_decode(mq, q, qlen);

	/*
	 * Decode p.
	 */
	t1 = (uint32_t *)(tmp + fwlen);
	br_i31_decode(t1, p, plen);

	/*
	 * Compute the modulus (product of the two factors), to compare
	 * it with the source value. We use br_i31_mulacc(), since it's
	 * already used later on.
	 */
	t2 = (uint32_t *)(tmp + 2 * fwlen);
	br_i31_zero(t2, mq[0]);
	br_i31_mulacc(t2, mq, t1);

	/*
	 * We encode the modulus into bytes, to perform the comparison
	 * with bytes. We know that the product length, in bytes, is
	 * exactly xlen.
	 * The comparison actually computes the carry when subtracting
	 * the modulus from the source value; that carry must be 1 for
	 * a value in the correct range. We keep it in r, which is our
	 * accumulator for the error code.
	 */
	t3 = (uint32_t *)(tmp + 4 * fwlen);
	br_i31_encode(t3, xlen, t2);
	u = xlen;
	r = 0;
	while (u > 0) {
		uint32_t wn, wx;

		/*
		 * Bytes are processed from the last one to the first;
		 * r carries the borrow of the byte-wise subtraction
		 * from one byte to the next.
		 */
		u --;
		wn = ((unsigned char *)t3)[u];
		wx = x[u];
		r = ((wx - (wn + r)) >> 8) & 1;
	}

	/*
	 * Move the decoded p to another temporary buffer.
	 */
	mp = (uint32_t *)(tmp + 2 * fwlen);
	memmove(mp, t1, 2 * fwlen * sizeof *t1);

	/*
	 * Compute s2 = x^dq mod q.
	 */
	q0i = br_i31_ninv31(mq[1]);
	s2 = (uint32_t *)(tmp + fwlen);
	br_i31_decode_reduce(s2, x, xlen, mq);
	r &= br_i62_modpow_opt(s2, sk->dq, sk->dqlen, mq, q0i,
		tmp + 3 * fwlen, TLEN - 3 * fwlen);

	/*
	 * Compute s1 = x^dp mod p.
	 */
	p0i = br_i31_ninv31(mp[1]);
	s1 = (uint32_t *)(tmp + 3 * fwlen);
	br_i31_decode_reduce(s1, x, xlen, mp);
	r &= br_i62_modpow_opt(s1, sk->dp, sk->dplen, mp, p0i,
		tmp + 4 * fwlen, TLEN - 4 * fwlen);

	/*
	 * Compute:
	 *   h = (s1 - s2)*(1/q) mod p
	 * s1 is an integer modulo p, but s2 is modulo q. PKCS#1 is
	 * unclear about whether p may be lower than q (some existing,
	 * widely deployed implementations of RSA don't tolerate p < q),
	 * but we want to support that occurrence, so we need to use the
	 * reduction function.
	 *
	 * Since we use br_i31_decode_reduce() for iq (purportedly, the
	 * inverse of q modulo p), we also tolerate improperly large
	 * values for this parameter.
	 */
	t1 = (uint32_t *)(tmp + 4 * fwlen);
	t2 = (uint32_t *)(tmp + 5 * fwlen);
	br_i31_reduce(t2, s2, mp);
	br_i31_add(s1, mp, br_i31_sub(s1, t2, 1));
	br_i31_to_monty(s1, mp);
	br_i31_decode_reduce(t1, sk->iq, sk->iqlen, mp);
	br_i31_montymul(t2, s1, t1, mp, p0i);

	/*
	 * h is now in t2. We compute the final result:
	 *   s = s2 + q*h
	 * All these operations are non-modular.
	 *
	 * We need mq, s2 and t2. We use the t3 buffer as destination.
	 * The buffers mp, s1 and t1 are no longer needed, so we can
	 * reuse them for t3. Moreover, the first step of the computation
	 * is to copy s2 into t3, after which s2 is not needed. Right
	 * now, mq is in slot 0, s2 is in slot 1, and t2 is in slot 5.
	 * Therefore, we have ample room for t3 by simply using s2.
	 */
	t3 = s2;
	br_i31_mulacc(t3, mq, t2);

	/*
	 * Encode the result. Since we already checked the value of xlen,
	 * we can just use it right away.
	 */
	br_i31_encode(x, xlen, t3);

	/*
	 * The only error conditions remaining at that point are invalid
	 * values for p and q (even integers).
	 */
	return p0i & q0i & r;
}

/*
 * Accessor for the i62 private-key function (this variant is compiled
 * when BR_INT128 or BR_UMUL128 is set).
 *
 * see bearssl_rsa.h
 */
br_rsa_private
br_rsa_i62_private_get(void)
{
	return &br_rsa_i62_private;
}

#else

/*
 * Accessor used when neither BR_INT128 nor BR_UMUL128 is set: there is
 * no i62 implementation in that case, so 0 is returned.
 *
 * see bearssl_rsa.h
 */
br_rsa_private
br_rsa_i62_private_get(void)
{
	return 0;
}

#endif
+ */ + +#include "inner.h" + +#if BR_INT128 || BR_UMUL128 + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i62_pss_sign(const br_prng_class **rng, + const br_hash_class *hf_data, const br_hash_class *hf_mgf1, + const unsigned char *hash, size_t salt_len, + const br_rsa_private_key *sk, unsigned char *x) +{ + if (!br_rsa_pss_sig_pad(rng, hf_data, hf_mgf1, hash, + salt_len, sk->n_bitlen, x)) + { + return 0; + } + return br_rsa_i62_private(x, sk); +} + +/* see bearssl_rsa.h */ +br_rsa_pss_sign +br_rsa_i62_pss_sign_get(void) +{ + return &br_rsa_i62_pss_sign; +} + +#else + +/* see bearssl_rsa.h */ +br_rsa_pss_sign +br_rsa_i62_pss_sign_get(void) +{ + return 0; +} + +#endif diff --git a/third_party/bearssl/src/rsa_i62_pss_vrfy.c b/third_party/bearssl/src/rsa_i62_pss_vrfy.c new file mode 100644 index 0000000..e726e82 --- /dev/null +++ b/third_party/bearssl/src/rsa_i62_pss_vrfy.c @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +#if BR_INT128 || BR_UMUL128 + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i62_pss_vrfy(const unsigned char *x, size_t xlen, + const br_hash_class *hf_data, const br_hash_class *hf_mgf1, + const void *hash, size_t salt_len, const br_rsa_public_key *pk) +{ + unsigned char sig[BR_MAX_RSA_SIZE >> 3]; + + if (xlen > (sizeof sig)) { + return 0; + } + memcpy(sig, x, xlen); + if (!br_rsa_i62_public(sig, xlen, pk)) { + return 0; + } + return br_rsa_pss_sig_unpad(hf_data, hf_mgf1, + hash, salt_len, pk, sig); +} + +/* see bearssl_rsa.h */ +br_rsa_pss_vrfy +br_rsa_i62_pss_vrfy_get(void) +{ + return &br_rsa_i62_pss_vrfy; +} + +#else + +/* see bearssl_rsa.h */ +br_rsa_pss_vrfy +br_rsa_i62_pss_vrfy_get(void) +{ + return 0; +} + +#endif diff --git a/third_party/bearssl/src/rsa_i62_pub.c b/third_party/bearssl/src/rsa_i62_pub.c new file mode 100644 index 0000000..70cf61b --- /dev/null +++ b/third_party/bearssl/src/rsa_i62_pub.c @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +#if BR_INT128 || BR_UMUL128 + +/* + * As a strict minimum, we need four buffers that can hold a + * modular integer. But TLEN is expressed in 64-bit words. + */ +#define TLEN (2 * (2 + ((BR_MAX_RSA_SIZE + 30) / 31))) + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i62_public(unsigned char *x, size_t xlen, + const br_rsa_public_key *pk) +{ + const unsigned char *n; + size_t nlen; + uint64_t tmp[TLEN]; + uint32_t *m, *a; + size_t fwlen; + long z; + uint32_t m0i, r; + + /* + * Get the actual length of the modulus, and see if it fits within + * our stack buffer. We also check that the length of x[] is valid. + */ + n = pk->n; + nlen = pk->nlen; + while (nlen > 0 && *n == 0) { + n ++; + nlen --; + } + if (nlen == 0 || nlen > (BR_MAX_RSA_SIZE >> 3) || xlen != nlen) { + return 0; + } + z = (long)nlen << 3; + fwlen = 1; + while (z > 0) { + z -= 31; + fwlen ++; + } + /* + * Convert fwlen to a count in 62-bit words. + */ + fwlen = (fwlen + 1) >> 1; + + /* + * The modulus gets decoded into m[]. + * The value to exponentiate goes into a[]. + */ + m = (uint32_t *)tmp; + a = (uint32_t *)(tmp + fwlen); + + /* + * Decode the modulus. + */ + br_i31_decode(m, n, nlen); + m0i = br_i31_ninv31(m[1]); + + /* + * Note: if m[] is even, then m0i == 0. Otherwise, m0i must be + * an odd integer. + */ + r = m0i & 1; + + /* + * Decode x[] into a[]; we also check that its value is proper. + */ + r &= br_i31_decode_mod(a, x, xlen, m); + + /* + * Compute the modular exponentiation. 
+ */ + br_i62_modpow_opt(a, pk->e, pk->elen, m, m0i, + tmp + 2 * fwlen, TLEN - 2 * fwlen); + + /* + * Encode the result. + */ + br_i31_encode(x, xlen, a); + return r; +} + +/* see bearssl_rsa.h */ +br_rsa_public +br_rsa_i62_public_get(void) +{ + return &br_rsa_i62_public; +} + +#else + +/* see bearssl_rsa.h */ +br_rsa_public +br_rsa_i62_public_get(void) +{ + return 0; +} + +#endif diff --git a/third_party/bearssl/src/rsa_oaep_pad.c b/third_party/bearssl/src/rsa_oaep_pad.c new file mode 100644 index 0000000..5327dc2 --- /dev/null +++ b/third_party/bearssl/src/rsa_oaep_pad.c @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Hash some data. This is put as a separate function so that stack + * allocation of the hash function context is done only for the duration + * of the hash. 
+ */ +static void +hash_data(const br_hash_class *dig, void *dst, const void *src, size_t len) +{ + br_hash_compat_context hc; + + hc.vtable = dig; + dig->init(&hc.vtable); + dig->update(&hc.vtable, src, len); + dig->out(&hc.vtable, dst); +} + +/* see inner.h */ +size_t +br_rsa_oaep_pad(const br_prng_class **rnd, const br_hash_class *dig, + const void *label, size_t label_len, + const br_rsa_public_key *pk, + void *dst, size_t dst_max_len, + const void *src, size_t src_len) +{ + size_t k, hlen; + unsigned char *buf; + + hlen = br_digest_size(dig); + + /* + * Compute actual modulus length (in bytes). + */ + k = pk->nlen; + while (k > 0 && pk->n[k - 1] == 0) { + k --; + } + + /* + * An error is reported if: + * - the modulus is too short; + * - the source message length is too long; + * - the destination buffer is too short. + */ + if (k < ((hlen << 1) + 2) + || src_len > (k - (hlen << 1) - 2) + || dst_max_len < k) + { + return 0; + } + + /* + * Apply padding. At this point, things cannot fail. + */ + buf = dst; + + /* + * Assemble: DB = lHash || PS || 0x01 || M + * We first place the source message M with memmove(), so that + * overlaps between source and destination buffers are supported. + */ + memmove(buf + k - src_len, src, src_len); + hash_data(dig, buf + 1 + hlen, label, label_len); + memset(buf + 1 + (hlen << 1), 0, k - src_len - (hlen << 1) - 2); + buf[k - src_len - 1] = 0x01; + + /* + * Make the random seed. + */ + (*rnd)->generate(rnd, buf + 1, hlen); + + /* + * Mask DB with the mask generated from the seed. + */ + br_mgf1_xor(buf + 1 + hlen, k - hlen - 1, dig, buf + 1, hlen); + + /* + * Mask the seed with the mask generated from the masked DB. + */ + br_mgf1_xor(buf + 1, hlen, dig, buf + 1 + hlen, k - hlen - 1); + + /* + * Padding result: EM = 0x00 || maskedSeed || maskedDB. 
+ */ + buf[0] = 0x00; + return k; +} diff --git a/third_party/bearssl/src/rsa_oaep_unpad.c b/third_party/bearssl/src/rsa_oaep_unpad.c new file mode 100644 index 0000000..7c4be6a --- /dev/null +++ b/third_party/bearssl/src/rsa_oaep_unpad.c @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Hash some data and XOR the result into the provided buffer. This is put + * as a separate function so that stack allocation of the hash function + * context is done only for the duration of the hash. 
/*
 * Hash some data and XOR the result into the provided buffer. This is put
 * as a separate function so that stack allocation of the hash function
 * context is done only for the duration of the hash.
 *
 * The hash output goes through the local 64-byte array tmp[]; the
 * number of bytes XORed into dst[] is the output size of the hash
 * function designated by dig.
 */
static void
xor_hash_data(const br_hash_class *dig, void *dst, const void *src, size_t len)
{
	br_hash_compat_context hc;
	unsigned char tmp[64];
	unsigned char *buf;
	size_t u, hlen;

	hc.vtable = dig;
	dig->init(&hc.vtable);
	dig->update(&hc.vtable, src, len);
	dig->out(&hc.vtable, tmp);
	buf = dst;
	hlen = br_digest_size(dig);
	for (u = 0; u < hlen; u ++) {
		buf[u] ^= tmp[u];
	}
}

/*
 * Remove OAEP padding.
 *
 * data[] contains the encoded message, of length *len bytes. The
 * buffer is always modified (the masks are removed in place). If the
 * padding is valid, the recovered message is moved to the start of
 * data[], *len is set to its length, and 1 is returned. Otherwise, 0
 * is returned and *len is left unchanged.
 *
 * see inner.h
 */
uint32_t
br_rsa_oaep_unpad(const br_hash_class *dig,
	const void *label, size_t label_len,
	void *data, size_t *len)
{
	size_t u, k, hlen;
	unsigned char *buf;
	uint32_t r, s, zlen;

	hlen = br_digest_size(dig);
	k = *len;
	buf = data;

	/*
	 * There must be room for the padding.
	 */
	if (k < ((hlen << 1) + 2)) {
		return 0;
	}

	/*
	 * Unmask the seed, then the DB value.
	 */
	br_mgf1_xor(buf + 1, hlen, dig, buf + 1 + hlen, k - hlen - 1);
	br_mgf1_xor(buf + 1 + hlen, k - hlen - 1, dig, buf + 1, hlen);

	/*
	 * Hash the label and XOR it with the value in the array; if
	 * they are equal then these should yield only zeros.
	 */
	xor_hash_data(dig, buf + 1 + hlen, label, label_len);

	/*
	 * At that point, if the padding was correct, then we should
	 * have: 0x00 || seed || 0x00 ... 0x00 0x01 || M
	 * Padding is valid as long as:
	 *  - There is at least hlen+1 leading bytes of value 0x00.
	 *  - There is at least one non-zero byte.
	 *  - The first (leftmost) non-zero byte has value 0x01.
	 *
	 * Ultimately, we may leak the resulting message length, i.e.
	 * the position of the byte of value 0x01, but we must take care
	 * to do so only if the number of zero bytes has been verified
	 * to be at least hlen+1.
	 *
	 * The loop below counts the number of bytes of value 0x00, and
	 * checks that the next byte has value 0x01, in constant-time.
	 *
	 *  - If the initial byte (before the seed) is not 0x00, then
	 *    r and s are set to 0, and stay there.
	 *  - Value r is 1 until the first non-zero byte is reached
	 *    (after the seed); it switches to 0 at that point.
	 *  - Value s is set to 1 if and only if the data encountered
	 *    at the time of the transition of r from 1 to 0 has value
	 *    exactly 0x01.
	 *  - Value zlen counts the number of leading bytes of value zero
	 *    (after the seed).
	 */
	/* (buf[0] + 0xFF) >> 8 is 1 for any non-zero byte, 0 for 0x00. */
	r = 1 - ((buf[0] + 0xFF) >> 8);
	s = 0;
	zlen = 0;
	for (u = hlen + 1; u < k; u ++) {
		uint32_t w, nz;

		w = buf[u];

		/*
		 * nz == 1 only for the first non-zero byte.
		 */
		nz = r & ((w + 0xFF) >> 8);
		s |= nz & EQ(w, 0x01);
		r &= NOT(nz);
		zlen += r;
	}

	/*
	 * Padding is correct only if s == 1, _and_ zlen >= hlen.
	 */
	s &= GE(zlen, (uint32_t)hlen);

	/*
	 * At that point, padding was verified, and we are now allowed
	 * to make conditional jumps.
	 */
	if (s) {
		size_t plen;

		/*
		 * Bytes to skip: the initial 0x00, the seed (hlen
		 * bytes), the zlen zeros, and the 0x01 separator.
		 */
		plen = 2 + hlen + zlen;
		k -= plen;
		memmove(buf, buf + plen, k);
		*len = k;
	}
	return s;
}
/*
 * Build the PKCS#1 v1.5 signature padding in x[].
 *
 * With a non-NULL hash_oid, the padded value has format:
 *   00 01 FF .. FF 00 30 x1 30 x2 06 x3 OID 05 00 04 x4 HASH
 * where:
 *   -- total length is the modulus length, (n_bitlen + 7) / 8 bytes;
 *   -- there are at least eight bytes of value 0xFF;
 *   -- x3 is the encoded OID length (hash_oid[0]; the OID value bytes
 *      follow in hash_oid[1..x3]);
 *   -- x4 is the hash length (hash_len);
 *   -- x2 = x3 + 4, and x1 = x3 + x4 + 8.
 * The "05 00" (ASN.1 NULL for the hash parameters) is always produced.
 *
 * With a NULL hash_oid, the format is simply:
 *   00 01 FF .. FF 00 HASH
 *
 * Returned value is 1 on success, 0 if the modulus is too short for
 * the requested contents (x[] is left untouched in that case).
 *
 * see inner.h
 */
uint32_t
br_rsa_pkcs1_sig_pad(const unsigned char *hash_oid,
	const unsigned char *hash, size_t hash_len,
	uint32_t n_bitlen, unsigned char *x)
{
	size_t em_len, oid_len, pos;
	unsigned char *out;

	em_len = (n_bitlen + 7) >> 3;

	if (hash_oid != NULL) {
		oid_len = hash_oid[0];

		/*
		 * 21 = 2 (header) + 8 (minimal run of 0xFF) + 11 (fixed
		 * bytes of the DigestInfo structure and its separator).
		 */
		if (em_len < (oid_len + hash_len + 21)) {
			return 0;
		}

		/*
		 * pos is the offset of the 0x00 separator which follows
		 * the run of 0xFF bytes.
		 */
		pos = em_len - oid_len - hash_len - 11;
		x[0] = 0x00;
		x[1] = 0x01;
		memset(x + 2, 0xFF, pos - 2);

		out = x + pos;
		*out ++ = 0x00;
		*out ++ = 0x30;
		*out ++ = (unsigned char)(oid_len + hash_len + 8);
		*out ++ = 0x30;
		*out ++ = (unsigned char)(oid_len + 4);
		*out ++ = 0x06;

		/*
		 * hash_oid[] starts with its own length byte, which is
		 * exactly the x3 byte expected here.
		 */
		memcpy(out, hash_oid, oid_len + 1);
		out += oid_len + 1;
		*out ++ = 0x05;
		*out ++ = 0x00;
		*out ++ = 0x04;
		*out ++ = (unsigned char)hash_len;
	} else {
		/*
		 * 11 = 2 (header) + 8 (minimal run of 0xFF) + 1 (0x00
		 * separator).
		 */
		if (em_len < hash_len + 11) {
			return 0;
		}

		/*
		 * pos is the offset of the hash value; the separator is
		 * the byte just before it.
		 */
		pos = em_len - hash_len;
		x[0] = 0x00;
		x[1] = 0x01;
		memset(x + 2, 0xFF, pos - 3);
		x[pos - 1] = 0x00;
		out = x + pos;
	}
	memcpy(out, hash, hash_len);
	return 1;
}
/*
 * Verify the PKCS#1 v1.5 signature padding in sig[] (sig_len bytes) and
 * extract the hash value.
 *
 * hash_oid is either NULL (raw "00 01 FF .. FF 00 HASH" format) or
 * points to the encoded OID, starting with its one-byte length. The
 * hash value (hash_len bytes) is copied into hash_out[] on success.
 *
 * Returned value is 1 if the padding has the expected format, 0
 * otherwise. An OID too long for the internal comparison buffer (more
 * than 32 bytes) is rejected.
 *
 * see bearssl_rsa.h
 */
uint32_t
br_rsa_pkcs1_sig_unpad(const unsigned char *sig, size_t sig_len,
	const unsigned char *hash_oid, size_t hash_len,
	unsigned char *hash_out)
{
	static const unsigned char pad1[] = {
		0x00, 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
	};

	unsigned char pad2[43];
	size_t u, x2, x3, pad_len, zlen;

	if (sig_len < 11) {
		return 0;
	}

	/*
	 * Expected format:
	 *  00 01 FF ... FF 00 30 x1 30 x2 06 x3 OID [ 05 00 ] 04 x4 HASH
	 *
	 * with the following rules:
	 *
	 *  -- Total length is that of the modulus and the signature
	 *     (this was already verified by br_rsa_i31_public()).
	 *
	 *  -- There are at least eight bytes of value 0xFF.
	 *
	 *  -- x4 is equal to the hash length (hash_len).
	 *
	 *  -- x3 is equal to the encoded OID value length (so x3 is the
	 *     first byte of hash_oid[]).
	 *
	 *  -- If the "05 00" is present, then x2 == x3 + 4; otherwise,
	 *     x2 == x3 + 2.
	 *
	 *  -- x1 == x2 + x4 + 4.
	 *
	 * So the total length after the last "FF" is either x3 + x4 + 11
	 * (with the "05 00") or x3 + x4 + 9 (without the "05 00").
	 */

	/*
	 * Check the "00 01 FF .. FF 00" with at least eight 0xFF bytes.
	 * The comparison is valid because we made sure that the signature
	 * is at least 11 bytes long.
	 */
	if (memcmp(sig, pad1, sizeof pad1) != 0) {
		return 0;
	}
	for (u = sizeof pad1; u < sig_len; u ++) {
		if (sig[u] != 0xFF) {
			break;
		}
	}

	/*
	 * Remaining length is sig_len - u bytes (including the 00 just
	 * after the last FF). This must be equal to one of the two
	 * possible values (depending on whether the "05 00" sequence is
	 * present or not).
	 */
	if (hash_oid == NULL) {
		if (sig_len - u != hash_len + 1 || sig[u] != 0x00) {
			return 0;
		}
	} else {
		x3 = hash_oid[0];

		/*
		 * The expected header is rebuilt in pad2[], and uses up
		 * to x3 + 11 bytes. An OID which would not fit is
		 * rejected, instead of overflowing the stack buffer.
		 */
		if (x3 > (sizeof pad2) - 11) {
			return 0;
		}
		pad_len = x3 + 9;
		memset(pad2, 0, pad_len);
		zlen = sig_len - u - hash_len;
		if (zlen == pad_len) {
			x2 = x3 + 2;
		} else if (zlen == pad_len + 2) {
			x2 = x3 + 4;
			pad_len = zlen;
			pad2[pad_len - 4] = 0x05;
			pad2[pad_len - 3] = 0x00;
		} else {
			return 0;
		}
		pad2[1] = 0x30;
		pad2[2] = x2 + hash_len + 4;
		pad2[3] = 0x30;
		pad2[4] = x2;
		pad2[5] = 0x06;
		memcpy(pad2 + 6, hash_oid, x3 + 1);
		pad2[pad_len - 2] = 0x04;
		pad2[pad_len - 1] = hash_len;
		if (memcmp(pad2, sig + u, pad_len) != 0) {
			return 0;
		}
	}
	memcpy(hash_out, sig + sig_len - hash_len, hash_len);
	return 1;
}
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see inner.h */ +uint32_t +br_rsa_pss_sig_pad(const br_prng_class **rng, + const br_hash_class *hf_data, const br_hash_class *hf_mgf1, + const unsigned char *hash, size_t salt_len, + uint32_t n_bitlen, unsigned char *x) +{ + size_t xlen, hash_len; + br_hash_compat_context hc; + unsigned char *salt, *seed; + + hash_len = br_digest_size(hf_data); + + /* + * The padded string is one bit smaller than the modulus; + * notably, if the modulus length is equal to 1 modulo 8, then + * the padded string will be one _byte_ smaller, and the first + * byte will be set to 0. We apply these transformations here. + */ + n_bitlen --; + if ((n_bitlen & 7) == 0) { + *x ++ = 0; + } + xlen = (n_bitlen + 7) >> 3; + + /* + * Check that the modulus is large enough for the hash value + * length combined with the intended salt length. + */ + if (hash_len > xlen || salt_len > xlen + || (hash_len + salt_len + 2) > xlen) + { + return 0; + } + + /* + * Produce a random salt. + */ + salt = x + xlen - hash_len - salt_len - 1; + if (salt_len != 0) { + (*rng)->generate(rng, salt, salt_len); + } + + /* + * Compute the seed for MGF1. + */ + seed = x + xlen - hash_len - 1; + hf_data->init(&hc.vtable); + memset(seed, 0, 8); + hf_data->update(&hc.vtable, seed, 8); + hf_data->update(&hc.vtable, hash, hash_len); + hf_data->update(&hc.vtable, salt, salt_len); + hf_data->out(&hc.vtable, seed); + + /* + * Prepare string PS (padded salt). The salt is already at the + * right place. + */ + memset(x, 0, xlen - salt_len - hash_len - 2); + x[xlen - salt_len - hash_len - 2] = 0x01; + + /* + * Generate the mask and XOR it into PS. 
+ */ + br_mgf1_xor(x, xlen - hash_len - 1, hf_mgf1, seed, hash_len); + + /* + * Clear the top bits to ensure the value is lower than the + * modulus. + */ + x[0] &= 0xFF >> (((uint32_t)xlen << 3) - n_bitlen); + + /* + * The seed (H) is already in the right place. We just set the + * last byte. + */ + x[xlen - 1] = 0xBC; + + return 1; +} diff --git a/third_party/bearssl/src/rsa_pss_sig_unpad.c b/third_party/bearssl/src/rsa_pss_sig_unpad.c new file mode 100644 index 0000000..0c6ae99 --- /dev/null +++ b/third_party/bearssl/src/rsa_pss_sig_unpad.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see inner.h */ +uint32_t +br_rsa_pss_sig_unpad(const br_hash_class *hf_data, + const br_hash_class *hf_mgf1, + const unsigned char *hash, size_t salt_len, + const br_rsa_public_key *pk, unsigned char *x) +{ + size_t u, xlen, hash_len; + br_hash_compat_context hc; + unsigned char *seed, *salt; + unsigned char tmp[64]; + uint32_t r, n_bitlen; + + hash_len = br_digest_size(hf_data); + + /* + * Value r will be set to a non-zero value if any test fails. + */ + r = 0; + + /* + * The value bit length (as an integer) must be strictly less than + * that of the modulus. + */ + for (u = 0; u < pk->nlen; u ++) { + if (pk->n[u] != 0) { + break; + } + } + if (u == pk->nlen) { + return 0; + } + n_bitlen = BIT_LENGTH(pk->n[u]) + ((uint32_t)(pk->nlen - u - 1) << 3); + n_bitlen --; + if ((n_bitlen & 7) == 0) { + r |= *x ++; + } else { + r |= x[0] & (0xFF << (n_bitlen & 7)); + } + xlen = (n_bitlen + 7) >> 3; + + /* + * Check that the modulus is large enough for the hash value + * length combined with the intended salt length. + */ + if (hash_len > xlen || salt_len > xlen + || (hash_len + salt_len + 2) > xlen) + { + return 0; + } + + /* + * Check value of rightmost byte. + */ + r |= x[xlen - 1] ^ 0xBC; + + /* + * Generate the mask and XOR it into the first bytes to reveal PS; + * we must also mask out the leading bits. + */ + seed = x + xlen - hash_len - 1; + br_mgf1_xor(x, xlen - hash_len - 1, hf_mgf1, seed, hash_len); + if ((n_bitlen & 7) != 0) { + x[0] &= 0xFF >> (8 - (n_bitlen & 7)); + } + + /* + * Check that all padding bytes have the expected value. + */ + for (u = 0; u < (xlen - hash_len - salt_len - 2); u ++) { + r |= x[u]; + } + r |= x[xlen - hash_len - salt_len - 2] ^ 0x01; + + /* + * Recompute H. 
+ */ + salt = x + xlen - hash_len - salt_len - 1; + hf_data->init(&hc.vtable); + memset(tmp, 0, 8); + hf_data->update(&hc.vtable, tmp, 8); + hf_data->update(&hc.vtable, hash, hash_len); + hf_data->update(&hc.vtable, salt, salt_len); + hf_data->out(&hc.vtable, tmp); + + /* + * Check that the recomputed H value matches the one appearing + * in the string. + */ + for (u = 0; u < hash_len; u ++) { + r |= tmp[u] ^ x[(xlen - hash_len - 1) + u]; + } + + return EQ0(r); +} diff --git a/third_party/bearssl/src/rsa_ssl_decrypt.c b/third_party/bearssl/src/rsa_ssl_decrypt.c new file mode 100644 index 0000000..047eb18 --- /dev/null +++ b/third_party/bearssl/src/rsa_ssl_decrypt.c @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_ssl_decrypt(br_rsa_private core, const br_rsa_private_key *sk, + unsigned char *data, size_t len) +{ + uint32_t x; + size_t u; + + /* + * A first check on length. Since this test works only on the + * buffer length, it needs not (and cannot) be constant-time. + */ + if (len < 59 || len != (sk->n_bitlen + 7) >> 3) { + return 0; + } + x = core(data, sk); + /* Expect 0x00 0x02, non-zero bytes up to index len-50, 0x00 at len-49, then 48 trailing bytes; every check is ANDed into x with no early return, and the last 48 bytes are moved to the start of data in all cases. */ + x &= EQ(data[0], 0x00); + x &= EQ(data[1], 0x02); + for (u = 2; u < (len - 49); u ++) { + x &= NEQ(data[u], 0); + } + x &= EQ(data[len - 49], 0x00); + memmove(data, data + len - 48, 48); + return x; +} diff --git a/third_party/bearssl/src/settings.c b/third_party/bearssl/src/settings.c new file mode 100644 index 0000000..309271c --- /dev/null +++ b/third_party/bearssl/src/settings.c @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +static const br_config_option config[] = { + { "BR_64", +#if BR_64 + 1 +#else + 0 +#endif + }, + { "BR_AES_X86NI", +#if BR_AES_X86NI + 1 +#else + 0 +#endif + }, + { "BR_amd64", +#if BR_amd64 + 1 +#else + 0 +#endif + }, + { "BR_ARMEL_CORTEXM_GCC", +#if BR_ARMEL_CORTEXM_GCC + 1 +#else + 0 +#endif + }, + { "BR_BE_UNALIGNED", +#if BR_BE_UNALIGNED + 1 +#else + 0 +#endif + }, + { "BR_CLANG", +#if BR_CLANG + 1 +#else + 0 +#endif + }, + { "BR_CLANG_3_7", +#if BR_CLANG_3_7 + 1 +#else + 0 +#endif + }, + { "BR_CLANG_3_8", +#if BR_CLANG_3_8 + 1 +#else + 0 +#endif + }, + { "BR_CT_MUL15", +#if BR_CT_MUL15 + 1 +#else + 0 +#endif + }, + { "BR_CT_MUL31", +#if BR_CT_MUL31 + 1 +#else + 0 +#endif + }, + { "BR_GCC", +#if BR_GCC + 1 +#else + 0 +#endif + }, + { "BR_GCC_4_4", +#if BR_GCC_4_4 + 1 +#else + 0 +#endif + }, + { "BR_GCC_4_5", +#if BR_GCC_4_5 + 1 +#else + 0 +#endif + }, + { "BR_GCC_4_6", +#if BR_GCC_4_6 + 1 +#else + 0 +#endif + }, + { "BR_GCC_4_7", +#if BR_GCC_4_7 + 1 +#else + 0 +#endif + }, + { "BR_GCC_4_8", +#if BR_GCC_4_8 + 1 +#else + 0 +#endif + }, + { "BR_GCC_4_9", +#if BR_GCC_4_9 + 1 +#else + 0 +#endif + }, + { "BR_GCC_5_0", +#if BR_GCC_5_0 + 1 +#else + 0 +#endif + }, + { "BR_i386", +#if BR_i386 + 1 +#else + 0 +#endif + }, + { "BR_INT128", +#if BR_INT128 + 1 +#else + 0 +#endif + }, + { "BR_LE_UNALIGNED", +#if BR_LE_UNALIGNED + 1 +#else + 0 +#endif + }, + { "BR_LOMUL", +#if BR_LOMUL + 1 +#else + 0 +#endif + }, + { "BR_MAX_EC_SIZE", BR_MAX_EC_SIZE }, + { "BR_MAX_RSA_SIZE", BR_MAX_RSA_SIZE }, + { "BR_MAX_RSA_FACTOR", BR_MAX_RSA_FACTOR }, + { "BR_MSC", +#if BR_MSC + 1 +#else + 0 +#endif + }, + { "BR_MSC_2005", +#if BR_MSC_2005 + 1 +#else + 0 +#endif + }, + { "BR_MSC_2008", +#if BR_MSC_2008 + 1 +#else + 0 +#endif + }, + { "BR_MSC_2010", +#if BR_MSC_2010 + 1 +#else + 0 +#endif + }, + { "BR_MSC_2012", +#if BR_MSC_2012 + 1 +#else + 0 +#endif + }, + { "BR_MSC_2013", +#if BR_MSC_2013 + 1 +#else + 0 +#endif + }, + { "BR_MSC_2015", +#if BR_MSC_2015 + 1 
+#else + 0 +#endif + }, + { "BR_POWER8", +#if BR_POWER8 + 1 +#else + 0 +#endif + }, + { "BR_RDRAND", +#if BR_RDRAND + 1 +#else + 0 +#endif + }, + { "BR_SLOW_MUL", +#if BR_SLOW_MUL + 1 +#else + 0 +#endif + }, + { "BR_SLOW_MUL15", +#if BR_SLOW_MUL15 + 1 +#else + 0 +#endif + }, + { "BR_SSE2", +#if BR_SSE2 + 1 +#else + 0 +#endif + }, + { "BR_UMUL128", +#if BR_UMUL128 + 1 +#else + 0 +#endif + }, + { "BR_USE_UNIX_TIME", +#if BR_USE_UNIX_TIME + 1 +#else + 0 +#endif + }, + { "BR_USE_WIN32_RAND", +#if BR_USE_WIN32_RAND + 1 +#else + 0 +#endif + }, + { "BR_USE_WIN32_TIME", +#if BR_USE_WIN32_TIME + 1 +#else + 0 +#endif + }, + + { NULL, 0 } +}; + +/* see bearssl.h */ +const br_config_option * +br_get_config(void) +{ + return config; +} diff --git a/third_party/bearssl/src/sha1.c b/third_party/bearssl/src/sha1.c new file mode 100644 index 0000000..4f65d84 --- /dev/null +++ b/third_party/bearssl/src/sha1.c @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +#define F(B, C, D) ((((C) ^ (D)) & (B)) ^ (D)) +#define G(B, C, D) ((B) ^ (C) ^ (D)) +#define H(B, C, D) (((D) & (C)) | (((D) | (C)) & (B))) +#define I(B, C, D) G(B, C, D) + +#define ROTL(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) + +#define K1 ((uint32_t)0x5A827999) +#define K2 ((uint32_t)0x6ED9EBA1) +#define K3 ((uint32_t)0x8F1BBCDC) +#define K4 ((uint32_t)0xCA62C1D6) + +/* see inner.h */ +const uint32_t br_sha1_IV[5] = { + 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 +}; + +/* see inner.h */ +void +br_sha1_round(const unsigned char *buf, uint32_t *val) +{ + uint32_t m[80]; + uint32_t a, b, c, d, e; + int i; + + a = val[0]; + b = val[1]; + c = val[2]; + d = val[3]; + e = val[4]; + br_range_dec32be(m, 16, buf); + for (i = 16; i < 80; i ++) { + uint32_t x = m[i - 3] ^ m[i - 8] ^ m[i - 14] ^ m[i - 16]; + m[i] = ROTL(x, 1); + } + + for (i = 0; i < 20; i += 5) { + e += ROTL(a, 5) + F(b, c, d) + K1 + m[i + 0]; b = ROTL(b, 30); + d += ROTL(e, 5) + F(a, b, c) + K1 + m[i + 1]; a = ROTL(a, 30); + c += ROTL(d, 5) + F(e, a, b) + K1 + m[i + 2]; e = ROTL(e, 30); + b += ROTL(c, 5) + F(d, e, a) + K1 + m[i + 3]; d = ROTL(d, 30); + a += ROTL(b, 5) + F(c, d, e) + K1 + m[i + 4]; c = ROTL(c, 30); + } + for (i = 20; i < 40; i += 5) { + e += ROTL(a, 5) + G(b, c, d) + K2 + m[i + 0]; b = ROTL(b, 30); + d += ROTL(e, 5) + G(a, b, c) + K2 + m[i + 1]; a = ROTL(a, 30); + c += ROTL(d, 5) + G(e, a, b) + K2 + m[i + 2]; e = ROTL(e, 30); + b += ROTL(c, 5) + G(d, e, a) + K2 + m[i + 3]; d = ROTL(d, 30); + a += ROTL(b, 5) + G(c, d, e) + K2 + m[i + 4]; c = ROTL(c, 30); + } + for (i = 40; i < 60; i += 5) { + e += ROTL(a, 5) + H(b, c, d) + K3 + m[i + 0]; b = ROTL(b, 30); + d += ROTL(e, 5) + 
H(a, b, c) + K3 + m[i + 1]; a = ROTL(a, 30); + c += ROTL(d, 5) + H(e, a, b) + K3 + m[i + 2]; e = ROTL(e, 30); + b += ROTL(c, 5) + H(d, e, a) + K3 + m[i + 3]; d = ROTL(d, 30); + a += ROTL(b, 5) + H(c, d, e) + K3 + m[i + 4]; c = ROTL(c, 30); + } + for (i = 60; i < 80; i += 5) { + e += ROTL(a, 5) + I(b, c, d) + K4 + m[i + 0]; b = ROTL(b, 30); + d += ROTL(e, 5) + I(a, b, c) + K4 + m[i + 1]; a = ROTL(a, 30); + c += ROTL(d, 5) + I(e, a, b) + K4 + m[i + 2]; e = ROTL(e, 30); + b += ROTL(c, 5) + I(d, e, a) + K4 + m[i + 3]; d = ROTL(d, 30); + a += ROTL(b, 5) + I(c, d, e) + K4 + m[i + 4]; c = ROTL(c, 30); + } + + val[0] += a; + val[1] += b; + val[2] += c; + val[3] += d; + val[4] += e; +} + +/* see bearssl.h */ +void +br_sha1_init(br_sha1_context *cc) +{ + cc->vtable = &br_sha1_vtable; + memcpy(cc->val, br_sha1_IV, sizeof cc->val); + cc->count = 0; +} + +/* see bearssl.h */ +void +br_sha1_update(br_sha1_context *cc, const void *data, size_t len) +{ + const unsigned char *buf; + size_t ptr; + + buf = data; + ptr = (size_t)cc->count & 63; + while (len > 0) { + size_t clen; + + clen = 64 - ptr; + if (clen > len) { + clen = len; + } + memcpy(cc->buf + ptr, buf, clen); + ptr += clen; + buf += clen; + len -= clen; + cc->count += (uint64_t)clen; + if (ptr == 64) { + br_sha1_round(cc->buf, cc->val); + ptr = 0; + } + } +} + +/* see bearssl.h */ +void +br_sha1_out(const br_sha1_context *cc, void *dst) +{ + unsigned char buf[64]; + uint32_t val[5]; + size_t ptr; + + ptr = (size_t)cc->count & 63; + memcpy(buf, cc->buf, ptr); + memcpy(val, cc->val, sizeof val); + buf[ptr ++] = 0x80; + if (ptr > 56) { + memset(buf + ptr, 0, 64 - ptr); + br_sha1_round(buf, val); + memset(buf, 0, 56); + } else { + memset(buf + ptr, 0, 56 - ptr); + } + br_enc64be(buf + 56, cc->count << 3); + br_sha1_round(buf, val); + br_range_enc32be(dst, val, 5); +} + +/* see bearssl.h */ +uint64_t +br_sha1_state(const br_sha1_context *cc, void *dst) +{ + br_range_enc32be(dst, cc->val, 5); + return cc->count; +} + +/* see 
bearssl.h */ +void +br_sha1_set_state(br_sha1_context *cc, const void *stb, uint64_t count) +{ + br_range_dec32be(cc->val, 5, stb); + cc->count = count; +} + +/* see bearssl.h */ +const br_hash_class br_sha1_vtable = { + sizeof(br_sha1_context), + BR_HASHDESC_ID(br_sha1_ID) + | BR_HASHDESC_OUT(20) + | BR_HASHDESC_STATE(20) + | BR_HASHDESC_LBLEN(6) + | BR_HASHDESC_MD_PADDING + | BR_HASHDESC_MD_PADDING_BE, + (void (*)(const br_hash_class **))&br_sha1_init, + (void (*)(const br_hash_class **, const void *, size_t))&br_sha1_update, + (void (*)(const br_hash_class *const *, void *))&br_sha1_out, + (uint64_t (*)(const br_hash_class *const *, void *))&br_sha1_state, + (void (*)(const br_hash_class **, const void *, uint64_t)) + &br_sha1_set_state +}; diff --git a/third_party/bearssl/src/sha2big.c b/third_party/bearssl/src/sha2big.c new file mode 100644 index 0000000..5be92ed --- /dev/null +++ b/third_party/bearssl/src/sha2big.c @@ -0,0 +1,285 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +#define CH(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z)) +#define MAJ(X, Y, Z) (((Y) & (Z)) | (((Y) | (Z)) & (X))) + +#define ROTR(x, n) (((uint64_t)(x) << (64 - (n))) | ((uint64_t)(x) >> (n))) + +#define BSG5_0(x) (ROTR(x, 28) ^ ROTR(x, 34) ^ ROTR(x, 39)) +#define BSG5_1(x) (ROTR(x, 14) ^ ROTR(x, 18) ^ ROTR(x, 41)) +#define SSG5_0(x) (ROTR(x, 1) ^ ROTR(x, 8) ^ (uint64_t)((x) >> 7)) +#define SSG5_1(x) (ROTR(x, 19) ^ ROTR(x, 61) ^ (uint64_t)((x) >> 6)) + +static const uint64_t IV384[8] = { + 0xCBBB9D5DC1059ED8, 0x629A292A367CD507, + 0x9159015A3070DD17, 0x152FECD8F70E5939, + 0x67332667FFC00B31, 0x8EB44A8768581511, + 0xDB0C2E0D64F98FA7, 0x47B5481DBEFA4FA4 +}; + +static const uint64_t IV512[8] = { + 0x6A09E667F3BCC908, 0xBB67AE8584CAA73B, + 0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1, + 0x510E527FADE682D1, 0x9B05688C2B3E6C1F, + 0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179 +}; + +static const uint64_t K[80] = { + 0x428A2F98D728AE22, 0x7137449123EF65CD, + 0xB5C0FBCFEC4D3B2F, 0xE9B5DBA58189DBBC, + 0x3956C25BF348B538, 0x59F111F1B605D019, + 0x923F82A4AF194F9B, 0xAB1C5ED5DA6D8118, + 0xD807AA98A3030242, 0x12835B0145706FBE, + 0x243185BE4EE4B28C, 0x550C7DC3D5FFB4E2, + 0x72BE5D74F27B896F, 0x80DEB1FE3B1696B1, + 0x9BDC06A725C71235, 0xC19BF174CF692694, + 0xE49B69C19EF14AD2, 0xEFBE4786384F25E3, + 0x0FC19DC68B8CD5B5, 0x240CA1CC77AC9C65, + 0x2DE92C6F592B0275, 0x4A7484AA6EA6E483, + 0x5CB0A9DCBD41FBD4, 0x76F988DA831153B5, + 0x983E5152EE66DFAB, 0xA831C66D2DB43210, + 0xB00327C898FB213F, 0xBF597FC7BEEF0EE4, + 0xC6E00BF33DA88FC2, 0xD5A79147930AA725, + 0x06CA6351E003826F, 0x142929670A0E6E70, + 0x27B70A8546D22FFC, 0x2E1B21385C26C926, + 0x4D2C6DFC5AC42AED, 0x53380D139D95B3DF, + 0x650A73548BAF63DE, 
0x766A0ABB3C77B2A8, + 0x81C2C92E47EDAEE6, 0x92722C851482353B, + 0xA2BFE8A14CF10364, 0xA81A664BBC423001, + 0xC24B8B70D0F89791, 0xC76C51A30654BE30, + 0xD192E819D6EF5218, 0xD69906245565A910, + 0xF40E35855771202A, 0x106AA07032BBD1B8, + 0x19A4C116B8D2D0C8, 0x1E376C085141AB53, + 0x2748774CDF8EEB99, 0x34B0BCB5E19B48A8, + 0x391C0CB3C5C95A63, 0x4ED8AA4AE3418ACB, + 0x5B9CCA4F7763E373, 0x682E6FF3D6B2B8A3, + 0x748F82EE5DEFB2FC, 0x78A5636F43172F60, + 0x84C87814A1F0AB72, 0x8CC702081A6439EC, + 0x90BEFFFA23631E28, 0xA4506CEBDE82BDE9, + 0xBEF9A3F7B2C67915, 0xC67178F2E372532B, + 0xCA273ECEEA26619C, 0xD186B8C721C0C207, + 0xEADA7DD6CDE0EB1E, 0xF57D4F7FEE6ED178, + 0x06F067AA72176FBA, 0x0A637DC5A2C898A6, + 0x113F9804BEF90DAE, 0x1B710B35131C471B, + 0x28DB77F523047D84, 0x32CAAB7B40C72493, + 0x3C9EBE0A15C9BEBC, 0x431D67C49C100D4C, + 0x4CC5D4BECB3E42B6, 0x597F299CFC657E2A, + 0x5FCB6FAB3AD6FAEC, 0x6C44198C4A475817 +}; + +static void +sha2big_round(const unsigned char *buf, uint64_t *val) +{ + +#define SHA2BIG_STEP(A, B, C, D, E, F, G, H, j) do { \ + uint64_t T1, T2; \ + T1 = H + BSG5_1(E) + CH(E, F, G) + K[j] + w[j]; \ + T2 = BSG5_0(A) + MAJ(A, B, C); \ + D += T1; \ + H = T1 + T2; \ + } while (0) + + int i; + uint64_t a, b, c, d, e, f, g, h; + uint64_t w[80]; + + br_range_dec64be(w, 16, buf); + for (i = 16; i < 80; i ++) { + w[i] = SSG5_1(w[i - 2]) + w[i - 7] + + SSG5_0(w[i - 15]) + w[i - 16]; + } + a = val[0]; + b = val[1]; + c = val[2]; + d = val[3]; + e = val[4]; + f = val[5]; + g = val[6]; + h = val[7]; + for (i = 0; i < 80; i += 8) { + SHA2BIG_STEP(a, b, c, d, e, f, g, h, i + 0); + SHA2BIG_STEP(h, a, b, c, d, e, f, g, i + 1); + SHA2BIG_STEP(g, h, a, b, c, d, e, f, i + 2); + SHA2BIG_STEP(f, g, h, a, b, c, d, e, i + 3); + SHA2BIG_STEP(e, f, g, h, a, b, c, d, i + 4); + SHA2BIG_STEP(d, e, f, g, h, a, b, c, i + 5); + SHA2BIG_STEP(c, d, e, f, g, h, a, b, i + 6); + SHA2BIG_STEP(b, c, d, e, f, g, h, a, i + 7); + } + val[0] += a; + val[1] += b; + val[2] += c; + val[3] += d; + val[4] += e; + 
val[5] += f; + val[6] += g; + val[7] += h; +} + +static void +sha2big_update(br_sha384_context *cc, const void *data, size_t len) +{ + const unsigned char *buf; + size_t ptr; + + buf = data; + ptr = (size_t)cc->count & 127; + cc->count += (uint64_t)len; + while (len > 0) { + size_t clen; + + clen = 128 - ptr; + if (clen > len) { + clen = len; + } + memcpy(cc->buf + ptr, buf, clen); + ptr += clen; + buf += clen; + len -= clen; + if (ptr == 128) { + sha2big_round(cc->buf, cc->val); + ptr = 0; + } + } +} + +static void +sha2big_out(const br_sha384_context *cc, void *dst, int num) +{ + unsigned char buf[128]; + uint64_t val[8]; + size_t ptr; + + ptr = (size_t)cc->count & 127; + memcpy(buf, cc->buf, ptr); + memcpy(val, cc->val, sizeof val); + buf[ptr ++] = 0x80; + if (ptr > 112) { + memset(buf + ptr, 0, 128 - ptr); + sha2big_round(buf, val); + memset(buf, 0, 112); + } else { + memset(buf + ptr, 0, 112 - ptr); + } + br_enc64be(buf + 112, cc->count >> 61); + br_enc64be(buf + 120, cc->count << 3); + sha2big_round(buf, val); + br_range_enc64be(dst, val, num); +} + +/* see bearssl.h */ +void +br_sha384_init(br_sha384_context *cc) +{ + cc->vtable = &br_sha384_vtable; + memcpy(cc->val, IV384, sizeof IV384); + cc->count = 0; +} + +/* see bearssl.h */ +void +br_sha384_update(br_sha384_context *cc, const void *data, size_t len) +{ + sha2big_update(cc, data, len); +} + +/* see bearssl.h */ +void +br_sha384_out(const br_sha384_context *cc, void *dst) +{ + sha2big_out(cc, dst, 6); +} + +/* see bearssl.h */ +uint64_t +br_sha384_state(const br_sha384_context *cc, void *dst) +{ + br_range_enc64be(dst, cc->val, 8); + return cc->count; +} + +/* see bearssl.h */ +void +br_sha384_set_state(br_sha384_context *cc, const void *stb, uint64_t count) +{ + br_range_dec64be(cc->val, 8, stb); + cc->count = count; +} + +/* see bearssl.h */ +void +br_sha512_init(br_sha512_context *cc) +{ + cc->vtable = &br_sha512_vtable; + memcpy(cc->val, IV512, sizeof IV512); + cc->count = 0; +} + +/* see bearssl.h 
*/ +void +br_sha512_out(const br_sha512_context *cc, void *dst) +{ + sha2big_out(cc, dst, 8); +} + +/* see bearssl.h */ +const br_hash_class br_sha384_vtable = { + sizeof(br_sha384_context), + BR_HASHDESC_ID(br_sha384_ID) + | BR_HASHDESC_OUT(48) + | BR_HASHDESC_STATE(64) + | BR_HASHDESC_LBLEN(7) + | BR_HASHDESC_MD_PADDING + | BR_HASHDESC_MD_PADDING_BE + | BR_HASHDESC_MD_PADDING_128, + (void (*)(const br_hash_class **))&br_sha384_init, + (void (*)(const br_hash_class **, const void *, size_t)) + &br_sha384_update, + (void (*)(const br_hash_class *const *, void *))&br_sha384_out, + (uint64_t (*)(const br_hash_class *const *, void *))&br_sha384_state, + (void (*)(const br_hash_class **, const void *, uint64_t)) + &br_sha384_set_state +}; + +/* see bearssl.h */ +const br_hash_class br_sha512_vtable = { + sizeof(br_sha512_context), + BR_HASHDESC_ID(br_sha512_ID) + | BR_HASHDESC_OUT(64) + | BR_HASHDESC_STATE(64) + | BR_HASHDESC_LBLEN(7) + | BR_HASHDESC_MD_PADDING + | BR_HASHDESC_MD_PADDING_BE + | BR_HASHDESC_MD_PADDING_128, + (void (*)(const br_hash_class **))&br_sha512_init, + (void (*)(const br_hash_class **, const void *, size_t)) + &br_sha512_update, + (void (*)(const br_hash_class *const *, void *))&br_sha512_out, + (uint64_t (*)(const br_hash_class *const *, void *))&br_sha512_state, + (void (*)(const br_hash_class **, const void *, uint64_t)) + &br_sha512_set_state +}; diff --git a/third_party/bearssl/src/sha2small.c b/third_party/bearssl/src/sha2small.c new file mode 100644 index 0000000..ca19655 --- /dev/null +++ b/third_party/bearssl/src/sha2small.c @@ -0,0 +1,341 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of 
the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +#define CH(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z)) +#define MAJ(X, Y, Z) (((Y) & (Z)) | (((Y) | (Z)) & (X))) + +#define ROTR(x, n) (((uint32_t)(x) << (32 - (n))) | ((uint32_t)(x) >> (n))) + +#define BSG2_0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) +#define BSG2_1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) +#define SSG2_0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ (uint32_t)((x) >> 3)) +#define SSG2_1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ (uint32_t)((x) >> 10)) + +/* see inner.h */ +const uint32_t br_sha224_IV[8] = { + 0xC1059ED8, 0x367CD507, 0x3070DD17, 0xF70E5939, + 0xFFC00B31, 0x68581511, 0x64F98FA7, 0xBEFA4FA4 +}; + +/* see inner.h */ +const uint32_t br_sha256_IV[8] = { + 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, + 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 +}; + +static const uint32_t K[64] = { + 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, + 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5, + 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, + 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174, + 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, + 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA, + 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, + 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967, + 
0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, + 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85, + 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, + 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070, + 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, + 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3, + 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, + 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2 +}; + +/* see inner.h */ +void +br_sha2small_round(const unsigned char *buf, uint32_t *val) +{ + +#define SHA2_STEP(A, B, C, D, E, F, G, H, j) do { \ + uint32_t T1, T2; \ + T1 = H + BSG2_1(E) + CH(E, F, G) + K[j] + w[j]; \ + T2 = BSG2_0(A) + MAJ(A, B, C); \ + D += T1; \ + H = T1 + T2; \ + } while (0) + + int i; + uint32_t a, b, c, d, e, f, g, h; + uint32_t w[64]; + + br_range_dec32be(w, 16, buf); + for (i = 16; i < 64; i ++) { + w[i] = SSG2_1(w[i - 2]) + w[i - 7] + + SSG2_0(w[i - 15]) + w[i - 16]; + } + a = val[0]; + b = val[1]; + c = val[2]; + d = val[3]; + e = val[4]; + f = val[5]; + g = val[6]; + h = val[7]; + for (i = 0; i < 64; i += 8) { + SHA2_STEP(a, b, c, d, e, f, g, h, i + 0); + SHA2_STEP(h, a, b, c, d, e, f, g, i + 1); + SHA2_STEP(g, h, a, b, c, d, e, f, i + 2); + SHA2_STEP(f, g, h, a, b, c, d, e, i + 3); + SHA2_STEP(e, f, g, h, a, b, c, d, i + 4); + SHA2_STEP(d, e, f, g, h, a, b, c, i + 5); + SHA2_STEP(c, d, e, f, g, h, a, b, i + 6); + SHA2_STEP(b, c, d, e, f, g, h, a, i + 7); + } + val[0] += a; + val[1] += b; + val[2] += c; + val[3] += d; + val[4] += e; + val[5] += f; + val[6] += g; + val[7] += h; + +#if 0 +/* obsolete */ +#define SHA2_MEXP1(pc) do { \ + W[pc] = br_dec32be(buf + ((pc) << 2)); \ + } while (0) + +#define SHA2_MEXP2(pc) do { \ + W[(pc) & 0x0F] = SSG2_1(W[((pc) - 2) & 0x0F]) \ + + W[((pc) - 7) & 0x0F] \ + + SSG2_0(W[((pc) - 15) & 0x0F]) + W[(pc) & 0x0F]; \ + } while (0) + +#define SHA2_STEPn(n, a, b, c, d, e, f, g, h, pc) do { \ + uint32_t t1, t2; \ + SHA2_MEXP ## n(pc); \ + t1 = h + BSG2_1(e) + CH(e, f, g) \ + + K[pcount + (pc)] + W[(pc) & 0x0F]; \ + 
t2 = BSG2_0(a) + MAJ(a, b, c); \ + d += t1; \ + h = t1 + t2; \ + } while (0) + +#define SHA2_STEP1(a, b, c, d, e, f, g, h, pc) \ + SHA2_STEPn(1, a, b, c, d, e, f, g, h, pc) +#define SHA2_STEP2(a, b, c, d, e, f, g, h, pc) \ + SHA2_STEPn(2, a, b, c, d, e, f, g, h, pc) + + uint32_t A, B, C, D, E, F, G, H; + uint32_t W[16]; + unsigned pcount; + + A = val[0]; + B = val[1]; + C = val[2]; + D = val[3]; + E = val[4]; + F = val[5]; + G = val[6]; + H = val[7]; + pcount = 0; + SHA2_STEP1(A, B, C, D, E, F, G, H, 0); + SHA2_STEP1(H, A, B, C, D, E, F, G, 1); + SHA2_STEP1(G, H, A, B, C, D, E, F, 2); + SHA2_STEP1(F, G, H, A, B, C, D, E, 3); + SHA2_STEP1(E, F, G, H, A, B, C, D, 4); + SHA2_STEP1(D, E, F, G, H, A, B, C, 5); + SHA2_STEP1(C, D, E, F, G, H, A, B, 6); + SHA2_STEP1(B, C, D, E, F, G, H, A, 7); + SHA2_STEP1(A, B, C, D, E, F, G, H, 8); + SHA2_STEP1(H, A, B, C, D, E, F, G, 9); + SHA2_STEP1(G, H, A, B, C, D, E, F, 10); + SHA2_STEP1(F, G, H, A, B, C, D, E, 11); + SHA2_STEP1(E, F, G, H, A, B, C, D, 12); + SHA2_STEP1(D, E, F, G, H, A, B, C, 13); + SHA2_STEP1(C, D, E, F, G, H, A, B, 14); + SHA2_STEP1(B, C, D, E, F, G, H, A, 15); + for (pcount = 16; pcount < 64; pcount += 16) { + SHA2_STEP2(A, B, C, D, E, F, G, H, 0); + SHA2_STEP2(H, A, B, C, D, E, F, G, 1); + SHA2_STEP2(G, H, A, B, C, D, E, F, 2); + SHA2_STEP2(F, G, H, A, B, C, D, E, 3); + SHA2_STEP2(E, F, G, H, A, B, C, D, 4); + SHA2_STEP2(D, E, F, G, H, A, B, C, 5); + SHA2_STEP2(C, D, E, F, G, H, A, B, 6); + SHA2_STEP2(B, C, D, E, F, G, H, A, 7); + SHA2_STEP2(A, B, C, D, E, F, G, H, 8); + SHA2_STEP2(H, A, B, C, D, E, F, G, 9); + SHA2_STEP2(G, H, A, B, C, D, E, F, 10); + SHA2_STEP2(F, G, H, A, B, C, D, E, 11); + SHA2_STEP2(E, F, G, H, A, B, C, D, 12); + SHA2_STEP2(D, E, F, G, H, A, B, C, 13); + SHA2_STEP2(C, D, E, F, G, H, A, B, 14); + SHA2_STEP2(B, C, D, E, F, G, H, A, 15); + } + val[0] += A; + val[1] += B; + val[2] += C; + val[3] += D; + val[4] += E; + val[5] += F; + val[6] += G; + val[7] += H; +#endif +} + +static void 
+sha2small_update(br_sha224_context *cc, const void *data, size_t len) +{ + const unsigned char *buf; + size_t ptr; + + buf = data; + ptr = (size_t)cc->count & 63; + cc->count += (uint64_t)len; + while (len > 0) { + size_t clen; + + clen = 64 - ptr; + if (clen > len) { + clen = len; + } + memcpy(cc->buf + ptr, buf, clen); + ptr += clen; + buf += clen; + len -= clen; + if (ptr == 64) { + br_sha2small_round(cc->buf, cc->val); + ptr = 0; + } + } +} + +static void +sha2small_out(const br_sha224_context *cc, void *dst, int num) +{ + unsigned char buf[64]; + uint32_t val[8]; + size_t ptr; + + ptr = (size_t)cc->count & 63; + memcpy(buf, cc->buf, ptr); + memcpy(val, cc->val, sizeof val); + buf[ptr ++] = 0x80; + if (ptr > 56) { + memset(buf + ptr, 0, 64 - ptr); + br_sha2small_round(buf, val); + memset(buf, 0, 56); + } else { + memset(buf + ptr, 0, 56 - ptr); + } + br_enc64be(buf + 56, cc->count << 3); + br_sha2small_round(buf, val); + br_range_enc32be(dst, val, num); +} + +/* see bearssl.h */ +void +br_sha224_init(br_sha224_context *cc) +{ + cc->vtable = &br_sha224_vtable; + memcpy(cc->val, br_sha224_IV, sizeof cc->val); + cc->count = 0; +} + +/* see bearssl.h */ +void +br_sha224_update(br_sha224_context *cc, const void *data, size_t len) +{ + sha2small_update(cc, data, len); +} + +/* see bearssl.h */ +void +br_sha224_out(const br_sha224_context *cc, void *dst) +{ + sha2small_out(cc, dst, 7); +} + +/* see bearssl.h */ +uint64_t +br_sha224_state(const br_sha224_context *cc, void *dst) +{ + br_range_enc32be(dst, cc->val, 8); + return cc->count; +} + +/* see bearssl.h */ +void +br_sha224_set_state(br_sha224_context *cc, const void *stb, uint64_t count) +{ + br_range_dec32be(cc->val, 8, stb); + cc->count = count; +} + +/* see bearssl.h */ +void +br_sha256_init(br_sha256_context *cc) +{ + cc->vtable = &br_sha256_vtable; + memcpy(cc->val, br_sha256_IV, sizeof cc->val); + cc->count = 0; +} + +/* see bearssl.h */ +void +br_sha256_out(const br_sha256_context *cc, void *dst) +{ + 
sha2small_out(cc, dst, 8); +} + +/* see bearssl.h */ +const br_hash_class br_sha224_vtable = { + sizeof(br_sha224_context), + BR_HASHDESC_ID(br_sha224_ID) + | BR_HASHDESC_OUT(28) + | BR_HASHDESC_STATE(32) + | BR_HASHDESC_LBLEN(6) + | BR_HASHDESC_MD_PADDING + | BR_HASHDESC_MD_PADDING_BE, + (void (*)(const br_hash_class **))&br_sha224_init, + (void (*)(const br_hash_class **, + const void *, size_t))&br_sha224_update, + (void (*)(const br_hash_class *const *, void *))&br_sha224_out, + (uint64_t (*)(const br_hash_class *const *, void *))&br_sha224_state, + (void (*)(const br_hash_class **, const void *, uint64_t)) + &br_sha224_set_state +}; + +/* see bearssl.h */ +const br_hash_class br_sha256_vtable = { + sizeof(br_sha256_context), + BR_HASHDESC_ID(br_sha256_ID) + | BR_HASHDESC_OUT(32) + | BR_HASHDESC_STATE(32) + | BR_HASHDESC_LBLEN(6) + | BR_HASHDESC_MD_PADDING + | BR_HASHDESC_MD_PADDING_BE, + (void (*)(const br_hash_class **))&br_sha256_init, + (void (*)(const br_hash_class **, + const void *, size_t))&br_sha256_update, + (void (*)(const br_hash_class *const *, void *))&br_sha256_out, + (uint64_t (*)(const br_hash_class *const *, void *))&br_sha256_state, + (void (*)(const br_hash_class **, const void *, uint64_t)) + &br_sha256_set_state +}; diff --git a/third_party/bearssl/src/shake.c b/third_party/bearssl/src/shake.c new file mode 100644 index 0000000..80d7176 --- /dev/null +++ b/third_party/bearssl/src/shake.c @@ -0,0 +1,590 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above 
copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Round constants. + */ +static const uint64_t RC[] = { + 0x0000000000000001, 0x0000000000008082, + 0x800000000000808A, 0x8000000080008000, + 0x000000000000808B, 0x0000000080000001, + 0x8000000080008081, 0x8000000000008009, + 0x000000000000008A, 0x0000000000000088, + 0x0000000080008009, 0x000000008000000A, + 0x000000008000808B, 0x800000000000008B, + 0x8000000000008089, 0x8000000000008003, + 0x8000000000008002, 0x8000000000000080, + 0x000000000000800A, 0x800000008000000A, + 0x8000000080008081, 0x8000000000008080, + 0x0000000080000001, 0x8000000080008008 +}; + +/* + * XOR a block of data into the provided state. This supports only + * blocks whose length is a multiple of 64 bits. + */ +static void +xor_block(uint64_t *A, const void *data, size_t rate) +{ + size_t u; + + for (u = 0; u < rate; u += 8) { + A[u >> 3] ^= br_dec64le((const unsigned char *)data + u); + } +} + +/* + * Process a block with the provided data. The data length must be a + * multiple of 8 (in bytes); normally, this is the "rate". + */ +static void +process_block(uint64_t *A) +{ + uint64_t t0, t1, t2, t3, t4; + uint64_t tt0, tt1, tt2, tt3; + uint64_t t, kt; + uint64_t c0, c1, c2, c3, c4, bnn; + int j; + + /* + * Compute the 24 rounds. This loop is partially unrolled (each + * iteration computes two rounds). 
+ */ + for (j = 0; j < 24; j += 2) { + + tt0 = A[ 1] ^ A[ 6]; + tt1 = A[11] ^ A[16]; + tt0 ^= A[21] ^ tt1; + tt0 = (tt0 << 1) | (tt0 >> 63); + tt2 = A[ 4] ^ A[ 9]; + tt3 = A[14] ^ A[19]; + tt0 ^= A[24]; + tt2 ^= tt3; + t0 = tt0 ^ tt2; + + tt0 = A[ 2] ^ A[ 7]; + tt1 = A[12] ^ A[17]; + tt0 ^= A[22] ^ tt1; + tt0 = (tt0 << 1) | (tt0 >> 63); + tt2 = A[ 0] ^ A[ 5]; + tt3 = A[10] ^ A[15]; + tt0 ^= A[20]; + tt2 ^= tt3; + t1 = tt0 ^ tt2; + + tt0 = A[ 3] ^ A[ 8]; + tt1 = A[13] ^ A[18]; + tt0 ^= A[23] ^ tt1; + tt0 = (tt0 << 1) | (tt0 >> 63); + tt2 = A[ 1] ^ A[ 6]; + tt3 = A[11] ^ A[16]; + tt0 ^= A[21]; + tt2 ^= tt3; + t2 = tt0 ^ tt2; + + tt0 = A[ 4] ^ A[ 9]; + tt1 = A[14] ^ A[19]; + tt0 ^= A[24] ^ tt1; + tt0 = (tt0 << 1) | (tt0 >> 63); + tt2 = A[ 2] ^ A[ 7]; + tt3 = A[12] ^ A[17]; + tt0 ^= A[22]; + tt2 ^= tt3; + t3 = tt0 ^ tt2; + + tt0 = A[ 0] ^ A[ 5]; + tt1 = A[10] ^ A[15]; + tt0 ^= A[20] ^ tt1; + tt0 = (tt0 << 1) | (tt0 >> 63); + tt2 = A[ 3] ^ A[ 8]; + tt3 = A[13] ^ A[18]; + tt0 ^= A[23]; + tt2 ^= tt3; + t4 = tt0 ^ tt2; + + A[ 0] = A[ 0] ^ t0; + A[ 5] = A[ 5] ^ t0; + A[10] = A[10] ^ t0; + A[15] = A[15] ^ t0; + A[20] = A[20] ^ t0; + A[ 1] = A[ 1] ^ t1; + A[ 6] = A[ 6] ^ t1; + A[11] = A[11] ^ t1; + A[16] = A[16] ^ t1; + A[21] = A[21] ^ t1; + A[ 2] = A[ 2] ^ t2; + A[ 7] = A[ 7] ^ t2; + A[12] = A[12] ^ t2; + A[17] = A[17] ^ t2; + A[22] = A[22] ^ t2; + A[ 3] = A[ 3] ^ t3; + A[ 8] = A[ 8] ^ t3; + A[13] = A[13] ^ t3; + A[18] = A[18] ^ t3; + A[23] = A[23] ^ t3; + A[ 4] = A[ 4] ^ t4; + A[ 9] = A[ 9] ^ t4; + A[14] = A[14] ^ t4; + A[19] = A[19] ^ t4; + A[24] = A[24] ^ t4; + A[ 5] = (A[ 5] << 36) | (A[ 5] >> (64 - 36)); + A[10] = (A[10] << 3) | (A[10] >> (64 - 3)); + A[15] = (A[15] << 41) | (A[15] >> (64 - 41)); + A[20] = (A[20] << 18) | (A[20] >> (64 - 18)); + A[ 1] = (A[ 1] << 1) | (A[ 1] >> (64 - 1)); + A[ 6] = (A[ 6] << 44) | (A[ 6] >> (64 - 44)); + A[11] = (A[11] << 10) | (A[11] >> (64 - 10)); + A[16] = (A[16] << 45) | (A[16] >> (64 - 45)); + A[21] = (A[21] << 2) | (A[21] >> (64 - 
2)); + A[ 2] = (A[ 2] << 62) | (A[ 2] >> (64 - 62)); + A[ 7] = (A[ 7] << 6) | (A[ 7] >> (64 - 6)); + A[12] = (A[12] << 43) | (A[12] >> (64 - 43)); + A[17] = (A[17] << 15) | (A[17] >> (64 - 15)); + A[22] = (A[22] << 61) | (A[22] >> (64 - 61)); + A[ 3] = (A[ 3] << 28) | (A[ 3] >> (64 - 28)); + A[ 8] = (A[ 8] << 55) | (A[ 8] >> (64 - 55)); + A[13] = (A[13] << 25) | (A[13] >> (64 - 25)); + A[18] = (A[18] << 21) | (A[18] >> (64 - 21)); + A[23] = (A[23] << 56) | (A[23] >> (64 - 56)); + A[ 4] = (A[ 4] << 27) | (A[ 4] >> (64 - 27)); + A[ 9] = (A[ 9] << 20) | (A[ 9] >> (64 - 20)); + A[14] = (A[14] << 39) | (A[14] >> (64 - 39)); + A[19] = (A[19] << 8) | (A[19] >> (64 - 8)); + A[24] = (A[24] << 14) | (A[24] >> (64 - 14)); + bnn = ~A[12]; + kt = A[ 6] | A[12]; + c0 = A[ 0] ^ kt; + kt = bnn | A[18]; + c1 = A[ 6] ^ kt; + kt = A[18] & A[24]; + c2 = A[12] ^ kt; + kt = A[24] | A[ 0]; + c3 = A[18] ^ kt; + kt = A[ 0] & A[ 6]; + c4 = A[24] ^ kt; + A[ 0] = c0; + A[ 6] = c1; + A[12] = c2; + A[18] = c3; + A[24] = c4; + bnn = ~A[22]; + kt = A[ 9] | A[10]; + c0 = A[ 3] ^ kt; + kt = A[10] & A[16]; + c1 = A[ 9] ^ kt; + kt = A[16] | bnn; + c2 = A[10] ^ kt; + kt = A[22] | A[ 3]; + c3 = A[16] ^ kt; + kt = A[ 3] & A[ 9]; + c4 = A[22] ^ kt; + A[ 3] = c0; + A[ 9] = c1; + A[10] = c2; + A[16] = c3; + A[22] = c4; + bnn = ~A[19]; + kt = A[ 7] | A[13]; + c0 = A[ 1] ^ kt; + kt = A[13] & A[19]; + c1 = A[ 7] ^ kt; + kt = bnn & A[20]; + c2 = A[13] ^ kt; + kt = A[20] | A[ 1]; + c3 = bnn ^ kt; + kt = A[ 1] & A[ 7]; + c4 = A[20] ^ kt; + A[ 1] = c0; + A[ 7] = c1; + A[13] = c2; + A[19] = c3; + A[20] = c4; + bnn = ~A[17]; + kt = A[ 5] & A[11]; + c0 = A[ 4] ^ kt; + kt = A[11] | A[17]; + c1 = A[ 5] ^ kt; + kt = bnn | A[23]; + c2 = A[11] ^ kt; + kt = A[23] & A[ 4]; + c3 = bnn ^ kt; + kt = A[ 4] | A[ 5]; + c4 = A[23] ^ kt; + A[ 4] = c0; + A[ 5] = c1; + A[11] = c2; + A[17] = c3; + A[23] = c4; + bnn = ~A[ 8]; + kt = bnn & A[14]; + c0 = A[ 2] ^ kt; + kt = A[14] | A[15]; + c1 = bnn ^ kt; + kt = A[15] & A[21]; + c2 = 
A[14] ^ kt; + kt = A[21] | A[ 2]; + c3 = A[15] ^ kt; + kt = A[ 2] & A[ 8]; + c4 = A[21] ^ kt; + A[ 2] = c0; + A[ 8] = c1; + A[14] = c2; + A[15] = c3; + A[21] = c4; + A[ 0] = A[ 0] ^ RC[j + 0]; + + tt0 = A[ 6] ^ A[ 9]; + tt1 = A[ 7] ^ A[ 5]; + tt0 ^= A[ 8] ^ tt1; + tt0 = (tt0 << 1) | (tt0 >> 63); + tt2 = A[24] ^ A[22]; + tt3 = A[20] ^ A[23]; + tt0 ^= A[21]; + tt2 ^= tt3; + t0 = tt0 ^ tt2; + + tt0 = A[12] ^ A[10]; + tt1 = A[13] ^ A[11]; + tt0 ^= A[14] ^ tt1; + tt0 = (tt0 << 1) | (tt0 >> 63); + tt2 = A[ 0] ^ A[ 3]; + tt3 = A[ 1] ^ A[ 4]; + tt0 ^= A[ 2]; + tt2 ^= tt3; + t1 = tt0 ^ tt2; + + tt0 = A[18] ^ A[16]; + tt1 = A[19] ^ A[17]; + tt0 ^= A[15] ^ tt1; + tt0 = (tt0 << 1) | (tt0 >> 63); + tt2 = A[ 6] ^ A[ 9]; + tt3 = A[ 7] ^ A[ 5]; + tt0 ^= A[ 8]; + tt2 ^= tt3; + t2 = tt0 ^ tt2; + + tt0 = A[24] ^ A[22]; + tt1 = A[20] ^ A[23]; + tt0 ^= A[21] ^ tt1; + tt0 = (tt0 << 1) | (tt0 >> 63); + tt2 = A[12] ^ A[10]; + tt3 = A[13] ^ A[11]; + tt0 ^= A[14]; + tt2 ^= tt3; + t3 = tt0 ^ tt2; + + tt0 = A[ 0] ^ A[ 3]; + tt1 = A[ 1] ^ A[ 4]; + tt0 ^= A[ 2] ^ tt1; + tt0 = (tt0 << 1) | (tt0 >> 63); + tt2 = A[18] ^ A[16]; + tt3 = A[19] ^ A[17]; + tt0 ^= A[15]; + tt2 ^= tt3; + t4 = tt0 ^ tt2; + + A[ 0] = A[ 0] ^ t0; + A[ 3] = A[ 3] ^ t0; + A[ 1] = A[ 1] ^ t0; + A[ 4] = A[ 4] ^ t0; + A[ 2] = A[ 2] ^ t0; + A[ 6] = A[ 6] ^ t1; + A[ 9] = A[ 9] ^ t1; + A[ 7] = A[ 7] ^ t1; + A[ 5] = A[ 5] ^ t1; + A[ 8] = A[ 8] ^ t1; + A[12] = A[12] ^ t2; + A[10] = A[10] ^ t2; + A[13] = A[13] ^ t2; + A[11] = A[11] ^ t2; + A[14] = A[14] ^ t2; + A[18] = A[18] ^ t3; + A[16] = A[16] ^ t3; + A[19] = A[19] ^ t3; + A[17] = A[17] ^ t3; + A[15] = A[15] ^ t3; + A[24] = A[24] ^ t4; + A[22] = A[22] ^ t4; + A[20] = A[20] ^ t4; + A[23] = A[23] ^ t4; + A[21] = A[21] ^ t4; + A[ 3] = (A[ 3] << 36) | (A[ 3] >> (64 - 36)); + A[ 1] = (A[ 1] << 3) | (A[ 1] >> (64 - 3)); + A[ 4] = (A[ 4] << 41) | (A[ 4] >> (64 - 41)); + A[ 2] = (A[ 2] << 18) | (A[ 2] >> (64 - 18)); + A[ 6] = (A[ 6] << 1) | (A[ 6] >> (64 - 1)); + A[ 9] = (A[ 9] << 44) | 
(A[ 9] >> (64 - 44)); + A[ 7] = (A[ 7] << 10) | (A[ 7] >> (64 - 10)); + A[ 5] = (A[ 5] << 45) | (A[ 5] >> (64 - 45)); + A[ 8] = (A[ 8] << 2) | (A[ 8] >> (64 - 2)); + A[12] = (A[12] << 62) | (A[12] >> (64 - 62)); + A[10] = (A[10] << 6) | (A[10] >> (64 - 6)); + A[13] = (A[13] << 43) | (A[13] >> (64 - 43)); + A[11] = (A[11] << 15) | (A[11] >> (64 - 15)); + A[14] = (A[14] << 61) | (A[14] >> (64 - 61)); + A[18] = (A[18] << 28) | (A[18] >> (64 - 28)); + A[16] = (A[16] << 55) | (A[16] >> (64 - 55)); + A[19] = (A[19] << 25) | (A[19] >> (64 - 25)); + A[17] = (A[17] << 21) | (A[17] >> (64 - 21)); + A[15] = (A[15] << 56) | (A[15] >> (64 - 56)); + A[24] = (A[24] << 27) | (A[24] >> (64 - 27)); + A[22] = (A[22] << 20) | (A[22] >> (64 - 20)); + A[20] = (A[20] << 39) | (A[20] >> (64 - 39)); + A[23] = (A[23] << 8) | (A[23] >> (64 - 8)); + A[21] = (A[21] << 14) | (A[21] >> (64 - 14)); + bnn = ~A[13]; + kt = A[ 9] | A[13]; + c0 = A[ 0] ^ kt; + kt = bnn | A[17]; + c1 = A[ 9] ^ kt; + kt = A[17] & A[21]; + c2 = A[13] ^ kt; + kt = A[21] | A[ 0]; + c3 = A[17] ^ kt; + kt = A[ 0] & A[ 9]; + c4 = A[21] ^ kt; + A[ 0] = c0; + A[ 9] = c1; + A[13] = c2; + A[17] = c3; + A[21] = c4; + bnn = ~A[14]; + kt = A[22] | A[ 1]; + c0 = A[18] ^ kt; + kt = A[ 1] & A[ 5]; + c1 = A[22] ^ kt; + kt = A[ 5] | bnn; + c2 = A[ 1] ^ kt; + kt = A[14] | A[18]; + c3 = A[ 5] ^ kt; + kt = A[18] & A[22]; + c4 = A[14] ^ kt; + A[18] = c0; + A[22] = c1; + A[ 1] = c2; + A[ 5] = c3; + A[14] = c4; + bnn = ~A[23]; + kt = A[10] | A[19]; + c0 = A[ 6] ^ kt; + kt = A[19] & A[23]; + c1 = A[10] ^ kt; + kt = bnn & A[ 2]; + c2 = A[19] ^ kt; + kt = A[ 2] | A[ 6]; + c3 = bnn ^ kt; + kt = A[ 6] & A[10]; + c4 = A[ 2] ^ kt; + A[ 6] = c0; + A[10] = c1; + A[19] = c2; + A[23] = c3; + A[ 2] = c4; + bnn = ~A[11]; + kt = A[ 3] & A[ 7]; + c0 = A[24] ^ kt; + kt = A[ 7] | A[11]; + c1 = A[ 3] ^ kt; + kt = bnn | A[15]; + c2 = A[ 7] ^ kt; + kt = A[15] & A[24]; + c3 = bnn ^ kt; + kt = A[24] | A[ 3]; + c4 = A[15] ^ kt; + A[24] = c0; + A[ 3] = c1; + A[ 7] = 
c2; + A[11] = c3; + A[15] = c4; + bnn = ~A[16]; + kt = bnn & A[20]; + c0 = A[12] ^ kt; + kt = A[20] | A[ 4]; + c1 = bnn ^ kt; + kt = A[ 4] & A[ 8]; + c2 = A[20] ^ kt; + kt = A[ 8] | A[12]; + c3 = A[ 4] ^ kt; + kt = A[12] & A[16]; + c4 = A[ 8] ^ kt; + A[12] = c0; + A[16] = c1; + A[20] = c2; + A[ 4] = c3; + A[ 8] = c4; + A[ 0] = A[ 0] ^ RC[j + 1]; + t = A[ 5]; + A[ 5] = A[18]; + A[18] = A[11]; + A[11] = A[10]; + A[10] = A[ 6]; + A[ 6] = A[22]; + A[22] = A[20]; + A[20] = A[12]; + A[12] = A[19]; + A[19] = A[15]; + A[15] = A[24]; + A[24] = A[ 8]; + A[ 8] = t; + t = A[ 1]; + A[ 1] = A[ 9]; + A[ 9] = A[14]; + A[14] = A[ 2]; + A[ 2] = A[13]; + A[13] = A[23]; + A[23] = A[ 4]; + A[ 4] = A[21]; + A[21] = A[16]; + A[16] = A[ 3]; + A[ 3] = A[17]; + A[17] = A[ 7]; + A[ 7] = t; + } +} + +/* see bearssl_kdf.h */ +void +br_shake_init(br_shake_context *sc, int security_level) +{ + sc->rate = 200 - (size_t)(security_level >> 2); + sc->dptr = 0; + memset(sc->A, 0, sizeof sc->A); + sc->A[ 1] = ~(uint64_t)0; + sc->A[ 2] = ~(uint64_t)0; + sc->A[ 8] = ~(uint64_t)0; + sc->A[12] = ~(uint64_t)0; + sc->A[17] = ~(uint64_t)0; + sc->A[20] = ~(uint64_t)0; +} + +/* see bearssl_kdf.h */ +void +br_shake_inject(br_shake_context *sc, const void *data, size_t len) +{ + const unsigned char *buf; + size_t rate, dptr; + + buf = data; + rate = sc->rate; + dptr = sc->dptr; + while (len > 0) { + size_t clen; + + clen = rate - dptr; + if (clen > len) { + clen = len; + } + memcpy(sc->dbuf + dptr, buf, clen); + dptr += clen; + buf += clen; + len -= clen; + if (dptr == rate) { + xor_block(sc->A, sc->dbuf, rate); + process_block(sc->A); + dptr = 0; + } + } + sc->dptr = dptr; +} + +/* see bearssl_kdf.h */ +void +br_shake_flip(br_shake_context *sc) +{ + /* + * We apply padding and pre-XOR the value into the state. We + * set dptr to the end of the buffer, so that first call to + * shake_extract() will process the block. 
+ */ + if ((sc->dptr + 1) == sc->rate) { + sc->dbuf[sc->dptr ++] = 0x9F; + } else { + sc->dbuf[sc->dptr ++] = 0x1F; + memset(sc->dbuf + sc->dptr, 0x00, sc->rate - sc->dptr - 1); + sc->dbuf[sc->rate - 1] = 0x80; + sc->dptr = sc->rate; + } + xor_block(sc->A, sc->dbuf, sc->rate); +} + +/* see bearssl_kdf.h */ +void +br_shake_produce(br_shake_context *sc, void *out, size_t len) +{ + unsigned char *buf; + size_t dptr, rate; + + buf = out; + dptr = sc->dptr; + rate = sc->rate; + while (len > 0) { + size_t clen; + + if (dptr == rate) { + unsigned char *dbuf; + uint64_t *A; + + A = sc->A; + dbuf = sc->dbuf; + process_block(A); + br_enc64le(dbuf + 0, A[ 0]); + br_enc64le(dbuf + 8, ~A[ 1]); + br_enc64le(dbuf + 16, ~A[ 2]); + br_enc64le(dbuf + 24, A[ 3]); + br_enc64le(dbuf + 32, A[ 4]); + br_enc64le(dbuf + 40, A[ 5]); + br_enc64le(dbuf + 48, A[ 6]); + br_enc64le(dbuf + 56, A[ 7]); + br_enc64le(dbuf + 64, ~A[ 8]); + br_enc64le(dbuf + 72, A[ 9]); + br_enc64le(dbuf + 80, A[10]); + br_enc64le(dbuf + 88, A[11]); + br_enc64le(dbuf + 96, ~A[12]); + br_enc64le(dbuf + 104, A[13]); + br_enc64le(dbuf + 112, A[14]); + br_enc64le(dbuf + 120, A[15]); + br_enc64le(dbuf + 128, A[16]); + br_enc64le(dbuf + 136, ~A[17]); + br_enc64le(dbuf + 144, A[18]); + br_enc64le(dbuf + 152, A[19]); + br_enc64le(dbuf + 160, ~A[20]); + br_enc64le(dbuf + 168, A[21]); + br_enc64le(dbuf + 176, A[22]); + br_enc64le(dbuf + 184, A[23]); + br_enc64le(dbuf + 192, A[24]); + dptr = 0; + } + clen = rate - dptr; + if (clen > len) { + clen = len; + } + memcpy(buf, sc->dbuf + dptr, clen); + dptr += clen; + buf += clen; + len -= clen; + } + sc->dptr = dptr; +} diff --git a/third_party/bearssl/src/skey_decoder.c b/third_party/bearssl/src/skey_decoder.c new file mode 100644 index 0000000..9e285d7 --- /dev/null +++ b/third_party/bearssl/src/skey_decoder.c @@ -0,0 +1,650 @@ +/* Automatically generated code; do not modify directly. 
*/ + +#include <stddef.h> +#include <stdint.h> + +typedef struct { + uint32_t *dp; + uint32_t *rp; + const unsigned char *ip; +} t0_context; + +static uint32_t +t0_parse7E_unsigned(const unsigned char **p) +{ + uint32_t x; + + x = 0; + for (;;) { + unsigned y; + + y = *(*p) ++; + x = (x << 7) | (uint32_t)(y & 0x7F); + if (y < 0x80) { + return x; + } + } +} + +static int32_t +t0_parse7E_signed(const unsigned char **p) +{ + int neg; + uint32_t x; + + neg = ((**p) >> 6) & 1; + x = (uint32_t)-neg; + for (;;) { + unsigned y; + + y = *(*p) ++; + x = (x << 7) | (uint32_t)(y & 0x7F); + if (y < 0x80) { + if (neg) { + return -(int32_t)~x - 1; + } else { + return (int32_t)x; + } + } + } +} + +#define T0_VBYTE(x, n) (unsigned char)((((uint32_t)(x) >> (n)) & 0x7F) | 0x80) +#define T0_FBYTE(x, n) (unsigned char)(((uint32_t)(x) >> (n)) & 0x7F) +#define T0_SBYTE(x) (unsigned char)((((uint32_t)(x) >> 28) + 0xF8) ^ 0xF8) +#define T0_INT1(x) T0_FBYTE(x, 0) +#define T0_INT2(x) T0_VBYTE(x, 7), T0_FBYTE(x, 0) +#define T0_INT3(x) T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0) +#define T0_INT4(x) T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0) +#define T0_INT5(x) T0_SBYTE(x), T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0) + +/* static const unsigned char t0_datablock[]; */ + + +void br_skey_decoder_init_main(void *t0ctx); + +void br_skey_decoder_run(void *t0ctx); + + + +#include "inner.h" + + + + + +#include "inner.h" + +#define CTX ((br_skey_decoder_context *)(void *)((unsigned char *)t0ctx - offsetof(br_skey_decoder_context, cpu))) +#define CONTEXT_NAME br_skey_decoder_context + +/* see bearssl_x509.h */ +void +br_skey_decoder_init(br_skey_decoder_context *ctx) +{ + memset(ctx, 0, sizeof *ctx); + ctx->cpu.dp = &ctx->dp_stack[0]; + ctx->cpu.rp = &ctx->rp_stack[0]; + br_skey_decoder_init_main(&ctx->cpu); + br_skey_decoder_run(&ctx->cpu); +} + +/* see bearssl_x509.h */ +void +br_skey_decoder_push(br_skey_decoder_context *ctx, + const void *data, size_t 
len) +{ + ctx->hbuf = data; + ctx->hlen = len; + br_skey_decoder_run(&ctx->cpu); +} + + + +static const unsigned char t0_datablock[] = { + 0x00, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x07, + 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01, 0x08, 0x2A, 0x86, 0x48, 0xCE, + 0x3D, 0x03, 0x01, 0x07, 0x05, 0x2B, 0x81, 0x04, 0x00, 0x22, 0x05, 0x2B, + 0x81, 0x04, 0x00, 0x23 +}; + +static const unsigned char t0_codeblock[] = { + 0x00, 0x01, 0x01, 0x07, 0x00, 0x00, 0x01, 0x01, 0x08, 0x00, 0x00, 0x13, + 0x13, 0x00, 0x00, 0x01, T0_INT1(BR_ERR_X509_BAD_TAG_CLASS), 0x00, 0x00, + 0x01, T0_INT1(BR_ERR_X509_BAD_TAG_VALUE), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_EXTRA_ELEMENT), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_INDEFINITE_LENGTH), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_INNER_TRUNC), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_INVALID_VALUE), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_LIMIT_EXCEEDED), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_NOT_CONSTRUCTED), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_NOT_PRIMITIVE), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_OVERFLOW), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_UNEXPECTED), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_UNSUPPORTED), 0x00, 0x00, 0x01, + T0_INT1(BR_KEYTYPE_EC), 0x00, 0x00, 0x01, T0_INT1(BR_KEYTYPE_RSA), + 0x00, 0x00, 0x01, T0_INT2(offsetof(CONTEXT_NAME, key_data)), 0x00, + 0x00, 0x01, T0_INT2(offsetof(CONTEXT_NAME, key_type)), 0x00, 0x00, + 0x33, 0x48, 0x00, 0x00, 0x01, T0_INT2(offsetof(CONTEXT_NAME, pad)), + 0x00, 0x00, 0x01, 0x13, 0x00, 0x00, 0x01, 0x1C, 0x00, 0x00, 0x01, 0x22, + 0x00, 0x00, 0x05, 0x02, 0x2C, 0x16, 0x00, 0x00, 0x06, 0x02, 0x2D, 0x16, + 0x00, 0x00, 0x01, 0x10, 0x3D, 0x00, 0x00, 0x0D, 0x05, 0x02, 0x2F, 0x16, + 0x3A, 0x00, 0x00, 0x0D, 0x05, 0x02, 0x2F, 0x16, 0x3B, 0x00, 0x00, 0x06, + 0x02, 0x27, 0x16, 0x00, 0x01, 0x03, 0x00, 0x54, 0x57, 0x01, 0x02, 0x3E, + 0x55, 0x23, 0x06, 0x02, 0x30, 0x16, 0x57, 0x01, 0x04, 0x3E, 0x02, 0x00, + 0x41, 0x3F, 0x00, 0x02, 0x03, 0x00, 0x53, 0x14, 0x14, 0x03, 0x01, 0x48, + 
0x0E, 0x06, 0x02, 0x30, 0x16, 0x33, 0x4C, 0x58, 0x01, 0x7F, 0x19, 0x0D, + 0x06, 0x04, 0x13, 0x13, 0x04, 0x29, 0x01, 0x20, 0x19, 0x0D, 0x06, 0x16, + 0x13, 0x3A, 0x53, 0x4D, 0x02, 0x00, 0x06, 0x09, 0x02, 0x00, 0x0C, 0x06, + 0x02, 0x2A, 0x16, 0x04, 0x02, 0x03, 0x00, 0x3F, 0x04, 0x0D, 0x01, 0x21, + 0x19, 0x0D, 0x06, 0x04, 0x13, 0x3A, 0x04, 0x03, 0x30, 0x16, 0x13, 0x5D, + 0x02, 0x00, 0x05, 0x02, 0x30, 0x16, 0x02, 0x00, 0x02, 0x01, 0x1D, 0x00, + 0x02, 0x53, 0x4B, 0x05, 0x02, 0x30, 0x16, 0x5B, 0x15, 0x06, 0x07, 0x5D, + 0x01, 0x7F, 0x03, 0x01, 0x04, 0x16, 0x46, 0x15, 0x06, 0x10, 0x01, 0x00, + 0x03, 0x01, 0x14, 0x06, 0x03, 0x4D, 0x04, 0x02, 0x01, 0x00, 0x03, 0x00, + 0x04, 0x02, 0x30, 0x16, 0x3F, 0x57, 0x01, 0x04, 0x3E, 0x53, 0x02, 0x01, + 0x06, 0x03, 0x43, 0x04, 0x03, 0x02, 0x00, 0x40, 0x3F, 0x5D, 0x02, 0x01, + 0x06, 0x03, 0x32, 0x04, 0x01, 0x31, 0x00, 0x00, 0x54, 0x57, 0x01, 0x02, + 0x3E, 0x55, 0x06, 0x02, 0x30, 0x16, 0x57, 0x01, 0x02, 0x3E, 0x44, 0x3F, + 0x00, 0x07, 0x35, 0x50, 0x14, 0x05, 0x02, 0x2F, 0x16, 0x23, 0x01, 0x03, + 0x0B, 0x33, 0x17, 0x47, 0x07, 0x03, 0x00, 0x4F, 0x4F, 0x35, 0x4E, 0x14, + 0x14, 0x03, 0x01, 0x03, 0x02, 0x51, 0x14, 0x03, 0x03, 0x02, 0x02, 0x07, + 0x14, 0x03, 0x02, 0x51, 0x14, 0x03, 0x04, 0x02, 0x02, 0x07, 0x14, 0x03, + 0x02, 0x51, 0x14, 0x03, 0x05, 0x02, 0x02, 0x07, 0x14, 0x03, 0x02, 0x51, + 0x03, 0x06, 0x02, 0x00, 0x02, 0x01, 0x02, 0x03, 0x02, 0x04, 0x02, 0x05, + 0x02, 0x06, 0x1E, 0x00, 0x00, 0x19, 0x19, 0x00, 0x00, 0x01, 0x0B, 0x00, + 0x00, 0x01, 0x00, 0x20, 0x14, 0x06, 0x08, 0x01, 0x01, 0x21, 0x20, 0x22, + 0x20, 0x04, 0x75, 0x13, 0x00, 0x00, 0x01, + T0_INT2(3 * BR_X509_BUFSIZE_SIG), 0x00, 0x01, 0x01, 0x87, 0xFF, 0xFF, + 0x7F, 0x54, 0x57, 0x01, 0x02, 0x3E, 0x55, 0x01, 0x01, 0x0E, 0x06, 0x02, + 0x30, 0x16, 0x57, 0x01, 0x02, 0x19, 0x0D, 0x06, 0x06, 0x13, 0x3B, 0x44, + 0x32, 0x04, 0x1C, 0x01, 0x04, 0x19, 0x0D, 0x06, 0x08, 0x13, 0x3B, 0x01, + 0x00, 0x41, 0x31, 0x04, 0x0E, 0x01, 0x10, 0x19, 0x0D, 0x06, 0x05, 0x13, + 0x3A, 0x42, 0x04, 0x03, 0x30, 
0x16, 0x13, 0x03, 0x00, 0x3F, 0x02, 0x00, + 0x34, 0x1F, 0x5A, 0x27, 0x16, 0x00, 0x01, 0x45, 0x0A, 0x06, 0x02, 0x29, + 0x16, 0x14, 0x03, 0x00, 0x08, 0x02, 0x00, 0x00, 0x00, 0x57, 0x01, 0x06, + 0x3E, 0x56, 0x00, 0x00, 0x20, 0x14, 0x06, 0x07, 0x1A, 0x14, 0x06, 0x01, + 0x12, 0x04, 0x76, 0x24, 0x00, 0x00, 0x4B, 0x05, 0x02, 0x30, 0x16, 0x37, + 0x15, 0x06, 0x04, 0x01, 0x17, 0x04, 0x12, 0x38, 0x15, 0x06, 0x04, 0x01, + 0x18, 0x04, 0x0A, 0x39, 0x15, 0x06, 0x04, 0x01, 0x19, 0x04, 0x02, 0x30, + 0x16, 0x00, 0x00, 0x1C, 0x57, 0x01, 0x02, 0x3E, 0x09, 0x50, 0x00, 0x00, + 0x35, 0x4E, 0x13, 0x00, 0x03, 0x14, 0x03, 0x00, 0x03, 0x01, 0x03, 0x02, + 0x53, 0x59, 0x14, 0x01, 0x81, 0x00, 0x0F, 0x06, 0x02, 0x2E, 0x16, 0x14, + 0x01, 0x00, 0x0D, 0x06, 0x0B, 0x13, 0x14, 0x05, 0x04, 0x13, 0x01, 0x00, + 0x00, 0x59, 0x04, 0x6F, 0x02, 0x01, 0x14, 0x05, 0x02, 0x2B, 0x16, 0x23, + 0x03, 0x01, 0x02, 0x02, 0x1F, 0x02, 0x02, 0x22, 0x03, 0x02, 0x14, 0x06, + 0x03, 0x59, 0x04, 0x68, 0x13, 0x02, 0x00, 0x02, 0x01, 0x08, 0x00, 0x00, + 0x14, 0x35, 0x1C, 0x08, 0x20, 0x1C, 0x07, 0x20, 0x4E, 0x00, 0x01, 0x59, + 0x14, 0x01, 0x81, 0x00, 0x0A, 0x06, 0x01, 0x00, 0x01, 0x81, 0x00, 0x08, + 0x14, 0x05, 0x02, 0x28, 0x16, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, + 0x00, 0x0E, 0x06, 0x19, 0x02, 0x00, 0x23, 0x03, 0x00, 0x14, 0x01, 0x83, + 0xFF, 0xFF, 0x7F, 0x0E, 0x06, 0x02, 0x29, 0x16, 0x01, 0x08, 0x0B, 0x20, + 0x59, 0x1C, 0x07, 0x04, 0x60, 0x00, 0x00, 0x52, 0x4A, 0x00, 0x00, 0x57, + 0x3C, 0x53, 0x00, 0x01, 0x53, 0x14, 0x05, 0x02, 0x2E, 0x16, 0x59, 0x14, + 0x01, 0x81, 0x00, 0x0F, 0x06, 0x02, 0x2E, 0x16, 0x03, 0x00, 0x14, 0x06, + 0x16, 0x59, 0x02, 0x00, 0x14, 0x01, 0x87, 0xFF, 0xFF, 0x7F, 0x0F, 0x06, + 0x02, 0x2E, 0x16, 0x01, 0x08, 0x0B, 0x07, 0x03, 0x00, 0x04, 0x67, 0x13, + 0x02, 0x00, 0x00, 0x00, 0x53, 0x14, 0x01, 0x81, 0x7F, 0x0E, 0x06, 0x08, + 0x5C, 0x01, 0x00, 0x36, 0x1F, 0x01, 0x00, 0x00, 0x14, 0x36, 0x1F, 0x36, + 0x22, 0x4C, 0x01, 0x7F, 0x00, 0x01, 0x59, 0x03, 0x00, 0x02, 0x00, 0x01, + 0x05, 0x10, 0x01, 0x01, 0x11, 
0x18, 0x02, 0x00, 0x01, 0x06, 0x10, 0x14, + 0x01, 0x01, 0x11, 0x06, 0x02, 0x25, 0x16, 0x01, 0x04, 0x0B, 0x02, 0x00, + 0x01, 0x1F, 0x11, 0x14, 0x01, 0x1F, 0x0D, 0x06, 0x02, 0x26, 0x16, 0x07, + 0x00, 0x00, 0x14, 0x05, 0x05, 0x01, 0x00, 0x01, 0x7F, 0x00, 0x57, 0x00, + 0x00, 0x14, 0x05, 0x02, 0x29, 0x16, 0x23, 0x5A, 0x00, 0x00, 0x1B, 0x14, + 0x01, 0x00, 0x0F, 0x06, 0x01, 0x00, 0x13, 0x12, 0x04, 0x74, 0x00, 0x01, + 0x01, 0x00, 0x00, 0x5D, 0x13, 0x00, 0x00, 0x14, 0x06, 0x07, 0x5E, 0x14, + 0x06, 0x01, 0x12, 0x04, 0x76, 0x00, 0x00, 0x01, 0x00, 0x19, 0x1A, 0x09, + 0x24, 0x00 +}; + +static const uint16_t t0_caddr[] = { + 0, + 5, + 10, + 14, + 18, + 22, + 26, + 30, + 34, + 38, + 42, + 46, + 50, + 54, + 58, + 62, + 66, + 70, + 75, + 80, + 84, + 89, + 93, + 97, + 101, + 107, + 113, + 118, + 126, + 134, + 140, + 163, + 244, + 311, + 329, + 404, + 408, + 412, + 429, + 434, + 505, + 519, + 526, + 540, + 573, + 582, + 587, + 654, + 665, + 721, + 725, + 730, + 778, + 804, + 848, + 859, + 868, + 881, + 885, + 889, + 901 +}; + +#define T0_INTERPRETED 34 + +#define T0_ENTER(ip, rp, slot) do { \ + const unsigned char *t0_newip; \ + uint32_t t0_lnum; \ + t0_newip = &t0_codeblock[t0_caddr[(slot) - T0_INTERPRETED]]; \ + t0_lnum = t0_parse7E_unsigned(&t0_newip); \ + (rp) += t0_lnum; \ + *((rp) ++) = (uint32_t)((ip) - &t0_codeblock[0]) + (t0_lnum << 16); \ + (ip) = t0_newip; \ + } while (0) + +#define T0_DEFENTRY(name, slot) \ +void \ +name(void *ctx) \ +{ \ + t0_context *t0ctx = ctx; \ + t0ctx->ip = &t0_codeblock[0]; \ + T0_ENTER(t0ctx->ip, t0ctx->rp, slot); \ +} + +T0_DEFENTRY(br_skey_decoder_init_main, 73) + +#define T0_NEXT(t0ipp) (*(*(t0ipp)) ++) + +void +br_skey_decoder_run(void *t0ctx) +{ + uint32_t *dp, *rp; + const unsigned char *ip; + +#define T0_LOCAL(x) (*(rp - 2 - (x))) +#define T0_POP() (*-- dp) +#define T0_POPi() (*(int32_t *)(-- dp)) +#define T0_PEEK(x) (*(dp - 1 - (x))) +#define T0_PEEKi(x) (*(int32_t *)(dp - 1 - (x))) +#define T0_PUSH(v) do { *dp = (v); dp ++; } while (0) 
+#define T0_PUSHi(v) do { *(int32_t *)dp = (v); dp ++; } while (0) +#define T0_RPOP() (*-- rp) +#define T0_RPOPi() (*(int32_t *)(-- rp)) +#define T0_RPUSH(v) do { *rp = (v); rp ++; } while (0) +#define T0_RPUSHi(v) do { *(int32_t *)rp = (v); rp ++; } while (0) +#define T0_ROLL(x) do { \ + size_t t0len = (size_t)(x); \ + uint32_t t0tmp = *(dp - 1 - t0len); \ + memmove(dp - t0len - 1, dp - t0len, t0len * sizeof *dp); \ + *(dp - 1) = t0tmp; \ +} while (0) +#define T0_SWAP() do { \ + uint32_t t0tmp = *(dp - 2); \ + *(dp - 2) = *(dp - 1); \ + *(dp - 1) = t0tmp; \ +} while (0) +#define T0_ROT() do { \ + uint32_t t0tmp = *(dp - 3); \ + *(dp - 3) = *(dp - 2); \ + *(dp - 2) = *(dp - 1); \ + *(dp - 1) = t0tmp; \ +} while (0) +#define T0_NROT() do { \ + uint32_t t0tmp = *(dp - 1); \ + *(dp - 1) = *(dp - 2); \ + *(dp - 2) = *(dp - 3); \ + *(dp - 3) = t0tmp; \ +} while (0) +#define T0_PICK(x) do { \ + uint32_t t0depth = (x); \ + T0_PUSH(T0_PEEK(t0depth)); \ +} while (0) +#define T0_CO() do { \ + goto t0_exit; \ +} while (0) +#define T0_RET() goto t0_next + + dp = ((t0_context *)t0ctx)->dp; + rp = ((t0_context *)t0ctx)->rp; + ip = ((t0_context *)t0ctx)->ip; + goto t0_next; + for (;;) { + uint32_t t0x; + + t0_next: + t0x = T0_NEXT(&ip); + if (t0x < T0_INTERPRETED) { + switch (t0x) { + int32_t t0off; + + case 0: /* ret */ + t0x = T0_RPOP(); + rp -= (t0x >> 16); + t0x &= 0xFFFF; + if (t0x == 0) { + ip = NULL; + goto t0_exit; + } + ip = &t0_codeblock[t0x]; + break; + case 1: /* literal constant */ + T0_PUSHi(t0_parse7E_signed(&ip)); + break; + case 2: /* read local */ + T0_PUSH(T0_LOCAL(t0_parse7E_unsigned(&ip))); + break; + case 3: /* write local */ + T0_LOCAL(t0_parse7E_unsigned(&ip)) = T0_POP(); + break; + case 4: /* jump */ + t0off = t0_parse7E_signed(&ip); + ip += t0off; + break; + case 5: /* jump if */ + t0off = t0_parse7E_signed(&ip); + if (T0_POP()) { + ip += t0off; + } + break; + case 6: /* jump if not */ + t0off = t0_parse7E_signed(&ip); + if (!T0_POP()) { + ip += t0off; + 
} + break; + case 7: { + /* + */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(a + b); + + } + break; + case 8: { + /* - */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(a - b); + + } + break; + case 9: { + /* -rot */ + T0_NROT(); + } + break; + case 10: { + /* < */ + + int32_t b = T0_POPi(); + int32_t a = T0_POPi(); + T0_PUSH(-(uint32_t)(a < b)); + + } + break; + case 11: { + /* << */ + + int c = (int)T0_POPi(); + uint32_t x = T0_POP(); + T0_PUSH(x << c); + + } + break; + case 12: { + /* <> */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(-(uint32_t)(a != b)); + + } + break; + case 13: { + /* = */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(-(uint32_t)(a == b)); + + } + break; + case 14: { + /* > */ + + int32_t b = T0_POPi(); + int32_t a = T0_POPi(); + T0_PUSH(-(uint32_t)(a > b)); + + } + break; + case 15: { + /* >= */ + + int32_t b = T0_POPi(); + int32_t a = T0_POPi(); + T0_PUSH(-(uint32_t)(a >= b)); + + } + break; + case 16: { + /* >> */ + + int c = (int)T0_POPi(); + int32_t x = T0_POPi(); + T0_PUSHi(x >> c); + + } + break; + case 17: { + /* and */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(a & b); + + } + break; + case 18: { + /* co */ + T0_CO(); + } + break; + case 19: { + /* drop */ + (void)T0_POP(); + } + break; + case 20: { + /* dup */ + T0_PUSH(T0_PEEK(0)); + } + break; + case 21: { + /* eqOID */ + + const unsigned char *a2 = &t0_datablock[T0_POP()]; + const unsigned char *a1 = &CTX->pad[0]; + size_t len = a1[0]; + int x; + if (len == a2[0]) { + x = -(memcmp(a1 + 1, a2 + 1, len) == 0); + } else { + x = 0; + } + T0_PUSH((uint32_t)x); + + } + break; + case 22: { + /* fail */ + + CTX->err = T0_POPi(); + T0_CO(); + + } + break; + case 23: { + /* get8 */ + + uint32_t addr = T0_POP(); + T0_PUSH(*((unsigned char *)CTX + addr)); + + } + break; + case 24: { + /* neg */ + + uint32_t a = T0_POP(); + T0_PUSH(-a); + + } + break; + case 25: { + /* over */ + T0_PUSH(T0_PEEK(1)); + } + 
break; + case 26: { + /* read-blob-inner */ + + uint32_t len = T0_POP(); + uint32_t addr = T0_POP(); + size_t clen = CTX->hlen; + if (clen > len) { + clen = (size_t)len; + } + if (addr != 0) { + memcpy((unsigned char *)CTX + addr, CTX->hbuf, clen); + } + CTX->hbuf += clen; + CTX->hlen -= clen; + T0_PUSH(addr + clen); + T0_PUSH(len - clen); + + } + break; + case 27: { + /* read8-low */ + + if (CTX->hlen == 0) { + T0_PUSHi(-1); + } else { + CTX->hlen --; + T0_PUSH(*CTX->hbuf ++); + } + + } + break; + case 28: { + /* rot */ + T0_ROT(); + } + break; + case 29: { + /* set-ec-key */ + + size_t xlen = T0_POP(); + uint32_t curve = T0_POP(); + CTX->key.ec.curve = curve; + CTX->key.ec.x = CTX->key_data; + CTX->key.ec.xlen = xlen; + + } + break; + case 30: { + /* set-rsa-key */ + + size_t iqlen = T0_POP(); + size_t dqlen = T0_POP(); + size_t dplen = T0_POP(); + size_t qlen = T0_POP(); + size_t plen = T0_POP(); + uint32_t n_bitlen = T0_POP(); + size_t off; + + CTX->key.rsa.n_bitlen = n_bitlen; + CTX->key.rsa.p = CTX->key_data; + CTX->key.rsa.plen = plen; + off = plen; + CTX->key.rsa.q = CTX->key_data + off; + CTX->key.rsa.qlen = qlen; + off += qlen; + CTX->key.rsa.dp = CTX->key_data + off; + CTX->key.rsa.dplen = dplen; + off += dplen; + CTX->key.rsa.dq = CTX->key_data + off; + CTX->key.rsa.dqlen = dqlen; + off += dqlen; + CTX->key.rsa.iq = CTX->key_data + off; + CTX->key.rsa.iqlen = iqlen; + + } + break; + case 31: { + /* set8 */ + + uint32_t addr = T0_POP(); + *((unsigned char *)CTX + addr) = (unsigned char)T0_POP(); + + } + break; + case 32: { + /* swap */ + T0_SWAP(); + } + break; + case 33: { + /* u>> */ + + int c = (int)T0_POPi(); + uint32_t x = T0_POP(); + T0_PUSH(x >> c); + + } + break; + } + + } else { + T0_ENTER(ip, rp, t0x); + } + } +t0_exit: + ((t0_context *)t0ctx)->dp = dp; + ((t0_context *)t0ctx)->rp = rp; + ((t0_context *)t0ctx)->ip = ip; +} diff --git a/third_party/bearssl/src/ssl_ccert_single_ec.c b/third_party/bearssl/src/ssl_ccert_single_ec.c new file mode 
100644 index 0000000..93ebcde --- /dev/null +++ b/third_party/bearssl/src/ssl_ccert_single_ec.c @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +static void +cc_none0(const br_ssl_client_certificate_class **pctx) +{ + (void)pctx; +} + +static void +cc_none1(const br_ssl_client_certificate_class **pctx, size_t len) +{ + (void)pctx; + (void)len; +} + +static void +cc_none2(const br_ssl_client_certificate_class **pctx, + const unsigned char *data, size_t len) +{ + (void)pctx; + (void)data; + (void)len; +} + +static void +cc_choose(const br_ssl_client_certificate_class **pctx, + const br_ssl_client_context *cc, uint32_t auth_types, + br_ssl_client_certificate *choices) +{ + br_ssl_client_certificate_ec_context *zc; + int x; + int scurve; + + zc = (br_ssl_client_certificate_ec_context *)pctx; + scurve = br_ssl_client_get_server_curve(cc); + + if ((zc->allowed_usages & BR_KEYTYPE_KEYX) != 0 + && scurve == zc->sk->curve) + { + int x; + + x = (zc->issuer_key_type == BR_KEYTYPE_RSA) ? 16 : 17; + if (((auth_types >> x) & 1) != 0) { + choices->auth_type = BR_AUTH_ECDH; + choices->hash_id = -1; + choices->chain = zc->chain; + choices->chain_len = zc->chain_len; + } + } + + /* + * For ECDSA authentication, we must choose an appropriate + * hash function. 
+ */ + x = br_ssl_choose_hash((unsigned)(auth_types >> 8)); + if (x == 0 || (zc->allowed_usages & BR_KEYTYPE_SIGN) == 0) { + memset(choices, 0, sizeof *choices); + return; + } + choices->auth_type = BR_AUTH_ECDSA; + choices->hash_id = x; + choices->chain = zc->chain; + choices->chain_len = zc->chain_len; +} + +static uint32_t +cc_do_keyx(const br_ssl_client_certificate_class **pctx, + unsigned char *data, size_t *len) +{ + br_ssl_client_certificate_ec_context *zc; + uint32_t r; + size_t xoff, xlen; + + zc = (br_ssl_client_certificate_ec_context *)pctx; + r = zc->iec->mul(data, *len, zc->sk->x, zc->sk->xlen, zc->sk->curve); + xoff = zc->iec->xoff(zc->sk->curve, &xlen); + memmove(data, data + xoff, xlen); + *len = xlen; + return r; +} + +static size_t +cc_do_sign(const br_ssl_client_certificate_class **pctx, + int hash_id, size_t hv_len, unsigned char *data, size_t len) +{ + br_ssl_client_certificate_ec_context *zc; + unsigned char hv[64]; + const br_hash_class *hc; + + zc = (br_ssl_client_certificate_ec_context *)pctx; + memcpy(hv, data, hv_len); + hc = br_multihash_getimpl(zc->mhash, hash_id); + if (hc == NULL) { + return 0; + } + if (len < 139) { + return 0; + } + return zc->iecdsa(zc->iec, hc, hv, zc->sk, data); +} + +static const br_ssl_client_certificate_class ccert_vtable = { + sizeof(br_ssl_client_certificate_ec_context), + cc_none0, /* start_name_list */ + cc_none1, /* start_name */ + cc_none2, /* append_name */ + cc_none0, /* end_name */ + cc_none0, /* end_name_list */ + cc_choose, + cc_do_keyx, + cc_do_sign +}; + +/* see bearssl_ssl.h */ +void +br_ssl_client_set_single_ec(br_ssl_client_context *cc, + const br_x509_certificate *chain, size_t chain_len, + const br_ec_private_key *sk, unsigned allowed_usages, + unsigned cert_issuer_key_type, + const br_ec_impl *iec, br_ecdsa_sign iecdsa) +{ + cc->client_auth.single_ec.vtable = &ccert_vtable; + cc->client_auth.single_ec.chain = chain; + cc->client_auth.single_ec.chain_len = chain_len; + 
cc->client_auth.single_ec.sk = sk; + cc->client_auth.single_ec.allowed_usages = allowed_usages; + cc->client_auth.single_ec.issuer_key_type = cert_issuer_key_type; + cc->client_auth.single_ec.mhash = &cc->eng.mhash; + cc->client_auth.single_ec.iec = iec; + cc->client_auth.single_ec.iecdsa = iecdsa; + cc->client_auth_vtable = &cc->client_auth.single_ec.vtable; +} diff --git a/third_party/bearssl/src/ssl_ccert_single_rsa.c b/third_party/bearssl/src/ssl_ccert_single_rsa.c new file mode 100644 index 0000000..690df20 --- /dev/null +++ b/third_party/bearssl/src/ssl_ccert_single_rsa.c @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +static void +cc_none0(const br_ssl_client_certificate_class **pctx) +{ + (void)pctx; +} + +static void +cc_none1(const br_ssl_client_certificate_class **pctx, size_t len) +{ + (void)pctx; + (void)len; +} + +static void +cc_none2(const br_ssl_client_certificate_class **pctx, + const unsigned char *data, size_t len) +{ + (void)pctx; + (void)data; + (void)len; +} + +static void +cc_choose(const br_ssl_client_certificate_class **pctx, + const br_ssl_client_context *cc, uint32_t auth_types, + br_ssl_client_certificate *choices) +{ + br_ssl_client_certificate_rsa_context *zc; + int x; + + (void)cc; + zc = (br_ssl_client_certificate_rsa_context *)pctx; + x = br_ssl_choose_hash((unsigned)auth_types); + if (x == 0 && (auth_types & 1) == 0) { + memset(choices, 0, sizeof *choices); + } + choices->auth_type = BR_AUTH_RSA; + choices->hash_id = x; + choices->chain = zc->chain; + choices->chain_len = zc->chain_len; +} + +/* + * OID for hash functions in RSA signatures. + */ +static const unsigned char HASH_OID_SHA1[] = { + 0x05, 0x2B, 0x0E, 0x03, 0x02, 0x1A +}; + +static const unsigned char HASH_OID_SHA224[] = { + 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04 +}; + +static const unsigned char HASH_OID_SHA256[] = { + 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01 +}; + +static const unsigned char HASH_OID_SHA384[] = { + 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02 +}; + +static const unsigned char HASH_OID_SHA512[] = { + 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03 +}; + +static const unsigned char *HASH_OID[] = { + HASH_OID_SHA1, + HASH_OID_SHA224, + HASH_OID_SHA256, + HASH_OID_SHA384, + HASH_OID_SHA512 +}; + +static size_t +cc_do_sign(const br_ssl_client_certificate_class **pctx, + int hash_id, size_t hv_len, unsigned char *data, size_t len) +{ + br_ssl_client_certificate_rsa_context *zc; + unsigned char hv[64]; + const unsigned char *hash_oid; + size_t sig_len; + + zc = 
(br_ssl_client_certificate_rsa_context *)pctx; + memcpy(hv, data, hv_len); + if (hash_id == 0) { + hash_oid = NULL; + } else if (hash_id >= 2 && hash_id <= 6) { + hash_oid = HASH_OID[hash_id - 2]; + } else { + return 0; + } + sig_len = (zc->sk->n_bitlen + 7) >> 3; + if (len < sig_len) { + return 0; + } + return zc->irsasign(hash_oid, hv, hv_len, zc->sk, data) ? sig_len : 0; +} + +static const br_ssl_client_certificate_class ccert_vtable = { + sizeof(br_ssl_client_certificate_rsa_context), + cc_none0, /* start_name_list */ + cc_none1, /* start_name */ + cc_none2, /* append_name */ + cc_none0, /* end_name */ + cc_none0, /* end_name_list */ + cc_choose, + 0, + cc_do_sign +}; + +/* see bearssl_ssl.h */ +void +br_ssl_client_set_single_rsa(br_ssl_client_context *cc, + const br_x509_certificate *chain, size_t chain_len, + const br_rsa_private_key *sk, br_rsa_pkcs1_sign irsasign) +{ + cc->client_auth.single_rsa.vtable = &ccert_vtable; + cc->client_auth.single_rsa.chain = chain; + cc->client_auth.single_rsa.chain_len = chain_len; + cc->client_auth.single_rsa.sk = sk; + cc->client_auth.single_rsa.irsasign = irsasign; + cc->client_auth_vtable = &cc->client_auth.single_rsa.vtable; +} diff --git a/third_party/bearssl/src/ssl_client.c b/third_party/bearssl/src/ssl_client.c new file mode 100644 index 0000000..28c404b --- /dev/null +++ b/third_party/bearssl/src/ssl_client.c @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * 
included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_ssl.h */ +void +br_ssl_client_zero(br_ssl_client_context *cc) +{ + /* + * For really standard C, we should explicitly set to NULL all + * pointers, and 0 all other fields. However, on all our target + * architectures, a direct memset() will work, be faster, and + * use a lot less code. + */ + memset(cc, 0, sizeof *cc); +} + +/* see bearssl_ssl.h */ +int +br_ssl_client_reset(br_ssl_client_context *cc, + const char *server_name, int resume_session) +{ + size_t n; + + br_ssl_engine_set_buffer(&cc->eng, NULL, 0, 0); + cc->eng.version_out = cc->eng.version_min; + if (!resume_session) { + br_ssl_client_forget_session(cc); + } + if (!br_ssl_engine_init_rand(&cc->eng)) { + return 0; + } + + /* + * We always set back the "reneg" flag to 0 because we use it + * to distinguish between first handshake and renegotiation. + * Note that "renegotiation" and "session resumption" are two + * different things. 
+ */ + cc->eng.reneg = 0; + + if (server_name == NULL) { + cc->eng.server_name[0] = 0; + } else { + n = strlen(server_name) + 1; + if (n > sizeof cc->eng.server_name) { + br_ssl_engine_fail(&cc->eng, BR_ERR_BAD_PARAM); + return 0; + } + memcpy(cc->eng.server_name, server_name, n); + } + + br_ssl_engine_hs_reset(&cc->eng, + br_ssl_hs_client_init_main, br_ssl_hs_client_run); + return br_ssl_engine_last_error(&cc->eng) == BR_ERR_OK; +} diff --git a/third_party/bearssl/src/ssl_client_default_rsapub.c b/third_party/bearssl/src/ssl_client_default_rsapub.c new file mode 100644 index 0000000..2cdaab8 --- /dev/null +++ b/third_party/bearssl/src/ssl_client_default_rsapub.c @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see bearssl_ssl.h */ +void +br_ssl_client_set_default_rsapub(br_ssl_client_context *cc) +{ + br_ssl_client_set_rsapub(cc, br_rsa_public_get_default()); +} diff --git a/third_party/bearssl/src/ssl_client_full.c b/third_party/bearssl/src/ssl_client_full.c new file mode 100644 index 0000000..9814349 --- /dev/null +++ b/third_party/bearssl/src/ssl_client_full.c @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_ssl.h */ +void +br_ssl_client_init_full(br_ssl_client_context *cc, + br_x509_minimal_context *xc, + const br_x509_trust_anchor *trust_anchors, size_t trust_anchors_num) +{ + /* + * The "full" profile supports all implemented cipher suites. 
+ * + * Rationale for suite order, from most important to least + * important rule: + * + * -- Don't use 3DES if AES or ChaCha20 is available. + * -- Try to have Forward Secrecy (ECDHE suite) if possible. + * -- When not using Forward Secrecy, ECDH key exchange is + * better than RSA key exchange (slightly more expensive on the + * client, but much cheaper on the server, and it implies smaller + * messages). + * -- ChaCha20+Poly1305 is better than AES/GCM (faster, smaller code). + * -- GCM is better than CCM and CBC. CCM is better than CBC. + * -- CCM is preferable over CCM_8 (with CCM_8, forgeries may succeed + * with probability 2^(-64)). + * -- AES-128 is preferred over AES-256 (AES-128 is already + * strong enough, and AES-256 is 40% more expensive). + */ + static const uint16_t suites[] = { + BR_TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256, + BR_TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256, + BR_TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, + BR_TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, + BR_TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, + BR_TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384, + BR_TLS_ECDHE_ECDSA_WITH_AES_128_CCM, + BR_TLS_ECDHE_ECDSA_WITH_AES_256_CCM, + BR_TLS_ECDHE_ECDSA_WITH_AES_128_CCM_8, + BR_TLS_ECDHE_ECDSA_WITH_AES_256_CCM_8, + BR_TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, + BR_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256, + BR_TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384, + BR_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384, + BR_TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, + BR_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, + BR_TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA, + BR_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, + BR_TLS_ECDH_ECDSA_WITH_AES_128_GCM_SHA256, + BR_TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256, + BR_TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384, + BR_TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384, + BR_TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA256, + BR_TLS_ECDH_RSA_WITH_AES_128_CBC_SHA256, + BR_TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384, + BR_TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384, + BR_TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA, + 
BR_TLS_ECDH_RSA_WITH_AES_128_CBC_SHA, + BR_TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA, + BR_TLS_ECDH_RSA_WITH_AES_256_CBC_SHA, + BR_TLS_RSA_WITH_AES_128_GCM_SHA256, + BR_TLS_RSA_WITH_AES_256_GCM_SHA384, + BR_TLS_RSA_WITH_AES_128_CCM, + BR_TLS_RSA_WITH_AES_256_CCM, + BR_TLS_RSA_WITH_AES_128_CCM_8, + BR_TLS_RSA_WITH_AES_256_CCM_8, + BR_TLS_RSA_WITH_AES_128_CBC_SHA256, + BR_TLS_RSA_WITH_AES_256_CBC_SHA256, + BR_TLS_RSA_WITH_AES_128_CBC_SHA, + BR_TLS_RSA_WITH_AES_256_CBC_SHA, + BR_TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA, + BR_TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA, + BR_TLS_ECDH_ECDSA_WITH_3DES_EDE_CBC_SHA, + BR_TLS_ECDH_RSA_WITH_3DES_EDE_CBC_SHA, + BR_TLS_RSA_WITH_3DES_EDE_CBC_SHA + }; + + /* + * All hash functions are activated. + * Note: the X.509 validation engine will nonetheless refuse to + * validate signatures that use MD5 as hash function. + */ + static const br_hash_class *hashes[] = { + &br_md5_vtable, + &br_sha1_vtable, + &br_sha224_vtable, + &br_sha256_vtable, + &br_sha384_vtable, + &br_sha512_vtable + }; + + int id; + + /* + * Reset client context and set supported versions from TLS-1.0 + * to TLS-1.2 (inclusive). + */ + br_ssl_client_zero(cc); + br_ssl_engine_set_versions(&cc->eng, BR_TLS10, BR_TLS12); + + /* + * X.509 engine uses SHA-256 to hash certificate DN (for + * comparisons). + */ + br_x509_minimal_init(xc, &br_sha256_vtable, + trust_anchors, trust_anchors_num); + + /* + * Set suites and asymmetric crypto implementations. We use the + * "i31" code for RSA (it is somewhat faster than the "i32" + * implementation). + * TODO: change that when better implementations are made available. 
+ */ + br_ssl_engine_set_suites(&cc->eng, suites, + (sizeof suites) / (sizeof suites[0])); + br_ssl_client_set_default_rsapub(cc); + br_ssl_engine_set_default_rsavrfy(&cc->eng); + br_ssl_engine_set_default_ecdsa(&cc->eng); + br_x509_minimal_set_rsa(xc, br_ssl_engine_get_rsavrfy(&cc->eng)); + br_x509_minimal_set_ecdsa(xc, + br_ssl_engine_get_ec(&cc->eng), + br_ssl_engine_get_ecdsa(&cc->eng)); + + /* + * Set supported hash functions, for the SSL engine and for the + * X.509 engine. + */ + for (id = br_md5_ID; id <= br_sha512_ID; id ++) { + const br_hash_class *hc; + + hc = hashes[id - 1]; + br_ssl_engine_set_hash(&cc->eng, id, hc); + br_x509_minimal_set_hash(xc, id, hc); + } + + /* + * Link the X.509 engine in the SSL engine. + */ + br_ssl_engine_set_x509(&cc->eng, &xc->vtable); + + /* + * Set the PRF implementations. + */ + br_ssl_engine_set_prf10(&cc->eng, &br_tls10_prf); + br_ssl_engine_set_prf_sha256(&cc->eng, &br_tls12_sha256_prf); + br_ssl_engine_set_prf_sha384(&cc->eng, &br_tls12_sha384_prf); + + /* + * Symmetric encryption. We use the "default" implementations + * (fastest among constant-time implementations). 
+ */ + br_ssl_engine_set_default_aes_cbc(&cc->eng); + br_ssl_engine_set_default_aes_ccm(&cc->eng); + br_ssl_engine_set_default_aes_gcm(&cc->eng); + br_ssl_engine_set_default_des_cbc(&cc->eng); + br_ssl_engine_set_default_chapol(&cc->eng); +} diff --git a/third_party/bearssl/src/ssl_engine.c b/third_party/bearssl/src/ssl_engine.c new file mode 100644 index 0000000..f59fe1a --- /dev/null +++ b/third_party/bearssl/src/ssl_engine.c @@ -0,0 +1,1584 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +#if 0 +/* obsolete */ + +/* + * If BR_USE_URANDOM is not defined, then try to autodetect its presence + * through compiler macros. + */ +#ifndef BR_USE_URANDOM + +/* + * Macro values documented on: + * https://sourceforge.net/p/predef/wiki/OperatingSystems/ + * + * Only the most common systems have been included here for now. 
This + * should be enriched later on. + */ +#if defined _AIX \ + || defined __ANDROID__ \ + || defined __FreeBSD__ \ + || defined __NetBSD__ \ + || defined __OpenBSD__ \ + || defined __DragonFly__ \ + || defined __linux__ \ + || (defined __sun && (defined __SVR4 || defined __svr4__)) \ + || (defined __APPLE__ && defined __MACH__) +#define BR_USE_URANDOM 1 +#endif + +#endif + +/* + * If BR_USE_WIN32_RAND is not defined, perform autodetection here. + */ +#ifndef BR_USE_WIN32_RAND + +#if defined _WIN32 || defined _WIN64 +#define BR_USE_WIN32_RAND 1 +#endif + +#endif + +#if BR_USE_URANDOM +#include <sys/types.h> +#include <unistd.h> +#include <fcntl.h> +#include <errno.h> +#endif + +#if BR_USE_WIN32_RAND +#include <windows.h> +#include <wincrypt.h> +#pragma comment(lib, "advapi32") +#endif + +#endif + +/* ==================================================================== */ +/* + * This part of the file does the low-level record management. + */ + +/* + * IMPLEMENTATION NOTES + * ==================== + * + * In this file, we designate by "input" (and the "i" letter) the "recv" + * operations: incoming records from the peer, from which payload data + * is obtained, and must be extracted by the application (or the SSL + * handshake engine). Similarly, "output" (and the "o" letter) is for + * "send": payload data injected by the application (and SSL handshake + * engine), to be wrapped into records, that are then conveyed to the + * peer over the transport medium. + * + * The input and output buffers may be distinct or shared. When + * shared, input and output cannot occur concurrently; the caller + * must make sure that it never needs to output data while input + * data has been received. In practice, a shared buffer prevents + * pipelining of HTTP requests, or similar protocols; however, a + * shared buffer saves RAM. + * + * The input buffer is pointed to by 'ibuf' and has size 'ibuf_len'; + * the output buffer is pointed to by 'obuf' and has size 'obuf_len'. 
+ * From the size of these buffers is derived the maximum fragment + * length, which will be honoured upon sending records; regardless of + * that length, incoming records will be processed as long as they + * fit in the input buffer, and their length still complies with the + * protocol specification (maximum plaintext payload length is 16384 + * bytes). + * + * Three registers are used to manage buffering in ibuf, called ixa, + * ixb and ixc. Similarly, three registers are used to manage buffering + * in obuf, called oxa, oxb and oxc. + * + * + * At any time, the engine is in one of the following modes: + * -- Failed mode: an error occurs, no I/O can happen. + * -- Input mode: the engine can either receive record bytes from the + * transport layer, or it has some buffered payload bytes to yield. + * -- Output mode: the engine can either receive payload bytes, or it + * has some record bytes to send to the transport layer. + * -- Input/Output mode: both input and output modes are active. When + * the buffer is shared, this can happen only when the buffer is empty + * (no buffered payload bytes or record bytes in either direction). + * + * + * Failed mode: + * ------------ + * + * I/O failed for some reason (invalid received data, not enough room + * for the next record...). No I/O may ever occur again for this context, + * until an explicit reset is performed. This mode, and the error code, + * are also used for protocol errors, especially handshake errors. + * + * + * Input mode: + * ----------- + * + * ixa index within ibuf[] for the currently read data + * ixb maximum index within ibuf[] for the currently read data + * ixc number of bytes not yet received for the current record + * + * -- When ixa == ixb, there is no available data for readers. When + * ixa != ixb, there is available data and it starts at offset ixa. + * + * -- When waiting for the next record header, ixa and ixb are equal + * and contain a value ranging from 0 to 4; ixc is equal to 5-ixa. 
+ * + * -- When the header has been received, record data is obtained. The + * ixc field records how many bytes are still needed to reach the + * end of the current record. + * + * ** If encryption is active, then ixa and ixb are kept equal, and + * point to the end of the currently received record bytes. When + * ixc reaches 0, decryption/MAC is applied, and ixa and ixb are + * adjusted. + * + * ** If encryption is not active, then ixa and ixb are distinct + * and data can be read right away. Additional record data is + * obtained only when ixa == ixb. + * + * Note: in input mode and no encryption, records larger than the buffer + * size are allowed. When encryption is active, the complete record must + * fit within the buffer, since it cannot be decrypted/MACed until it + * has been completely received. + * + * -- When receiving the next record header, 'version_in' contains the + * expected input version (0 if not expecting a specific version); on + * mismatch, the mode switches to 'failed'. + * + * -- When the header has been received, 'version_in' contains the received + * version. It is up to the caller to check and adjust the 'version_in' field + * to implement the required semantics. + * + * -- The 'record_type_in' field is updated with the incoming record type + * when the next record header has been received. + * + * + * Output mode: + * ------------ + * + * oxa index within obuf[] for the currently accumulated data + * oxb maximum index within obuf[] for record data + * oxc pointer for start of record data, and for record sending + * + * -- When oxa != oxb, more data can be accumulated into the current + * record; when oxa == oxb, a closed record is being sent. + * + * -- When accumulating data, oxc points to the start of the data. + * + * -- During record sending, oxa (and oxb) point to the next record byte + * to send, and oxc indicates the end of the current record. 
+ * + * Note: sent records must fit within the buffer, since the header is + * adjusted only when the complete record has been assembled. + * + * -- The 'version_out' and 'record_type_out' fields are used to build the + * record header when the mode is switched to 'sending'. + * + * + * Modes: + * ------ + * + * The state register iomode contains one of the following values: + * + * BR_IO_FAILED I/O failed + * BR_IO_IN input mode + * BR_IO_OUT output mode + * BR_IO_INOUT input/output mode + * + * Whether encryption is active on incoming records is indicated by the + * incrypt flag. For outgoing records, there is no such flag; "encryption" + * is always considered active, but initially uses functions that do not + * encrypt anything. The 'incrypt' flag is needed because when there is + * no active encryption, records larger than the I/O buffer are accepted. + * + * Note: we do not support no-encryption modes (MAC only). + * + * TODO: implement GCM support + * + * + * Misc: + * ----- + * + * 'max_frag_len' is the maximum plaintext size for an outgoing record. + * By default, it is set to the maximum value that fits in the provided + * buffers, in the following list: 512, 1024, 2048, 4096, 16384. The + * caller may change it if needed, but the new value MUST still fit in + * the buffers, and it MUST be one of the list above for compatibility + * with the Maximum Fragment Length extension. + * + * For incoming records, only the total buffer length and current + * encryption mode impact the maximum length for incoming records. The + * 'max_frag_len' value is still adjusted so that records up to that + * length can be both received and sent. + * + * + * Offsets and lengths: + * -------------------- + * + * When sending fragments with TLS-1.1+, the maximum overhead is: + * 5 bytes for the record header + * 16 bytes for the explicit IV + * 48 bytes for the MAC (HMAC/SHA-384) + * 16 bytes for the padding (AES) + * so a total of 85 extra bytes. 
Note that we support block cipher sizes + * up to 16 bytes (AES) and HMAC output sizes up to 48 bytes (SHA-384). + * + * With TLS-1.0 and CBC mode, we apply a 1/n-1 split, for a maximum + * overhead of: + * 5 bytes for the first record header + * 32 bytes for the first record payload (AES-CBC + HMAC/SHA-1) + * 5 bytes for the second record header + * 20 bytes for the MAC (HMAC/SHA-1) + * 16 bytes for the padding (AES) + * -1 byte to account for the payload byte in the first record + * so a total of 77 extra bytes at most, less than the 85 bytes above. + * Note that with TLS-1.0, the MAC is HMAC with either MD5 or SHA-1, but + * no other hash function. + * + * The implementation does not try to send larger records when the current + * encryption mode has less overhead. + * + * Maximum input record overhead is: + * 5 bytes for the record header + * 16 bytes for the explicit IV (TLS-1.1+) + * 48 bytes for the MAC (HMAC/SHA-384) + * 256 bytes for the padding + * so a total of 325 extra bytes. + * + * When receiving the next record header, it is written into the buffer + * bytes 0 to 4 (inclusive). Record data is always written into buf[] + * starting at offset 5. When encryption is active, the plaintext data + * may start at a larger offset (e.g. because of an explicit IV). + */ + +#define MAX_OUT_OVERHEAD 85 +#define MAX_IN_OVERHEAD 325 + +/* see inner.h */ +void +br_ssl_engine_fail(br_ssl_engine_context *rc, int err) +{ + if (rc->iomode != BR_IO_FAILED) { + rc->iomode = BR_IO_FAILED; + rc->err = err; + } +} + +/* + * Adjust registers for a new incoming record. + */ +static void +make_ready_in(br_ssl_engine_context *rc) +{ + rc->ixa = rc->ixb = 0; + rc->ixc = 5; + if (rc->iomode == BR_IO_IN) { + rc->iomode = BR_IO_INOUT; + } +} + +/* + * Adjust registers for a new outgoing record. 
+ */ +static void +make_ready_out(br_ssl_engine_context *rc) +{ + size_t a, b; + + a = 5; + b = rc->obuf_len - a; + rc->out.vtable->max_plaintext(&rc->out.vtable, &a, &b); + if ((b - a) > rc->max_frag_len) { + b = a + rc->max_frag_len; + } + rc->oxa = a; + rc->oxb = b; + rc->oxc = a; + if (rc->iomode == BR_IO_OUT) { + rc->iomode = BR_IO_INOUT; + } +} + +/* see inner.h */ +void +br_ssl_engine_new_max_frag_len(br_ssl_engine_context *rc, unsigned max_frag_len) +{ + size_t nxb; + + rc->max_frag_len = max_frag_len; + nxb = rc->oxc + max_frag_len; + if (rc->oxa < rc->oxb && rc->oxb > nxb && rc->oxa < nxb) { + rc->oxb = nxb; + } +} + +/* see bearssl_ssl.h */ +void +br_ssl_engine_set_buffer(br_ssl_engine_context *rc, + void *buf, size_t buf_len, int bidi) +{ + if (buf == NULL) { + br_ssl_engine_set_buffers_bidi(rc, NULL, 0, NULL, 0); + } else { + /* + * In bidirectional mode, we want to maximise input + * buffer size, since we support arbitrary fragmentation + * when sending, but the peer will not necessarily + * comply to any low fragment length (in particular if + * we are the server, because the maximum fragment + * length extension is under client control). + * + * We keep a minimum size of 512 bytes for the plaintext + * of our outgoing records. + * + * br_ssl_engine_set_buffers_bidi() will compute the maximum + * fragment length for outgoing records by using the minimum + * of allocated spaces for both input and output records, + * rounded down to a standard length. 
+ */ + if (bidi) { + size_t w; + + if (buf_len < (512 + MAX_IN_OVERHEAD + + 512 + MAX_OUT_OVERHEAD)) + { + rc->iomode = BR_IO_FAILED; + rc->err = BR_ERR_BAD_PARAM; + return; + } else if (buf_len < (16384 + MAX_IN_OVERHEAD + + 512 + MAX_OUT_OVERHEAD)) + { + w = 512 + MAX_OUT_OVERHEAD; + } else { + w = buf_len - (16384 + MAX_IN_OVERHEAD); + } + br_ssl_engine_set_buffers_bidi(rc, + buf, buf_len - w, + (unsigned char *)buf + w, w); + } else { + br_ssl_engine_set_buffers_bidi(rc, + buf, buf_len, NULL, 0); + } + } +} + +/* see bearssl_ssl.h */ +void +br_ssl_engine_set_buffers_bidi(br_ssl_engine_context *rc, + void *ibuf, size_t ibuf_len, void *obuf, size_t obuf_len) +{ + rc->iomode = BR_IO_INOUT; + rc->incrypt = 0; + rc->err = BR_ERR_OK; + rc->version_in = 0; + rc->record_type_in = 0; + rc->version_out = 0; + rc->record_type_out = 0; + if (ibuf == NULL) { + if (rc->ibuf == NULL) { + br_ssl_engine_fail(rc, BR_ERR_BAD_PARAM); + } + } else { + unsigned u; + + rc->ibuf = ibuf; + rc->ibuf_len = ibuf_len; + if (obuf == NULL) { + obuf = ibuf; + obuf_len = ibuf_len; + } + rc->obuf = obuf; + rc->obuf_len = obuf_len; + + /* + * Compute the maximum fragment length, that fits for + * both incoming and outgoing records. This length will + * be used in fragment length negotiation, so we must + * honour it both ways. Regardless, larger incoming + * records will be accepted, as long as they fit in the + * actual buffer size. + */ + for (u = 14; u >= 9; u --) { + size_t flen; + + flen = (size_t)1 << u; + if (obuf_len >= flen + MAX_OUT_OVERHEAD + && ibuf_len >= flen + MAX_IN_OVERHEAD) + { + break; + } + } + if (u == 8) { + br_ssl_engine_fail(rc, BR_ERR_BAD_PARAM); + return; + } else if (u == 13) { + u = 12; + } + rc->max_frag_len = (size_t)1 << u; + rc->log_max_frag_len = u; + rc->peer_log_max_frag_len = 0; + } + rc->out.vtable = &br_sslrec_out_clear_vtable; + make_ready_in(rc); + make_ready_out(rc); +} + +/* + * Clear buffers in both directions. 
+ */ +static void +engine_clearbuf(br_ssl_engine_context *rc) +{ + make_ready_in(rc); + make_ready_out(rc); +} + +/* + * Make sure the internal PRNG is initialised (but not necessarily + * seeded properly yet). + */ +static int +rng_init(br_ssl_engine_context *cc) +{ + const br_hash_class *h; + + if (cc->rng_init_done != 0) { + return 1; + } + + /* + * If using TLS-1.2, then SHA-256 or SHA-384 must be present (or + * both); we prefer SHA-256 which is faster for 32-bit systems. + * + * If using TLS-1.0 or 1.1 then SHA-1 must be present. + * + * Though HMAC_DRBG/SHA-1 is, as far as we know, as safe as + * these things can be, we still prefer the SHA-2 functions over + * SHA-1, if only for public relations (known theoretical + * weaknesses of SHA-1 with regards to collisions are mostly + * irrelevant here, but they still make people nervous). + */ + h = br_multihash_getimpl(&cc->mhash, br_sha256_ID); + if (!h) { + h = br_multihash_getimpl(&cc->mhash, br_sha384_ID); + if (!h) { + h = br_multihash_getimpl(&cc->mhash, + br_sha1_ID); + if (!h) { + br_ssl_engine_fail(cc, BR_ERR_BAD_STATE); + return 0; + } + } + } + br_hmac_drbg_init(&cc->rng, h, NULL, 0); + cc->rng_init_done = 1; + return 1; +} + +/* see inner.h */ +int +br_ssl_engine_init_rand(br_ssl_engine_context *cc) +{ + if (!rng_init(cc)) { + return 0; + } + + /* + * We always try OS/hardware seeding once. If it works, then + * we assume proper seeding. If not, then external entropy must + * have been injected; otherwise, we report an error. 
+ */ + if (!cc->rng_os_rand_done) { + br_prng_seeder sd; + + sd = br_prng_seeder_system(NULL); + if (sd != 0 && sd(&cc->rng.vtable)) { + cc->rng_init_done = 2; + } + cc->rng_os_rand_done = 1; + } + if (cc->rng_init_done < 2) { + br_ssl_engine_fail(cc, BR_ERR_NO_RANDOM); + return 0; + } + return 1; +} + +/* see bearssl_ssl.h */ +void +br_ssl_engine_inject_entropy(br_ssl_engine_context *cc, + const void *data, size_t len) +{ + /* + * Externally provided entropy is assumed to be "good enough" + * (we cannot really test its quality) so if the RNG structure + * could be initialised at all, then we marked the RNG as + * "properly seeded". + */ + if (!rng_init(cc)) { + return; + } + br_hmac_drbg_update(&cc->rng, data, len); + cc->rng_init_done = 2; +} + +/* + * We define a few internal functions that implement the low-level engine + * API for I/O; the external API (br_ssl_engine_sendapp_buf() and similar + * functions) is built upon these function, with special processing for + * records which are not of type "application data". + * + * recvrec_buf, recvrec_ack receives bytes from transport medium + * sendrec_buf, sendrec_ack send bytes to transport medium + * recvpld_buf, recvpld_ack receives payload data from engine + * sendpld_buf, sendpld_ack send payload data to engine + */ + +static unsigned char * +recvrec_buf(const br_ssl_engine_context *rc, size_t *len) +{ + if (rc->shutdown_recv) { + *len = 0; + return NULL; + } + + /* + * Bytes from the transport can be injected only if the mode is + * compatible (in or in/out), and ixa == ixb; ixc then contains + * the number of bytes that are still expected (but it may + * exceed our buffer size). + * + * We cannot get "stuck" here (buffer is full, but still more + * data is expected) because oversized records are detected when + * their header is processed. 
+ */ + switch (rc->iomode) { + case BR_IO_IN: + case BR_IO_INOUT: + if (rc->ixa == rc->ixb) { + size_t z; + + z = rc->ixc; + if (z > rc->ibuf_len - rc->ixa) { + z = rc->ibuf_len - rc->ixa; + } + *len = z; + return rc->ibuf + rc->ixa; + } + break; + } + *len = 0; + return NULL; +} + +static void +recvrec_ack(br_ssl_engine_context *rc, size_t len) +{ + unsigned char *pbuf; + size_t pbuf_len; + + /* + * Adjust state if necessary (for a shared input/output buffer): + * we got some incoming bytes, so we cannot (temporarily) handle + * outgoing data. + */ + if (rc->iomode == BR_IO_INOUT && rc->ibuf == rc->obuf) { + rc->iomode = BR_IO_IN; + } + + /* + * Adjust data pointers. + */ + rc->ixb = (rc->ixa += len); + rc->ixc -= len; + + /* + * If we are receiving a header and did not fully obtained it + * yet, then just wait for the next bytes. + */ + if (rc->ixa < 5) { + return; + } + + /* + * If we just obtained a full header, process it. + */ + if (rc->ixa == 5) { + unsigned version; + unsigned rlen; + + /* + * Get record type and version. We support only versions + * 3.x (if the version major number does not match, then + * we suppose that the record format is too alien for us + * to process it). + * + * Note: right now, we reject clients that try to send + * a ClientHello in a format compatible with SSL-2.0. It + * is unclear whether this will ever be supported; and + * if we want to support it, then this might be done in + * in the server-specific code, not here. + */ + rc->record_type_in = rc->ibuf[0]; + version = br_dec16be(rc->ibuf + 1); + if ((version >> 8) != 3) { + br_ssl_engine_fail(rc, BR_ERR_UNSUPPORTED_VERSION); + return; + } + + /* + * We ensure that successive records have the same + * version. The handshake code must check and adjust the + * variables when necessary to accommodate the protocol + * negotiation details. 
+ */ + if (rc->version_in != 0 && rc->version_in != version) { + br_ssl_engine_fail(rc, BR_ERR_BAD_VERSION); + return; + } + rc->version_in = version; + + /* + * Decode record length. We must check that the length + * is valid (relatively to the current encryption mode) + * and also (if encryption is active) that the record + * will fit in our buffer. + * + * When no encryption is active, we can process records + * by chunks, and thus accept any record up to the + * maximum allowed plaintext length (16384 bytes). + */ + rlen = br_dec16be(rc->ibuf + 3); + if (rc->incrypt) { + if (!rc->in.vtable->check_length( + &rc->in.vtable, rlen)) + { + br_ssl_engine_fail(rc, BR_ERR_BAD_LENGTH); + return; + } + if (rlen > (rc->ibuf_len - 5)) { + br_ssl_engine_fail(rc, BR_ERR_TOO_LARGE); + return; + } + } else { + if (rlen > 16384) { + br_ssl_engine_fail(rc, BR_ERR_BAD_LENGTH); + return; + } + } + + /* + * If the record is completely empty then we must switch + * to a new record. Note that, in that case, we + * completely ignore the record type, which is fitting + * since we received no actual data of that type. + * + * A completely empty record is technically allowed as + * long as encryption/MAC is not active, i.e. before + * completion of the first handshake. It it still weird; + * it might conceptually be useful as a heartbeat or + * keep-alive mechanism while some lengthy operation is + * going on, e.g. interaction with a human user. + */ + if (rlen == 0) { + make_ready_in(rc); + } else { + rc->ixa = rc->ixb = 5; + rc->ixc = rlen; + } + return; + } + + /* + * If there is no active encryption, then the data can be read + * right away. Note that we do not receive bytes from the + * transport medium when we still have payload bytes to be + * acknowledged. + */ + if (!rc->incrypt) { + rc->ixa = 5; + return; + } + + /* + * Since encryption is active, we must wait for a full record + * before processing it. + */ + if (rc->ixc != 0) { + return; + } + + /* + * We got the full record. 
Decrypt it. + */ + pbuf_len = rc->ixa - 5; + pbuf = rc->in.vtable->decrypt(&rc->in.vtable, + rc->record_type_in, rc->version_in, rc->ibuf + 5, &pbuf_len); + if (pbuf == 0) { + br_ssl_engine_fail(rc, BR_ERR_BAD_MAC); + return; + } + rc->ixa = (size_t)(pbuf - rc->ibuf); + rc->ixb = rc->ixa + pbuf_len; + + /* + * Decryption may have yielded an empty record, in which case + * we get back to "ready" state immediately. + */ + if (rc->ixa == rc->ixb) { + make_ready_in(rc); + } +} + +/* see inner.h */ +int +br_ssl_engine_recvrec_finished(const br_ssl_engine_context *rc) +{ + switch (rc->iomode) { + case BR_IO_IN: + case BR_IO_INOUT: + return rc->ixc == 0 || rc->ixa < 5; + default: + return 1; + } +} + +static unsigned char * +recvpld_buf(const br_ssl_engine_context *rc, size_t *len) +{ + /* + * There is payload data to be read only if the mode is + * compatible, and ixa != ixb. + */ + switch (rc->iomode) { + case BR_IO_IN: + case BR_IO_INOUT: + *len = rc->ixb - rc->ixa; + return (*len == 0) ? NULL : (rc->ibuf + rc->ixa); + default: + *len = 0; + return NULL; + } +} + +static void +recvpld_ack(br_ssl_engine_context *rc, size_t len) +{ + rc->ixa += len; + + /* + * If we read all the available data, then we either expect + * the remainder of the current record (if the current record + * was not finished; this may happen when encryption is not + * active), or go to "ready" state. + */ + if (rc->ixa == rc->ixb) { + if (rc->ixc == 0) { + make_ready_in(rc); + } else { + rc->ixa = rc->ixb = 5; + } + } +} + +static unsigned char * +sendpld_buf(const br_ssl_engine_context *rc, size_t *len) +{ + /* + * Payload data can be injected only if the current mode is + * compatible, and oxa != oxb. + */ + switch (rc->iomode) { + case BR_IO_OUT: + case BR_IO_INOUT: + *len = rc->oxb - rc->oxa; + return (*len == 0) ? NULL : (rc->obuf + rc->oxa); + default: + *len = 0; + return NULL; + } +} + +/* + * If some payload bytes have been accumulated, then wrap them into + * an outgoing record. 
Otherwise, this function does nothing, unless + * 'force' is non-zero, in which case an empty record is assembled. + * + * The caller must take care not to invoke this function if the engine + * is not currently ready to receive payload bytes to send. + */ +static void +sendpld_flush(br_ssl_engine_context *rc, int force) +{ + size_t xlen; + unsigned char *buf; + + if (rc->oxa == rc->oxb) { + return; + } + xlen = rc->oxa - rc->oxc; + if (xlen == 0 && !force) { + return; + } + buf = rc->out.vtable->encrypt(&rc->out.vtable, + rc->record_type_out, rc->version_out, + rc->obuf + rc->oxc, &xlen); + rc->oxb = rc->oxa = (size_t)(buf - rc->obuf); + rc->oxc = rc->oxa + xlen; +} + +static void +sendpld_ack(br_ssl_engine_context *rc, size_t len) +{ + /* + * If using a shared buffer, then we may have to modify the + * current mode. + */ + if (rc->iomode == BR_IO_INOUT && rc->ibuf == rc->obuf) { + rc->iomode = BR_IO_OUT; + } + rc->oxa += len; + if (rc->oxa >= rc->oxb) { + /* + * Set oxb to one more than oxa so that sendpld_flush() + * does not mistakingly believe that a record is + * already prepared and being sent. + */ + rc->oxb = rc->oxa + 1; + sendpld_flush(rc, 0); + } +} + +static unsigned char * +sendrec_buf(const br_ssl_engine_context *rc, size_t *len) +{ + /* + * When still gathering payload bytes, oxc points to the start + * of the record data, so oxc <= oxa. However, when a full + * record has been completed, oxc points to the end of the record, + * so oxc > oxa. + */ + switch (rc->iomode) { + case BR_IO_OUT: + case BR_IO_INOUT: + if (rc->oxc > rc->oxa) { + *len = rc->oxc - rc->oxa; + return rc->obuf + rc->oxa; + } + break; + } + *len = 0; + return NULL; +} + +static void +sendrec_ack(br_ssl_engine_context *rc, size_t len) +{ + rc->oxb = (rc->oxa += len); + if (rc->oxa == rc->oxc) { + make_ready_out(rc); + } +} + +/* + * Test whether there is some buffered outgoing record that still must + * sent. 
+ */ +static inline int +has_rec_tosend(const br_ssl_engine_context *rc) +{ + return rc->oxa == rc->oxb && rc->oxa != rc->oxc; +} + +/* + * The "no encryption" mode has no overhead. It limits the payload size + * to the maximum size allowed by the standard (16384 bytes); the caller + * is responsible for possibly enforcing a smaller fragment length. + */ +static void +clear_max_plaintext(const br_sslrec_out_clear_context *cc, + size_t *start, size_t *end) +{ + size_t len; + + (void)cc; + len = *end - *start; + if (len > 16384) { + *end = *start + 16384; + } +} + +/* + * In "no encryption" mode, encryption is trivial (a no-operation) so + * we just have to encode the header. + */ +static unsigned char * +clear_encrypt(br_sslrec_out_clear_context *cc, + int record_type, unsigned version, void *data, size_t *data_len) +{ + unsigned char *buf; + + (void)cc; + buf = (unsigned char *)data - 5; + buf[0] = record_type; + br_enc16be(buf + 1, version); + br_enc16be(buf + 3, *data_len); + *data_len += 5; + return buf; +} + +/* see bearssl_ssl.h */ +const br_sslrec_out_class br_sslrec_out_clear_vtable = { + sizeof(br_sslrec_out_clear_context), + (void (*)(const br_sslrec_out_class *const *, size_t *, size_t *)) + &clear_max_plaintext, + (unsigned char *(*)(const br_sslrec_out_class **, + int, unsigned, void *, size_t *)) + &clear_encrypt +}; + +/* ==================================================================== */ +/* + * In this part of the file, we handle the various record types, and + * communications with the handshake processor. + */ + +/* + * IMPLEMENTATION NOTES + * ==================== + * + * The handshake processor is written in T0 and runs as a coroutine. + * It receives the contents of all records except application data, and + * is responsible for producing the contents of all records except + * application data. + * + * A state flag is maintained, which specifies whether application data + * is acceptable or not. 
When it is set: + * + * -- Application data can be injected as payload data (provided that + * the output buffer is ready for that). + * + * -- Incoming application data records are accepted, and yield data + * that the caller may retrieve. + * + * When the flag is cleared, application data is not accepted from the + * application, and incoming application data records trigger an error. + * + * + * Records of type handshake, alert or change-cipher-spec are handled + * by the handshake processor. The handshake processor is written in T0 + * and runs as a coroutine; it gets invoked whenever one of the following + * situations is reached: + * + * -- An incoming record has type handshake, alert or change-cipher-spec, + * and yields data that can be read (zero-length records are thus + * ignored). + * + * -- An outgoing record has just finished being sent, and the "application + * data" flag is cleared. + * + * -- The caller wishes to perform a close (call to br_ssl_engine_close()). + * + * -- The caller wishes to perform a renegotiation (call to + * br_ssl_engine_renegotiate()). + * + * Whenever the handshake processor is entered, access to the payload + * buffers is provided, along with some information about explicit + * closures or renegotiations. + */ + +/* see bearssl_ssl.h */ +void +br_ssl_engine_set_suites(br_ssl_engine_context *cc, + const uint16_t *suites, size_t suites_num) +{ + if ((suites_num * sizeof *suites) > sizeof cc->suites_buf) { + br_ssl_engine_fail(cc, BR_ERR_BAD_PARAM); + return; + } + memcpy(cc->suites_buf, suites, suites_num * sizeof *suites); + cc->suites_num = suites_num; +} + +/* + * Give control to handshake processor. 'action' is 1 for a close, + * 2 for a renegotiation, or 0 for a jump due to I/O completion. 
+ */ +static void +jump_handshake(br_ssl_engine_context *cc, int action) +{ + /* + * We use a loop because the handshake processor actions may + * allow for more actions; namely, if the processor reads all + * input data, then it may allow for output data to be produced, + * in case of a shared in/out buffer. + */ + for (;;) { + size_t hlen_in, hlen_out; + + /* + * Get input buffer. We do not want to provide + * application data to the handshake processor (we could + * get called with an explicit close or renegotiation + * while there is application data ready to be read). + */ + cc->hbuf_in = recvpld_buf(cc, &hlen_in); + if (cc->hbuf_in != NULL + && cc->record_type_in == BR_SSL_APPLICATION_DATA) + { + hlen_in = 0; + } + + /* + * Get output buffer. The handshake processor never + * leaves an unfinished outgoing record, so if there is + * buffered output, then it MUST be some application + * data, so the processor cannot write to it. + */ + cc->saved_hbuf_out = cc->hbuf_out = sendpld_buf(cc, &hlen_out); + if (cc->hbuf_out != NULL && br_ssl_engine_has_pld_to_send(cc)) { + hlen_out = 0; + } + + /* + * Note: hlen_in and hlen_out can be both non-zero only if + * the input and output buffers are disjoint. Thus, we can + * offer both buffers to the handshake code. + */ + + cc->hlen_in = hlen_in; + cc->hlen_out = hlen_out; + cc->action = action; + cc->hsrun(&cc->cpu); + if (br_ssl_engine_closed(cc)) { + return; + } + if (cc->hbuf_out != cc->saved_hbuf_out) { + sendpld_ack(cc, cc->hbuf_out - cc->saved_hbuf_out); + } + if (hlen_in != cc->hlen_in) { + recvpld_ack(cc, hlen_in - cc->hlen_in); + if (cc->hlen_in == 0) { + /* + * We read all data bytes, which may have + * released the output buffer in case it + * is shared with the input buffer, and + * the handshake code might be waiting for + * that. 
+ */ + action = 0; + continue; + } + } + break; + } +} + +/* see inner.h */ +void +br_ssl_engine_flush_record(br_ssl_engine_context *cc) +{ + if (cc->hbuf_out != cc->saved_hbuf_out) { + sendpld_ack(cc, cc->hbuf_out - cc->saved_hbuf_out); + } + if (br_ssl_engine_has_pld_to_send(cc)) { + sendpld_flush(cc, 0); + } + cc->saved_hbuf_out = cc->hbuf_out = sendpld_buf(cc, &cc->hlen_out); +} + +/* see bearssl_ssl.h */ +unsigned char * +br_ssl_engine_sendapp_buf(const br_ssl_engine_context *cc, size_t *len) +{ + if (!(cc->application_data & 1)) { + *len = 0; + return NULL; + } + return sendpld_buf(cc, len); +} + +/* see bearssl_ssl.h */ +void +br_ssl_engine_sendapp_ack(br_ssl_engine_context *cc, size_t len) +{ + sendpld_ack(cc, len); +} + +/* see bearssl_ssl.h */ +unsigned char * +br_ssl_engine_recvapp_buf(const br_ssl_engine_context *cc, size_t *len) +{ + if (!(cc->application_data & 1) + || cc->record_type_in != BR_SSL_APPLICATION_DATA) + { + *len = 0; + return NULL; + } + return recvpld_buf(cc, len); +} + +/* see bearssl_ssl.h */ +void +br_ssl_engine_recvapp_ack(br_ssl_engine_context *cc, size_t len) +{ + recvpld_ack(cc, len); +} + +/* see bearssl_ssl.h */ +unsigned char * +br_ssl_engine_sendrec_buf(const br_ssl_engine_context *cc, size_t *len) +{ + return sendrec_buf(cc, len); +} + +/* see bearssl_ssl.h */ +void +br_ssl_engine_sendrec_ack(br_ssl_engine_context *cc, size_t len) +{ + sendrec_ack(cc, len); + if (len != 0 && !has_rec_tosend(cc) + && (cc->record_type_out != BR_SSL_APPLICATION_DATA + || (cc->application_data & 1) == 0)) + { + jump_handshake(cc, 0); + } +} + +/* see bearssl_ssl.h */ +unsigned char * +br_ssl_engine_recvrec_buf(const br_ssl_engine_context *cc, size_t *len) +{ + return recvrec_buf(cc, len); +} + +/* see bearssl_ssl.h */ +void +br_ssl_engine_recvrec_ack(br_ssl_engine_context *cc, size_t len) +{ + unsigned char *buf; + + recvrec_ack(cc, len); + if (br_ssl_engine_closed(cc)) { + return; + } + + /* + * We just received some bytes from the peer. 
This may have + * yielded some payload bytes, in which case we must process + * them according to the record type. + */ + buf = recvpld_buf(cc, &len); + if (buf != NULL) { + switch (cc->record_type_in) { + case BR_SSL_CHANGE_CIPHER_SPEC: + case BR_SSL_ALERT: + case BR_SSL_HANDSHAKE: + jump_handshake(cc, 0); + break; + case BR_SSL_APPLICATION_DATA: + if (cc->application_data == 1) { + break; + } + + /* + * If we are currently closing, and waiting for + * a close_notify from the peer, then incoming + * application data should be discarded. + */ + if (cc->application_data == 2) { + recvpld_ack(cc, len); + break; + } + + /* Fall through */ + default: + br_ssl_engine_fail(cc, BR_ERR_UNEXPECTED); + break; + } + } +} + +/* see bearssl_ssl.h */ +void +br_ssl_engine_close(br_ssl_engine_context *cc) +{ + if (!br_ssl_engine_closed(cc)) { + /* + * If we are not already closed, then we need to + * initiate the closure. Once closing, any incoming + * application data is discarded; we should also discard + * application data which is already there but has not + * been acknowledged by the application yet (this mimics + * usual semantics on BSD sockets: you cannot read() + * once you called close(), even if there was some + * unread data already buffered). 
+ */ + size_t len; + + if (br_ssl_engine_recvapp_buf(cc, &len) != NULL && len != 0) { + br_ssl_engine_recvapp_ack(cc, len); + } + jump_handshake(cc, 1); + } +} + +/* see bearssl_ssl.h */ +int +br_ssl_engine_renegotiate(br_ssl_engine_context *cc) +{ + size_t len; + + if (br_ssl_engine_closed(cc) || cc->reneg == 1 + || (cc->flags & BR_OPT_NO_RENEGOTIATION) != 0 + || br_ssl_engine_recvapp_buf(cc, &len) != NULL) + { + return 0; + } + jump_handshake(cc, 2); + return 1; +} + +/* see bearssl.h */ +unsigned +br_ssl_engine_current_state(const br_ssl_engine_context *cc) +{ + unsigned s; + size_t len; + + if (br_ssl_engine_closed(cc)) { + return BR_SSL_CLOSED; + } + + s = 0; + if (br_ssl_engine_sendrec_buf(cc, &len) != NULL) { + s |= BR_SSL_SENDREC; + } + if (br_ssl_engine_recvrec_buf(cc, &len) != NULL) { + s |= BR_SSL_RECVREC; + } + if (br_ssl_engine_sendapp_buf(cc, &len) != NULL) { + s |= BR_SSL_SENDAPP; + } + if (br_ssl_engine_recvapp_buf(cc, &len) != NULL) { + s |= BR_SSL_RECVAPP; + } + return s; +} + +/* see bearssl_ssl.h */ +void +br_ssl_engine_flush(br_ssl_engine_context *cc, int force) +{ + if (!br_ssl_engine_closed(cc) && (cc->application_data & 1) != 0) { + sendpld_flush(cc, force); + } +} + +/* see inner.h */ +void +br_ssl_engine_hs_reset(br_ssl_engine_context *cc, + void (*hsinit)(void *), void (*hsrun)(void *)) +{ + engine_clearbuf(cc); + cc->cpu.dp = cc->dp_stack; + cc->cpu.rp = cc->rp_stack; + hsinit(&cc->cpu); + cc->hsrun = hsrun; + cc->shutdown_recv = 0; + cc->application_data = 0; + cc->alert = 0; + jump_handshake(cc, 0); +} + +/* see inner.h */ +br_tls_prf_impl +br_ssl_engine_get_PRF(br_ssl_engine_context *cc, int prf_id) +{ + if (cc->session.version >= BR_TLS12) { + if (prf_id == br_sha384_ID) { + return cc->prf_sha384; + } else { + return cc->prf_sha256; + } + } else { + return cc->prf10; + } +} + +/* see inner.h */ +void +br_ssl_engine_compute_master(br_ssl_engine_context *cc, + int prf_id, const void *pms, size_t pms_len) +{ + br_tls_prf_impl iprf; + 
br_tls_prf_seed_chunk seed[2] = { + { cc->client_random, sizeof cc->client_random }, + { cc->server_random, sizeof cc->server_random } + }; + + iprf = br_ssl_engine_get_PRF(cc, prf_id); + iprf(cc->session.master_secret, sizeof cc->session.master_secret, + pms, pms_len, "master secret", 2, seed); +} + +/* + * Compute key block. + */ +static void +compute_key_block(br_ssl_engine_context *cc, int prf_id, + size_t half_len, unsigned char *kb) +{ + br_tls_prf_impl iprf; + br_tls_prf_seed_chunk seed[2] = { + { cc->server_random, sizeof cc->server_random }, + { cc->client_random, sizeof cc->client_random } + }; + + iprf = br_ssl_engine_get_PRF(cc, prf_id); + iprf(kb, half_len << 1, + cc->session.master_secret, sizeof cc->session.master_secret, + "key expansion", 2, seed); +} + +/* see inner.h */ +void +br_ssl_engine_switch_cbc_in(br_ssl_engine_context *cc, + int is_client, int prf_id, int mac_id, + const br_block_cbcdec_class *bc_impl, size_t cipher_key_len) +{ + unsigned char kb[192]; + unsigned char *cipher_key, *mac_key, *iv; + const br_hash_class *imh; + size_t mac_key_len, mac_out_len, iv_len; + + imh = br_ssl_engine_get_hash(cc, mac_id); + mac_out_len = (imh->desc >> BR_HASHDESC_OUT_OFF) & BR_HASHDESC_OUT_MASK; + mac_key_len = mac_out_len; + + /* + * TLS 1.1+ uses per-record explicit IV, so no IV to generate here. 
+ */ + if (cc->session.version >= BR_TLS11) { + iv_len = 0; + } else { + iv_len = bc_impl->block_size; + } + compute_key_block(cc, prf_id, + mac_key_len + cipher_key_len + iv_len, kb); + if (is_client) { + mac_key = &kb[mac_key_len]; + cipher_key = &kb[(mac_key_len << 1) + cipher_key_len]; + iv = &kb[((mac_key_len + cipher_key_len) << 1) + iv_len]; + } else { + mac_key = &kb[0]; + cipher_key = &kb[mac_key_len << 1]; + iv = &kb[(mac_key_len + cipher_key_len) << 1]; + } + if (iv_len == 0) { + iv = NULL; + } + cc->icbc_in->init(&cc->in.cbc.vtable, + bc_impl, cipher_key, cipher_key_len, + imh, mac_key, mac_key_len, mac_out_len, iv); + cc->incrypt = 1; +} + +/* see inner.h */ +void +br_ssl_engine_switch_cbc_out(br_ssl_engine_context *cc, + int is_client, int prf_id, int mac_id, + const br_block_cbcenc_class *bc_impl, size_t cipher_key_len) +{ + unsigned char kb[192]; + unsigned char *cipher_key, *mac_key, *iv; + const br_hash_class *imh; + size_t mac_key_len, mac_out_len, iv_len; + + imh = br_ssl_engine_get_hash(cc, mac_id); + mac_out_len = (imh->desc >> BR_HASHDESC_OUT_OFF) & BR_HASHDESC_OUT_MASK; + mac_key_len = mac_out_len; + + /* + * TLS 1.1+ uses per-record explicit IV, so no IV to generate here. 
+ */ + if (cc->session.version >= BR_TLS11) { + iv_len = 0; + } else { + iv_len = bc_impl->block_size; + } + compute_key_block(cc, prf_id, + mac_key_len + cipher_key_len + iv_len, kb); + if (is_client) { + mac_key = &kb[0]; + cipher_key = &kb[mac_key_len << 1]; + iv = &kb[(mac_key_len + cipher_key_len) << 1]; + } else { + mac_key = &kb[mac_key_len]; + cipher_key = &kb[(mac_key_len << 1) + cipher_key_len]; + iv = &kb[((mac_key_len + cipher_key_len) << 1) + iv_len]; + } + if (iv_len == 0) { + iv = NULL; + } + cc->icbc_out->init(&cc->out.cbc.vtable, + bc_impl, cipher_key, cipher_key_len, + imh, mac_key, mac_key_len, mac_out_len, iv); +} + +/* see inner.h */ +void +br_ssl_engine_switch_gcm_in(br_ssl_engine_context *cc, + int is_client, int prf_id, + const br_block_ctr_class *bc_impl, size_t cipher_key_len) +{ + unsigned char kb[72]; + unsigned char *cipher_key, *iv; + + compute_key_block(cc, prf_id, cipher_key_len + 4, kb); + if (is_client) { + cipher_key = &kb[cipher_key_len]; + iv = &kb[(cipher_key_len << 1) + 4]; + } else { + cipher_key = &kb[0]; + iv = &kb[cipher_key_len << 1]; + } + cc->igcm_in->init(&cc->in.gcm.vtable.in, + bc_impl, cipher_key, cipher_key_len, cc->ighash, iv); + cc->incrypt = 1; +} + +/* see inner.h */ +void +br_ssl_engine_switch_gcm_out(br_ssl_engine_context *cc, + int is_client, int prf_id, + const br_block_ctr_class *bc_impl, size_t cipher_key_len) +{ + unsigned char kb[72]; + unsigned char *cipher_key, *iv; + + compute_key_block(cc, prf_id, cipher_key_len + 4, kb); + if (is_client) { + cipher_key = &kb[0]; + iv = &kb[cipher_key_len << 1]; + } else { + cipher_key = &kb[cipher_key_len]; + iv = &kb[(cipher_key_len << 1) + 4]; + } + cc->igcm_out->init(&cc->out.gcm.vtable.out, + bc_impl, cipher_key, cipher_key_len, cc->ighash, iv); +} + +/* see inner.h */ +void +br_ssl_engine_switch_chapol_in(br_ssl_engine_context *cc, + int is_client, int prf_id) +{ + unsigned char kb[88]; + unsigned char *cipher_key, *iv; + + compute_key_block(cc, prf_id, 44, 
kb); + if (is_client) { + cipher_key = &kb[32]; + iv = &kb[76]; + } else { + cipher_key = &kb[0]; + iv = &kb[64]; + } + cc->ichapol_in->init(&cc->in.chapol.vtable.in, + cc->ichacha, cc->ipoly, cipher_key, iv); + cc->incrypt = 1; +} + +/* see inner.h */ +void +br_ssl_engine_switch_chapol_out(br_ssl_engine_context *cc, + int is_client, int prf_id) +{ + unsigned char kb[88]; + unsigned char *cipher_key, *iv; + + compute_key_block(cc, prf_id, 44, kb); + if (is_client) { + cipher_key = &kb[0]; + iv = &kb[64]; + } else { + cipher_key = &kb[32]; + iv = &kb[76]; + } + cc->ichapol_out->init(&cc->out.chapol.vtable.out, + cc->ichacha, cc->ipoly, cipher_key, iv); +} + +/* see inner.h */ +void +br_ssl_engine_switch_ccm_in(br_ssl_engine_context *cc, + int is_client, int prf_id, + const br_block_ctrcbc_class *bc_impl, + size_t cipher_key_len, size_t tag_len) +{ + unsigned char kb[72]; + unsigned char *cipher_key, *iv; + + compute_key_block(cc, prf_id, cipher_key_len + 4, kb); + if (is_client) { + cipher_key = &kb[cipher_key_len]; + iv = &kb[(cipher_key_len << 1) + 4]; + } else { + cipher_key = &kb[0]; + iv = &kb[cipher_key_len << 1]; + } + cc->iccm_in->init(&cc->in.ccm.vtable.in, + bc_impl, cipher_key, cipher_key_len, iv, tag_len); + cc->incrypt = 1; +} + +/* see inner.h */ +void +br_ssl_engine_switch_ccm_out(br_ssl_engine_context *cc, + int is_client, int prf_id, + const br_block_ctrcbc_class *bc_impl, + size_t cipher_key_len, size_t tag_len) +{ + unsigned char kb[72]; + unsigned char *cipher_key, *iv; + + compute_key_block(cc, prf_id, cipher_key_len + 4, kb); + if (is_client) { + cipher_key = &kb[0]; + iv = &kb[cipher_key_len << 1]; + } else { + cipher_key = &kb[cipher_key_len]; + iv = &kb[(cipher_key_len << 1) + 4]; + } + cc->iccm_out->init(&cc->out.ccm.vtable.out, + bc_impl, cipher_key, cipher_key_len, iv, tag_len); +} diff --git a/third_party/bearssl/src/ssl_engine_default_aescbc.c b/third_party/bearssl/src/ssl_engine_default_aescbc.c new file mode 100644 index 
0000000..8c5cdb5 --- /dev/null +++ b/third_party/bearssl/src/ssl_engine_default_aescbc.c @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* see bearssl_ssl.h */ +void +br_ssl_engine_set_default_aes_cbc(br_ssl_engine_context *cc) +{ +#if BR_AES_X86NI || BR_POWER8 + const br_block_cbcenc_class *ienc; + const br_block_cbcdec_class *idec; +#endif + + br_ssl_engine_set_cbc(cc, + &br_sslrec_in_cbc_vtable, + &br_sslrec_out_cbc_vtable); +#if BR_AES_X86NI + ienc = br_aes_x86ni_cbcenc_get_vtable(); + idec = br_aes_x86ni_cbcdec_get_vtable(); + if (ienc != NULL && idec != NULL) { + br_ssl_engine_set_aes_cbc(cc, ienc, idec); + return; + } +#endif +#if BR_POWER8 + ienc = br_aes_pwr8_cbcenc_get_vtable(); + idec = br_aes_pwr8_cbcdec_get_vtable(); + if (ienc != NULL && idec != NULL) { + br_ssl_engine_set_aes_cbc(cc, ienc, idec); + return; + } +#endif +#if BR_64 + br_ssl_engine_set_aes_cbc(cc, + &br_aes_ct64_cbcenc_vtable, + &br_aes_ct64_cbcdec_vtable); +#else + br_ssl_engine_set_aes_cbc(cc, + &br_aes_ct_cbcenc_vtable, + &br_aes_ct_cbcdec_vtable); +#endif +} diff --git a/third_party/bearssl/src/ssl_engine_default_aesccm.c b/third_party/bearssl/src/ssl_engine_default_aesccm.c new file mode 100644 index 0000000..15c0a78 --- /dev/null +++ b/third_party/bearssl/src/ssl_engine_default_aesccm.c @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2018 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_ssl.h */ +void +br_ssl_engine_set_default_aes_ccm(br_ssl_engine_context *cc) +{ +#if BR_AES_X86NI || BR_POWER8 + const br_block_ctrcbc_class *ictrcbc; +#endif + + br_ssl_engine_set_ccm(cc, + &br_sslrec_in_ccm_vtable, + &br_sslrec_out_ccm_vtable); +#if BR_AES_X86NI + ictrcbc = br_aes_x86ni_ctrcbc_get_vtable(); + if (ictrcbc != NULL) { + br_ssl_engine_set_aes_ctrcbc(cc, ictrcbc); + } else { +#if BR_64 + br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct64_ctrcbc_vtable); +#else + br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct_ctrcbc_vtable); +#endif + } +#elif BR_POWER8 + ictrcbc = br_aes_pwr8_ctrcbc_get_vtable(); + if (ictrcbc != NULL) { + br_ssl_engine_set_aes_ctrcbc(cc, ictrcbc); + } else { +#if BR_64 + br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct64_ctrcbc_vtable); +#else + br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct_ctrcbc_vtable); +#endif + } +#else +#if BR_64 + br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct64_ctrcbc_vtable); +#else + br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct_ctrcbc_vtable); +#endif +#endif +} diff --git a/third_party/bearssl/src/ssl_engine_default_aesgcm.c b/third_party/bearssl/src/ssl_engine_default_aesgcm.c new file mode 100644 index 0000000..c44a707 --- /dev/null +++ b/third_party/bearssl/src/ssl_engine_default_aesgcm.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated 
documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * see bearssl_ssl.h + * + * Installs the GCM record vtables (br_sslrec_in_gcm_vtable and + * br_sslrec_out_gcm_vtable) on engine 'cc', then selects two + * primitives. + * + * AES/CTR: the result of br_aes_x86ni_ctr_get_vtable() under + * BR_AES_X86NI, else (#elif) the result of br_aes_pwr8_ctr_get_vtable() + * under BR_POWER8; when that result is NULL, or when neither macro is + * set, br_aes_ct64_ctr_vtable (BR_64) or br_aes_ct_ctr_vtable is used. + * + * GHASH: br_ghash_pclmul_get() is tried under BR_AES_X86NI, then + * br_ghash_pwr8_get() under BR_POWER8 (two independent #if blocks); + * the first non-zero result is installed and the function returns. + * Otherwise the fallback is br_ghash_ctmul32 (BR_LOMUL), + * br_ghash_ctmul64 (BR_64) or br_ghash_ctmul. + */ +void +br_ssl_engine_set_default_aes_gcm(br_ssl_engine_context *cc) +{ +#if BR_AES_X86NI || BR_POWER8 + const br_block_ctr_class *ictr; + br_ghash ighash; +#endif + + br_ssl_engine_set_gcm(cc, + &br_sslrec_in_gcm_vtable, + &br_sslrec_out_gcm_vtable); +#if BR_AES_X86NI + ictr = br_aes_x86ni_ctr_get_vtable(); + if (ictr != NULL) { + br_ssl_engine_set_aes_ctr(cc, ictr); + } else { +#if BR_64 + br_ssl_engine_set_aes_ctr(cc, &br_aes_ct64_ctr_vtable); +#else + br_ssl_engine_set_aes_ctr(cc, &br_aes_ct_ctr_vtable); +#endif + } +#elif BR_POWER8 + ictr = br_aes_pwr8_ctr_get_vtable(); + if (ictr != NULL) { + br_ssl_engine_set_aes_ctr(cc, ictr); + } else { +#if BR_64 + br_ssl_engine_set_aes_ctr(cc, &br_aes_ct64_ctr_vtable); +#else + br_ssl_engine_set_aes_ctr(cc, &br_aes_ct_ctr_vtable); +#endif + } +#else +#if BR_64 + br_ssl_engine_set_aes_ctr(cc, &br_aes_ct64_ctr_vtable); +#else + br_ssl_engine_set_aes_ctr(cc, &br_aes_ct_ctr_vtable); +#endif +#endif +#if 
BR_AES_X86NI + ighash = br_ghash_pclmul_get(); + if (ighash != 0) { + br_ssl_engine_set_ghash(cc, ighash); + return; + } +#endif +#if BR_POWER8 + ighash = br_ghash_pwr8_get(); + if (ighash != 0) { + br_ssl_engine_set_ghash(cc, ighash); + return; + } +#endif +#if BR_LOMUL + br_ssl_engine_set_ghash(cc, &br_ghash_ctmul32); +#elif BR_64 + br_ssl_engine_set_ghash(cc, &br_ghash_ctmul64); +#else + br_ssl_engine_set_ghash(cc, &br_ghash_ctmul); +#endif +} diff --git a/third_party/bearssl/src/ssl_engine_default_chapol.c b/third_party/bearssl/src/ssl_engine_default_chapol.c new file mode 100644 index 0000000..47a0c98 --- /dev/null +++ b/third_party/bearssl/src/ssl_engine_default_chapol.c @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* + * see bearssl_ssl.h + * + * Installs the 'chapol' record vtables (br_sslrec_in_chapol_vtable and + * br_sslrec_out_chapol_vtable) on engine 'cc', then selects: + * - ChaCha20: the result of br_chacha20_sse2_get() when BR_SSE2 is set + * and that result is non-zero; br_chacha20_ct_run otherwise. + * - Poly1305: the result of br_poly1305_ctmulq_get() when BR_INT128 or + * BR_UMUL128 is set and that result is non-zero; otherwise + * br_poly1305_ctmul32_run (BR_LOMUL) or br_poly1305_ctmul_run. + * The 'else {' opened inside each conditional section is closed by the + * matching conditional section that follows the fallback call, so the + * braces balance whether or not the macros are set. + */ +void +br_ssl_engine_set_default_chapol(br_ssl_engine_context *cc) +{ +#if BR_INT128 || BR_UMUL128 + br_poly1305_run bp; +#endif +#if BR_SSE2 + br_chacha20_run bc; +#endif + + br_ssl_engine_set_chapol(cc, + &br_sslrec_in_chapol_vtable, + &br_sslrec_out_chapol_vtable); +#if BR_SSE2 + bc = br_chacha20_sse2_get(); + if (bc) { + br_ssl_engine_set_chacha20(cc, bc); + } else { +#endif + br_ssl_engine_set_chacha20(cc, &br_chacha20_ct_run); +#if BR_SSE2 + } +#endif +#if BR_INT128 || BR_UMUL128 + bp = br_poly1305_ctmulq_get(); + if (bp) { + br_ssl_engine_set_poly1305(cc, bp); + } else { +#endif +#if BR_LOMUL + br_ssl_engine_set_poly1305(cc, &br_poly1305_ctmul32_run); +#else + br_ssl_engine_set_poly1305(cc, &br_poly1305_ctmul_run); +#endif +#if BR_INT128 || BR_UMUL128 + } +#endif +} diff --git a/third_party/bearssl/src/ssl_engine_default_descbc.c b/third_party/bearssl/src/ssl_engine_default_descbc.c new file mode 100644 index 0000000..0253cb2 --- /dev/null +++ b/third_party/bearssl/src/ssl_engine_default_descbc.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * see bearssl_ssl.h + * + * Installs the CBC record vtables (br_sslrec_in_cbc_vtable and + * br_sslrec_out_cbc_vtable) on engine 'cc', together with the + * br_des_ct_cbcenc_vtable / br_des_ct_cbcdec_vtable pair. No + * compile-time or runtime selection is performed in this function. + */ +void +br_ssl_engine_set_default_des_cbc(br_ssl_engine_context *cc) +{ + br_ssl_engine_set_cbc(cc, + &br_sslrec_in_cbc_vtable, + &br_sslrec_out_cbc_vtable); + br_ssl_engine_set_des_cbc(cc, + &br_des_ct_cbcenc_vtable, + &br_des_ct_cbcdec_vtable); +} diff --git a/third_party/bearssl/src/ssl_engine_default_ec.c b/third_party/bearssl/src/ssl_engine_default_ec.c new file mode 100644 index 0000000..0213ae6 --- /dev/null +++ b/third_party/bearssl/src/ssl_engine_default_ec.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* + * see bearssl_ssl.h + * + * Installs the EC implementation on engine 'cc': br_ec_all_m15 when + * BR_LOMUL is set, br_ec_all_m31 otherwise. The choice is made at + * compile time only. + */ +void +br_ssl_engine_set_default_ec(br_ssl_engine_context *cc) +{ +#if BR_LOMUL + br_ssl_engine_set_ec(cc, &br_ec_all_m15); +#else + br_ssl_engine_set_ec(cc, &br_ec_all_m31); +#endif +} diff --git a/third_party/bearssl/src/ssl_engine_default_ecdsa.c b/third_party/bearssl/src/ssl_engine_default_ecdsa.c new file mode 100644 index 0000000..1304002 --- /dev/null +++ b/third_party/bearssl/src/ssl_engine_default_ecdsa.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* + * see bearssl_ssl.h + * + * Installs both the EC implementation and the ECDSA verification + * function on engine 'cc': br_ec_all_m15 with br_ecdsa_i15_vrfy_asn1 + * when BR_LOMUL is set, br_ec_all_m31 with br_ecdsa_i31_vrfy_asn1 + * otherwise. The EC implementation set here is the same one that + * br_ssl_engine_set_ec() receives, so any earlier setting is replaced. + */ +void +br_ssl_engine_set_default_ecdsa(br_ssl_engine_context *cc) +{ +#if BR_LOMUL + br_ssl_engine_set_ec(cc, &br_ec_all_m15); + br_ssl_engine_set_ecdsa(cc, &br_ecdsa_i15_vrfy_asn1); +#else + br_ssl_engine_set_ec(cc, &br_ec_all_m31); + br_ssl_engine_set_ecdsa(cc, &br_ecdsa_i31_vrfy_asn1); +#endif +} diff --git a/third_party/bearssl/src/ssl_engine_default_rsavrfy.c b/third_party/bearssl/src/ssl_engine_default_rsavrfy.c new file mode 100644 index 0000000..ad0628a --- /dev/null +++ b/third_party/bearssl/src/ssl_engine_default_rsavrfy.c @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* + * see bearssl_ssl.h + * + * Passes the value returned by br_rsa_pkcs1_vrfy_get_default() to + * br_ssl_engine_set_rsavrfy() for engine 'cc'. The selection logic + * lives entirely in br_rsa_pkcs1_vrfy_get_default(), which is not + * defined in this file. + */ +void +br_ssl_engine_set_default_rsavrfy(br_ssl_engine_context *cc) +{ + br_ssl_engine_set_rsavrfy(cc, br_rsa_pkcs1_vrfy_get_default()); +} diff --git a/third_party/bearssl/src/ssl_hashes.c b/third_party/bearssl/src/ssl_hashes.c new file mode 100644 index 0000000..e10a980 --- /dev/null +++ b/third_party/bearssl/src/ssl_hashes.c @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* + * see inner.h + * + * 'bf' is read as a bit field indexed by hash function ID. The pref[] + * table is scanned in order (SHA-256, SHA-384, SHA-512, SHA-224, + * SHA-1) and the first ID x whose bit (bf >> x) & 1 is set is + * returned. When none of these bits is set, 0 is returned. + */ +int +br_ssl_choose_hash(unsigned bf) +{ + static const unsigned char pref[] = { + br_sha256_ID, br_sha384_ID, br_sha512_ID, + br_sha224_ID, br_sha1_ID + }; + size_t u; + + for (u = 0; u < sizeof pref; u ++) { + int x; + + x = pref[u]; + if ((bf >> x) & 1) { + return x; + } + } + return 0; +} diff --git a/third_party/bearssl/src/ssl_hs_client.c b/third_party/bearssl/src/ssl_hs_client.c new file mode 100644 index 0000000..de36165 --- /dev/null +++ b/third_party/bearssl/src/ssl_hs_client.c @@ -0,0 +1,1915 @@ +/* Automatically generated code; do not modify directly. */ + +#include <stddef.h> +#include <stdint.h> + +typedef struct { + uint32_t *dp; + uint32_t *rp; + const unsigned char *ip; +} t0_context; + +static uint32_t +t0_parse7E_unsigned(const unsigned char **p) +{ + uint32_t x; + + x = 0; + for (;;) { + unsigned y; + + y = *(*p) ++; + x = (x << 7) | (uint32_t)(y & 0x7F); + if (y < 0x80) { + return x; + } + } +} + +static int32_t +t0_parse7E_signed(const unsigned char **p) +{ + int neg; + uint32_t x; + + neg = ((**p) >> 6) & 1; + x = (uint32_t)-neg; + for (;;) { + unsigned y; + + y = *(*p) ++; + x = (x << 7) | (uint32_t)(y & 0x7F); + if (y < 0x80) { + if (neg) { + return -(int32_t)~x - 1; + } else { + return (int32_t)x; + } + } + } +} + +#define T0_VBYTE(x, n) (unsigned char)((((uint32_t)(x) >> (n)) & 0x7F) | 0x80) +#define T0_FBYTE(x, n) (unsigned char)(((uint32_t)(x) >> (n)) & 0x7F) +#define T0_SBYTE(x) (unsigned char)((((uint32_t)(x) >> 28) + 0xF8) ^ 0xF8) +#define T0_INT1(x) T0_FBYTE(x, 0) +#define T0_INT2(x) T0_VBYTE(x, 7), T0_FBYTE(x, 0) +#define T0_INT3(x) T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0) +#define T0_INT4(x) T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0) +#define T0_INT5(x) T0_SBYTE(x), T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0) + +/* static const unsigned char t0_datablock[]; */ + + +void br_ssl_hs_client_init_main(void *t0ctx); + +void 
br_ssl_hs_client_run(void *t0ctx); + + + +#include <stddef.h> +#include <string.h> + +#include "inner.h" + +/* + * This macro evaluates to a pointer to the current engine context. + */ +#define ENG ((br_ssl_engine_context *)(void *)((unsigned char *)t0ctx - offsetof(br_ssl_engine_context, cpu))) + + + + + +/* + * This macro evaluates to a pointer to the client context, under that + * specific name. It must be noted that since the engine context is the + * first field of the br_ssl_client_context structure ('eng'), then + * pointers values of both types are interchangeable, modulo an + * appropriate cast. This also means that "addresses" computed as offsets + * within the structure work for both kinds of context. + */ +#define CTX ((br_ssl_client_context *)ENG) + +/* + * Generate the pre-master secret for RSA key exchange, and encrypt it + * with the server's public key. Returned value is either the encrypted + * data length (in bytes), or -x on error, with 'x' being an error code. + * + * This code assumes that the public key has been already verified (it + * was properly obtained by the X.509 engine, and it has the right type, + * i.e. it is of type RSA and suitable for encryption). + * + * The padded message is assembled in ctx->eng.pad and that buffer is + * passed to ctx->irsapub(). The master secret is derived from the + * pre-master secret (br_ssl_engine_compute_master()) before the padding + * bytes are written. + * + * Error codes: BR_ERR_X509_WEAK_PUBLIC_KEY when the modulus, without + * its leading zero bytes, is shorter than 59 bytes; + * BR_ERR_LIMIT_EXCEEDED when that length exceeds sizeof ctx->eng.pad + * or when ctx->irsapub() returns 0. + */ +static int +make_pms_rsa(br_ssl_client_context *ctx, int prf_id) +{ + const br_x509_class **xc; + const br_x509_pkey *pk; + const unsigned char *n; + unsigned char *pms; + size_t nlen, u; + + xc = ctx->eng.x509ctx; + pk = (*xc)->get_pkey(xc, NULL); + + /* + * Compute actual RSA key length, in case there are leading zeros. + */ + n = pk->key.rsa.n; + nlen = pk->key.rsa.nlen; + while (nlen > 0 && *n == 0) { + n ++; + nlen --; + } + + /* + * We need at least 59 bytes (48 bytes for pre-master secret, and + * 11 bytes for the PKCS#1 type 2 padding). Note that the X.509 + * minimal engine normally blocks RSA keys shorter than 128 bytes, + * so this is mostly for public keys provided explicitly by the + * caller. 
+ */ + if (nlen < 59) { + return -BR_ERR_X509_WEAK_PUBLIC_KEY; + } + if (nlen > sizeof ctx->eng.pad) { + return -BR_ERR_LIMIT_EXCEEDED; + } + + /* + * Make PMS. The 48-byte value occupies the last 48 bytes of the + * nlen-byte area at the start of the pad: two bytes holding + * version_max (big-endian), then 46 bytes from the DRBG. + */ + pms = ctx->eng.pad + nlen - 48; + br_enc16be(pms, ctx->eng.version_max); + br_hmac_drbg_generate(&ctx->eng.rng, pms + 2, 46); + br_ssl_engine_compute_master(&ctx->eng, prf_id, pms, 48); + + /* + * Apply PKCS#1 type 2 padding. Layout: 0x00, 0x02, then + * nlen - 51 random bytes, then 0x00 at index nlen - 49, then + * the PMS. Any random byte equal to zero is redrawn until it + * is non-zero. + */ + ctx->eng.pad[0] = 0x00; + ctx->eng.pad[1] = 0x02; + ctx->eng.pad[nlen - 49] = 0x00; + br_hmac_drbg_generate(&ctx->eng.rng, ctx->eng.pad + 2, nlen - 51); + for (u = 2; u < nlen - 49; u ++) { + while (ctx->eng.pad[u] == 0) { + br_hmac_drbg_generate(&ctx->eng.rng, + &ctx->eng.pad[u], 1); + } + } + + /* + * Compute RSA encryption. + */ + if (!ctx->irsapub(ctx->eng.pad, nlen, &pk->key.rsa)) { + return -BR_ERR_LIMIT_EXCEEDED; + } + return (int)nlen; +} + +/* + * OID for hash functions in RSA signatures. + * + * The table is indexed by (hash function ID - 2): entry 0 is SHA-1 + * and entry 4 is SHA-512. + */ +static const unsigned char *HASH_OID[] = { + BR_HASH_OID_SHA1, + BR_HASH_OID_SHA224, + BR_HASH_OID_SHA256, + BR_HASH_OID_SHA384, + BR_HASH_OID_SHA512 +}; + +/* + * Check the RSA signature on the ServerKeyExchange message. + * + * hash hash function ID (2 to 6), or 0 for MD5+SHA-1 (with RSA only) + * use_rsa non-zero for RSA signature, zero for ECDSA + * sig_len signature length (in bytes); signature value is in the pad + * + * Returned value is 0 on success, or an error code. 
+ */ +static int +verify_SKE_sig(br_ssl_client_context *ctx, + int hash, int use_rsa, size_t sig_len) +{ + const br_x509_class **xc; + const br_x509_pkey *pk; + br_multihash_context mhc; + unsigned char hv[64], head[4]; + size_t hv_len; + + xc = ctx->eng.x509ctx; + pk = (*xc)->get_pkey(xc, NULL); + br_multihash_zero(&mhc); + br_multihash_copyimpl(&mhc, &ctx->eng.mhash); + br_multihash_init(&mhc); /* local context: ctx->eng.mhash itself receives no data here */ + br_multihash_update(&mhc, + ctx->eng.client_random, sizeof ctx->eng.client_random); + br_multihash_update(&mhc, + ctx->eng.server_random, sizeof ctx->eng.server_random); + head[0] = 3; + head[1] = 0; + head[2] = ctx->eng.ecdhe_curve; + head[3] = ctx->eng.ecdhe_point_len; + br_multihash_update(&mhc, head, sizeof head); + br_multihash_update(&mhc, + ctx->eng.ecdhe_point, ctx->eng.ecdhe_point_len); + if (hash) { + hv_len = br_multihash_out(&mhc, hash, hv); + if (hv_len == 0) { + return BR_ERR_INVALID_ALGORITHM; + } + } else { + if (!br_multihash_out(&mhc, br_md5_ID, hv) + || !br_multihash_out(&mhc, br_sha1_ID, hv + 16)) + { + return BR_ERR_INVALID_ALGORITHM; + } + hv_len = 36; /* MD5 output at hv, SHA-1 output at hv + 16 */ + } + if (use_rsa) { + unsigned char tmp[64]; + const unsigned char *hash_oid; + + if (hash) { + hash_oid = HASH_OID[hash - 2]; /* HASH_OID[] starts at hash ID 2 */ + } else { + hash_oid = NULL; /* MD5+SHA-1 case: no OID is passed to irsavrfy() */ + } + if (!ctx->eng.irsavrfy(ctx->eng.pad, sig_len, + hash_oid, hv_len, &pk->key.rsa, tmp) + || memcmp(tmp, hv, hv_len) != 0) + { + return BR_ERR_BAD_SIGNATURE; + } + } else { + if (!ctx->eng.iecdsa(ctx->eng.iec, hv, hv_len, &pk->key.ec, + ctx->eng.pad, sig_len)) + { + return BR_ERR_BAD_SIGNATURE; + } + } + return 0; +} + +/* + * Perform client-side ECDH (or ECDHE). The point that should be sent to + * the server is written in the pad; returned value is either the point + * length (in bytes), or -x on error, with 'x' being an error code. + * + * The point _from_ the server is taken from ecdhe_point[] if 'ecdhe' + * is non-zero, or from the X.509 engine context if 'ecdhe' is zero + * (for static ECDH). 
+ */ +static int +make_pms_ecdh(br_ssl_client_context *ctx, unsigned ecdhe, int prf_id) +{ + int curve; + unsigned char key[66], point[133]; + const unsigned char *order, *point_src; + size_t glen, olen, point_len, xoff, xlen; + unsigned char mask; + + if (ecdhe) { + curve = ctx->eng.ecdhe_curve; + point_src = ctx->eng.ecdhe_point; + point_len = ctx->eng.ecdhe_point_len; + } else { + const br_x509_class **xc; + const br_x509_pkey *pk; + + xc = ctx->eng.x509ctx; + pk = (*xc)->get_pkey(xc, NULL); + curve = pk->key.ec.curve; + point_src = pk->key.ec.q; + point_len = pk->key.ec.qlen; + } + if ((ctx->eng.iec->supported_curves & ((uint32_t)1 << curve)) == 0) { + return -BR_ERR_INVALID_ALGORITHM; + } + + /* + * We need to generate our key, as a non-zero random value which + * is lower than the curve order, in a "large enough" range. We + * force top bit to 0 and bottom bit to 1, which guarantees that + * the value is in the proper range. + * + * 'mask' is shifted right until it is strictly lower than + * order[0], so after masking key[0] < order[0]; the OR with + * 0x01 on the last byte makes the key non-zero. + */ + order = ctx->eng.iec->order(curve, &olen); + mask = 0xFF; + while (mask >= order[0]) { + mask >>= 1; + } + br_hmac_drbg_generate(&ctx->eng.rng, key, olen); + key[0] &= mask; + key[olen - 1] |= 0x01; + + /* + * Compute the common ECDH point, whose X coordinate is the + * pre-master secret. + * + * generator() is called only for the length it writes to + * 'glen' (its return value is not used); a peer point whose + * length differs from it is rejected. + */ + ctx->eng.iec->generator(curve, &glen); + if (glen != point_len) { + return -BR_ERR_INVALID_ALGORITHM; + } + + memcpy(point, point_src, glen); + if (!ctx->eng.iec->mul(point, glen, key, olen, curve)) { + return -BR_ERR_INVALID_ALGORITHM; + } + + /* + * The pre-master secret is the X coordinate. + * + * Once the master secret is computed, 'point' is overwritten + * by mulgen() and its first 'glen' bytes are copied to the + * pad: this is the point to send to the server. + */ + xoff = ctx->eng.iec->xoff(curve, &xlen); + br_ssl_engine_compute_master(&ctx->eng, prf_id, point + xoff, xlen); + + ctx->eng.iec->mulgen(point, key, olen, curve); + memcpy(ctx->eng.pad, point, glen); + return (int)glen; +} + +/* + * Perform full static ECDH. 
This occurs only in the context of client + * authentication with certificates: the server uses an EC public key, + * the cipher suite is of type ECDH (not ECDHE), the server requested a + * client certificate and accepts static ECDH, the client has a + * certificate with an EC public key in the same curve, and accepts + * static ECDH as well. + * + * Returned value is 0 on success, -1 on error. + */ +static int +make_pms_static_ecdh(br_ssl_client_context *ctx, int prf_id) +{ + unsigned char point[133]; + size_t point_len; + const br_x509_class **xc; + const br_x509_pkey *pk; + + xc = ctx->eng.x509ctx; + pk = (*xc)->get_pkey(xc, NULL); + point_len = pk->key.ec.qlen; + if (point_len > sizeof point) { + return -1; + } + memcpy(point, pk->key.ec.q, point_len); /* public point of the key returned by the X.509 engine */ + if (!(*ctx->client_auth_vtable)->do_keyx( + ctx->client_auth_vtable, point, &point_len)) + { + return -1; + } + br_ssl_engine_compute_master(&ctx->eng, + prf_id, point, point_len); /* pre-master secret: point[] and point_len as left by do_keyx() */ + return 0; +} + +/* + * Compute the client-side signature. This is invoked only when a + * signature-based client authentication was selected. The computed + * signature is in the pad; its length (in bytes) is returned. On + * error, 0 is returned. + * + * The value to sign is first written to the pad: the output of the + * hash function ctx->hash_id or, when ctx->hash_id is zero, the MD5 + * output followed by the SHA-1 output at offset 16 (36 bytes in + * total). The do_sign() callback then receives that same buffer + * (ctx->eng.pad, sizeof ctx->eng.pad) and its return value is + * returned unchanged. + */ +static size_t +make_client_sign(br_ssl_client_context *ctx) +{ + size_t hv_len; + + /* + * Compute hash of handshake messages so far. This "cannot" fail + * because the list of supported hash functions provided to the + * client certificate handler was trimmed to include only the + * hash functions that the multi-hasher supports. 
+ */ + if (ctx->hash_id) { + hv_len = br_multihash_out(&ctx->eng.mhash, + ctx->hash_id, ctx->eng.pad); + } else { + br_multihash_out(&ctx->eng.mhash, + br_md5_ID, ctx->eng.pad); + br_multihash_out(&ctx->eng.mhash, + br_sha1_ID, ctx->eng.pad + 16); + hv_len = 36; + } + return (*ctx->client_auth_vtable)->do_sign( + ctx->client_auth_vtable, ctx->hash_id, hv_len, + ctx->eng.pad, sizeof ctx->eng.pad); +} + + + +static const unsigned char t0_datablock[] = { + 0x00, 0x00, 0x0A, 0x00, 0x24, 0x00, 0x2F, 0x01, 0x24, 0x00, 0x35, 0x02, + 0x24, 0x00, 0x3C, 0x01, 0x44, 0x00, 0x3D, 0x02, 0x44, 0x00, 0x9C, 0x03, + 0x04, 0x00, 0x9D, 0x04, 0x05, 0xC0, 0x03, 0x40, 0x24, 0xC0, 0x04, 0x41, + 0x24, 0xC0, 0x05, 0x42, 0x24, 0xC0, 0x08, 0x20, 0x24, 0xC0, 0x09, 0x21, + 0x24, 0xC0, 0x0A, 0x22, 0x24, 0xC0, 0x0D, 0x30, 0x24, 0xC0, 0x0E, 0x31, + 0x24, 0xC0, 0x0F, 0x32, 0x24, 0xC0, 0x12, 0x10, 0x24, 0xC0, 0x13, 0x11, + 0x24, 0xC0, 0x14, 0x12, 0x24, 0xC0, 0x23, 0x21, 0x44, 0xC0, 0x24, 0x22, + 0x55, 0xC0, 0x25, 0x41, 0x44, 0xC0, 0x26, 0x42, 0x55, 0xC0, 0x27, 0x11, + 0x44, 0xC0, 0x28, 0x12, 0x55, 0xC0, 0x29, 0x31, 0x44, 0xC0, 0x2A, 0x32, + 0x55, 0xC0, 0x2B, 0x23, 0x04, 0xC0, 0x2C, 0x24, 0x05, 0xC0, 0x2D, 0x43, + 0x04, 0xC0, 0x2E, 0x44, 0x05, 0xC0, 0x2F, 0x13, 0x04, 0xC0, 0x30, 0x14, + 0x05, 0xC0, 0x31, 0x33, 0x04, 0xC0, 0x32, 0x34, 0x05, 0xC0, 0x9C, 0x06, + 0x04, 0xC0, 0x9D, 0x07, 0x04, 0xC0, 0xA0, 0x08, 0x04, 0xC0, 0xA1, 0x09, + 0x04, 0xC0, 0xAC, 0x26, 0x04, 0xC0, 0xAD, 0x27, 0x04, 0xC0, 0xAE, 0x28, + 0x04, 0xC0, 0xAF, 0x29, 0x04, 0xCC, 0xA8, 0x15, 0x04, 0xCC, 0xA9, 0x25, + 0x04, 0x00, 0x00 +}; + +static const unsigned char t0_codeblock[] = { + 0x00, 0x01, 0x00, 0x0A, 0x00, 0x00, 0x01, 0x00, 0x0D, 0x00, 0x00, 0x01, + 0x00, 0x0E, 0x00, 0x00, 0x01, 0x00, 0x0F, 0x00, 0x00, 0x01, 0x01, 0x08, + 0x00, 0x00, 0x01, 0x01, 0x09, 0x00, 0x00, 0x01, 0x02, 0x08, 0x00, 0x00, + 0x01, 0x02, 0x09, 0x00, 0x00, 0x25, 0x25, 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_BAD_CCS), 0x00, 0x00, 0x01, + 
T0_INT1(BR_ERR_BAD_CIPHER_SUITE), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_BAD_COMPRESSION), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_BAD_FINISHED), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_BAD_FRAGLEN), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_BAD_HANDSHAKE), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_BAD_HELLO_DONE), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_BAD_PARAM), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_BAD_SECRENEG), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_BAD_SNI), 0x00, 0x00, 0x01, T0_INT1(BR_ERR_BAD_VERSION), + 0x00, 0x00, 0x01, T0_INT1(BR_ERR_EXTRA_EXTENSION), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_INVALID_ALGORITHM), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_LIMIT_EXCEEDED), 0x00, 0x00, 0x01, T0_INT1(BR_ERR_OK), + 0x00, 0x00, 0x01, T0_INT1(BR_ERR_OVERSIZED_ID), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_RESUME_MISMATCH), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_UNEXPECTED), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_UNSUPPORTED_VERSION), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_WRONG_KEY_USAGE), 0x00, 0x00, 0x01, + T0_INT2(offsetof(br_ssl_engine_context, action)), 0x00, 0x00, 0x01, + T0_INT2(offsetof(br_ssl_engine_context, alert)), 0x00, 0x00, 0x01, + T0_INT2(offsetof(br_ssl_engine_context, application_data)), 0x00, 0x00, + 0x01, T0_INT2(offsetof(br_ssl_client_context, auth_type)), 0x00, 0x00, + 0x01, + T0_INT2(offsetof(br_ssl_engine_context, session) + offsetof(br_ssl_session_parameters, cipher_suite)), + 0x00, 0x00, 0x01, + T0_INT2(offsetof(br_ssl_engine_context, client_random)), 0x00, 0x00, + 0x01, T0_INT2(offsetof(br_ssl_engine_context, close_received)), 0x00, + 0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, ecdhe_curve)), + 0x00, 0x00, 0x01, + T0_INT2(offsetof(br_ssl_engine_context, ecdhe_point)), 0x00, 0x00, + 0x01, T0_INT2(offsetof(br_ssl_engine_context, ecdhe_point_len)), 0x00, + 0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, flags)), 0x00, + 0x00, 0x01, T0_INT2(offsetof(br_ssl_client_context, hash_id)), 0x00, + 0x00, 0x01, T0_INT2(offsetof(br_ssl_client_context, hashes)), 0x00, + 0x00, 0x01, 
T0_INT2(offsetof(br_ssl_engine_context, log_max_frag_len)), + 0x00, 0x00, 0x01, + T0_INT2(offsetof(br_ssl_client_context, min_clienthello_len)), 0x00, + 0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, pad)), 0x00, 0x00, + 0x01, T0_INT2(offsetof(br_ssl_engine_context, protocol_names_num)), + 0x00, 0x00, 0x01, + T0_INT2(offsetof(br_ssl_engine_context, record_type_in)), 0x00, 0x00, + 0x01, T0_INT2(offsetof(br_ssl_engine_context, record_type_out)), 0x00, + 0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, reneg)), 0x00, + 0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, saved_finished)), + 0x00, 0x00, 0x01, + T0_INT2(offsetof(br_ssl_engine_context, selected_protocol)), 0x00, + 0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, server_name)), + 0x00, 0x00, 0x01, + T0_INT2(offsetof(br_ssl_engine_context, server_random)), 0x00, 0x00, + 0x01, + T0_INT2(offsetof(br_ssl_engine_context, session) + offsetof(br_ssl_session_parameters, session_id)), + 0x00, 0x00, 0x01, + T0_INT2(offsetof(br_ssl_engine_context, session) + offsetof(br_ssl_session_parameters, session_id_len)), + 0x00, 0x00, 0x01, + T0_INT2(offsetof(br_ssl_engine_context, shutdown_recv)), 0x00, 0x00, + 0x01, T0_INT2(offsetof(br_ssl_engine_context, suites_buf)), 0x00, 0x00, + 0x01, T0_INT2(offsetof(br_ssl_engine_context, suites_num)), 0x00, 0x00, + 0x01, + T0_INT2(offsetof(br_ssl_engine_context, session) + offsetof(br_ssl_session_parameters, version)), + 0x00, 0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, version_in)), + 0x00, 0x00, 0x01, + T0_INT2(offsetof(br_ssl_engine_context, version_max)), 0x00, 0x00, + 0x01, T0_INT2(offsetof(br_ssl_engine_context, version_min)), 0x00, + 0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, version_out)), + 0x00, 0x00, 0x09, 0x26, 0x58, 0x06, 0x02, 0x68, 0x28, 0x00, 0x00, 0x06, + 0x08, 0x2C, 0x0E, 0x05, 0x02, 0x71, 0x28, 0x04, 0x01, 0x3C, 0x00, 0x00, + 0x01, 0x01, 0x00, 0x01, 0x03, 0x00, 0x99, 0x26, 0x5E, 0x44, 0x9D, 0x26, + 0x05, 0x04, 0x60, 0x01, 0x00, 0x00, 
0x02, 0x00, 0x0E, 0x06, 0x02, 0x9D, + 0x00, 0x5E, 0x04, 0x6B, 0x00, 0x06, 0x02, 0x68, 0x28, 0x00, 0x00, 0x26, + 0x89, 0x44, 0x05, 0x03, 0x01, 0x0C, 0x08, 0x44, 0x79, 0x2C, 0xAB, 0x1C, + 0x84, 0x01, 0x0C, 0x31, 0x00, 0x00, 0x26, 0x1F, 0x01, 0x08, 0x0B, 0x44, + 0x5C, 0x1F, 0x08, 0x00, 0x01, 0x03, 0x00, 0x77, 0x2E, 0x02, 0x00, 0x36, + 0x17, 0x01, 0x01, 0x0B, 0x77, 0x3E, 0x29, 0x1A, 0x36, 0x06, 0x07, 0x02, + 0x00, 0xCF, 0x03, 0x00, 0x04, 0x75, 0x01, 0x00, 0xC5, 0x02, 0x00, 0x26, + 0x1A, 0x17, 0x06, 0x02, 0x6F, 0x28, 0xCF, 0x04, 0x76, 0x01, 0x01, 0x00, + 0x77, 0x3E, 0x01, 0x16, 0x87, 0x3E, 0x01, 0x00, 0x8A, 0x3C, 0x34, 0xD5, + 0x29, 0xB4, 0x06, 0x09, 0x01, 0x7F, 0xAF, 0x01, 0x7F, 0xD2, 0x04, 0x80, + 0x53, 0xB1, 0x79, 0x2C, 0xA1, 0x01, T0_INT1(BR_KEYTYPE_SIGN), 0x17, + 0x06, 0x01, 0xB5, 0xB8, 0x26, 0x01, 0x0D, 0x0E, 0x06, 0x07, 0x25, 0xB7, + 0xB8, 0x01, 0x7F, 0x04, 0x02, 0x01, 0x00, 0x03, 0x00, 0x01, 0x0E, 0x0E, + 0x05, 0x02, 0x72, 0x28, 0x06, 0x02, 0x67, 0x28, 0x33, 0x06, 0x02, 0x72, + 0x28, 0x02, 0x00, 0x06, 0x1C, 0xD3, 0x80, 0x2E, 0x01, 0x81, 0x7F, 0x0E, + 0x06, 0x0D, 0x25, 0x01, 0x10, 0xDE, 0x01, 0x00, 0xDD, 0x79, 0x2C, 0xAB, + 0x24, 0x04, 0x04, 0xD6, 0x06, 0x01, 0xD4, 0x04, 0x01, 0xD6, 0x01, 0x7F, + 0xD2, 0x01, 0x7F, 0xAF, 0x01, 0x01, 0x77, 0x3E, 0x01, 0x17, 0x87, 0x3E, + 0x00, 0x00, 0x38, 0x38, 0x00, 0x00, 0x9A, 0x01, 0x0C, 0x11, 0x01, 0x00, + 0x38, 0x0E, 0x06, 0x05, 0x25, 0x01, + T0_INT1(BR_KEYTYPE_RSA | BR_KEYTYPE_KEYX), 0x04, 0x30, 0x01, 0x01, + 0x38, 0x0E, 0x06, 0x05, 0x25, 0x01, + T0_INT1(BR_KEYTYPE_RSA | BR_KEYTYPE_SIGN), 0x04, 0x25, 0x01, 0x02, + 0x38, 0x0E, 0x06, 0x05, 0x25, 0x01, + T0_INT1(BR_KEYTYPE_EC | BR_KEYTYPE_SIGN), 0x04, 0x1A, 0x01, 0x03, + 0x38, 0x0E, 0x06, 0x05, 0x25, 0x01, + T0_INT1(BR_KEYTYPE_EC | BR_KEYTYPE_KEYX), 0x04, 0x0F, 0x01, 0x04, + 0x38, 0x0E, 0x06, 0x05, 0x25, 0x01, + T0_INT1(BR_KEYTYPE_EC | BR_KEYTYPE_KEYX), 0x04, 0x04, 0x01, 0x00, + 0x44, 0x25, 0x00, 0x00, 0x82, 0x2E, 0x01, 0x0E, 0x0E, 0x06, 0x04, 0x01, + 0x00, 0x04, 0x02, 0x01, 
0x05, 0x00, 0x00, 0x40, 0x06, 0x04, 0x01, 0x06, + 0x04, 0x02, 0x01, 0x00, 0x00, 0x00, 0x88, 0x2E, 0x26, 0x06, 0x08, 0x01, + 0x01, 0x09, 0x01, 0x11, 0x07, 0x04, 0x03, 0x25, 0x01, 0x05, 0x00, 0x01, + 0x41, 0x03, 0x00, 0x25, 0x01, 0x00, 0x43, 0x06, 0x03, 0x02, 0x00, 0x08, + 0x42, 0x06, 0x03, 0x02, 0x00, 0x08, 0x26, 0x06, 0x06, 0x01, 0x01, 0x0B, + 0x01, 0x06, 0x08, 0x00, 0x00, 0x8B, 0x3F, 0x26, 0x06, 0x03, 0x01, 0x09, + 0x08, 0x00, 0x01, 0x40, 0x26, 0x06, 0x1E, 0x01, 0x00, 0x03, 0x00, 0x26, + 0x06, 0x0E, 0x26, 0x01, 0x01, 0x17, 0x02, 0x00, 0x08, 0x03, 0x00, 0x01, + 0x01, 0x11, 0x04, 0x6F, 0x25, 0x02, 0x00, 0x01, 0x01, 0x0B, 0x01, 0x06, + 0x08, 0x00, 0x00, 0x7F, 0x2D, 0x44, 0x11, 0x01, 0x01, 0x17, 0x35, 0x00, + 0x00, 0x9F, 0xCE, 0x26, 0x01, 0x07, 0x17, 0x01, 0x00, 0x38, 0x0E, 0x06, + 0x09, 0x25, 0x01, 0x10, 0x17, 0x06, 0x01, 0x9F, 0x04, 0x35, 0x01, 0x01, + 0x38, 0x0E, 0x06, 0x2C, 0x25, 0x25, 0x01, 0x00, 0x77, 0x3E, 0xB3, 0x88, + 0x2E, 0x01, 0x01, 0x0E, 0x01, 0x01, 0xA8, 0x37, 0x06, 0x17, 0x29, 0x1A, + 0x36, 0x06, 0x04, 0xCE, 0x25, 0x04, 0x78, 0x01, 0x80, 0x64, 0xC5, 0x01, + 0x01, 0x77, 0x3E, 0x01, 0x17, 0x87, 0x3E, 0x04, 0x01, 0x9F, 0x04, 0x03, + 0x72, 0x28, 0x25, 0x04, 0xFF, 0x34, 0x01, 0x26, 0x03, 0x00, 0x09, 0x26, + 0x58, 0x06, 0x02, 0x68, 0x28, 0x02, 0x00, 0x00, 0x00, 0x9A, 0x01, 0x0F, + 0x17, 0x00, 0x00, 0x76, 0x2E, 0x01, 0x00, 0x38, 0x0E, 0x06, 0x10, 0x25, + 0x26, 0x01, 0x01, 0x0D, 0x06, 0x03, 0x25, 0x01, 0x02, 0x76, 0x3E, 0x01, + 0x00, 0x04, 0x21, 0x01, 0x01, 0x38, 0x0E, 0x06, 0x14, 0x25, 0x01, 0x00, + 0x76, 0x3E, 0x26, 0x01, 0x80, 0x64, 0x0E, 0x06, 0x05, 0x01, 0x82, 0x00, + 0x08, 0x28, 0x5A, 0x04, 0x07, 0x25, 0x01, 0x82, 0x00, 0x08, 0x28, 0x25, + 0x00, 0x00, 0x01, 0x00, 0x2F, 0x06, 0x05, 0x3A, 0xAC, 0x37, 0x04, 0x78, + 0x26, 0x06, 0x04, 0x01, 0x01, 0x8F, 0x3E, 0x00, 0x01, 0xBF, 0xAA, 0xBF, + 0xAA, 0xC1, 0x84, 0x44, 0x26, 0x03, 0x00, 0xB6, 0x9B, 0x9B, 0x02, 0x00, + 0x4D, 0x26, 0x58, 0x06, 0x0A, 0x01, 0x03, 0xA8, 0x06, 0x02, 0x72, 0x28, + 0x25, 0x04, 0x03, 0x5C, 
0x8A, 0x3C, 0x00, 0x00, 0x2F, 0x06, 0x0B, 0x86, + 0x2E, 0x01, 0x14, 0x0D, 0x06, 0x02, 0x72, 0x28, 0x04, 0x11, 0xCE, 0x01, + 0x07, 0x17, 0x26, 0x01, 0x02, 0x0D, 0x06, 0x06, 0x06, 0x02, 0x72, 0x28, + 0x04, 0x70, 0x25, 0xC2, 0x01, 0x01, 0x0D, 0x33, 0x37, 0x06, 0x02, 0x61, + 0x28, 0x26, 0x01, 0x01, 0xC8, 0x36, 0xB2, 0x00, 0x01, 0xB8, 0x01, 0x0B, + 0x0E, 0x05, 0x02, 0x72, 0x28, 0x26, 0x01, 0x03, 0x0E, 0x06, 0x08, 0xC0, + 0x06, 0x02, 0x68, 0x28, 0x44, 0x25, 0x00, 0x44, 0x57, 0xC0, 0xAA, 0x26, + 0x06, 0x23, 0xC0, 0xAA, 0x26, 0x56, 0x26, 0x06, 0x18, 0x26, 0x01, 0x82, + 0x00, 0x0F, 0x06, 0x05, 0x01, 0x82, 0x00, 0x04, 0x01, 0x26, 0x03, 0x00, + 0x84, 0x02, 0x00, 0xB6, 0x02, 0x00, 0x53, 0x04, 0x65, 0x9B, 0x54, 0x04, + 0x5A, 0x9B, 0x9B, 0x55, 0x26, 0x06, 0x02, 0x35, 0x00, 0x25, 0x2B, 0x00, + 0x00, 0x79, 0x2C, 0xA1, 0x01, 0x7F, 0xB0, 0x26, 0x58, 0x06, 0x02, 0x35, + 0x28, 0x26, 0x05, 0x02, 0x72, 0x28, 0x38, 0x17, 0x0D, 0x06, 0x02, 0x74, + 0x28, 0x3B, 0x00, 0x00, 0x9C, 0xB8, 0x01, 0x14, 0x0D, 0x06, 0x02, 0x72, + 0x28, 0x84, 0x01, 0x0C, 0x08, 0x01, 0x0C, 0xB6, 0x9B, 0x84, 0x26, 0x01, + 0x0C, 0x08, 0x01, 0x0C, 0x30, 0x05, 0x02, 0x64, 0x28, 0x00, 0x00, 0xB9, + 0x06, 0x02, 0x72, 0x28, 0x06, 0x02, 0x66, 0x28, 0x00, 0x0A, 0xB8, 0x01, + 0x02, 0x0E, 0x05, 0x02, 0x72, 0x28, 0xBF, 0x03, 0x00, 0x02, 0x00, 0x95, + 0x2C, 0x0A, 0x02, 0x00, 0x94, 0x2C, 0x0F, 0x37, 0x06, 0x02, 0x73, 0x28, + 0x02, 0x00, 0x93, 0x2C, 0x0D, 0x06, 0x02, 0x6B, 0x28, 0x02, 0x00, 0x96, + 0x3C, 0x8C, 0x01, 0x20, 0xB6, 0x01, 0x00, 0x03, 0x01, 0xC1, 0x03, 0x02, + 0x02, 0x02, 0x01, 0x20, 0x0F, 0x06, 0x02, 0x70, 0x28, 0x84, 0x02, 0x02, + 0xB6, 0x02, 0x02, 0x8E, 0x2E, 0x0E, 0x02, 0x02, 0x01, 0x00, 0x0F, 0x17, + 0x06, 0x0B, 0x8D, 0x84, 0x02, 0x02, 0x30, 0x06, 0x04, 0x01, 0x7F, 0x03, + 0x01, 0x8D, 0x84, 0x02, 0x02, 0x31, 0x02, 0x02, 0x8E, 0x3E, 0x02, 0x00, + 0x92, 0x02, 0x01, 0x98, 0xBF, 0x26, 0xC3, 0x58, 0x06, 0x02, 0x62, 0x28, + 0x26, 0xCD, 0x02, 0x00, 0x01, 0x86, 0x03, 0x0A, 0x17, 0x06, 0x02, 0x62, + 0x28, 0x79, 0x02, 0x01, 
0x98, 0xC1, 0x06, 0x02, 0x63, 0x28, 0x26, 0x06, + 0x81, 0x47, 0xBF, 0xAA, 0xA6, 0x03, 0x03, 0xA4, 0x03, 0x04, 0xA2, 0x03, + 0x05, 0xA5, 0x03, 0x06, 0xA7, 0x03, 0x07, 0xA3, 0x03, 0x08, 0x27, 0x03, + 0x09, 0x26, 0x06, 0x81, 0x18, 0xBF, 0x01, 0x00, 0x38, 0x0E, 0x06, 0x0F, + 0x25, 0x02, 0x03, 0x05, 0x02, 0x6C, 0x28, 0x01, 0x00, 0x03, 0x03, 0xBE, + 0x04, 0x80, 0x7F, 0x01, 0x01, 0x38, 0x0E, 0x06, 0x0F, 0x25, 0x02, 0x05, + 0x05, 0x02, 0x6C, 0x28, 0x01, 0x00, 0x03, 0x05, 0xBC, 0x04, 0x80, 0x6A, + 0x01, 0x83, 0xFE, 0x01, 0x38, 0x0E, 0x06, 0x0F, 0x25, 0x02, 0x04, 0x05, + 0x02, 0x6C, 0x28, 0x01, 0x00, 0x03, 0x04, 0xBD, 0x04, 0x80, 0x53, 0x01, + 0x0D, 0x38, 0x0E, 0x06, 0x0E, 0x25, 0x02, 0x06, 0x05, 0x02, 0x6C, 0x28, + 0x01, 0x00, 0x03, 0x06, 0xBA, 0x04, 0x3F, 0x01, 0x0A, 0x38, 0x0E, 0x06, + 0x0E, 0x25, 0x02, 0x07, 0x05, 0x02, 0x6C, 0x28, 0x01, 0x00, 0x03, 0x07, + 0xBA, 0x04, 0x2B, 0x01, 0x0B, 0x38, 0x0E, 0x06, 0x0E, 0x25, 0x02, 0x08, + 0x05, 0x02, 0x6C, 0x28, 0x01, 0x00, 0x03, 0x08, 0xBA, 0x04, 0x17, 0x01, + 0x10, 0x38, 0x0E, 0x06, 0x0E, 0x25, 0x02, 0x09, 0x05, 0x02, 0x6C, 0x28, + 0x01, 0x00, 0x03, 0x09, 0xAE, 0x04, 0x03, 0x6C, 0x28, 0x25, 0x04, 0xFE, + 0x64, 0x02, 0x04, 0x06, 0x0D, 0x02, 0x04, 0x01, 0x05, 0x0F, 0x06, 0x02, + 0x69, 0x28, 0x01, 0x01, 0x88, 0x3E, 0x9B, 0x04, 0x0C, 0xA4, 0x01, 0x05, + 0x0F, 0x06, 0x02, 0x69, 0x28, 0x01, 0x01, 0x88, 0x3E, 0x9B, 0x02, 0x01, + 0x00, 0x04, 0xB8, 0x01, 0x0C, 0x0E, 0x05, 0x02, 0x72, 0x28, 0xC1, 0x01, + 0x03, 0x0E, 0x05, 0x02, 0x6D, 0x28, 0xBF, 0x26, 0x7C, 0x3E, 0x26, 0x01, + 0x20, 0x10, 0x06, 0x02, 0x6D, 0x28, 0x40, 0x44, 0x11, 0x01, 0x01, 0x17, + 0x05, 0x02, 0x6D, 0x28, 0xC1, 0x26, 0x01, 0x81, 0x05, 0x0F, 0x06, 0x02, + 0x6D, 0x28, 0x26, 0x7E, 0x3E, 0x7D, 0x44, 0xB6, 0x92, 0x2C, 0x01, 0x86, + 0x03, 0x10, 0x03, 0x00, 0x79, 0x2C, 0xCB, 0x03, 0x01, 0x01, 0x02, 0x03, + 0x02, 0x02, 0x00, 0x06, 0x21, 0xC1, 0x26, 0x26, 0x01, 0x02, 0x0A, 0x44, + 0x01, 0x06, 0x0F, 0x37, 0x06, 0x02, 0x6D, 0x28, 0x03, 0x02, 0xC1, 0x02, + 0x01, 0x01, 0x01, 0x0B, 
0x01, 0x03, 0x08, 0x0E, 0x05, 0x02, 0x6D, 0x28, + 0x04, 0x08, 0x02, 0x01, 0x06, 0x04, 0x01, 0x00, 0x03, 0x02, 0xBF, 0x26, + 0x03, 0x03, 0x26, 0x01, 0x84, 0x00, 0x0F, 0x06, 0x02, 0x6E, 0x28, 0x84, + 0x44, 0xB6, 0x02, 0x02, 0x02, 0x01, 0x02, 0x03, 0x50, 0x26, 0x06, 0x01, + 0x28, 0x25, 0x9B, 0x00, 0x02, 0x03, 0x00, 0x03, 0x01, 0x02, 0x00, 0x97, + 0x02, 0x01, 0x02, 0x00, 0x39, 0x26, 0x01, 0x00, 0x0E, 0x06, 0x02, 0x60, + 0x00, 0xD0, 0x04, 0x74, 0x02, 0x01, 0x00, 0x03, 0x00, 0xC1, 0xAA, 0x26, + 0x06, 0x80, 0x43, 0xC1, 0x01, 0x01, 0x38, 0x0E, 0x06, 0x06, 0x25, 0x01, + 0x81, 0x7F, 0x04, 0x2E, 0x01, 0x80, 0x40, 0x38, 0x0E, 0x06, 0x07, 0x25, + 0x01, 0x83, 0xFE, 0x00, 0x04, 0x20, 0x01, 0x80, 0x41, 0x38, 0x0E, 0x06, + 0x07, 0x25, 0x01, 0x84, 0x80, 0x00, 0x04, 0x12, 0x01, 0x80, 0x42, 0x38, + 0x0E, 0x06, 0x07, 0x25, 0x01, 0x88, 0x80, 0x00, 0x04, 0x04, 0x01, 0x00, + 0x44, 0x25, 0x02, 0x00, 0x37, 0x03, 0x00, 0x04, 0xFF, 0x39, 0x9B, 0x79, + 0x2C, 0xC9, 0x05, 0x09, 0x02, 0x00, 0x01, 0x83, 0xFF, 0x7F, 0x17, 0x03, + 0x00, 0x92, 0x2C, 0x01, 0x86, 0x03, 0x10, 0x06, 0x3A, 0xBB, 0x26, 0x81, + 0x3D, 0x41, 0x25, 0x26, 0x01, 0x08, 0x0B, 0x37, 0x01, 0x8C, 0x80, 0x00, + 0x37, 0x17, 0x02, 0x00, 0x17, 0x02, 0x00, 0x01, 0x8C, 0x80, 0x00, 0x17, + 0x06, 0x19, 0x26, 0x01, 0x81, 0x7F, 0x17, 0x06, 0x05, 0x01, 0x84, 0x80, + 0x00, 0x37, 0x26, 0x01, 0x83, 0xFE, 0x00, 0x17, 0x06, 0x05, 0x01, 0x88, + 0x80, 0x00, 0x37, 0x03, 0x00, 0x04, 0x09, 0x02, 0x00, 0x01, 0x8C, 0x88, + 0x01, 0x17, 0x03, 0x00, 0x16, 0xBF, 0xAA, 0x26, 0x06, 0x23, 0xBF, 0xAA, + 0x26, 0x15, 0x26, 0x06, 0x18, 0x26, 0x01, 0x82, 0x00, 0x0F, 0x06, 0x05, + 0x01, 0x82, 0x00, 0x04, 0x01, 0x26, 0x03, 0x01, 0x84, 0x02, 0x01, 0xB6, + 0x02, 0x01, 0x12, 0x04, 0x65, 0x9B, 0x13, 0x04, 0x5A, 0x9B, 0x14, 0x9B, + 0x02, 0x00, 0x2A, 0x00, 0x00, 0xB9, 0x26, 0x5A, 0x06, 0x07, 0x25, 0x06, + 0x02, 0x66, 0x28, 0x04, 0x74, 0x00, 0x00, 0xC2, 0x01, 0x03, 0xC0, 0x44, + 0x25, 0x44, 0x00, 0x00, 0xBF, 0xC6, 0x00, 0x03, 0x01, 0x00, 0x03, 0x00, + 0xBF, 0xAA, 0x26, 0x06, 
0x80, 0x50, 0xC1, 0x03, 0x01, 0xC1, 0x03, 0x02, + 0x02, 0x01, 0x01, 0x08, 0x0E, 0x06, 0x16, 0x02, 0x02, 0x01, 0x0F, 0x0C, + 0x06, 0x0D, 0x01, 0x01, 0x02, 0x02, 0x01, 0x10, 0x08, 0x0B, 0x02, 0x00, + 0x37, 0x03, 0x00, 0x04, 0x2A, 0x02, 0x01, 0x01, 0x02, 0x10, 0x02, 0x01, + 0x01, 0x06, 0x0C, 0x17, 0x02, 0x02, 0x01, 0x01, 0x0E, 0x02, 0x02, 0x01, + 0x03, 0x0E, 0x37, 0x17, 0x06, 0x11, 0x02, 0x00, 0x01, 0x01, 0x02, 0x02, + 0x5D, 0x01, 0x02, 0x0B, 0x02, 0x01, 0x08, 0x0B, 0x37, 0x03, 0x00, 0x04, + 0xFF, 0x2C, 0x9B, 0x02, 0x00, 0x00, 0x00, 0xBF, 0x01, 0x01, 0x0E, 0x05, + 0x02, 0x65, 0x28, 0xC1, 0x01, 0x08, 0x08, 0x82, 0x2E, 0x0E, 0x05, 0x02, + 0x65, 0x28, 0x00, 0x00, 0xBF, 0x88, 0x2E, 0x05, 0x15, 0x01, 0x01, 0x0E, + 0x05, 0x02, 0x69, 0x28, 0xC1, 0x01, 0x00, 0x0E, 0x05, 0x02, 0x69, 0x28, + 0x01, 0x02, 0x88, 0x3E, 0x04, 0x1C, 0x01, 0x19, 0x0E, 0x05, 0x02, 0x69, + 0x28, 0xC1, 0x01, 0x18, 0x0E, 0x05, 0x02, 0x69, 0x28, 0x84, 0x01, 0x18, + 0xB6, 0x89, 0x84, 0x01, 0x18, 0x30, 0x05, 0x02, 0x69, 0x28, 0x00, 0x00, + 0xBF, 0x06, 0x02, 0x6A, 0x28, 0x00, 0x00, 0x01, 0x02, 0x97, 0xC2, 0x01, + 0x08, 0x0B, 0xC2, 0x08, 0x00, 0x00, 0x01, 0x03, 0x97, 0xC2, 0x01, 0x08, + 0x0B, 0xC2, 0x08, 0x01, 0x08, 0x0B, 0xC2, 0x08, 0x00, 0x00, 0x01, 0x01, + 0x97, 0xC2, 0x00, 0x00, 0x3A, 0x26, 0x58, 0x05, 0x01, 0x00, 0x25, 0xD0, + 0x04, 0x76, 0x02, 0x03, 0x00, 0x91, 0x2E, 0x03, 0x01, 0x01, 0x00, 0x26, + 0x02, 0x01, 0x0A, 0x06, 0x10, 0x26, 0x01, 0x01, 0x0B, 0x90, 0x08, 0x2C, + 0x02, 0x00, 0x0E, 0x06, 0x01, 0x00, 0x5C, 0x04, 0x6A, 0x25, 0x01, 0x7F, + 0x00, 0x00, 0x01, 0x15, 0x87, 0x3E, 0x44, 0x52, 0x25, 0x52, 0x25, 0x29, + 0x00, 0x00, 0x01, 0x01, 0x44, 0xC4, 0x00, 0x00, 0x44, 0x38, 0x97, 0x44, + 0x26, 0x06, 0x05, 0xC2, 0x25, 0x5D, 0x04, 0x78, 0x25, 0x00, 0x00, 0x26, + 0x01, 0x81, 0xAC, 0x00, 0x0E, 0x06, 0x04, 0x25, 0x01, 0x7F, 0x00, 0x9A, + 0x59, 0x00, 0x02, 0x03, 0x00, 0x79, 0x2C, 0x9A, 0x03, 0x01, 0x02, 0x01, + 0x01, 0x0F, 0x17, 0x02, 0x01, 0x01, 0x04, 0x11, 0x01, 0x0F, 0x17, 0x02, + 0x01, 0x01, 0x08, 0x11, 
0x01, 0x0F, 0x17, 0x01, 0x00, 0x38, 0x0E, 0x06, + 0x10, 0x25, 0x01, 0x00, 0x01, 0x18, 0x02, 0x00, 0x06, 0x03, 0x49, 0x04, + 0x01, 0x4A, 0x04, 0x81, 0x0D, 0x01, 0x01, 0x38, 0x0E, 0x06, 0x10, 0x25, + 0x01, 0x01, 0x01, 0x10, 0x02, 0x00, 0x06, 0x03, 0x49, 0x04, 0x01, 0x4A, + 0x04, 0x80, 0x77, 0x01, 0x02, 0x38, 0x0E, 0x06, 0x10, 0x25, 0x01, 0x01, + 0x01, 0x20, 0x02, 0x00, 0x06, 0x03, 0x49, 0x04, 0x01, 0x4A, 0x04, 0x80, + 0x61, 0x01, 0x03, 0x38, 0x0E, 0x06, 0x0F, 0x25, 0x25, 0x01, 0x10, 0x02, + 0x00, 0x06, 0x03, 0x47, 0x04, 0x01, 0x48, 0x04, 0x80, 0x4C, 0x01, 0x04, + 0x38, 0x0E, 0x06, 0x0E, 0x25, 0x25, 0x01, 0x20, 0x02, 0x00, 0x06, 0x03, + 0x47, 0x04, 0x01, 0x48, 0x04, 0x38, 0x01, 0x05, 0x38, 0x0E, 0x06, 0x0C, + 0x25, 0x25, 0x02, 0x00, 0x06, 0x03, 0x4B, 0x04, 0x01, 0x4C, 0x04, 0x26, + 0x26, 0x01, 0x09, 0x0F, 0x06, 0x02, 0x68, 0x28, 0x44, 0x25, 0x26, 0x01, + 0x01, 0x17, 0x01, 0x04, 0x0B, 0x01, 0x10, 0x08, 0x44, 0x01, 0x08, 0x17, + 0x01, 0x10, 0x44, 0x09, 0x02, 0x00, 0x06, 0x03, 0x45, 0x04, 0x01, 0x46, + 0x00, 0x25, 0x00, 0x00, 0x9A, 0x01, 0x0C, 0x11, 0x01, 0x02, 0x0F, 0x00, + 0x00, 0x9A, 0x01, 0x0C, 0x11, 0x26, 0x5B, 0x44, 0x01, 0x03, 0x0A, 0x17, + 0x00, 0x00, 0x9A, 0x01, 0x0C, 0x11, 0x01, 0x01, 0x0E, 0x00, 0x00, 0x9A, + 0x01, 0x0C, 0x11, 0x5A, 0x00, 0x00, 0x9A, 0x01, 0x81, 0x70, 0x17, 0x01, + 0x20, 0x0D, 0x00, 0x00, 0x1B, 0x01, 0x00, 0x75, 0x2E, 0x26, 0x06, 0x22, + 0x01, 0x01, 0x38, 0x0E, 0x06, 0x06, 0x25, 0x01, 0x00, 0x9E, 0x04, 0x14, + 0x01, 0x02, 0x38, 0x0E, 0x06, 0x0D, 0x25, 0x77, 0x2E, 0x01, 0x01, 0x0E, + 0x06, 0x03, 0x01, 0x10, 0x37, 0x04, 0x01, 0x25, 0x04, 0x01, 0x25, 0x7B, + 0x2E, 0x05, 0x33, 0x2F, 0x06, 0x30, 0x86, 0x2E, 0x01, 0x14, 0x38, 0x0E, + 0x06, 0x06, 0x25, 0x01, 0x02, 0x37, 0x04, 0x22, 0x01, 0x15, 0x38, 0x0E, + 0x06, 0x09, 0x25, 0xAD, 0x06, 0x03, 0x01, 0x7F, 0x9E, 0x04, 0x13, 0x01, + 0x16, 0x38, 0x0E, 0x06, 0x06, 0x25, 0x01, 0x01, 0x37, 0x04, 0x07, 0x25, + 0x01, 0x04, 0x37, 0x01, 0x00, 0x25, 0x1A, 0x06, 0x03, 0x01, 0x08, 0x37, + 0x00, 0x00, 0x1B, 0x26, 
0x05, 0x13, 0x2F, 0x06, 0x10, 0x86, 0x2E, 0x01, + 0x15, 0x0E, 0x06, 0x08, 0x25, 0xAD, 0x01, 0x00, 0x77, 0x3E, 0x04, 0x01, + 0x20, 0x00, 0x00, 0xCE, 0x01, 0x07, 0x17, 0x01, 0x01, 0x0F, 0x06, 0x02, + 0x72, 0x28, 0x00, 0x01, 0x03, 0x00, 0x29, 0x1A, 0x06, 0x05, 0x02, 0x00, + 0x87, 0x3E, 0x00, 0xCE, 0x25, 0x04, 0x74, 0x00, 0x01, 0x14, 0xD1, 0x01, + 0x01, 0xDE, 0x29, 0x26, 0x01, 0x00, 0xC8, 0x01, 0x16, 0xD1, 0xD7, 0x29, + 0x00, 0x00, 0x01, 0x0B, 0xDE, 0x4E, 0x26, 0x26, 0x01, 0x03, 0x08, 0xDD, + 0xDD, 0x18, 0x26, 0x58, 0x06, 0x02, 0x25, 0x00, 0xDD, 0x1D, 0x26, 0x06, + 0x05, 0x84, 0x44, 0xD8, 0x04, 0x77, 0x25, 0x04, 0x6C, 0x00, 0x21, 0x01, + 0x0F, 0xDE, 0x26, 0x92, 0x2C, 0x01, 0x86, 0x03, 0x10, 0x06, 0x0C, 0x01, + 0x04, 0x08, 0xDD, 0x80, 0x2E, 0xDE, 0x78, 0x2E, 0xDE, 0x04, 0x02, 0x5E, + 0xDD, 0x26, 0xDC, 0x84, 0x44, 0xD8, 0x00, 0x02, 0xA4, 0xA6, 0x08, 0xA2, + 0x08, 0xA5, 0x08, 0xA7, 0x08, 0xA3, 0x08, 0x27, 0x08, 0x03, 0x00, 0x01, + 0x01, 0xDE, 0x01, 0x27, 0x8E, 0x2E, 0x08, 0x91, 0x2E, 0x01, 0x01, 0x0B, + 0x08, 0x02, 0x00, 0x06, 0x04, 0x5E, 0x02, 0x00, 0x08, 0x83, 0x2C, 0x38, + 0x09, 0x26, 0x5B, 0x06, 0x24, 0x02, 0x00, 0x05, 0x04, 0x44, 0x5E, 0x44, + 0x5F, 0x01, 0x04, 0x09, 0x26, 0x58, 0x06, 0x03, 0x25, 0x01, 0x00, 0x26, + 0x01, 0x04, 0x08, 0x02, 0x00, 0x08, 0x03, 0x00, 0x44, 0x01, 0x04, 0x08, + 0x38, 0x08, 0x44, 0x04, 0x03, 0x25, 0x01, 0x7F, 0x03, 0x01, 0xDD, 0x94, + 0x2C, 0xDC, 0x7A, 0x01, 0x04, 0x19, 0x7A, 0x01, 0x04, 0x08, 0x01, 0x1C, + 0x32, 0x7A, 0x01, 0x20, 0xD8, 0x8D, 0x8E, 0x2E, 0xDA, 0x91, 0x2E, 0x26, + 0x01, 0x01, 0x0B, 0xDC, 0x90, 0x44, 0x26, 0x06, 0x0F, 0x5D, 0x38, 0x2C, + 0x26, 0xC7, 0x05, 0x02, 0x62, 0x28, 0xDC, 0x44, 0x5E, 0x44, 0x04, 0x6E, + 0x60, 0x01, 0x01, 0xDE, 0x01, 0x00, 0xDE, 0x02, 0x00, 0x06, 0x81, 0x5A, + 0x02, 0x00, 0xDC, 0xA4, 0x06, 0x0E, 0x01, 0x83, 0xFE, 0x01, 0xDC, 0x89, + 0xA4, 0x01, 0x04, 0x09, 0x26, 0xDC, 0x5D, 0xDA, 0xA6, 0x06, 0x16, 0x01, + 0x00, 0xDC, 0x8B, 0xA6, 0x01, 0x04, 0x09, 0x26, 0xDC, 0x01, 0x02, 0x09, + 0x26, 0xDC, 0x01, 0x00, 
0xDE, 0x01, 0x03, 0x09, 0xD9, 0xA2, 0x06, 0x0C, + 0x01, 0x01, 0xDC, 0x01, 0x01, 0xDC, 0x82, 0x2E, 0x01, 0x08, 0x09, 0xDE, + 0xA5, 0x06, 0x19, 0x01, 0x0D, 0xDC, 0xA5, 0x01, 0x04, 0x09, 0x26, 0xDC, + 0x01, 0x02, 0x09, 0xDC, 0x42, 0x06, 0x03, 0x01, 0x03, 0xDB, 0x43, 0x06, + 0x03, 0x01, 0x01, 0xDB, 0xA7, 0x26, 0x06, 0x36, 0x01, 0x0A, 0xDC, 0x01, + 0x04, 0x09, 0x26, 0xDC, 0x5F, 0xDC, 0x40, 0x01, 0x00, 0x26, 0x01, 0x82, + 0x80, 0x80, 0x80, 0x00, 0x17, 0x06, 0x0A, 0x01, 0xFD, 0xFF, 0xFF, 0xFF, + 0x7F, 0x17, 0x01, 0x1D, 0xDC, 0x26, 0x01, 0x20, 0x0A, 0x06, 0x0C, 0xA0, + 0x11, 0x01, 0x01, 0x17, 0x06, 0x02, 0x26, 0xDC, 0x5C, 0x04, 0x6E, 0x60, + 0x04, 0x01, 0x25, 0xA3, 0x06, 0x0A, 0x01, 0x0B, 0xDC, 0x01, 0x02, 0xDC, + 0x01, 0x82, 0x00, 0xDC, 0x27, 0x26, 0x06, 0x1F, 0x01, 0x10, 0xDC, 0x01, + 0x04, 0x09, 0x26, 0xDC, 0x5F, 0xDC, 0x85, 0x2C, 0x01, 0x00, 0xA0, 0x0F, + 0x06, 0x0A, 0x26, 0x1E, 0x26, 0xDE, 0x84, 0x44, 0xD8, 0x5C, 0x04, 0x72, + 0x60, 0x04, 0x01, 0x25, 0x02, 0x01, 0x58, 0x05, 0x11, 0x01, 0x15, 0xDC, + 0x02, 0x01, 0x26, 0xDC, 0x26, 0x06, 0x06, 0x5D, 0x01, 0x00, 0xDE, 0x04, + 0x77, 0x25, 0x00, 0x00, 0x01, 0x10, 0xDE, 0x79, 0x2C, 0x26, 0xCC, 0x06, + 0x0C, 0xAB, 0x23, 0x26, 0x5E, 0xDD, 0x26, 0xDC, 0x84, 0x44, 0xD8, 0x04, + 0x0D, 0x26, 0xCA, 0x44, 0xAB, 0x22, 0x26, 0x5C, 0xDD, 0x26, 0xDE, 0x84, + 0x44, 0xD8, 0x00, 0x00, 0x9C, 0x01, 0x14, 0xDE, 0x01, 0x0C, 0xDD, 0x84, + 0x01, 0x0C, 0xD8, 0x00, 0x00, 0x51, 0x26, 0x01, 0x00, 0x0E, 0x06, 0x02, + 0x60, 0x00, 0xCE, 0x25, 0x04, 0x73, 0x00, 0x26, 0xDC, 0xD8, 0x00, 0x00, + 0x26, 0xDE, 0xD8, 0x00, 0x01, 0x03, 0x00, 0x41, 0x25, 0x26, 0x01, 0x10, + 0x17, 0x06, 0x06, 0x01, 0x04, 0xDE, 0x02, 0x00, 0xDE, 0x26, 0x01, 0x08, + 0x17, 0x06, 0x06, 0x01, 0x03, 0xDE, 0x02, 0x00, 0xDE, 0x26, 0x01, 0x20, + 0x17, 0x06, 0x06, 0x01, 0x05, 0xDE, 0x02, 0x00, 0xDE, 0x26, 0x01, 0x80, + 0x40, 0x17, 0x06, 0x06, 0x01, 0x06, 0xDE, 0x02, 0x00, 0xDE, 0x01, 0x04, + 0x17, 0x06, 0x06, 0x01, 0x02, 0xDE, 0x02, 0x00, 0xDE, 0x00, 0x00, 0x26, + 0x01, 0x08, 0x4F, 0xDE, 
0xDE, 0x00, 0x00, 0x26, 0x01, 0x10, 0x4F, 0xDE, + 0xDC, 0x00, 0x00, 0x26, 0x52, 0x06, 0x02, 0x25, 0x00, 0xCE, 0x25, 0x04, + 0x76 +}; + +static const uint16_t t0_caddr[] = { + 0, + 5, + 10, + 15, + 20, + 25, + 30, + 35, + 40, + 44, + 48, + 52, + 56, + 60, + 64, + 68, + 72, + 76, + 80, + 84, + 88, + 92, + 96, + 100, + 104, + 108, + 112, + 116, + 120, + 124, + 129, + 134, + 139, + 144, + 149, + 154, + 159, + 164, + 169, + 174, + 179, + 184, + 189, + 194, + 199, + 204, + 209, + 214, + 219, + 224, + 229, + 234, + 239, + 244, + 249, + 254, + 259, + 264, + 269, + 274, + 279, + 284, + 289, + 294, + 303, + 316, + 320, + 345, + 351, + 370, + 381, + 422, + 542, + 546, + 611, + 626, + 637, + 655, + 684, + 694, + 730, + 740, + 818, + 832, + 838, + 897, + 916, + 951, + 1000, + 1076, + 1103, + 1134, + 1145, + 1497, + 1644, + 1668, + 1884, + 1898, + 1907, + 1911, + 2006, + 2027, + 2083, + 2090, + 2101, + 2117, + 2123, + 2134, + 2169, + 2181, + 2187, + 2202, + 2218, + 2411, + 2420, + 2433, + 2442, + 2449, + 2459, + 2565, + 2590, + 2603, + 2619, + 2637, + 2669, + 2703, + 3071, + 3107, + 3120, + 3134, + 3139, + 3144, + 3210, + 3218, + 3226 +}; + +#define T0_INTERPRETED 88 + +#define T0_ENTER(ip, rp, slot) do { \ + const unsigned char *t0_newip; \ + uint32_t t0_lnum; \ + t0_newip = &t0_codeblock[t0_caddr[(slot) - T0_INTERPRETED]]; \ + t0_lnum = t0_parse7E_unsigned(&t0_newip); \ + (rp) += t0_lnum; \ + *((rp) ++) = (uint32_t)((ip) - &t0_codeblock[0]) + (t0_lnum << 16); \ + (ip) = t0_newip; \ + } while (0) + +#define T0_DEFENTRY(name, slot) \ +void \ +name(void *ctx) \ +{ \ + t0_context *t0ctx = ctx; \ + t0ctx->ip = &t0_codeblock[0]; \ + T0_ENTER(t0ctx->ip, t0ctx->rp, slot); \ +} + +T0_DEFENTRY(br_ssl_hs_client_init_main, 169) + +#define T0_NEXT(t0ipp) (*(*(t0ipp)) ++) + +void +br_ssl_hs_client_run(void *t0ctx) +{ + uint32_t *dp, *rp; + const unsigned char *ip; + +#define T0_LOCAL(x) (*(rp - 2 - (x))) +#define T0_POP() (*-- dp) +#define T0_POPi() (*(int32_t *)(-- dp)) +#define T0_PEEK(x) 
(*(dp - 1 - (x))) +#define T0_PEEKi(x) (*(int32_t *)(dp - 1 - (x))) +#define T0_PUSH(v) do { *dp = (v); dp ++; } while (0) +#define T0_PUSHi(v) do { *(int32_t *)dp = (v); dp ++; } while (0) +#define T0_RPOP() (*-- rp) +#define T0_RPOPi() (*(int32_t *)(-- rp)) +#define T0_RPUSH(v) do { *rp = (v); rp ++; } while (0) +#define T0_RPUSHi(v) do { *(int32_t *)rp = (v); rp ++; } while (0) +#define T0_ROLL(x) do { \ + size_t t0len = (size_t)(x); \ + uint32_t t0tmp = *(dp - 1 - t0len); \ + memmove(dp - t0len - 1, dp - t0len, t0len * sizeof *dp); \ + *(dp - 1) = t0tmp; \ +} while (0) +#define T0_SWAP() do { \ + uint32_t t0tmp = *(dp - 2); \ + *(dp - 2) = *(dp - 1); \ + *(dp - 1) = t0tmp; \ +} while (0) +#define T0_ROT() do { \ + uint32_t t0tmp = *(dp - 3); \ + *(dp - 3) = *(dp - 2); \ + *(dp - 2) = *(dp - 1); \ + *(dp - 1) = t0tmp; \ +} while (0) +#define T0_NROT() do { \ + uint32_t t0tmp = *(dp - 1); \ + *(dp - 1) = *(dp - 2); \ + *(dp - 2) = *(dp - 3); \ + *(dp - 3) = t0tmp; \ +} while (0) +#define T0_PICK(x) do { \ + uint32_t t0depth = (x); \ + T0_PUSH(T0_PEEK(t0depth)); \ +} while (0) +#define T0_CO() do { \ + goto t0_exit; \ +} while (0) +#define T0_RET() goto t0_next + + dp = ((t0_context *)t0ctx)->dp; + rp = ((t0_context *)t0ctx)->rp; + ip = ((t0_context *)t0ctx)->ip; + goto t0_next; + for (;;) { + uint32_t t0x; + + t0_next: + t0x = T0_NEXT(&ip); + if (t0x < T0_INTERPRETED) { + switch (t0x) { + int32_t t0off; + + case 0: /* ret */ + t0x = T0_RPOP(); + rp -= (t0x >> 16); + t0x &= 0xFFFF; + if (t0x == 0) { + ip = NULL; + goto t0_exit; + } + ip = &t0_codeblock[t0x]; + break; + case 1: /* literal constant */ + T0_PUSHi(t0_parse7E_signed(&ip)); + break; + case 2: /* read local */ + T0_PUSH(T0_LOCAL(t0_parse7E_unsigned(&ip))); + break; + case 3: /* write local */ + T0_LOCAL(t0_parse7E_unsigned(&ip)) = T0_POP(); + break; + case 4: /* jump */ + t0off = t0_parse7E_signed(&ip); + ip += t0off; + break; + case 5: /* jump if */ + t0off = t0_parse7E_signed(&ip); + if (T0_POP()) { + 
ip += t0off; + } + break; + case 6: /* jump if not */ + t0off = t0_parse7E_signed(&ip); + if (!T0_POP()) { + ip += t0off; + } + break; + case 7: { + /* * */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(a * b); + + } + break; + case 8: { + /* + */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(a + b); + + } + break; + case 9: { + /* - */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(a - b); + + } + break; + case 10: { + /* < */ + + int32_t b = T0_POPi(); + int32_t a = T0_POPi(); + T0_PUSH(-(uint32_t)(a < b)); + + } + break; + case 11: { + /* << */ + + int c = (int)T0_POPi(); + uint32_t x = T0_POP(); + T0_PUSH(x << c); + + } + break; + case 12: { + /* <= */ + + int32_t b = T0_POPi(); + int32_t a = T0_POPi(); + T0_PUSH(-(uint32_t)(a <= b)); + + } + break; + case 13: { + /* <> */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(-(uint32_t)(a != b)); + + } + break; + case 14: { + /* = */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(-(uint32_t)(a == b)); + + } + break; + case 15: { + /* > */ + + int32_t b = T0_POPi(); + int32_t a = T0_POPi(); + T0_PUSH(-(uint32_t)(a > b)); + + } + break; + case 16: { + /* >= */ + + int32_t b = T0_POPi(); + int32_t a = T0_POPi(); + T0_PUSH(-(uint32_t)(a >= b)); + + } + break; + case 17: { + /* >> */ + + int c = (int)T0_POPi(); + int32_t x = T0_POPi(); + T0_PUSHi(x >> c); + + } + break; + case 18: { + /* anchor-dn-append-name */ + + size_t len; + + len = T0_POP(); + if (CTX->client_auth_vtable != NULL) { + (*CTX->client_auth_vtable)->append_name( + CTX->client_auth_vtable, ENG->pad, len); + } + + } + break; + case 19: { + /* anchor-dn-end-name */ + + if (CTX->client_auth_vtable != NULL) { + (*CTX->client_auth_vtable)->end_name( + CTX->client_auth_vtable); + } + + } + break; + case 20: { + /* anchor-dn-end-name-list */ + + if (CTX->client_auth_vtable != NULL) { + (*CTX->client_auth_vtable)->end_name_list( + CTX->client_auth_vtable); + } + + } + break; + case 21: { 
+ /* anchor-dn-start-name */ + + size_t len; + + len = T0_POP(); + if (CTX->client_auth_vtable != NULL) { + (*CTX->client_auth_vtable)->start_name( + CTX->client_auth_vtable, len); + } + + } + break; + case 22: { + /* anchor-dn-start-name-list */ + + if (CTX->client_auth_vtable != NULL) { + (*CTX->client_auth_vtable)->start_name_list( + CTX->client_auth_vtable); + } + + } + break; + case 23: { + /* and */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(a & b); + + } + break; + case 24: { + /* begin-cert */ + + if (ENG->chain_len == 0) { + T0_PUSHi(-1); + } else { + ENG->cert_cur = ENG->chain->data; + ENG->cert_len = ENG->chain->data_len; + ENG->chain ++; + ENG->chain_len --; + T0_PUSH(ENG->cert_len); + } + + } + break; + case 25: { + /* bzero */ + + size_t len = (size_t)T0_POP(); + void *addr = (unsigned char *)ENG + (size_t)T0_POP(); + memset(addr, 0, len); + + } + break; + case 26: { + /* can-output? */ + + T0_PUSHi(-(ENG->hlen_out > 0)); + + } + break; + case 27: { + /* co */ + T0_CO(); + } + break; + case 28: { + /* compute-Finished-inner */ + + int prf_id = T0_POP(); + int from_client = T0_POPi(); + unsigned char tmp[48]; + br_tls_prf_seed_chunk seed; + + br_tls_prf_impl prf = br_ssl_engine_get_PRF(ENG, prf_id); + seed.data = tmp; + if (ENG->session.version >= BR_TLS12) { + seed.len = br_multihash_out(&ENG->mhash, prf_id, tmp); + } else { + br_multihash_out(&ENG->mhash, br_md5_ID, tmp); + br_multihash_out(&ENG->mhash, br_sha1_ID, tmp + 16); + seed.len = 36; + } + prf(ENG->pad, 12, ENG->session.master_secret, + sizeof ENG->session.master_secret, + from_client ? 
"client finished" : "server finished", + 1, &seed); + + } + break; + case 29: { + /* copy-cert-chunk */ + + size_t clen; + + clen = ENG->cert_len; + if (clen > sizeof ENG->pad) { + clen = sizeof ENG->pad; + } + memcpy(ENG->pad, ENG->cert_cur, clen); + ENG->cert_cur += clen; + ENG->cert_len -= clen; + T0_PUSH(clen); + + } + break; + case 30: { + /* copy-protocol-name */ + + size_t idx = T0_POP(); + size_t len = strlen(ENG->protocol_names[idx]); + memcpy(ENG->pad, ENG->protocol_names[idx], len); + T0_PUSH(len); + + } + break; + case 31: { + /* data-get8 */ + + size_t addr = T0_POP(); + T0_PUSH(t0_datablock[addr]); + + } + break; + case 32: { + /* discard-input */ + + ENG->hlen_in = 0; + + } + break; + case 33: { + /* do-client-sign */ + + size_t sig_len; + + sig_len = make_client_sign(CTX); + if (sig_len == 0) { + br_ssl_engine_fail(ENG, BR_ERR_INVALID_ALGORITHM); + T0_CO(); + } + T0_PUSH(sig_len); + + } + break; + case 34: { + /* do-ecdh */ + + unsigned prf_id = T0_POP(); + unsigned ecdhe = T0_POP(); + int x; + + x = make_pms_ecdh(CTX, ecdhe, prf_id); + if (x < 0) { + br_ssl_engine_fail(ENG, -x); + T0_CO(); + } else { + T0_PUSH(x); + } + + } + break; + case 35: { + /* do-rsa-encrypt */ + + int x; + + x = make_pms_rsa(CTX, T0_POP()); + if (x < 0) { + br_ssl_engine_fail(ENG, -x); + T0_CO(); + } else { + T0_PUSH(x); + } + + } + break; + case 36: { + /* do-static-ecdh */ + + unsigned prf_id = T0_POP(); + + if (make_pms_static_ecdh(CTX, prf_id) < 0) { + br_ssl_engine_fail(ENG, BR_ERR_INVALID_ALGORITHM); + T0_CO(); + } + + } + break; + case 37: { + /* drop */ + (void)T0_POP(); + } + break; + case 38: { + /* dup */ + T0_PUSH(T0_PEEK(0)); + } + break; + case 39: { + /* ext-ALPN-length */ + + size_t u, len; + + if (ENG->protocol_names_num == 0) { + T0_PUSH(0); + T0_RET(); + } + len = 6; + for (u = 0; u < ENG->protocol_names_num; u ++) { + len += 1 + strlen(ENG->protocol_names[u]); + } + T0_PUSH(len); + + } + break; + case 40: { + /* fail */ + + br_ssl_engine_fail(ENG, 
(int)T0_POPi()); + T0_CO(); + + } + break; + case 41: { + /* flush-record */ + + br_ssl_engine_flush_record(ENG); + + } + break; + case 42: { + /* get-client-chain */ + + uint32_t auth_types; + + auth_types = T0_POP(); + if (CTX->client_auth_vtable != NULL) { + br_ssl_client_certificate ux; + + (*CTX->client_auth_vtable)->choose(CTX->client_auth_vtable, + CTX, auth_types, &ux); + CTX->auth_type = (unsigned char)ux.auth_type; + CTX->hash_id = (unsigned char)ux.hash_id; + ENG->chain = ux.chain; + ENG->chain_len = ux.chain_len; + } else { + CTX->hash_id = 0; + ENG->chain_len = 0; + } + + } + break; + case 43: { + /* get-key-type-usages */ + + const br_x509_class *xc; + const br_x509_pkey *pk; + unsigned usages; + + xc = *(ENG->x509ctx); + pk = xc->get_pkey(ENG->x509ctx, &usages); + if (pk == NULL) { + T0_PUSH(0); + } else { + T0_PUSH(pk->key_type | usages); + } + + } + break; + case 44: { + /* get16 */ + + size_t addr = (size_t)T0_POP(); + T0_PUSH(*(uint16_t *)(void *)((unsigned char *)ENG + addr)); + + } + break; + case 45: { + /* get32 */ + + size_t addr = (size_t)T0_POP(); + T0_PUSH(*(uint32_t *)(void *)((unsigned char *)ENG + addr)); + + } + break; + case 46: { + /* get8 */ + + size_t addr = (size_t)T0_POP(); + T0_PUSH(*((unsigned char *)ENG + addr)); + + } + break; + case 47: { + /* has-input? 
*/ + + T0_PUSHi(-(ENG->hlen_in != 0)); + + } + break; + case 48: { + /* memcmp */ + + size_t len = (size_t)T0_POP(); + void *addr2 = (unsigned char *)ENG + (size_t)T0_POP(); + void *addr1 = (unsigned char *)ENG + (size_t)T0_POP(); + int x = memcmp(addr1, addr2, len); + T0_PUSH((uint32_t)-(x == 0)); + + } + break; + case 49: { + /* memcpy */ + + size_t len = (size_t)T0_POP(); + void *src = (unsigned char *)ENG + (size_t)T0_POP(); + void *dst = (unsigned char *)ENG + (size_t)T0_POP(); + memcpy(dst, src, len); + + } + break; + case 50: { + /* mkrand */ + + size_t len = (size_t)T0_POP(); + void *addr = (unsigned char *)ENG + (size_t)T0_POP(); + br_hmac_drbg_generate(&ENG->rng, addr, len); + + } + break; + case 51: { + /* more-incoming-bytes? */ + + T0_PUSHi(ENG->hlen_in != 0 || !br_ssl_engine_recvrec_finished(ENG)); + + } + break; + case 52: { + /* multihash-init */ + + br_multihash_init(&ENG->mhash); + + } + break; + case 53: { + /* neg */ + + uint32_t a = T0_POP(); + T0_PUSH(-a); + + } + break; + case 54: { + /* not */ + + uint32_t a = T0_POP(); + T0_PUSH(~a); + + } + break; + case 55: { + /* or */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(a | b); + + } + break; + case 56: { + /* over */ + T0_PUSH(T0_PEEK(1)); + } + break; + case 57: { + /* read-chunk-native */ + + size_t clen = ENG->hlen_in; + if (clen > 0) { + uint32_t addr, len; + + len = T0_POP(); + addr = T0_POP(); + if ((size_t)len < clen) { + clen = (size_t)len; + } + memcpy((unsigned char *)ENG + addr, ENG->hbuf_in, clen); + if (ENG->record_type_in == BR_SSL_HANDSHAKE) { + br_multihash_update(&ENG->mhash, ENG->hbuf_in, clen); + } + T0_PUSH(addr + (uint32_t)clen); + T0_PUSH(len - (uint32_t)clen); + ENG->hbuf_in += clen; + ENG->hlen_in -= clen; + } + + } + break; + case 58: { + /* read8-native */ + + if (ENG->hlen_in > 0) { + unsigned char x; + + x = *ENG->hbuf_in ++; + if (ENG->record_type_in == BR_SSL_HANDSHAKE) { + br_multihash_update(&ENG->mhash, &x, 1); + } + T0_PUSH(x); + ENG->hlen_in 
--; + } else { + T0_PUSHi(-1); + } + + } + break; + case 59: { + /* set-server-curve */ + + const br_x509_class *xc; + const br_x509_pkey *pk; + + xc = *(ENG->x509ctx); + pk = xc->get_pkey(ENG->x509ctx, NULL); + CTX->server_curve = + (pk->key_type == BR_KEYTYPE_EC) ? pk->key.ec.curve : 0; + + } + break; + case 60: { + /* set16 */ + + size_t addr = (size_t)T0_POP(); + *(uint16_t *)(void *)((unsigned char *)ENG + addr) = (uint16_t)T0_POP(); + + } + break; + case 61: { + /* set32 */ + + size_t addr = (size_t)T0_POP(); + *(uint32_t *)(void *)((unsigned char *)ENG + addr) = (uint32_t)T0_POP(); + + } + break; + case 62: { + /* set8 */ + + size_t addr = (size_t)T0_POP(); + *((unsigned char *)ENG + addr) = (unsigned char)T0_POP(); + + } + break; + case 63: { + /* strlen */ + + void *str = (unsigned char *)ENG + (size_t)T0_POP(); + T0_PUSH((uint32_t)strlen(str)); + + } + break; + case 64: { + /* supported-curves */ + + uint32_t x = ENG->iec == NULL ? 0 : ENG->iec->supported_curves; + T0_PUSH(x); + + } + break; + case 65: { + /* supported-hash-functions */ + + int i; + unsigned x, num; + + x = 0; + num = 0; + for (i = br_sha1_ID; i <= br_sha512_ID; i ++) { + if (br_multihash_getimpl(&ENG->mhash, i)) { + x |= 1U << i; + num ++; + } + } + T0_PUSH(x); + T0_PUSH(num); + + } + break; + case 66: { + /* supports-ecdsa? */ + + T0_PUSHi(-(ENG->iecdsa != 0)); + + } + break; + case 67: { + /* supports-rsa-sign? 
*/ + + T0_PUSHi(-(ENG->irsavrfy != 0)); + + } + break; + case 68: { + /* swap */ + T0_SWAP(); + } + break; + case 69: { + /* switch-aesccm-in */ + + int is_client, prf_id; + unsigned cipher_key_len, tag_len; + + tag_len = T0_POP(); + cipher_key_len = T0_POP(); + prf_id = T0_POP(); + is_client = T0_POP(); + br_ssl_engine_switch_ccm_in(ENG, is_client, prf_id, + ENG->iaes_ctrcbc, cipher_key_len, tag_len); + + } + break; + case 70: { + /* switch-aesccm-out */ + + int is_client, prf_id; + unsigned cipher_key_len, tag_len; + + tag_len = T0_POP(); + cipher_key_len = T0_POP(); + prf_id = T0_POP(); + is_client = T0_POP(); + br_ssl_engine_switch_ccm_out(ENG, is_client, prf_id, + ENG->iaes_ctrcbc, cipher_key_len, tag_len); + + } + break; + case 71: { + /* switch-aesgcm-in */ + + int is_client, prf_id; + unsigned cipher_key_len; + + cipher_key_len = T0_POP(); + prf_id = T0_POP(); + is_client = T0_POP(); + br_ssl_engine_switch_gcm_in(ENG, is_client, prf_id, + ENG->iaes_ctr, cipher_key_len); + + } + break; + case 72: { + /* switch-aesgcm-out */ + + int is_client, prf_id; + unsigned cipher_key_len; + + cipher_key_len = T0_POP(); + prf_id = T0_POP(); + is_client = T0_POP(); + br_ssl_engine_switch_gcm_out(ENG, is_client, prf_id, + ENG->iaes_ctr, cipher_key_len); + + } + break; + case 73: { + /* switch-cbc-in */ + + int is_client, prf_id, mac_id, aes; + unsigned cipher_key_len; + + cipher_key_len = T0_POP(); + aes = T0_POP(); + mac_id = T0_POP(); + prf_id = T0_POP(); + is_client = T0_POP(); + br_ssl_engine_switch_cbc_in(ENG, is_client, prf_id, mac_id, + aes ? ENG->iaes_cbcdec : ENG->ides_cbcdec, cipher_key_len); + + } + break; + case 74: { + /* switch-cbc-out */ + + int is_client, prf_id, mac_id, aes; + unsigned cipher_key_len; + + cipher_key_len = T0_POP(); + aes = T0_POP(); + mac_id = T0_POP(); + prf_id = T0_POP(); + is_client = T0_POP(); + br_ssl_engine_switch_cbc_out(ENG, is_client, prf_id, mac_id, + aes ? 
ENG->iaes_cbcenc : ENG->ides_cbcenc, cipher_key_len); + + } + break; + case 75: { + /* switch-chapol-in */ + + int is_client, prf_id; + + prf_id = T0_POP(); + is_client = T0_POP(); + br_ssl_engine_switch_chapol_in(ENG, is_client, prf_id); + + } + break; + case 76: { + /* switch-chapol-out */ + + int is_client, prf_id; + + prf_id = T0_POP(); + is_client = T0_POP(); + br_ssl_engine_switch_chapol_out(ENG, is_client, prf_id); + + } + break; + case 77: { + /* test-protocol-name */ + + size_t len = T0_POP(); + size_t u; + + for (u = 0; u < ENG->protocol_names_num; u ++) { + const char *name; + + name = ENG->protocol_names[u]; + if (len == strlen(name) && memcmp(ENG->pad, name, len) == 0) { + T0_PUSH(u); + T0_RET(); + } + } + T0_PUSHi(-1); + + } + break; + case 78: { + /* total-chain-length */ + + size_t u; + uint32_t total; + + total = 0; + for (u = 0; u < ENG->chain_len; u ++) { + total += 3 + (uint32_t)ENG->chain[u].data_len; + } + T0_PUSH(total); + + } + break; + case 79: { + /* u>> */ + + int c = (int)T0_POPi(); + uint32_t x = T0_POP(); + T0_PUSH(x >> c); + + } + break; + case 80: { + /* verify-SKE-sig */ + + size_t sig_len = T0_POP(); + int use_rsa = T0_POPi(); + int hash = T0_POPi(); + + T0_PUSH(verify_SKE_sig(CTX, hash, use_rsa, sig_len)); + + } + break; + case 81: { + /* write-blob-chunk */ + + size_t clen = ENG->hlen_out; + if (clen > 0) { + uint32_t addr, len; + + len = T0_POP(); + addr = T0_POP(); + if ((size_t)len < clen) { + clen = (size_t)len; + } + memcpy(ENG->hbuf_out, (unsigned char *)ENG + addr, clen); + if (ENG->record_type_out == BR_SSL_HANDSHAKE) { + br_multihash_update(&ENG->mhash, ENG->hbuf_out, clen); + } + T0_PUSH(addr + (uint32_t)clen); + T0_PUSH(len - (uint32_t)clen); + ENG->hbuf_out += clen; + ENG->hlen_out -= clen; + } + + } + break; + case 82: { + /* write8-native */ + + unsigned char x; + + x = (unsigned char)T0_POP(); + if (ENG->hlen_out > 0) { + if (ENG->record_type_out == BR_SSL_HANDSHAKE) { + br_multihash_update(&ENG->mhash, &x, 1); + } 
+ *ENG->hbuf_out ++ = x; + ENG->hlen_out --; + T0_PUSHi(-1); + } else { + T0_PUSHi(0); + } + + } + break; + case 83: { + /* x509-append */ + + const br_x509_class *xc; + size_t len; + + xc = *(ENG->x509ctx); + len = T0_POP(); + xc->append(ENG->x509ctx, ENG->pad, len); + + } + break; + case 84: { + /* x509-end-cert */ + + const br_x509_class *xc; + + xc = *(ENG->x509ctx); + xc->end_cert(ENG->x509ctx); + + } + break; + case 85: { + /* x509-end-chain */ + + const br_x509_class *xc; + + xc = *(ENG->x509ctx); + T0_PUSH(xc->end_chain(ENG->x509ctx)); + + } + break; + case 86: { + /* x509-start-cert */ + + const br_x509_class *xc; + + xc = *(ENG->x509ctx); + xc->start_cert(ENG->x509ctx, T0_POP()); + + } + break; + case 87: { + /* x509-start-chain */ + + const br_x509_class *xc; + uint32_t bc; + + bc = T0_POP(); + xc = *(ENG->x509ctx); + xc->start_chain(ENG->x509ctx, bc ? ENG->server_name : NULL); + + } + break; + } + + } else { + T0_ENTER(ip, rp, t0x); + } + } +t0_exit: + ((t0_context *)t0ctx)->dp = dp; + ((t0_context *)t0ctx)->rp = rp; + ((t0_context *)t0ctx)->ip = ip; +} diff --git a/third_party/bearssl/src/ssl_io.c b/third_party/bearssl/src/ssl_io.c new file mode 100644 index 0000000..1952615 --- /dev/null +++ b/third_party/bearssl/src/ssl_io.c @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_ssl.h */ +void +br_sslio_init(br_sslio_context *ctx, + br_ssl_engine_context *engine, + int (*low_read)(void *read_context, + unsigned char *data, size_t len), + void *read_context, + int (*low_write)(void *write_context, + const unsigned char *data, size_t len), + void *write_context) +{ + ctx->engine = engine; + ctx->low_read = low_read; + ctx->read_context = read_context; + ctx->low_write = low_write; + ctx->write_context = write_context; +} + +/* + * Run the engine, until the specified target state is achieved, or + * an error occurs. The target state is SENDAPP, RECVAPP, or the + * combination of both (the combination matches either). When a match is + * achieved, this function returns 0. On error, it returns -1. + */ +static int +run_until(br_sslio_context *ctx, unsigned target) +{ + for (;;) { + unsigned state; + + state = br_ssl_engine_current_state(ctx->engine); + if (state & BR_SSL_CLOSED) { + return -1; + } + + /* + * If there is some record data to send, do it. This takes + * precedence over everything else. + */ + if (state & BR_SSL_SENDREC) { + unsigned char *buf; + size_t len; + int wlen; + + buf = br_ssl_engine_sendrec_buf(ctx->engine, &len); + wlen = ctx->low_write(ctx->write_context, buf, len); + if (wlen < 0) { + /* + * If we received a close_notify and we + * still send something, then we have our + * own response close_notify to send, and + * the peer is allowed by RFC 5246 not to + * wait for it. 
+ */ + if (!ctx->engine->shutdown_recv) { + br_ssl_engine_fail( + ctx->engine, BR_ERR_IO); + } + return -1; + } + if (wlen > 0) { + br_ssl_engine_sendrec_ack(ctx->engine, wlen); + } + continue; + } + + /* + * If we reached our target, then we are finished. + */ + if (state & target) { + return 0; + } + + /* + * If some application data must be read, and we did not + * exit, then this means that we are trying to write data, + * and that's not possible until the application data is + * read. This may happen if using a shared in/out buffer, + * and the underlying protocol is not strictly half-duplex. + * This is unrecoverable here, so we report an error. + */ + if (state & BR_SSL_RECVAPP) { + return -1; + } + + /* + * If we reached that point, then either we are trying + * to read data and there is some, or the engine is stuck + * until a new record is obtained. + */ + if (state & BR_SSL_RECVREC) { + unsigned char *buf; + size_t len; + int rlen; + + buf = br_ssl_engine_recvrec_buf(ctx->engine, &len); + rlen = ctx->low_read(ctx->read_context, buf, len); + if (rlen < 0) { + br_ssl_engine_fail(ctx->engine, BR_ERR_IO); + return -1; + } + if (rlen > 0) { + br_ssl_engine_recvrec_ack(ctx->engine, rlen); + } + continue; + } + + /* + * We can reach that point if the target RECVAPP, and + * the state contains SENDAPP only. This may happen with + * a shared in/out buffer. In that case, we must flush + * the buffered data to "make room" for a new incoming + * record. 
+ */ + br_ssl_engine_flush(ctx->engine, 0); + } +} + +/* see bearssl_ssl.h */ +int +br_sslio_read(br_sslio_context *ctx, void *dst, size_t len) +{ + unsigned char *buf; + size_t alen; + + if (len == 0) { + return 0; + } + if (run_until(ctx, BR_SSL_RECVAPP) < 0) { + return -1; + } + buf = br_ssl_engine_recvapp_buf(ctx->engine, &alen); + if (alen > len) { + alen = len; + } + memcpy(dst, buf, alen); + br_ssl_engine_recvapp_ack(ctx->engine, alen); + return (int)alen; +} + +/* see bearssl_ssl.h */ +int +br_sslio_read_all(br_sslio_context *ctx, void *dst, size_t len) +{ + unsigned char *buf; + + buf = dst; + while (len > 0) { + int rlen; + + rlen = br_sslio_read(ctx, buf, len); + if (rlen < 0) { + return -1; + } + buf += rlen; + len -= (size_t)rlen; + } + return 0; +} + +/* see bearssl_ssl.h */ +int +br_sslio_write(br_sslio_context *ctx, const void *src, size_t len) +{ + unsigned char *buf; + size_t alen; + + if (len == 0) { + return 0; + } + if (run_until(ctx, BR_SSL_SENDAPP) < 0) { + return -1; + } + buf = br_ssl_engine_sendapp_buf(ctx->engine, &alen); + if (alen > len) { + alen = len; + } + memcpy(buf, src, alen); + br_ssl_engine_sendapp_ack(ctx->engine, alen); + return (int)alen; +} + +/* see bearssl_ssl.h */ +int +br_sslio_write_all(br_sslio_context *ctx, const void *src, size_t len) +{ + const unsigned char *buf; + + buf = src; + while (len > 0) { + int wlen; + + wlen = br_sslio_write(ctx, buf, len); + if (wlen < 0) { + return -1; + } + buf += wlen; + len -= (size_t)wlen; + } + return 0; +} + +/* see bearssl_ssl.h */ +int +br_sslio_flush(br_sslio_context *ctx) +{ + /* + * We trigger a flush. We know the data is gone when there is + * no longer any record data to send, and we can either read + * or write application data. The call to run_until() does the + * job because it ensures that any assembled record data is + * first sent down the wire before considering anything else. 
+ */ + br_ssl_engine_flush(ctx->engine, 0); + return run_until(ctx, BR_SSL_SENDAPP | BR_SSL_RECVAPP); +} + +/* see bearssl_ssl.h */ +int +br_sslio_close(br_sslio_context *ctx) +{ + br_ssl_engine_close(ctx->engine); + while (br_ssl_engine_current_state(ctx->engine) != BR_SSL_CLOSED) { + /* + * Discard any incoming application data. + */ + size_t len; + + run_until(ctx, BR_SSL_RECVAPP); + if (br_ssl_engine_recvapp_buf(ctx->engine, &len) != NULL) { + br_ssl_engine_recvapp_ack(ctx->engine, len); + } + } + return br_ssl_engine_last_error(ctx->engine) == BR_ERR_OK; +} diff --git a/third_party/bearssl/src/ssl_keyexport.c b/third_party/bearssl/src/ssl_keyexport.c new file mode 100644 index 0000000..58e6dc3 --- /dev/null +++ b/third_party/bearssl/src/ssl_keyexport.c @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +/* + * Supported cipher suites that use SHA-384 for the PRF when selected + * for TLS 1.2. All other cipher suites are deemed to use SHA-256. + */ +static const uint16_t suites_sha384[] = { + BR_TLS_RSA_WITH_AES_256_GCM_SHA384, + BR_TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384, + BR_TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384, + BR_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384, + BR_TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384, + BR_TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, + BR_TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384, + BR_TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384, + BR_TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384 +}; + +/* see bearssl_ssl.h */ +int +br_ssl_key_export(br_ssl_engine_context *cc, + void *dst, size_t len, const char *label, + const void *context, size_t context_len) +{ + br_tls_prf_seed_chunk chunks[4]; + br_tls_prf_impl iprf; + size_t num_chunks, u; + unsigned char tmp[2]; + int prf_id; + + if (cc->application_data != 1) { + return 0; + } + chunks[0].data = cc->client_random; + chunks[0].len = sizeof cc->client_random; + chunks[1].data = cc->server_random; + chunks[1].len = sizeof cc->server_random; + if (context != NULL) { + br_enc16be(tmp, (unsigned)context_len); + chunks[2].data = tmp; + chunks[2].len = 2; + chunks[3].data = context; + chunks[3].len = context_len; + num_chunks = 4; + } else { + num_chunks = 2; + } + prf_id = BR_SSLPRF_SHA256; + for (u = 0; u < (sizeof suites_sha384) / sizeof(uint16_t); u ++) { + if (suites_sha384[u] == cc->session.cipher_suite) { + prf_id = BR_SSLPRF_SHA384; + } + } + iprf = br_ssl_engine_get_PRF(cc, prf_id); + iprf(dst, len, + cc->session.master_secret, sizeof cc->session.master_secret, + label, num_chunks, chunks); + return 1; +} diff --git a/third_party/bearssl/src/ssl_lru.c b/third_party/bearssl/src/ssl_lru.c new file mode 100644 index 0000000..4c71011 --- /dev/null +++ b/third_party/bearssl/src/ssl_lru.c @@ -0,0 +1,537 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby 
granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Each entry consists in a fixed number of bytes. Entries are concatenated + * in the store block. "Addresses" are really offsets in the block, + * expressed over 32 bits (so the cache may have size at most 4 GB, which + * "ought to be enough for everyone"). The "null address" is 0xFFFFFFFF. + * Note that since the storage block alignment is in no way guaranteed, we + * perform only accesses that can handle unaligned data. + * + * Two concurrent data structures are maintained: + * + * -- Entries are organised in a doubly-linked list; saved entries are added + * at the head, and loaded entries are moved to the head. Eviction uses + * the list tail (this is the LRU algorithm). + * + * -- Entries are indexed with a binary tree: all left descendants of a + * node have a lower session ID (in lexicographic order), while all + * right descendants have a higher session ID. 
The tree is heuristically + * balanced. + * + * Entry format: + * + * session ID 32 bytes + * master secret 48 bytes + * protocol version 2 bytes (big endian) + * cipher suite 2 bytes (big endian) + * list prev 4 bytes (big endian) + * list next 4 bytes (big endian) + * tree left child 4 bytes (big endian) + * tree right child 4 bytes (big endian) + * + * If an entry has a protocol version set to 0, then it is "disabled": + * it was a session pushed to the cache at some point, but it has + * been explicitly removed. + * + * We need to keep the tree balanced because an attacker could make + * handshakes, selecting some specific sessions (by reusing them) to + * try to make us make an imbalanced tree that makes lookups expensive + * (a denial-of-service attack that would persist as long as the cache + * remains, i.e. even after the attacker made all his connections). + * To do that, we replace the session ID (or the start of the session ID) + * with a HMAC value computed over the replaced part; the hash function + * implementation and the key are obtained from the server context upon + * first save() call. + * + * Theoretically, an attacker could use the exact timing of the lookup + * to infer the current tree topology, and try to revive entries to make + * it as unbalanced as possible. However, since the session ID are + * chosen randomly by the server, and the attacker cannot see the + * indexing values and must thus rely on blind selection, it should be + * exponentially difficult for the attacker to maintain a large + * imbalance. 
/*
 * Sizes (in bytes) of the two variable-content fields of an entry.
 */
#define SESSION_ID_LEN 32
#define MASTER_SECRET_LEN 48

/*
 * Offsets (in bytes) of each field within an entry, and total entry
 * length.
 */
#define SESSION_ID_OFF 0
#define MASTER_SECRET_OFF 32
#define VERSION_OFF 80
#define CIPHER_SUITE_OFF 82
#define LIST_PREV_OFF 84
#define LIST_NEXT_OFF 88
#define TREE_LEFT_OFF 92
#define TREE_RIGHT_OFF 96

#define LRU_ENTRY_LEN 100

/*
 * The "null address" (all bits set over 32 bits).
 */
#define ADDR_NULL ((uint32_t)-1)

/*
 * GETSET(name, off) defines two accessors, get_<name>() and
 * set_<name>(), which read and write the 32-bit big-endian value
 * located at offset 'off' in the entry at address 'x' of the store.
 */
#define GETSET(name, off) \
static inline uint32_t get_ ## name(br_ssl_session_cache_lru *cc, uint32_t x) \
{ \
	return br_dec32be(cc->store + x + (off)); \
} \
static inline void set_ ## name(br_ssl_session_cache_lru *cc, \
	uint32_t x, uint32_t val) \
{ \
	br_enc32be(cc->store + x + (off), val); \
}

GETSET(prev, LIST_PREV_OFF)
GETSET(next, LIST_NEXT_OFF)
GETSET(left, TREE_LEFT_OFF)
GETSET(right, TREE_RIGHT_OFF)

/*
 * Transform the session ID by replacing the first N bytes with a HMAC
 * value computed over these bytes, using the random key K (the HMAC
 * value is truncated if needed). HMAC will use the same hash function
 * as the DRBG in the SSL server context, so with SHA-256, SHA-384,
 * or SHA-1, depending on what is available.
 *
 * The risk of collision is considered too small to be a concern; and
 * the impact of a collision is low (the handshake won't succeed). This
 * risk is much lower than any transmission error, which would lead to
 * the same consequences.
 *
 * Source and destination arrays must be disjoint.
 */
static void
mask_id(br_ssl_session_cache_lru *cc,
	const unsigned char *src, unsigned char *dst)
{
	br_hmac_key_context hkc;
	br_hmac_context hc;

	/*
	 * Start from a full copy of the ID, so that any byte which the
	 * HMAC output below does not overwrite keeps its original value.
	 * NOTE(review): br_hmac_out() presumably writes at most
	 * SESSION_ID_LEN bytes (the length given to br_hmac_init()) --
	 * confirm against the HMAC API.
	 */
	memcpy(dst, src, SESSION_ID_LEN);
	br_hmac_key_init(&hkc, cc->hash, cc->index_key, sizeof cc->index_key);
	br_hmac_init(&hc, &hkc, SESSION_ID_LEN);
	br_hmac_update(&hc, src, SESSION_ID_LEN);
	br_hmac_out(&hc, dst);
}
+ * + * If addr_link is not NULL, then '*addr_link' is set to the address of the + * last followed link. If the found node is the root, or if the tree is + * empty, then '*addr_link' is set to ADDR_NULL. + */ +static uint32_t +find_node(br_ssl_session_cache_lru *cc, const unsigned char *id, + uint32_t *addr_link) +{ + uint32_t x, y; + + x = cc->root; + y = ADDR_NULL; + while (x != ADDR_NULL) { + int r; + + r = memcmp(id, cc->store + x + SESSION_ID_OFF, SESSION_ID_LEN); + if (r < 0) { + y = x + TREE_LEFT_OFF; + x = get_left(cc, x); + } else if (r == 0) { + if (addr_link != NULL) { + *addr_link = y; + } + return x; + } else { + y = x + TREE_RIGHT_OFF; + x = get_right(cc, x); + } + } + if (addr_link != NULL) { + *addr_link = y; + } + return ADDR_NULL; +} + +/* + * For node x, find its replacement upon removal. + * + * -- If node x has no child, then this returns ADDR_NULL. + * -- Otherwise, if node x has a left child, then the replacement is the + * rightmost left-descendent. + * -- Otherwise, the replacement is the leftmost right-descendent. + * + * If a node is returned, then '*al' is set to the address of the field + * that points to that node. Otherwise (node x has no child), '*al' is + * set to ADDR_NULL. + * + * Note that the replacement node, when found, is always a descendent + * of node 'x', so it cannot be the tree root. Thus, '*al' can be set + * to ADDR_NULL only when no node is found and ADDR_NULL is returned. 
+ */ +static uint32_t +find_replacement_node(br_ssl_session_cache_lru *cc, uint32_t x, uint32_t *al) +{ + uint32_t y1, y2; + + y1 = get_left(cc, x); + if (y1 != ADDR_NULL) { + y2 = x + TREE_LEFT_OFF; + for (;;) { + uint32_t z; + + z = get_right(cc, y1); + if (z == ADDR_NULL) { + *al = y2; + return y1; + } + y2 = y1 + TREE_RIGHT_OFF; + y1 = z; + } + } + y1 = get_right(cc, x); + if (y1 != ADDR_NULL) { + y2 = x + TREE_RIGHT_OFF; + for (;;) { + uint32_t z; + + z = get_left(cc, y1); + if (z == ADDR_NULL) { + *al = y2; + return y1; + } + y2 = y1 + TREE_LEFT_OFF; + y1 = z; + } + } + *al = ADDR_NULL; + return ADDR_NULL; +} + +/* + * Set the link at address 'alx' to point to node 'x'. If 'alx' is + * ADDR_NULL, then this sets the tree root to 'x'. + */ +static inline void +set_link(br_ssl_session_cache_lru *cc, uint32_t alx, uint32_t x) +{ + if (alx == ADDR_NULL) { + cc->root = x; + } else { + br_enc32be(cc->store + alx, x); + } +} + +/* + * Remove node 'x' from the tree. This function shall not be called if + * node 'x' is not part of the tree. + */ +static void +remove_node(br_ssl_session_cache_lru *cc, uint32_t x) +{ + uint32_t alx, y, aly; + + /* + * Removal algorithm: + * ------------------ + * + * - If we remove the root, then the tree becomes empty. + * + * - If the removed node has no child, then we can simply remove + * it, with nothing else to do. + * + * - Otherwise, the removed node must be replaced by either its + * rightmost left-descendent, or its leftmost right-descendent. + * The replacement node itself must be removed from its current + * place. By definition, that replacement node has either no + * child, or at most a single child that will replace it in the + * tree. + */ + + /* + * Find node back and its ancestor link. If the node was the + * root, then alx is set to ADDR_NULL. + */ + find_node(cc, cc->store + x + SESSION_ID_OFF, &alx); + + /* + * Find replacement node 'y', and 'aly' is set to the address of + * the link to that replacement node. 
/*
 * Session cache "save" method: record the session parameters
 * (masked session ID, master secret, protocol version, cipher suite)
 * in a cache entry, which becomes the head of the LRU list and is
 * inserted in the search tree.
 *
 * Nothing is recorded if the store cannot hold even one entry, or if
 * an entry with the same masked ID is already present. When the store
 * is full, the list tail entry is evicted and its slot is reused.
 */
static void
lru_save(const br_ssl_session_cache_class **ctx,
	br_ssl_server_context *server_ctx,
	const br_ssl_session_parameters *params)
{
	br_ssl_session_cache_lru *cc;
	unsigned char id[SESSION_ID_LEN];
	uint32_t x, alx;

	cc = (br_ssl_session_cache_lru *)ctx;

	/*
	 * If the buffer is too small, we don't record anything. This
	 * test avoids problems in subsequent code.
	 */
	if (cc->store_len < LRU_ENTRY_LEN) {
		return;
	}

	/*
	 * Upon the first save in a session cache instance, we obtain
	 * a random key for our indexing.
	 */
	if (!cc->init_done) {
		br_hmac_drbg_generate(&server_ctx->eng.rng,
			cc->index_key, sizeof cc->index_key);
		cc->hash = br_hmac_drbg_get_hash(&server_ctx->eng.rng);
		cc->init_done = 1;
	}
	mask_id(cc, params->session_id, id);

	/*
	 * Look for the node in the tree. If the same ID is already used,
	 * then reject it. This is a collision event, which should be
	 * exceedingly rare.
	 * Note: we do NOT record the emplacement here, because the
	 * removal of an entry may change the tree topology.
	 */
	if (find_node(cc, id, NULL) != ADDR_NULL) {
		return;
	}

	/*
	 * Find some room for the new parameters. If the cache is not
	 * full yet, add it to the end of the area and bump the pointer up.
	 * Otherwise, evict the list tail entry. Note that we already
	 * filtered out the case of a ridiculously small buffer that
	 * cannot hold any entry at all; thus, if there is no room for an
	 * extra entry, then the cache cannot be empty.
	 */
	if (cc->store_ptr > (cc->store_len - LRU_ENTRY_LEN)) {
		/*
		 * Evict tail. If the buffer has room for a single entry,
		 * then this may also be the head.
		 */
		x = cc->tail;
		cc->tail = get_prev(cc, x);
		if (cc->tail == ADDR_NULL) {
			cc->head = ADDR_NULL;
		} else {
			set_next(cc, cc->tail, ADDR_NULL);
		}

		/*
		 * Remove the node from the tree. The entry still holds
		 * the evicted ID at that point, which remove_node()
		 * uses to find the node back.
		 */
		remove_node(cc, x);
	} else {
		/*
		 * Allocate room for new node.
		 */
		x = cc->store_ptr;
		cc->store_ptr += LRU_ENTRY_LEN;
	}

	/*
	 * Find the emplacement for the new node, and link it. The ID is
	 * known not to be in the tree, so this lookup only serves to
	 * obtain the link where the new leaf must be attached.
	 */
	find_node(cc, id, &alx);
	set_link(cc, alx, x);
	set_left(cc, x, ADDR_NULL);
	set_right(cc, x, ADDR_NULL);

	/*
	 * New entry becomes new list head. It may also become the list
	 * tail if the cache was empty at that point.
	 */
	if (cc->head == ADDR_NULL) {
		cc->tail = x;
	} else {
		set_prev(cc, cc->head, x);
	}
	set_prev(cc, x, ADDR_NULL);
	set_next(cc, x, cc->head);
	cc->head = x;

	/*
	 * Fill data in the entry. The stored ID is the masked one.
	 */
	memcpy(cc->store + x + SESSION_ID_OFF, id, SESSION_ID_LEN);
	memcpy(cc->store + x + MASTER_SECRET_OFF,
		params->master_secret, MASTER_SECRET_LEN);
	br_enc16be(cc->store + x + VERSION_OFF, params->version);
	br_enc16be(cc->store + x + CIPHER_SUITE_OFF, params->cipher_suite);
}
+ */ + uint32_t p, n; + + p = get_prev(cc, x); + n = get_next(cc, x); + set_next(cc, p, n); + if (n == ADDR_NULL) { + cc->tail = p; + } else { + set_prev(cc, n, p); + } + set_prev(cc, cc->head, x); + set_next(cc, x, cc->head); + set_prev(cc, x, ADDR_NULL); + cc->head = x; + } + return 1; + } + return 0; +} + +static const br_ssl_session_cache_class lru_class = { + sizeof(br_ssl_session_cache_lru), + &lru_save, + &lru_load +}; + +/* see inner.h */ +void +br_ssl_session_cache_lru_init(br_ssl_session_cache_lru *cc, + unsigned char *store, size_t store_len) +{ + cc->vtable = &lru_class; + cc->store = store; + cc->store_len = store_len; + cc->store_ptr = 0; + cc->init_done = 0; + cc->head = ADDR_NULL; + cc->tail = ADDR_NULL; + cc->root = ADDR_NULL; +} + +/* see bearssl_ssl.h */ +void br_ssl_session_cache_lru_forget( + br_ssl_session_cache_lru *cc, const unsigned char *id) +{ + unsigned char mid[SESSION_ID_LEN]; + uint32_t addr; + + /* + * If the cache is not initialised yet, then it is empty, and + * there is nothing to forget. + */ + if (!cc->init_done) { + return; + } + + /* + * Look for the node in the tree. If found, the entry is marked + * as "disabled"; it will be reused in due course, as it ages + * through the list. + * + * We do not go through the complex moves of actually releasing + * the entry right away because explicitly forgetting sessions + * should be a rare event, meant mostly for testing purposes, + * so this is not worth the extra code size. 
+ */ + mask_id(cc, id, mid); + addr = find_node(cc, mid, NULL); + if (addr != ADDR_NULL) { + br_enc16be(cc->store + addr + VERSION_OFF, 0); + } +} diff --git a/third_party/bearssl/src/ssl_rec_cbc.c b/third_party/bearssl/src/ssl_rec_cbc.c new file mode 100644 index 0000000..c38cbfd --- /dev/null +++ b/third_party/bearssl/src/ssl_rec_cbc.c @@ -0,0 +1,440 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +static void +in_cbc_init(br_sslrec_in_cbc_context *cc, + const br_block_cbcdec_class *bc_impl, + const void *bc_key, size_t bc_key_len, + const br_hash_class *dig_impl, + const void *mac_key, size_t mac_key_len, size_t mac_out_len, + const void *iv) +{ + cc->vtable = &br_sslrec_in_cbc_vtable; + cc->seq = 0; + bc_impl->init(&cc->bc.vtable, bc_key, bc_key_len); + br_hmac_key_init(&cc->mac, dig_impl, mac_key, mac_key_len); + cc->mac_len = mac_out_len; + if (iv == NULL) { + memset(cc->iv, 0, sizeof cc->iv); + cc->explicit_IV = 1; + } else { + memcpy(cc->iv, iv, bc_impl->block_size); + cc->explicit_IV = 0; + } +} + +static int +cbc_check_length(const br_sslrec_in_cbc_context *cc, size_t rlen) +{ + /* + * Plaintext size: at most 16384 bytes + * Padding: at most 256 bytes + * MAC: mac_len extra bytes + * TLS 1.1+: each record has an explicit IV + * + * Minimum length includes at least one byte of padding, and the + * MAC. + * + * Total length must be a multiple of the block size. + */ + size_t blen; + size_t min_len, max_len; + + blen = cc->bc.vtable->block_size; + min_len = (blen + cc->mac_len) & ~(blen - 1); + max_len = (16384 + 256 + cc->mac_len) & ~(blen - 1); + if (cc->explicit_IV) { + min_len += blen; + max_len += blen; + } + return min_len <= rlen && rlen <= max_len; +} + +/* + * Rotate array buf[] of length 'len' to the left (towards low indices) + * by 'num' bytes if ctl is 1; otherwise, leave it unchanged. This is + * constant-time. 'num' MUST be lower than 'len'. 'len' MUST be lower + * than or equal to 64. 
/*
 * Conditionally rotate buf[] (of 'len' bytes) towards low indices by
 * 'num' bytes: the rotation is applied when 'ctl' is 1, and the
 * buffer contents are rewritten unchanged otherwise. Every byte is
 * selected with MUX() in both cases, so the sequence of operations
 * does not depend on 'ctl'. 'num' MUST be lower than 'len', and 'len'
 * MUST NOT exceed 64.
 */
static void
cond_rotate(uint32_t ctl, unsigned char *buf, size_t len, size_t num)
{
	unsigned char rotated[64];
	size_t dst, src;

	src = num;
	for (dst = 0; dst < len; dst ++) {
		rotated[dst] = MUX(ctl, buf[src], buf[dst]);

		/* Advance the source index, wrapping at the end. */
		src ++;
		if (src == len) {
			src = 0;
		}
	}
	memcpy(buf, rotated, len);
}
+ */ + for (u = min_len; u < max_len; u ++) { + good &= LT(u, len) | EQ(buf[u], pad_len); + } + + /* + * Extract the MAC value. This is done in one pass, but results + * in a "rotated" MAC value depending on where it actually + * occurs. The 'rot_count' value is set to the offset of the + * first MAC byte within tmp1[]. + * + * min_len and max_len are also adjusted to the minimum and + * maximum lengths of the plaintext alone (without the MAC). + */ + len_withmac = (uint32_t)len; + len_nomac = len_withmac - cc->mac_len; + min_len -= cc->mac_len; + rot_count = 0; + memset(tmp1, 0, cc->mac_len); + v = 0; + for (u = min_len; u < max_len; u ++) { + tmp1[v] |= MUX(GE(u, len_nomac) & LT(u, len_withmac), + buf[u], 0x00); + rot_count = MUX(EQ(u, len_nomac), v, rot_count); + if (++ v == cc->mac_len) { + v = 0; + } + } + max_len -= cc->mac_len; + + /* + * Rotate back the MAC value. The loop below does the constant-time + * rotation in time n*log n for a MAC output of length n. We assume + * that the MAC output length is no more than 64 bytes, so the + * rotation count fits on 6 bits. + */ + for (i = 5; i >= 0; i --) { + uint32_t rc; + + rc = (uint32_t)1 << i; + cond_rotate(rot_count >> i, tmp1, cc->mac_len, rc); + rot_count &= ~rc; + } + + /* + * Recompute the HMAC value. The input is the concatenation of + * the sequence number (8 bytes), the record header (5 bytes), + * and the payload. + * + * At that point, min_len is the minimum plaintext length, but + * max_len still includes the MAC length. + */ + br_enc64be(tmp2, cc->seq ++); + tmp2[8] = (unsigned char)record_type; + br_enc16be(tmp2 + 9, version); + br_enc16be(tmp2 + 11, len_nomac); + br_hmac_init(&hc, &cc->mac, cc->mac_len); + br_hmac_update(&hc, tmp2, 13); + br_hmac_outCT(&hc, buf, len_nomac, min_len, max_len, tmp2); + + /* + * Compare the extracted and recomputed MAC values. + */ + for (u = 0; u < cc->mac_len; u ++) { + good &= EQ0(tmp1[u] ^ tmp2[u]); + } + + /* + * Check that the plaintext length is valid. 
The previous + * check was on the encrypted length, but the padding may have + * turned shorter than expected. + * + * Once this final test is done, the critical "constant-time" + * section ends and we can make conditional jumps again. + */ + good &= LE(len_nomac, 16384); + + if (!good) { + return 0; + } + *data_len = len_nomac; + return buf; +} + +/* see bearssl_ssl.h */ +const br_sslrec_in_cbc_class br_sslrec_in_cbc_vtable = { + { + sizeof(br_sslrec_in_cbc_context), + (int (*)(const br_sslrec_in_class *const *, size_t)) + &cbc_check_length, + (unsigned char *(*)(const br_sslrec_in_class **, + int, unsigned, void *, size_t *)) + &cbc_decrypt + }, + (void (*)(const br_sslrec_in_cbc_class **, + const br_block_cbcdec_class *, const void *, size_t, + const br_hash_class *, const void *, size_t, size_t, + const void *)) + &in_cbc_init +}; + +/* + * For CBC output: + * + * -- With TLS 1.1+, there is an explicit IV. Generation method uses + * HMAC, computed over the current sequence number, and the current MAC + * key. The resulting value is truncated to the size of a block, and + * added at the head of the plaintext; it will get encrypted along with + * the data. This custom generation mechanism is "safe" under the + * assumption that HMAC behaves like a random oracle; since the MAC for + * a record is computed over the concatenation of the sequence number, + * the record header and the plaintext, the HMAC-for-IV will not collide + * with the normal HMAC. + * + * -- With TLS 1.0, for application data, we want to enforce a 1/n-1 + * split, as a countermeasure against chosen-plaintext attacks. We thus + * need to leave some room in the buffer for that extra record. 
+ */
+
+/*
+ * Initialise an outgoing CBC record context: install the vtable, reset
+ * the sequence number, set up the block cipher and the HMAC key, and
+ * record the MAC output length.
+ *
+ * If 'iv' is NULL, the stored IV is zeroed and cc->explicit_IV is set
+ * to 1; otherwise, one block (bc_impl->block_size bytes) is copied from
+ * 'iv' and cc->explicit_IV is set to 0.
+ */
+static void
+out_cbc_init(br_sslrec_out_cbc_context *cc,
+	const br_block_cbcenc_class *bc_impl,
+	const void *bc_key, size_t bc_key_len,
+	const br_hash_class *dig_impl,
+	const void *mac_key, size_t mac_key_len, size_t mac_out_len,
+	const void *iv)
+{
+	cc->vtable = &br_sslrec_out_cbc_vtable;
+	cc->seq = 0;
+	bc_impl->init(&cc->bc.vtable, bc_key, bc_key_len);
+	br_hmac_key_init(&cc->mac, dig_impl, mac_key, mac_key_len);
+	cc->mac_len = mac_out_len;
+	if (iv == NULL) {
+		memset(cc->iv, 0, sizeof cc->iv);
+		cc->explicit_IV = 1;
+	} else {
+		memcpy(cc->iv, iv, bc_impl->block_size);
+		cc->explicit_IV = 0;
+	}
+}
+
+/*
+ * Adjust '*start' and '*end' (offsets, updated in place) so that they
+ * delimit the largest plaintext that cbc_encrypt() can process.
+ *
+ * With an explicit IV, one block is reserved before the plaintext.
+ * Without it, the reserved space is 4 bytes plus the padded size of a
+ * one-byte record (1 data byte + MAC, rounded as cbc_encrypt() pads
+ * it); this is the same expression cbc_encrypt() uses to place the
+ * extra record of the split. The plaintext length is capped at 16384.
+ *
+ * NOTE(review): the masking with ~(blen - 1) assumes the block size is
+ * a power of two, and the subtraction assumes the buffer is large
+ * enough; neither is checked here.
+ */
+static void
+cbc_max_plaintext(const br_sslrec_out_cbc_context *cc,
+	size_t *start, size_t *end)
+{
+	size_t blen, len;
+
+	blen = cc->bc.vtable->block_size;
+	if (cc->explicit_IV) {
+		*start += blen;
+	} else {
+		*start += 4 + ((cc->mac_len + blen + 1) & ~(blen - 1));
+	}
+	/* Whole blocks only, minus at least one padding byte and the MAC. */
+	len = (*end - *start) & ~(blen - 1);
+	len -= 1 + cc->mac_len;
+	if (len > 16384) {
+		len = 16384;
+	}
+	*end = *start + len;
+}
+
+/*
+ * Build one outgoing CBC record (two records when the TLS 1.0 split is
+ * applied) in place around the plaintext at 'data'.
+ *
+ * The MAC, the padding, the optional explicit IV block and the 5-byte
+ * record header are written in the buffer space before and after the
+ * plaintext. The returned pointer designates the first header byte and
+ * '*data_len' receives the total number of bytes to send.
+ */
+static unsigned char *
+cbc_encrypt(br_sslrec_out_cbc_context *cc,
+	int record_type, unsigned version, void *data, size_t *data_len)
+{
+	unsigned char *buf, *rbuf;
+	size_t len, blen, plen;
+	unsigned char tmp[13];
+	br_hmac_context hc;
+
+	buf = data;
+	len = *data_len;
+	blen = cc->bc.vtable->block_size;
+
+	/*
+	 * If using TLS 1.0, with more than one byte of plaintext, and
+	 * the record is application data, then we need to compute
+	 * a "split". We do not perform the split on other record types
+	 * because it turned out that some existing, deployed
+	 * implementations of SSL/TLS do not tolerate the splitting of
+	 * some message types (in particular the Finished message).
+	 *
+	 * If using TLS 1.1+, then there is an explicit IV. We produce
+	 * that IV by adding an extra initial plaintext block, whose
+	 * value is computed with HMAC over the record sequence number.
+	 */
+	if (cc->explicit_IV) {
+		/*
+		 * We use here the fact that all the HMAC variants we
+		 * support can produce at least 16 bytes, while all the
+		 * block ciphers we support have blocks of no more than
+		 * 16 bytes. Thus, we can always truncate the HMAC output
+		 * down to the block size.
+		 */
+		br_enc64be(tmp, cc->seq);
+		br_hmac_init(&hc, &cc->mac, blen);
+		br_hmac_update(&hc, tmp, 8);
+		br_hmac_out(&hc, buf - blen);
+		rbuf = buf - blen - 5;
+	} else {
+		if (len > 1 && record_type == BR_SSL_APPLICATION_DATA) {
+			/*
+			 * To do the split, we use a recursive invocation;
+			 * since we only give one byte to the inner call,
+			 * the recursion stops there.
+			 *
+			 * We need to compute the exact size of the extra
+			 * record, so that the two resulting records end up
+			 * being sequential in RAM.
+			 *
+			 * We use here the fact that cbc_max_plaintext()
+			 * adjusted the start offset to leave room for the
+			 * initial fragment.
+			 */
+			size_t xlen;
+
+			rbuf = buf - 4
+				- ((cc->mac_len + blen + 1) & ~(blen - 1));
+			rbuf[0] = buf[0];
+			xlen = 1;
+			rbuf = cbc_encrypt(cc, record_type,
+				version, rbuf, &xlen);
+			buf ++;
+			len --;
+		} else {
+			rbuf = buf - 5;
+		}
+	}
+
+	/*
+	 * Compute MAC. The MAC input is the sequence number (8 bytes),
+	 * the record type, version and plaintext length (5 bytes), then
+	 * the plaintext; the MAC output is written right after the
+	 * plaintext.
+	 */
+	br_enc64be(tmp, cc->seq ++);
+	tmp[8] = record_type;
+	br_enc16be(tmp + 9, version);
+	br_enc16be(tmp + 11, len);
+	br_hmac_init(&hc, &cc->mac, cc->mac_len);
+	br_hmac_update(&hc, tmp, 13);
+	br_hmac_update(&hc, buf, len);
+	br_hmac_out(&hc, buf + len);
+	len += cc->mac_len;
+
+	/*
+	 * Add padding. Between 1 and blen bytes are added, each with
+	 * value plen - 1, so that the total is a multiple of the block
+	 * size.
+	 */
+	plen = blen - (len & (blen - 1));
+	memset(buf + len, (unsigned)plen - 1, plen);
+	len += plen;
+
+	/*
+	 * If an explicit IV is used, the corresponding extra block was
+	 * already put in place earlier; we just have to account for it
+	 * here.
+	 */
+	if (cc->explicit_IV) {
+		buf -= blen;
+		len += blen;
+	}
+
+	/*
+	 * Encrypt the whole thing. If there is an explicit IV, we also
+	 * encrypt it, which is fine (encryption of a uniformly random
+	 * block is still a uniformly random block).
+	 */
+	cc->bc.vtable->run(&cc->bc.vtable, cc->iv, buf, len);
+
+	/*
+	 * Add the header and return. The returned length runs from
+	 * rbuf (start of the first record, which is the split record
+	 * if there is one) to the end of this record.
+	 */
+	buf[-5] = record_type;
+	br_enc16be(buf - 4, version);
+	br_enc16be(buf - 2, len);
+	*data_len = (size_t)((buf + len) - rbuf);
+	return rbuf;
+}
+
+/*
+ * see bearssl_ssl.h
+ *
+ * Method table for outgoing CBC records; the casts adapt the static
+ * functions above to the generic function pointer types.
+ */
+const br_sslrec_out_cbc_class br_sslrec_out_cbc_vtable = {
+	{
+		sizeof(br_sslrec_out_cbc_context),
+		(void (*)(const br_sslrec_out_class *const *,
+			size_t *, size_t *))
+			&cbc_max_plaintext,
+		(unsigned char *(*)(const br_sslrec_out_class **,
+			int, unsigned, void *, size_t *))
+			&cbc_encrypt
+	},
+	(void (*)(const br_sslrec_out_cbc_class **,
+		const br_block_cbcenc_class *, const void *, size_t,
+		const br_hash_class *, const void *, size_t, size_t,
+		const void *))
+		&out_cbc_init
+};
diff --git a/third_party/bearssl/src/ssl_rec_ccm.c b/third_party/bearssl/src/ssl_rec_ccm.c
new file mode 100644
index 0000000..92c3295
--- /dev/null
+++ b/third_party/bearssl/src/ssl_rec_ccm.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * CCM initialisation. This does everything except setting the vtable, + * which depends on whether this is a context for encrypting or for + * decrypting. + */ +static void +gen_ccm_init(br_sslrec_ccm_context *cc, + const br_block_ctrcbc_class *bc_impl, + const void *key, size_t key_len, + const void *iv, size_t tag_len) +{ + cc->seq = 0; + bc_impl->init(&cc->bc.vtable, key, key_len); + memcpy(cc->iv, iv, sizeof cc->iv); + cc->tag_len = tag_len; +} + +static void +in_ccm_init(br_sslrec_ccm_context *cc, + const br_block_ctrcbc_class *bc_impl, + const void *key, size_t key_len, + const void *iv, size_t tag_len) +{ + cc->vtable.in = &br_sslrec_in_ccm_vtable; + gen_ccm_init(cc, bc_impl, key, key_len, iv, tag_len); +} + +static int +ccm_check_length(const br_sslrec_ccm_context *cc, size_t rlen) +{ + /* + * CCM overhead is 8 bytes for nonce_explicit, and the tag + * (normally 8 or 16 bytes, depending on cipher suite). + */ + size_t over; + + over = 8 + cc->tag_len; + return rlen >= over && rlen <= (16384 + over); +} + +static unsigned char * +ccm_decrypt(br_sslrec_ccm_context *cc, + int record_type, unsigned version, void *data, size_t *data_len) +{ + br_ccm_context zc; + unsigned char *buf; + unsigned char nonce[12], header[13]; + size_t len; + + buf = (unsigned char *)data + 8; + len = *data_len - (8 + cc->tag_len); + + /* + * Make nonce (implicit + explicit parts). + */ + memcpy(nonce, cc->iv, sizeof cc->iv); + memcpy(nonce + 4, data, 8); + + /* + * Assemble synthetic header for the AAD. 
+ */ + br_enc64be(header, cc->seq ++); + header[8] = (unsigned char)record_type; + br_enc16be(header + 9, version); + br_enc16be(header + 11, len); + + /* + * Perform CCM decryption. + */ + br_ccm_init(&zc, &cc->bc.vtable); + br_ccm_reset(&zc, nonce, sizeof nonce, sizeof header, len, cc->tag_len); + br_ccm_aad_inject(&zc, header, sizeof header); + br_ccm_flip(&zc); + br_ccm_run(&zc, 0, buf, len); + if (!br_ccm_check_tag(&zc, buf + len)) { + return NULL; + } + *data_len = len; + return buf; +} + +/* see bearssl_ssl.h */ +const br_sslrec_in_ccm_class br_sslrec_in_ccm_vtable = { + { + sizeof(br_sslrec_ccm_context), + (int (*)(const br_sslrec_in_class *const *, size_t)) + &ccm_check_length, + (unsigned char *(*)(const br_sslrec_in_class **, + int, unsigned, void *, size_t *)) + &ccm_decrypt + }, + (void (*)(const br_sslrec_in_ccm_class **, + const br_block_ctrcbc_class *, const void *, size_t, + const void *, size_t)) + &in_ccm_init +}; + +static void +out_ccm_init(br_sslrec_ccm_context *cc, + const br_block_ctrcbc_class *bc_impl, + const void *key, size_t key_len, + const void *iv, size_t tag_len) +{ + cc->vtable.out = &br_sslrec_out_ccm_vtable; + gen_ccm_init(cc, bc_impl, key, key_len, iv, tag_len); +} + +static void +ccm_max_plaintext(const br_sslrec_ccm_context *cc, + size_t *start, size_t *end) +{ + size_t len; + + *start += 8; + len = *end - *start - cc->tag_len; + if (len > 16384) { + len = 16384; + } + *end = *start + len; +} + +static unsigned char * +ccm_encrypt(br_sslrec_ccm_context *cc, + int record_type, unsigned version, void *data, size_t *data_len) +{ + br_ccm_context zc; + unsigned char *buf; + unsigned char nonce[12], header[13]; + size_t len; + + buf = (unsigned char *)data; + len = *data_len; + + /* + * Make nonce; the explicit part is an encoding of the sequence + * number. + */ + memcpy(nonce, cc->iv, sizeof cc->iv); + br_enc64be(nonce + 4, cc->seq); + + /* + * Assemble synthetic header for the AAD. 
+ */ + br_enc64be(header, cc->seq ++); + header[8] = (unsigned char)record_type; + br_enc16be(header + 9, version); + br_enc16be(header + 11, len); + + /* + * Perform CCM encryption. + */ + br_ccm_init(&zc, &cc->bc.vtable); + br_ccm_reset(&zc, nonce, sizeof nonce, sizeof header, len, cc->tag_len); + br_ccm_aad_inject(&zc, header, sizeof header); + br_ccm_flip(&zc); + br_ccm_run(&zc, 1, buf, len); + br_ccm_get_tag(&zc, buf + len); + + /* + * Assemble header and adjust pointer/length. + */ + len += 8 + cc->tag_len; + buf -= 13; + memcpy(buf + 5, nonce + 4, 8); + buf[0] = (unsigned char)record_type; + br_enc16be(buf + 1, version); + br_enc16be(buf + 3, len); + *data_len = len + 5; + return buf; +} + +/* see bearssl_ssl.h */ +const br_sslrec_out_ccm_class br_sslrec_out_ccm_vtable = { + { + sizeof(br_sslrec_ccm_context), + (void (*)(const br_sslrec_out_class *const *, + size_t *, size_t *)) + &ccm_max_plaintext, + (unsigned char *(*)(const br_sslrec_out_class **, + int, unsigned, void *, size_t *)) + &ccm_encrypt + }, + (void (*)(const br_sslrec_out_ccm_class **, + const br_block_ctrcbc_class *, const void *, size_t, + const void *, size_t)) + &out_ccm_init +}; diff --git a/third_party/bearssl/src/ssl_rec_chapol.c b/third_party/bearssl/src/ssl_rec_chapol.c new file mode 100644 index 0000000..73b3c78 --- /dev/null +++ b/third_party/bearssl/src/ssl_rec_chapol.c @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * 
included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +static void +gen_chapol_init(br_sslrec_chapol_context *cc, + br_chacha20_run ichacha, br_poly1305_run ipoly, + const void *key, const void *iv) +{ + cc->seq = 0; + cc->ichacha = ichacha; + cc->ipoly = ipoly; + memcpy(cc->key, key, sizeof cc->key); + memcpy(cc->iv, iv, sizeof cc->iv); +} + +static void +gen_chapol_process(br_sslrec_chapol_context *cc, + int record_type, unsigned version, void *data, size_t len, + void *tag, int encrypt) +{ + unsigned char header[13]; + unsigned char nonce[12]; + uint64_t seq; + size_t u; + + seq = cc->seq ++; + br_enc64be(header, seq); + header[8] = (unsigned char)record_type; + br_enc16be(header + 9, version); + br_enc16be(header + 11, len); + memcpy(nonce, cc->iv, 12); + for (u = 0; u < 8; u ++) { + nonce[11 - u] ^= (unsigned char)seq; + seq >>= 8; + } + cc->ipoly(cc->key, nonce, data, len, header, sizeof header, + tag, cc->ichacha, encrypt); +} + +static void +in_chapol_init(br_sslrec_chapol_context *cc, + br_chacha20_run ichacha, br_poly1305_run ipoly, + const void *key, const void *iv) +{ + cc->vtable.in = &br_sslrec_in_chapol_vtable; + gen_chapol_init(cc, ichacha, ipoly, key, iv); +} + +static int +chapol_check_length(const br_sslrec_chapol_context *cc, size_t rlen) +{ + /* + * Overhead is just the authentication tag (16 bytes). 
+ */ + (void)cc; + return rlen >= 16 && rlen <= (16384 + 16); +} + +static unsigned char * +chapol_decrypt(br_sslrec_chapol_context *cc, + int record_type, unsigned version, void *data, size_t *data_len) +{ + unsigned char *buf; + size_t u, len; + unsigned char tag[16]; + unsigned bad; + + buf = data; + len = *data_len - 16; + gen_chapol_process(cc, record_type, version, buf, len, tag, 0); + bad = 0; + for (u = 0; u < 16; u ++) { + bad |= tag[u] ^ buf[len + u]; + } + if (bad) { + return NULL; + } + *data_len = len; + return buf; +} + +/* see bearssl_ssl.h */ +const br_sslrec_in_chapol_class br_sslrec_in_chapol_vtable = { + { + sizeof(br_sslrec_chapol_context), + (int (*)(const br_sslrec_in_class *const *, size_t)) + &chapol_check_length, + (unsigned char *(*)(const br_sslrec_in_class **, + int, unsigned, void *, size_t *)) + &chapol_decrypt + }, + (void (*)(const br_sslrec_in_chapol_class **, + br_chacha20_run, br_poly1305_run, + const void *, const void *)) + &in_chapol_init +}; + +static void +out_chapol_init(br_sslrec_chapol_context *cc, + br_chacha20_run ichacha, br_poly1305_run ipoly, + const void *key, const void *iv) +{ + cc->vtable.out = &br_sslrec_out_chapol_vtable; + gen_chapol_init(cc, ichacha, ipoly, key, iv); +} + +static void +chapol_max_plaintext(const br_sslrec_chapol_context *cc, + size_t *start, size_t *end) +{ + size_t len; + + (void)cc; + len = *end - *start - 16; + if (len > 16384) { + len = 16384; + } + *end = *start + len; +} + +static unsigned char * +chapol_encrypt(br_sslrec_chapol_context *cc, + int record_type, unsigned version, void *data, size_t *data_len) +{ + unsigned char *buf; + size_t len; + + buf = data; + len = *data_len; + gen_chapol_process(cc, record_type, version, buf, len, buf + len, 1); + buf -= 5; + buf[0] = (unsigned char)record_type; + br_enc16be(buf + 1, version); + br_enc16be(buf + 3, len + 16); + *data_len = len + 21; + return buf; +} + +/* see bearssl_ssl.h */ +const br_sslrec_out_chapol_class 
br_sslrec_out_chapol_vtable = { + { + sizeof(br_sslrec_chapol_context), + (void (*)(const br_sslrec_out_class *const *, + size_t *, size_t *)) + &chapol_max_plaintext, + (unsigned char *(*)(const br_sslrec_out_class **, + int, unsigned, void *, size_t *)) + &chapol_encrypt + }, + (void (*)(const br_sslrec_out_chapol_class **, + br_chacha20_run, br_poly1305_run, + const void *, const void *)) + &out_chapol_init +}; diff --git a/third_party/bearssl/src/ssl_rec_gcm.c b/third_party/bearssl/src/ssl_rec_gcm.c new file mode 100644 index 0000000..70df277 --- /dev/null +++ b/third_party/bearssl/src/ssl_rec_gcm.c @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * GCM initialisation. This does everything except setting the vtable, + * which depends on whether this is a context for encrypting or for + * decrypting. 
+ */ +static void +gen_gcm_init(br_sslrec_gcm_context *cc, + const br_block_ctr_class *bc_impl, + const void *key, size_t key_len, + br_ghash gh_impl, + const void *iv) +{ + unsigned char tmp[12]; + + cc->seq = 0; + bc_impl->init(&cc->bc.vtable, key, key_len); + cc->gh = gh_impl; + memcpy(cc->iv, iv, sizeof cc->iv); + memset(cc->h, 0, sizeof cc->h); + memset(tmp, 0, sizeof tmp); + bc_impl->run(&cc->bc.vtable, tmp, 0, cc->h, sizeof cc->h); +} + +static void +in_gcm_init(br_sslrec_gcm_context *cc, + const br_block_ctr_class *bc_impl, + const void *key, size_t key_len, + br_ghash gh_impl, + const void *iv) +{ + cc->vtable.in = &br_sslrec_in_gcm_vtable; + gen_gcm_init(cc, bc_impl, key, key_len, gh_impl, iv); +} + +static int +gcm_check_length(const br_sslrec_gcm_context *cc, size_t rlen) +{ + /* + * GCM adds a fixed overhead: + * 8 bytes for the nonce_explicit (before the ciphertext) + * 16 bytes for the authentication tag (after the ciphertext) + */ + (void)cc; + return rlen >= 24 && rlen <= (16384 + 24); +} + +/* + * Compute the authentication tag. The value written in 'tag' must still + * be CTR-encrypted. + */ +static void +do_tag(br_sslrec_gcm_context *cc, + int record_type, unsigned version, + void *data, size_t len, void *tag) +{ + unsigned char header[13]; + unsigned char footer[16]; + + /* + * Compute authentication tag. Three elements must be injected in + * sequence, each possibly 0-padded to reach a length multiple + * of the block size: the 13-byte header (sequence number, record + * type, protocol version, record length), the cipher text, and + * the word containing the encodings of the bit lengths of the two + * other elements. 
+ */ + br_enc64be(header, cc->seq ++); + header[8] = (unsigned char)record_type; + br_enc16be(header + 9, version); + br_enc16be(header + 11, len); + br_enc64be(footer, (uint64_t)(sizeof header) << 3); + br_enc64be(footer + 8, (uint64_t)len << 3); + memset(tag, 0, 16); + cc->gh(tag, cc->h, header, sizeof header); + cc->gh(tag, cc->h, data, len); + cc->gh(tag, cc->h, footer, sizeof footer); +} + +/* + * Do CTR encryption. This also does CTR encryption of a single block at + * address 'xortag' with the counter value appropriate for the final + * processing of the authentication tag. + */ +static void +do_ctr(br_sslrec_gcm_context *cc, const void *nonce, void *data, size_t len, + void *xortag) +{ + unsigned char iv[12]; + + memcpy(iv, cc->iv, 4); + memcpy(iv + 4, nonce, 8); + cc->bc.vtable->run(&cc->bc.vtable, iv, 2, data, len); + cc->bc.vtable->run(&cc->bc.vtable, iv, 1, xortag, 16); +} + +static unsigned char * +gcm_decrypt(br_sslrec_gcm_context *cc, + int record_type, unsigned version, void *data, size_t *data_len) +{ + unsigned char *buf; + size_t len, u; + uint32_t bad; + unsigned char tag[16]; + + buf = (unsigned char *)data + 8; + len = *data_len - 24; + do_tag(cc, record_type, version, buf, len, tag); + do_ctr(cc, data, buf, len, tag); + + /* + * Compare the computed tag with the value from the record. It + * is possibly useless to do a constant-time comparison here, + * but it does not hurt. 
+ */ + bad = 0; + for (u = 0; u < 16; u ++) { + bad |= tag[u] ^ buf[len + u]; + } + if (bad) { + return NULL; + } + *data_len = len; + return buf; +} + +/* see bearssl_ssl.h */ +const br_sslrec_in_gcm_class br_sslrec_in_gcm_vtable = { + { + sizeof(br_sslrec_gcm_context), + (int (*)(const br_sslrec_in_class *const *, size_t)) + &gcm_check_length, + (unsigned char *(*)(const br_sslrec_in_class **, + int, unsigned, void *, size_t *)) + &gcm_decrypt + }, + (void (*)(const br_sslrec_in_gcm_class **, + const br_block_ctr_class *, const void *, size_t, + br_ghash, const void *)) + &in_gcm_init +}; + +static void +out_gcm_init(br_sslrec_gcm_context *cc, + const br_block_ctr_class *bc_impl, + const void *key, size_t key_len, + br_ghash gh_impl, + const void *iv) +{ + cc->vtable.out = &br_sslrec_out_gcm_vtable; + gen_gcm_init(cc, bc_impl, key, key_len, gh_impl, iv); +} + +static void +gcm_max_plaintext(const br_sslrec_gcm_context *cc, + size_t *start, size_t *end) +{ + size_t len; + + (void)cc; + *start += 8; + len = *end - *start - 16; + if (len > 16384) { + len = 16384; + } + *end = *start + len; +} + +static unsigned char * +gcm_encrypt(br_sslrec_gcm_context *cc, + int record_type, unsigned version, void *data, size_t *data_len) +{ + unsigned char *buf; + size_t u, len; + unsigned char tmp[16]; + + buf = (unsigned char *)data; + len = *data_len; + memset(tmp, 0, sizeof tmp); + br_enc64be(buf - 8, cc->seq); + do_ctr(cc, buf - 8, buf, len, tmp); + do_tag(cc, record_type, version, buf, len, buf + len); + for (u = 0; u < 16; u ++) { + buf[len + u] ^= tmp[u]; + } + len += 24; + buf -= 13; + buf[0] = (unsigned char)record_type; + br_enc16be(buf + 1, version); + br_enc16be(buf + 3, len); + *data_len = len + 5; + return buf; +} + +/* see bearssl_ssl.h */ +const br_sslrec_out_gcm_class br_sslrec_out_gcm_vtable = { + { + sizeof(br_sslrec_gcm_context), + (void (*)(const br_sslrec_out_class *const *, + size_t *, size_t *)) + &gcm_max_plaintext, + (unsigned char *(*)(const 
br_sslrec_out_class **, + int, unsigned, void *, size_t *)) + &gcm_encrypt + }, + (void (*)(const br_sslrec_out_gcm_class **, + const br_block_ctr_class *, const void *, size_t, + br_ghash, const void *)) + &out_gcm_init +}; diff --git a/third_party/bearssl/src/ssl_scert_single_ec.c b/third_party/bearssl/src/ssl_scert_single_ec.c new file mode 100644 index 0000000..ce8d753 --- /dev/null +++ b/third_party/bearssl/src/ssl_scert_single_ec.c @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inner.h" + +static int +se_choose(const br_ssl_server_policy_class **pctx, + const br_ssl_server_context *cc, + br_ssl_server_choices *choices) +{ + br_ssl_server_policy_ec_context *pc; + const br_suite_translated *st; + size_t u, st_num; + unsigned hash_id; + + pc = (br_ssl_server_policy_ec_context *)pctx; + st = br_ssl_server_get_client_suites(cc, &st_num); + hash_id = br_ssl_choose_hash(br_ssl_server_get_client_hashes(cc) >> 8); + if (cc->eng.session.version < BR_TLS12) { + hash_id = br_sha1_ID; + } + choices->chain = pc->chain; + choices->chain_len = pc->chain_len; + for (u = 0; u < st_num; u ++) { + unsigned tt; + + tt = st[u][1]; + switch (tt >> 12) { + case BR_SSLKEYX_ECDH_RSA: + if ((pc->allowed_usages & BR_KEYTYPE_KEYX) != 0 + && pc->cert_issuer_key_type == BR_KEYTYPE_RSA) + { + choices->cipher_suite = st[u][0]; + return 1; + } + break; + case BR_SSLKEYX_ECDH_ECDSA: + if ((pc->allowed_usages & BR_KEYTYPE_KEYX) != 0 + && pc->cert_issuer_key_type == BR_KEYTYPE_EC) + { + choices->cipher_suite = st[u][0]; + return 1; + } + break; + case BR_SSLKEYX_ECDHE_ECDSA: + if ((pc->allowed_usages & BR_KEYTYPE_SIGN) != 0 + && hash_id != 0) + { + choices->cipher_suite = st[u][0]; + choices->algo_id = hash_id + 0xFF00; + return 1; + } + break; + } + } + return 0; +} + +static uint32_t +se_do_keyx(const br_ssl_server_policy_class **pctx, + unsigned char *data, size_t *len) +{ + br_ssl_server_policy_ec_context *pc; + uint32_t r; + size_t xoff, xlen; + + pc = (br_ssl_server_policy_ec_context *)pctx; + r = pc->iec->mul(data, *len, pc->sk->x, pc->sk->xlen, pc->sk->curve); + xoff = pc->iec->xoff(pc->sk->curve, &xlen); + memmove(data, data + xoff, xlen); + *len = xlen; + return r; +} + +static size_t +se_do_sign(const br_ssl_server_policy_class **pctx, + unsigned algo_id, unsigned char *data, size_t hv_len, size_t len) +{ + br_ssl_server_policy_ec_context *pc; + unsigned char hv[64]; + const br_hash_class *hc; + + algo_id &= 0xFF; + pc = 
(br_ssl_server_policy_ec_context *)pctx; + hc = br_multihash_getimpl(pc->mhash, algo_id); + if (hc == NULL) { + return 0; + } + memcpy(hv, data, hv_len); + if (len < 139) { + return 0; + } + return pc->iecdsa(pc->iec, hc, hv, pc->sk, data); +} + +static const br_ssl_server_policy_class se_policy_vtable = { + sizeof(br_ssl_server_policy_ec_context), + se_choose, + se_do_keyx, + se_do_sign +}; + +/* see bearssl_ssl.h */ +void +br_ssl_server_set_single_ec(br_ssl_server_context *cc, + const br_x509_certificate *chain, size_t chain_len, + const br_ec_private_key *sk, unsigned allowed_usages, + unsigned cert_issuer_key_type, + const br_ec_impl *iec, br_ecdsa_sign iecdsa) +{ + cc->chain_handler.single_ec.vtable = &se_policy_vtable; + cc->chain_handler.single_ec.chain = chain; + cc->chain_handler.single_ec.chain_len = chain_len; + cc->chain_handler.single_ec.sk = sk; + cc->chain_handler.single_ec.allowed_usages = allowed_usages; + cc->chain_handler.single_ec.cert_issuer_key_type = cert_issuer_key_type; + cc->chain_handler.single_ec.mhash = &cc->eng.mhash; + cc->chain_handler.single_ec.iec = iec; + cc->chain_handler.single_ec.iecdsa = iecdsa; + cc->policy_vtable = &cc->chain_handler.single_ec.vtable; +} diff --git a/third_party/bearssl/src/ssl_scert_single_rsa.c b/third_party/bearssl/src/ssl_scert_single_rsa.c new file mode 100644 index 0000000..b2c7767 --- /dev/null +++ b/third_party/bearssl/src/ssl_scert_single_rsa.c @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above 
copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +static int +sr_choose(const br_ssl_server_policy_class **pctx, + const br_ssl_server_context *cc, + br_ssl_server_choices *choices) +{ + br_ssl_server_policy_rsa_context *pc; + const br_suite_translated *st; + size_t u, st_num; + unsigned hash_id; + int fh; + + pc = (br_ssl_server_policy_rsa_context *)pctx; + st = br_ssl_server_get_client_suites(cc, &st_num); + if (cc->eng.session.version < BR_TLS12) { + hash_id = 0; + fh = 1; + } else { + hash_id = br_ssl_choose_hash( + br_ssl_server_get_client_hashes(cc)); + fh = (hash_id != 0); + } + choices->chain = pc->chain; + choices->chain_len = pc->chain_len; + for (u = 0; u < st_num; u ++) { + unsigned tt; + + tt = st[u][1]; + switch (tt >> 12) { + case BR_SSLKEYX_RSA: + if ((pc->allowed_usages & BR_KEYTYPE_KEYX) != 0) { + choices->cipher_suite = st[u][0]; + return 1; + } + break; + case BR_SSLKEYX_ECDHE_RSA: + if ((pc->allowed_usages & BR_KEYTYPE_SIGN) != 0 && fh) { + choices->cipher_suite = st[u][0]; + choices->algo_id = hash_id + 0xFF00; + return 1; + } + break; + } + } + return 0; +} + +static uint32_t +sr_do_keyx(const br_ssl_server_policy_class **pctx, + unsigned char *data, size_t *len) +{ + br_ssl_server_policy_rsa_context *pc; + + pc = (br_ssl_server_policy_rsa_context *)pctx; + return br_rsa_ssl_decrypt(pc->irsacore, pc->sk, data, *len); +} + +/* + * OID for hash functions in 
RSA signatures. + */ +static const unsigned char HASH_OID_SHA1[] = { + 0x05, 0x2B, 0x0E, 0x03, 0x02, 0x1A +}; + +static const unsigned char HASH_OID_SHA224[] = { + 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04 +}; + +static const unsigned char HASH_OID_SHA256[] = { + 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01 +}; + +static const unsigned char HASH_OID_SHA384[] = { + 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02 +}; + +static const unsigned char HASH_OID_SHA512[] = { + 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03 +}; + +static const unsigned char *HASH_OID[] = { + HASH_OID_SHA1, + HASH_OID_SHA224, + HASH_OID_SHA256, + HASH_OID_SHA384, + HASH_OID_SHA512 +}; + +static size_t +sr_do_sign(const br_ssl_server_policy_class **pctx, + unsigned algo_id, unsigned char *data, size_t hv_len, size_t len) +{ + br_ssl_server_policy_rsa_context *pc; + unsigned char hv[64]; + size_t sig_len; + const unsigned char *hash_oid; + + pc = (br_ssl_server_policy_rsa_context *)pctx; + memcpy(hv, data, hv_len); + algo_id &= 0xFF; + if (algo_id == 0) { + hash_oid = NULL; + } else if (algo_id >= 2 && algo_id <= 6) { + hash_oid = HASH_OID[algo_id - 2]; + } else { + return 0; + } + sig_len = (pc->sk->n_bitlen + 7) >> 3; + if (len < sig_len) { + return 0; + } + return pc->irsasign(hash_oid, hv, hv_len, pc->sk, data) ? 
sig_len : 0; +} + +static const br_ssl_server_policy_class sr_policy_vtable = { + sizeof(br_ssl_server_policy_rsa_context), + sr_choose, + sr_do_keyx, + sr_do_sign +}; + +/* see bearssl_ssl.h */ +void +br_ssl_server_set_single_rsa(br_ssl_server_context *cc, + const br_x509_certificate *chain, size_t chain_len, + const br_rsa_private_key *sk, unsigned allowed_usages, + br_rsa_private irsacore, br_rsa_pkcs1_sign irsasign) +{ + cc->chain_handler.single_rsa.vtable = &sr_policy_vtable; + cc->chain_handler.single_rsa.chain = chain; + cc->chain_handler.single_rsa.chain_len = chain_len; + cc->chain_handler.single_rsa.sk = sk; + cc->chain_handler.single_rsa.allowed_usages = allowed_usages; + cc->chain_handler.single_rsa.irsacore = irsacore; + cc->chain_handler.single_rsa.irsasign = irsasign; + cc->policy_vtable = &cc->chain_handler.single_rsa.vtable; +} diff --git a/third_party/bearssl/src/sysrng.c b/third_party/bearssl/src/sysrng.c new file mode 100644 index 0000000..5a92114 --- /dev/null +++ b/third_party/bearssl/src/sysrng.c @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2017 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define BR_ENABLE_INTRINSICS 1 +#include "inner.h" + +#if BR_USE_GETENTROPY +#include <unistd.h> +#endif + +#if BR_USE_URANDOM +#include <sys/types.h> +#include <unistd.h> +#include <fcntl.h> +#include <errno.h> +#endif + +#if BR_USE_WIN32_RAND +#include <windows.h> +#include <wincrypt.h> +#pragma comment(lib, "advapi32") +#endif + +/* + * Seeder that uses the RDRAND opcodes (on x86 CPU). + */ +#if BR_RDRAND +BR_TARGETS_X86_UP +BR_TARGET("rdrnd") +static int +seeder_rdrand(const br_prng_class **ctx) +{ + unsigned char tmp[32]; + size_t u; + + for (u = 0; u < sizeof tmp; u += sizeof(uint32_t)) { + int j; + uint32_t x; + + /* + * We use the 32-bit intrinsic so that code is compatible + * with both 32-bit and 64-bit architectures. + * + * Intel recommends trying at least 10 times in case of + * failure. + * + * AMD bug: there are reports that some AMD processors + * have a bug that makes them fail silently after a + * suspend/resume cycle, in which case RDRAND will report + * a success but always return 0xFFFFFFFF. + * see: https://bugzilla.kernel.org/show_bug.cgi?id=85911 + * + * As a mitigation, if the 32-bit value is 0 or -1, then + * it is considered a failure and tried again. This should + * reliably detect the buggy case, at least. This also + * implies that the selected seed values can never be + * 0x00000000 or 0xFFFFFFFF, which is not a problem since + * we are generating a seed for a PRNG, and we overdo it + * a bit (we generate 32 bytes of randomness, and 256 bits + * of entropy are really overkill). 
+ */ + for (j = 0; j < 10; j ++) { + if (_rdrand32_step(&x) && x != 0 && x != (uint32_t)-1) { + goto next_word; + } + } + return 0; + next_word: + br_enc32le(tmp + u, x); + } + (*ctx)->update(ctx, tmp, sizeof tmp); + return 1; +} +BR_TARGETS_X86_DOWN + +static int +rdrand_supported(void) +{ + /* + * The RDRND support is bit 30 of ECX, as returned by CPUID. + */ + return br_cpuid(0, 0, 0x40000000, 0); +} +#endif + +/* + * Seeder that uses /dev/urandom (on Unix-like systems). + */ +#if BR_USE_URANDOM +static int +seeder_urandom(const br_prng_class **ctx) +{ + int f; + + f = open("/dev/urandom", O_RDONLY); + if (f >= 0) { + unsigned char tmp[32]; + size_t u; + + for (u = 0; u < sizeof tmp;) { + ssize_t len; + + len = read(f, tmp + u, (sizeof tmp) - u); + if (len < 0) { + if (errno == EINTR) { + continue; + } + break; + } + u += (size_t)len; + } + close(f); + if (u == sizeof tmp) { + (*ctx)->update(ctx, tmp, sizeof tmp); + return 1; + } + } + return 0; +} +#endif + +/* + * Seeder that uses getentropy() (backed by getrandom() on some systems, + * e.g. Linux). On failure, it will use the /dev/urandom seeder (if + * enabled). + */ +#if BR_USE_GETENTROPY +static int +seeder_getentropy(const br_prng_class **ctx) +{ + unsigned char tmp[32]; + + if (getentropy(tmp, sizeof tmp) == 0) { + (*ctx)->update(ctx, tmp, sizeof tmp); + return 1; + } +#if BR_USE_URANDOM + return seeder_urandom(ctx); +#else + return 0; +#endif +} +#endif + +/* + * Seeder that uses CryptGenRandom() (on Windows). + */ +#if BR_USE_WIN32_RAND +static int +seeder_win32(const br_prng_class **ctx) +{ + HCRYPTPROV hp; + + if (CryptAcquireContext(&hp, 0, 0, PROV_RSA_FULL, + CRYPT_VERIFYCONTEXT | CRYPT_SILENT)) + { + BYTE buf[32]; + BOOL r; + + r = CryptGenRandom(hp, sizeof buf, buf); + CryptReleaseContext(hp, 0); + if (r) { + (*ctx)->update(ctx, buf, sizeof buf); + return 1; + } + } + return 0; +} +#endif + +/* + * An aggregate seeder that uses RDRAND, and falls back to an OS-provided + * source if RDRAND fails. 
+ */ +#if BR_RDRAND && (BR_USE_GETENTROPY || BR_USE_URANDOM || BR_USE_WIN32_RAND) +static int +seeder_rdrand_with_fallback(const br_prng_class **ctx) +{ + if (!seeder_rdrand(ctx)) { +#if BR_USE_GETENTROPY + return seeder_getentropy(ctx); +#elif BR_USE_URANDOM + return seeder_urandom(ctx); +#elif BR_USE_WIN32_RAND + return seeder_win32(ctx); +#else +#error "macro selection has gone wrong" +#endif + } + return 1; +} +#endif + +/* see bearssl_rand.h */ +br_prng_seeder +br_prng_seeder_system(const char **name) +{ +#if BR_RDRAND + if (rdrand_supported()) { + if (name != NULL) { + *name = "rdrand"; + } +#if BR_USE_GETENTROPY || BR_USE_URANDOM || BR_USE_WIN32_RAND + return &seeder_rdrand_with_fallback; +#else + return &seeder_rdrand; +#endif + } +#endif +#if BR_USE_GETENTROPY + if (name != NULL) { + *name = "getentropy"; + } + return &seeder_getentropy; +#elif BR_USE_URANDOM + if (name != NULL) { + *name = "urandom"; + } + return &seeder_urandom; +#elif BR_USE_WIN32_RAND + if (name != NULL) { + *name = "win32"; + } + return &seeder_win32; +#else + if (name != NULL) { + *name = "none"; + } + return 0; +#endif +} diff --git a/third_party/bearssl/src/x509_decoder.c b/third_party/bearssl/src/x509_decoder.c new file mode 100644 index 0000000..8dd970f --- /dev/null +++ b/third_party/bearssl/src/x509_decoder.c @@ -0,0 +1,773 @@ +/* Automatically generated code; do not modify directly. 
*/ + +#include <stddef.h> +#include <stdint.h> + +typedef struct { + uint32_t *dp; + uint32_t *rp; + const unsigned char *ip; +} t0_context; + +static uint32_t +t0_parse7E_unsigned(const unsigned char **p) +{ + uint32_t x; + + x = 0; + for (;;) { + unsigned y; + + y = *(*p) ++; + x = (x << 7) | (uint32_t)(y & 0x7F); + if (y < 0x80) { + return x; + } + } +} + +static int32_t +t0_parse7E_signed(const unsigned char **p) +{ + int neg; + uint32_t x; + + neg = ((**p) >> 6) & 1; + x = (uint32_t)-neg; + for (;;) { + unsigned y; + + y = *(*p) ++; + x = (x << 7) | (uint32_t)(y & 0x7F); + if (y < 0x80) { + if (neg) { + return -(int32_t)~x - 1; + } else { + return (int32_t)x; + } + } + } +} + +#define T0_VBYTE(x, n) (unsigned char)((((uint32_t)(x) >> (n)) & 0x7F) | 0x80) +#define T0_FBYTE(x, n) (unsigned char)(((uint32_t)(x) >> (n)) & 0x7F) +#define T0_SBYTE(x) (unsigned char)((((uint32_t)(x) >> 28) + 0xF8) ^ 0xF8) +#define T0_INT1(x) T0_FBYTE(x, 0) +#define T0_INT2(x) T0_VBYTE(x, 7), T0_FBYTE(x, 0) +#define T0_INT3(x) T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0) +#define T0_INT4(x) T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0) +#define T0_INT5(x) T0_SBYTE(x), T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0) + +/* static const unsigned char t0_datablock[]; */ + + +void br_x509_decoder_init_main(void *t0ctx); + +void br_x509_decoder_run(void *t0ctx); + + + +#include "inner.h" + + + + + +#include "inner.h" + +#define CTX ((br_x509_decoder_context *)(void *)((unsigned char *)t0ctx - offsetof(br_x509_decoder_context, cpu))) +#define CONTEXT_NAME br_x509_decoder_context + +/* see bearssl_x509.h */ +void +br_x509_decoder_init(br_x509_decoder_context *ctx, + void (*append_dn)(void *ctx, const void *buf, size_t len), + void *append_dn_ctx) +{ + memset(ctx, 0, sizeof *ctx); + /* obsolete + ctx->err = 0; + ctx->hbuf = NULL; + ctx->hlen = 0; + */ + ctx->append_dn = append_dn; + ctx->append_dn_ctx = append_dn_ctx; + ctx->cpu.dp = &ctx->dp_stack[0]; + 
ctx->cpu.rp = &ctx->rp_stack[0]; + br_x509_decoder_init_main(&ctx->cpu); + br_x509_decoder_run(&ctx->cpu); +} + +/* see bearssl_x509.h */ +void +br_x509_decoder_push(br_x509_decoder_context *ctx, + const void *data, size_t len) +{ + ctx->hbuf = data; + ctx->hlen = len; + br_x509_decoder_run(&ctx->cpu); +} + + + +static const unsigned char t0_datablock[] = { + 0x00, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x09, + 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x05, 0x09, 0x2A, 0x86, + 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0E, 0x09, 0x2A, 0x86, 0x48, 0x86, + 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, + 0x01, 0x01, 0x0C, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, + 0x0D, 0x07, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01, 0x08, 0x2A, 0x86, + 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x07, 0x05, 0x2B, 0x81, 0x04, 0x00, 0x22, + 0x05, 0x2B, 0x81, 0x04, 0x00, 0x23, 0x07, 0x2A, 0x86, 0x48, 0xCE, 0x3D, + 0x04, 0x01, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x01, 0x08, + 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02, 0x08, 0x2A, 0x86, 0x48, + 0xCE, 0x3D, 0x04, 0x03, 0x03, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, + 0x03, 0x04, 0x00, 0x1F, 0x03, 0xFC, 0x07, 0x7F, 0x0B, 0x5E, 0x0F, 0x1F, + 0x12, 0xFE, 0x16, 0xBF, 0x1A, 0x9F, 0x1E, 0x7E, 0x22, 0x3F, 0x26, 0x1E, + 0x29, 0xDF, 0x00, 0x1F, 0x03, 0xFD, 0x07, 0x9F, 0x0B, 0x7E, 0x0F, 0x3F, + 0x13, 0x1E, 0x16, 0xDF, 0x1A, 0xBF, 0x1E, 0x9E, 0x22, 0x5F, 0x26, 0x3E, + 0x29, 0xFF, 0x03, 0x55, 0x1D, 0x13 +}; + +static const unsigned char t0_codeblock[] = { + 0x00, 0x01, 0x00, 0x10, 0x00, 0x00, 0x01, 0x00, 0x11, 0x00, 0x00, 0x01, + 0x01, 0x09, 0x00, 0x00, 0x01, 0x01, 0x0A, 0x00, 0x00, 0x1A, 0x1A, 0x00, + 0x00, 0x01, T0_INT1(BR_ERR_X509_BAD_BOOLEAN), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_BAD_TAG_CLASS), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_BAD_TAG_VALUE), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_BAD_TIME), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_EXTRA_ELEMENT), 0x00, 0x00, 0x01, + 
T0_INT1(BR_ERR_X509_INDEFINITE_LENGTH), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_INNER_TRUNC), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_LIMIT_EXCEEDED), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_NOT_CONSTRUCTED), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_NOT_PRIMITIVE), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_OVERFLOW), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_PARTIAL_BYTE), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_UNEXPECTED), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_UNSUPPORTED), 0x00, 0x00, 0x01, + T0_INT1(BR_KEYTYPE_EC), 0x00, 0x00, 0x01, T0_INT1(BR_KEYTYPE_RSA), + 0x00, 0x00, 0x01, T0_INT2(offsetof(CONTEXT_NAME, copy_dn)), 0x00, 0x00, + 0x01, T0_INT2(offsetof(CONTEXT_NAME, decoded)), 0x00, 0x00, 0x01, + T0_INT2(offsetof(CONTEXT_NAME, isCA)), 0x00, 0x00, 0x01, + T0_INT2(offsetof(br_x509_decoder_context, pkey_data)), 0x01, + T0_INT2(BR_X509_BUFSIZE_KEY), 0x00, 0x00, 0x01, + T0_INT2(offsetof(CONTEXT_NAME, notafter_days)), 0x00, 0x00, 0x01, + T0_INT2(offsetof(CONTEXT_NAME, notafter_seconds)), 0x00, 0x00, 0x01, + T0_INT2(offsetof(CONTEXT_NAME, notbefore_days)), 0x00, 0x00, 0x01, + T0_INT2(offsetof(CONTEXT_NAME, notbefore_seconds)), 0x00, 0x00, 0x01, + T0_INT2(offsetof(CONTEXT_NAME, pad)), 0x00, 0x00, 0x01, + T0_INT2(offsetof(CONTEXT_NAME, signer_hash_id)), 0x00, 0x00, 0x01, + T0_INT2(offsetof(CONTEXT_NAME, signer_key_type)), 0x00, 0x00, 0x01, + 0x80, 0x45, 0x00, 0x00, 0x01, 0x80, 0x4E, 0x00, 0x00, 0x01, 0x80, 0x54, + 0x00, 0x00, 0x01, 0x81, 0x36, 0x00, 0x02, 0x03, 0x00, 0x03, 0x01, 0x1B, + 0x02, 0x01, 0x13, 0x26, 0x02, 0x00, 0x0F, 0x15, 0x00, 0x00, 0x05, 0x02, + 0x34, 0x1D, 0x00, 0x00, 0x06, 0x02, 0x35, 0x1D, 0x00, 0x00, 0x01, 0x10, + 0x4F, 0x00, 0x00, 0x11, 0x05, 0x02, 0x38, 0x1D, 0x4C, 0x00, 0x00, 0x11, + 0x05, 0x02, 0x38, 0x1D, 0x4D, 0x00, 0x00, 0x06, 0x02, 0x30, 0x1D, 0x00, + 0x00, 0x1B, 0x19, 0x01, 0x08, 0x0E, 0x26, 0x29, 0x19, 0x09, 0x00, 0x00, + 0x01, 0x30, 0x0A, 0x1B, 0x01, 0x00, 0x01, 0x09, 0x4B, 0x05, 0x02, 0x2F, + 0x1D, 0x00, 0x00, 0x20, 0x20, 0x00, 0x00, 0x01, 
0x80, 0x5A, 0x00, 0x00, + 0x01, 0x80, 0x62, 0x00, 0x00, 0x01, 0x80, 0x6B, 0x00, 0x00, 0x01, 0x80, + 0x74, 0x00, 0x00, 0x01, 0x80, 0x7D, 0x00, 0x00, 0x01, 0x3D, 0x00, 0x00, + 0x20, 0x11, 0x06, 0x04, 0x2B, 0x6B, 0x7A, 0x71, 0x00, 0x04, 0x01, 0x00, + 0x3D, 0x25, 0x01, 0x00, 0x3C, 0x25, 0x01, 0x87, 0xFF, 0xFF, 0x7F, 0x6D, + 0x6D, 0x70, 0x1B, 0x01, 0x20, 0x11, 0x06, 0x11, 0x1A, 0x4C, 0x6B, 0x70, + 0x01, 0x02, 0x50, 0x6E, 0x01, 0x02, 0x12, 0x06, 0x02, 0x39, 0x1D, 0x51, + 0x70, 0x01, 0x02, 0x50, 0x6C, 0x6D, 0x7A, 0x6D, 0x7A, 0x6D, 0x65, 0x43, + 0x24, 0x42, 0x24, 0x65, 0x41, 0x24, 0x40, 0x24, 0x51, 0x01, 0x01, 0x3C, + 0x25, 0x6D, 0x7A, 0x01, 0x00, 0x3C, 0x25, 0x6D, 0x6D, 0x60, 0x05, 0x02, + 0x39, 0x1D, 0x74, 0x1C, 0x06, 0x1C, 0x7A, 0x61, 0x6D, 0x3F, 0x68, 0x03, + 0x00, 0x3F, 0x26, 0x02, 0x00, 0x09, 0x26, 0x02, 0x00, 0x0A, 0x68, 0x03, + 0x01, 0x51, 0x51, 0x02, 0x00, 0x02, 0x01, 0x18, 0x04, 0x1E, 0x5A, 0x1C, + 0x06, 0x18, 0x64, 0x03, 0x02, 0x51, 0x61, 0x1B, 0x03, 0x03, 0x1B, 0x3F, + 0x23, 0x0D, 0x06, 0x02, 0x33, 0x1D, 0x62, 0x02, 0x02, 0x02, 0x03, 0x17, + 0x04, 0x02, 0x39, 0x1D, 0x51, 0x01, 0x00, 0x3E, 0x25, 0x71, 0x01, 0x21, + 0x5B, 0x01, 0x22, 0x5B, 0x1B, 0x01, 0x23, 0x11, 0x06, 0x28, 0x1A, 0x4C, + 0x6B, 0x6D, 0x1B, 0x06, 0x1D, 0x6D, 0x60, 0x1A, 0x70, 0x1B, 0x01, 0x01, + 0x11, 0x06, 0x03, 0x63, 0x1A, 0x70, 0x01, 0x04, 0x50, 0x6B, 0x4A, 0x1C, + 0x06, 0x03, 0x5F, 0x04, 0x01, 0x7B, 0x51, 0x51, 0x04, 0x60, 0x51, 0x51, + 0x04, 0x08, 0x01, 0x7F, 0x11, 0x05, 0x02, 0x38, 0x1D, 0x1A, 0x51, 0x6D, + 0x60, 0x06, 0x80, 0x63, 0x75, 0x1C, 0x06, 0x06, 0x01, 0x02, 0x3B, 0x04, + 0x80, 0x57, 0x76, 0x1C, 0x06, 0x06, 0x01, 0x03, 0x3B, 0x04, 0x80, 0x4D, + 0x77, 0x1C, 0x06, 0x06, 0x01, 0x04, 0x3B, 0x04, 0x80, 0x43, 0x78, 0x1C, + 0x06, 0x05, 0x01, 0x05, 0x3B, 0x04, 0x3A, 0x79, 0x1C, 0x06, 0x05, 0x01, + 0x06, 0x3B, 0x04, 0x31, 0x55, 0x1C, 0x06, 0x05, 0x01, 0x02, 0x3A, 0x04, + 0x28, 0x56, 0x1C, 0x06, 0x05, 0x01, 0x03, 0x3A, 0x04, 0x1F, 0x57, 0x1C, + 0x06, 0x05, 0x01, 0x04, 0x3A, 0x04, 0x16, 0x58, 
0x1C, 0x06, 0x05, 0x01, + 0x05, 0x3A, 0x04, 0x0D, 0x59, 0x1C, 0x06, 0x05, 0x01, 0x06, 0x3A, 0x04, + 0x04, 0x01, 0x00, 0x01, 0x00, 0x04, 0x04, 0x01, 0x00, 0x01, 0x00, 0x46, + 0x25, 0x45, 0x25, 0x7A, 0x61, 0x7A, 0x51, 0x1A, 0x01, 0x01, 0x3D, 0x25, + 0x73, 0x30, 0x1D, 0x00, 0x00, 0x01, 0x81, 0x06, 0x00, 0x01, 0x54, 0x0D, + 0x06, 0x02, 0x32, 0x1D, 0x1B, 0x03, 0x00, 0x0A, 0x02, 0x00, 0x00, 0x00, + 0x6D, 0x71, 0x1B, 0x01, 0x01, 0x11, 0x06, 0x08, 0x63, 0x01, 0x01, 0x15, + 0x3E, 0x25, 0x04, 0x01, 0x2B, 0x7A, 0x00, 0x00, 0x70, 0x01, 0x06, 0x50, + 0x6F, 0x00, 0x00, 0x70, 0x01, 0x03, 0x50, 0x6B, 0x72, 0x06, 0x02, 0x37, + 0x1D, 0x00, 0x00, 0x26, 0x1B, 0x06, 0x07, 0x21, 0x1B, 0x06, 0x01, 0x16, + 0x04, 0x76, 0x2B, 0x00, 0x00, 0x01, 0x01, 0x50, 0x6A, 0x01, 0x01, 0x10, + 0x06, 0x02, 0x2C, 0x1D, 0x72, 0x27, 0x00, 0x00, 0x60, 0x05, 0x02, 0x39, + 0x1D, 0x47, 0x1C, 0x06, 0x04, 0x01, 0x17, 0x04, 0x12, 0x48, 0x1C, 0x06, + 0x04, 0x01, 0x18, 0x04, 0x0A, 0x49, 0x1C, 0x06, 0x04, 0x01, 0x19, 0x04, + 0x02, 0x39, 0x1D, 0x00, 0x04, 0x70, 0x1B, 0x01, 0x17, 0x01, 0x18, 0x4B, + 0x05, 0x02, 0x2F, 0x1D, 0x01, 0x18, 0x11, 0x03, 0x00, 0x4D, 0x6B, 0x66, + 0x02, 0x00, 0x06, 0x0C, 0x01, 0x80, 0x64, 0x08, 0x03, 0x01, 0x66, 0x02, + 0x01, 0x09, 0x04, 0x0E, 0x1B, 0x01, 0x32, 0x0D, 0x06, 0x04, 0x01, 0x80, + 0x64, 0x09, 0x01, 0x8E, 0x6C, 0x09, 0x03, 0x01, 0x02, 0x01, 0x01, 0x82, + 0x6D, 0x08, 0x02, 0x01, 0x01, 0x03, 0x09, 0x01, 0x04, 0x0C, 0x09, 0x02, + 0x01, 0x01, 0x80, 0x63, 0x09, 0x01, 0x80, 0x64, 0x0C, 0x0A, 0x02, 0x01, + 0x01, 0x83, 0x0F, 0x09, 0x01, 0x83, 0x10, 0x0C, 0x09, 0x03, 0x03, 0x01, + 0x01, 0x01, 0x0C, 0x67, 0x2A, 0x01, 0x01, 0x0E, 0x02, 0x01, 0x01, 0x04, + 0x07, 0x28, 0x02, 0x01, 0x01, 0x80, 0x64, 0x07, 0x27, 0x02, 0x01, 0x01, + 0x83, 0x10, 0x07, 0x28, 0x1F, 0x15, 0x06, 0x03, 0x01, 0x18, 0x09, 0x5D, + 0x09, 0x52, 0x1B, 0x01, 0x05, 0x14, 0x02, 0x03, 0x09, 0x03, 0x03, 0x01, + 0x1F, 0x15, 0x01, 0x01, 0x26, 0x67, 0x02, 0x03, 0x09, 0x2A, 0x03, 0x03, + 0x01, 0x00, 0x01, 0x17, 0x67, 0x01, 0x9C, 0x10, 
0x08, 0x03, 0x02, 0x01, + 0x00, 0x01, 0x3B, 0x67, 0x01, 0x3C, 0x08, 0x02, 0x02, 0x09, 0x03, 0x02, + 0x01, 0x00, 0x01, 0x3C, 0x67, 0x02, 0x02, 0x09, 0x03, 0x02, 0x72, 0x1B, + 0x01, 0x2E, 0x11, 0x06, 0x0D, 0x1A, 0x72, 0x1B, 0x01, 0x30, 0x01, 0x39, + 0x4B, 0x06, 0x03, 0x1A, 0x04, 0x74, 0x01, 0x80, 0x5A, 0x10, 0x06, 0x02, + 0x2F, 0x1D, 0x51, 0x02, 0x03, 0x02, 0x02, 0x00, 0x01, 0x72, 0x53, 0x01, + 0x0A, 0x08, 0x03, 0x00, 0x72, 0x53, 0x02, 0x00, 0x09, 0x00, 0x02, 0x03, + 0x00, 0x03, 0x01, 0x66, 0x1B, 0x02, 0x01, 0x02, 0x00, 0x4B, 0x05, 0x02, + 0x2F, 0x1D, 0x00, 0x00, 0x23, 0x70, 0x01, 0x02, 0x50, 0x0B, 0x69, 0x00, + 0x03, 0x1B, 0x03, 0x00, 0x03, 0x01, 0x03, 0x02, 0x6B, 0x72, 0x1B, 0x01, + 0x81, 0x00, 0x13, 0x06, 0x02, 0x36, 0x1D, 0x1B, 0x01, 0x00, 0x11, 0x06, + 0x0B, 0x1A, 0x1B, 0x05, 0x04, 0x1A, 0x01, 0x00, 0x00, 0x72, 0x04, 0x6F, + 0x02, 0x01, 0x1B, 0x05, 0x02, 0x33, 0x1D, 0x2A, 0x03, 0x01, 0x02, 0x02, + 0x25, 0x02, 0x02, 0x29, 0x03, 0x02, 0x1B, 0x06, 0x03, 0x72, 0x04, 0x68, + 0x1A, 0x02, 0x00, 0x02, 0x01, 0x0A, 0x00, 0x01, 0x72, 0x1B, 0x01, 0x81, + 0x00, 0x0D, 0x06, 0x01, 0x00, 0x01, 0x81, 0x00, 0x0A, 0x1B, 0x05, 0x02, + 0x31, 0x1D, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x12, 0x06, + 0x19, 0x02, 0x00, 0x2A, 0x03, 0x00, 0x1B, 0x01, 0x83, 0xFF, 0xFF, 0x7F, + 0x12, 0x06, 0x02, 0x32, 0x1D, 0x01, 0x08, 0x0E, 0x26, 0x72, 0x23, 0x09, + 0x04, 0x60, 0x00, 0x00, 0x6A, 0x5E, 0x00, 0x00, 0x6B, 0x7A, 0x00, 0x00, + 0x70, 0x4E, 0x6B, 0x00, 0x01, 0x6B, 0x1B, 0x05, 0x02, 0x36, 0x1D, 0x72, + 0x1B, 0x01, 0x81, 0x00, 0x13, 0x06, 0x02, 0x36, 0x1D, 0x03, 0x00, 0x1B, + 0x06, 0x16, 0x72, 0x02, 0x00, 0x1B, 0x01, 0x87, 0xFF, 0xFF, 0x7F, 0x13, + 0x06, 0x02, 0x36, 0x1D, 0x01, 0x08, 0x0E, 0x09, 0x03, 0x00, 0x04, 0x67, + 0x1A, 0x02, 0x00, 0x00, 0x00, 0x6B, 0x1B, 0x01, 0x81, 0x7F, 0x12, 0x06, + 0x08, 0x7A, 0x01, 0x00, 0x44, 0x25, 0x01, 0x00, 0x00, 0x1B, 0x44, 0x25, + 0x44, 0x29, 0x62, 0x01, 0x7F, 0x00, 0x01, 0x72, 0x03, 0x00, 0x02, 0x00, + 0x01, 0x05, 0x14, 0x01, 0x01, 0x15, 0x1E, 0x02, 
0x00, 0x01, 0x06, 0x14, + 0x1B, 0x01, 0x01, 0x15, 0x06, 0x02, 0x2D, 0x1D, 0x01, 0x04, 0x0E, 0x02, + 0x00, 0x01, 0x1F, 0x15, 0x1B, 0x01, 0x1F, 0x11, 0x06, 0x02, 0x2E, 0x1D, + 0x09, 0x00, 0x00, 0x1B, 0x05, 0x05, 0x01, 0x00, 0x01, 0x7F, 0x00, 0x70, + 0x00, 0x00, 0x1B, 0x05, 0x02, 0x32, 0x1D, 0x2A, 0x73, 0x00, 0x00, 0x22, + 0x1B, 0x01, 0x00, 0x13, 0x06, 0x01, 0x00, 0x1A, 0x16, 0x04, 0x74, 0x00, + 0x01, 0x01, 0x00, 0x00, 0x01, 0x0B, 0x00, 0x00, 0x01, 0x15, 0x00, 0x00, + 0x01, 0x1F, 0x00, 0x00, 0x01, 0x29, 0x00, 0x00, 0x01, 0x33, 0x00, 0x00, + 0x7B, 0x1A, 0x00, 0x00, 0x1B, 0x06, 0x07, 0x7C, 0x1B, 0x06, 0x01, 0x16, + 0x04, 0x76, 0x00, 0x00, 0x01, 0x00, 0x20, 0x21, 0x0B, 0x2B, 0x00 +}; + +static const uint16_t t0_caddr[] = { + 0, + 5, + 10, + 15, + 20, + 24, + 28, + 32, + 36, + 40, + 44, + 48, + 52, + 56, + 60, + 64, + 68, + 72, + 76, + 80, + 84, + 88, + 93, + 98, + 103, + 111, + 116, + 121, + 126, + 131, + 136, + 141, + 146, + 151, + 156, + 161, + 166, + 181, + 187, + 193, + 198, + 206, + 214, + 220, + 231, + 246, + 250, + 255, + 260, + 265, + 270, + 275, + 279, + 289, + 620, + 625, + 639, + 659, + 666, + 678, + 692, + 707, + 740, + 960, + 974, + 991, + 1000, + 1067, + 1123, + 1127, + 1131, + 1136, + 1184, + 1210, + 1254, + 1265, + 1274, + 1287, + 1291, + 1295, + 1299, + 1303, + 1307, + 1311, + 1315, + 1327 +}; + +#define T0_INTERPRETED 39 + +#define T0_ENTER(ip, rp, slot) do { \ + const unsigned char *t0_newip; \ + uint32_t t0_lnum; \ + t0_newip = &t0_codeblock[t0_caddr[(slot) - T0_INTERPRETED]]; \ + t0_lnum = t0_parse7E_unsigned(&t0_newip); \ + (rp) += t0_lnum; \ + *((rp) ++) = (uint32_t)((ip) - &t0_codeblock[0]) + (t0_lnum << 16); \ + (ip) = t0_newip; \ + } while (0) + +#define T0_DEFENTRY(name, slot) \ +void \ +name(void *ctx) \ +{ \ + t0_context *t0ctx = ctx; \ + t0ctx->ip = &t0_codeblock[0]; \ + T0_ENTER(t0ctx->ip, t0ctx->rp, slot); \ +} + +T0_DEFENTRY(br_x509_decoder_init_main, 92) + +#define T0_NEXT(t0ipp) (*(*(t0ipp)) ++) + +void +br_x509_decoder_run(void *t0ctx) 
+{ + uint32_t *dp, *rp; + const unsigned char *ip; + +#define T0_LOCAL(x) (*(rp - 2 - (x))) +#define T0_POP() (*-- dp) +#define T0_POPi() (*(int32_t *)(-- dp)) +#define T0_PEEK(x) (*(dp - 1 - (x))) +#define T0_PEEKi(x) (*(int32_t *)(dp - 1 - (x))) +#define T0_PUSH(v) do { *dp = (v); dp ++; } while (0) +#define T0_PUSHi(v) do { *(int32_t *)dp = (v); dp ++; } while (0) +#define T0_RPOP() (*-- rp) +#define T0_RPOPi() (*(int32_t *)(-- rp)) +#define T0_RPUSH(v) do { *rp = (v); rp ++; } while (0) +#define T0_RPUSHi(v) do { *(int32_t *)rp = (v); rp ++; } while (0) +#define T0_ROLL(x) do { \ + size_t t0len = (size_t)(x); \ + uint32_t t0tmp = *(dp - 1 - t0len); \ + memmove(dp - t0len - 1, dp - t0len, t0len * sizeof *dp); \ + *(dp - 1) = t0tmp; \ +} while (0) +#define T0_SWAP() do { \ + uint32_t t0tmp = *(dp - 2); \ + *(dp - 2) = *(dp - 1); \ + *(dp - 1) = t0tmp; \ +} while (0) +#define T0_ROT() do { \ + uint32_t t0tmp = *(dp - 3); \ + *(dp - 3) = *(dp - 2); \ + *(dp - 2) = *(dp - 1); \ + *(dp - 1) = t0tmp; \ +} while (0) +#define T0_NROT() do { \ + uint32_t t0tmp = *(dp - 1); \ + *(dp - 1) = *(dp - 2); \ + *(dp - 2) = *(dp - 3); \ + *(dp - 3) = t0tmp; \ +} while (0) +#define T0_PICK(x) do { \ + uint32_t t0depth = (x); \ + T0_PUSH(T0_PEEK(t0depth)); \ +} while (0) +#define T0_CO() do { \ + goto t0_exit; \ +} while (0) +#define T0_RET() goto t0_next + + dp = ((t0_context *)t0ctx)->dp; + rp = ((t0_context *)t0ctx)->rp; + ip = ((t0_context *)t0ctx)->ip; + goto t0_next; + for (;;) { + uint32_t t0x; + + t0_next: + t0x = T0_NEXT(&ip); + if (t0x < T0_INTERPRETED) { + switch (t0x) { + int32_t t0off; + + case 0: /* ret */ + t0x = T0_RPOP(); + rp -= (t0x >> 16); + t0x &= 0xFFFF; + if (t0x == 0) { + ip = NULL; + goto t0_exit; + } + ip = &t0_codeblock[t0x]; + break; + case 1: /* literal constant */ + T0_PUSHi(t0_parse7E_signed(&ip)); + break; + case 2: /* read local */ + T0_PUSH(T0_LOCAL(t0_parse7E_unsigned(&ip))); + break; + case 3: /* write local */ + 
T0_LOCAL(t0_parse7E_unsigned(&ip)) = T0_POP(); + break; + case 4: /* jump */ + t0off = t0_parse7E_signed(&ip); + ip += t0off; + break; + case 5: /* jump if */ + t0off = t0_parse7E_signed(&ip); + if (T0_POP()) { + ip += t0off; + } + break; + case 6: /* jump if not */ + t0off = t0_parse7E_signed(&ip); + if (!T0_POP()) { + ip += t0off; + } + break; + case 7: { + /* %25 */ + + int32_t b = T0_POPi(); + int32_t a = T0_POPi(); + T0_PUSHi(a % b); + + } + break; + case 8: { + /* * */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(a * b); + + } + break; + case 9: { + /* + */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(a + b); + + } + break; + case 10: { + /* - */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(a - b); + + } + break; + case 11: { + /* -rot */ + T0_NROT(); + } + break; + case 12: { + /* / */ + + int32_t b = T0_POPi(); + int32_t a = T0_POPi(); + T0_PUSHi(a / b); + + } + break; + case 13: { + /* < */ + + int32_t b = T0_POPi(); + int32_t a = T0_POPi(); + T0_PUSH(-(uint32_t)(a < b)); + + } + break; + case 14: { + /* << */ + + int c = (int)T0_POPi(); + uint32_t x = T0_POP(); + T0_PUSH(x << c); + + } + break; + case 15: { + /* <= */ + + int32_t b = T0_POPi(); + int32_t a = T0_POPi(); + T0_PUSH(-(uint32_t)(a <= b)); + + } + break; + case 16: { + /* <> */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(-(uint32_t)(a != b)); + + } + break; + case 17: { + /* = */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(-(uint32_t)(a == b)); + + } + break; + case 18: { + /* > */ + + int32_t b = T0_POPi(); + int32_t a = T0_POPi(); + T0_PUSH(-(uint32_t)(a > b)); + + } + break; + case 19: { + /* >= */ + + int32_t b = T0_POPi(); + int32_t a = T0_POPi(); + T0_PUSH(-(uint32_t)(a >= b)); + + } + break; + case 20: { + /* >> */ + + int c = (int)T0_POPi(); + int32_t x = T0_POPi(); + T0_PUSHi(x >> c); + + } + break; + case 21: { + /* and */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(a & b); + + } + 
break; + case 22: { + /* co */ + T0_CO(); + } + break; + case 23: { + /* copy-ec-pkey */ + + size_t qlen = T0_POP(); + uint32_t curve = T0_POP(); + CTX->pkey.key_type = BR_KEYTYPE_EC; + CTX->pkey.key.ec.curve = curve; + CTX->pkey.key.ec.q = CTX->pkey_data; + CTX->pkey.key.ec.qlen = qlen; + + } + break; + case 24: { + /* copy-rsa-pkey */ + + size_t elen = T0_POP(); + size_t nlen = T0_POP(); + CTX->pkey.key_type = BR_KEYTYPE_RSA; + CTX->pkey.key.rsa.n = CTX->pkey_data; + CTX->pkey.key.rsa.nlen = nlen; + CTX->pkey.key.rsa.e = CTX->pkey_data + nlen; + CTX->pkey.key.rsa.elen = elen; + + } + break; + case 25: { + /* data-get8 */ + + size_t addr = T0_POP(); + T0_PUSH(t0_datablock[addr]); + + } + break; + case 26: { + /* drop */ + (void)T0_POP(); + } + break; + case 27: { + /* dup */ + T0_PUSH(T0_PEEK(0)); + } + break; + case 28: { + /* eqOID */ + + const unsigned char *a2 = &t0_datablock[T0_POP()]; + const unsigned char *a1 = &CTX->pad[0]; + size_t len = a1[0]; + int x; + if (len == a2[0]) { + x = -(memcmp(a1 + 1, a2 + 1, len) == 0); + } else { + x = 0; + } + T0_PUSH((uint32_t)x); + + } + break; + case 29: { + /* fail */ + + CTX->err = T0_POPi(); + T0_CO(); + + } + break; + case 30: { + /* neg */ + + uint32_t a = T0_POP(); + T0_PUSH(-a); + + } + break; + case 31: { + /* or */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(a | b); + + } + break; + case 32: { + /* over */ + T0_PUSH(T0_PEEK(1)); + } + break; + case 33: { + /* read-blob-inner */ + + uint32_t len = T0_POP(); + uint32_t addr = T0_POP(); + size_t clen = CTX->hlen; + if (clen > len) { + clen = (size_t)len; + } + if (addr != 0) { + memcpy((unsigned char *)CTX + addr, CTX->hbuf, clen); + } + if (CTX->copy_dn && CTX->append_dn) { + CTX->append_dn(CTX->append_dn_ctx, CTX->hbuf, clen); + } + CTX->hbuf += clen; + CTX->hlen -= clen; + T0_PUSH(addr + clen); + T0_PUSH(len - clen); + + } + break; + case 34: { + /* read8-low */ + + if (CTX->hlen == 0) { + T0_PUSHi(-1); + } else { + unsigned char x = 
*CTX->hbuf ++; + if (CTX->copy_dn && CTX->append_dn) { + CTX->append_dn(CTX->append_dn_ctx, &x, 1); + } + CTX->hlen --; + T0_PUSH(x); + } + + } + break; + case 35: { + /* rot */ + T0_ROT(); + } + break; + case 36: { + /* set32 */ + + uint32_t addr = T0_POP(); + *(uint32_t *)(void *)((unsigned char *)CTX + addr) = T0_POP(); + + } + break; + case 37: { + /* set8 */ + + uint32_t addr = T0_POP(); + *((unsigned char *)CTX + addr) = (unsigned char)T0_POP(); + + } + break; + case 38: { + /* swap */ + T0_SWAP(); + } + break; + } + + } else { + T0_ENTER(ip, rp, t0x); + } + } +t0_exit: + ((t0_context *)t0ctx)->dp = dp; + ((t0_context *)t0ctx)->rp = rp; + ((t0_context *)t0ctx)->ip = ip; +} diff --git a/third_party/bearssl/src/x509_knownkey.c b/third_party/bearssl/src/x509_knownkey.c new file mode 100644 index 0000000..7674f3f --- /dev/null +++ b/third_party/bearssl/src/x509_knownkey.c @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_x509.h */ +void +br_x509_knownkey_init_rsa(br_x509_knownkey_context *ctx, + const br_rsa_public_key *pk, unsigned usages) +{ + ctx->vtable = &br_x509_knownkey_vtable; + ctx->pkey.key_type = BR_KEYTYPE_RSA; + ctx->pkey.key.rsa = *pk; + ctx->usages = usages; +} + +/* see bearssl_x509.h */ +void +br_x509_knownkey_init_ec(br_x509_knownkey_context *ctx, + const br_ec_public_key *pk, unsigned usages) +{ + ctx->vtable = &br_x509_knownkey_vtable; + ctx->pkey.key_type = BR_KEYTYPE_EC; + ctx->pkey.key.ec = *pk; + ctx->usages = usages; +} + +static void +kk_start_chain(const br_x509_class **ctx, const char *server_name) +{ + (void)ctx; + (void)server_name; +} + +static void +kk_start_cert(const br_x509_class **ctx, uint32_t length) +{ + (void)ctx; + (void)length; +} + +static void +kk_append(const br_x509_class **ctx, const unsigned char *buf, size_t len) +{ + (void)ctx; + (void)buf; + (void)len; +} + +static void +kk_end_cert(const br_x509_class **ctx) +{ + (void)ctx; +} + +static unsigned +kk_end_chain(const br_x509_class **ctx) +{ + (void)ctx; + return 0; +} + +static const br_x509_pkey * +kk_get_pkey(const br_x509_class *const *ctx, unsigned *usages) +{ + const br_x509_knownkey_context *xc; + + xc = (const br_x509_knownkey_context *)ctx; + if (usages != NULL) { + *usages = xc->usages; + } + return &xc->pkey; +} + +/* see bearssl_x509.h */ +const br_x509_class br_x509_knownkey_vtable = { + sizeof(br_x509_knownkey_context), + kk_start_chain, + kk_start_cert, + kk_append, + kk_end_cert, + kk_end_chain, + kk_get_pkey +}; diff --git a/third_party/bearssl/src/x509_minimal.c b/third_party/bearssl/src/x509_minimal.c new file mode 100644 index 0000000..b3079de --- 
/dev/null +++ b/third_party/bearssl/src/x509_minimal.c @@ -0,0 +1,1697 @@ +/* Automatically generated code; do not modify directly. */ + +#include <stddef.h> +#include <stdint.h> + +typedef struct { + uint32_t *dp; + uint32_t *rp; + const unsigned char *ip; +} t0_context; + +static uint32_t +t0_parse7E_unsigned(const unsigned char **p) +{ + uint32_t x; + + x = 0; + for (;;) { + unsigned y; + + y = *(*p) ++; + x = (x << 7) | (uint32_t)(y & 0x7F); + if (y < 0x80) { + return x; + } + } +} + +static int32_t +t0_parse7E_signed(const unsigned char **p) +{ + int neg; + uint32_t x; + + neg = ((**p) >> 6) & 1; + x = (uint32_t)-neg; + for (;;) { + unsigned y; + + y = *(*p) ++; + x = (x << 7) | (uint32_t)(y & 0x7F); + if (y < 0x80) { + if (neg) { + return -(int32_t)~x - 1; + } else { + return (int32_t)x; + } + } + } +} + +#define T0_VBYTE(x, n) (unsigned char)((((uint32_t)(x) >> (n)) & 0x7F) | 0x80) +#define T0_FBYTE(x, n) (unsigned char)(((uint32_t)(x) >> (n)) & 0x7F) +#define T0_SBYTE(x) (unsigned char)((((uint32_t)(x) >> 28) + 0xF8) ^ 0xF8) +#define T0_INT1(x) T0_FBYTE(x, 0) +#define T0_INT2(x) T0_VBYTE(x, 7), T0_FBYTE(x, 0) +#define T0_INT3(x) T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0) +#define T0_INT4(x) T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0) +#define T0_INT5(x) T0_SBYTE(x), T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0) + +/* static const unsigned char t0_datablock[]; */ + + +void br_x509_minimal_init_main(void *t0ctx); + +void br_x509_minimal_run(void *t0ctx); + + + +#include "inner.h" + + + + + +#include "inner.h" + +/* + * Implementation Notes + * -------------------- + * + * The C code pushes the data by chunks; all decoding is done in the + * T0 code. The cert_length value is set to the certificate length when + * a new certificate is started; the T0 code picks it up as outer limit, + * and decoding functions use it to ensure that no attempt is made at + * reading past it. 
The T0 code also checks that once the certificate is + * decoded, there are no trailing bytes. + * + * The T0 code sets cert_length to 0 when the certificate is fully + * decoded. + * + * The C code must still perform two checks: + * + * -- If the certificate length is 0, then the T0 code will not be + * invoked at all. This invalid condition must thus be reported by the + * C code. + * + * -- When reaching the end of certificate, the C code must verify that + * the certificate length has been set to 0, thereby signaling that + * the T0 code properly decoded a certificate. + * + * Processing of a chain works in the following way: + * + * -- The error flag is set to a non-zero value when validation is + * finished. The value is either BR_ERR_X509_OK (validation is + * successful) or another non-zero error code. When a non-zero error + * code is obtained, the remaining bytes in the current certificate and + * the subsequent certificates (if any) are completely ignored. + * + * -- Each certificate is decoded in due course, with the following + * "interesting points": + * + * -- Start of the TBS: the multihash engine is reset and activated. + * + * -- Start of the issuer DN: the secondary hash engine is started, + * to process the encoded issuer DN. + * + * -- End of the issuer DN: the secondary hash engine is stopped. The + * resulting hash value is computed and then copied into the + * next_dn_hash[] buffer. + * + * -- Start of the subject DN: the secondary hash engine is started, + * to process the encoded subject DN. + * + * -- For the EE certificate only: the Common Name, if any, is matched + * against the expected server name. + * + * -- End of the subject DN: the secondary hash engine is stopped. The + * resulting hash value is computed into the pad. It is then processed: + * + * -- If this is the EE certificate, then the hash is ignored + * (except for direct trust processing, see later; the hash is + * simply left in current_dn_hash[]). 
+ * + * -- Otherwise, the hashed subject DN is compared with the saved + * hash value (in saved_dn_hash[]). They must match. + * + * Either way, the next_dn_hash[] value is then copied into the + * saved_dn_hash[] value. Thus, at that point, saved_dn_hash[] + * contains the hash of the issuer DN for the current certificate, + * and current_dn_hash[] contains the hash of the subject DN for the + * current certificate. + * + * -- Public key: it is decoded into the cert_pkey[] buffer. Unknown + * key types are reported at that point. + * + * -- If this is the EE certificate, then the key type is compared + * with the expected key type (initialization parameter). The public + * key data is copied to ee_pkey_data[]. The key and hashed subject + * DN are also compared with the "direct trust" keys; if the key + * and DN are matched, then validation ends with a success. + * + * -- Otherwise, the saved signature (cert_sig[]) is verified + * against the saved TBS hash (tbs_hash[]) and that freshly + * decoded public key. Failure here ends validation with an error. + * + * -- Extensions: extension values are processed in due order. + * + * -- Basic Constraints: for all certificates except EE, must be + * present, indicate a CA, and have a path length compatible with + * the chain length so far. + * + * -- Key Usage: for the EE, if present, must allow signatures + * or encryption/key exchange, as required for the cipher suite. + * For non-EE, if present, must have the "certificate sign" bit. + * + * -- Subject Alt Name: for the EE, dNSName names are matched + * against the server name. Ignored for non-EE. + * + * -- Authority Key Identifier, Subject Key Identifier, Issuer + * Alt Name, Subject Directory Attributes, CRL Distribution Points + * Freshest CRL, Authority Info Access and Subject Info Access + * extensions are always ignored: they either contain only + * informative data, or they relate to revocation processing, which + * we explicitly do not support. 
+ * + * -- All other extensions are ignored if non-critical. If a + * critical extension other than the ones above is encountered, + * then a failure is reported. + * + * -- End of the TBS: the multihash engine is stopped. + * + * -- Signature algorithm: the signature algorithm on the + * certificate is decoded. A failure is reported if that algorithm + * is unknown. The hashed TBS corresponding to the signature hash + * function is computed and stored in tbs_hash[] (if not supported, + * then a failure is reported). The hash OID and length are stored + * in cert_sig_hash_oid and cert_sig_hash_len. + * + * -- Signature value: the signature value is copied into the + * cert_sig[] array. + * + * -- Certificate end: the hashed issuer DN (saved_dn_hash[]) is + * looked up in the trust store (CA trust anchors only); for all + * that match, the signature (cert_sig[]) is verified against the + * anchor public key (hashed TBS is in tbs_hash[]). If one of these + * signatures is valid, then validation ends with a success. + * + * -- If the chain end is reached without obtaining a validation success, + * then validation is reported as failed. + */ + +/* + * The T0 compiler will produce these prototype declarations in the + * header. 
+ * +void br_x509_minimal_init_main(void *ctx); +void br_x509_minimal_run(void *ctx); + */ + +/* see bearssl_x509.h -- clears the whole context, then records the vtable, the DN hash implementation and the trust anchor array together with its element count. */ +void +br_x509_minimal_init(br_x509_minimal_context *ctx, + const br_hash_class *dn_hash_impl, + const br_x509_trust_anchor *trust_anchors, size_t trust_anchors_num) +{ + memset(ctx, 0, sizeof *ctx); + ctx->vtable = &br_x509_minimal_vtable; + ctx->dn_hash_impl = dn_hash_impl; + ctx->trust_anchors = trust_anchors; + ctx->trust_anchors_num = trust_anchors_num; +} +/* start_chain callback: resets the per-chain state (status and first buffer byte of every name element, decoded key, certificate count, error code), points the T0 data and return stacks at the buffers inside the context and primes the interpreter through br_x509_minimal_init_main. A NULL or empty server_name is stored as NULL. The cast assumes ctx addresses the vtable field at the start of the context (TODO confirm layout in bearssl_x509.h). */ +static void +xm_start_chain(const br_x509_class **ctx, const char *server_name) +{ + br_x509_minimal_context *cc; + size_t u; + + cc = (br_x509_minimal_context *)(void *)ctx; + for (u = 0; u < cc->num_name_elts; u ++) { + cc->name_elts[u].status = 0; + cc->name_elts[u].buf[0] = 0; + } + memset(&cc->pkey, 0, sizeof cc->pkey); + cc->num_certs = 0; + cc->err = 0; + cc->cpu.dp = cc->dp_stack; + cc->cpu.rp = cc->rp_stack; + br_x509_minimal_init_main(&cc->cpu); + if (server_name == NULL || *server_name == 0) { + cc->server_name = NULL; + } else { + cc->server_name = server_name; + } +} +/* start_cert callback: does nothing once a result code is already set; a zero length is reported as BR_ERR_X509_TRUNCATED, otherwise the length is saved in cert_length. */ +static void +xm_start_cert(const br_x509_class **ctx, uint32_t length) +{ + br_x509_minimal_context *cc; + + cc = (br_x509_minimal_context *)(void *)ctx; + if (cc->err != 0) { + return; + } + if (length == 0) { + cc->err = BR_ERR_X509_TRUNCATED; + return; + } + cc->cert_length = length; +} +/* append callback: unless a result code is already set, publishes the chunk through hbuf and hlen and runs the T0 interpreter on the context cpu state. */ +static void +xm_append(const br_x509_class **ctx, const unsigned char *buf, size_t len) +{ + br_x509_minimal_context *cc; + + cc = (br_x509_minimal_context *)(void *)ctx; + if (cc->err != 0) { + return; + } + cc->hbuf = buf; + cc->hlen = len; + br_x509_minimal_run(&cc->cpu); +} +/* end_cert callback: if no result code is set yet and cert_length is still non-zero, records BR_ERR_X509_TRUNCATED (the implementation notes in this file state that the T0 code zeroes cert_length after a full decode); the certificate is counted in every case. */ +static void +xm_end_cert(const br_x509_class **ctx) +{ + br_x509_minimal_context *cc; + + cc = (br_x509_minimal_context *)(void *)ctx; + if (cc->err == 0 && cc->cert_length != 0) { + cc->err = BR_ERR_X509_TRUNCATED; + } + cc->num_certs ++; +} +/* end_chain callback: when no result code was recorded, sets BR_ERR_X509_EMPTY_CHAIN if no certificate was seen and BR_ERR_X509_NOT_TRUSTED otherwise; a recorded BR_ERR_X509_OK is returned as 0, any other code is returned unchanged. */ +static unsigned +xm_end_chain(const br_x509_class **ctx) +{ + br_x509_minimal_context *cc; + + cc = 
(br_x509_minimal_context *)(void *)ctx; + if (cc->err == 0) { + if (cc->num_certs == 0) { + cc->err = BR_ERR_X509_EMPTY_CHAIN; + } else { + cc->err = BR_ERR_X509_NOT_TRUSTED; + } + } else if (cc->err == BR_ERR_X509_OK) { + return 0; + } + return (unsigned)cc->err; +} +/* get_pkey callback: hands out the key stored in the context, and key_usages through usages when that pointer is not NULL, only if the recorded result is BR_ERR_X509_OK or BR_ERR_X509_NOT_TRUSTED; returns NULL for any other state. */ +static const br_x509_pkey * +xm_get_pkey(const br_x509_class *const *ctx, unsigned *usages) +{ + br_x509_minimal_context *cc; + + cc = (br_x509_minimal_context *)(void *)ctx; + if (cc->err == BR_ERR_X509_OK + || cc->err == BR_ERR_X509_NOT_TRUSTED) + { + if (usages != NULL) { + *usages = cc->key_usages; + } + return &((br_x509_minimal_context *)(void *)ctx)->pkey; + } else { + return NULL; + } +} + +/* see bearssl_x509.h -- positional initialiser: the context size, then the callbacks in the order start_chain, start_cert, append, end_cert, end_chain, get_pkey. */ +const br_x509_class br_x509_minimal_vtable = { + sizeof(br_x509_minimal_context), + xm_start_chain, + xm_start_cert, + xm_append, + xm_end_cert, + xm_end_chain, + xm_get_pkey +}; +/* CTX recovers the enclosing br_x509_minimal_context from the local t0ctx pointer, which addresses its cpu member. DNHASH_LEN extracts a field from the DN hash descriptor word; it is used as the byte count when DN hashes are compared (presumably the hash output size; confirm against the BR_HASHDESC_* definitions). */ +#define CTX ((br_x509_minimal_context *)(void *)((unsigned char *)t0ctx - offsetof(br_x509_minimal_context, cpu))) +#define CONTEXT_NAME br_x509_minimal_context + +#define DNHASH_LEN ((CTX->dn_hash_impl->desc >> BR_HASHDESC_OUT_OFF) & BR_HASHDESC_OUT_MASK) + +/* + * Hash a DN (from a trust anchor) into the provided buffer. This uses the + * DN hash implementation and context structure from the X.509 engine + * context. + */ +static void +hash_dn(br_x509_minimal_context *ctx, const void *dn, size_t len, + unsigned char *out) +{ + ctx->dn_hash_impl->init(&ctx->dn_hash.vtable); + ctx->dn_hash_impl->update(&ctx->dn_hash.vtable, dn, len); + ctx->dn_hash_impl->out(&ctx->dn_hash.vtable, out); +} + +/* + * Compare two big integers for equality. The integers use unsigned big-endian + * encoding; extra leading bytes (of value 0) are allowed. 
+ */ +static int +eqbigint(const unsigned char *b1, size_t len1, + const unsigned char *b2, size_t len2) +{ + while (len1 > 0 && *b1 == 0) { + b1 ++; + len1 --; + } + while (len2 > 0 && *b2 == 0) { + b2 ++; + len2 --; + } + if (len1 != len2) { + return 0; + } + return memcmp(b1, b2, len1) == 0; +} + +/* + * Compare two strings for equality, in a case-insensitive way. This + * function handles casing only for ASCII letters. + */ +static int +eqnocase(const void *s1, const void *s2, size_t len) +{ + const unsigned char *buf1, *buf2; + + buf1 = s1; + buf2 = s2; + while (len -- > 0) { + int x1, x2; + + x1 = *buf1 ++; + x2 = *buf2 ++; + if (x1 >= 'A' && x1 <= 'Z') { + x1 += 'a' - 'A'; + } + if (x2 >= 'A' && x2 <= 'Z') { + x2 += 'a' - 'A'; + } + if (x1 != x2) { + return 0; + } + } + return 1; +} + +static int verify_signature(br_x509_minimal_context *ctx, + const br_x509_pkey *pk); + + + +static const unsigned char t0_datablock[] = { + 0x00, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x09, + 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x05, 0x09, 0x2A, 0x86, + 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0E, 0x09, 0x2A, 0x86, 0x48, 0x86, + 0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, + 0x01, 0x01, 0x0C, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, + 0x0D, 0x05, 0x2B, 0x0E, 0x03, 0x02, 0x1A, 0x09, 0x60, 0x86, 0x48, 0x01, + 0x65, 0x03, 0x04, 0x02, 0x04, 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, + 0x04, 0x02, 0x01, 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, + 0x02, 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03, 0x07, + 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01, 0x08, 0x2A, 0x86, 0x48, 0xCE, + 0x3D, 0x03, 0x01, 0x07, 0x05, 0x2B, 0x81, 0x04, 0x00, 0x22, 0x05, 0x2B, + 0x81, 0x04, 0x00, 0x23, 0x07, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x01, + 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x01, 0x08, 0x2A, 0x86, + 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, + 0x04, 0x03, 0x03, 
0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x04, + 0x03, 0x55, 0x04, 0x03, 0x00, 0x1F, 0x03, 0xFC, 0x07, 0x7F, 0x0B, 0x5E, + 0x0F, 0x1F, 0x12, 0xFE, 0x16, 0xBF, 0x1A, 0x9F, 0x1E, 0x7E, 0x22, 0x3F, + 0x26, 0x1E, 0x29, 0xDF, 0x00, 0x1F, 0x03, 0xFD, 0x07, 0x9F, 0x0B, 0x7E, + 0x0F, 0x3F, 0x13, 0x1E, 0x16, 0xDF, 0x1A, 0xBF, 0x1E, 0x9E, 0x22, 0x5F, + 0x26, 0x3E, 0x29, 0xFF, 0x03, 0x55, 0x1D, 0x13, 0x03, 0x55, 0x1D, 0x0F, + 0x03, 0x55, 0x1D, 0x11, 0x03, 0x55, 0x1D, 0x20, 0x08, 0x2B, 0x06, 0x01, + 0x05, 0x05, 0x07, 0x02, 0x01, 0x03, 0x55, 0x1D, 0x23, 0x03, 0x55, 0x1D, + 0x0E, 0x03, 0x55, 0x1D, 0x12, 0x03, 0x55, 0x1D, 0x09, 0x03, 0x55, 0x1D, + 0x1F, 0x03, 0x55, 0x1D, 0x2E, 0x08, 0x2B, 0x06, 0x01, 0x05, 0x05, 0x07, + 0x01, 0x01, 0x08, 0x2B, 0x06, 0x01, 0x05, 0x05, 0x07, 0x01, 0x0B +}; + +static const unsigned char t0_codeblock[] = { + 0x00, 0x01, 0x00, 0x0D, 0x00, 0x00, 0x01, 0x00, 0x10, 0x00, 0x00, 0x01, + 0x00, 0x11, 0x00, 0x00, 0x01, 0x01, 0x09, 0x00, 0x00, 0x01, 0x01, 0x0A, + 0x00, 0x00, 0x25, 0x25, 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_BAD_BOOLEAN), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_BAD_DN), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_BAD_SERVER_NAME), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_BAD_TAG_CLASS), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_BAD_TAG_VALUE), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_BAD_TIME), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_CRITICAL_EXTENSION), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_DN_MISMATCH), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_EXPIRED), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_EXTRA_ELEMENT), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_FORBIDDEN_KEY_USAGE), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_INDEFINITE_LENGTH), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_INNER_TRUNC), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_LIMIT_EXCEEDED), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_NOT_CA), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_NOT_CONSTRUCTED), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_NOT_PRIMITIVE), 0x00, 0x00, 0x01, + 
T0_INT1(BR_ERR_X509_OVERFLOW), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_PARTIAL_BYTE), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_UNEXPECTED), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_UNSUPPORTED), 0x00, 0x00, 0x01, + T0_INT1(BR_ERR_X509_WEAK_PUBLIC_KEY), 0x00, 0x00, 0x01, + T0_INT1(BR_KEYTYPE_EC), 0x00, 0x00, 0x01, T0_INT1(BR_KEYTYPE_RSA), + 0x00, 0x00, 0x01, T0_INT2(offsetof(CONTEXT_NAME, cert_length)), 0x00, + 0x00, 0x01, T0_INT2(offsetof(CONTEXT_NAME, cert_sig)), 0x00, 0x00, + 0x01, T0_INT2(offsetof(CONTEXT_NAME, cert_sig_hash_len)), 0x00, 0x00, + 0x01, T0_INT2(offsetof(CONTEXT_NAME, cert_sig_hash_oid)), 0x00, 0x00, + 0x01, T0_INT2(offsetof(CONTEXT_NAME, cert_sig_len)), 0x00, 0x00, 0x01, + T0_INT2(offsetof(CONTEXT_NAME, cert_signer_key_type)), 0x00, 0x00, + 0x01, T0_INT2(offsetof(CONTEXT_NAME, current_dn_hash)), 0x00, 0x00, + 0x01, T0_INT2(offsetof(CONTEXT_NAME, key_usages)), 0x00, 0x00, 0x01, + T0_INT2(offsetof(br_x509_minimal_context, pkey_data)), 0x01, + T0_INT2(BR_X509_BUFSIZE_KEY), 0x00, 0x00, 0x01, + T0_INT2(offsetof(CONTEXT_NAME, min_rsa_size)), 0x00, 0x00, 0x01, + T0_INT2(offsetof(CONTEXT_NAME, next_dn_hash)), 0x00, 0x00, 0x01, + T0_INT2(offsetof(CONTEXT_NAME, num_certs)), 0x00, 0x00, 0x01, + T0_INT2(offsetof(CONTEXT_NAME, pad)), 0x00, 0x00, 0x01, + T0_INT2(offsetof(CONTEXT_NAME, saved_dn_hash)), 0x00, 0x00, 0x01, 0x80, + 0x73, 0x00, 0x00, 0x01, 0x80, 0x7C, 0x00, 0x00, 0x01, 0x81, 0x02, 0x00, + 0x00, 0x8F, 0x05, 0x05, 0x33, 0x41, 0x01, 0x00, 0x00, 0x33, 0x01, 0x0A, + 0x0E, 0x09, 0x01, 0x9A, 0xFF, 0xB8, 0x00, 0x0A, 0x00, 0x00, 0x01, 0x82, + 0x19, 0x00, 0x00, 0x01, 0x82, 0x01, 0x00, 0x00, 0x01, 0x81, 0x68, 0x00, + 0x02, 0x03, 0x00, 0x03, 0x01, 0x26, 0x02, 0x01, 0x13, 0x3A, 0x02, 0x00, + 0x0F, 0x15, 0x00, 0x00, 0x01, 0x81, 0x74, 0x00, 0x00, 0x05, 0x02, 0x51, + 0x29, 0x00, 0x00, 0x06, 0x02, 0x52, 0x29, 0x00, 0x00, 0x01, 0x10, 0x74, + 0x00, 0x00, 0x11, 0x05, 0x02, 0x55, 0x29, 0x71, 0x00, 0x00, 0x11, 0x05, + 0x02, 0x55, 0x29, 0x72, 0x00, 0x00, 0x06, 0x02, 0x4B, 
0x29, 0x00, 0x00, + 0x01, 0x82, 0x11, 0x00, 0x00, 0x26, 0x21, 0x01, 0x08, 0x0E, 0x3A, 0x3F, + 0x21, 0x09, 0x00, 0x0B, 0x03, 0x00, 0x5A, 0x2B, 0xAC, 0x38, 0xAC, 0xB0, + 0x26, 0x01, 0x20, 0x11, 0x06, 0x11, 0x25, 0x71, 0xAA, 0xB0, 0x01, 0x02, + 0x75, 0xAD, 0x01, 0x02, 0x12, 0x06, 0x02, 0x56, 0x29, 0x76, 0xB0, 0x01, + 0x02, 0x75, 0xAB, 0xAC, 0xBF, 0x99, 0x64, 0x60, 0x22, 0x16, 0xAC, 0xA4, + 0x03, 0x01, 0x03, 0x02, 0xA4, 0x02, 0x02, 0x02, 0x01, 0x19, 0x06, 0x02, + 0x4A, 0x29, 0x76, 0x02, 0x00, 0x06, 0x05, 0x9A, 0x03, 0x03, 0x04, 0x09, + 0x99, 0x60, 0x67, 0x22, 0x28, 0x05, 0x02, 0x49, 0x29, 0x67, 0x64, 0x22, + 0x16, 0xAC, 0xAC, 0x9B, 0x05, 0x02, 0x56, 0x29, 0xB9, 0x27, 0x06, 0x27, + 0xBF, 0xA1, 0xAC, 0x62, 0xA7, 0x03, 0x05, 0x62, 0x3A, 0x02, 0x05, 0x09, + 0x3A, 0x02, 0x05, 0x0A, 0xA7, 0x03, 0x06, 0x76, 0x63, 0x2A, 0x01, 0x81, + 0x00, 0x09, 0x02, 0x05, 0x12, 0x06, 0x02, 0x57, 0x29, 0x76, 0x59, 0x03, + 0x04, 0x04, 0x3A, 0x85, 0x27, 0x06, 0x34, 0x9B, 0x05, 0x02, 0x56, 0x29, + 0x68, 0x27, 0x06, 0x04, 0x01, 0x17, 0x04, 0x12, 0x69, 0x27, 0x06, 0x04, + 0x01, 0x18, 0x04, 0x0A, 0x6A, 0x27, 0x06, 0x04, 0x01, 0x19, 0x04, 0x02, + 0x56, 0x29, 0x03, 0x07, 0x76, 0xA1, 0x26, 0x03, 0x08, 0x26, 0x62, 0x33, + 0x0D, 0x06, 0x02, 0x4F, 0x29, 0xA2, 0x58, 0x03, 0x04, 0x04, 0x02, 0x56, + 0x29, 0x76, 0x02, 0x00, 0x06, 0x21, 0x02, 0x04, 0x59, 0x30, 0x11, 0x06, + 0x08, 0x25, 0x02, 0x05, 0x02, 0x06, 0x1E, 0x04, 0x10, 0x58, 0x30, 0x11, + 0x06, 0x08, 0x25, 0x02, 0x07, 0x02, 0x08, 0x1D, 0x04, 0x03, 0x56, 0x29, + 0x25, 0x04, 0x24, 0x02, 0x04, 0x59, 0x30, 0x11, 0x06, 0x08, 0x25, 0x02, + 0x05, 0x02, 0x06, 0x24, 0x04, 0x10, 0x58, 0x30, 0x11, 0x06, 0x08, 0x25, + 0x02, 0x07, 0x02, 0x08, 0x23, 0x04, 0x03, 0x56, 0x29, 0x25, 0x26, 0x06, + 0x01, 0x29, 0x25, 0x01, 0x00, 0x03, 0x09, 0xB1, 0x01, 0x21, 0x8C, 0x01, + 0x22, 0x8C, 0x26, 0x01, 0x23, 0x11, 0x06, 0x81, 0x26, 0x25, 0x71, 0xAA, + 0xAC, 0x26, 0x06, 0x81, 0x1A, 0x01, 0x00, 0x03, 0x0A, 0xAC, 0x9B, 0x25, + 0xB0, 0x26, 0x01, 0x01, 0x11, 0x06, 0x04, 0xA3, 0x03, 
0x0A, 0xB0, 0x01, + 0x04, 0x75, 0xAA, 0x6E, 0x27, 0x06, 0x0F, 0x02, 0x00, 0x06, 0x03, 0xC0, + 0x04, 0x05, 0x96, 0x01, 0x7F, 0x03, 0x09, 0x04, 0x80, 0x6C, 0x8E, 0x27, + 0x06, 0x06, 0x02, 0x00, 0x98, 0x04, 0x80, 0x62, 0xC2, 0x27, 0x06, 0x11, + 0x02, 0x00, 0x06, 0x09, 0x01, 0x00, 0x03, 0x03, 0x95, 0x03, 0x03, 0x04, + 0x01, 0xC0, 0x04, 0x80, 0x4D, 0x70, 0x27, 0x06, 0x0A, 0x02, 0x0A, 0x06, + 0x03, 0x97, 0x04, 0x01, 0xC0, 0x04, 0x3F, 0x6D, 0x27, 0x06, 0x03, 0xC0, + 0x04, 0x38, 0xC5, 0x27, 0x06, 0x03, 0xC0, 0x04, 0x31, 0x8D, 0x27, 0x06, + 0x03, 0xC0, 0x04, 0x2A, 0xC3, 0x27, 0x06, 0x03, 0xC0, 0x04, 0x23, 0x77, + 0x27, 0x06, 0x03, 0xC0, 0x04, 0x1C, 0x82, 0x27, 0x06, 0x03, 0xC0, 0x04, + 0x15, 0x6C, 0x27, 0x06, 0x03, 0xC0, 0x04, 0x0E, 0xC4, 0x27, 0x06, 0x03, + 0xC0, 0x04, 0x07, 0x02, 0x0A, 0x06, 0x02, 0x48, 0x29, 0xC0, 0x76, 0x76, + 0x04, 0xFE, 0x62, 0x76, 0x76, 0x04, 0x08, 0x01, 0x7F, 0x11, 0x05, 0x02, + 0x55, 0x29, 0x25, 0x76, 0x39, 0x02, 0x00, 0x06, 0x08, 0x02, 0x03, 0x3B, + 0x2F, 0x05, 0x02, 0x44, 0x29, 0x02, 0x00, 0x06, 0x01, 0x17, 0x02, 0x00, + 0x02, 0x09, 0x2F, 0x05, 0x02, 0x50, 0x29, 0xB0, 0x73, 0xAA, 0x9B, 0x06, + 0x80, 0x77, 0xBA, 0x27, 0x06, 0x07, 0x01, 0x02, 0x59, 0x87, 0x04, 0x80, + 0x5E, 0xBB, 0x27, 0x06, 0x07, 0x01, 0x03, 0x59, 0x88, 0x04, 0x80, 0x53, + 0xBC, 0x27, 0x06, 0x07, 0x01, 0x04, 0x59, 0x89, 0x04, 0x80, 0x48, 0xBD, + 0x27, 0x06, 0x06, 0x01, 0x05, 0x59, 0x8A, 0x04, 0x3E, 0xBE, 0x27, 0x06, + 0x06, 0x01, 0x06, 0x59, 0x8B, 0x04, 0x34, 0x7C, 0x27, 0x06, 0x06, 0x01, + 0x02, 0x58, 0x87, 0x04, 0x2A, 0x7D, 0x27, 0x06, 0x06, 0x01, 0x03, 0x58, + 0x88, 0x04, 0x20, 0x7E, 0x27, 0x06, 0x06, 0x01, 0x04, 0x58, 0x89, 0x04, + 0x16, 0x7F, 0x27, 0x06, 0x06, 0x01, 0x05, 0x58, 0x8A, 0x04, 0x0C, 0x80, + 0x27, 0x06, 0x06, 0x01, 0x06, 0x58, 0x8B, 0x04, 0x02, 0x56, 0x29, 0x5D, + 0x34, 0x5F, 0x36, 0x1C, 0x26, 0x05, 0x02, 0x56, 0x29, 0x5C, 0x36, 0x04, + 0x02, 0x56, 0x29, 0xBF, 0xA1, 0x26, 0x01, T0_INT2(BR_X509_BUFSIZE_SIG), + 0x12, 0x06, 0x02, 0x4F, 0x29, 0x26, 0x5E, 0x34, 0x5B, 
0xA2, 0x76, 0x76, + 0x01, 0x00, 0x5A, 0x35, 0x18, 0x00, 0x00, 0x01, 0x30, 0x0A, 0x26, 0x01, + 0x00, 0x01, 0x09, 0x6F, 0x05, 0x02, 0x47, 0x29, 0x00, 0x00, 0x30, 0x30, + 0x00, 0x00, 0x01, 0x81, 0x08, 0x00, 0x00, 0x01, 0x81, 0x10, 0x00, 0x00, + 0x01, 0x81, 0x19, 0x00, 0x00, 0x01, 0x81, 0x22, 0x00, 0x00, 0x01, 0x81, + 0x2B, 0x00, 0x01, 0x7B, 0x01, 0x01, 0x11, 0x3A, 0x01, 0x83, 0xFD, 0x7F, + 0x11, 0x15, 0x06, 0x03, 0x3A, 0x25, 0x00, 0x3A, 0x26, 0x03, 0x00, 0x26, + 0xC6, 0x05, 0x04, 0x41, 0x01, 0x00, 0x00, 0x26, 0x01, 0x81, 0x00, 0x0D, + 0x06, 0x04, 0x93, 0x04, 0x80, 0x49, 0x26, 0x01, 0x90, 0x00, 0x0D, 0x06, + 0x0F, 0x01, 0x06, 0x14, 0x01, 0x81, 0x40, 0x2F, 0x93, 0x02, 0x00, 0x01, + 0x00, 0x94, 0x04, 0x33, 0x26, 0x01, 0x83, 0xFF, 0x7F, 0x0D, 0x06, 0x14, + 0x01, 0x0C, 0x14, 0x01, 0x81, 0x60, 0x2F, 0x93, 0x02, 0x00, 0x01, 0x06, + 0x94, 0x02, 0x00, 0x01, 0x00, 0x94, 0x04, 0x17, 0x01, 0x12, 0x14, 0x01, + 0x81, 0x70, 0x2F, 0x93, 0x02, 0x00, 0x01, 0x0C, 0x94, 0x02, 0x00, 0x01, + 0x06, 0x94, 0x02, 0x00, 0x01, 0x00, 0x94, 0x00, 0x00, 0x01, 0x82, 0x15, + 0x00, 0x00, 0x26, 0x01, 0x83, 0xB0, 0x00, 0x01, 0x83, 0xB7, 0x7F, 0x6F, + 0x00, 0x00, 0x01, 0x81, 0x34, 0x00, 0x00, 0x01, 0x80, 0x6B, 0x00, 0x00, + 0x01, 0x81, 0x78, 0x00, 0x00, 0x01, 0x3D, 0x00, 0x00, 0x01, 0x80, 0x43, + 0x00, 0x00, 0x01, 0x80, 0x4D, 0x00, 0x00, 0x01, 0x80, 0x57, 0x00, 0x00, + 0x01, 0x80, 0x61, 0x00, 0x00, 0x30, 0x11, 0x06, 0x04, 0x41, 0xAA, 0xBF, + 0xB1, 0x00, 0x00, 0x01, 0x82, 0x09, 0x00, 0x00, 0x01, 0x81, 0x6C, 0x00, + 0x00, 0x26, 0x01, 0x83, 0xB8, 0x00, 0x01, 0x83, 0xBF, 0x7F, 0x6F, 0x00, + 0x00, 0x01, 0x30, 0x61, 0x36, 0x01, 0x7F, 0x79, 0x1A, 0x01, 0x00, 0x79, + 0x1A, 0x04, 0x7A, 0x00, 0x01, 0x81, 0x38, 0x00, 0x01, 0x7B, 0x0D, 0x06, + 0x02, 0x4E, 0x29, 0x26, 0x03, 0x00, 0x0A, 0x02, 0x00, 0x00, 0x00, 0x30, + 0x26, 0x3E, 0x3A, 0x01, 0x82, 0x00, 0x13, 0x2F, 0x06, 0x04, 0x41, 0x01, + 0x00, 0x00, 0x30, 0x66, 0x09, 0x36, 0x3F, 0x00, 0x00, 0x14, 0x01, 0x3F, + 0x15, 0x01, 0x81, 0x00, 0x2F, 0x93, 0x00, 0x02, 0x01, 
0x00, 0x03, 0x00, + 0xAC, 0x26, 0x06, 0x80, 0x59, 0xB0, 0x01, 0x20, 0x30, 0x11, 0x06, 0x17, + 0x25, 0x71, 0xAA, 0x9B, 0x25, 0x01, 0x7F, 0x2E, 0x03, 0x01, 0xB0, 0x01, + 0x20, 0x74, 0xAA, 0xAF, 0x02, 0x01, 0x20, 0x76, 0x76, 0x04, 0x38, 0x01, + 0x21, 0x30, 0x11, 0x06, 0x08, 0x25, 0x72, 0xB3, 0x01, 0x01, 0x1F, 0x04, + 0x2A, 0x01, 0x22, 0x30, 0x11, 0x06, 0x11, 0x25, 0x72, 0xB3, 0x26, 0x06, + 0x06, 0x2C, 0x02, 0x00, 0x2F, 0x03, 0x00, 0x01, 0x02, 0x1F, 0x04, 0x13, + 0x01, 0x26, 0x30, 0x11, 0x06, 0x08, 0x25, 0x72, 0xB3, 0x01, 0x06, 0x1F, + 0x04, 0x05, 0x41, 0xAB, 0x01, 0x00, 0x25, 0x04, 0xFF, 0x23, 0x76, 0x02, + 0x00, 0x00, 0x00, 0xAC, 0xB1, 0x26, 0x01, 0x01, 0x11, 0x06, 0x08, 0xA3, + 0x05, 0x02, 0x50, 0x29, 0xB1, 0x04, 0x02, 0x50, 0x29, 0x26, 0x01, 0x02, + 0x11, 0x06, 0x0C, 0x25, 0x72, 0xAD, 0x65, 0x2B, 0x40, 0x0D, 0x06, 0x02, + 0x50, 0x29, 0xB1, 0x01, 0x7F, 0x10, 0x06, 0x02, 0x55, 0x29, 0x25, 0x76, + 0x00, 0x00, 0xAC, 0x26, 0x06, 0x1A, 0xAC, 0x9B, 0x25, 0x26, 0x06, 0x11, + 0xAC, 0x26, 0x06, 0x0C, 0xAC, 0x9B, 0x25, 0x86, 0x27, 0x05, 0x02, 0x48, + 0x29, 0xBF, 0x04, 0x71, 0x76, 0x76, 0x04, 0x63, 0x76, 0x00, 0x02, 0x03, + 0x00, 0xB0, 0x01, 0x03, 0x75, 0xAA, 0xB7, 0x03, 0x01, 0x02, 0x01, 0x01, + 0x07, 0x12, 0x06, 0x02, 0x55, 0x29, 0x26, 0x01, 0x00, 0x30, 0x11, 0x06, + 0x05, 0x25, 0x4C, 0x29, 0x04, 0x15, 0x01, 0x01, 0x30, 0x11, 0x06, 0x0A, + 0x25, 0xB7, 0x02, 0x01, 0x14, 0x02, 0x01, 0x0E, 0x04, 0x05, 0x25, 0xB7, + 0x01, 0x00, 0x25, 0x02, 0x00, 0x06, 0x19, 0x01, 0x00, 0x30, 0x01, 0x38, + 0x15, 0x06, 0x03, 0x01, 0x10, 0x2F, 0x3A, 0x01, 0x81, 0x40, 0x15, 0x06, + 0x03, 0x01, 0x20, 0x2F, 0x61, 0x36, 0x04, 0x07, 0x01, 0x04, 0x15, 0x05, + 0x02, 0x4C, 0x29, 0xBF, 0x00, 0x00, 0x37, 0xAC, 0xBF, 0x1B, 0x00, 0x03, + 0x01, 0x00, 0x03, 0x00, 0x37, 0xAC, 0x26, 0x06, 0x30, 0xB0, 0x01, 0x11, + 0x74, 0xAA, 0x26, 0x05, 0x02, 0x43, 0x29, 0x26, 0x06, 0x20, 0xAC, 0x9B, + 0x25, 0x84, 0x27, 0x03, 0x01, 0x01, 0x00, 0x2E, 0x03, 0x02, 0xAF, 0x26, + 0x02, 0x01, 0x15, 0x06, 0x07, 0x2C, 0x06, 0x04, 0x01, 
0x7F, 0x03, 0x00, + 0x02, 0x02, 0x20, 0x76, 0x04, 0x5D, 0x76, 0x04, 0x4D, 0x76, 0x1B, 0x02, + 0x00, 0x00, 0x00, 0xB0, 0x01, 0x06, 0x75, 0xAE, 0x00, 0x00, 0xB5, 0x83, + 0x06, 0x0E, 0x3A, 0x26, 0x05, 0x06, 0x41, 0x01, 0x00, 0x01, 0x00, 0x00, + 0xB5, 0x6B, 0x04, 0x08, 0x8F, 0x06, 0x05, 0x25, 0x01, 0x00, 0x04, 0x00, + 0x00, 0x00, 0xB6, 0x83, 0x06, 0x0E, 0x3A, 0x26, 0x05, 0x06, 0x41, 0x01, + 0x00, 0x01, 0x00, 0x00, 0xB6, 0x6B, 0x04, 0x08, 0x8F, 0x06, 0x05, 0x25, + 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, 0xB7, 0x26, 0x01, 0x81, 0x00, 0x0D, + 0x06, 0x04, 0x00, 0x04, 0x80, 0x55, 0x26, 0x01, 0x81, 0x40, 0x0D, 0x06, + 0x07, 0x25, 0x01, 0x00, 0x00, 0x04, 0x80, 0x47, 0x26, 0x01, 0x81, 0x60, + 0x0D, 0x06, 0x0E, 0x01, 0x1F, 0x15, 0x01, 0x01, 0xA0, 0x01, 0x81, 0x00, + 0x01, 0x8F, 0x7F, 0x04, 0x32, 0x26, 0x01, 0x81, 0x70, 0x0D, 0x06, 0x0F, + 0x01, 0x0F, 0x15, 0x01, 0x02, 0xA0, 0x01, 0x90, 0x00, 0x01, 0x83, 0xFF, + 0x7F, 0x04, 0x1C, 0x26, 0x01, 0x81, 0x78, 0x0D, 0x06, 0x11, 0x01, 0x07, + 0x15, 0x01, 0x03, 0xA0, 0x01, 0x84, 0x80, 0x00, 0x01, 0x80, 0xC3, 0xFF, + 0x7F, 0x04, 0x04, 0x25, 0x01, 0x00, 0x00, 0x6F, 0x05, 0x03, 0x25, 0x01, + 0x00, 0x00, 0x00, 0x3A, 0x26, 0x05, 0x06, 0x41, 0x01, 0x00, 0x01, 0x7F, + 0x00, 0xB7, 0x33, 0x26, 0x3C, 0x06, 0x03, 0x3A, 0x25, 0x00, 0x01, 0x06, + 0x0E, 0x3A, 0x26, 0x01, 0x06, 0x14, 0x01, 0x02, 0x10, 0x06, 0x04, 0x41, + 0x01, 0x7F, 0x00, 0x01, 0x3F, 0x15, 0x09, 0x00, 0x00, 0x26, 0x06, 0x06, + 0x0B, 0x9F, 0x33, 0x40, 0x04, 0x77, 0x25, 0x26, 0x00, 0x00, 0xB0, 0x01, + 0x03, 0x75, 0xAA, 0xB7, 0x06, 0x02, 0x54, 0x29, 0x00, 0x00, 0x3A, 0x26, + 0x06, 0x07, 0x31, 0x26, 0x06, 0x01, 0x1A, 0x04, 0x76, 0x41, 0x00, 0x00, + 0x01, 0x01, 0x75, 0xA9, 0x01, 0x01, 0x10, 0x06, 0x02, 0x42, 0x29, 0xB7, + 0x3D, 0x00, 0x04, 0xB0, 0x26, 0x01, 0x17, 0x01, 0x18, 0x6F, 0x05, 0x02, + 0x47, 0x29, 0x01, 0x18, 0x11, 0x03, 0x00, 0x72, 0xAA, 0xA5, 0x02, 0x00, + 0x06, 0x0C, 0x01, 0x80, 0x64, 0x08, 0x03, 0x01, 0xA5, 0x02, 0x01, 0x09, + 0x04, 0x0E, 0x26, 0x01, 0x32, 0x0D, 0x06, 0x04, 0x01, 
0x80, 0x64, 0x09, + 0x01, 0x8E, 0x6C, 0x09, 0x03, 0x01, 0x02, 0x01, 0x01, 0x82, 0x6D, 0x08, + 0x02, 0x01, 0x01, 0x03, 0x09, 0x01, 0x04, 0x0C, 0x09, 0x02, 0x01, 0x01, + 0x80, 0x63, 0x09, 0x01, 0x80, 0x64, 0x0C, 0x0A, 0x02, 0x01, 0x01, 0x83, + 0x0F, 0x09, 0x01, 0x83, 0x10, 0x0C, 0x09, 0x03, 0x03, 0x01, 0x01, 0x01, + 0x0C, 0xA6, 0x40, 0x01, 0x01, 0x0E, 0x02, 0x01, 0x01, 0x04, 0x07, 0x3E, + 0x02, 0x01, 0x01, 0x80, 0x64, 0x07, 0x3D, 0x02, 0x01, 0x01, 0x83, 0x10, + 0x07, 0x3E, 0x2F, 0x15, 0x06, 0x03, 0x01, 0x18, 0x09, 0x91, 0x09, 0x78, + 0x26, 0x01, 0x05, 0x14, 0x02, 0x03, 0x09, 0x03, 0x03, 0x01, 0x1F, 0x15, + 0x01, 0x01, 0x3A, 0xA6, 0x02, 0x03, 0x09, 0x40, 0x03, 0x03, 0x01, 0x00, + 0x01, 0x17, 0xA6, 0x01, 0x9C, 0x10, 0x08, 0x03, 0x02, 0x01, 0x00, 0x01, + 0x3B, 0xA6, 0x01, 0x3C, 0x08, 0x02, 0x02, 0x09, 0x03, 0x02, 0x01, 0x00, + 0x01, 0x3C, 0xA6, 0x02, 0x02, 0x09, 0x03, 0x02, 0xB7, 0x26, 0x01, 0x2E, + 0x11, 0x06, 0x0D, 0x25, 0xB7, 0x26, 0x01, 0x30, 0x01, 0x39, 0x6F, 0x06, + 0x03, 0x25, 0x04, 0x74, 0x01, 0x80, 0x5A, 0x10, 0x06, 0x02, 0x47, 0x29, + 0x76, 0x02, 0x03, 0x02, 0x02, 0x00, 0x01, 0xB7, 0x7A, 0x01, 0x0A, 0x08, + 0x03, 0x00, 0xB7, 0x7A, 0x02, 0x00, 0x09, 0x00, 0x02, 0x03, 0x00, 0x03, + 0x01, 0xA5, 0x26, 0x02, 0x01, 0x02, 0x00, 0x6F, 0x05, 0x02, 0x47, 0x29, + 0x00, 0x00, 0x33, 0xB0, 0x01, 0x02, 0x75, 0x0B, 0xA8, 0x00, 0x03, 0x26, + 0x03, 0x00, 0x03, 0x01, 0x03, 0x02, 0xAA, 0xB7, 0x26, 0x01, 0x81, 0x00, + 0x13, 0x06, 0x02, 0x53, 0x29, 0x26, 0x01, 0x00, 0x11, 0x06, 0x0B, 0x25, + 0x26, 0x05, 0x04, 0x25, 0x01, 0x00, 0x00, 0xB7, 0x04, 0x6F, 0x02, 0x01, + 0x26, 0x05, 0x02, 0x4F, 0x29, 0x40, 0x03, 0x01, 0x02, 0x02, 0x36, 0x02, + 0x02, 0x3F, 0x03, 0x02, 0x26, 0x06, 0x03, 0xB7, 0x04, 0x68, 0x25, 0x02, + 0x00, 0x02, 0x01, 0x0A, 0x00, 0x01, 0xB7, 0x26, 0x01, 0x81, 0x00, 0x0D, + 0x06, 0x01, 0x00, 0x01, 0x81, 0x00, 0x0A, 0x26, 0x05, 0x02, 0x4D, 0x29, + 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x12, 0x06, 0x19, 0x02, + 0x00, 0x40, 0x03, 0x00, 0x26, 0x01, 0x83, 0xFF, 0xFF, 
0x7F, 0x12, 0x06, + 0x02, 0x4E, 0x29, 0x01, 0x08, 0x0E, 0x3A, 0xB7, 0x33, 0x09, 0x04, 0x60, + 0x00, 0x00, 0xA9, 0x92, 0x00, 0x00, 0xAA, 0xBF, 0x00, 0x00, 0xB0, 0x73, + 0xAA, 0x00, 0x01, 0xAA, 0x26, 0x05, 0x02, 0x53, 0x29, 0xB7, 0x26, 0x01, + 0x81, 0x00, 0x13, 0x06, 0x02, 0x53, 0x29, 0x03, 0x00, 0x26, 0x06, 0x16, + 0xB7, 0x02, 0x00, 0x26, 0x01, 0x87, 0xFF, 0xFF, 0x7F, 0x13, 0x06, 0x02, + 0x53, 0x29, 0x01, 0x08, 0x0E, 0x09, 0x03, 0x00, 0x04, 0x67, 0x25, 0x02, + 0x00, 0x00, 0x00, 0xAA, 0x26, 0x01, 0x81, 0x7F, 0x12, 0x06, 0x08, 0xBF, + 0x01, 0x00, 0x66, 0x36, 0x01, 0x00, 0x00, 0x26, 0x66, 0x36, 0x66, 0x3F, + 0xA2, 0x01, 0x7F, 0x00, 0x00, 0xB0, 0x01, 0x0C, 0x30, 0x11, 0x06, 0x05, + 0x25, 0x72, 0xB3, 0x04, 0x3E, 0x01, 0x12, 0x30, 0x11, 0x06, 0x05, 0x25, + 0x72, 0xB4, 0x04, 0x33, 0x01, 0x13, 0x30, 0x11, 0x06, 0x05, 0x25, 0x72, + 0xB4, 0x04, 0x28, 0x01, 0x14, 0x30, 0x11, 0x06, 0x05, 0x25, 0x72, 0xB4, + 0x04, 0x1D, 0x01, 0x16, 0x30, 0x11, 0x06, 0x05, 0x25, 0x72, 0xB4, 0x04, + 0x12, 0x01, 0x1E, 0x30, 0x11, 0x06, 0x05, 0x25, 0x72, 0xB2, 0x04, 0x07, + 0x41, 0xAB, 0x01, 0x00, 0x01, 0x00, 0x25, 0x00, 0x01, 0xB7, 0x03, 0x00, + 0x02, 0x00, 0x01, 0x05, 0x14, 0x01, 0x01, 0x15, 0x2D, 0x02, 0x00, 0x01, + 0x06, 0x14, 0x26, 0x01, 0x01, 0x15, 0x06, 0x02, 0x45, 0x29, 0x01, 0x04, + 0x0E, 0x02, 0x00, 0x01, 0x1F, 0x15, 0x26, 0x01, 0x1F, 0x11, 0x06, 0x02, + 0x46, 0x29, 0x09, 0x00, 0x00, 0x26, 0x05, 0x05, 0x01, 0x00, 0x01, 0x7F, + 0x00, 0xB0, 0x00, 0x01, 0xAA, 0x26, 0x05, 0x05, 0x66, 0x36, 0x01, 0x7F, + 0x00, 0x01, 0x01, 0x03, 0x00, 0x9C, 0x26, 0x01, 0x83, 0xFF, 0x7E, 0x11, + 0x06, 0x16, 0x25, 0x26, 0x06, 0x10, 0x9D, 0x26, 0x05, 0x05, 0x25, 0xBF, + 0x01, 0x00, 0x00, 0x02, 0x00, 0x81, 0x03, 0x00, 0x04, 0x6D, 0x04, 0x1B, + 0x26, 0x05, 0x05, 0x25, 0xBF, 0x01, 0x00, 0x00, 0x02, 0x00, 0x81, 0x03, + 0x00, 0x26, 0x06, 0x0B, 0x9C, 0x26, 0x05, 0x05, 0x25, 0xBF, 0x01, 0x00, + 0x00, 0x04, 0x6D, 0x25, 0x02, 0x00, 0x26, 0x05, 0x01, 0x00, 0x40, 0x66, + 0x36, 0x01, 0x7F, 0x00, 0x01, 0xAA, 0x01, 0x01, 0x03, 
0x00, 0x26, 0x06, + 0x10, 0x9E, 0x26, 0x05, 0x05, 0x25, 0xBF, 0x01, 0x00, 0x00, 0x02, 0x00, + 0x81, 0x03, 0x00, 0x04, 0x6D, 0x25, 0x02, 0x00, 0x26, 0x05, 0x01, 0x00, + 0x40, 0x66, 0x36, 0x01, 0x7F, 0x00, 0x01, 0xAA, 0x01, 0x01, 0x03, 0x00, + 0x26, 0x06, 0x10, 0xB7, 0x26, 0x05, 0x05, 0x25, 0xBF, 0x01, 0x00, 0x00, + 0x02, 0x00, 0x81, 0x03, 0x00, 0x04, 0x6D, 0x25, 0x02, 0x00, 0x26, 0x05, + 0x01, 0x00, 0x40, 0x66, 0x36, 0x01, 0x7F, 0x00, 0x00, 0xB7, 0x01, 0x08, + 0x0E, 0x3A, 0xB7, 0x33, 0x09, 0x00, 0x00, 0xB7, 0x3A, 0xB7, 0x01, 0x08, + 0x0E, 0x33, 0x09, 0x00, 0x00, 0x26, 0x05, 0x02, 0x4E, 0x29, 0x40, 0xB8, + 0x00, 0x00, 0x32, 0x26, 0x01, 0x00, 0x13, 0x06, 0x01, 0x00, 0x25, 0x1A, + 0x04, 0x74, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x0B, 0x00, 0x00, 0x01, + 0x15, 0x00, 0x00, 0x01, 0x1F, 0x00, 0x00, 0x01, 0x29, 0x00, 0x00, 0x01, + 0x33, 0x00, 0x00, 0xC0, 0x25, 0x00, 0x00, 0x26, 0x06, 0x07, 0xC1, 0x26, + 0x06, 0x01, 0x1A, 0x04, 0x76, 0x00, 0x00, 0x01, 0x00, 0x30, 0x31, 0x0B, + 0x41, 0x00, 0x00, 0x01, 0x81, 0x70, 0x00, 0x00, 0x01, 0x82, 0x0D, 0x00, + 0x00, 0x01, 0x82, 0x22, 0x00, 0x00, 0x01, 0x82, 0x05, 0x00, 0x00, 0x26, + 0x01, 0x83, 0xFB, 0x50, 0x01, 0x83, 0xFB, 0x6F, 0x6F, 0x06, 0x04, 0x25, + 0x01, 0x00, 0x00, 0x26, 0x01, 0x83, 0xB0, 0x00, 0x01, 0x83, 0xBF, 0x7F, + 0x6F, 0x06, 0x04, 0x25, 0x01, 0x00, 0x00, 0x01, 0x83, 0xFF, 0x7F, 0x15, + 0x01, 0x83, 0xFF, 0x7E, 0x0D, 0x00 +}; + +static const uint16_t t0_caddr[] = { + 0, + 5, + 10, + 15, + 20, + 25, + 29, + 33, + 37, + 41, + 45, + 49, + 53, + 57, + 61, + 65, + 69, + 73, + 77, + 81, + 85, + 89, + 93, + 97, + 101, + 105, + 109, + 113, + 117, + 121, + 125, + 130, + 135, + 140, + 145, + 150, + 155, + 160, + 165, + 173, + 178, + 183, + 188, + 193, + 198, + 203, + 208, + 213, + 234, + 239, + 244, + 249, + 264, + 269, + 275, + 281, + 286, + 294, + 302, + 308, + 313, + 324, + 960, + 975, + 979, + 984, + 989, + 994, + 999, + 1004, + 1118, + 1123, + 1135, + 1140, + 1145, + 1150, + 1154, + 1159, + 1164, + 1169, + 1174, + 1184, + 1189, 
+ 1194, + 1206, + 1221, + 1226, + 1240, + 1262, + 1273, + 1376, + 1423, + 1456, + 1547, + 1553, + 1616, + 1623, + 1651, + 1679, + 1784, + 1826, + 1839, + 1851, + 1865, + 1880, + 2100, + 2114, + 2131, + 2140, + 2207, + 2263, + 2267, + 2271, + 2276, + 2324, + 2350, + 2426, + 2470, + 2481, + 2566, + 2604, + 2642, + 2652, + 2662, + 2671, + 2684, + 2688, + 2692, + 2696, + 2700, + 2704, + 2708, + 2712, + 2724, + 2732, + 2737, + 2742, + 2747, + 2752 +}; + +#define T0_INTERPRETED 60 + +#define T0_ENTER(ip, rp, slot) do { \ + const unsigned char *t0_newip; \ + uint32_t t0_lnum; \ + t0_newip = &t0_codeblock[t0_caddr[(slot) - T0_INTERPRETED]]; \ + t0_lnum = t0_parse7E_unsigned(&t0_newip); \ + (rp) += t0_lnum; \ + *((rp) ++) = (uint32_t)((ip) - &t0_codeblock[0]) + (t0_lnum << 16); \ + (ip) = t0_newip; \ + } while (0) + +#define T0_DEFENTRY(name, slot) \ +void \ +name(void *ctx) \ +{ \ + t0_context *t0ctx = ctx; \ + t0ctx->ip = &t0_codeblock[0]; \ + T0_ENTER(t0ctx->ip, t0ctx->rp, slot); \ +} + +T0_DEFENTRY(br_x509_minimal_init_main, 144) + +#define T0_NEXT(t0ipp) (*(*(t0ipp)) ++) + +void +br_x509_minimal_run(void *t0ctx) +{ + uint32_t *dp, *rp; + const unsigned char *ip; + +#define T0_LOCAL(x) (*(rp - 2 - (x))) +#define T0_POP() (*-- dp) +#define T0_POPi() (*(int32_t *)(-- dp)) +#define T0_PEEK(x) (*(dp - 1 - (x))) +#define T0_PEEKi(x) (*(int32_t *)(dp - 1 - (x))) +#define T0_PUSH(v) do { *dp = (v); dp ++; } while (0) +#define T0_PUSHi(v) do { *(int32_t *)dp = (v); dp ++; } while (0) +#define T0_RPOP() (*-- rp) +#define T0_RPOPi() (*(int32_t *)(-- rp)) +#define T0_RPUSH(v) do { *rp = (v); rp ++; } while (0) +#define T0_RPUSHi(v) do { *(int32_t *)rp = (v); rp ++; } while (0) +#define T0_ROLL(x) do { \ + size_t t0len = (size_t)(x); \ + uint32_t t0tmp = *(dp - 1 - t0len); \ + memmove(dp - t0len - 1, dp - t0len, t0len * sizeof *dp); \ + *(dp - 1) = t0tmp; \ +} while (0) +#define T0_SWAP() do { \ + uint32_t t0tmp = *(dp - 2); \ + *(dp - 2) = *(dp - 1); \ + *(dp - 1) = t0tmp; \ +} 
while (0) +#define T0_ROT() do { \ + uint32_t t0tmp = *(dp - 3); \ + *(dp - 3) = *(dp - 2); \ + *(dp - 2) = *(dp - 1); \ + *(dp - 1) = t0tmp; \ +} while (0) +#define T0_NROT() do { \ + uint32_t t0tmp = *(dp - 1); \ + *(dp - 1) = *(dp - 2); \ + *(dp - 2) = *(dp - 3); \ + *(dp - 3) = t0tmp; \ +} while (0) +#define T0_PICK(x) do { \ + uint32_t t0depth = (x); \ + T0_PUSH(T0_PEEK(t0depth)); \ +} while (0) +#define T0_CO() do { \ + goto t0_exit; \ +} while (0) +#define T0_RET() goto t0_next + + dp = ((t0_context *)t0ctx)->dp; + rp = ((t0_context *)t0ctx)->rp; + ip = ((t0_context *)t0ctx)->ip; + goto t0_next; + for (;;) { + uint32_t t0x; + + t0_next: + t0x = T0_NEXT(&ip); + if (t0x < T0_INTERPRETED) { + switch (t0x) { + int32_t t0off; + + case 0: /* ret */ + t0x = T0_RPOP(); + rp -= (t0x >> 16); + t0x &= 0xFFFF; + if (t0x == 0) { + ip = NULL; + goto t0_exit; + } + ip = &t0_codeblock[t0x]; + break; + case 1: /* literal constant */ + T0_PUSHi(t0_parse7E_signed(&ip)); + break; + case 2: /* read local */ + T0_PUSH(T0_LOCAL(t0_parse7E_unsigned(&ip))); + break; + case 3: /* write local */ + T0_LOCAL(t0_parse7E_unsigned(&ip)) = T0_POP(); + break; + case 4: /* jump */ + t0off = t0_parse7E_signed(&ip); + ip += t0off; + break; + case 5: /* jump if */ + t0off = t0_parse7E_signed(&ip); + if (T0_POP()) { + ip += t0off; + } + break; + case 6: /* jump if not */ + t0off = t0_parse7E_signed(&ip); + if (!T0_POP()) { + ip += t0off; + } + break; + case 7: { + /* %25 */ + + int32_t b = T0_POPi(); + int32_t a = T0_POPi(); + T0_PUSHi(a % b); + + } + break; + case 8: { + /* * */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(a * b); + + } + break; + case 9: { + /* + */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(a + b); + + } + break; + case 10: { + /* - */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(a - b); + + } + break; + case 11: { + /* -rot */ + T0_NROT(); + } + break; + case 12: { + /* / */ + + int32_t b = T0_POPi(); + int32_t a = 
T0_POPi(); + T0_PUSHi(a / b); + + } + break; + case 13: { + /* < */ + + int32_t b = T0_POPi(); + int32_t a = T0_POPi(); + T0_PUSH(-(uint32_t)(a < b)); + + } + break; + case 14: { + /* << */ + + int c = (int)T0_POPi(); + uint32_t x = T0_POP(); + T0_PUSH(x << c); + + } + break; + case 15: { + /* <= */ + + int32_t b = T0_POPi(); + int32_t a = T0_POPi(); + T0_PUSH(-(uint32_t)(a <= b)); + + } + break; + case 16: { + /* <> */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(-(uint32_t)(a != b)); + + } + break; + case 17: { + /* = */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(-(uint32_t)(a == b)); + + } + break; + case 18: { + /* > */ + + int32_t b = T0_POPi(); + int32_t a = T0_POPi(); + T0_PUSH(-(uint32_t)(a > b)); + + } + break; + case 19: { + /* >= */ + + int32_t b = T0_POPi(); + int32_t a = T0_POPi(); + T0_PUSH(-(uint32_t)(a >= b)); + + } + break; + case 20: { + /* >> */ + + int c = (int)T0_POPi(); + int32_t x = T0_POPi(); + T0_PUSHi(x >> c); + + } + break; + case 21: { + /* and */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(a & b); + + } + break; + case 22: { + /* blobcopy */ + + size_t len = T0_POP(); + unsigned char *src = (unsigned char *)CTX + T0_POP(); + unsigned char *dst = (unsigned char *)CTX + T0_POP(); + memcpy(dst, src, len); + + } + break; + case 23: { + /* check-direct-trust */ + + size_t u; + + for (u = 0; u < CTX->trust_anchors_num; u ++) { + const br_x509_trust_anchor *ta; + unsigned char hashed_DN[64]; + int kt; + + ta = &CTX->trust_anchors[u]; + if (ta->flags & BR_X509_TA_CA) { + continue; + } + hash_dn(CTX, ta->dn.data, ta->dn.len, hashed_DN); + if (memcmp(hashed_DN, CTX->current_dn_hash, DNHASH_LEN)) { + continue; + } + kt = CTX->pkey.key_type; + if ((ta->pkey.key_type & 0x0F) != kt) { + continue; + } + switch (kt) { + + case BR_KEYTYPE_RSA: + if (!eqbigint(CTX->pkey.key.rsa.n, + CTX->pkey.key.rsa.nlen, + ta->pkey.key.rsa.n, + ta->pkey.key.rsa.nlen) + || !eqbigint(CTX->pkey.key.rsa.e, + 
CTX->pkey.key.rsa.elen, + ta->pkey.key.rsa.e, + ta->pkey.key.rsa.elen)) + { + continue; + } + break; + + case BR_KEYTYPE_EC: + if (CTX->pkey.key.ec.curve != ta->pkey.key.ec.curve + || CTX->pkey.key.ec.qlen != ta->pkey.key.ec.qlen + || memcmp(CTX->pkey.key.ec.q, + ta->pkey.key.ec.q, + ta->pkey.key.ec.qlen) != 0) + { + continue; + } + break; + + default: + continue; + } + + /* + * Direct trust match! + */ + CTX->err = BR_ERR_X509_OK; + T0_CO(); + } + + } + break; + case 24: { + /* check-trust-anchor-CA */ + + size_t u; + + for (u = 0; u < CTX->trust_anchors_num; u ++) { + const br_x509_trust_anchor *ta; + unsigned char hashed_DN[64]; + + ta = &CTX->trust_anchors[u]; + if (!(ta->flags & BR_X509_TA_CA)) { + continue; + } + hash_dn(CTX, ta->dn.data, ta->dn.len, hashed_DN); + if (memcmp(hashed_DN, CTX->saved_dn_hash, DNHASH_LEN)) { + continue; + } + if (verify_signature(CTX, &ta->pkey) == 0) { + CTX->err = BR_ERR_X509_OK; + T0_CO(); + } + } + + } + break; + case 25: { + /* check-validity-range */ + + uint32_t nbs = T0_POP(); + uint32_t nbd = T0_POP(); + uint32_t nas = T0_POP(); + uint32_t nad = T0_POP(); + int r; + if (CTX->itime != 0) { + r = CTX->itime(CTX->itime_ctx, nbd, nbs, nad, nas); + if (r < -1 || r > 1) { + CTX->err = BR_ERR_X509_TIME_UNKNOWN; + T0_CO(); + } + } else { + uint32_t vd = CTX->days; + uint32_t vs = CTX->seconds; + if (vd == 0 && vs == 0) { + CTX->err = BR_ERR_X509_TIME_UNKNOWN; + T0_CO(); + } + if (vd < nbd || (vd == nbd && vs < nbs)) { + r = -1; + } else if (vd > nad || (vd == nad && vs > nas)) { + r = 1; + } else { + r = 0; + } + } + T0_PUSHi(r); + + } + break; + case 26: { + /* co */ + T0_CO(); + } + break; + case 27: { + /* compute-dn-hash */ + + CTX->dn_hash_impl->out(&CTX->dn_hash.vtable, CTX->current_dn_hash); + CTX->do_dn_hash = 0; + + } + break; + case 28: { + /* compute-tbs-hash */ + + int id = T0_POPi(); + size_t len; + len = br_multihash_out(&CTX->mhash, id, CTX->tbs_hash); + T0_PUSH(len); + + } + break; + case 29: { + /* 
copy-ee-ec-pkey */ + + size_t qlen = T0_POP(); + uint32_t curve = T0_POP(); + memcpy(CTX->ee_pkey_data, CTX->pkey_data, qlen); + CTX->pkey.key_type = BR_KEYTYPE_EC; + CTX->pkey.key.ec.curve = curve; + CTX->pkey.key.ec.q = CTX->ee_pkey_data; + CTX->pkey.key.ec.qlen = qlen; + + } + break; + case 30: { + /* copy-ee-rsa-pkey */ + + size_t elen = T0_POP(); + size_t nlen = T0_POP(); + memcpy(CTX->ee_pkey_data, CTX->pkey_data, nlen + elen); + CTX->pkey.key_type = BR_KEYTYPE_RSA; + CTX->pkey.key.rsa.n = CTX->ee_pkey_data; + CTX->pkey.key.rsa.nlen = nlen; + CTX->pkey.key.rsa.e = CTX->ee_pkey_data + nlen; + CTX->pkey.key.rsa.elen = elen; + + } + break; + case 31: { + /* copy-name-SAN */ + + unsigned tag = T0_POP(); + unsigned ok = T0_POP(); + size_t u, len; + + len = CTX->pad[0]; + for (u = 0; u < CTX->num_name_elts; u ++) { + br_name_element *ne; + + ne = &CTX->name_elts[u]; + if (ne->status == 0 && ne->oid[0] == 0 && ne->oid[1] == tag) { + if (ok && ne->len > len) { + memcpy(ne->buf, CTX->pad + 1, len); + ne->buf[len] = 0; + ne->status = 1; + } else { + ne->status = -1; + } + break; + } + } + + } + break; + case 32: { + /* copy-name-element */ + + size_t len; + int32_t off = T0_POPi(); + int ok = T0_POPi(); + + if (off >= 0) { + br_name_element *ne = &CTX->name_elts[off]; + + if (ok) { + len = CTX->pad[0]; + if (len < ne->len) { + memcpy(ne->buf, CTX->pad + 1, len); + ne->buf[len] = 0; + ne->status = 1; + } else { + ne->status = -1; + } + } else { + ne->status = -1; + } + } + + } + break; + case 33: { + /* data-get8 */ + + size_t addr = T0_POP(); + T0_PUSH(t0_datablock[addr]); + + } + break; + case 34: { + /* dn-hash-length */ + + T0_PUSH(DNHASH_LEN); + + } + break; + case 35: { + /* do-ecdsa-vrfy */ + + size_t qlen = T0_POP(); + int curve = T0_POP(); + br_x509_pkey pk; + + pk.key_type = BR_KEYTYPE_EC; + pk.key.ec.curve = curve; + pk.key.ec.q = CTX->pkey_data; + pk.key.ec.qlen = qlen; + T0_PUSH(verify_signature(CTX, &pk)); + + } + break; + case 36: { + /* do-rsa-vrfy */ + 
+ size_t elen = T0_POP(); + size_t nlen = T0_POP(); + br_x509_pkey pk; + + pk.key_type = BR_KEYTYPE_RSA; + pk.key.rsa.n = CTX->pkey_data; + pk.key.rsa.nlen = nlen; + pk.key.rsa.e = CTX->pkey_data + nlen; + pk.key.rsa.elen = elen; + T0_PUSH(verify_signature(CTX, &pk)); + + } + break; + case 37: { + /* drop */ + (void)T0_POP(); + } + break; + case 38: { + /* dup */ + T0_PUSH(T0_PEEK(0)); + } + break; + case 39: { + /* eqOID */ + + const unsigned char *a2 = &t0_datablock[T0_POP()]; + const unsigned char *a1 = &CTX->pad[0]; + size_t len = a1[0]; + int x; + if (len == a2[0]) { + x = -(memcmp(a1 + 1, a2 + 1, len) == 0); + } else { + x = 0; + } + T0_PUSH((uint32_t)x); + + } + break; + case 40: { + /* eqblob */ + + size_t len = T0_POP(); + const unsigned char *a2 = (const unsigned char *)CTX + T0_POP(); + const unsigned char *a1 = (const unsigned char *)CTX + T0_POP(); + T0_PUSHi(-(memcmp(a1, a2, len) == 0)); + + } + break; + case 41: { + /* fail */ + + CTX->err = T0_POPi(); + T0_CO(); + + } + break; + case 42: { + /* get16 */ + + uint32_t addr = T0_POP(); + T0_PUSH(*(uint16_t *)(void *)((unsigned char *)CTX + addr)); + + } + break; + case 43: { + /* get32 */ + + uint32_t addr = T0_POP(); + T0_PUSH(*(uint32_t *)(void *)((unsigned char *)CTX + addr)); + + } + break; + case 44: { + /* match-server-name */ + + size_t n1, n2; + + if (CTX->server_name == NULL) { + T0_PUSH(0); + T0_RET(); + } + n1 = strlen(CTX->server_name); + n2 = CTX->pad[0]; + if (n1 == n2 && eqnocase(&CTX->pad[1], CTX->server_name, n1)) { + T0_PUSHi(-1); + T0_RET(); + } + if (n2 >= 2 && CTX->pad[1] == '*' && CTX->pad[2] == '.') { + size_t u; + + u = 0; + while (u < n1 && CTX->server_name[u] != '.') { + u ++; + } + u ++; + n1 -= u; + if ((n2 - 2) == n1 + && eqnocase(&CTX->pad[3], CTX->server_name + u, n1)) + { + T0_PUSHi(-1); + T0_RET(); + } + } + T0_PUSH(0); + + } + break; + case 45: { + /* neg */ + + uint32_t a = T0_POP(); + T0_PUSH(-a); + + } + break; + case 46: { + /* offset-name-element */ + + unsigned 
san = T0_POP(); + size_t u; + + for (u = 0; u < CTX->num_name_elts; u ++) { + if (CTX->name_elts[u].status == 0) { + const unsigned char *oid; + size_t len, off; + + oid = CTX->name_elts[u].oid; + if (san) { + if (oid[0] != 0 || oid[1] != 0) { + continue; + } + off = 2; + } else { + off = 0; + } + len = oid[off]; + if (len != 0 && len == CTX->pad[0] + && memcmp(oid + off + 1, + CTX->pad + 1, len) == 0) + { + T0_PUSH(u); + T0_RET(); + } + } + } + T0_PUSHi(-1); + + } + break; + case 47: { + /* or */ + + uint32_t b = T0_POP(); + uint32_t a = T0_POP(); + T0_PUSH(a | b); + + } + break; + case 48: { + /* over */ + T0_PUSH(T0_PEEK(1)); + } + break; + case 49: { + /* read-blob-inner */ + + uint32_t len = T0_POP(); + uint32_t addr = T0_POP(); + size_t clen = CTX->hlen; + if (clen > len) { + clen = (size_t)len; + } + if (addr != 0) { + memcpy((unsigned char *)CTX + addr, CTX->hbuf, clen); + } + if (CTX->do_mhash) { + br_multihash_update(&CTX->mhash, CTX->hbuf, clen); + } + if (CTX->do_dn_hash) { + CTX->dn_hash_impl->update( + &CTX->dn_hash.vtable, CTX->hbuf, clen); + } + CTX->hbuf += clen; + CTX->hlen -= clen; + T0_PUSH(addr + clen); + T0_PUSH(len - clen); + + } + break; + case 50: { + /* read8-low */ + + if (CTX->hlen == 0) { + T0_PUSHi(-1); + } else { + unsigned char x = *CTX->hbuf ++; + if (CTX->do_mhash) { + br_multihash_update(&CTX->mhash, &x, 1); + } + if (CTX->do_dn_hash) { + CTX->dn_hash_impl->update(&CTX->dn_hash.vtable, &x, 1); + } + CTX->hlen --; + T0_PUSH(x); + } + + } + break; + case 51: { + /* rot */ + T0_ROT(); + } + break; + case 52: { + /* set16 */ + + uint32_t addr = T0_POP(); + *(uint16_t *)(void *)((unsigned char *)CTX + addr) = T0_POP(); + + } + break; + case 53: { + /* set32 */ + + uint32_t addr = T0_POP(); + *(uint32_t *)(void *)((unsigned char *)CTX + addr) = T0_POP(); + + } + break; + case 54: { + /* set8 */ + + uint32_t addr = T0_POP(); + *((unsigned char *)CTX + addr) = (unsigned char)T0_POP(); + + } + break; + case 55: { + /* start-dn-hash */ + + 
CTX->dn_hash_impl->init(&CTX->dn_hash.vtable); + CTX->do_dn_hash = 1; + + } + break; + case 56: { + /* start-tbs-hash */ + + br_multihash_init(&CTX->mhash); + CTX->do_mhash = 1; + + } + break; + case 57: { + /* stop-tbs-hash */ + + CTX->do_mhash = 0; + + } + break; + case 58: { + /* swap */ + T0_SWAP(); + } + break; + case 59: { + /* zero-server-name */ + + T0_PUSHi(-(CTX->server_name == NULL)); + + } + break; + } + + } else { + T0_ENTER(ip, rp, t0x); + } + } +t0_exit: + ((t0_context *)t0ctx)->dp = dp; + ((t0_context *)t0ctx)->rp = rp; + ((t0_context *)t0ctx)->ip = ip; +} + + + +/* + * Verify the signature on the certificate with the provided public key. + * This function checks the public key type with regards to the expected + * type. Returned value is either 0 on success, or a non-zero error code. + */ +static int +verify_signature(br_x509_minimal_context *ctx, const br_x509_pkey *pk) +{ + int kt; + + kt = ctx->cert_signer_key_type; + if ((pk->key_type & 0x0F) != kt) { + return BR_ERR_X509_WRONG_KEY_TYPE; + } + switch (kt) { + unsigned char tmp[64]; + + case BR_KEYTYPE_RSA: + if (ctx->irsa == 0) { + return BR_ERR_X509_UNSUPPORTED; + } + if (!ctx->irsa(ctx->cert_sig, ctx->cert_sig_len, + &t0_datablock[ctx->cert_sig_hash_oid], + ctx->cert_sig_hash_len, &pk->key.rsa, tmp)) + { + return BR_ERR_X509_BAD_SIGNATURE; + } + if (memcmp(ctx->tbs_hash, tmp, ctx->cert_sig_hash_len) != 0) { + return BR_ERR_X509_BAD_SIGNATURE; + } + return 0; + + case BR_KEYTYPE_EC: + if (ctx->iecdsa == 0) { + return BR_ERR_X509_UNSUPPORTED; + } + if (!ctx->iecdsa(ctx->iec, ctx->tbs_hash, + ctx->cert_sig_hash_len, &pk->key.ec, + ctx->cert_sig, ctx->cert_sig_len)) + { + return BR_ERR_X509_BAD_SIGNATURE; + } + return 0; + + default: + return BR_ERR_X509_UNSUPPORTED; + } +} + + diff --git a/third_party/bearssl/src/x509_minimal_full.c b/third_party/bearssl/src/x509_minimal_full.c new file mode 100644 index 0000000..2b54426 --- /dev/null +++ b/third_party/bearssl/src/x509_minimal_full.c @@ -0,0 
+1,59 @@ +/* + * Copyright (c) 2016 Thomas Pornin <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_x509.h */ +void +br_x509_minimal_init_full(br_x509_minimal_context *xc, + const br_x509_trust_anchor *trust_anchors, size_t trust_anchors_num) +{ + /* + * All hash functions are activated. + * Note: the X.509 validation engine will nonetheless refuse to + * validate signatures that use MD5 as hash function. 
+ */ + static const br_hash_class *hashes[] = { + &br_md5_vtable, + &br_sha1_vtable, + &br_sha224_vtable, + &br_sha256_vtable, + &br_sha384_vtable, + &br_sha512_vtable + }; + + int id; + + br_x509_minimal_init(xc, &br_sha256_vtable, + trust_anchors, trust_anchors_num); + br_x509_minimal_set_rsa(xc, &br_rsa_i31_pkcs1_vrfy); + br_x509_minimal_set_ecdsa(xc, + &br_ec_prime_i31, &br_ecdsa_i31_vrfy_asn1); + for (id = br_md5_ID; id <= br_sha512_ID; id ++) { + const br_hash_class *hc; + + hc = hashes[id - 1]; + br_x509_minimal_set_hash(xc, id, hc); + } +} |