Merge bitcoin/bitcoin#23577: Follow-ups to Bech32 error detection

a4fe70171b Make Bech32 LocateErrors return error list rather than using out-arg (Samuel Dobson)
2fa4fd1961 Use std::iota instead of manually pushing range (Samuel Dobson)
405c96fc9f Use bounds-checked array lookups in Bech32 error detection code (Samuel Dobson)
28d9c2857f Simplify encoding of e in GF(1024) tables to (1,0) (Samuel Dobson)
14358a029d Replace GF1024 tables and syndrome constants with compile-time generated constexprs. (Samuel Dobson)
63f7b69779 Update release note for bech32 error detection (Samuel Dobson)
c8b9a224e7 Report encoding type in bech32 error message (Samuel Dobson)
92f0cafdca Improve Bech32 boost tests (Samuel Dobson)
bb4d3e9b97 Address review comments for Bech32 error validation (Samuel Dobson)

Pull request description:

  A number of follow-ups and improvements to the bech32 error location code, introduced in #16807.

  Notably, this removes the hardcoded GF1024 tables in favour of constexpr table generation.

ACKs for top commit:
  laanwj:
    Re-ACK a4fe70171b

Tree-SHA512: 6312373c20ebd6636f5797304876fa0d70fa777de2f6c507245f51a652b3d1224ebc55b236c9e11e6956c1e88e65faadab51d53587078efccb451455aa2e2276
This commit is contained in:
W. J. van der Laan 2021-12-06 10:37:53 +01:00
commit 22feb7fee9
No known key found for this signature in database
GPG Key ID: 1E4AED62986CD25D
6 changed files with 231 additions and 321 deletions

View File

@ -1,6 +1,10 @@
Updated RPCs
------------
- The `validateaddress` RPC now optionally returns an `error_locations` array, with the indices of
invalid characters in the address. For example, this will return the locations of up to two Bech32
errors.
- The `validateaddress` RPC now returns an `error_locations` array for invalid
addresses, with the indices of invalid character locations in the address (if
known). For example, this will attempt to locate up to two Bech32 errors, and
return their locations if successful. Success and correctness are only guaranteed
if fewer than two substitution errors have been made.
The error message returned in the `error` field now also returns more specific
errors when decoding fails.

View File

@ -6,7 +6,10 @@
#include <bech32.h>
#include <util/vector.h>
#include <array>
#include <assert.h>
#include <numeric>
#include <optional>
namespace bech32
{
@ -31,182 +34,89 @@ const int8_t CHARSET_REV[128] = {
1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1
};
// We work with the finite field GF(1024) defined as a degree 2 extension of the base field GF(32)
// The defining polynomial of the extension is x^2 + 9x + 23
// Let (e) be a primitive element of GF(1024), that is, a generator of the field.
// Every non-zero element of the field can then be represented as (e)^k for some power k.
// The array GF1024_EXP contains all these powers of (e) - GF1024_EXP[k] = (e)^k in GF(1024).
// Conversely, GF1024_LOG contains the discrete logarithms of these powers, so
// GF1024_LOG[GF1024_EXP[k]] == k
// Each element v of GF(1024) is encoded as a 10 bit integer in the following way:
// v = v1 || v0 where v0, v1 are 5-bit integers (elements of GF(32)).
//
// The element (e) is encoded as 9 || 15. Given (v), we compute (e)*(v) by multiplying in the following way:
// v0' = 27*v1 + 15*v0
// v1' = 6*v1 + 9*v0
// e*v = v1' || v0'
//
// The following sage code can be used to reproduce both _EXP and _LOG arrays
// GF1024_LOG = [-1] + [0] * 1023
// GF1024_EXP = [1] * 1024
// v = 1
// for i in range(1, 1023):
// v0 = v & 31
// v1 = v >> 5
// v0n = F.fetch_int(27)*F.fetch_int(v1) + F.fetch_int(15)*F.fetch_int(v0)
// v1n = F.fetch_int(6)*F.fetch_int(v1) + F.fetch_int(9)*F.fetch_int(v0)
// v = v1n.integer_representation() << 5 | v0n.integer_representation()
// GF1024_EXP[i] = v
// GF1024_LOG[v] = i
/** We work with the finite field GF(1024) defined as a degree 2 extension of the base field GF(32)
* The defining polynomial of the extension is x^2 + 9x + 23.
* Let (e) be a root of this defining polynomial. Then (e) is a primitive element of GF(1024),
* that is, a generator of the field. Every non-zero element of the field can then be represented
* as (e)^k for some power k.
* The array GF1024_EXP contains all these powers of (e) - GF1024_EXP[k] = (e)^k in GF(1024).
* Conversely, GF1024_LOG contains the discrete logarithms of these powers, so
* GF1024_LOG[GF1024_EXP[k]] == k.
* The following function generates the two tables GF1024_EXP and GF1024_LOG as constexprs. */
constexpr std::pair<std::array<int16_t, 1023>, std::array<int16_t, 1024>> GenerateGFTables()
{
// Build table for GF(32).
// We use these tables to perform arithmetic in GF(32) below, when constructing the
// tables for GF(1024).
std::array<int8_t, 31> GF32_EXP{};
std::array<int8_t, 32> GF32_LOG{};
const int16_t GF1024_EXP[] = {
1, 303, 635, 446, 997, 640, 121, 142, 959, 420, 350, 438, 166, 39, 543,
335, 831, 691, 117, 632, 719, 97, 107, 374, 558, 797, 54, 150, 858, 877,
724, 1013, 294, 23, 354, 61, 164, 633, 992, 538, 469, 659, 174, 868, 184,
809, 766, 563, 866, 851, 257, 520, 45, 770, 535, 524, 408, 213, 436, 760,
472, 330, 933, 799, 616, 361, 15, 391, 756, 814, 58, 608, 554, 680, 993,
821, 942, 813, 843, 484, 193, 935, 321, 919, 572, 741, 423, 559, 562,
589, 296, 191, 493, 685, 891, 665, 435, 60, 395, 2, 606, 511, 853, 746,
32, 219, 284, 631, 840, 661, 837, 332, 78, 311, 670, 887, 111, 195, 505,
190, 194, 214, 709, 380, 819, 69, 261, 957, 1018, 161, 739, 588, 7, 708,
83, 328, 507, 736, 317, 899, 47, 348, 1000, 345, 882, 245, 367, 996, 943,
514, 304, 90, 804, 295, 312, 793, 387, 833, 249, 921, 660, 618, 823, 496,
722, 30, 782, 225, 892, 93, 480, 372, 112, 738, 867, 636, 890, 950, 968,
386, 622, 642, 551, 369, 234, 846, 382, 365, 442, 592, 343, 986, 122,
1023, 59, 847, 81, 790, 4, 437, 983, 931, 244, 64, 415, 529, 487, 944,
35, 938, 664, 156, 583, 53, 999, 222, 390, 987, 341, 388, 389, 170, 721,
879, 138, 522, 627, 765, 322, 230, 440, 14, 168, 143, 656, 991, 224, 595,
550, 94, 657, 752, 667, 1005, 451, 734, 744, 638, 292, 585, 157, 872,
590, 601, 827, 774, 930, 475, 571, 33, 500, 871, 969, 173, 21, 828, 450,
1009, 147, 960, 705, 201, 228, 998, 497, 1021, 613, 688, 772, 508, 36,
366, 715, 468, 956, 725, 730, 861, 425, 647, 701, 221, 759, 95, 958, 139,
805, 8, 835, 679, 614, 449, 128, 791, 299, 974, 617, 70, 628, 57, 273,
430, 67, 750, 405, 780, 703, 643, 776, 778, 340, 171, 1022, 276, 308,
495, 243, 644, 460, 857, 28, 336, 286, 41, 695, 448, 431, 364, 149, 43,
233, 63, 762, 902, 181, 240, 501, 584, 434, 275, 1008, 444, 443, 895,
812, 612, 927, 383, 66, 961, 1006, 690, 346, 3, 881, 900, 747, 271, 672,
162, 402, 456, 748, 971, 755, 490, 105, 808, 977, 72, 732, 182, 897, 625,
163, 189, 947, 850, 46, 115, 403, 231, 151, 629, 278, 874, 16, 934, 110,
492, 898, 256, 807, 598, 700, 498, 140, 481, 91, 523, 860, 134, 252, 771,
824, 119, 38, 816, 820, 641, 342, 757, 513, 577, 990, 463, 40, 920, 955,
17, 649, 533, 82, 103, 896, 862, 728, 259, 86, 466, 87, 253, 556, 323,
457, 963, 432, 845, 527, 745, 849, 863, 1015, 888, 488, 567, 727, 132,
674, 764, 109, 669, 6, 1003, 552, 246, 542, 96, 324, 781, 912, 248, 694,
239, 980, 210, 880, 683, 144, 177, 325, 546, 491, 326, 339, 623, 941, 92,
207, 783, 462, 263, 483, 517, 1012, 9, 620, 220, 984, 548, 512, 878, 421,
113, 973, 280, 962, 159, 310, 945, 268, 465, 806, 889, 199, 76, 873, 865,
34, 645, 227, 290, 418, 693, 926, 80, 569, 639, 11, 50, 291, 141, 206,
544, 949, 185, 518, 133, 909, 135, 467, 376, 646, 914, 678, 841, 954,
318, 242, 939, 951, 743, 1017, 976, 359, 167, 264, 100, 241, 218, 51, 12,
758, 368, 453, 309, 192, 648, 826, 553, 473, 101, 478, 673, 397, 1001,
118, 265, 331, 650, 356, 982, 652, 655, 510, 634, 145, 414, 830, 924,
526, 966, 298, 737, 18, 504, 401, 697, 360, 288, 1020, 842, 203, 698,
537, 676, 279, 581, 619, 536, 907, 876, 1019, 398, 152, 1010, 994, 68,
42, 454, 580, 836, 99, 565, 137, 379, 503, 22, 77, 582, 282, 412, 352,
611, 347, 300, 266, 570, 270, 911, 729, 44, 557, 108, 946, 637, 597, 461,
630, 615, 238, 763, 681, 718, 334, 528, 200, 459, 413, 79, 24, 229, 713,
906, 579, 384, 48, 893, 370, 923, 202, 917, 98, 794, 754, 197, 530, 662,
52, 712, 677, 56, 62, 981, 509, 267, 789, 885, 561, 316, 684, 596, 226,
13, 985, 779, 123, 720, 576, 753, 948, 406, 125, 315, 104, 519, 426, 502,
313, 566, 1016, 767, 796, 281, 749, 740, 136, 84, 908, 424, 936, 198,
355, 274, 735, 967, 5, 154, 428, 541, 785, 704, 486, 671, 600, 532, 381,
540, 574, 187, 88, 378, 216, 621, 499, 419, 922, 485, 494, 476, 255, 114,
188, 668, 297, 400, 918, 787, 158, 25, 458, 178, 564, 422, 768, 73, 1011,
717, 575, 404, 547, 196, 829, 237, 394, 301, 37, 65, 176, 106, 89, 85,
675, 979, 534, 803, 995, 363, 593, 120, 417, 452, 26, 699, 822, 223, 169,
416, 235, 609, 773, 211, 607, 208, 302, 852, 965, 603, 357, 761, 247,
817, 539, 250, 232, 272, 129, 568, 848, 624, 396, 710, 525, 183, 686, 10,
285, 856, 307, 811, 160, 972, 55, 441, 289, 723, 305, 373, 351, 153, 733,
409, 506, 975, 838, 573, 970, 988, 913, 471, 205, 337, 49, 594, 777, 549,
815, 277, 27, 916, 333, 353, 844, 800, 146, 751, 186, 375, 769, 358, 392,
883, 474, 788, 602, 74, 130, 329, 212, 155, 131, 102, 687, 293, 870, 742,
726, 427, 217, 834, 904, 29, 127, 869, 407, 338, 832, 470, 482, 810, 399,
439, 393, 604, 929, 682, 447, 714, 251, 455, 875, 319, 477, 464, 521,
258, 377, 937, 489, 792, 172, 314, 327, 124, 20, 531, 953, 591, 886, 320,
696, 71, 859, 578, 175, 587, 707, 663, 283, 179, 795, 989, 702, 940, 371,
692, 689, 555, 903, 410, 651, 75, 429, 818, 362, 894, 515, 31, 545, 666,
706, 952, 864, 269, 254, 349, 711, 802, 716, 784, 1007, 925, 801, 445,
148, 260, 658, 385, 287, 262, 204, 126, 586, 1004, 236, 165, 854, 411,
932, 560, 19, 215, 1002, 775, 653, 928, 901, 964, 884, 798, 839, 786,
433, 610, 116, 855, 180, 479, 910, 1014, 599, 915, 905, 306, 516, 731,
626, 978, 825, 344, 605, 654, 209
};
// As above, GF1024_EXP contains all elements of GF(1024) except 0
static_assert(std::size(GF1024_EXP) == 1023, "GF1024_EXP length should be 1023");
// fmod encodes the defining polynomial of GF(32) over GF(2), x^5 + x^3 + 1.
// Because coefficients in GF(2) are binary digits, the coefficients are packed as 101001.
const int fmod = 41;
const int16_t GF1024_LOG[] = {
-1, 0, 99, 363, 198, 726, 462, 132, 297, 495, 825, 528, 561, 693, 231,
66, 396, 429, 594, 990, 924, 264, 627, 33, 660, 759, 792, 858, 330, 891,
165, 957, 104, 259, 518, 208, 280, 776, 416, 13, 426, 333, 618, 339, 641,
52, 388, 140, 666, 852, 529, 560, 678, 213, 26, 832, 681, 309, 70, 194,
97, 35, 682, 341, 203, 777, 358, 312, 617, 125, 307, 931, 379, 765, 875,
951, 515, 628, 112, 659, 525, 196, 432, 134, 717, 781, 438, 440, 740,
780, 151, 408, 487, 169, 239, 293, 467, 21, 672, 622, 557, 571, 881, 433,
704, 376, 779, 22, 643, 460, 398, 116, 172, 503, 751, 389, 1004, 18, 576,
415, 789, 6, 192, 696, 923, 702, 981, 892, 302, 816, 876, 880, 457, 537,
411, 539, 716, 624, 224, 295, 406, 531, 7, 233, 478, 586, 864, 268, 974,
338, 27, 392, 614, 839, 727, 879, 211, 250, 758, 507, 830, 129, 369, 384,
36, 985, 12, 555, 232, 796, 221, 321, 920, 263, 42, 934, 778, 479, 761,
939, 1006, 344, 381, 823, 44, 535, 866, 739, 752, 385, 119, 91, 566, 80,
120, 117, 771, 675, 721, 514, 656, 271, 670, 602, 980, 850, 532, 488,
803, 1022, 475, 801, 878, 57, 121, 991, 742, 888, 559, 105, 497, 291,
215, 795, 236, 167, 692, 520, 272, 661, 229, 391, 814, 340, 184, 798,
984, 773, 650, 473, 345, 558, 548, 326, 202, 145, 465, 810, 471, 158,
813, 908, 412, 441, 964, 750, 401, 50, 915, 437, 975, 126, 979, 491, 556,
577, 636, 685, 510, 963, 638, 367, 815, 310, 723, 349, 323, 857, 394,
606, 505, 713, 630, 938, 106, 826, 332, 978, 599, 834, 521, 530, 248,
883, 32, 153, 90, 754, 592, 304, 635, 775, 804, 1, 150, 836, 1013, 828,
324, 565, 508, 113, 154, 708, 921, 703, 689, 138, 547, 911, 929, 82, 228,
443, 468, 480, 483, 922, 135, 877, 61, 578, 111, 860, 654, 15, 331, 851,
895, 484, 320, 218, 420, 190, 1019, 143, 362, 634, 141, 965, 10, 838,
632, 861, 34, 722, 580, 808, 869, 554, 598, 65, 954, 787, 337, 187, 281,
146, 563, 183, 668, 944, 171, 837, 23, 867, 541, 916, 741, 625, 123, 736,
186, 357, 665, 977, 179, 156, 219, 220, 216, 67, 870, 902, 774, 98, 820,
574, 613, 900, 755, 596, 370, 390, 769, 314, 701, 894, 56, 841, 949, 987,
631, 658, 587, 204, 797, 790, 522, 745, 9, 502, 763, 86, 719, 288, 706,
887, 728, 952, 311, 336, 446, 1002, 348, 96, 58, 199, 11, 901, 230, 833,
188, 352, 351, 973, 3, 906, 335, 301, 266, 244, 791, 564, 619, 909, 371,
444, 760, 657, 328, 647, 490, 425, 913, 511, 439, 540, 283, 40, 897, 849,
60, 570, 872, 257, 749, 912, 572, 1007, 170, 407, 898, 492, 79, 747, 732,
206, 454, 918, 375, 482, 399, 92, 748, 325, 163, 274, 405, 744, 260, 346,
707, 626, 595, 118, 842, 136, 279, 684, 584, 101, 500, 422, 149, 956,
1014, 493, 536, 705, 51, 914, 225, 409, 55, 822, 590, 448, 655, 205, 676,
925, 735, 431, 784, 54, 609, 604, 39, 812, 737, 729, 466, 14, 533, 958,
481, 770, 499, 855, 238, 182, 464, 569, 72, 947, 442, 642, 24, 87, 989,
688, 88, 47, 762, 623, 709, 455, 817, 526, 637, 258, 84, 845, 738, 768,
698, 423, 933, 664, 620, 607, 629, 212, 347, 249, 982, 935, 131, 89, 252,
927, 189, 788, 853, 237, 691, 646, 403, 1010, 734, 253, 874, 807, 903,
1020, 100, 802, 71, 799, 1003, 633, 355, 276, 300, 649, 64, 306, 161,
608, 496, 743, 180, 485, 819, 383, 1016, 226, 308, 393, 648, 107, 19, 37,
585, 2, 175, 645, 247, 527, 5, 419, 181, 317, 327, 519, 542, 289, 567,
430, 579, 950, 582, 994, 1021, 583, 234, 240, 976, 41, 160, 109, 677,
937, 210, 95, 959, 242, 753, 461, 114, 733, 368, 573, 458, 782, 605, 680,
544, 299, 73, 652, 905, 477, 690, 93, 824, 882, 277, 946, 361, 17, 945,
523, 472, 334, 930, 597, 603, 793, 404, 290, 942, 316, 731, 270, 960,
936, 133, 122, 821, 966, 679, 662, 907, 282, 968, 767, 653, 20, 697, 222,
164, 835, 30, 285, 886, 456, 436, 640, 286, 1015, 380, 840, 245, 724,
137, 593, 173, 130, 715, 85, 885, 551, 246, 449, 103, 366, 372, 714, 313,
865, 241, 699, 674, 374, 68, 421, 562, 292, 59, 809, 342, 651, 459, 227,
46, 711, 764, 868, 53, 413, 278, 800, 255, 993, 318, 854, 319, 695, 315,
469, 166, 489, 969, 730, 1001, 757, 873, 686, 197, 303, 919, 155, 673,
940, 712, 25, 999, 63, 863, 972, 967, 785, 152, 296, 512, 402, 377, 45,
899, 829, 354, 77, 69, 856, 417, 811, 953, 124, 418, 75, 794, 162, 414,
1018, 568, 254, 265, 772, 588, 16, 896, 157, 889, 298, 621, 110, 844,
1000, 108, 545, 601, 78, 862, 447, 185, 195, 818, 450, 387, 49, 805, 102,
986, 1005, 827, 329, 28, 932, 410, 287, 435, 451, 962, 517, 48, 174, 43,
893, 884, 261, 251, 516, 395, 910, 611, 29, 501, 223, 476, 364, 144, 871,
998, 687, 928, 115, 453, 513, 176, 94, 168, 667, 955, 353, 434, 382, 400,
139, 365, 996, 343, 948, 890, 1012, 663, 610, 718, 538, 1008, 639, 470,
848, 543, 1011, 859, 671, 756, 83, 427, 159, 746, 669, 589, 971, 524,
356, 995, 904, 256, 201, 988, 62, 397, 81, 720, 917, 209, 549, 943, 486,
76, 148, 207, 509, 644, 386, 700, 534, 177, 550, 961, 926, 546, 428, 284,
127, 294, 8, 269, 359, 506, 445, 997, 806, 591, 725, 178, 262, 846, 373,
831, 504, 305, 843, 553, 378, 1017, 783, 474, 683, 581, 200, 498, 694,
191, 217, 847, 941, 424, 235, 38, 74, 616, 786, 147, 4, 273, 214, 142,
575, 992, 463, 983, 243, 360, 970, 350, 267, 615, 766, 494, 31, 1009,
452, 710, 552, 128, 612, 600, 275, 322, 193
};
static_assert(std::size(GF1024_LOG) == 1024, "GF1024_EXP length should be 1024");
// Elements of GF(32) are encoded as vectors of length 5 over GF(2), that is,
// 5 binary digits. Each element (b_4, b_3, b_2, b_1, b_0) encodes a polynomial
// b_4*x^4 + b_3*x^3 + b_2*x^2 + b_1*x^1 + b_0 (modulo fmod).
// For example, 00001 = 1 is the multiplicative identity.
GF32_EXP[0] = 1;
GF32_LOG[0] = -1;
GF32_LOG[1] = 0;
int v = 1;
for (int i = 1; i < 31; ++i) {
// Multiplication by x is the same as shifting left by 1, as
// every coefficient of the polynomial is moved up one place.
v = v << 1;
// If the polynomial now has an x^5 term, we subtract fmod from it
// to remain working modulo fmod. Subtraction is the same as XOR in characteristic
// 2 fields.
if (v & 32) v ^= fmod;
GF32_EXP[i] = v;
GF32_LOG[v] = i;
}
// Build table for GF(1024)
std::array<int16_t, 1023> GF1024_EXP{};
std::array<int16_t, 1024> GF1024_LOG{};
GF1024_EXP[0] = 1;
GF1024_LOG[0] = -1;
GF1024_LOG[1] = 0;
// Each element v of GF(1024) is encoded as a 10 bit integer in the following way:
// v = v1 || v0 where v0, v1 are 5-bit integers (elements of GF(32)).
// The element (e) is encoded as 1 || 0, to represent 1*(e) + 0. Every other element
// a*(e) + b is represented as a || b (a and b are both GF(32) elements). Given (v),
// we compute (e)*(v) by multiplying in the following way:
//
// v0' = 23*v1
// v1' = 9*v1 + v0
// e*v = v1' || v0'
//
// Where 23, 9 are GF(32) elements encoded as described above. Multiplication in GF(32)
// is done using the log/exp tables:
// e^x * e^y = e^(x + y) so a * b = EXP[ LOG[a] + LOG [b] ]
// for non-zero a and b.
v = 1;
for (int i = 1; i < 1023; ++i) {
int v0 = v & 31;
int v1 = v >> 5;
int v0n = v1 ? GF32_EXP.at((GF32_LOG.at(v1) + GF32_LOG.at(23)) % 31) : 0;
int v1n = (v1 ? GF32_EXP.at((GF32_LOG.at(v1) + GF32_LOG.at(9)) % 31) : 0) ^ v0;
v = v1n << 5 | v0n;
GF1024_EXP[i] = v;
GF1024_LOG[v] = i;
}
return std::make_pair(GF1024_EXP, GF1024_LOG);
}
constexpr auto tables = GenerateGFTables();
constexpr const std::array<int16_t, 1023>& GF1024_EXP = tables.first;
constexpr const std::array<int16_t, 1024>& GF1024_LOG = tables.second;
/* Determine the final constant to use for the specified encoding. */
uint32_t EncodingConstant(Encoding encoding) {
@ -313,69 +223,58 @@ uint32_t PolyMod(const data& v)
* codeword, it is a multiple of G(X), so the residue is in fact just E(x) mod G(x). Note that all
* of the (e)^j are roots of G(x) by definition, so R((e)^j) = E((e)^j).
*
* Syndrome returns the three values packed into a 30-bit integer, where each 10 bits is one value.
* Let R(x) = r1*x^5 + r2*x^4 + r3*x^3 + r4*x^2 + r5*x + r6
*
* To compute R((e)^j), we are really computing:
* r1*(e)^(j*5) + r2*(e)^(j*4) + r3*(e)^(j*3) + r4*(e)^(j*2) + r5*(e)^j + r6
*
* Now note that all of the (e)^(j*i) for i in [5..0] are constants and can be precomputed.
* But even more than that, we can consider each coefficient as a bit-string.
* For example, take r5 = (b_5, b_4, b_3, b_2, b_1) written out as 5 bits. Then:
* r5*(e)^j = b_1*(e)^j + b_2*(2*(e)^j) + b_3*(4*(e)^j) + b_4*(8*(e)^j) + b_5*(16*(e)^j)
* where all the (2^i*(e)^j) are constants and can be precomputed.
*
* Then we just add each of these corresponding constants to our final value based on the
* bit values b_i. This is exactly what is done in the Syndrome function below.
*/
constexpr std::array<uint32_t, 25> GenerateSyndromeConstants() {
std::array<uint32_t, 25> SYNDROME_CONSTS{};
for (int k = 1; k < 6; ++k) {
for (int shift = 0; shift < 5; ++shift) {
int16_t b = GF1024_LOG.at(1 << shift);
int16_t c0 = GF1024_EXP.at((997*k + b) % 1023);
int16_t c1 = GF1024_EXP.at((998*k + b) % 1023);
int16_t c2 = GF1024_EXP.at((999*k + b) % 1023);
uint32_t c = c2 << 20 | c1 << 10 | c0;
int ind = 5*(k-1) + shift;
SYNDROME_CONSTS[ind] = c;
}
}
return SYNDROME_CONSTS;
}
constexpr std::array<uint32_t, 25> SYNDROME_CONSTS = GenerateSyndromeConstants();
/**
* Syndrome returns the three values s_997, s_998, and s_999 described above,
* packed into a 30-bit integer, where each group of 10 bits encodes one value.
*/
uint32_t Syndrome(const uint32_t residue) {
// Let R(x) = r1*x^5 + r2*x^4 + r3*x^3 + r4*x^2 + r5*x + r6
// low is the first 5 bits, corresponding to the r6 in the residue
// (the constant term of the polynomial).
uint32_t low = residue & 0x1f;
// Recall that XOR corresponds to addition in a characteristic 2 field.
//
// To compute R((e)^j), we are really computing:
// r1*(e)^(j*5) + r2*(e)^(j*4) + r3*(e)^(j*3) + r4*(e)^(j*2) + r5*(e)^j + r6
// Now note that all of the (e)^(j*i) for i in [5..0] are constants and can be precomputed
// for efficiency. But even more than that, we can consider each coefficient as a bit-string.
// For example, take r5 = (b_5, b_4, b_3, b_2, b_1) written out as 5 bits. Then:
// r5*(e)^j = b_1*(e)^j + b_2*(2*(e)^j) + b_3*(4*(e)^j) + b_4*(8*(e)^j) + b_5*(16*(e)^j)
// where all the (2^i*(e)^j) are constants and can be precomputed. Then we just add each
// of these corresponding constants to our final value based on the bit values b_i.
// This is exactly what is done below. Note that all three values of s_j for j in (997, 998,
// 999) are computed simultaneously.
//
// We begin by setting s_j = low = r6 for all three values of j, because these are unconditional.
uint32_t result = low ^ (low << 10) ^ (low << 20);
// Then for each following bit, we add the corresponding precomputed constant if the bit is 1.
// For example, 0x31edd3c4 is 1100011110 1101110100 1111000100 when unpacked in groups of 10
// bits, corresponding exactly to a^999 || a^998 || a^997 (matching the corresponding values in
// GF1024_EXP above).
//
// The following sage code reproduces these constants:
// for k in range(1, 6):
// for b in [1,2,4,8,16]:
// c0 = GF1024_EXP[(997*k + GF1024_LOG[b]) % 1023]
// c1 = GF1024_EXP[(998*k + GF1024_LOG[b]) % 1023]
// c2 = GF1024_EXP[(999*k + GF1024_LOG[b]) % 1023]
// c = c2 << 20 | c1 << 10 | c0
// print("0x%x" % c)
return low ^ (low << 10) ^ (low << 20) ^
((residue >> 5) & 1 ? 0x31edd3c4 : 0) ^
((residue >> 6) & 1 ? 0x335f86a8 : 0) ^
((residue >> 7) & 1 ? 0x363b8870 : 0) ^
((residue >> 8) & 1 ? 0x3e6390c9 : 0) ^
((residue >> 9) & 1 ? 0x2ec72192 : 0) ^
((residue >> 10) & 1 ? 0x1046f79d : 0) ^
((residue >> 11) & 1 ? 0x208d4e33 : 0) ^
((residue >> 12) & 1 ? 0x130ebd6f : 0) ^
((residue >> 13) & 1 ? 0x2499fade : 0) ^
((residue >> 14) & 1 ? 0x1b27d4b5 : 0) ^
((residue >> 15) & 1 ? 0x04be1eb4 : 0) ^
((residue >> 16) & 1 ? 0x0968b861 : 0) ^
((residue >> 17) & 1 ? 0x1055f0c2 : 0) ^
((residue >> 18) & 1 ? 0x20ab4584 : 0) ^
((residue >> 19) & 1 ? 0x1342af08 : 0) ^
((residue >> 20) & 1 ? 0x24f1f318 : 0) ^
((residue >> 21) & 1 ? 0x1be34739 : 0) ^
((residue >> 22) & 1 ? 0x35562f7b : 0) ^
((residue >> 23) & 1 ? 0x3a3c5bff : 0) ^
((residue >> 24) & 1 ? 0x266c96f7 : 0) ^
((residue >> 25) & 1 ? 0x25c78b65 : 0) ^
((residue >> 26) & 1 ? 0x1b1f13ea : 0) ^
((residue >> 27) & 1 ? 0x34baa2f4 : 0) ^
((residue >> 28) & 1 ? 0x3b61c0e1 : 0) ^
((residue >> 29) & 1 ? 0x265325c2 : 0);
// GF1024_EXP above). In this way, we compute all three values of s_j for j in (997, 998, 999)
// simultaneously. Recall that XOR corresponds to addition in a characteristic 2 field.
for (int i = 0; i < 25; ++i) {
result ^= ((residue >> (5+i)) & 1 ? SYNDROME_CONSTS.at(i) : 0);
}
return result;
}
/** Convert to lower case. */
@ -384,14 +283,7 @@ inline unsigned char LowerCase(unsigned char c)
return (c >= 'A' && c <= 'Z') ? (c - 'A') + 'a' : c;
}
void push_range(int from, int to, std::vector<int>& vec)
{
for (int i = from; i < to; i++) {
vec.push_back(i);
}
}
/** Return index of first invalid character in a Bech32 string. */
/** Return indices of invalid characters in a Bech32 string. */
bool CheckCharacters(const std::string& str, std::vector<int>& errors) {
bool lower = false, upper = false;
for (size_t i = 0; i < str.size(); ++i) {
@ -504,22 +396,28 @@ DecodeResult Decode(const std::string& str) {
}
/** Find index of an incorrect character in a Bech32 string. */
std::string LocateErrors(const std::string& str, std::vector<int>& error_locations) {
std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str) {
std::vector<int> error_locations{};
if (str.size() > 90) {
push_range(90, str.size(), error_locations);
return "Bech32 string too long";
error_locations.resize(str.size() - 90);
std::iota(error_locations.begin(), error_locations.end(), 90);
return std::make_pair("Bech32 string too long", std::move(error_locations));
}
if (!CheckCharacters(str, error_locations)){
return "Invalid character or mixed case";
return std::make_pair("Invalid character or mixed case", std::move(error_locations));
}
size_t pos = str.rfind('1');
if (pos == str.npos) {
return "Missing separator";
return std::make_pair("Missing separator", std::vector<int>{});
}
if (pos == 0 || pos + 7 > str.size()) {
error_locations.push_back(pos);
return "Invalid separator position";
return std::make_pair("Invalid separator position", std::move(error_locations));
}
std::string hrp;
for (size_t i = 0; i < pos; ++i) {
hrp += LowerCase(str[i]);
@ -532,13 +430,14 @@ std::string LocateErrors(const std::string& str, std::vector<int>& error_locatio
int8_t rev = CHARSET_REV[c];
if (rev == -1) {
error_locations.push_back(i);
return "Invalid Base 32 character";
return std::make_pair("Invalid Base 32 character", std::move(error_locations));
}
values[i - pos - 1] = rev;
}
// We attempt error detection with both bech32 and bech32m, and choose the one with the fewest errors
// We can't simply use the segwit version, because that may be one of the errors
std::optional<Encoding> error_encoding;
for (Encoding encoding : {Encoding::BECH32, Encoding::BECH32M}) {
std::vector<int> possible_errors;
// Recall that (ExpandHRP(hrp) ++ values) is interpreted as a list of coefficients of a polynomial
@ -558,9 +457,9 @@ std::string LocateErrors(const std::string& str, std::vector<int>& error_locatio
int s2 = syn >> 20;
// Get the discrete logs of these values in GF1024 for more efficient computation
int l_s0 = GF1024_LOG[s0];
int l_s1 = GF1024_LOG[s1];
int l_s2 = GF1024_LOG[s2];
int l_s0 = GF1024_LOG.at(s0);
int l_s1 = GF1024_LOG.at(s1);
int l_s2 = GF1024_LOG.at(s2);
// First, suppose there is only a single error. Then E(x) = e1*x^p1 for some position p1
// Then s0 = E((e)^997) = e1*(e)^(997*p1) and s1 = E((e)^998) = e1*(e)^(998*p1)
@ -595,15 +494,15 @@ std::string LocateErrors(const std::string& str, std::vector<int>& error_locatio
// (Because we are working in characteristic 2.)
// = e2*(e)^(998*p2) ((e)^p2 + (e)^p1)
//
int s2_s1p1 = s2 ^ (s1 == 0 ? 0 : GF1024_EXP[(l_s1 + p1) % 1023]);
int s2_s1p1 = s2 ^ (s1 == 0 ? 0 : GF1024_EXP.at((l_s1 + p1) % 1023));
if (s2_s1p1 == 0) continue;
int l_s2_s1p1 = GF1024_LOG[s2_s1p1];
int l_s2_s1p1 = GF1024_LOG.at(s2_s1p1);
// Similarly, s1 + s0*(e)^p1
// = e2*(e)^(997*p2) ((e)^p2 + (e)^p1)
int s1_s0p1 = s1 ^ (s0 == 0 ? 0 : GF1024_EXP[(l_s0 + p1) % 1023]);
int s1_s0p1 = s1 ^ (s0 == 0 ? 0 : GF1024_EXP.at((l_s0 + p1) % 1023));
if (s1_s0p1 == 0) continue;
int l_s1_s0p1 = GF1024_LOG[s1_s0p1];
int l_s1_s0p1 = GF1024_LOG.at(s1_s0p1);
// So, putting these together, we can compute the second error position as
// (e)^p2 = (s2 + s1^p1)/(s1 + s0^p1)
@ -616,12 +515,12 @@ std::string LocateErrors(const std::string& str, std::vector<int>& error_locatio
// Now we want to compute the error values e1 and e2.
// Similar to above, we compute s1 + s0*(e)^p2
// = e1*(e)^(997*p1) ((e)^p1 + (e)^p2)
int s1_s0p2 = s1 ^ (s0 == 0 ? 0 : GF1024_EXP[(l_s0 + p2) % 1023]);
int s1_s0p2 = s1 ^ (s0 == 0 ? 0 : GF1024_EXP.at((l_s0 + p2) % 1023));
if (s1_s0p2 == 0) continue;
int l_s1_s0p2 = GF1024_LOG[s1_s0p2];
int l_s1_s0p2 = GF1024_LOG.at(s1_s0p2);
// And compute (the log of) 1/((e)^p1 + (e)^p2))
int inv_p1_p2 = 1023 - GF1024_LOG[GF1024_EXP[p1] ^ GF1024_EXP[p2]];
int inv_p1_p2 = 1023 - GF1024_LOG.at(GF1024_EXP.at(p1) ^ GF1024_EXP.at(p2));
// Then (s1 + s0*(e)^p1) * (1/((e)^p1 + (e)^p2)))
// = e2*(e)^(997*p2)
@ -651,15 +550,19 @@ std::string LocateErrors(const std::string& str, std::vector<int>& error_locatio
}
} else {
// No errors
error_locations.clear();
return "";
return std::make_pair("", std::vector<int>{});
}
if (error_locations.empty() || (!possible_errors.empty() && possible_errors.size() < error_locations.size())) {
error_locations = std::move(possible_errors);
if (!error_locations.empty()) error_encoding = encoding;
}
}
return "Invalid checksum";
std::string error_message = error_encoding == Encoding::BECH32M ? "Invalid Bech32m checksum"
: error_encoding == Encoding::BECH32 ? "Invalid Bech32 checksum"
: "Invalid checksum";
return std::make_pair(error_message, std::move(error_locations));
}
} // namespace bech32

View File

@ -46,7 +46,7 @@ struct DecodeResult
DecodeResult Decode(const std::string& str);
/** Return the positions of errors in a Bech32 string. */
std::string LocateErrors(const std::string& str, std::vector<int>& error_locations);
std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str);
} // namespace bech32

View File

@ -102,17 +102,20 @@ CTxDestination DecodeDestination(const std::string& str, const CChainParams& par
return ScriptHash(hash);
}
if (!std::equal(script_prefix.begin(), script_prefix.end(), data.begin()) &&
!std::equal(pubkey_prefix.begin(), pubkey_prefix.end(), data.begin())) {
error_str = "Invalid prefix for Base58-encoded address";
} else {
// If the prefix of data matches either the script or pubkey prefix, the length must have been wrong
if ((data.size() >= script_prefix.size() &&
std::equal(script_prefix.begin(), script_prefix.end(), data.begin())) ||
(data.size() >= pubkey_prefix.size() &&
std::equal(pubkey_prefix.begin(), pubkey_prefix.end(), data.begin()))) {
error_str = "Invalid length for Base58 address";
} else {
error_str = "Invalid prefix for Base58-encoded address";
}
return CNoDestination();
} else if (!is_bech32) {
// Try Base58 decoding without the checksum, using a much larger max length
if (!DecodeBase58(str, data, 100)) {
error_str = "Invalid HRP or Base58 character in address";
error_str = "Not a valid Bech32 or Base58 encoding";
} else {
error_str = "Invalid checksum or length of Base58 address";
}
@ -185,13 +188,9 @@ CTxDestination DecodeDestination(const std::string& str, const CChainParams& par
}
// Perform Bech32 error location
if (!error_locations) {
std::vector<int> dummy_errors;
error_str = bech32::LocateErrors(str, dummy_errors);
} else {
error_str = bech32::LocateErrors(str, *error_locations);
}
auto res = bech32::LocateErrors(str);
error_str = res.first;
if (error_locations) *error_locations = std::move(res.second);
return CNoDestination();
}
} // namespace

View File

@ -70,34 +70,36 @@ BOOST_AUTO_TEST_CASE(bech32_testvectors_invalid)
"a12UEL5L",
"A12uEL5L",
"abcdef1qpzrz9x8gf2tvdw0s3jn54khce6mua7lmqqqxw",
"test1zg69w7y6hn0aqy352euf40x77qddq3dc",
};
static const std::pair<std::string, int> ERRORS[] = {
{"Invalid character or mixed case", 0},
{"Invalid character or mixed case", 0},
{"Invalid character or mixed case", 0},
{"Bech32 string too long", 90},
{"Missing separator", -1},
{"Invalid separator position", 0},
{"Invalid Base 32 character", 2},
{"Invalid separator position", 2},
{"Invalid character or mixed case", 8},
{"Invalid checksum", -1}, // The checksum is calculated using the uppercase form so the entire string is invalid, not just a few characters
{"Invalid separator position", 0},
{"Invalid separator position", 0},
{"Invalid character or mixed case", 3},
{"Invalid character or mixed case", 3},
{"Invalid checksum", 11}
static const std::pair<std::string, std::vector<int>> ERRORS[] = {
{"Invalid character or mixed case", {0}},
{"Invalid character or mixed case", {0}},
{"Invalid character or mixed case", {0}},
{"Bech32 string too long", {90}},
{"Missing separator", {}},
{"Invalid separator position", {0}},
{"Invalid Base 32 character", {2}},
{"Invalid separator position", {2}},
{"Invalid character or mixed case", {8}},
{"Invalid checksum", {}}, // The checksum is calculated using the uppercase form so the entire string is invalid, not just a few characters
{"Invalid separator position", {0}},
{"Invalid separator position", {0}},
{"Invalid character or mixed case", {3, 4, 5, 7}},
{"Invalid character or mixed case", {3}},
{"Invalid Bech32 checksum", {11}},
{"Invalid Bech32 checksum", {9, 16}},
};
static_assert(std::size(CASES) == std::size(ERRORS), "Bech32 CASES and ERRORS should have the same length");
int i = 0;
for (const std::string& str : CASES) {
const auto& err = ERRORS[i];
const auto dec = bech32::Decode(str);
BOOST_CHECK(dec.encoding == bech32::Encoding::INVALID);
std::vector<int> error_locations;
std::string error = bech32::LocateErrors(str, error_locations);
auto [error, error_locations] = bech32::LocateErrors(str);
BOOST_CHECK_EQUAL(err.first, error);
if (err.second == -1) BOOST_CHECK(error_locations.empty());
else BOOST_CHECK_EQUAL(err.second, error_locations[0]);
BOOST_CHECK(err.second == error_locations);
i++;
}
}
@ -120,34 +122,36 @@ BOOST_AUTO_TEST_CASE(bech32m_testvectors_invalid)
"16plkw9",
"1p2gdwpf",
"abcdef1l7aum6echk45nj2s0wdvt2fg8x9yrzpqzd3ryx",
"test1zg69v7y60n00qy352euf40x77qcusag6",
};
static const std::pair<std::string, int> ERRORS[] = {
{"Invalid character or mixed case", 0},
{"Invalid character or mixed case", 0},
{"Invalid character or mixed case", 0},
{"Bech32 string too long", 90},
{"Missing separator", -1},
{"Invalid separator position", 0},
{"Invalid Base 32 character", 2},
{"Invalid Base 32 character", 3},
{"Invalid separator position", 2},
{"Invalid Base 32 character", 8},
{"Invalid Base 32 character", 7},
{"Invalid checksum", -1},
{"Invalid separator position", 0},
{"Invalid separator position", 0},
{"Invalid checksum", 21},
static const std::pair<std::string, std::vector<int>> ERRORS[] = {
{"Invalid character or mixed case", {0}},
{"Invalid character or mixed case", {0}},
{"Invalid character or mixed case", {0}},
{"Bech32 string too long", {90}},
{"Missing separator", {}},
{"Invalid separator position", {0}},
{"Invalid Base 32 character", {2}},
{"Invalid Base 32 character", {3}},
{"Invalid separator position", {2}},
{"Invalid Base 32 character", {8}},
{"Invalid Base 32 character", {7}},
{"Invalid checksum", {}},
{"Invalid separator position", {0}},
{"Invalid separator position", {0}},
{"Invalid Bech32m checksum", {21}},
{"Invalid Bech32m checksum", {13, 32}},
};
static_assert(std::size(CASES) == std::size(ERRORS), "Bech32m CASES and ERRORS should have the same length");
int i = 0;
for (const std::string& str : CASES) {
const auto& err = ERRORS[i];
const auto dec = bech32::Decode(str);
BOOST_CHECK(dec.encoding == bech32::Encoding::INVALID);
std::vector<int> error_locations;
std::string error = bech32::LocateErrors(str, error_locations);
auto [error, error_locations] = bech32::LocateErrors(str);
BOOST_CHECK_EQUAL(err.first, error);
if (err.second == -1) BOOST_CHECK(error_locations.empty());
else BOOST_CHECK_EQUAL(err.second, error_locations[0]);
BOOST_CHECK(err.second == error_locations);
i++;
}
}

View File

@ -61,19 +61,19 @@ class InvalidAddressErrorMessageTest(BitcoinTestFramework):
def test_validateaddress(self):
# Invalid Bech32
self.check_invalid(BECH32_INVALID_SIZE, 'Invalid Bech32 address data size')
self.check_invalid(BECH32_INVALID_PREFIX, 'Invalid HRP or Base58 character in address')
self.check_invalid(BECH32_INVALID_PREFIX, 'Not a valid Bech32 or Base58 encoding')
self.check_invalid(BECH32_INVALID_BECH32, 'Version 1+ witness address must use Bech32m checksum')
self.check_invalid(BECH32_INVALID_BECH32M, 'Version 0 witness address must use Bech32 checksum')
self.check_invalid(BECH32_INVALID_VERSION, 'Invalid Bech32 address witness version')
self.check_invalid(BECH32_INVALID_V0_SIZE, 'Invalid Bech32 v0 address data size')
self.check_invalid(BECH32_TOO_LONG, 'Bech32 string too long', list(range(90, 108)))
self.check_invalid(BECH32_ONE_ERROR, 'Invalid checksum', [9])
self.check_invalid(BECH32_TWO_ERRORS, 'Invalid checksum', [22, 43])
self.check_invalid(BECH32_ONE_ERROR_CAPITALS, 'Invalid checksum', [38])
self.check_invalid(BECH32_ONE_ERROR, 'Invalid Bech32 checksum', [9])
self.check_invalid(BECH32_TWO_ERRORS, 'Invalid Bech32 checksum', [22, 43])
self.check_invalid(BECH32_ONE_ERROR_CAPITALS, 'Invalid Bech32 checksum', [38])
self.check_invalid(BECH32_NO_SEPARATOR, 'Missing separator')
self.check_invalid(BECH32_INVALID_CHAR, 'Invalid Base 32 character', [8])
self.check_invalid(BECH32_MULTISIG_TWO_ERRORS, 'Invalid checksum', [19, 30])
self.check_invalid(BECH32_WRONG_VERSION, 'Invalid checksum', [5])
self.check_invalid(BECH32_MULTISIG_TWO_ERRORS, 'Invalid Bech32 checksum', [19, 30])
self.check_invalid(BECH32_WRONG_VERSION, 'Invalid Bech32 checksum', [5])
# Valid Bech32
self.check_valid(BECH32_VALID)
@ -89,19 +89,19 @@ class InvalidAddressErrorMessageTest(BitcoinTestFramework):
self.check_valid(BASE58_VALID)
# Invalid address format
self.check_invalid(INVALID_ADDRESS, 'Invalid HRP or Base58 character in address')
self.check_invalid(INVALID_ADDRESS_2, 'Invalid HRP or Base58 character in address')
self.check_invalid(INVALID_ADDRESS, 'Not a valid Bech32 or Base58 encoding')
self.check_invalid(INVALID_ADDRESS_2, 'Not a valid Bech32 or Base58 encoding')
def test_getaddressinfo(self):
node = self.nodes[0]
assert_raises_rpc_error(-5, "Invalid Bech32 address data size", node.getaddressinfo, BECH32_INVALID_SIZE)
assert_raises_rpc_error(-5, "Invalid HRP or Base58 character in address", node.getaddressinfo, BECH32_INVALID_PREFIX)
assert_raises_rpc_error(-5, "Not a valid Bech32 or Base58 encoding", node.getaddressinfo, BECH32_INVALID_PREFIX)
assert_raises_rpc_error(-5, "Invalid prefix for Base58-encoded address", node.getaddressinfo, BASE58_INVALID_PREFIX)
assert_raises_rpc_error(-5, "Invalid HRP or Base58 character in address", node.getaddressinfo, INVALID_ADDRESS)
assert_raises_rpc_error(-5, "Not a valid Bech32 or Base58 encoding", node.getaddressinfo, INVALID_ADDRESS)
def run_test(self):
self.test_validateaddress()