Introduced NFC normalization for strings in CryptoUtil and added String hashing functions (#2102)

This commit is contained in:
Nadav Kohen 2020-10-01 14:37:28 -05:00 committed by GitHub
parent ad6c2563bd
commit 4c202fd016
2 changed files with 151 additions and 1 deletions

View File

@ -3,6 +3,7 @@ package org.bitcoins.crypto
import org.bitcoins.core.util.BytesUtil
import org.bitcoins.testkit.core.gen.{CryptoGenerators, NumberGenerator}
import org.bitcoins.testkit.util.BitcoinSUnitTest
import org.scalacheck.Gen
import scodec.bits._
/**
@ -107,4 +108,121 @@ class CryptoUtilTest extends BitcoinSUnitTest {
}
}
// From https://github.com/dgarage/NDLC/blob/d816c0c517611b336f09ceaa43d400ecb5ab909b/NDLC.Tests/Data/normalization_tests.json
it must "normalize and serialize strings correctly" in {
  // True when every spelling in `forms` normalizes to the string `expected`.
  def allNormalizeTo(forms: Vector[String], expected: String): Boolean =
    forms.forall(form => CryptoUtil.normalize(form) == expected)

  // True when hash serialization of `str` yields exactly the bytes written as hex.
  def serializesTo(str: String, expectedHex: String): Boolean =
    CryptoUtil.serializeForHash(str) == ByteVector.fromValidHex(expectedHex)

  // Singletons: three spellings that must all collapse to U+00C5.
  assert(allNormalizeTo(Vector("\u00c5", "\u212b", "\u0041\u030a"), "\u00c5"))
  assert(serializesTo("\u00c5", "c385"))

  // Canonical composites: precomposed form and base letter + combining mark.
  assert(allNormalizeTo(Vector("\u00f4", "\u006f\u0302"), "\u00f4"))
  assert(serializesTo("\u00f4", "c3b4"))

  // Multiple combining marks stacked on one base letter.
  assert(allNormalizeTo(Vector("\u1e69", "\u0073\u0323\u0307"), "\u1e69"))
  assert(serializesTo("\u1e69", "e1b9a9"))

  // Compatibility composite: expected bytes are the code point's own UTF-8
  // encoding, i.e. it is not rewritten into the two-letter string below.
  assert(serializesTo("\ufb01", "efac81"))

  // Plain two-letter string, no composition involved.
  assert(serializesTo("fi", "6669"))

  // Ordinary accented word.
  assert(serializesTo("éléphant", "c3a96cc3a97068616e74"))
}
// From https://github.com/dgarage/NDLC/blob/d816c0c517611b336f09ceaa43d400ecb5ab909b/NDLC.Tests/Data/normalization_tests.json
it must "sha256 unicode strings correctly" in {
  // True when hashing `str` through the String overload gives the digest written as hex.
  def hashesTo(str: String, expectedHex: String): Boolean =
    CryptoUtil.sha256(str) == Sha256Digest(expectedHex)

  // True when every spelling in `forms` hashes to the same expected digest.
  def allHashTo(forms: Vector[String], expectedHex: String): Boolean =
    forms.forall(form => hashesTo(form, expectedHex))

  // Singletons: all three spellings must produce one digest.
  assert(
    allHashTo(
      Vector("\u00c5", "\u212b", "\u0041\u030a"),
      "0a94dc9d420d1142d6b71de60f9bf7e2f345a4d62c9f141b091539769ddf3075"))

  // Canonical composites.
  assert(
    allHashTo(
      Vector("\u00f4", "\u006f\u0302"),
      "cc912dbca598fd80ca7f5d98ece5d846b447f4a9ae3f73c352e2687eb293eef5"))

  // Multiple combining marks.
  assert(
    allHashTo(
      Vector("\u1e69", "\u0073\u0323\u0307"),
      "ceca1ea456e95ee498463622915209bb08a018e8ee9741b46b64ef1a08fb56ab"))

  // Compatibility composite and the plain two-letter string hash differently.
  assert(
    hashesTo(
      "\ufb01",
      "b6554cce8a93f1c8818280e2a768116a79216ad5501a85357d233409db87d340"))
  assert(
    hashesTo(
      "fi",
      "b4bdc848109722a383d0a972c6eb859f2abd29565b8c4cc7199e7c9eb708f1b7"))

  // Ordinary accented word.
  assert(
    hashesTo(
      "éléphant",
      "c941ae685f62cbe7bb47d0791af7154788fd9e873e5c57fd2449d1454ed5b16f"))
}
it must "encode strings correctly when hashing" in {
  // Property: each String hash overload must agree with its ByteVector
  // counterpart applied to serializeForHash of the same string.
  forAll(Gen.alphaStr) { input =>
    val inputBytes = CryptoUtil.serializeForHash(input)

    // The two vectors are index-aligned: position i in each refers to the
    // same hash function, once per input type.
    val hashersOfString: Vector[String => HashDigest] =
      Vector(
        CryptoUtil.sha256Hash160,
        CryptoUtil.sha256,
        CryptoUtil.taggedSha256(_, "test"),
        CryptoUtil.sha1,
        CryptoUtil.ripeMd160
      )

    val hashersOfBytes: Vector[ByteVector => HashDigest] =
      Vector(
        CryptoUtil.sha256Hash160,
        CryptoUtil.sha256,
        CryptoUtil.taggedSha256(_, "test"),
        CryptoUtil.sha1,
        CryptoUtil.ripeMd160
      )

    val overloadsAgree = hashersOfString.zip(hashersOfBytes).forall {
      case (hashString, hashBytes) =>
        hashString(input) == hashBytes(inputBytes)
    }

    assert(overloadsAgree)
  }
}
}

View File

@ -14,18 +14,34 @@ import scodec.bits.{BitVector, ByteVector}
*/
trait CryptoUtil {
/**
 * Normalizes a string to Unicode Normalization Form C (NFC).
 *
 * @param str the string to normalize
 * @return the NFC form of `str` as produced by `java.text.Normalizer`
 */
def normalize(str: String): String = {
  import java.text.Normalizer
  Normalizer.normalize(str, Normalizer.Form.NFC)
}
/**
 * Produces the byte representation of a string that the String hashing
 * overloads feed into their ByteVector counterparts.
 *
 * The string is first passed through `normalize` and the result is then
 * encoded as UTF-8.
 *
 * @param str the string to serialize
 * @return the UTF-8 bytes of the normalized string
 */
def serializeForHash(str: String): ByteVector = {
  val normalized = normalize(str)
  val utf8Bytes = normalized.getBytes("UTF-8")
  ByteVector(utf8Bytes)
}
/**
 * Computes RIPEMD160(SHA256(bytes)).
 *
 * @param bytes the bytes to hash
 * @return the RIPEMD-160 digest of the SHA-256 digest of `bytes`
 */
def sha256Hash160(bytes: ByteVector): Sha256Hash160Digest = {
  val sha256Bytes = sha256(bytes).bytes
  val ripeMd160Bytes = ripeMd160(sha256Bytes).bytes
  Sha256Hash160Digest(ripeMd160Bytes)
}
/**
 * String overload of sha256Hash160: serializes `str` with
 * `serializeForHash` and hashes the resulting bytes.
 */
def sha256Hash160(str: String): Sha256Hash160Digest = {
  val serialized = serializeForHash(str)
  sha256Hash160(serialized)
}
/**
 * Computes SHA256(SHA256(bytes)).
 *
 * @param bytes the bytes to hash
 * @return the SHA-256 digest of the SHA-256 digest of `bytes`
 */
def doubleSHA256(bytes: ByteVector): DoubleSha256Digest = {
  val firstPass: ByteVector = sha256(bytes).bytes
  val secondPass: ByteVector = sha256(firstPass).bytes
  DoubleSha256Digest(secondPass)
}
/**
 * String overload of doubleSHA256: serializes `str` with
 * `serializeForHash` and double-hashes the resulting bytes.
 */
def doubleSHA256(str: String): DoubleSha256Digest = {
  val serialized = serializeForHash(str)
  doubleSHA256(serialized)
}
/** Takes sha256(bytes). */
def sha256(bytes: ByteVector): Sha256Digest = {
val hash = MessageDigest.getInstance("SHA-256").digest(bytes.toArray)
@ -37,12 +53,20 @@ trait CryptoUtil {
sha256(bits.toByteVector)
}
/**
 * String overload of sha256: serializes `str` with `serializeForHash`
 * and hashes the resulting bytes.
 */
def sha256(str: String): Sha256Digest = {
  val serialized = serializeForHash(str)
  sha256(serialized)
}
/**
 * Computes a tagged hash: SHA256(SHA256(tag) ++ SHA256(tag) ++ bytes).
 *
 * The tag is hashed through the String overload of `sha256`, so it is
 * serialized with `serializeForHash` rather than with the JVM's
 * platform-default charset (which `tag.getBytes()` would use).
 *
 * @param bytes the message bytes appended after the doubled tag hash
 * @param tag   the tag string whose hash prefixes the message
 * @return the SHA-256 digest of the tag-prefixed message
 */
def taggedSha256(bytes: ByteVector, tag: String): Sha256Digest = {
  // Single definition of tagHash; the tag hash is repeated twice as the prefix.
  val tagHash = sha256(tag)
  val tagBytes = tagHash.bytes ++ tagHash.bytes
  sha256(tagBytes ++ bytes)
}
/**
 * String overload of taggedSha256: serializes `str` with
 * `serializeForHash` and computes the tagged hash of the resulting bytes.
 */
def taggedSha256(str: String, tag: String): Sha256Digest = {
  val serialized = serializeForHash(str)
  taggedSha256(serialized, tag)
}
// The tag "BIP0340/challenge"
private val schnorrChallengeTagBytes = {
ByteVector
@ -85,6 +109,10 @@ trait CryptoUtil {
Sha1Digest(ByteVector(hash))
}
/**
 * String overload of sha1: serializes `str` with `serializeForHash`
 * and hashes the resulting bytes.
 */
def sha1(str: String): Sha1Digest = {
  val serialized = serializeForHash(str)
  sha1(serialized)
}
/** Performs RIPEMD160(bytes). */
def ripeMd160(bytes: ByteVector): RipeMd160Digest = {
//from this tutorial http://rosettacode.org/wiki/RIPEMD-160#Scala
@ -96,6 +124,10 @@ trait CryptoUtil {
RipeMd160Digest(ByteVector(out))
}
/**
 * String overload of ripeMd160: serializes `str` with
 * `serializeForHash` and hashes the resulting bytes.
 */
def ripeMd160(str: String): RipeMd160Digest = {
  val serialized = serializeForHash(str)
  ripeMd160(serialized)
}
/**
* Calculates `HMAC-SHA512(key, data)`
*/