txscript: Optimize script disasm.

This converts the DisasmString function to make use of the new
zero-allocation script tokenizer instead of the far less efficient
parseScript, thereby significantly optimizing the function.

In order to facilitate this, the opcode disassembly functionality is
split into a separate function called disasmOpcode that accepts the
opcode struct and data independently as opposed to requiring a parsed
opcode.  The new function also accepts a pointer to a string builder so
the disassembly can be built more efficiently.

While here, the comment is modified to explicitly call out the script
version semantics.

The following is a before and after comparison of a large script:

benchmark                   old ns/op     new ns/op     delta
BenchmarkDisasmString-8     102902        40124         -61.01%

benchmark                   old allocs     new allocs     delta
BenchmarkDisasmString-8     46             51             +10.87%

benchmark                   old bytes     new bytes     delta
BenchmarkDisasmString-8     389324        130552        -66.47%
This commit is contained in:
Dave Collins 2019-03-13 01:11:05 -05:00 committed by Olaoluwa Osuntokun
parent 099784267e
commit f980c9a28d
No known key found for this signature in database
GPG Key ID: 3BBD59E99B280306
2 changed files with 61 additions and 34 deletions

View File

@ -9,8 +9,10 @@ import (
"crypto/sha1"
"crypto/sha256"
"encoding/binary"
"encoding/hex"
"fmt"
"hash"
"strings"
"golang.org/x/crypto/ripemd160"
@ -815,45 +817,60 @@ func (pop *parsedOpcode) checkMinimalDataPush() error {
return nil
}
// print returns a human-readable string representation of the opcode for use
// in script disassembly.
func (pop *parsedOpcode) print(oneline bool) string {
// The reference implementation one-line disassembly replaces opcodes
// which represent values (e.g. OP_0 through OP_16 and OP_1NEGATE)
// with the raw value. However, when not doing a one-line dissassembly,
// we prefer to show the actual opcode names. Thus, only replace the
// opcodes in question when the oneline flag is set.
opcodeName := pop.opcode.name
if oneline {
// disasmOpcode writes a human-readable disassembly of the provided opcode and
// data into the provided buffer. The compact flag indicates the disassembly
// should print a more compact representation of data-carrying and small integer
// opcodes. For example, OP_0 through OP_16 are replaced with the numeric value
// and data pushes are printed as only the hex representation of the data as
// opposed to including the opcode that specifies the amount of data to push as
// well.
func disasmOpcode(buf *strings.Builder, op *opcode, data []byte, compact bool) {
// Replace opcode which represent values (e.g. OP_0 through OP_16 and
// OP_1NEGATE) with the raw value when performing a compact disassembly.
opcodeName := op.name
if compact {
if replName, ok := opcodeOnelineRepls[opcodeName]; ok {
opcodeName = replName
}
// Nothing more to do for non-data push opcodes.
if pop.opcode.length == 1 {
return opcodeName
// Either write the human-readable opcode or the parsed data in hex for
// data-carrying opcodes.
switch {
case op.length == 1:
buf.WriteString(opcodeName)
default:
buf.WriteString(hex.EncodeToString(data))
}
return fmt.Sprintf("%x", pop.data)
return
}
// Nothing more to do for non-data push opcodes.
if pop.opcode.length == 1 {
return opcodeName
}
buf.WriteString(opcodeName)
switch op.length {
// Only write the opcode name for non-data push opcodes.
case 1:
return
// Add length for the OP_PUSHDATA# opcodes.
retString := opcodeName
switch pop.opcode.length {
case -1:
retString += fmt.Sprintf(" 0x%02x", len(pop.data))
buf.WriteString(fmt.Sprintf(" 0x%02x", len(data)))
case -2:
retString += fmt.Sprintf(" 0x%04x", len(pop.data))
buf.WriteString(fmt.Sprintf(" 0x%04x", len(data)))
case -4:
retString += fmt.Sprintf(" 0x%08x", len(pop.data))
buf.WriteString(fmt.Sprintf(" 0x%08x", len(data)))
}
return fmt.Sprintf("%s 0x%02x", retString, pop.data)
buf.WriteString(fmt.Sprintf(" 0x%02x", data))
}
// print returns a human-readable string representation of the opcode for use
// in script disassembly.
func (pop *parsedOpcode) print(compact bool) string {
var buf strings.Builder
disasmOpcode(&buf, pop.opcode, pop.data, compact)
return buf.String()
}
// bytes returns any data associated with the opcode encoded as it would be in

View File

@ -8,6 +8,7 @@ import (
"bytes"
"encoding/binary"
"fmt"
"strings"
"time"
"github.com/btcsuite/btcd/chaincfg/chainhash"
@ -275,20 +276,29 @@ func unparseScript(pops []parsedOpcode) ([]byte, error) {
// script up to the point the failure occurred along with the string '[error]'
// appended. In addition, the reason the script failed to parse is returned
// if the caller wants more information about the failure.
func DisasmString(buf []byte) (string, error) {
var disbuf bytes.Buffer
opcodes, err := parseScript(buf)
for _, pop := range opcodes {
disbuf.WriteString(pop.print(true))
//
// NOTE: This function is only valid for version 0 scripts. Since the function
// does not accept a script version, the results are undefined for other script
// versions.
func DisasmString(script []byte) (string, error) {
const scriptVersion = 0
var disbuf strings.Builder
tokenizer := MakeScriptTokenizer(scriptVersion, script)
if tokenizer.Next() {
disasmOpcode(&disbuf, tokenizer.op, tokenizer.Data(), true)
}
for tokenizer.Next() {
disbuf.WriteByte(' ')
disasmOpcode(&disbuf, tokenizer.op, tokenizer.Data(), true)
}
if disbuf.Len() > 0 {
disbuf.Truncate(disbuf.Len() - 1)
}
if err != nil {
if tokenizer.Err() != nil {
if tokenizer.ByteIndex() != 0 {
disbuf.WriteByte(' ')
}
disbuf.WriteString("[error]")
}
return disbuf.String(), err
return disbuf.String(), tokenizer.Err()
}
// removeOpcode will remove any opcode matching ``opcode'' from the opcode