Add inscriptions parsing code

2025-02-24 14:50:52 +01:00 · 2024-10-07 20:01:55 +09:00 · 2024-10-07 20:01:55 +09:00 · d31c2665ee
commit d31c2665ee
parent a700bd0ef1
1 changed files with 401 additions and 0 deletions
--- a/frontend/src/app/shared/ord/inscription.utils.ts
+++ b/frontend/src/app/shared/ord/inscription.utils.ts
@ -0,0 +1,401 @@
+// Adapted from https://github.com/ordpool-space/ordpool-parser/tree/ce04d7a5b6bb1cf37b9fdadd77ba430f5bd6e7d6/src
+// Utils functions to decode ord inscriptions
+
+import { Inscription } from "../../components/ord-data/ord-data.component";
+
+export const OP_FALSE = 0x00;
+export const OP_IF = 0x63;
+export const OP_0 = 0x00;
+
+export const OP_PUSHBYTES_3 = 0x03; //  3 -- not an actual opcode, but used in documentation --> pushes the next 3 bytes onto the stack.
+export const OP_PUSHDATA1 = 0x4c;   // 76 -- The next byte contains the number of bytes to be pushed onto the stack.
+export const OP_PUSHDATA2 = 0x4d;   // 77 -- The next two bytes contain the number of bytes to be pushed onto the stack in little endian order.
+export const OP_PUSHDATA4 = 0x4e;   // 78 -- The next four bytes contain the number of bytes to be pushed onto the stack in little endian order.
+export const OP_ENDIF = 0x68;       // 104 -- Ends an if/else block.
+
+export const OP_1NEGATE = 0x4f;            // 79 -- The number -1 is pushed onto the stack.
+export const OP_RESERVED = 0x50;           // 80 -- Transaction is invalid unless occuring in an unexecuted OP_IF branch
+export const OP_PUSHNUM_1 = 0x51;          // 81 -- also known as OP_1
+export const OP_PUSHNUM_2 = 0x52;          // 82 -- also known as OP_2
+export const OP_PUSHNUM_3 = 0x53;          // 83 -- also known as OP_3
+export const OP_PUSHNUM_4 = 0x54;          // 84 -- also known as OP_4
+export const OP_PUSHNUM_5 = 0x55;          // 85 -- also known as OP_5
+export const OP_PUSHNUM_6 = 0x56;          // 86 -- also known as OP_6
+export const OP_PUSHNUM_7 = 0x57;          // 87 -- also known as OP_7
+export const OP_PUSHNUM_8 = 0x58;          // 88 -- also known as OP_8
+export const OP_PUSHNUM_9 = 0x59;          // 89 -- also known as OP_9
+export const OP_PUSHNUM_10 = 0x5a;         // 90 -- also known as OP_10
+export const OP_PUSHNUM_11 = 0x5b;         // 91 -- also known as OP_11
+export const OP_PUSHNUM_12 = 0x5c;         // 92 -- also known as OP_12
+export const OP_PUSHNUM_13 = 0x5d;         // 93 -- also known as OP_13
+export const OP_PUSHNUM_14 = 0x5e;         // 94 -- also known as OP_14
+export const OP_PUSHNUM_15 = 0x5f;         // 95 -- also known as OP_15
+export const OP_PUSHNUM_16 = 0x60;         // 96 -- also known as OP_16
+
+export const OP_RETURN = 0x6a;             // 106 -- a standard way of attaching extra data to transactions is to add a zero-value output with a scriptPubKey consisting of OP_RETURN followed by data
+
+//////////////////////////// Helper ///////////////////////////////
+
+/**
+ * Inscriptions may include fields before an optional body. Each field consists of two data pushes, a tag and a value.
+ * Currently, there are six defined fields:
+ */
+export const knownFields = {
+  // content_type, with a tag of 1, whose value is the MIME type of the body.
+  content_type: 0x01,
+
+  // pointer, with a tag of 2, see pointer docs: https://docs.ordinals.com/inscriptions/pointer.html
+  pointer: 0x02,
+
+  // parent, with a tag of 3, see provenance docs: https://docs.ordinals.com/inscriptions/provenance.html
+  parent: 0x03,
+
+  // metadata, with a tag of 5, see metadata docs: https://docs.ordinals.com/inscriptions/metadata.html
+  metadata: 0x05,
+
+  // metaprotocol, with a tag of 7, whose value is the metaprotocol identifier.
+  metaprotocol: 0x07,
+
+  // content_encoding, with a tag of 9, whose value is the encoding of the body.
+  content_encoding: 0x09,
+
+  // delegate, with a tag of 11, see delegate docs: https://docs.ordinals.com/inscriptions/delegate.html
+  delegate: 0xb
+}
+
+/**
+ * Retrieves the value for a given field from an array of field objects.
+ * It returns the value of the first object where the tag matches the specified field.
+ *
+ * @param fields - An array of objects containing tag and value properties.
+ * @param field - The field number to search for.
+ * @returns The value associated with the first matching field, or undefined if no match is found.
+ */
+export function getKnownFieldValue(fields: { tag: number; value: Uint8Array }[], field: number): Uint8Array | undefined {
+  const knownField = fields.find(x =>
+    x.tag === field);
+
+  if (knownField === undefined) {
+    return undefined;
+  }
+
+  return knownField.value;
+}
+
+/**
+ * Retrieves the values for a given field from an array of field objects.
+ * It returns the values of all objects where the tag matches the specified field.
+ *
+ * @param fields - An array of objects containing tag and value properties.
+ * @param field - The field number to search for.
+ * @returns An array of Uint8Array values associated with the matching fields. If no matches are found, an empty array is returned.
+ */
+export function getKnownFieldValues(fields: { tag: number; value: Uint8Array }[], field: number): Uint8Array[] {
+  const knownFields = fields.filter(x =>
+    x.tag === field
+  );
+
+  return knownFields.map(field => field.value);
+}
+
+/**
+ * Searches for the next position of the ordinal inscription mark (0063036f7264)
+ * within the raw transaction data, starting from a given position.
+ *
+ * This function looks for a specific sequence of 6 bytes that represents the start of an ordinal inscription.
+ * If the sequence is found, the function returns the index immediately following the inscription mark.
+ * If the sequence is not found, the function returns -1, indicating no inscription mark was found.
+ *
+ * Note: This function uses a simple hardcoded approach based on the fixed length of the inscription mark.
+ *
+ * @returns The position immediately after the inscription mark, or -1 if not found.
+ */
+export function getNextInscriptionMark(raw: Uint8Array, startPosition: number): number {
+
+  // OP_FALSE
+  // OP_IF
+  // OP_PUSHBYTES_3: This pushes the next 3 bytes onto the stack.
+  // 0x6f, 0x72, 0x64: These bytes translate to the ASCII string "ord"
+  const inscriptionMark = new Uint8Array([OP_FALSE, OP_IF, OP_PUSHBYTES_3, 0x6f, 0x72, 0x64]);
+
+  for (let index = startPosition; index <= raw.length - 6; index++) {
+    if (raw[index] === inscriptionMark[0] &&
+      raw[index + 1] === inscriptionMark[1] &&
+      raw[index + 2] === inscriptionMark[2] &&
+      raw[index + 3] === inscriptionMark[3] &&
+      raw[index + 4] === inscriptionMark[4] &&
+      raw[index + 5] === inscriptionMark[5]) {
+      return index + 6;
+    }
+  }
+
+  return -1;
+}
+
+/////////////////////////////// Reader ///////////////////////////////
+
+/**
+ * Reads a specified number of bytes from a Uint8Array starting from a given pointer.
+ *
+ * @param raw - The Uint8Array from which bytes are to be read.
+ * @param pointer - The position in the array from where to start reading.
+ * @param n - The number of bytes to read.
+ * @returns A tuple containing the read bytes as Uint8Array and the updated pointer position.
+ */
+export function readBytes(raw: Uint8Array, pointer: number, n: number): [Uint8Array, number] {
+  const slice = raw.slice(pointer, pointer + n);
+  return [slice, pointer + n];
+}
+
+/**
+ * Reads data based on the Bitcoin script push opcode starting from a specified pointer in the raw data.
+ * Handles different opcodes and direct push (where the opcode itself signifies the number of bytes to push).
+ *
+ * @param raw - The raw transaction data as a Uint8Array.
+ * @param pointer - The current position in the raw data array.
+ * @returns A tuple containing the read data as Uint8Array and the updated pointer position.
+ */
+export function readPushdata(raw: Uint8Array, pointer: number): [Uint8Array, number] {
+
+  let [opcodeSlice, newPointer] = readBytes(raw, pointer, 1);
+  const opcode = opcodeSlice[0];
+
+  // Handle the special case of OP_0 (0x00) which pushes an empty array (interpreted as zero)
+  // fixes #18
+  if (opcode === OP_0) {
+    return [new Uint8Array(), newPointer];
+  }
+
+  // Handle the special case of OP_1NEGATE (-1)
+  if (opcode === OP_1NEGATE) {
+    // OP_1NEGATE pushes the value -1 onto the stack, represented as 0x81 in Bitcoin Script
+    return [new Uint8Array([0x81]), newPointer];
+  }
+
+  // Handle minimal push numbers OP_PUSHNUM_1 (0x51) to OP_PUSHNUM_16 (0x60)
+  // which are used to push the values 0x01 (decimal 1) through 0x10 (decimal 16) onto the stack.
+  // To get the value, we can subtract OP_RESERVED (0x50) from the opcode to get the value to be pushed.
+  if (opcode >= OP_PUSHNUM_1 && opcode <= OP_PUSHNUM_16) {
+    // Convert opcode to corresponding byte value
+    const byteValue = opcode - OP_RESERVED;
+    return [Uint8Array.from([byteValue]), newPointer];
+  }
+
+  // Handle direct push of 1 to 75 bytes (OP_PUSHBYTES_1 to OP_PUSHBYTES_75)
+  if (1 <= opcode && opcode <= 75) {
+    return readBytes(raw, newPointer, opcode);
+  }
+
+  let numBytes: number;
+  switch (opcode) {
+    case OP_PUSHDATA1: numBytes = 1; break;
+    case OP_PUSHDATA2: numBytes = 2; break;
+    case OP_PUSHDATA4: numBytes = 4; break;
+    default:
+      throw new Error(`Invalid push opcode ${opcode} at position ${pointer}`);
+  }
+
+  let [dataSizeArray, nextPointer] = readBytes(raw, newPointer, numBytes);
+  let dataSize = littleEndianBytesToNumber(dataSizeArray);
+  return readBytes(raw, nextPointer, dataSize);
+}
+
+//////////////////////////// Conversion ////////////////////////////
+
+/**
+ * Converts a Uint8Array containing UTF-8 encoded data to a normal a UTF-16 encoded string.
+ *
+ * @param bytes - The Uint8Array containing UTF-8 encoded data.
+ * @returns The corresponding UTF-16 encoded JavaScript string.
+ */
+export function bytesToUnicodeString(bytes: Uint8Array): string {
+  const decoder = new TextDecoder('utf-8');
+  return decoder.decode(bytes);
+}
+
+/**
+ * Convert a Uint8Array to a string by treating each byte as a character code.
+ * It avoids interpreting bytes as UTF-8 encoded sequences.
+ * --> Again: it ignores UTF-8 encoding, which is necessary for binary content!
+ *
+ * Note: This method is different from just using `String.fromCharCode(...combinedData)` which can
+ * cause a "Maximum call stack size exceeded" error for large arrays due to the limitation of
+ * the spread operator in JavaScript. (previously the parser broke here, because of large content)
+ *
+ * @param bytes - The byte array to convert.
+ * @returns The resulting string where each byte value is treated as a direct character code.
+ */
+export function bytesToBinaryString(bytes: Uint8Array): string {
+  let resultStr = '';
+  for (let i = 0; i < bytes.length; i++) {
+    resultStr += String.fromCharCode(bytes[i]);
+  }
+  return resultStr;
+}
+
+/**
+ * Converts a hexadecimal string to a Uint8Array.
+ *
+ * @param hex - A string of hexadecimal characters.
+ * @returns A Uint8Array representing the hex string.
+ */
+export function hexToBytes(hex: string): Uint8Array {
+  const bytes = new Uint8Array(hex.length / 2);
+  for (let i = 0, j = 0; i < hex.length; i += 2, j++) {
+    bytes[j] = parseInt(hex.slice(i, i + 2), 16);
+  }
+  return bytes;
+}
+
+/**
+ * Converts a Uint8Array to a hexadecimal string.
+ *
+ * @param bytes - A Uint8Array to convert.
+ * @returns A string of hexadecimal characters representing the byte array.
+ */
+export function bytesToHex(bytes: Uint8Array): string {
+  if (!bytes) {
+    return null;
+  }
+  return Array.from(bytes, byte => byte.toString(16).padStart(2, '0')).join('');
+}
+
+/**
+ * Converts a little-endian byte array to a JavaScript number.
+ *
+ * This function interprets the provided bytes in little-endian format, where the least significant byte comes first.
+ * It constructs an integer value representing the number encoded by the bytes.
+ *
+ * @param byteArray - An array containing the bytes in little-endian format.
+ * @returns The number represented by the byte array.
+ */
+export function littleEndianBytesToNumber(byteArray: Uint8Array): number {
+  let number = 0;
+  for (let i = 0; i < byteArray.length; i++) {
+    // Extract each byte from byteArray, shift it to the left by 8 * i bits, and combine it with number.
+    // The shifting accounts for the little-endian format where the least significant byte comes first.
+    number |= byteArray[i] << (8 * i);
+  }
+  return number;
+}
+
+/**
+ * Concatenates multiple Uint8Array objects into a single Uint8Array.
+ *
+ * @param arrays - An array of Uint8Array objects to concatenate.
+ * @returns A new Uint8Array containing the concatenated results of the input arrays.
+ */
+export function concatUint8Arrays(arrays: Uint8Array[]): Uint8Array {
+  if (arrays.length === 0) {
+      return new Uint8Array();
+  }
+
+  const totalLength = arrays.reduce((sum, arr) => sum + arr.length, 0);
+  const result = new Uint8Array(totalLength);
+  let offset = 0;
+
+  for (const array of arrays) {
+      result.set(array, offset);
+      offset += array.length;
+  }
+
+  return result;
+}
+
+////////////////////////////// Inscription ///////////////////////////
+
+/**
+ * Extracts fields from the raw data until OP_0 is encountered.
+ *
+ * @param raw - The raw data to read.
+ * @param pointer - The current pointer where the reading starts.
+ * @returns An array of fields and the updated pointer position.
+ */
+export function extractFields(raw: Uint8Array, pointer: number): [{ tag: number; value: Uint8Array }[], number] {
+
+  const fields: { tag: number; value: Uint8Array }[] = [];
+  let newPointer = pointer;
+  let slice: Uint8Array;
+
+  while (newPointer < raw.length &&
+    // normal inscription - content follows now
+    (raw[newPointer] !== OP_0) &&
+    // delegate - inscription has no further content and ends directly here
+    (raw[newPointer] !== OP_ENDIF)
+  ) {
+
+    // tags are encoded by ord as single-byte data pushes, but are accepted by ord as either single-byte pushes, or as OP_NUM data pushes.
+    // tags greater than or equal to 256 should be encoded as little endian integers with trailing zeros omitted.
+    // see: https://github.com/ordinals/ord/issues/2505
+    [slice, newPointer] = readPushdata(raw, newPointer);
+    const tag = slice.length === 1 ? slice[0] : littleEndianBytesToNumber(slice);
+
+    [slice, newPointer] = readPushdata(raw, newPointer);
+    const value = slice;
+
+    fields.push({ tag, value });
+  }
+
+  return [fields, newPointer];
+}
+
+
+/**
+ * Extracts inscription data starting from the current pointer.
+ * @param raw - The raw data to read.
+ * @param pointer - The current pointer where the reading starts.
+ * @returns The parsed inscription or nullx
+ */
+export function extractInscriptionData(raw: Uint8Array, pointer: number): Inscription | null {
+
+  try {
+
+    let fields: { tag: number; value: Uint8Array }[];
+    let newPointer: number;
+    let slice: Uint8Array;
+
+    [fields, newPointer] = extractFields(raw, pointer);
+
+    // Now we are at the beginning of the body
+    // (or at the end of the raw data if there's no body)
+    if (newPointer < raw.length && raw[newPointer] === OP_0) {
+      newPointer++; // Skip OP_0
+    }
+
+    // Collect body data until OP_ENDIF
+    const data: Uint8Array[] = [];
+    while (newPointer < raw.length && raw[newPointer] !== OP_ENDIF) {
+      [slice, newPointer] = readPushdata(raw, newPointer);
+      data.push(slice);
+    }
+
+    const combinedLengthOfAllArrays = data.reduce((acc, curr) => acc + curr.length, 0);
+    let combinedData = new Uint8Array(combinedLengthOfAllArrays);
+
+    // Copy all segments from data into combinedData, forming a single contiguous Uint8Array
+    let idx = 0;
+    for (const segment of data) {
+      combinedData.set(segment, idx);
+      idx += segment.length;
+    }
+
+    const contentTypeRaw = getKnownFieldValue(fields, knownFields.content_type);
+    let contentType: string;
+
+    if (!contentTypeRaw) {
+      contentType = 'undefined';
+    } else {
+      contentType = bytesToUnicodeString(contentTypeRaw);
+    }
+
+    return {
+      content_type_str: contentType,
+      body: combinedData.slice(0, 150), // Limit body to 150 bytes for now
+      body_length: combinedData.length,
+      delegate_txid: getKnownFieldValue(fields, knownFields.delegate) ? bytesToHex(getKnownFieldValue(fields, knownFields.delegate).reverse()) : null
+    };
+
+  } catch (ex) {
+    return null;
+  }
+}