Rich Text metafield JSON input via GraphQL

Topic summary

A developer encountered the challenge of updating Shopify rich text metafields via GraphQL, which requires a specific JSON structure rather than direct HTML conversion.

Problem Context:
Shopify’s rich text metafields need JSON in a nested tree format with nodes like “root”, “heading”, “paragraph”, “text”, “link”, and “list” types, each with specific properties (e.g., bold/italic flags, heading levels, list types).

Solutions Shared:

  • PHP Implementation: The original poster provided a complete PHP solution using DOMDocument to parse HTML and recursively convert elements to the required JSON structure. The code handles headings (h2-h3), paragraphs, links, ordered/unordered lists, and text formatting (bold/italic).

  • Node.js Port: Another contributor adapted the solution for Node.js using the jsdom library, expanding support to handle h1-h6 headings, both <strong> and <b> tags for bold text, <em> for italics, and special handling for self-closing tags like <br>.

Status: The discussion provides working code examples in both languages. No unresolved issues remain, though the solutions could potentially be extended to support additional HTML elements or edge cases.

Summarized with AI on October 24. AI used: claude-sonnet-4-5-20250929.

Hi All,

I wanted to use GraphQL to update a rich text metafield, but found that the metafield requires JSON in a specific structure rather than a direct conversion of the HTML.

I looked for an existing solution but could not find one, so I implemented my own in PHP. I'm sharing it here in case anyone else needs it or can improve my code.

An example of the JSON required by Shopify:

{
  "type": "root",
  "children": [
    {
      "type": "heading",
      "children": [
        {
          "type": "text",
          "value": "Heading"
        }
      ],
      "level": 1
    },
    {
      "type": "paragraph",
      "children": [
        {
          "type": "text",
          "value": "bold ",
          "bold": true
        },
        {
          "type": "text",
          "value": "normal text "
        },
        {
          "type": "text",
          "value": "italics",
          "italic": true
        }
      ]
    },
    {
      "type": "paragraph",
      "children": [
        {
          "type": "text",
          "value": ""
        },
        {
          "url": "link url",
          "title": "link title",
          "type": "link",
          "children": [
            {
              "type": "text",
              "value": "link",
              "italic": true
            }
          ]
        },
        {
          "type": "text",
          "value": ""
        }
      ]
    },
    {
      "listType": "unordered",
      "type": "list",
      "children": [
        {
          "type": "list-item",
          "children": [
            {
              "type": "text",
              "value": "unordered list 1"
            }
          ]
        },
        {
          "type": "list-item",
          "children": [
            {
              "type": "text",
              "value": "unordered list 2"
            }
          ]
        }
      ]
    },
    {
      "listType": "ordered",
      "type": "list",
      "children": [
        {
          "type": "list-item",
          "children": [
            {
              "type": "text",
              "value": "list item 1"
            }
          ]
        },
        {
          "type": "list-item",
          "children": [
            {
              "type": "text",
              "value": "list item 2"
            }
          ]
        }
      ]
    }
  ]
}
2 Likes

Here is my solution: pass the HTML for your rich text to html_to_obj, then take the first child of the returned object (the converted body element, which becomes the "root" node).

/**
 * Parses an HTML string and converts the resulting document tree into the
 * nested array structure used for rich text metafields.
 *
 * The conversion starts at the document element, so the rich text "root"
 * node (built from <body>) is found among the returned array's children.
 *
 * NOTE(review): DOMDocument::loadHTML() may not treat the input as UTF-8
 * unless the markup declares its encoding - confirm with non-ASCII content.
 *
 * @param string $html HTML markup to convert.
 * @return array Converted tree for the parsed document element.
 */
function html_to_obj($html)
{
  $document = new DOMDocument();
  $document->loadHTML($html);
  $documentRoot = $document->documentElement;
  return element_to_obj($documentRoot);
}

/**
 * Recursively converts a DOM element into the nested array structure used by
 * Shopify rich text metafields.
 *
 * Tag mapping (tag names are matched exactly, not by prefix):
 *   body           -> root
 *   h1..h6         -> heading with the matching level
 *   p              -> paragraph
 *   a              -> link (the href attribute is stored as "url")
 *   ol / ul        -> list with listType ordered / unordered
 *   li             -> list-item
 *   strong / b     -> text node with "bold" => true
 *   em / i         -> text node with "italic" => true
 *   anything else  -> paragraph
 *
 * @param DOMElement $element Element to convert.
 * @return array Associative array describing the node and its children.
 */
function element_to_obj($element)
{
  $tag = strtolower($element->tagName);

  // Inline formatting elements collapse into one flagged text node. Using
  // textContent keeps all nested text and is safe for empty elements.
  if ($tag === "strong" || $tag === "b") {
    return array("type" => "text", "bold" => true, "value" => $element->textContent);
  }
  if ($tag === "em" || $tag === "i") {
    return array("type" => "text", "italic" => true, "value" => $element->textContent);
  }

  $heading = array();
  if ($tag === "body") {
    $obj = array("type" => "root");
  } elseif (preg_match('/^h([1-6])$/', $tag, $heading) === 1) {
    $obj = array("type" => "heading", "level" => (int) $heading[1]);
  } elseif ($tag === "p") {
    $obj = array("type" => "paragraph");
  } elseif ($tag === "a") {
    $obj = array("type" => "link");
  } elseif ($tag === "ol") {
    $obj = array("type" => "list", "listType" => "ordered");
  } elseif ($tag === "ul") {
    $obj = array("type" => "list", "listType" => "unordered");
  } elseif ($tag === "li") {
    $obj = array("type" => "list-item");
  } else {
    // Unknown elements (including the <html> wrapper) fall back to a
    // paragraph so the result is always a defined array.
    $obj = array("type" => "paragraph");
  }

  // Attributes are copied onto the node; href is renamed to "url".
  // NOTE(review): attributes other than href/title are copied verbatim as in
  // the original code - confirm the target API accepts the extra keys.
  foreach ($element->attributes as $attribute) {
    $key = $attribute->name === "href" ? "url" : $attribute->name;
    $obj[$key] = $attribute->value;
  }

  foreach ($element->childNodes as $child) {
    if ($child->nodeType === XML_TEXT_NODE) {
      $obj["children"][] = array("type" => "text", "value" => $child->wholeText);
    } elseif ($child->nodeType === XML_ELEMENT_NODE) {
      $obj["children"][] = element_to_obj($child);
    }
    // Comments and other node types carry no rich text content; skip them.
  }

  return $obj;
}

// Sample input covering headings, bold/italic text, a link, and both list
// types. The markup is kept free of whitespace between block elements so no
// stray text nodes end up directly under the root node.
$json = html_to_obj(
  '<h2>Heading</h2><h3>subheadfing</h3>'
  . '<p><strong>Bold</strong> normal text <em>italics</em></p>'
  . '<p><a href="https://www.linkurl.com">link</a></p>'
  . '<ul><li>unordered list 1</li><li>unordered list 2</li></ul>'
  . '<ol><li>list item 1</li><li>list item 2</li></ol>'
);
// html_to_obj converts the document element; its first child is the
// converted <body>, i.e. the rich text "root" node.
echo json_encode($json["children"][0]);
3 Likes

Great work. It inspired me to replicate it in nodejs:

npm install jsdom
import { JSDOM } from "jsdom";

// DOM nodeType values (Node.ELEMENT_NODE / Node.TEXT_NODE), spelled out so the
// converter does not depend on a global `Node`.
const ELEMENT_NODE = 1;
const TEXT_NODE = 3;

// Void and non-content elements that have no rich text equivalent.
const SKIPPED_TAGS = new Set(["BR", "HR", "IMG", "INPUT", "META", "LINK"]);

// Container elements that map one-to-one onto a rich text node.
const CONTAINER_NODES = new Map([
  ["BODY", { type: "root" }],
  ["P", { type: "paragraph" }],
  ["A", { type: "link" }],
  ["OL", { type: "list", listType: "ordered" }],
  ["UL", { type: "list", listType: "unordered" }],
  ["LI", { type: "list-item" }],
]);

const HEADING_TAG = /^H([1-6])$/;

/**
 * Builds the (childless) rich text node for a container element.
 * Tags are matched exactly; unknown tags fall back to a paragraph.
 */
function createContainerNode(tagName) {
  const template = CONTAINER_NODES.get(tagName);
  if (template !== undefined) {
    return { ...template };
  }
  const headingMatch = HEADING_TAG.exec(tagName);
  if (headingMatch !== null) {
    return { type: "heading", level: Number.parseInt(headingMatch[1], 10) };
  }
  return { type: "paragraph" };
}

/**
 * Recursively converts a DOM element into a Shopify rich text node.
 *
 * - BODY becomes the "root" node; H1-H6, P, A, OL, UL and LI map to heading,
 *   paragraph, link, list and list-item nodes.
 * - STRONG/B and EM/I collapse into a single text node flagged bold/italic,
 *   carrying the element's full text content.
 * - On links, `href` is stored as `url` and `title` is preserved.
 * - BR, HR, IMG, INPUT, META and LINK children are skipped.
 * - Any other element is treated as a paragraph.
 *
 * @param {Element} element - DOM element to convert.
 * @returns {object} Rich text node; container nodes carry a `children` array.
 */
function elementToRichtextObject(element) {
  const { tagName } = element;

  // Inline formatting: use the whole textContent so nested or trailing text
  // inside the tag is not dropped.
  if (tagName === "STRONG" || tagName === "B") {
    return { type: "text", bold: true, value: element.textContent ?? "" };
  }
  if (tagName === "EM" || tagName === "I") {
    return { type: "text", italic: true, value: element.textContent ?? "" };
  }

  const obj = createContainerNode(tagName);

  if (obj.type === "link") {
    for (const { name, value } of Array.from(element.attributes)) {
      if (name === "href") {
        obj.url = value;
      } else if (name === "title") {
        obj.title = value;
      }
    }
  }

  obj.children = [];
  for (const child of Array.from(element.childNodes)) {
    if (child.nodeType === TEXT_NODE) {
      obj.children.push({ type: "text", value: child.textContent });
    } else if (child.nodeType === ELEMENT_NODE && !SKIPPED_TAGS.has(child.tagName)) {
      obj.children.push(elementToRichtextObject(child));
    }
  }

  return obj;
}

/**
 * Converts an HTML string into a rich text object rooted at the parsed
 * document's <body>.
 *
 * <br> tags are turned into newline characters before parsing, so line breaks
 * survive as part of the surrounding text.
 *
 * @param {string} html - HTML markup to convert.
 * @returns {object} Rich text node produced for the document body.
 */
export function htmlToRichtextObject(html) {
  const htmlWithNewlines = html.replace(/<br\s*\/?>/gi, "\n");
  const { document } = new JSDOM(htmlWithNewlines).window;
  return elementToRichtextObject(document.body);
}

import { htmlToRichtextObject } from "#root/utils/html-to-richtext-object.js";

// Sample markup; substitute the real HTML for the metafield.
const content = '<p>Your HTML content</p>';

// The converted object is serialized to a JSON string for the field value.
const richTextValue = JSON.stringify(htmlToRichtextObject(content));

const fields = [
  { key: "content", value: richTextValue },
  // other fields
];