Rich Text metafield JSON input via GraphQL

Hi All,

I wanted to use GraphQL to update a rich text metafield, but found that the metafield requires JSON in Shopify's own node format, which cannot be produced by converting HTML to JSON directly.

I looked for an existing solution but could not find one, so I implemented my own in PHP. I thought I’d share it here in case anyone else needs it or can improve my code.

An example of the JSON required by Shopify:

{
  "type": "root",
  "children": [
    {
      "type": "heading",
      "children": [
        {
          "type": "text",
          "value": "Heading"
        }
      ],
      "level": 1
    },
    {
      "type": "paragraph",
      "children": [
        {
          "type": "text",
          "value": "bold ",
          "bold": true
        },
        {
          "type": "text",
          "value": "normal text "
        },
        {
          "type": "text",
          "value": "italics",
          "italic": true
        }
      ]
    },
    {
      "type": "paragraph",
      "children": [
        {
          "type": "text",
          "value": ""
        },
        {
          "url": "link url",
          "title": "link title",
          "type": "link",
          "children": [
            {
              "type": "text",
              "value": "link",
              "italic": true
            }
          ]
        },
        {
          "type": "text",
          "value": ""
        }
      ]
    },
    {
      "listType": "unordered",
      "type": "list",
      "children": [
        {
          "type": "list-item",
          "children": [
            {
              "type": "text",
              "value": "unordered list 1"
            }
          ]
        },
        {
          "type": "list-item",
          "children": [
            {
              "type": "text",
              "value": "unordered list 2"
            }
          ]
        }
      ]
    },
    {
      "listType": "ordered",
      "type": "list",
      "children": [
        {
          "type": "list-item",
          "children": [
            {
              "type": "text",
              "value": "list item 1"
            }
          ]
        },
        {
          "type": "list-item",
          "children": [
            {
              "type": "text",
              "value": "list item 2"
            }
          ]
        }
      ]
    }
  ]
}
2 Likes

Here is my solution: pass the HTML for your rich text to html_to_obj, then take the first child of the returned object. The returned object wraps the whole parsed document, and its first child is the "root" node that Shopify expects.

/**
 * Parse an HTML fragment and convert the resulting document into the nested
 * array structure produced by element_to_obj().
 *
 * The conversion starts at the document element (<html>), so the returned
 * array is a wrapper; the "root" node built from <body> is found at
 * ["children"][0] of the result.
 *
 * @param string $html HTML fragment to convert (expected to be UTF-8).
 * @return array Wrapper array for the document element, or an empty array
 *               when the parser produced no document element.
 */
function html_to_obj($html)
{
  $dom = new DOMDocument();

  // Collect libxml parse warnings internally instead of emitting PHP warnings
  // for HTML5 tags or sloppy markup; the previous setting is restored below.
  $previous = libxml_use_internal_errors(true);

  // Without an encoding hint loadHTML treats the input as ISO-8859-1 and
  // mangles multi-byte UTF-8 characters. The hint is a processing instruction
  // placed before the markup, so documentElement is still <html>.
  $dom->loadHTML('<?xml encoding="UTF-8">' . $html);

  libxml_clear_errors();
  libxml_use_internal_errors($previous);

  // Nothing parseable (e.g. empty input): return an empty structure rather
  // than handing null to element_to_obj().
  if ($dom->documentElement === null) {
    return array();
  }

  return element_to_obj($dom->documentElement);
}

/**
 * Recursively convert a DOM element into the nested array structure used for
 * rich text metafield JSON.
 *
 * Tag mapping (exact tag names):
 *   body        -> root
 *   h1..h6      -> heading with the matching level
 *   p           -> paragraph
 *   a           -> link (the href attribute is stored as "url")
 *   ol / ul     -> list with listType "ordered" / "unordered"
 *   li          -> list-item
 *   strong / em -> leaf text node flagged bold / italic
 *
 * Any other element (for example <html>) becomes a container without a "type"
 * key that only carries its attributes and children.
 *
 * @param DOMElement $element Element to convert.
 * @return array Node array; container nodes get a "children" list when they
 *               have text or element children.
 */
function element_to_obj($element)
{
  $tag = strtolower($element->tagName);
  $obj = array();

  if ($tag === "body") {
    $obj = array("type" => "root");
  } elseif (preg_match('/^h([1-6])$/', $tag, $matches)) {
    $obj = array("type" => "heading", "level" => (int) $matches[1]);
  } elseif ($tag === "p") {
    $obj = array("type" => "paragraph");
  } elseif ($tag === "a") {
    $obj = array("type" => "link");
  } elseif ($tag === "ol") {
    $obj = array("type" => "list", "listType" => "ordered");
  } elseif ($tag === "ul") {
    $obj = array("type" => "list", "listType" => "unordered");
  } elseif ($tag === "li") {
    $obj = array("type" => "list-item");
  } elseif ($tag === "strong") {
    // Leaf node: textContent keeps the text of nested markup and is an empty
    // string for an empty element, so no child lookup can fail.
    return array("type" => "text", "bold" => true, "value" => $element->textContent);
  } elseif ($tag === "em") {
    return array("type" => "text", "italic" => true, "value" => $element->textContent);
  }

  // Copy attributes onto the node; href is renamed to "url".
  foreach ($element->attributes as $attribute) {
    if ($attribute->name == "href") {
      $obj["url"] = $attribute->value;
    } else {
      $obj[$attribute->name] = $attribute->value;
    }
  }

  foreach ($element->childNodes as $subElement) {
    if ($subElement->nodeType == XML_TEXT_NODE) {
      $obj["children"][] = array("type" => "text", "value" => $subElement->wholeText);
    } elseif ($subElement->nodeType == XML_ELEMENT_NODE) {
      $obj["children"][] = element_to_obj($subElement);
    }
    // Comments and other node types have no tag name or attributes and carry
    // no rich text content, so they are skipped.
  }

  return $obj;
}

// Sample input covering headings, bold/italic text, a link and both list types.
// NOTE(review): the post displayed this sample as rendered Markdown; the HTML
// tags below are reconstructed from that rendering - confirm against the
// intended input. The fragments are joined without whitespace so that no
// whitespace-only text nodes end up between the block elements.
$json = html_to_obj(
  '<h2>Heading</h2><h3>subheadfing</h3>'
  . '<p><strong>Bold</strong> normal text <em>italics</em></p>'
  . '<p><a href="https://www.linkurl.com">link</a></p>'
  . '<ul><li>unordered list 1</li><li>unordered list 2</li></ul>'
  . '<ol><li>list item 1</li><li>list item 2</li></ol>'
);
// html_to_obj returns a wrapper for the parsed document; its first child is
// the "root" node, which is what gets serialized.
echo json_encode($json["children"][0]);
3 Likes

Great work. It inspired me to replicate it in nodejs:

npm install jsdom
import { JSDOM } from "jsdom";

// Elements that carry no rich text content and are dropped from the output.
const SKIPPED_TAGS = new Set(["BR", "HR", "IMG", "INPUT", "META", "LINK"]);

// DOM nodeType values used below.
const ELEMENT_NODE = 1;
const TEXT_NODE = 3;

/**
 * Recursively convert a DOM element into a rich text metafield node object.
 *
 * Tag names are matched exactly (upper case, as reported by `tagName`):
 *   BODY -> root, H1..H6 -> heading, P -> paragraph, A -> link,
 *   OL / UL -> ordered / unordered list, LI -> list-item,
 *   STRONG / B -> bold text leaf, EM / I -> italic text leaf.
 * Every other element is treated as a paragraph container.
 *
 * @param {Element} element - DOM element to convert.
 * @returns {object} Node object. Container nodes always have a `children`
 *   array; text leaves have a `value` string instead.
 */
function elementToRichtextObject(element) {
  const { tagName } = element;

  // Inline formatting becomes a leaf text node. The element's full text is
  // used so that text after or inside nested markup is not lost.
  if (tagName === "STRONG" || tagName === "B") {
    return { type: "text", bold: true, value: element.textContent ?? "" };
  }
  if (tagName === "EM" || tagName === "I") {
    return { type: "text", italic: true, value: element.textContent ?? "" };
  }

  let obj;
  const headingMatch = /^H([1-6])$/.exec(tagName);
  if (headingMatch) {
    obj = { type: "heading", level: Number.parseInt(headingMatch[1], 10) };
  } else {
    switch (tagName) {
      case "BODY":
        obj = { type: "root" };
        break;
      case "A":
        obj = { type: "link" };
        break;
      case "OL":
        obj = { type: "list", listType: "ordered" };
        break;
      case "UL":
        obj = { type: "list", listType: "unordered" };
        break;
      case "LI":
        obj = { type: "list-item" };
        break;
      case "P":
      default:
        // Unknown elements fall back to a paragraph container.
        obj = { type: "paragraph" };
    }
  }

  // Only href is carried over; it is stored under the `url` key.
  for (const attribute of Array.from(element.attributes)) {
    if (attribute.name === "href") {
      obj.url = attribute.value;
    }
  }

  obj.children = [];
  for (const child of Array.from(element.childNodes)) {
    if (child.nodeType === TEXT_NODE) {
      obj.children.push({ type: "text", value: child.textContent });
    } else if (child.nodeType === ELEMENT_NODE && !SKIPPED_TAGS.has(child.tagName)) {
      obj.children.push(elementToRichtextObject(child));
    }
    // Other node types (comments, etc.) are ignored.
  }

  return obj;
}

/**
 * Convert an HTML string into a rich text object tree.
 *
 * Line-break tags are turned into newline characters first, then the markup is
 * parsed with JSDOM and the document body is converted, so the returned object
 * is the node produced for <body>.
 *
 * @param {string} html - HTML markup to convert.
 * @returns {object} Result of elementToRichtextObject for the parsed body.
 */
export function htmlToRichtextObject(html) {
  // <br>, <br/> and <br /> (any letter case) each become a single "\n".
  const htmlWithNewlines = html.replace(/<br\s*\/?>/gi, "\n");
  const { document } = new JSDOM(htmlWithNewlines).window;
  return elementToRichtextObject(document.body);
}

import { htmlToRichtextObject } from "#root/utils/html-to-richtext-object.js";

// HTML source for the field.
const content = '<p>Your HTML content</p>';
// The field value is the JSON-serialized rich text object built from the HTML.
const serializedContent = JSON.stringify(htmlToRichtextObject(content));
const fields = [
  { key: "content", value: serializedContent },
  // other fields
];