Here is my solution: pass the HTML for your rich text to html_to_obj, then take the first child of the returned object.
/**
 * Parses HTML markup and converts it into a nested rich-text array.
 *
 * The markup is loaded with DOMDocument::loadHTML() and the document element
 * is handed to element_to_obj(). For an HTML fragment the parser supplies the
 * <html>/<body> wrappers, so the node built from <body> (type "root") is
 * expected at ["children"][0] of the returned array.
 *
 * @param string $html HTML markup to convert.
 *
 * @return array Nested array built by element_to_obj() for the document
 *               element. Blank input yields a wrapper holding an empty
 *               "root" node so that ["children"][0] is still defined.
 */
function html_to_obj($html)
{
    $html = (string) $html;
    $emptyResult = ['children' => [['type' => 'root']]];

    // loadHTML() rejects an empty string on PHP 8, so blank input is
    // answered directly instead of being sent to the parser.
    if (trim($html) === '') {
        return $emptyResult;
    }

    // Without a charset hint the parser may decode the bytes as a single-byte
    // encoding and garble UTF-8 text. Turning non-ASCII characters into
    // numeric entities keeps the input pure ASCII; the parser decodes the
    // entities back into the intended characters. Skipped when mbstring is
    // not available, which leaves the input untouched.
    if (function_exists('mb_encode_numericentity')) {
        $html = mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');
    }

    $dom = new DOMDocument();

    // Collect parser complaints internally instead of emitting PHP warnings,
    // then restore whatever error mode the caller had configured.
    $previousErrorMode = libxml_use_internal_errors(true);
    try {
        $dom->loadHTML($html);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }

    // Nothing parseable: there is no element to convert.
    if ($dom->documentElement === null) {
        return $emptyResult;
    }

    return element_to_obj($dom->documentElement);
}
/**
 * Recursively converts a DOM element into a nested rich-text array.
 *
 * Tag mapping (exact tag-name match):
 *  - body      => ["type" => "root"]
 *  - h1 .. h6  => ["type" => "heading", "level" => N]
 *  - p         => ["type" => "paragraph"]
 *  - a         => ["type" => "link"] (its href attribute is stored as "url")
 *  - ol / ul   => ["type" => "list", "listType" => "ordered" | "unordered"]
 *  - li        => ["type" => "list-item"]
 *  - strong/em => ["type" => "text", "bold" | "italic" => true, "value" => text]
 *  - any other tag (including the <html> wrapper) => no "type" key; only its
 *    attributes and children are carried over.
 *
 * Attributes are copied onto the node under their own name, except "href",
 * which is stored as "url". Text children become ["type" => "text",
 * "value" => ...] entries and element children are converted recursively,
 * both appended to "children".
 *
 * @param DOMElement $element Element to convert.
 *
 * @return array The converted node.
 */
function element_to_obj($element)
{
    $tag = $element->tagName;

    // Inline emphasis is a leaf: the element's whole text content becomes the
    // value (safe for empty or nested markup); attributes and child elements
    // are not carried over.
    if ($tag === 'strong' || $tag === 'em') {
        return [
            'type' => 'text',
            ($tag === 'strong' ? 'bold' : 'italic') => true,
            'value' => $element->textContent,
        ];
    }

    if (preg_match('/^h([1-6])$/', $tag, $heading) === 1) {
        $obj = ['type' => 'heading', 'level' => (int) $heading[1]];
    } else {
        // Exact matches only: a prefix test would treat e.g. <pre> as a
        // paragraph or <article> as a link.
        $obj = match ($tag) {
            'body' => ['type' => 'root'],
            'p' => ['type' => 'paragraph'],
            'a' => ['type' => 'link'],
            'ol' => ['type' => 'list', 'listType' => 'ordered'],
            'ul' => ['type' => 'list', 'listType' => 'unordered'],
            'li' => ['type' => 'list-item'],
            default => [],
        };
    }

    foreach ($element->attributes as $attribute) {
        $key = $attribute->name === 'href' ? 'url' : $attribute->name;

        // Never let a markup attribute (e.g. <ol type="a">) overwrite a
        // structural key such as "type" that was set from the tag name.
        if (!array_key_exists($key, $obj)) {
            $obj[$key] = $attribute->value;
        }
    }

    foreach ($element->childNodes as $child) {
        if ($child->nodeType === XML_TEXT_NODE) {
            $obj['children'][] = ['type' => 'text', 'value' => $child->wholeText];
        } elseif ($child->nodeType === XML_ELEMENT_NODE) {
            $obj['children'][] = element_to_obj($child);
        }
        // Comments and other node kinds carry no rich-text content; skip them.
    }

    return $obj;
}
// Example usage. html_to_obj() parses its argument as HTML, so the sample is
// written as HTML markup; Markdown would be treated as plain text. The pieces
// are concatenated without whitespace so that no whitespace-only text nodes
// end up between the block elements.
$json = html_to_obj(
    '<h2>Heading</h2><h3>subheadfing</h3>'
    . '<p><strong>Bold</strong> normal text <em>italics</em></p>'
    . '<p><a href="https://www.linkurl.com">link</a></p>'
    . '<ul><li>unordered list 1</li><li>unordered list 2</li></ul>'
    . '<ol><li>list item 1</li><li>list item 2</li></ol>'
);

// The returned array describes the <html> wrapper; its first child is the
// node built from <body>, i.e. the "root" of the rich text.
echo json_encode($json["children"][0]);