BookStack/resources/js/editor/markdown-parser.js

import schema from "./schema";
import markdownit from "markdown-it";
import {MarkdownParser, defaultMarkdownParser} from "prosemirror-markdown";
import {htmlToDoc, KeyedMultiStack} from "./util";

const tokens = defaultMarkdownParser.tokens;

// These are really a placeholder on the object to allow the below
// parser.tokenHandlers.html_[block/inline] hacks to work as desired.
tokens.html_block = {block: "callout", noCloseToken: true};
tokens.html_inline = {mark: "underline"};

const tokenizer = markdownit("commonmark", {html: true});
const parser = new MarkdownParser(schema, tokenizer, tokens);

// When we come across HTML blocks we use the document schema to parse them
// into nodes then re-add those back into the parser state.
parser.tokenHandlers.html_block = function(state, tok, tokens, i) {
    const contentDoc = htmlToDoc(tok.content || '');
    for (const node of contentDoc.content.content) {
        state.addNode(node.type, node.attrs, node.content);
    }
};

// When we come across inline HTML we parse out the tag and keep track of
// that in a stack, along with the marks they parse out to.
// We open/close the marks within the state depending on the tag open/close type.
const tagStack = new KeyedMultiStack();
parser.tokenHandlers.html_inline = function(state, tok, tokens, i) {
    const isClosing = tok.content.startsWith('</');
    const isSelfClosing = tok.content.endsWith('/>');
    const tagName = parseTagNameFromHtmlTokenContent(tok.content);

    if (!isClosing) {
        const completeTag = isSelfClosing ?  tok.content : `${tok.content}a</${tagName}>`;
        const marks = extractMarksFromHtml(completeTag);
        tagStack.push(tagName, marks);
        for (const mark of marks) {
            state.openMark(mark);
        }
    }

    if (isSelfClosing || isClosing) {
        const marks = (tagStack.pop(tagName) || []).reverse();
        for (const mark of marks) {
            state.closeMark(mark);
        }
    }
}

function extractMarksFromHtml(html) {
    const contentDoc = htmlToDoc('<p>' + (html || '') + '</p>');
    const marks = contentDoc?.content?.content?.[0]?.content?.content?.[0]?.marks;
    return marks || [];
}

/**
 * @param {string} tokenContent
 * @return {string}
 */
function parseTagNameFromHtmlTokenContent(tokenContent) {
    return tokenContent.split(' ')[0].replace(/[<>\/]/g, '').toLowerCase();
}

export default parser;
Got callouts about working, simplified markdown setup 2022-01-07 21:22:07 +00:00			`import schema from "./schema";`
			`import markdownit from "markdown-it";`
			`import {MarkdownParser, defaultMarkdownParser} from "prosemirror-markdown";`
Got underline working in editor Major step, since this is the first inline HTML element which needed advanced parsing out on the markdown side, since not commonmark supported. 2022-01-10 13:38:32 +00:00			`import {htmlToDoc, KeyedMultiStack} from "./util";`
Got callouts about working, simplified markdown setup 2022-01-07 21:22:07 +00:00
			`const tokens = defaultMarkdownParser.tokens;`

Got underline working in editor Major step, since this is the first inline HTML element which needed advanced parsing out on the markdown side, since not commonmark supported. 2022-01-10 13:38:32 +00:00			`// These are really a placeholder on the object to allow the below`
			`// parser.tokenHandlers.html_[block/inline] hacks to work as desired.`
Got callouts about working, simplified markdown setup 2022-01-07 21:22:07 +00:00			`tokens.html_block = {block: "callout", noCloseToken: true};`
Got underline working in editor Major step, since this is the first inline HTML element which needed advanced parsing out on the markdown side, since not commonmark supported. 2022-01-10 13:38:32 +00:00			`tokens.html_inline = {mark: "underline"};`
Got callouts about working, simplified markdown setup 2022-01-07 21:22:07 +00:00
			`const tokenizer = markdownit("commonmark", {html: true});`
			`const parser = new MarkdownParser(schema, tokenizer, tokens);`

Got underline working in editor Major step, since this is the first inline HTML element which needed advanced parsing out on the markdown side, since not commonmark supported. 2022-01-10 13:38:32 +00:00			`// When we come across HTML blocks we use the document schema to parse them`
			`// into nodes then re-add those back into the parser state.`
Got callouts about working, simplified markdown setup 2022-01-07 21:22:07 +00:00			`parser.tokenHandlers.html_block = function(state, tok, tokens, i) {`
			`const contentDoc = htmlToDoc(tok.content \|\| '');`
			`for (const node of contentDoc.content.content) {`
			`state.addNode(node.type, node.attrs, node.content);`
			`}`
			`};`

Got underline working in editor Major step, since this is the first inline HTML element which needed advanced parsing out on the markdown side, since not commonmark supported. 2022-01-10 13:38:32 +00:00			`// When we come across inline HTML we parse out the tag and keep track of`
			`// that in a stack, along with the marks they parse out to.`
			`// We open/close the marks within the state depending on the tag open/close type.`
			`const tagStack = new KeyedMultiStack();`
			`parser.tokenHandlers.html_inline = function(state, tok, tokens, i) {`
			`const isClosing = tok.content.startsWith('</');`
			`const isSelfClosing = tok.content.endsWith('/>');`
			`const tagName = parseTagNameFromHtmlTokenContent(tok.content);`

			`if (!isClosing) {`
			const completeTag = isSelfClosing ? tok.content : `${tok.content}a</${tagName}>`;
			`const marks = extractMarksFromHtml(completeTag);`
			`tagStack.push(tagName, marks);`
			`for (const mark of marks) {`
			`state.openMark(mark);`
			`}`
			`}`

			`if (isSelfClosing \|\| isClosing) {`
			`const marks = (tagStack.pop(tagName) \|\| []).reverse();`
			`for (const mark of marks) {`
			`state.closeMark(mark);`
			`}`
			`}`
			`}`

			`function extractMarksFromHtml(html) {`
			`const contentDoc = htmlToDoc('<p>' + (html \|\| '') + '</p>');`
			`const marks = contentDoc?.content?.content?.[0]?.content?.content?.[0]?.marks;`
			`return marks \|\| [];`
			`}`

			`/**`
			`* @param {string} tokenContent`
			`* @return {string}`
			`*/`
			`function parseTagNameFromHtmlTokenContent(tokenContent) {`
			`return tokenContent.split(' ')[0].replace(/[<>\/]/g, '').toLowerCase();`
			`}`

Got callouts about working, simplified markdown setup 2022-01-07 21:22:07 +00:00			`export default parser;`