gitlab-org--gitlab-foss/app/assets/javascripts/content_editor/services/hast_to_prosemirror_convert...

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

651 lines
20 KiB
JavaScript
Raw Normal View History

/**
* This module implements a function that converts a Hast Abstract
* Syntax Tree (AST) to a ProseMirror document.
*
* It is based on prosemirror-markdown's from_markdown module
* https://github.com/ProseMirror/prosemirror-markdown/blob/master/src/from_markdown.js.
*
* It deviates significantly from the original because
* prosemirror-markdown supports converting a markdown-it AST instead of a
* HAST one. It also adds sourcemap attributes automatically to every
* ProseMirror node and mark created during the conversion process.
*
* We recommend becoming familiar with HAST and ProseMirror documents to
* facilitate the understanding of the behavior implemented in this module.
*
* Unist syntax tree documentation: https://github.com/syntax-tree/unist
* Hast tree documentation: https://github.com/syntax-tree/hast
* ProseMirror document documentation: https://prosemirror.net/docs/ref/#model.Document_Structure
* visit-parents documentation: https://github.com/syntax-tree/unist-util-visit-parents
*/
import { Mark } from 'prosemirror-model';
import { visitParents, SKIP } from 'unist-util-visit-parents';
import { isFunction, isString, noop, mapValues } from 'lodash';
// Shared empty attributes object used when opening the root node,
// which is created without any attributes of its own.
const NO_ATTRIBUTES = {};
/**
 * Combines two ProseMirror text nodes into a single one when
 * both of them carry an identical set of marks.
 *
 * @param {ProseMirror.Node} a The node whose text comes first
 * @param {ProseMirror.Node} b The node whose text comes second
 * @returns {ProseMirror.Node|null} A text node holding the text of `a`
 * followed by the text of `b`. Returns `null` when either argument is
 * missing, is not a text node, or when the mark sets differ.
 */
function maybeMerge(a, b) {
  const bothAreText = Boolean(a?.isText && b?.isText);

  if (!bothAreText || !Mark.sameSet(a.marks, b.marks)) {
    return null;
  }

  return a.withText(a.text + b.text);
}
/**
 * Builds the sourcemap attributes of a Hast node. The Content
 * Editor uses this information to restore the original source
 * of a node when the user doesn't change it.
 *
 * Unist syntax tree documentation: https://github.com/syntax-tree/unist
 * Hast node documentation: https://github.com/syntax-tree/hast
 *
 * @param {HastNode} hastNode A Hast node
 * @param {String} markdown Markdown source file
 *
 * @returns {Object} An empty object when the node's position lacks a
 * start or an end point. Otherwise, an object with these attributes:
 *
 * - sourceMapKey: A string with the shape `startOffset:endOffset`
 * that identifies the position of the hast node in the Markdown source.
 * - sourceMarkdown: The node's original Markdown, extracted from
 * the Markdown source using the start and end offsets.
 */
function createSourceMapAttributes(hastNode, markdown) {
  const { position } = hastNode;

  // Both boundaries are dereferenced below, so a position that only
  // has one of them cannot produce a sourcemap.
  if (!position?.start || !position.end) {
    return {};
  }

  const { offset: startOffset } = position.start;
  const { offset: endOffset } = position.end;

  return {
    sourceMapKey: `${startOffset}:${endOffset}`,
    sourceMarkdown: markdown.substring(startOffset, endOffset),
  };
}
/**
 * Creates a function that resolves the attributes
 * of a ProseMirror node or mark based on a hast node.
 *
 * @param {Object} params Parameters
 * @param {String} params.markdown Markdown source from which the AST was generated
 * @param {Object} params.attributeTransformer An object that provides a `transform` function
 * @param {Function} params.attributeTransformer.transform A function invoked with
 * (attributeName, attributeValue, hastNode) for every attribute returned by the
 * spec's `getAttrs` function. Its return value replaces the attribute value.
 * @returns {Function} A `getAttrs` function
 */
const getAttrsFactory = ({ attributeTransformer, markdown }) =>
  /**
   * Computes the attributes of a ProseMirror node or mark from a hast node.
   *
   * The result always starts with the sourcemap attributes of the hast node.
   * The attributes provided by the spec's own `getAttrs` function (if any)
   * are transformed and then merged on top, so they win on a name clash.
   *
   * @param {Object} proseMirrorNodeSpec Factory spec, optionally with a `getAttrs` function
   * @param {Object} hastNode A hast node
   * @param {*} hastParents Value forwarded as the second argument of the spec's `getAttrs`
   * @returns {Object} The resolved attributes
   */
  function getAttrs(proseMirrorNodeSpec, hastNode, hastParents) {
    const specGetAttrs = proseMirrorNodeSpec.getAttrs;
    let specAttributes = {};

    if (isFunction(specGetAttrs)) {
      specAttributes = { ...specGetAttrs(hastNode, hastParents, markdown) };
    }

    const { transform: applyTransform } = attributeTransformer;
    const sourceMapAttributes = createSourceMapAttributes(hastNode, markdown);
    const transformedAttributes = mapValues(specAttributes, (value, name) =>
      applyTransform(name, value, hastNode),
    );

    return { ...sourceMapAttributes, ...transformedAttributes };
  };
/**
 * Keeps track of the Hast -> ProseMirror conversion process.
 *
 * The state is a stack of open entries. `openNode` and `openMark` push
 * an entry; `closeNode` and `closeMark` pop the most recent one. Closing
 * a node creates the ProseMirror node and appends it to the content of
 * the closest node entry that is still open.
 *
 * For example, given a Hast tree with three levels of nodes:
 *
 * - blockquote
 *   - paragraph
 *     - text
 *
 * the paragraph is folded into the blockquote when the paragraph entry
 * is closed, and the blockquote becomes the document when it is closed.
 *
 * Mark state
 *
 * `openMark` adds a mark to the set of active marks and `closeMark`
 * removes it again. Every text node created by `addText` receives the
 * marks that are active at that moment, and consecutive text nodes are
 * merged when `maybeMerge` reports that they can be combined.
 */
class HastToProseMirrorConverterState {
  constructor() {
    this.stack = [];
    this.marks = Mark.none;
  }

  /**
   * The most recently opened entry (node or mark), or
   * `undefined` when the stack is empty.
   */
  get top() {
    const { stack } = this;

    return stack[stack.length - 1];
  }

  /**
   * The most recently opened entry of type `node`, or `null`
   * when no node entry is open.
   */
  get topNode() {
    const isNodeEntry = (entry) => entry.type === 'node';

    return this.findInStack(isNodeEntry);
  }

  /**
   * Whether the stack has no open entries.
   */
  get empty() {
    return !this.stack.length;
  }

  /**
   * Searches the stack from the most recent entry to the oldest one.
   *
   * @param {Function} fn Predicate invoked with each entry. Only a
   * return value strictly equal to `true` counts as a match.
   * @returns The first matching entry, or `null` when nothing matches
   */
  findInStack(fn) {
    let index = this.stack.length;

    while (index > 0) {
      index -= 1;

      const entry = this.stack[index];

      if (fn(entry) === true) {
        return entry;
      }
    }

    return null;
  }

  /**
   * Creates a text node carrying the currently active marks and
   * adds it to the content of the closest open node.
   *
   * When the last child of that node can be merged with the new
   * text node, the merged node replaces the last child instead.
   * Nothing happens when `text` is empty.
   *
   * @param {ProseMirror.Schema} schema ProseMirror schema
   * @param {String} text Text
   */
  addText(schema, text) {
    if (!text) {
      return;
    }

    const siblings = this.topNode?.content;
    const lastIndex = siblings.length - 1;
    const previous = siblings[lastIndex];
    const textNode = schema.text(text, this.marks);
    const combined = maybeMerge(previous, textNode);

    if (previous && combined) {
      siblings[lastIndex] = combined;
      return;
    }

    siblings.push(textNode);
  }

  /**
   * Creates a mark, pushes it to the stack, and adds it to the
   * set of active marks.
   *
   * @param {https://prosemirror.net/docs/ref/#model.MarkType} schemaType Mark schema type
   * @param {https://github.com/syntax-tree/hast#nodes} hastNode AST node that the mark is based on
   * @param {Object} attrs Mark attributes
   * @param {Object} factorySpec Specification of how the mark should be created
   */
  openMark(schemaType, hastNode, attrs, factorySpec) {
    const mark = schemaType.create(attrs);
    const entry = { type: 'mark', mark, attrs, hastNode, factorySpec };

    this.stack.push(entry);
    this.marks = mark.addToSet(this.marks);
  }

  /**
   * Pops the top entry of the stack and removes its mark
   * from the set of active marks.
   */
  closeMark() {
    const closed = this.stack.pop();

    this.marks = closed.mark.removeFromSet(this.marks);
  }

  /**
   * Pushes a node entry with empty content to the stack.
   *
   * @param {https://prosemirror.net/docs/ref/#model.NodeType} schemaType ProseMirror node type
   * @param {https://github.com/syntax-tree/hast#nodes} hastNode Hast node from which the ProseMirror node will be created
   * @param {*} attrs Node attributes
   * @param {*} factorySpec The factory spec used to create the node factory
   */
  openNode(schemaType, hastNode, attrs, factorySpec) {
    const entry = { type: 'node', schemaType, attrs, content: [], hastNode, factorySpec };

    this.stack.push(entry);
  }

  /**
   * Pops the top entry of the stack, creates its ProseMirror node,
   * and appends the node to the closest node entry that remains open.
   *
   * Inline nodes receive the active marks when there are any.
   *
   * @returns The created node, or `null` when `createAndFill` could
   * not create one from the collected content.
   */
  closeNode() {
    const { schemaType, attrs, content, factorySpec } = this.stack.pop();
    const carriesMarks = factorySpec.type === 'inline' && this.marks.length;
    const creationArgs = carriesMarks ? [attrs, content, this.marks] : [attrs, content];
    const node = schemaType.createAndFill(...creationArgs);

    if (node) {
      if (!this.empty) {
        this.topNode.content.push(node);
      }

      return node;
    }

    /*
      `createAndFill` returns null when the content doesn't conform with
      the document schema. The most likely scenario is a paragraph that
      contains other paragraphs, which happens when the converter finds a
      mark wrapping one or more paragraphs: the mark is wrapped in a
      paragraph because ProseMirror doesn't allow marks wrapping block
      nodes or being direct children of nodes like the root or list items.
      In that case, the inner content is handed over to the parent node.
    */
    const isParagraphOfParagraphs =
      schemaType.name === 'paragraph' &&
      content.some((child) => child.type.name === 'paragraph');

    if (isParagraphOfParagraphs) {
      this.topNode.content.push(...content);
    }

    return null;
  }

  /**
   * Closes entries from the top of the stack until the top entry
   * is the one created from `hastNode`.
   *
   * @param {https://github.com/syntax-tree/hast#nodes} hastNode Hast node of the entry that should stay open
   */
  closeUntil(hastNode) {
    for (let current = this.top; current?.hastNode !== hastNode; current = this.top) {
      if (current.type === 'node') {
        this.closeNode();
      } else {
        this.closeMark();
      }
    }
  }

  /**
   * Closes every entry that is still open.
   *
   * @returns The result of the last `closeNode` call
   */
  buildDoc() {
    let doc;
    let hasOpenEntries = true;

    while (hasOpenEntries) {
      const { type } = this.top;

      if (type === 'node') {
        doc = this.closeNode();
      } else {
        this.closeMark();
      }

      hasOpenEntries = !this.empty;
    }

    return doc;
  }
}
/**
 * Creates the ProseMirror node/mark factories described by one or
 * more factory specifications.
 *
 * Every factory is a copy of its specification plus a `handle`
 * function that receives (state, hastNode, parent) and updates the
 * conversion state. Two built-in factories, `root` and `text`, come
 * before the ones created from the specifications.
 *
 * Note: Read `createProseMirrorDocFromMdastTree` documentation
 * for instructions about how to define these specifications.
 *
 * @param {model.ProseMirrorSchema} schema A ProseMirror schema used to create the
 * ProseMirror nodes and marks.
 * @param {Object} proseMirrorFactorySpecs ProseMirror nodes factory specifications.
 * @param {Object} attributeTransformer Forwarded to `getAttrsFactory`
 * @param {String} markdown Markdown source file's content
 *
 * @returns An object that contains ProseMirror node factories
 * @throws {RangeError} When a specification has an unrecognized `type`
 */
const createProseMirrorNodeFactories = (
  schema,
  proseMirrorFactorySpecs,
  attributeTransformer,
  markdown,
) => {
  const getAttrs = getAttrsFactory({ attributeTransformer, markdown });

  const factories = {
    root: {
      selector: 'root',
      wrapInParagraph: true,
      handle: (state, hastNode) => {
        return state.openNode(schema.topNodeType, hastNode, NO_ATTRIBUTES, factories.root);
      },
    },
    text: {
      selector: 'text',
      handle: (state, hastNode, parent) => {
        // Look for a text processor before closing any entry of the stack.
        const textProcessor = state.findInStack((entry) =>
          isFunction(entry.factorySpec.processText),
        );
        const text = hastNode.value;

        // Whitespace-only text nodes don't produce ProseMirror text.
        if (/^\s+$/.test(text)) {
          return;
        }

        state.closeUntil(parent);

        const processedText = textProcessor ? textProcessor.factorySpec.processText(text) : text;

        state.addText(schema, processedText);
      },
    },
  };

  // Builds the `handle` function that matches the factory type.
  const createHandle = (proseMirrorName, factory) => {
    switch (factory.type) {
      case 'block':
        return (state, hastNode, parent) => {
          // The node type of block nodes is resolved on every invocation.
          const nodeType = schema.nodeType(proseMirrorName);

          state.closeUntil(parent);
          state.openNode(nodeType, hastNode, getAttrs(factory, hastNode, parent), factory);
        };
      case 'inline': {
        const nodeType = schema.nodeType(proseMirrorName);

        return (state, hastNode, parent) => {
          state.closeUntil(parent);
          state.openNode(nodeType, hastNode, getAttrs(factory, hastNode, parent), factory);
          // Inline nodes do not have children therefore they are immediately closed
          state.closeNode();
        };
      }
      case 'mark': {
        const markType = schema.marks[proseMirrorName];

        return (state, hastNode, parent) => {
          state.openMark(markType, hastNode, getAttrs(factory, hastNode, parent), factory);
        };
      }
      case 'ignore':
        return noop;
      default:
        throw new RangeError(
          `Unrecognized ProseMirror object type ${JSON.stringify(factory.type)}`,
        );
    }
  };

  Object.entries(proseMirrorFactorySpecs).forEach(([proseMirrorName, factorySpec]) => {
    const factory = { ...factorySpec };

    factory.handle = createHandle(proseMirrorName, factory);
    factories[proseMirrorName] = factory;
  });

  return factories;
};
/**
 * Finds the first factory whose selector matches a hast node.
 *
 * A function selector is invoked with the hast node and its ancestors
 * and matches when it returns a truthy value. A string selector matches
 * when it equals the node's `tagName` or `type`. Any other selector,
 * including a missing one, never matches. This prevents a factory
 * without a selector from matching every node that lacks a `tagName`.
 *
 * @param {Object} hastNode A hast node
 * @param {Array} ancestors The ancestors of the hast node
 * @param {Object} factories Factories keyed by name; searched in property order
 * @returns The matching factory, or `undefined` when none matches
 */
const findFactory = (hastNode, ancestors, factories) =>
  Object.values(factories).find(({ selector }) => {
    if (isFunction(selector)) {
      return selector(hastNode, ancestors);
    }

    return (
      typeof selector === 'string' &&
      (selector === hastNode.tagName || selector === hastNode.type)
    );
  });
/**
 * Resolves the parent of a hast node.
 *
 * When `parent` is a string, the parent is the closest ancestor whose
 * `tagName` equals that string. Otherwise, the parent is the last
 * element of `ancestors`, that is, the direct ancestor.
 *
 * The `ancestors` array is never modified: it belongs to the caller,
 * so reordering it here would be visible outside of this function.
 *
 * @param {Array} ancestors Ancestors of the node, ordered from the outermost to the closest one
 * @param {String} [parent] Tag name of the ancestor to look for
 * @returns The resolved parent, or `undefined` when there is none
 */
const findParent = (ancestors, parent) => {
  if (!isString(parent)) {
    return ancestors[ancestors.length - 1];
  }

  // Walk from the closest ancestor outwards without reordering the array.
  for (let index = ancestors.length - 1; index >= 0; index -= 1) {
    const ancestor = ancestors[index];

    if (ancestor.tagName === parent) {
      return ancestor;
    }
  }

  return undefined;
};
/**
 * Resolves the position of a hast node, completing the positions of
 * text nodes that have only a start point or only an end point.
 *
 * The position is returned untouched when the node is not a text node,
 * when it has no position at all, or when the position has both points
 * or neither of them. Otherwise, the missing point is derived from the
 * existing one by shifting `column` and `offset` by the length of the
 * text minus one.
 *
 * @param {Object} textNode A hast node
 * @returns {Object|undefined} The resolved position
 */
const resolveNodePosition = (textNode) => {
  const { position, value, type } = textNode;

  // Nodes may not carry a position at all; there is nothing to complete.
  if (type !== 'text' || !position) {
    return position;
  }

  const { start, end } = position;

  if (Boolean(start) === Boolean(end)) {
    return position;
  }

  const span = value.length - 1;

  // NOTE(review): unist points expose `line`, `column` and `offset`, so `row`
  // is presumably undefined for parser-generated nodes. Confirm before
  // relying on it.
  if (start) {
    return {
      start,
      end: {
        row: start.row,
        column: start.column + span,
        offset: start.offset + span,
      },
    };
  }

  return {
    start: {
      row: end.row,
      column: end.column - span,
      offset: end.offset - span,
    },
    end,
  };
};
/**
 * Returns a new list without the text nodes whose value consists
 * exclusively of one or more whitespace characters. Every other
 * node is kept, including text nodes with an empty string value.
 *
 * @param {Array} nodes Hast nodes
 * @returns {Array} The filtered list of nodes
 */
const removeEmptyTextNodes = (nodes) => {
  const isWhitespaceText = (node) => node.type === 'text' && /^\s+$/.test(node.value);

  return nodes.filter((node) => !isWhitespaceText(node));
};
/**
 * Groups consecutive inline nodes under synthetic paragraph elements.
 *
 * A node is wrapped when it is a text node or an element whose tag name
 * is included in `wrappableTags`. Comments and any other node are
 * returned as they are. Consecutive wrappable nodes share a wrapper,
 * and the wrapper's end position is moved to the end position of the
 * last node added to it.
 *
 * The wrapper owns a copy of the first child's position. Sharing the
 * same object would change that child's own end position every time
 * the wrapper grows.
 *
 * @param {Array} nodes Hast nodes
 * @param {Array} wrappableTags Tag names of the elements that should be wrapped
 * @returns {Array} A new list where the wrappable nodes are replaced by their wrappers
 */
const wrapInlineElements = (nodes, wrappableTags) => {
  const wrapped = [];

  for (const child of nodes) {
    const isWrappable =
      child.type !== 'comment' &&
      (child.type === 'text' || wrappableTags.includes(child.tagName));
    const previous = wrapped[wrapped.length - 1];

    if (!isWrappable) {
      wrapped.push(child);
    } else if (previous?.properties?.wrapper) {
      // Extend the wrapper that was opened by a preceding inline node.
      previous.position.end = child.position.end;
      previous.children.push(child);
    } else {
      const position = resolveNodePosition(child);

      wrapped.push({
        type: 'element',
        tagName: 'p',
        // Shallow copy: only the `end` reference is replaced later on.
        position: position ? { ...position } : position,
        children: [child],
        properties: { wrapper: true },
      });
    }
  }

  return wrapped;
};
/**
* Converts a Hast AST to a ProseMirror document based on a series
* of specifications that describe how to map all the nodes of the former
* to ProseMirror nodes or marks.
*
* The specification object describes how to map a Hast node to a ProseMirror node or mark.
* The converter skips a Hast node, without visiting its children, when it
* doesn't find a specification for that node while traversing the AST.
*
* The object should have the following shape:
*
* {
* [ProseMirrorNodeOrMarkName]: {
* type: 'block' | 'inline' | 'mark' | 'ignore',
* selector: String | hastNode -> Boolean,
* ...configurationOptions
* }
* }
*
* Where each property in the object represents a HAST node with a given tag name, for example:
*
* {
* horizontalRule: {
* type: 'block',
* selector: 'hr',
* },
* heading: {
* type: 'block',
* selector: (hastNode) => ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(hastNode.tagName),
* },
* bold: {
* type: 'mark',
* selector: (hastNode) => ['b', 'strong'].includes(hastNode.tagName),
* },
* // etc
* }
*
*
* Configuration options
* ----------------------
*
* You can customize the conversion process for every node or mark
* setting the following properties in the specification object:
*
* **type**
*
* The `type` property should have one of the following four values:
*
* 1. "block": A ProseMirror node that contains one or more children.
* 2. "inline": A ProseMirror node that doesn't contain any children although
* it can have inline content like an image or a mention object.
* 3. "mark": A ProseMirror mark.
* 4. "ignore": A hast node that should be ignored and won't be mapped to a
* ProseMirror node.
*
* **selector**
*
* The `selector` property matches a HastNode to a ProseMirror node or
* Mark. If you assign a string value to this property, the converter
* will match the first hast node with a `tagName` or `type` property
* that equals the string value.
*
* If you assign a function, the converter will invoke the function with
* the hast node and its ancestors. The function should return `true`
* if the hastNode matches the custom criteria implemented in the function
*
* **getAttrs**
*
* Computes a ProseMirror node or mark attributes. The converter will invoke
* `getAttrs` with the following parameters:
*
* 1. hastNode: The hast node
* 2. parent: The hast node's resolved parent (see the **parent** option below)
* 3. markdown: Markdown source file's content
*
* **wrapInParagraph**
*
* This property only applies to block nodes. If a block node contains inline
* elements like text, images, links, etc, the converter will wrap those inline
* elements in a paragraph. This is useful for ProseMirror block
* nodes that dont allow text directly such as list items and tables.
*
* **processText**
*
* This property only applies to block nodes. If a block node contains text,
* it allows applying a processing function to that text. This is useful when
* you can transform the text node, i.e trim(), substring(), etc.
*
* **parent**
*
* Specifies what is the nodes parent. This is useful when the nodes parent is not
* its direct ancestor in Abstract Syntax Tree. For example, imagine that you want
* to make <tr> elements a direct children of tables and skip `<thead>` and `<tbody>`
* altogether.
*
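* @param {model.Document_Schema} params.schema A ProseMirror schema that specifies the shape
* of the ProseMirror document.
* @param {Object} params.factorySpecs Factory specifications as described above
* @param {Array} params.wrappableTags Tag names of the elements that are wrapped in a
* paragraph, together with text nodes, when a specification sets `wrapInParagraph`
* @param {Hast} params.tree https://github.com/syntax-tree/hast
* @param {Object} params.attributeTransformer An object with a `transform` function that
* is applied to every attribute returned by a specification's `getAttrs` function
* @param {String} params.markdown Markdown source from which the Hast tree was generated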
*
* @returns A ProseMirror document
*/
export const createProseMirrorDocFromMdastTree = ({
  schema,
  factorySpecs,
  wrappableTags,
  tree,
  attributeTransformer,
  markdown,
}) => {
  const proseMirrorNodeFactories = createProseMirrorNodeFactories(
    schema,
    factorySpecs,
    attributeTransformer,
    markdown,
  );
  const state = new HastToProseMirrorConverterState();

  // Invoked for every node of the tree. Returning SKIP prevents the
  // traversal of the children of a node that has no factory.
  const convertNode = (hastNode, ancestors) => {
    const factory = findFactory(hastNode, ancestors, proseMirrorNodeFactories);

    if (!factory) {
      return SKIP;
    }

    const parent = findParent(ancestors, factory.parent);

    if (factory.wrapInParagraph) {
      /**
       * Modifying parameters is a bad practice. For performance reasons,
       * the author of the unist-util-visit-parents function recommends
       * modifying nodes in place to avoid traversing the Abstract Syntax
       * Tree more than once
       */
      const meaningfulChildren = removeEmptyTextNodes(hastNode.children);

      // eslint-disable-next-line no-param-reassign
      hastNode.children = wrapInlineElements(meaningfulChildren, wrappableTags);
    }

    factory.handle(state, hastNode, parent);

    return true;
  };

  visitParents(tree, convertNode);

  return state.buildDoc();
};