You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
582 lines
18 KiB
582 lines
18 KiB
// Main parser class |
|
|
|
'use strict'; |
|
|
|
|
|
var utils = require('./common/utils'); |
|
var helpers = require('./helpers'); |
|
var Renderer = require('./renderer'); |
|
var ParserCore = require('./parser_core'); |
|
var ParserBlock = require('./parser_block'); |
|
var ParserInline = require('./parser_inline'); |
|
var LinkifyIt = require('linkify-it'); |
|
var mdurl = require('mdurl'); |
|
var punycode = require('punycode'); |
|
|
|
|
|
// Built-in presets, keyed by the preset name accepted by the constructor and
// by `MarkdownIt.configure()`.
var config = {
  default: require('./presets/default'),
  zero: require('./presets/zero'),
  commonmark: require('./presets/commonmark')
};
|
|
|
//////////////////////////////////////////////////////////////////////////////// |
|
// |
|
// This validator can prohibit more than really needed to prevent XSS. It's a |
|
// tradeoff to keep code simple and to be secure by default. |
|
// |
|
// If you need different setup - override validator method as you wish. Or |
|
// replace it with dummy function and use external sanitizer. |
|
// |
|
|
|
// Schemes rejected by default.
var BAD_PROTO_RE = /^(vbscript|javascript|file|data):/;

// Exception to the list above: `data:` urls carrying gif/png/jpeg/webp images.
var GOOD_DATA_RE = /^data:image\/(gif|png|jpeg|webp);/;

/**
 * validateLink(url) -> Boolean
 * - url (String): link destination to check
 *
 * Returns `false` when `url` (trimmed, compared case-insensitively) starts
 * with `vbscript:`, `javascript:`, `file:` or `data:`, unless it is a
 * `data:image/(gif|png|jpeg|webp);` url. Returns `true` for everything else.
 **/
function validateLink(url) {
  // url should be normalized at this point, and existing entities are decoded
  var str = url.trim().toLowerCase();

  // Accept anything outside the blacklist; a blacklisted scheme is accepted
  // only when it matches the image whitelist.
  return !BAD_PROTO_RE.test(str) || GOOD_DATA_RE.test(str);
}
|
|
|
//////////////////////////////////////////////////////////////////////////////// |
|
|
|
|
|
// Protocols whose hostname is re-encoded with punycode.
var RECODE_HOSTNAME_FOR = [ 'http:', 'https:', 'mailto:' ];

/**
 * normalizeLink(url) -> String
 * - url (String): link destination
 *
 * Parses `url`, converts the hostname with `punycode.toASCII` (only for
 * protocol-less urls and the protocols listed in `RECODE_HOSTNAME_FOR`), and
 * returns the re-formatted url passed through `mdurl.encode`.
 **/
function normalizeLink(url) {
  var parsed = mdurl.parse(url, true);

  // Encode hostnames in urls like:
  // `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
  //
  // We don't encode unknown schemas, because it's likely that we encode
  // something we shouldn't (e.g. `skype:name` treated as `skype:host`)
  //
  var shouldRecode = parsed.hostname &&
    (!parsed.protocol || RECODE_HOSTNAME_FOR.indexOf(parsed.protocol) >= 0);

  if (shouldRecode) {
    try {
      parsed.hostname = punycode.toASCII(parsed.hostname);
    } catch (er) {
      // Deliberate best effort: if conversion fails, keep the hostname as is.
    }
  }

  return mdurl.encode(mdurl.format(parsed));
}
|
|
|
/**
 * normalizeLinkText(url) -> String
 * - url (String): link destination
 *
 * Parses `url`, converts the hostname with `punycode.toUnicode` (only for
 * protocol-less urls and the protocols listed in `RECODE_HOSTNAME_FOR`), and
 * returns the re-formatted url passed through `mdurl.decode`.
 **/
function normalizeLinkText(url) {
  var parsed = mdurl.parse(url, true);

  // Decode hostnames in urls like:
  // `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
  //
  // We don't touch unknown schemas, because it's likely that we recode
  // something we shouldn't (e.g. `skype:name` treated as `skype:host`)
  //
  var shouldRecode = parsed.hostname &&
    (!parsed.protocol || RECODE_HOSTNAME_FOR.indexOf(parsed.protocol) >= 0);

  if (shouldRecode) {
    try {
      parsed.hostname = punycode.toUnicode(parsed.hostname);
    } catch (er) {
      // Deliberate best effort: if conversion fails, keep the hostname as is.
    }
  }

  // add '%' to exclude list because of https://github.com/markdown-it/markdown-it/issues/720
  return mdurl.decode(mdurl.format(parsed), mdurl.decode.defaultChars + '%');
}
|
|
|
|
|
/**
 * class MarkdownIt
 *
 * Main parser/renderer class.
 *
 * ##### Usage
 *
 * ```javascript
 * // node.js, "classic" way:
 * var MarkdownIt = require('markdown-it'),
 *     md = new MarkdownIt();
 * var result = md.render('# markdown-it rulezz!');
 *
 * // node.js, the same, but with sugar:
 * var md = require('markdown-it')();
 * var result = md.render('# markdown-it rulezz!');
 *
 * // browser without AMD, added to "window" on script load
 * // Note, there is no dash.
 * var md = window.markdownit();
 * var result = md.render('# markdown-it rulezz!');
 * ```
 *
 * Single line rendering, without paragraph wrap:
 *
 * ```javascript
 * var md = require('markdown-it')();
 * var result = md.renderInline('__markdown-it__ rulezz!');
 * ```
 **/
|
|
|
/**
 * new MarkdownIt([presetName, options])
 * - presetName (String): optional, `commonmark` / `zero`
 * - options (Object)
 *
 * Creates parser instance with given config. Can be called without `new`.
 *
 * ##### presetName
 *
 * MarkdownIt provides named presets as a convenience to quickly
 * enable/disable active syntax rules and options for common use cases.
 *
 * - ["commonmark"](https://github.com/markdown-it/markdown-it/blob/master/lib/presets/commonmark.js) -
 *   configures parser to strict [CommonMark](http://commonmark.org/) mode.
 * - [default](https://github.com/markdown-it/markdown-it/blob/master/lib/presets/default.js) -
 *   similar to GFM, used when no preset name given. Enables all available rules,
 *   but still without html, typographer & autolinker.
 * - ["zero"](https://github.com/markdown-it/markdown-it/blob/master/lib/presets/zero.js) -
 *   all rules disabled. Useful to quickly setup your config via `.enable()`.
 *   For example, when you need only `bold` and `italic` markup and nothing else.
 *
 * ##### options:
 *
 * - __html__ - `false`. Set `true` to enable HTML tags in source. Be careful!
 *   That's not safe! You may need external sanitizer to protect output from XSS.
 *   It's better to extend features via plugins, instead of enabling HTML.
 * - __xhtmlOut__ - `false`. Set `true` to add '/' when closing single tags
 *   (`<br />`). This is needed only for full CommonMark compatibility. In real
 *   world you will need HTML output.
 * - __breaks__ - `false`. Set `true` to convert `\n` in paragraphs into `<br>`.
 * - __langPrefix__ - `language-`. CSS language class prefix for fenced blocks.
 *   Can be useful for external highlighters.
 * - __linkify__ - `false`. Set `true` to autoconvert URL-like text to links.
 * - __typographer__ - `false`. Set `true` to enable [some language-neutral
 *   replacement](https://github.com/markdown-it/markdown-it/blob/master/lib/rules_core/replacements.js) +
 *   quotes beautification (smartquotes).
 * - __quotes__ - `“”‘’`, String or Array. Double + single quotes replacement
 *   pairs, when typographer enabled and smartquotes on. For example, you can
 *   use `'«»„“'` for Russian, `'„“‚‘'` for German, and
 *   `['«\xA0', '\xA0»', '‹\xA0', '\xA0›']` for French (including nbsp).
 * - __highlight__ - `null`. Highlighter function for fenced code blocks.
 *   Highlighter `function (str, lang)` should return escaped HTML. It can also
 *   return empty string if the source was not changed and should be escaped
 *   externally. If result starts with <pre... internal wrapper is skipped.
 *
 * ##### Example
 *
 * ```javascript
 * // commonmark mode
 * var md = require('markdown-it')('commonmark');
 *
 * // default mode
 * var md = require('markdown-it')();
 *
 * // enable everything
 * var md = require('markdown-it')({
 *   html: true,
 *   linkify: true,
 *   typographer: true
 * });
 * ```
 *
 * ##### Syntax highlighting
 *
 * ```js
 * var hljs = require('highlight.js') // https://highlightjs.org/
 *
 * var md = require('markdown-it')({
 *   highlight: function (str, lang) {
 *     if (lang && hljs.getLanguage(lang)) {
 *       try {
 *         return hljs.highlight(str, { language: lang, ignoreIllegals: true }).value;
 *       } catch (__) {}
 *     }
 *
 *     return ''; // use external default escaping
 *   }
 * });
 * ```
 *
 * Or with full wrapper override (if you need assign class to `<pre>`):
 *
 * ```javascript
 * var hljs = require('highlight.js') // https://highlightjs.org/
 *
 * // Actual default values
 * var md = require('markdown-it')({
 *   highlight: function (str, lang) {
 *     if (lang && hljs.getLanguage(lang)) {
 *       try {
 *         return '<pre class="hljs"><code>' +
 *                hljs.highlight(str, { language: lang, ignoreIllegals: true }).value +
 *                '</code></pre>';
 *       } catch (__) {}
 *     }
 *
 *     return '<pre class="hljs"><code>' + md.utils.escapeHtml(str) + '</code></pre>';
 *   }
 * });
 * ```
 *
 **/
function MarkdownIt(presetName, options) {
  // Allow calling without `new`.
  if (!(this instanceof MarkdownIt)) {
    return new MarkdownIt(presetName, options);
  }

  // `MarkdownIt(options)` form: a non-string first argument is taken as the
  // options object and the `default` preset is used.
  if (!options && !utils.isString(presetName)) {
    options = presetName || {};
    presetName = 'default';
  }

  /**
   * MarkdownIt#inline -> ParserInline
   *
   * Instance of [[ParserInline]]. You may need it to add new rules when
   * writing plugins. For simple rules control use [[MarkdownIt.disable]] and
   * [[MarkdownIt.enable]].
   **/
  this.inline = new ParserInline();

  /**
   * MarkdownIt#block -> ParserBlock
   *
   * Instance of [[ParserBlock]]. You may need it to add new rules when
   * writing plugins. For simple rules control use [[MarkdownIt.disable]] and
   * [[MarkdownIt.enable]].
   **/
  this.block = new ParserBlock();

  /**
   * MarkdownIt#core -> Core
   *
   * Instance of [[Core]] chain executor. You may need it to add new rules when
   * writing plugins. For simple rules control use [[MarkdownIt.disable]] and
   * [[MarkdownIt.enable]].
   **/
  this.core = new ParserCore();

  /**
   * MarkdownIt#renderer -> Renderer
   *
   * Instance of [[Renderer]]. Use it to modify output look. Or to add rendering
   * rules for new token types, generated by plugins.
   *
   * ##### Example
   *
   * ```javascript
   * var md = require('markdown-it')();
   *
   * function myToken(tokens, idx, options, env, self) {
   *   //...
   *   return result;
   * };
   *
   * md.renderer.rules['my_token'] = myToken
   * ```
   *
   * See [[Renderer]] docs and [source code](https://github.com/markdown-it/markdown-it/blob/master/lib/renderer.js).
   **/
  this.renderer = new Renderer();

  /**
   * MarkdownIt#linkify -> LinkifyIt
   *
   * [linkify-it](https://github.com/markdown-it/linkify-it) instance.
   * Used by [linkify](https://github.com/markdown-it/markdown-it/blob/master/lib/rules_core/linkify.js)
   * rule.
   **/
  this.linkify = new LinkifyIt();

  /**
   * MarkdownIt#validateLink(url) -> Boolean
   *
   * Link validation function. CommonMark allows too much in links. By default
   * we disable `javascript:`, `vbscript:`, `file:` schemas, and almost all `data:...` schemas
   * except some embedded image types.
   *
   * You can change this behaviour:
   *
   * ```javascript
   * var md = require('markdown-it')();
   * // enable everything
   * md.validateLink = function () { return true; }
   * ```
   **/
  this.validateLink = validateLink;

  /**
   * MarkdownIt#normalizeLink(url) -> String
   *
   * Function used to encode link url to a machine-readable format,
   * which includes url-encoding, punycode, etc.
   **/
  this.normalizeLink = normalizeLink;

  /**
   * MarkdownIt#normalizeLinkText(url) -> String
   *
   * Function used to decode link url to a human-readable format
   **/
  this.normalizeLinkText = normalizeLinkText;


  // Expose utils & helpers for easy access from plugins

  /**
   * MarkdownIt#utils -> utils
   *
   * Assorted utility functions, useful to write plugins. See details
   * [here](https://github.com/markdown-it/markdown-it/blob/master/lib/common/utils.js).
   **/
  this.utils = utils;

  /**
   * MarkdownIt#helpers -> helpers
   *
   * Link components parser functions, useful to write plugins. See details
   * [here](https://github.com/markdown-it/markdown-it/blob/master/lib/helpers).
   **/
  // Shallow copy, so assigning properties on one instance's `helpers` does not
  // touch the shared module object.
  this.helpers = utils.assign({}, helpers);


  // Preset values are loaded first; explicit options are applied on top.
  this.options = {};
  this.configure(presetName);

  if (options) { this.set(options); }
}
|
|
|
|
|
/** chainable
 * MarkdownIt.set(options)
 *
 * Set parser options (in the same format as in constructor). Probably, you
 * will never need it, but you can change options after constructor call.
 *
 * ##### Example
 *
 * ```javascript
 * var md = require('markdown-it')()
 *             .set({ html: true, breaks: true })
 *             .set({ typographer: true });
 * ```
 *
 * __Note:__ To achieve the best possible performance, don't modify a
 * `markdown-it` instance options on the fly. If you need multiple configurations
 * it's best to create multiple instances and initialize each with separate
 * config.
 **/
MarkdownIt.prototype.set = function (options) {
  // Merge into the existing options object; keys not mentioned are kept.
  utils.assign(this.options, options);
  return this;
};
|
|
|
|
|
/** chainable, internal
 * MarkdownIt.configure(presets)
 * - presets (String|Object): preset name, or preset data object
 *
 * Batch load of all options and component settings. This is internal method,
 * and you probably will not need it. But if you will - see available presets
 * and data structure [here](https://github.com/markdown-it/markdown-it/tree/master/lib/presets)
 *
 * We strongly recommend to use presets instead of direct config loads. That
 * will give better compatibility with next versions.
 *
 * Throws `Error` when the preset name is unknown or the preset is empty.
 **/
MarkdownIt.prototype.configure = function (presets) {
  var self = this, presetName;

  if (utils.isString(presets)) {
    presetName = presets;
    // Own-property lookup only: names such as `constructor` or `toString`
    // would otherwise resolve through the prototype chain and be silently
    // accepted as an (empty) preset.
    presets = Object.prototype.hasOwnProperty.call(config, presetName) ?
      config[presetName] :
      undefined;
    if (!presets) { throw new Error('Wrong `markdown-it` preset "' + presetName + '", check name'); }
  }

  if (!presets) { throw new Error('Wrong `markdown-it` preset, can\'t be empty'); }

  if (presets.options) { self.set(presets.options); }

  if (presets.components) {
    // Each key names a parser chain on the instance (looked up as `self[name]`).
    Object.keys(presets.components).forEach(function (name) {
      var component = presets.components[name];

      if (component.rules) {
        self[name].ruler.enableOnly(component.rules);
      }
      if (component.rules2) {
        self[name].ruler2.enableOnly(component.rules2);
      }
    });
  }

  return this;
};
|
|
|
|
|
/** chainable
 * MarkdownIt.enable(list, ignoreInvalid)
 * - list (String|Array): rule name or list of rule names to enable
 * - ignoreInvalid (Boolean): set `true` to ignore errors when rule not found.
 *
 * Enable list of rules. It will automatically find appropriate components,
 * containing rules with given names. If rule not found, and `ignoreInvalid`
 * not set - throws exception.
 *
 * ##### Example
 *
 * ```javascript
 * var md = require('markdown-it')()
 *             .enable(['sub', 'sup'])
 *             .disable('smartquotes');
 * ```
 **/
MarkdownIt.prototype.enable = function (list, ignoreInvalid) {
  var names = Array.isArray(list) ? list : [ list ];
  var result = [];

  // Each ruler is asked with its own "ignore invalid" flag set, because a name
  // unknown to one chain may belong to another; unknown names are checked
  // once, below, against the combined result.
  [ 'core', 'block', 'inline' ].forEach(function (chain) {
    result = result.concat(this[chain].ruler.enable(names, true));
  }, this);

  result = result.concat(this.inline.ruler2.enable(names, true));

  var missed = names.filter(function (name) { return result.indexOf(name) < 0; });

  if (missed.length && !ignoreInvalid) {
    throw new Error('MarkdownIt. Failed to enable unknown rule(s): ' + missed);
  }

  return this;
};
|
|
|
|
|
/** chainable
 * MarkdownIt.disable(list, ignoreInvalid)
 * - list (String|Array): rule name or list of rule names to disable.
 * - ignoreInvalid (Boolean): set `true` to ignore errors when rule not found.
 *
 * The same as [[MarkdownIt.enable]], but turn specified rules off.
 **/
MarkdownIt.prototype.disable = function (list, ignoreInvalid) {
  var names = Array.isArray(list) ? list : [ list ];
  var result = [];

  // Each ruler is asked with its own "ignore invalid" flag set, because a name
  // unknown to one chain may belong to another; unknown names are checked
  // once, below, against the combined result.
  [ 'core', 'block', 'inline' ].forEach(function (chain) {
    result = result.concat(this[chain].ruler.disable(names, true));
  }, this);

  result = result.concat(this.inline.ruler2.disable(names, true));

  var missed = names.filter(function (name) { return result.indexOf(name) < 0; });

  if (missed.length && !ignoreInvalid) {
    throw new Error('MarkdownIt. Failed to disable unknown rule(s): ' + missed);
  }

  return this;
};
|
|
|
|
|
/** chainable
 * MarkdownIt.use(plugin, params)
 *
 * Load specified plugin with given params into current parser instance.
 * It's just a sugar to call `plugin(md, params)` with currying.
 *
 * ##### Example
 *
 * ```javascript
 * var iterator = require('markdown-it-for-inline');
 * var md = require('markdown-it')()
 *             .use(iterator, 'foo_replace', 'text', function (tokens, idx) {
 *               tokens[idx].content = tokens[idx].content.replace(/foo/g, 'bar');
 *             });
 * ```
 **/
MarkdownIt.prototype.use = function (plugin /*, params, ... */) {
  // Everything after `plugin` is forwarded, with this instance prepended.
  var params = Array.prototype.slice.call(arguments, 1);

  // The plugin function itself is used as `this` for the call.
  plugin.apply(plugin, [ this ].concat(params));
  return this;
};
|
|
|
|
|
/** internal
 * MarkdownIt.parse(src, env) -> Array
 * - src (String): source string
 * - env (Object): environment sandbox
 *
 * Parse input string and return list of block tokens (special token type
 * "inline" will contain list of inline tokens). You should not call this
 * method directly, until you write custom renderer (for example, to produce
 * AST).
 *
 * `env` is used to pass data between "distributed" rules and return additional
 * metadata like reference info, needed for the renderer. It also can be used to
 * inject data in specific cases. Usually, you will be ok to pass `{}`,
 * and then pass updated object to renderer.
 *
 * Throws `Error` when `src` is not a primitive string.
 **/
MarkdownIt.prototype.parse = function (src, env) {
  if (typeof src !== 'string') {
    throw new Error('Input data should be a String');
  }

  var state = new this.core.State(src, this, env);

  // The core chain fills `state.tokens`.
  this.core.process(state);

  return state.tokens;
};
|
|
|
|
|
/**
 * MarkdownIt.render(src [, env]) -> String
 * - src (String): source string
 * - env (Object): environment sandbox
 *
 * Render markdown string into html. It does all magic for you :).
 *
 * `env` can be used to inject additional metadata (`{}` by default).
 * But you will not need it with high probability. See also comment
 * in [[MarkdownIt.parse]].
 **/
MarkdownIt.prototype.render = function (src, env) {
  // The same sandbox object is handed to both the parser and the renderer.
  var sandbox = env || {};
  var tokens = this.parse(src, sandbox);

  return this.renderer.render(tokens, this.options, sandbox);
};
|
|
|
|
|
/** internal
 * MarkdownIt.parseInline(src, env) -> Array
 * - src (String): source string
 * - env (Object): environment sandbox
 *
 * The same as [[MarkdownIt.parse]] but skip all block rules. It returns the
 * block tokens list with the single `inline` element, containing parsed inline
 * tokens in `children` property. Also updates `env` object.
 *
 * Throws `Error` when `src` is not a primitive string.
 **/
MarkdownIt.prototype.parseInline = function (src, env) {
  // Same input guard as `parse`, so both entry points reject bad input with
  // the same message.
  if (typeof src !== 'string') {
    throw new Error('Input data should be a String');
  }

  var state = new this.core.State(src, this, env);

  // Flag is set before running the core chain.
  state.inlineMode = true;
  this.core.process(state);

  return state.tokens;
};
|
|
|
|
|
/**
 * MarkdownIt.renderInline(src [, env]) -> String
 * - src (String): source string
 * - env (Object): environment sandbox
 *
 * Similar to [[MarkdownIt.render]] but for single paragraph content. Result
 * will NOT be wrapped into `<p>` tags.
 **/
MarkdownIt.prototype.renderInline = function (src, env) {
  // The same sandbox object is handed to both the parser and the renderer.
  var sandbox = env || {};
  var tokens = this.parseInline(src, sandbox);

  return this.renderer.render(tokens, this.options, sandbox);
};
|
|
|
|
|
module.exports = MarkdownIt;
|
|
|