0
0
mirror of https://github.com/nodejs/node.git synced 2024-11-30 07:27:22 +01:00
nodejs/tools/doctool/markdown.js
2011-07-03 11:27:23 -07:00

1449 lines
42 KiB
JavaScript

// Released under MIT license
// Copyright (c) 2009-2010 Dominic Baggott
// Copyright (c) 2009-2010 Ash Berlin
(function( expose ) {
/**
* class Markdown
*
* Markdown processing in Javascript done right. We have very particular views
* on what constitutes 'right' which include:
*
* - produces well-formed HTML (this means that em and strong nesting is
* important)
*
* - has an intermediate representation to allow processing of parsed data (We
* in fact have two, both as [JsonML]: a markdown tree and an HTML tree).
*
* - is easily extensible to add new dialects without having to rewrite the
* entire parsing mechanics
*
* - has a good test suite
*
* This implementation fulfills all of these (except that the test suite could
* do with expanding to automatically run all the fixtures from other Markdown
* implementations.)
*
* ##### Intermediate Representation
*
* *TODO* Talk about this :) Its JsonML, but document the node names we use.
*
* [JsonML]: http://jsonml.org/ "JSON Markup Language"
**/
var Markdown = expose.Markdown = function Markdown(dialect) {
switch (typeof dialect) {
case "undefined":
this.dialect = Markdown.dialects.Gruber;
break;
case "object":
this.dialect = dialect;
break;
default:
if (dialect in Markdown.dialects) {
this.dialect = Markdown.dialects[dialect];
}
else {
throw new Error("Unknown Markdown dialect '" + String(dialect) + "'");
}
break;
}
this.em_state = [];
this.strong_state = [];
this.debug_indent = "";
}
/**
* parse( markdown, [dialect] ) -> JsonML
* - markdown (String): markdown string to parse
* - dialect (String | Dialect): the dialect to use, defaults to gruber
*
* Parse `markdown` and return a markdown document as a Markdown.JsonML tree.
**/
expose.parse = function( source, dialect ) {
// dialect will default if undefined
var md = new Markdown( dialect );
return md.toTree( source );
}
/**
* toHTML( markdown ) -> String
* toHTML( md_tree ) -> String
* - markdown (String): markdown string to parse
* - md_tree (Markdown.JsonML): parsed markdown tree
*
* Take markdown (either as a string or as a JsonML tree) and run it through
* [[toHTMLTree]] then turn it into a well-formated HTML fragment.
**/
expose.toHTML = function toHTML( source ) {
var input = expose.toHTMLTree( source );
return expose.renderJsonML( input );
}
/**
* toHTMLTree( markdown, [dialect] ) -> JsonML
* toHTMLTree( md_tree ) -> JsonML
* - markdown (String): markdown string to parse
* - dialect (String | Dialect): the dialect to use, defaults to gruber
* - md_tree (Markdown.JsonML): parsed markdown tree
*
* Turn markdown into HTML, represented as a JsonML tree. If a string is given
* to this function, it is first parsed into a markdown tree by calling
* [[parse]].
**/
expose.toHTMLTree = function toHTMLTree( input, dialect ) {
// convert string input to an MD tree
if ( typeof input ==="string" ) input = this.parse( input, dialect );
// Now convert the MD tree to an HTML tree
// remove references from the tree
var attrs = extract_attr( input ),
refs = {};
if ( attrs && attrs.references ) {
refs = attrs.references;
}
var html = convert_tree_to_html( input, refs );
merge_text_nodes( html );
return html;
}
var mk_block = Markdown.mk_block = function(block, trail, line) {
// Be helpful for default case in tests.
if ( arguments.length == 1 ) trail = "\n\n";
var s = new String(block);
s.trailing = trail;
// To make it clear its not just a string
s.toSource = function() {
return "Markdown.mk_block( " +
uneval(block) +
", " +
uneval(trail) +
", " +
uneval(line) +
" )"
}
if (line != undefined)
s.lineNumber = line;
return s;
}
function count_lines( str ) {
var n = 0, i = -1;;
while ( ( i = str.indexOf('\n', i+1) ) != -1) n++;
return n;
}
// Internal - split source into rough blocks
Markdown.prototype.split_blocks = function splitBlocks( input, startLine ) {
// [\s\S] matches _anything_ (newline or space)
var re = /([\s\S]+?)($|\n(?:\s*\n|$)+)/g,
blocks = [],
m;
var line_no = 1;
if ( ( m = /^(\s*\n)/.exec(input) ) != null ) {
// skip (but count) leading blank lines
line_no += count_lines( m[0] );
re.lastIndex = m[0].length;
}
while ( ( m = re.exec(input) ) != null ) {
blocks.push( mk_block( m[1], m[2], line_no ) );
line_no += count_lines( m[0] );
}
return blocks;
}
/**
* Markdown#processBlock( block, next ) -> undefined | [ JsonML, ... ]
* - block (String): the block to process
* - next (Array): the following blocks
*
* Process `block` and return an array of JsonML nodes representing `block`.
*
* It does this by asking each block level function in the dialect to process
* the block until one can. Succesful handling is indicated by returning an
* array (with zero or more JsonML nodes), failure by a false value.
*
* Blocks handlers are responsible for calling [[Markdown#processInline]]
* themselves as appropriate.
*
* If the blocks were split incorrectly or adjacent blocks need collapsing you
* can adjust `next` in place using shift/splice etc.
*
* If any of this default behaviour is not right for the dialect, you can
* define a `__call__` method on the dialect that will get invoked to handle
* the block processing.
*/
Markdown.prototype.processBlock = function processBlock( block, next ) {
var cbs = this.dialect.block,
ord = cbs.__order__;
if ( "__call__" in cbs ) {
return cvs.__call__.call(this, block, next);
}
for ( var i = 0; i < ord.length; i++ ) {
//D:this.debug( "Testing", ord[i] );
var res = cbs[ ord[i] ].call( this, block, next );
if ( res ) {
//D:this.debug(" matched");
if ( !res instanceof Array || ( res.length > 0 && !( res[0] instanceof Array ) ) )
this.debug(ord[i], "didn't return a proper array");
//D:this.debug( "" );
return res;
}
}
// Uhoh! no match! Should we throw an error?
return [];
}
Markdown.prototype.processInline = function processInline( block ) {
return this.dialect.inline.__call__.call( this, String( block ) );
}
/**
* Markdown#toTree( source ) -> JsonML
* - source (String): markdown source to parse
*
* Parse `source` into a JsonML tree representing the markdown document.
**/
// custom_tree means set this.tree to `custom_tree` and restore old value on return
Markdown.prototype.toTree = function toTree( source, custom_root ) {
var blocks = source instanceof Array
? source
: this.split_blocks( source );
// Make tree a member variable so its easier to mess with in extensions
var old_tree = this.tree;
try {
this.tree = custom_root || this.tree || [ "markdown" ];
blocks:
while ( blocks.length ) {
var b = this.processBlock( blocks.shift(), blocks );
// Reference blocks and the like won't return any content
if ( !b.length ) continue blocks;
this.tree.push.apply( this.tree, b );
}
return this.tree;
}
finally {
if ( custom_root )
this.tree = old_tree;
}
}
// Noop by default
Markdown.prototype.debug = function () {
var args = Array.prototype.slice.call( arguments);
args.unshift(this.debug_indent);
print.apply( print, args );
}
Markdown.prototype.loop_re_over_block = function( re, block, cb ) {
// Dont use /g regexps with this
var m,
b = block.valueOf();
while ( b.length && (m = re.exec(b) ) != null) {
b = b.substr( m[0].length );
cb.call(this, m);
}
return b;
}
/**
* Markdown.dialects
*
* Namespace of built-in dialects.
**/
Markdown.dialects = {};
/**
* Markdown.dialects.Gruber
*
* The default dialect that follows the rules set out by John Gruber's
* markdown.pl as closely as possible. Well actually we follow the behaviour of
* that script which in some places is not exactly what the syntax web page
* says.
**/
Markdown.dialects.Gruber = {
block: {
atxHeader: function atxHeader( block, next ) {
var m = block.match( /^(#{1,6})\s*(.*?)\s*#*\s*(?:\n|$)/ );
if ( !m ) return undefined;
var header = [ "header", { level: m[ 1 ].length }, m[ 2 ] ];
if ( m[0].length < block.length )
next.unshift( mk_block( block.substr( m[0].length ), block.trailing, block.lineNumber + 2 ) );
return [ header ];
},
setextHeader: function setextHeader( block, next ) {
var m = block.match( /^(.*)\n([-=])\2\2+(?:\n|$)/ );
if ( !m ) return undefined;
var level = ( m[ 2 ] === "=" ) ? 1 : 2;
var header = [ "header", { level : level }, m[ 1 ] ];
if ( m[0].length < block.length )
next.unshift( mk_block( block.substr( m[0].length ), block.trailing, block.lineNumber + 2 ) );
return [ header ];
},
code: function code( block, next ) {
// | Foo
// |bar
// should be a code block followed by a paragraph. Fun
//
// There might also be adjacent code block to merge.
var ret = [],
re = /^(?: {0,3}\t| {4})(.*)\n?/,
lines;
// 4 spaces + content
var m = block.match( re );
if ( !m ) return undefined;
block_search:
do {
// Now pull out the rest of the lines
var b = this.loop_re_over_block(
re, block.valueOf(), function( m ) { ret.push( m[1] ) } );
if (b.length) {
// Case alluded to in first comment. push it back on as a new block
next.unshift( mk_block(b, block.trailing) );
break block_search;
}
else if (next.length) {
// Check the next block - it might be code too
var m = next[0].match( re );
if ( !m ) break block_search;
// Pull how how many blanks lines follow - minus two to account for .join
ret.push ( block.trailing.replace(/[^\n]/g, '').substring(2) );
block = next.shift();
}
else
break block_search;
} while (true);
return [ [ "code_block", ret.join("\n") ] ];
},
horizRule: function horizRule( block, next ) {
// this needs to find any hr in the block to handle abutting blocks
var m = block.match( /^(?:([\s\S]*?)\n)?[ \t]*([-_*])(?:[ \t]*\2){2,}[ \t]*(?:\n([\s\S]*))?$/ );
if ( !m ) {
return undefined;
}
var jsonml = [ [ "hr" ] ];
// if there's a leading abutting block, process it
if ( m[ 1 ] ) {
jsonml.unshift.apply( jsonml, this.processBlock( m[ 1 ], [] ) );
}
// if there's a trailing abutting block, stick it into next
if ( m[ 3 ] ) {
next.unshift( mk_block( m[ 3 ] ) );
}
return jsonml;
},
// There are two types of lists. Tight and loose. Tight lists have no whitespace
// between the items (and result in text just in the <li>) and loose lists,
// which have an empty line between list items, resulting in (one or more)
// paragraphs inside the <li>.
//
// There are all sorts weird edge cases about the original markdown.pl's
// handling of lists:
//
// * Nested lists are supposed to be indented by four chars per level. But
// if they aren't, you can get a nested list by indenting by less than
// four so long as the indent doesn't match an indent of an existing list
// item in the 'nest stack'.
//
// * The type of the list (bullet or number) is controlled just by the
// first item at the indent. Subsequent changes are ignored unless they
// are for nested lists
//
lists: (function( ) {
// Use a closure to hide a few variables.
var any_list = "[*+-]|\\d\\.",
bullet_list = /[*+-]/,
number_list = /\d+\./,
// Capture leading indent as it matters for determining nested lists.
is_list_re = new RegExp( "^( {0,3})(" + any_list + ")[ \t]+" ),
indent_re = "(?: {0,3}\\t| {4})";
// TODO: Cache this regexp for certain depths.
// Create a regexp suitable for matching an li for a given stack depth
function regex_for_depth( depth ) {
return new RegExp(
// m[1] = indent, m[2] = list_type
"(?:^(" + indent_re + "{0," + depth + "} {0,3})(" + any_list + ")\\s+)|" +
// m[3] = cont
"(^" + indent_re + "{0," + (depth-1) + "}[ ]{0,4})"
);
}
function expand_tab( input ) {
return input.replace( / {0,3}\t/g, " " );
}
// Add inline content `inline` to `li`. inline comes from processInline
// so is an array of content
function add(li, loose, inline, nl) {
if (loose) {
li.push( [ "para" ].concat(inline) );
return;
}
// Hmmm, should this be any block level element or just paras?
var add_to = li[li.length -1] instanceof Array && li[li.length - 1][0] == "para"
? li[li.length -1]
: li;
// If there is already some content in this list, add the new line in
if (nl && li.length > 1) inline.unshift(nl);
for (var i=0; i < inline.length; i++) {
var what = inline[i],
is_str = typeof what == "string";
if (is_str && add_to.length > 1 && typeof add_to[add_to.length-1] == "string" )
{
add_to[ add_to.length-1 ] += what;
}
else {
add_to.push( what );
}
}
}
// contained means have an indent greater than the current one. On
// *every* line in the block
function get_contained_blocks( depth, blocks ) {
var re = new RegExp( "^(" + indent_re + "{" + depth + "}.*?\\n?)*$" ),
replace = new RegExp("^" + indent_re + "{" + depth + "}", "gm"),
ret = [];
while ( blocks.length > 0 ) {
if ( re.exec( blocks[0] ) ) {
var b = blocks.shift(),
// Now remove that indent
x = b.replace( replace, "");
ret.push( mk_block( x, b.trailing, b.lineNumber ) );
}
break;
}
return ret;
}
// passed to stack.forEach to turn list items up the stack into paras
function paragraphify(s, i, stack) {
var list = s.list;
var last_li = list[list.length-1];
if (last_li[1] instanceof Array && last_li[1][0] == "para") {
return;
}
if (i+1 == stack.length) {
// Last stack frame
// Keep the same array, but replace the contents
last_li.push( ["para"].concat( last_li.splice(1) ) );
}
else {
var sublist = last_li.pop();
last_li.push( ["para"].concat( last_li.splice(1) ), sublist );
}
}
// The matcher function
return function( block, next ) {
var m = block.match( is_list_re );
if ( !m ) return undefined;
function make_list( m ) {
var list = bullet_list.exec( m[2] )
? ["bulletlist"]
: ["numberlist"];
stack.push( { list: list, indent: m[1] } );
return list;
}
var stack = [], // Stack of lists for nesting.
list = make_list( m ),
last_li,
loose = false,
ret = [ stack[0].list ];
// Loop to search over block looking for inner block elements and loose lists
loose_search:
while( true ) {
// Split into lines preserving new lines at end of line
var lines = block.split( /(?=\n)/ );
// We have to grab all lines for a li and call processInline on them
// once as there are some inline things that can span lines.
var li_accumulate = "";
// Loop over the lines in this block looking for tight lists.
tight_search:
for (var line_no=0; line_no < lines.length; line_no++) {
var nl = "",
l = lines[line_no].replace(/^\n/, function(n) { nl = n; return "" });
// TODO: really should cache this
var line_re = regex_for_depth( stack.length );
m = l.match( line_re );
//print( "line:", uneval(l), "\nline match:", uneval(m) );
// We have a list item
if ( m[1] !== undefined ) {
// Process the previous list item, if any
if ( li_accumulate.length ) {
add( last_li, loose, this.processInline( li_accumulate ), nl );
// Loose mode will have been dealt with. Reset it
loose = false;
li_accumulate = "";
}
m[1] = expand_tab( m[1] );
var wanted_depth = Math.floor(m[1].length/4)+1;
//print( "want:", wanted_depth, "stack:", stack.length);
if ( wanted_depth > stack.length ) {
// Deep enough for a nested list outright
//print ( "new nested list" );
list = make_list( m );
last_li.push( list );
last_li = list[1] = [ "listitem" ];
}
else {
// We aren't deep enough to be strictly a new level. This is
// where Md.pl goes nuts. If the indent matches a level in the
// stack, put it there, else put it one deeper then the
// wanted_depth deserves.
var found = stack.some(function(s, i) {
if ( s.indent != m[1] ) return false;
list = s.list; // Found the level we want
stack.splice(i+1); // Remove the others
//print("found");
return true; // And stop looping
});
if (!found) {
//print("not found. l:", uneval(l));
wanted_depth++;
if (wanted_depth <= stack.length) {
stack.splice(wanted_depth);
//print("Desired depth now", wanted_depth, "stack:", stack.length);
list = stack[wanted_depth-1].list;
//print("list:", uneval(list) );
}
else {
//print ("made new stack for messy indent");
list = make_list(m);
last_li.push(list);
}
}
//print( uneval(list), "last", list === stack[stack.length-1].list );
last_li = [ "listitem" ];
list.push(last_li);
} // end depth of shenegains
nl = "";
}
// Add content
if (l.length > m[0].length) {
li_accumulate += nl + l.substr( m[0].length );
}
} // tight_search
if ( li_accumulate.length ) {
add( last_li, loose, this.processInline( li_accumulate ), nl );
// Loose mode will have been dealt with. Reset it
loose = false;
li_accumulate = "";
}
// Look at the next block - we might have a loose list. Or an extra
// paragraph for the current li
var contained = get_contained_blocks( stack.length, next );
// Deal with code blocks or properly nested lists
if (contained.length > 0) {
// Make sure all listitems up the stack are paragraphs
stack.forEach( paragraphify, this );
last_li.push.apply( last_li, this.toTree( contained, [] ) );
}
var next_block = next[0] && next[0].valueOf() || "";
if ( next_block.match(is_list_re) || next_block.match( /^ / ) ) {
block = next.shift();
// Check for an HR following a list: features/lists/hr_abutting
var hr = this.dialect.block.horizRule( block, next );
if (hr) {
ret.push.apply(ret, hr);
break;
}
// Make sure all listitems up the stack are paragraphs
stack.forEach( paragraphify , this );
loose = true;
continue loose_search;
}
break;
} // loose_search
return ret;
}
})(),
blockquote: function blockquote( block, next ) {
if ( !block.match( /^>/m ) )
return undefined;
var jsonml = [];
// separate out the leading abutting block, if any
if ( block[ 0 ] != ">" ) {
var lines = block.split( /\n/ ),
prev = [];
// keep shifting lines until you find a crotchet
while ( lines.length && lines[ 0 ][ 0 ] != ">" ) {
prev.push( lines.shift() );
}
// reassemble!
block = lines.join( "\n" );
jsonml.push.apply( jsonml, this.processBlock( prev.join( "\n" ), [] ) );
}
// if the next block is also a blockquote merge it in
while ( next.length && next[ 0 ][ 0 ] == ">" ) {
var b = next.shift();
block += block.trailing + b;
block.trailing = b.trailing;
}
// Strip off the leading "> " and re-process as a block.
var input = block.replace( /^> ?/gm, '' ),
old_tree = this.tree;
jsonml.push( this.toTree( input, [ "blockquote" ] ) );
return jsonml;
},
referenceDefn: function referenceDefn( block, next) {
var re = /^\s*\[(.*?)\]:\s*(\S+)(?:\s+(?:(['"])(.*?)\3|\((.*?)\)))?\n?/;
// interesting matches are [ , ref_id, url, , title, title ]
if ( !block.match(re) )
return undefined;
// make an attribute node if it doesn't exist
if ( !extract_attr( this.tree ) ) {
this.tree.splice( 1, 0, {} );
}
var attrs = extract_attr( this.tree );
// make a references hash if it doesn't exist
if ( attrs.references === undefined ) {
attrs.references = {};
}
var b = this.loop_re_over_block(re, block, function( m ) {
if ( m[2] && m[2][0] == '<' && m[2][m[2].length-1] == '>' )
m[2] = m[2].substring( 1, m[2].length - 1 );
var ref = attrs.references[ m[1].toLowerCase() ] = {
href: m[2]
};
if (m[4] !== undefined)
ref.title = m[4];
else if (m[5] !== undefined)
ref.title = m[5];
} );
if (b.length)
next.unshift( mk_block( b, block.trailing ) );
return [];
},
para: function para( block, next ) {
// everything's a para!
return [ ["para"].concat( this.processInline( block ) ) ];
}
}
}
Markdown.dialects.Gruber.inline = {
__call__: function inline( text, patterns ) {
// Hmmm - should this function be directly in Md#processInline, or
// conversely, should Md#processBlock be moved into block.__call__ too
var out = [ ],
m,
// Look for the next occurange of a special character/pattern
re = new RegExp( "([\\s\\S]*?)(" + (patterns.source || patterns) + ")", "g" ),
lastIndex = 0;
//D:var self = this;
//D:self.debug("processInline:", uneval(text) );
function add(x) {
//D:self.debug(" adding output", uneval(x));
if (typeof x == "string" && typeof out[out.length-1] == "string")
out[ out.length-1 ] += x;
else
out.push(x);
}
while ( ( m = re.exec(text) ) != null) {
if ( m[1] ) add( m[1] ); // Some un-interesting text matched
else m[1] = { length: 0 }; // Or there was none, but make m[1].length == 0
var res;
if ( m[2] in this.dialect.inline ) {
res = this.dialect.inline[ m[2] ].call(
this,
text.substr( m.index + m[1].length ), m, out );
}
// Default for now to make dev easier. just slurp special and output it.
res = res || [ m[2].length, m[2] ];
var len = res.shift();
// Update how much input was consumed
re.lastIndex += ( len - m[2].length );
// Add children
res.forEach(add);
lastIndex = re.lastIndex;
}
// Add last 'boring' chunk
if ( text.length > lastIndex )
add( text.substr( lastIndex ) );
return out;
},
"\\": function escaped( text ) {
// [ length of input processed, node/children to add... ]
// Only esacape: \ ` * _ { } [ ] ( ) # * + - . !
if ( text.match( /^\\[\\`\*_{}\[\]()#\+.!\-]/ ) )
return [ 2, text[1] ];
else
// Not an esacpe
return [ 1, "\\" ];
},
"![": function image( text ) {
// ![Alt text](/path/to/img.jpg "Optional title")
// 1 2 3 4 <--- captures
var m = text.match( /^!\[(.*?)\][ \t]*\([ \t]*(\S*)(?:[ \t]+(["'])(.*?)\3)?[ \t]*\)/ );
if ( m ) {
if ( m[2] && m[2][0] == '<' && m[2][m[2].length-1] == '>' )
m[2] = m[2].substring( 1, m[2].length - 1 );
m[2] == this.dialect.inline.__call__.call( this, m[2], /\\/ )[0];
var attrs = { alt: m[1], href: m[2] || "" };
if ( m[4] !== undefined)
attrs.title = m[4];
return [ m[0].length, [ "img", attrs ] ];
}
// ![Alt text][id]
m = text.match( /^!\[(.*?)\][ \t]*\[(.*?)\]/ );
if ( m ) {
// We can't check if the reference is known here as it likely wont be
// found till after. Check it in md tree->hmtl tree conversion
return [ m[0].length, [ "img_ref", { alt: m[1], ref: m[2].toLowerCase(), text: m[0] } ] ];
}
// Just consume the '!['
return [ 2, "![" ];
},
"[": function link( text ) {
// [link text](/path/to/img.jpg "Optional title")
// 1 2 3 4 <--- captures
var m = text.match( /^\[([\s\S]*?)\][ \t]*\([ \t]*(\S+)(?:[ \t]+(["'])(.*?)\3)?[ \t]*\)/ );
if ( m ) {
if ( m[2] && m[2][0] == '<' && m[2][m[2].length-1] == '>' )
m[2] = m[2].substring( 1, m[2].length - 1 );
// Process escapes only
m[2] = this.dialect.inline.__call__.call( this, m[2], /\\/ )[0];
var attrs = { href: m[2] || "" };
if ( m[4] !== undefined)
attrs.title = m[4];
return [ m[0].length, [ "link", attrs, m[1] ] ];
}
// [Alt text][id]
// [Alt text] [id]
// [id]
m = text.match( /^\[([\s\S]*?)\](?: ?\[(.*?)\])?/ );
if ( m ) {
// [id] case, text == id
if ( m[2] === undefined || m[2] === "" ) m[2] = m[1];
// We can't check if the reference is known here as it likely wont be
// found till after. Check it in md tree->hmtl tree conversion.
// Store the original so that conversion can revert if the ref isn't found.
return [
m[ 0 ].length,
[
"link_ref",
{
ref: m[ 2 ].toLowerCase(),
original: m[ 0 ]
},
m[ 1 ]
]
];
}
// Just consume the '['
return [ 1, "[" ];
},
"<": function autoLink( text ) {
var m;
if ( ( m = text.match( /^<(?:((https?|ftp|mailto):[^>]+)|(.*?@.*?\.[a-zA-Z]+))>/ ) ) != null ) {
if ( m[3] ) {
return [ m[0].length, [ "link", { href: "mailto:" + m[3] }, m[3] ] ];
}
else if ( m[2] == "mailto" ) {
return [ m[0].length, [ "link", { href: m[1] }, m[1].substr("mailto:".length ) ] ];
}
else
return [ m[0].length, [ "link", { href: m[1] }, m[1] ] ];
}
return [ 1, "<" ];
},
"`": function inlineCode( text ) {
// Inline code block. as many backticks as you like to start it
// Always skip over the opening ticks.
var m = text.match( /(`+)(([\s\S]*?)\1)/ );
if ( m && m[2] )
return [ m[1].length + m[2].length, [ "inlinecode", m[3] ] ];
else {
// TODO: No matching end code found - warn!
return [ 1, "`" ];
}
},
" \n": function lineBreak( text ) {
return [ 3, [ "linebreak" ] ];
}
}
// Meta Helper/generator method for em and strong handling
function strong_em( tag, md ) {
var state_slot = tag + "_state",
other_slot = tag == "strong" ? "em_state" : "strong_state";
function CloseTag(len) {
this.len_after = len;
this.name = "close_" + md;
}
return function ( text, orig_match ) {
if (this[state_slot][0] == md) {
// Most recent em is of this type
//D:this.debug("closing", md);
this[state_slot].shift();
// "Consume" everything to go back to the recrusion in the else-block below
return[ text.length, new CloseTag(text.length-md.length) ];
}
else {
// Store a clone of the em/strong states
var other = this[other_slot].slice(),
state = this[state_slot].slice();
this[state_slot].unshift(md);
//D:this.debug_indent += " ";
// Recurse
var res = this.processInline( text.substr( md.length ) );
//D:this.debug_indent = this.debug_indent.substr(2);
var last = res[res.length - 1];
//D:this.debug("processInline from", tag + ": ", uneval( res ) );
var check = this[state_slot].shift();
if (last instanceof CloseTag) {
res.pop();
// We matched! Huzzah.
var consumed = text.length - last.len_after;
return [ consumed, [ tag ].concat(res) ];
}
else {
// Restore the state of the other kind. We might have mistakenly closed it.
this[other_slot] = other;
this[state_slot] = state;
// We can't reuse the processed result as it could have wrong parsing contexts in it.
return [ md.length, md ];
}
}
} // End returned function
}
Markdown.dialects.Gruber.inline["**"] = strong_em("strong", "**");
Markdown.dialects.Gruber.inline["__"] = strong_em("strong", "__");
Markdown.dialects.Gruber.inline["*"] = strong_em("em", "*");
Markdown.dialects.Gruber.inline["_"] = strong_em("em", "_");
// Build default order from insertion order.
Markdown.buildBlockOrder = function(d) {
var ord = [];
for ( var i in d ) {
if ( i == "__order__" || i == "__call__" ) continue;
ord.push( i );
}
d.__order__ = ord;
}
// Build patterns for inline matcher
Markdown.buildInlinePatterns = function(d) {
var patterns = [];
for ( var i in d ) {
if (i == "__call__") continue;
var l = i.replace( /([\\.*+?|()\[\]{}])/g, "\\$1" )
.replace( /\n/, "\\n" );
patterns.push( i.length == 1 ? l : "(?:" + l + ")" );
}
patterns = patterns.join("|");
//print("patterns:", uneval( patterns ) );
var fn = d.__call__;
d.__call__ = function(text, pattern) {
if (pattern != undefined)
return fn.call(this, text, pattern);
else
return fn.call(this, text, patterns);
}
}
// Helper function to make sub-classing a dialect easier
Markdown.subclassDialect = function( d ) {
function Block() {};
Block.prototype = d.block;
function Inline() {};
Inline.prototype = d.inline;
return { block: new Block(), inline: new Inline() };
}
Markdown.buildBlockOrder ( Markdown.dialects.Gruber.block );
Markdown.buildInlinePatterns( Markdown.dialects.Gruber.inline );
Markdown.dialects.Maruku = Markdown.subclassDialect( Markdown.dialects.Gruber );
Markdown.dialects.Maruku.block.document_meta = function document_meta( block, next ) {
// we're only interested in the first block
if ( block.lineNumber > 1 ) return undefined;
// document_meta blocks consist of one or more lines of `Key: Value\n`
if ( ! block.match( /^(?:\w+:.*\n)*\w+:.*$/ ) ) return undefined;
// make an attribute node if it doesn't exist
if ( !extract_attr( this.tree ) ) {
this.tree.splice( 1, 0, {} );
}
var pairs = block.split( /\n/ );
for ( p in pairs ) {
var m = pairs[ p ].match( /(\w+):\s*(.*)$/ ),
key = m[ 1 ].toLowerCase(),
value = m[ 2 ];
this.tree[ 1 ][ key ] = value;
}
// document_meta produces no content!
return [];
}
Markdown.dialects.Maruku.block.block_meta = function block_meta( block, next ) {
// check if the last line of the block is an meta hash
var m = block.match( /(^|\n) {0,3}\{:\s*((?:\\\}|[^\}])*)\s*\}$/ );
if ( !m ) return undefined;
// process the meta hash
var attr = process_meta_hash( m[ 2 ] );
// if we matched ^ then we need to apply meta to the previous block
if ( m[ 1 ] === "" ) {
var node = this.tree[ this.tree.length - 1 ],
hash = extract_attr( node );
// if the node is a string (rather than JsonML), bail
if ( typeof node === "string" ) return undefined;
// create the attribute hash if it doesn't exist
if ( !hash ) {
hash = {};
node.splice( 1, 0, hash );
}
// add the attributes in
for ( a in attr ) {
hash[ a ] = attr[ a ];
}
// return nothing so the meta hash is removed
return [];
}
// pull the meta hash off the block and process what's left
var b = block.replace( /\n.*$/, "" ),
result = this.processBlock( b, [] );
// get or make the attributes hash
var hash = extract_attr( result[ 0 ] );
if ( !hash ) {
hash = {};
result[ 0 ].splice( 1, 0, hash );
}
// attach the attributes to the block
for ( a in attr ) {
hash[ a ] = attr[ a ];
}
return result;
}
Markdown.dialects.Maruku.block.definition_list = function definition_list( block, next ) {
// one or more terms followed by one or more definitions, in a single block
var tight = /^((?:[^\s:].*\n)+):\s+([^]+)$/,
list = [ "dl" ];
// see if we're dealing with a tight or loose block
if ( ( m = block.match( tight ) ) ) {
// pull subsequent tight DL blocks out of `next`
var blocks = [ block ];
while ( next.length && tight.exec( next[ 0 ] ) ) {
blocks.push( next.shift() );
}
for ( var b = 0; b < blocks.length; ++b ) {
var m = blocks[ b ].match( tight ),
terms = m[ 1 ].replace( /\n$/, "" ).split( /\n/ ),
defns = m[ 2 ].split( /\n:\s+/ );
// print( uneval( m ) );
for ( var i = 0; i < terms.length; ++i ) {
list.push( [ "dt", terms[ i ] ] );
}
for ( var i = 0; i < defns.length; ++i ) {
// run inline processing over the definition
list.push( [ "dd" ].concat( this.processInline( defns[ i ].replace( /(\n)\s+/, "$1" ) ) ) );
}
}
}
else {
return undefined;
}
return [ list ];
}
Markdown.dialects.Maruku.inline[ "{:" ] = function inline_meta( text, matches, out ) {
if ( !out.length ) {
return [ 2, "{:" ];
}
// get the preceeding element
var before = out[ out.length - 1 ];
if ( typeof before === "string" ) {
return [ 2, "{:" ];
}
// match a meta hash
var m = text.match( /^\{:\s*((?:\\\}|[^\}])*)\s*\}/ );
// no match, false alarm
if ( !m ) {
return [ 2, "{:" ];
}
// attach the attributes to the preceeding element
var meta = process_meta_hash( m[ 1 ] ),
attr = extract_attr( before );
if ( !attr ) {
attr = {};
before.splice( 1, 0, attr );
}
for ( var k in meta ) {
attr[ k ] = meta[ k ];
}
// cut out the string and replace it with nothing
return [ m[ 0 ].length, "" ];
}
Markdown.buildBlockOrder ( Markdown.dialects.Maruku.block );
Markdown.buildInlinePatterns( Markdown.dialects.Maruku.inline );
function extract_attr( jsonml ) {
return jsonml instanceof Array
&& jsonml.length > 1
&& typeof jsonml[ 1 ] === "object"
&& !( jsonml[ 1 ] instanceof Array )
? jsonml[ 1 ]
: undefined;
}
function process_meta_hash( meta_string ) {
var meta = split_meta_hash( meta_string ),
attr = {};
for ( var i = 0; i < meta.length; ++i ) {
// id: #foo
if ( /^#/.test( meta[ i ] ) ) {
attr.id = meta[ i ].substring( 1 );
}
// class: .foo
else if ( /^\./.test( meta[ i ] ) ) {
// if class already exists, append the new one
if ( attr['class'] ) {
attr['class'] = attr['class'] + meta[ i ].replace( /./, " " );
}
else {
attr['class'] = meta[ i ].substring( 1 );
}
}
// attribute: foo=bar
else if ( /=/.test( meta[ i ] ) ) {
var s = meta[ i ].split( /=/ );
attr[ s[ 0 ] ] = s[ 1 ];
}
}
return attr;
}
function split_meta_hash( meta_string ) {
var meta = meta_string.split( "" ),
parts = [ "" ],
in_quotes = false;
while ( meta.length ) {
var letter = meta.shift();
switch ( letter ) {
case " " :
// if we're in a quoted section, keep it
if ( in_quotes ) {
parts[ parts.length - 1 ] += letter;
}
// otherwise make a new part
else {
parts.push( "" );
}
break;
case "'" :
case '"' :
// reverse the quotes and move straight on
in_quotes = !in_quotes;
break;
case "\\" :
// shift off the next letter to be used straight away.
// it was escaped so we'll keep it whatever it is
letter = meta.shift();
default :
parts[ parts.length - 1 ] += letter;
break;
}
}
return parts;
}
/**
* renderJsonML( jsonml[, options] ) -> String
* - jsonml (Array): JsonML array to render to XML
* - options (Object): options
*
* Converts the given JsonML into well-formed XML.
*
* The options currently understood are:
*
* - root (Boolean): wether or not the root node should be included in the
* output, or just its children. The default `false` is to not include the
* root itself.
*/
expose.renderJsonML = function( jsonml, options ) {
options = options || {};
// include the root element in the rendered output?
options.root = options.root || false;
options.xhtml = options.xhtml || false;
var content = [];
if ( options.root ) {
content.push( render_tree( jsonml, options.xhtml ) );
}
else {
jsonml.shift(); // get rid of the tag
if ( jsonml.length && typeof jsonml[ 0 ] === "object" && !( jsonml[ 0 ] instanceof Array ) ) {
jsonml.shift(); // get rid of the attributes
}
while ( jsonml.length ) {
content.push( render_tree( jsonml.shift(), options.xhtml ) );
}
}
return content.join( "\n\n" );
}
function render_tree( jsonml, xhtml ) {
// basic case
if ( typeof jsonml === "string" ) {
return jsonml.replace( /&/g, "&amp;" )
.replace( /</g, "&lt;" )
.replace( />/g, "&gt;" );
}
var tag = jsonml.shift(),
attributes = {},
content = [];
if ( jsonml.length && typeof jsonml[ 0 ] === "object" && !( jsonml[ 0 ] instanceof Array ) ) {
attributes = jsonml.shift();
}
while ( jsonml.length ) {
content.push( arguments.callee( jsonml.shift(), xhtml ) );
}
var tag_attrs = "";
for ( var a in attributes ) {
tag_attrs += " " + a + '="' + attributes[ a ] + '"';
}
// if xhtml, self-close empty tags
// be careful about adding whitespace here for inline elements
var markup = "<"+ tag + tag_attrs;
var contentstr = content.join( "" );
if ( xhtml && contentstr.length == 0 ) {
markup += " />";
}
else {
markup += ">" + contentstr + "</" + tag + ">";
}
return markup;
}
function convert_tree_to_html( tree, references ) {
// shallow clone
var jsonml = tree.slice( 0 );
// Clone attributes if the exist
var attrs = extract_attr( jsonml );
if ( attrs ) {
jsonml[ 1 ] = {};
for ( var i in attrs ) {
jsonml[ 1 ][ i ] = attrs[ i ];
}
attrs = jsonml[ 1 ];
}
// basic case
if ( typeof jsonml === "string" ) {
return jsonml;
}
// convert this node
switch ( jsonml[ 0 ] ) {
case "header":
jsonml[ 0 ] = "h" + jsonml[ 1 ].level;
delete jsonml[ 1 ].level;
break;
case "bulletlist":
jsonml[ 0 ] = "ul";
break;
case "numberlist":
jsonml[ 0 ] = "ol";
break;
case "listitem":
jsonml[ 0 ] = "li";
break;
case "para":
jsonml[ 0 ] = "p";
break;
case "markdown":
jsonml[ 0 ] = "html";
if ( attrs ) delete attrs.references;
break;
case "code_block":
jsonml[ 0 ] = "pre";
var i = attrs ? 2 : 1;
var code = [ "code" ];
code.push.apply( code, jsonml.splice( i ) );
jsonml[ i ] = code;
break;
case "inlinecode":
jsonml[ 0 ] = "code";
break;
case "img":
jsonml[ 1 ].src = jsonml[ 1 ].href;
delete jsonml[ 1 ].href;
break;
case "linebreak":
jsonml[0] = "br";
break;
case "link":
jsonml[ 0 ] = "a";
break;
case "link_ref":
jsonml[ 0 ] = "a";
// grab this ref and clean up the attribute node
var ref = references[ attrs.ref ];
// if the reference exists, make the link
if ( ref ) {
delete attrs.ref;
// add in the href and title, if present
attrs.href = ref.href;
if ( ref.title ) {
attrs.title = ref.title;
}
// get rid of the unneeded original text
delete attrs.original;
}
// the reference doesn't exist, so revert to plain text
else {
return attrs.original;
}
break;
}
// convert all the children
var i = 1;
// deal with the attribute node, if it exists
if ( attrs ) {
// if there are keys, skip over it
for ( var key in jsonml[ 1 ] ) {
i = 2;
}
// if there aren't, remove it
if ( i === 1 ) {
jsonml.splice( i, 1 );
}
}
for ( ; i < jsonml.length; ++i ) {
jsonml[ i ] = arguments.callee( jsonml[ i ], references );
}
return jsonml;
}
// merges adjacent text nodes into a single node
function merge_text_nodes( jsonml ) {
// skip the tag name and attribute hash
var i = extract_attr( jsonml ) ? 2 : 1;
while ( i < jsonml.length ) {
// if it's a string check the next item too
if ( typeof jsonml[ i ] === "string" ) {
if ( i + 1 < jsonml.length && typeof jsonml[ i + 1 ] === "string" ) {
// merge the second string into the first and remove it
jsonml[ i ] += jsonml.splice( i + 1, 1 )[ 0 ];
}
else {
++i;
}
}
// if it's not a string recurse
else {
arguments.callee( jsonml[ i ] );
++i;
}
}
}
})(exports);