From f7cf24ff19860831c34965d388ebde7154f284b1 Mon Sep 17 00:00:00 2001 From: Rene' Jeschke Date: Mon, 18 Apr 2011 18:30:52 +0200 Subject: [PATCH] Added abbreviations and text anchors, updated README and JavaDoc. --- README.md | 267 ++++++++++++++++--------- src/java/txtmark/Block.java | 4 +- src/java/txtmark/Decorator.java | 29 ++- src/java/txtmark/DefaultDecorator.java | 17 +- src/java/txtmark/Emitter.java | 75 ++++++- src/java/txtmark/Line.java | 87 ++++++++ src/java/txtmark/LinkRef.java | 5 +- src/java/txtmark/MarkToken.java | 22 +- src/java/txtmark/Processor.java | 18 +- src/java/txtmark/Run.java | 2 +- src/java/txtmark/Utils.java | 2 + 11 files changed, 404 insertions(+), 124 deletions(-) diff --git a/README.md b/README.md index 603383f..e08fc13 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ See LICENSE.txt for licensing information. *** -Txtmark is yet another markdown processor for the JVM. +### Txtmark is yet another markdown processor for the JVM. * It is easy to use: @@ -16,12 +16,14 @@ Txtmark is yet another markdown processor for the JVM. * It does not depend on other libraries, so classpathing `txtmark.jar` is sufficient to use Txtmark in your project. -For an in-depth explanation of the markdown syntax have a look at [daringfireball.net](http://daringfireball.net/projects/markdown/syntax). +For an in-depth explanation of markdown have a look at the original [Markdown Syntax]. + +*** ### Build instructions -1. Clone the repo or download the sources as [tar] or [zip] -2. Install [Apache Ant(TM)](http://ant.apache.org/) +1. Clone the [repo] or download the sources as [tar] or [zip] +2. Install [Apache Ant(TM)] 3. Do ant release @@ -29,10 +31,148 @@ For an in-depth explanation of the markdown syntax have a look at [daringfirebal and you will find everything you need inside the `release` folder. 
-### Where Txtmark is not like Markdown +*** + +### Txtmark extensions + +To enable Txtmark's extended markdown parsing you can use the $PROFILE$ mechanism: + + [$PROFILE$]: extended + +This seemed to me as the easiest and safest way to enable different behaviours. +Just put this line into your Txtmark file like you would use reference links. + +#### Behavior changes when using `[$PROFILE$]: extended` + +* ##### Lists and code blocks end a paragraph + + In normal markdown the following: + + This is a paragraph + * and this is not a list + + Will produce: + +

This is a paragraph + * and this is not a list

+ + When using Txtmark extensions this changes to: + +

This is a paragraph

+ + +* ##### Text anchors + + Headlines and list items may receive an ID which + you can refer to using links. + + ## Headline with ID ## {#headid} + + Another headline with ID {#headid2} + ------------------------ + + * List with ID {#listid} + + Links: [Foo] (#headid) + + this will produce: + +

Headline with ID

+

Another headline with ID

+ +

Links: Foo

+ + The ID _must_ be the last thing on the first line. + + All spaces before `{#` get removed, so you can't + use an ID and a manual line break in the same line. + +* ##### Auto HTML entities + + * `(C)` becomes `&copy;` - © + * `(R)` becomes `&reg;` - ® + * `(TM)` becomes `&trade;` - ™ + * `--` becomes `&ndash;` - – + * `---` becomes `&mdash;` - — + * `...` becomes `&hellip;` - … + * `<<` becomes `&laquo;` - « + * `>>` becomes `&raquo;` - » + * `"Hello"` becomes `&ldquo;Hello&rdquo;` - “Hello” + +* ##### Underscores (Emphasis) + + Underscores in the middle of a word don't result in emphasis. + + Con_cat_this + + normally produces this: + + Con<em>cat</em>this + +* ##### Superscript + + You can use `^` to mark a span as superscript. + + 2^2^ = 4 + + turns into + + 2<sup>2</sup> = 4 + +* ##### Abbreviations + + Abbreviations are defined like reference links, but using a `*` + instead of a link and must be single-line only. + + [Git]: * "Fast distributed revision control system" + + and used like this + + This is [Git]! + + which will produce + + This is <abbr title="Fast distributed revision control system">Git</abbr>! + +*** + +### Markdown conformity + +Txtmark passes all tests inside [MarkdownTest\_1.0\_2007-05-09](http://daringfireball.net/projects/downloads/MarkdownTest_1.0_2007-05-09.tgz) +except for two: + +1. **Images.text** + + Fails because Txtmark doesn't produce empty 'title' image attributes. + (IMHO: Images ... OK) + +2. **Literal quotes in titles.text** + + What the frell ... this test will continue to FAIL. + Sorry, but using unescaped `"` in a title which should be surrounded + by `"` is unacceptable for me ;) + + Change: + + Foo [bar](/url/ "Title with "quotes" inside"). + [bar]: /url/ "Title with "quotes" inside" + + to: + + Foo [bar](/url/ "Title with \"quotes\" inside"). + [bar]: /url/ "Title with \"quotes\" inside" + + and Txtmark will produce the correct result. + (IMHO: Literal quotes in titles ... OK) *** +### Where Txtmark is not like Markdown + * Txtmark does not produce empty `title` attributes in link and image tags. 
* Unescaped `"` in link titles starting with `"` are not recognized and result @@ -77,86 +217,17 @@ For an in-depth explanation of the markdown syntax have a look at [daringfirebal -### Txtmark extensions +* List of escapeable characters: + + \ [ ] ( ) { } # + " ' . < > + - _ + ! ` ^ + *** -To enable Txtmark's extended markdown parsing you can use the $PROFILE$ mechanism: - - [$PROFILE$]: extended - -This seemed to me as the easiest and safest way to enable different behaviours. -(All other markdown processors will ignore this line.) - -#### Behavior changes when using `[$PROFILE$]: extended` - -* Lists and code blocks end a paragraph (inspired by [Actuarius]) - - In normal markdown the following: - - This is a paragraph - * and this is not a list - - will produce: - -

This is a paragraph - * and this is not a list

- - When using Txtmark extensions this changes to: - -

This is a paragraph

- - -* Auto HTML entities (inspired by [SmartyPants]): - - * `(C)` becomes `©` - © - * `(R)` becomes `®` - ® - * `(TM)` becomes `™` - ™ - * `--` becomes `–` - – - * `---` becomes `—` - — - * `...` becomes `…` - … - * `<<` becomes `«` - « - * `>>` becomes `»` - » - * `"Hello"` becomes `“Hello”` - “Hello” - -### Markdown conformity - -*** - -Txtmark passes all tests inside [MarkdownTest\_1.0\_2007-05-09](http://daringfireball.net/projects/downloads/MarkdownTest_1.0_2007-05-09.tgz) -except of two: - -1. **Images.text** - - Fails because Txtmark doesn't produce empty 'title' image attributes. - (IMHO: Images ... OK) - -2. **Literal quotes in titles.text** - - What the frell ... this test will continue to FAIL. - Sorry, but using unescaped `"` in a title which should be surrounded - by `"` is unacceptable for me ;) - - Change: - - Foo [bar](/url/ "Title with "quotes" inside"). - [bar]: /url/ "Title with "quotes" inside" - - to: - - Foo [bar](/url/ "Title with \"quotes\" inside"). - [bar]: /url/ "Title with \"quotes\" inside" - - and Txtmark will produce the correct result. - (IMHO: Literal quotes in titles ... OK) - - ### Performance comparison of markdown processors for the JVM ---- - Based on [this benchmark suite](http://henkelmann.eu/2011/01/10/performance_comparison_of_markdown_processor_for_the_jvm). 
Excerpt from the original post concerning this benchmark suite: @@ -196,29 +267,45 @@ Benchmark system: All tests together32812885518451961013010460206196 -Benchmarked versions: +##### Benchmarked versions: [Actuarius] version: 0.2 [PegDown] version: 0.8.5.4 [Knockoff] version: 0.7.3-15 ---- +*** -Mentioned/related projects: -[Markdown] is Copyright (C) 2004 by John Gruber -[SmartyPants] is Copyright (C) 2003 John Gruber -[Actuarius] is Copyright (C) 2010 by Christoph Henkelmann -[Knockoff] is Copyright (C) 2009-2011 by Tristan Juricek -[PegDown] is Copyright (C) 2010 by Mathias Doenitz +### TODO + +* Inline HTML control (configurable escaping of unallowed HTML tags) +* Code clean-ups +* Binary download *** +### Mentioned/related projects + +[Markdown] is Copyright (C) 2004 by John Gruber +[SmartyPants] is Copyright (C) 2003 by John Gruber +[Actuarius] is Copyright (C) 2010 by Christoph Henkelmann +[Knockoff] is Copyright (C) 2009-2011 by Tristan Juricek +[PegDown] is Copyright (C) 2010 by Mathias Doenitz +[PHP Markdown & Extra] is Copyright (C) 2009 Michel Fortin + +*** + +[Markdown Syntax]: http://daringfireball.net/projects/markdown/syntax/ "daringfireball.net" [Markdown]: http://daringfireball.net/projects/markdown/ +[SmartyPants]: http://daringfireball.net/projects/smartypants/ [Actuarius]: http://henkelmann.eu/projects/actuarius/ [Knockoff]: http://tristanhunt.com/projects/knockoff/ -[PegDown]: https://github.com/sirthias/pegdown -[SmartyPants]: http://daringfireball.net/projects/smartypants/ +[PegDown]: https://github.com/sirthias/pegdown/ +[PHP Markdown & Extra]: http://michelf.com/projects/php-markdown/ +[Apache Ant(TM)]: http://ant.apache.org/ + +[repo]: https://github.com/rjeschke/txtmark/ "Txtmark at GitHub.com" [tar]: https://github.com/rjeschke/txtmark/tarball/master "branch: master" [zip]: https://github.com/rjeschke/txtmark/zipball/master "branch: master" + [$PROFILE$]: extended "Txtmark processing information." 
Project link: diff --git a/src/java/txtmark/Block.java b/src/java/txtmark/Block.java index b9550d2..34addc3 100644 --- a/src/java/txtmark/Block.java +++ b/src/java/txtmark/Block.java @@ -21,7 +21,9 @@ class Block public Block next = null; /** Depth of headline BlockType. */ public int hlDepth = 0; - + /** ID for headlines and list items */ + public String id = null; + /** Constructor. */ public Block() { diff --git a/src/java/txtmark/Decorator.java b/src/java/txtmark/Decorator.java index 5dcfc1a..4228132 100644 --- a/src/java/txtmark/Decorator.java +++ b/src/java/txtmark/Decorator.java @@ -94,10 +94,10 @@ public interface Decorator /** * Called when a headline is opened. * + *

Note: Don't close the HTML tag!

*

Default implementation is:

*
 out.append("<h");
-     * out.append(level);
-     * out.append('>');
+ * out.append(level); * * @param out The StringBuilder to write to. */ @@ -155,6 +155,26 @@ public interface Decorator */ public void closeEmphasis(final StringBuilder out); + /** + * Called when a superscript span is opened. + * + *

Default implementation is:

+ *
out.append("<sup>");
+ * + * @param out The StringBuilder to write to. + */ + public void openSuper(final StringBuilder out); + + /** + * Called when a superscript span is closed. + * + *

Default implementation is:

+ *
out.append("</sup>");
+ * + * @param out The StringBuilder to write to. + */ + public void closeSuper(final StringBuilder out); + /** * Called when an ordered list is opened. * @@ -198,8 +218,9 @@ public interface Decorator /** * Called when a list item is opened. * + *

Note: Don't close the HTML tag!

*

Default implementation is:

- *
out.append("<li>");
+ *
out.append("<li");
* * @param out The StringBuilder to write to. */ @@ -228,6 +249,7 @@ public interface Decorator /** * Called when a link is opened. * + *

Note: Don't close the HTML tag!

*

Default implementation is:

*
out.append("<a");
* @@ -238,6 +260,7 @@ public interface Decorator /** * Called when an image is opened. * + *

Note: Don't close the HTML tag!

*

Default implementation is:

*
out.append("<img");
* diff --git a/src/java/txtmark/DefaultDecorator.java b/src/java/txtmark/DefaultDecorator.java index cd8c60f..4415ba1 100644 --- a/src/java/txtmark/DefaultDecorator.java +++ b/src/java/txtmark/DefaultDecorator.java @@ -90,7 +90,6 @@ public class DefaultDecorator implements Decorator { out.append("'); } /** @see txtmark.Decorator#closeHeadline(StringBuilder, int) */ @@ -130,6 +129,20 @@ public class DefaultDecorator implements Decorator out.append(""); } + /** @see txtmark.Decorator#openSuper(StringBuilder) */ + @Override + public void openSuper(StringBuilder out) + { + out.append(""); + } + + /** @see txtmark.Decorator#closeSuper(StringBuilder) */ + @Override + public void closeSuper(StringBuilder out) + { + out.append(""); + } + /** @see txtmark.Decorator#openOrderedList(StringBuilder)*/ @Override public void openOrderedList(StringBuilder out) @@ -162,7 +175,7 @@ public class DefaultDecorator implements Decorator @Override public void openListItem(StringBuilder out) { - out.append("
  • "); + out.append("'); break; case PARAGRAPH: this.decorator.openParagraph(out); @@ -75,6 +82,13 @@ class Emitter break; case LIST_ITEM: this.decorator.openListItem(out); + if(this.useExtensions && root.id != null) + { + out.append(" id=\""); + Utils.appendCode(out, root.id, 0, root.id.length()); + out.append('"'); + } + out.append('>'); break; } @@ -175,9 +189,10 @@ class Emitter */ private int checkLink(final StringBuilder out, final String in, int start, MarkToken token) { + boolean isAbbrev = false; int pos = start + (token == MarkToken.LINK ? 1 : 2); final StringBuilder temp = new StringBuilder(); - + temp.setLength(0); pos = Utils.readMdLinkId(temp, in, pos); if(pos < start) @@ -191,6 +206,7 @@ class Emitter final LinkRef lr = this.linkRefs.get(name.toLowerCase()); if(lr != null) { + isAbbrev = lr.isAbbrev; link = lr.link; comment = lr.title; pos = oldPos; @@ -255,6 +271,7 @@ class Emitter final LinkRef lr = this.linkRefs.get(name.toLowerCase()); if(lr != null) { + isAbbrev = lr.isAbbrev; link = lr.link; comment = lr.title; pos = oldPos; @@ -270,19 +287,32 @@ class Emitter if(token == MarkToken.LINK) { - this.decorator.openLink(out); - out.append(" href=\""); - Utils.appendValue(out, link, 0, link.length()); - out.append('"'); - if(comment != null) + if(isAbbrev && comment != null) { - out.append(" title=\""); + if(!this.useExtensions) + return -1; + out.append(""); + this.recursiveEmitLine(out, name, 0, MarkToken.NONE); + out.append(""); + } + else + { + this.decorator.openLink(out); + out.append(" href=\""); + Utils.appendValue(out, link, 0, link.length()); + out.append('"'); + if(comment != null) + { + out.append(" title=\""); + Utils.appendValue(out, comment, 0, comment.length()); + out.append('"'); + } + out.append('>'); + this.recursiveEmitLine(out, name, 0, MarkToken.NONE); + out.append(""); } - out.append('>'); - this.recursiveEmitLine(out, name, 0, MarkToken.NONE); - out.append(""); } else { @@ -486,6 +516,21 @@ class Emitter 
out.append(in.charAt(pos)); } break; + case SUPER: + temp.setLength(0); + b = this.recursiveEmitLine(temp, in, pos + 1, mt); + if(b > 0) + { + this.decorator.openSuper(out); + out.append(temp); + this.decorator.closeSuper(out); + pos = b; + } + else + { + out.append(in.charAt(pos)); + } + break; case CODE_SINGLE: case CODE_DOUBLE: a = pos + (mt == MarkToken.CODE_DOUBLE ? 2 : 1); @@ -613,6 +658,10 @@ class Emitter { return c0 != ' ' || c2 != ' ' ? MarkToken.STRONG_UNDERSCORE : MarkToken.EM_UNDERSCORE; } + if(this.useExtensions) + { + return c0 != ' ' && c0 != '_' && c1 != ' ' ? MarkToken.NONE : MarkToken.EM_UNDERSCORE; + } return c0 != ' ' || c1 != ' ' ? MarkToken.EM_UNDERSCORE : MarkToken.NONE; case '!': if(c1 == '[') @@ -637,12 +686,14 @@ class Emitter case '\'': case '.': case '>': + case '<': case '*': case '+': case '-': case '_': case '!': case '`': + case '^': return MarkToken.ESCAPE; default: return MarkToken.NONE; @@ -662,6 +713,8 @@ class Emitter if(c1 == '-') return c2 == '-' ? MarkToken.X_MDASH : MarkToken.X_NDASH; break; + case '^': + return c0 == '^' || c1 == '^' ? MarkToken.NONE : MarkToken.SUPER; case '>': if(c1 == '>') return MarkToken.X_RAQUO; diff --git a/src/java/txtmark/Line.java b/src/java/txtmark/Line.java index b3bcded..346dd57 100644 --- a/src/java/txtmark/Line.java +++ b/src/java/txtmark/Line.java @@ -306,6 +306,93 @@ class Line } return -1; } + + /** + * Checks if this line contains an ID at it's end and removes it from the line. + * + * @return The ID or null if no valid ID exists. + */ + // FIXME ... 
hack + public String stripIP() + { + if(this.isEmpty || this.value.charAt(this.value.length() - this.trailing - 1) != '}') + return null; + int p = this.leading; + boolean found = false; + while(p < this.value.length() && !found) + { + switch(this.value.charAt(p)) + { + case '\\': + if(p + 1 < this.value.length()) + { + switch(this.value.charAt(p + 1)) + { + case '{': + p++; + break; + } + } + p++; + break; + case '{': + found = true; + break; + default: + p++; + break; + } + } + + if(found) + { + if(p + 1 < this.value.length() && this.value.charAt(p + 1) == '#') + { + final int start = p + 2; + p = start; + found = false; + while(p < this.value.length() && !found) + { + switch(this.value.charAt(p)) + { + case '\\': + if(p + 1 < this.value.length()) + { + switch(this.value.charAt(p + 1)) + { + case '}': + p++; + break; + } + } + p++; + break; + case '}': + found = true; + break; + default: + p++; + break; + } + } + if(found) + { + final String id = this.value.substring(start, p).trim(); + if(this.leading != 0) + { + this.value = this.value.substring(0, this.leading) + this.value.substring(this.leading, start - 2).trim(); + } + else + { + this.value = this.value.substring(this.leading, start - 2).trim(); + } + this.trailing = 0; + return id.length() > 0 ? id : null; + } + } + } + return null; + } /** * Checks for a valid HTML block. Sets xmlEndLine. diff --git a/src/java/txtmark/LinkRef.java b/src/java/txtmark/LinkRef.java index 22f5863..35be03a 100644 --- a/src/java/txtmark/LinkRef.java +++ b/src/java/txtmark/LinkRef.java @@ -15,6 +15,8 @@ class LinkRef public final String link; /** The optional comment/title. */ public String title; + /** Flag indicating that this is an abbreviation. */ + public final boolean isAbbrev; /** * Constructor. @@ -22,10 +24,11 @@ class LinkRef * @param link The link. * @param title The title (may be null). 
*/ - public LinkRef(final String link, final String title) + public LinkRef(final String link, final String title, final boolean isAbbrev) { this.link = link; this.title = title; + this.isAbbrev = isAbbrev; } /** @see java.lang.Object#toString() */ diff --git a/src/java/txtmark/MarkToken.java b/src/java/txtmark/MarkToken.java index 8004110..eb87488 100644 --- a/src/java/txtmark/MarkToken.java +++ b/src/java/txtmark/MarkToken.java @@ -35,24 +35,26 @@ enum MarkToken ENTITY, // & /** \ */ ESCAPE, // \x - /** Extended: © */ + /** Extended: ^ */ + SUPER, // ^ + /** Extended: (C) */ X_COPY, // (C) - /** Extended: ® */ + /** Extended: (R) */ X_REG, // (R) - /** Extended: ™ */ + /** Extended: (TM) */ X_TRADE, // (TM) - /** Extended: « */ + /** Extended: << */ X_LAQUO, // << - /** Extended: » */ + /** Extended: >> */ X_RAQUO, // >> - /** Extended: — */ + /** Extended: -- */ X_NDASH, // -- - /** Extended: – */ + /** Extended: --- */ X_MDASH, // --- - /** Extended: … */ + /** Extended: ... */ X_HELLIP, // ... - /** Extended: ” */ + /** Extended: "x */ X_RDQUO, // " - /** Extended: “ */ + /** Extended: x" */ X_LDQUO // " } diff --git a/src/java/txtmark/Processor.java b/src/java/txtmark/Processor.java index 5752b7d..76568f0 100644 --- a/src/java/txtmark/Processor.java +++ b/src/java/txtmark/Processor.java @@ -336,7 +336,7 @@ public class Processor else { // Store linkRef and skip line - final LinkRef lr = new LinkRef(link, comment); + final LinkRef lr = new LinkRef(link, comment, comment != null && (link.length() == 1 && link.charAt(0) == '*')); this.emitter.addLinkRef(id, lr); if(comment == null) lastLinkRef = lr; @@ -402,18 +402,24 @@ public class Processor * @param root The Block to process. * @param listMode Flag indicating that we're in a list item block. */ - // TODO ... 
paragraphs and lists seems to be not working correctly private void recurse(final Block root, boolean listMode) { Block block, list; Line line = root.lines; + + if(listMode) + { + root.removeListIndent(); + if(this.useExtensions && root.lines != null && root.lines.getLineType() != LineType.CODE) + { + root.id = root.lines.stripIP(); + } + } + while(line != null && line.isEmpty) line = line.next; if(line == null) return; - if(listMode) - root.removeListIndent(); - while(line != null) { final LineType type = line.getLineType(); @@ -509,6 +515,8 @@ public class Processor block.type = BlockType.HEADLINE; if(type != LineType.HEADLINE) block.hlDepth = type == LineType.HEADLINE1 ? 1 : 2; + if(this.useExtensions) + block.id = block.lines.stripIP(); block.transfromHeadline(); root.removeLeadingEmptyLines(); line = root.lines; diff --git a/src/java/txtmark/Run.java b/src/java/txtmark/Run.java index 333fdcb..bf15a96 100644 --- a/src/java/txtmark/Run.java +++ b/src/java/txtmark/Run.java @@ -32,7 +32,7 @@ import java.io.InputStreamReader; *<meta http-equiv="content-type" content="text/html; charset=UTF-8"/> *</head> *<body> - *<!-- the following file separates header from footer --> + *<!-- the following line separates header from footer --> *<!-- ### --> *</body> *</html> diff --git a/src/java/txtmark/Utils.java b/src/java/txtmark/Utils.java index 36dc212..8fff33c 100644 --- a/src/java/txtmark/Utils.java +++ b/src/java/txtmark/Utils.java @@ -63,12 +63,14 @@ class Utils case '\'': case '.': case '>': + case '<': case '*': case '+': case '-': case '_': case '!': case '`': + case '^': out.append(ch); return pos + 1; default: