Version 0.5

Added devdoc ant target, added HTML block processing. Implemented setting of user Decorator.
2026-03-13 07:39:37 +00:00 · 2011-04-16 22:18:03 +02:00 · 2011-04-16 22:18:03 +02:00 · f977df2c97
commit f977df2c97
parent e08ff546e5
8 changed files with 422 additions and 249 deletions
--- a/README.md
+++ b/README.md
@ -5,58 +5,33 @@ See LICENSE.txt for licensing information.
 ***

 txtmark is yet another markdown processor for the JVM.  
-... and is *damn* fast^^

-Again this is a WIP release.
+*   It is easy to use:

-TODO:
+        String result = txtmark.Processor.process("This is ***TXTMARK***");
+    
+*   It is fast (see below)  
+    ... well, it is the fastest markdown processor on the JVM right now.

- block-level HTML element processing
- code clean-ups
- see below (markdown test suite)
+This is an RC version, tagged v0.5

-### MarkdownTest results so far
+For an in-depth explanation of the markdown syntax have a look at [daringfireball.net](http://daringfireball.net/projects/markdown/syntax).
+
+
+### Markdown conformity

 ***

-Based on [MarkdownTest\_1.0\_2007-05-09](http://daringfireball.net/projects/downloads/MarkdownTest_1.0_2007-05-09.tgz)
+Txtmark passes all tests inside [MarkdownTest\_1.0\_2007-05-09](http://daringfireball.net/projects/downloads/MarkdownTest_1.0_2007-05-09.tgz)
+except for two:

-* Amps and angle encoding ... OK
-* Auto links ... OK
-* Backslash escapes ... OK
-* Blockquotes with code blocks ... OK
-* Code Blocks ... OK
-* Code Spans ... OK
-* Hard-wrapped paragraphs with list-like lines ... OK
-* Horizontal rules ... OK
-* Images ... FAILED (see [Note 1](#note0))
-* Inline HTML (Advanced) ... FAILED (see [Note 2](#note1))
-* Inline HTML (Simple) ... FAILED (see [Note 2](#note1))
-* Inline HTML comments ... FAILED (see [Note 2](#note1))
-* Links, inline style ... OK
-* Links, reference style ... OK
-* Links, shortcut references ... OK
-* Literal quotes in titles ... FAILED (see [Note 3](#note2))
-* Markdown Documentation - Basics ... OK
-* Markdown Documentation - Syntax ... FAILED (see [Note 2](#note1))
-* Nested blockquotes ... OK
-* Ordered and unordered lists ... OK
-* Strong and em together ... OK
-* Tabs ... OK
-* Tidyness ... OK
+1.  **Images.text**

-17 passed; 6 failed.
-
-***
-
-1. <h4 id="note0">Note:</h4>
    Fails because Txtmark doesn't produce empty 'title' image attributes.  
    (IMHO: Images ... OK)

-2. <h4 id="note1">Note:</h4>
-    Fails because of currently missing block-level HTML identification.
+2.  **Literal quotes in titles.text**

-3. <h4 id="note2">Note:</h4>
    What the frell ... this test will continue to FAIL.  
    Sorry, but using unescaped `"` in a title which should be surrounded
    by `"` is unacceptable for me ;)
@ -74,142 +49,55 @@ Based on [MarkdownTest\_1.0\_2007-05-09](http://daringfireball.net/projects/down
    and Txtmark will produce the correct result.  
    (IMHO: Literal quotes in titles ... OK)

+
 ### Performance comparison of markdown processors for the JVM

-***
+---

-Based on [this](http://henkelmann.eu/2011/01/10/performance_comparison_of_markdown_processor_for_the_jvm).  
-Txtmark's results should not be considered final, they may change in either direction
-during the upcoming releases.  
-But I think you get the point.  
+Based on [this benchmark suite](http://henkelmann.eu/2011/01/10/performance_comparison_of_markdown_processor_for_the_jvm).  

 <table>
-  <tr>
-    <th>Test</th>
-    <th colspan="2">Actuarius</th>
-    <th colspan="2">PegDown</th>
-    <th colspan="2">Knockoff</th>
-    <th colspan="2">Txtmark</th>
-  </tr>
-  <tr>
-    <td></td>
-    <td>1st Run (ms)</td><td>2nd Run (ms)</td>
-    <td>1st Run (ms)</td><td>2nd Run (ms)</td>
-    <td>1st Run (ms)</td><td>2nd Run (ms)</td>
-    <td>1st Run (ms)</td><td>2nd Run (ms)</td>
-  </tr>
-  <tr>
-    <td>Plain Paragraphs</td>
-    <td>969</td><td>300</td>
-    <td>1468</td><td>956</td>
-    <td>564</td><td>362</td>
-    <td>114</td><td>45</td>
-  </tr>
-  <tr>
-    <td>Every Word Emphasized</td>
-    <td>1409</td><td>884</td>
-    <td>1435</td><td>1417</td>
-    <td>13161</td><td>12921</td>
-    <td>52</td><td>44</td>
-  </tr>
-  <tr>
-    <td>Every Word Strong</td>
-    <td>1087</td><td>978</td>
-    <td>1125</td><td>1100</td>
-    <td>9717</td><td>9586</td>
-    <td>40</td><td>46</td>
-  </tr>
-  <tr>
-    <td>Every Word Inline Code</td>
-    <td>351</td><td>278</td>
-    <td>1047</td><td>1037</td>
-    <td>9499</td><td>9245</td>
-    <td>45</td><td>35</td>
-  </tr>
-  <tr>
-    <td>Every Word a Fast Link</td>
-    <td>2123</td><td>1580</td>
-    <td>523</td><td>512</td>
-    <td>4086</td><td>3470</td>
-    <td>78</td><td>50</td>
-  </tr>
-  <tr>
-    <td>Every Word Consisting of Special XML Chars</td>
-    <td>3981</td><td>3973</td>
-    <td>3341</td><td>3055</td>
-    <td>372</td><td>319</td>
-    <td>1842</td><td>1841</td>
-  </tr>
-  <tr>
-    <td>Every Word wrapped in manual HTML tags</td>
-    <td>3073</td><td>2907</td>
-    <td>901</td><td>888</td>
-    <td>3826</td><td>3529</td>
-    <td>492</td><td>453</td>
-  </tr>
-  <tr>
-    <td>Every Line with a manual line break</td>
-    <td>437</td><td>583</td>
-    <td>1370</td><td>1363</td>
-    <td>1352</td><td>957</td>
-    <td>42</td><td>44</td>
-  </tr>
-  <tr>
-    <td>Every word with a full link</td>
-    <td>398</td><td>266</td>
-    <td>1057</td><td>1014</td>
-    <td>1755</td><td>1689</td>
-    <td>88</td><td>47</td>
-  </tr>
-  <tr>
-    <td>Every word with a full image</td>
-    <td>228</td><td>139</td>
-    <td>1110</td><td>1101</td>
-    <td>1917</td><td>1773</td>
-    <td>37</td><td>33</td>
-  </tr>
-  <tr>
-    <td>Every word with a reference link</td>
-    <td>9726</td><td>9146</td>
-    <td>19019</td><td>20044</td>
-    <td>117632</td><td>118306</td>
-    <td>1431</td><td>1240</td>
-  </tr>
-  <tr>
-    <td>Every block a quote</td>
-    <td>431</td><td>205</td>
-    <td>1366</td><td>1328</td>
-    <td>474</td><td>464</td>
-    <td>35</td><td>36</td>
-  </tr>
-  <tr>
-    <td>Every block a codeblock</td>
-    <td>68</td><td>84</td>
-    <td>387</td><td>377</td>
-    <td>161</td><td>169</td>
-    <td>61</td><td>19</td>
-  </tr>
-  <tr>
-    <td>Every block a list</td>
-    <td>863</td><td>912</td>
-    <td>1735</td><td>1762</td>
-    <td>602</td><td>686</td>
-    <td>46</td><td>36</td>
-  </tr>
-  <tr>
-    <td>All tests together</td>
-    <td>3319</td><td>2959</td>
-    <td>5245</td><td>5305</td>
-    <td>10252</td><td>9751</td>
-    <td>222</td><td>173</td>
-  </tr>
+  <tr><th>Test</th><th colspan="2">Actuarius</th><th colspan="2">PegDown</th><th colspan="2">Knockoff</th><th colspan="2">Txtmark</th></tr>
+  <tr><td></td><td>1st Run (ms)</td><td>2nd Run (ms)</td><td>1st Run (ms)</td><td>2nd Run (ms)</td><td>1st Run (ms)</td><td>2nd Run (ms)</td><td>1st Run (ms)</td><td>2nd Run (ms)</td></tr>
+  <tr><td>Plain Paragraphs</td><td>887</td><td>461</td><td>2455</td><td>2236</td><td>764</td><td>568</td><td>89</td><td>47</td></tr>
+  <tr><td>Every Word Emphasized</td><td>2220</td><td>2077</td><td>3411</td><td>3406</td><td>30503</td><td>30514</td><td>72</td><td>66</td></tr>
+  <tr><td>Every Word Strong</td><td>2384</td><td>2270</td><td>2456</td><td>2466</td><td>23639</td><td>23577</td><td>62</td><td>57</td></tr>
+  <tr><td>Every Word Inline Code</td><td>824</td><td>804</td><td>2337</td><td>2237</td><td>23506</td><td>23622</td><td>54</td><td>55</td></tr>
+  <tr><td>Every Word a Fast Link</td><td>3942</td><td>3738</td><td>1164</td><td>1159</td><td>8621</td><td>8595</td><td>89</td><td>68</td></tr>
+  <tr><td>Every Word Consisting of Special XML Chars</td><td>9393</td><td>9312</td><td>7544</td><td>7314</td><td>801</td><td>608</td><td>3587</td><td>3614</td></tr>
+  <tr><td>Every Word wrapped in manual HTML tags</td><td>6843</td><td>6828</td><td>1850</td><td>1859</td><td>8699</td><td>8692</td><td>1169</td><td>1154</td></tr>
+  <tr><td>Every Line with a manual line break</td><td>859</td><td>724</td><td>2968</td><td>2946</td><td>2171</td><td>1990</td><td>58</td><td>56</td></tr>
+  <tr><td>Every word with a full link</td><td>528</td><td>501</td><td>2252</td><td>2280</td><td>3513</td><td>3512</td><td>66</td><td>60</td></tr>
+  <tr><td>Every word with a full image</td><td>395</td><td>374</td><td>2463</td><td>2569</td><td>3757</td><td>3726</td><td>56</td><td>55</td></tr>
+  <tr><td>Every word with a reference link</td><td>19208</td><td>19035</td><td>39183</td><td>38710</td><td>243450</td><td>244943</td><td>1826</td><td>1798</td></tr>
+  <tr><td>Every block a quote</td><td>465</td><td>449</td><td>2687</td><td>2684</td><td>978</td><td>977</td><td>48</td><td>48</td></tr>
+  <tr><td>Every block a codeblock</td><td>151</td><td>134</td><td>597</td><td>601</td><td>270</td><td>262</td><td>36</td><td>27</td></tr>
+  <tr><td>Every block a list</td><td>1209</td><td>1106</td><td>3448</td><td>3432</td><td>1411</td><td>1368</td><td>52</td><td>60</td></tr>
+  <tr><td>All tests together</td><td>6062</td><td>6042</td><td>11556</td><td>11589</td><td>19827</td><td>19637</td><td>452</td><td>448</td></tr>
 </table>

+*   Q: Why is Txtmark so slow when it comes to XML entities?
+*   A: Because Txtmark does some sanity checks on XML entities to make sure
+    it outputs valid XML. For example:
+
+        &cutie;
+
+    will produce (when processed with Markdown and most other markdown processors):
+
+        &cutie;
+
+    and
+
+        &amp;cutie;
+
+    when processed with Txtmark.
+
+Tested versions:  
 [Actuarius] version: 0.2  
 [PegDown] version: 0.8.5.4  
 [Knockoff] version: 0.7.3-15  

-***
+---

 [Markdown] is copyright (c) 2004 by John Gruber  
   [Markdown]: http://daringfireball.net/projects/markdown/
--- a/build.xml
+++ b/build.xml
@ -19,8 +19,26 @@
    <javac srcdir="src/java" destdir="build/classes" target="1.6" includeAntRuntime="false"/>
  </target>

-  <target name="doc" description="Generates the JavaDoc">
+  <target name="doc" description="Generates the user JavaDoc">
      <mkdir dir="doc"/>
+      <delete>
+        <fileset dir="doc" includes="**/*.*"/>
+      </delete>
+      <javadoc
+          sourcepath="src/java"
+          access="public"
+          author="true"
+          destdir="doc"
+          encoding="UTF-8"
+          charset="UTF-8"
+          link="http://download.oracle.com/javase/6/docs/api/"/>
+  </target>
+
+  <target name="devdoc" description="Generates the developer JavaDoc">
+      <mkdir dir="doc"/>
+      <delete>
+        <fileset dir="doc" includes="**/*.*"/>
+      </delete>
      <javadoc
          sourcepath="src/java"
          access="private"
--- a/src/java/txtmark/Emitter.java
+++ b/src/java/txtmark/Emitter.java
@ -16,12 +16,12 @@ class Emitter
    /** Link references. */
    private final HashMap<String, LinkRef> linkRefs = new HashMap<String, LinkRef>();
    /** The Decorator. */
-    private final Decorator decorator = new DefaultDecorator();
+    private Decorator decorator;
    
    /** Constructor. */
-    public Emitter()
+    public Emitter(final Decorator decorator)
    {
-        //
+        this.decorator = decorator;
    }

    /**
@ -360,62 +360,7 @@ class Emitter
        if(start + 2 < in.length())
        {
            temp.setLength(0);
-            temp.append('<');
-            pos = start + 1;
-            if(in.charAt(pos) == '/')
-            {
-                temp.append('/');
-                pos++;
-            }
-            if(pos < in.length() && Character.isLetter(in.charAt(pos)))
-            {
-                pos = Utils.readUntil(temp, in, pos, ' ', '/', '>');
-                if(pos > 0)
-                {
-                    while(pos < in.length() && in.charAt(pos) == ' ')
-                    {
-                        pos = Utils.skipSpaces(in, pos);
-                        if(pos == -1)
-                            break;
-                        if(in.charAt(pos) == '/')
-                        {
-                            temp.append(" /");
-                            pos++;
-                            break;
-                        }
-                        if(in.charAt(pos) == '>')
-                        {
-                            break;
-                        }
-                        temp.append(' ');
-                        if(!Character.isLetter(in.charAt(pos)))
-                        {
-                            pos = -1;
-                            break;
-                        }
-                        pos = Utils.readUntil(temp, in, pos, '=');
-                        if(pos == -1)
-                            break;
-                        pos = Utils.readUntil(temp, in, pos, '\'', '"');
-                        if(pos == -1)
-                            break;
-                        final char lim = in.charAt(pos);
-                        temp.append(lim);
-                        pos++;
-                        pos = Utils.readRawUntil(temp, in, pos, lim);
-                        if(pos == -1)
-                            break;
-                        temp.append(lim);
-                        pos++;
-                    }
-                    if(pos > 0 && pos < in.length() && in.charAt(pos) == '>')
-                    {
-                        temp.append('>');
-                        out.append(temp);
-                        return pos;
-                    }
-                }
-            }
+            return Utils.readXML(out, in, start);
        }        
    
        return -1;
@ -712,8 +657,7 @@ class Emitter
            {
                out.append(line.value);
            }
-            if(line.next != null)
-                out.append('\n');
+            out.append('\n');
            line = line.next;
        }
    }
--- a/src/java/txtmark/HTMLElement.java
+++ b/src/java/txtmark/HTMLElement.java
@ -11,6 +11,7 @@ package txtmark;
 */
 enum HTMLElement
 {
+    NONE,
    a, abbr, acronym, address, applet, area,
    b, base, basefont, bdo, big, blockquote, body, br, button,
    caption, cite, code, col, colgroup,
--- a/src/java/txtmark/Line.java
+++ b/src/java/txtmark/Line.java
@ -4,6 +4,8 @@
 */
 package txtmark;

+import java.util.LinkedList;
+
 /**
 * This class represents a text line.
 * 
@ -26,7 +28,8 @@ class Line
    public Line previous = null, next = null;
    /** Is previous/next line empty? */
    public boolean prevEmpty, nextEmpty;
-
+    /** Final line of a XML block. */
+    public Line xmlEndLine;
    /** Constructor. */
    public Line()
    {
@ -243,6 +246,12 @@ class Line
                return LineType.OLIST;
        }

+        if(this.value.charAt(this.leading) == '<')
+        {
+            if(this.checkHTML())
+                return LineType.XML;
+        }
+        
        if(this.next != null && !this.next.isEmpty)
        {
            if((this.next.value.charAt(0) == '-') && (this.next.countChars('-') > 0))
@ -253,4 +262,133 @@ class Line

        return LineType.OTHER;
    }
+    
+    /**
+     * Reads an XML comment (<code>&lt;!-- ... --&gt;</code>) that may span several
+     * lines. Sets <code>xmlEndLine</code> to the line holding the closing
+     * <code>--&gt;</code>.
+     * 
+     * @param firstLine The Line to start reading from.
+     * @param start The starting position (index of the opening '&lt;').
+     * @return The new position or -1 if it is no valid comment.
+     */
+    private int readXMLComment(final Line firstLine, final int start)
+    {
+        Line line = firstLine;
+        // "<!--" needs four characters beginning at 'start'.
+        if(start + 3 >= line.value.length())
+            return -1;
+        // Test the dashes relative to 'start' and not to the beginning of the
+        // line, otherwise the check fails whenever 'start' is greater than 0.
+        // The caller has already verified the '!' at start + 1.
+        if(line.value.charAt(start + 2) != '-' || line.value.charAt(start + 3) != '-')
+            return -1;
+        
+        int pos = start + 4;
+        while(line != null)
+        {
+            // Advance to the next '-' on the current line.
+            while(pos < line.value.length() && line.value.charAt(pos) != '-')
+            {
+                pos++;
+            }
+            if(pos == line.value.length())
+            {
+                // Nothing (more) to find on this line, continue on the following one.
+                line = line.next;
+                pos = 0;
+            }
+            else
+            {
+                if(pos + 2 < line.value.length())
+                {
+                    if(line.value.charAt(pos + 1) == '-' && line.value.charAt(pos + 2) == '>')
+                    {
+                        this.xmlEndLine = line;
+                        return pos + 3;
+                    }
+                }
+                pos++;
+            }
+        }
+        // Ran out of lines without finding the closing "-->".
+        return -1;
+    }
+    
+    /**
+     * Checks for a valid HTML block. Sets <code>xmlEndLine</code>.
+     * <p>
+     * The line is accepted if it starts with an XML comment, with a
+     * <code>hr</code> element, or with a block level element whose nested block
+     * level tags are all closed again on this or one of the following lines.
+     * </p>
+     * 
+     * @return <code>true</code> if it is a valid block.
+     */
+    private boolean checkHTML()
+    {
+        // A '<' as last character of the line is neither a comment nor a tag.
+        if(this.leading + 1 >= this.value.length())
+            return false;
+        
+        if(this.value.charAt(this.leading + 1) == '!')
+        {
+            if(this.readXMLComment(this, this.leading) > 0)
+                return true;
+        }
+        
+        final LinkedList<String> tags = new LinkedList<String>();
+        final StringBuilder temp = new StringBuilder();
+        int pos = Utils.readXML(temp, this.value, this.leading);
+        if(pos < 0)
+            return false;
+        
+        String element = temp.toString();
+        temp.setLength(0);
+        Utils.getXMLTag(temp, element);
+        String tag = temp.toString().toLowerCase();
+        if(!HTML.isHtmlBlockElement(tag))
+            return false;
+        if(tag.equals("hr"))
+        {
+            // A ruler has no content, the block ends on this very line.
+            this.xmlEndLine = this;
+            return true;
+        }
+        tags.add(tag);
+        
+        // Walk over the following tags until the stack of open block elements is empty.
+        Line line = this;
+        while(line != null)
+        {
+            while(pos < line.value.length() && line.value.charAt(pos) != '<')
+            {
+                pos++;
+            }
+            if(pos >= line.value.length())
+            {
+                line = line.next;
+                pos = 0;
+            }
+            else
+            {
+                temp.setLength(0);
+                // A '<' as last character of a line cannot start a tag, so it is
+                // not handed to readXML (which looks at the character behind it).
+                final int newPos = pos + 1 < line.value.length()
+                        ? Utils.readXML(temp, line.value, pos)
+                        : -1;
+                if(newPos > 0)
+                {
+                    element = temp.toString();
+                    temp.setLength(0);
+                    Utils.getXMLTag(temp, element);
+                    tag = temp.toString().toLowerCase();
+                    if(HTML.isHtmlBlockElement(tag) && !tag.equals("hr"))
+                    {
+                        if(element.charAt(1) == '/')
+                        {
+                            // Closing tag: it has to match the innermost open element.
+                            if(!tags.getLast().equals(tag))
+                                return false;
+                            tags.removeLast();
+                        }
+                        else
+                        {
+                            tags.addLast(tag);
+                        }
+                    }
+                    if(tags.size() == 0)
+                    {
+                        this.xmlEndLine = line;
+                        break;
+                    }
+                    pos = newPos;
+                }
+                else
+                {
+                    // Not a tag, skip the '<' and keep searching.
+                    pos++;
+                }
+            }
+        }
+        return tags.size() == 0;
+    }
 }
--- a/src/java/txtmark/LineType.java
+++ b/src/java/txtmark/LineType.java
@ -24,5 +24,7 @@ enum LineType
    /** A block quote. */
    BQUOTE,
    /** A horizontal ruler. */
-    HR
+    HR,
+    /** Start of a XML block. */
+    XML
 }
--- a/src/java/txtmark/Processor.java
+++ b/src/java/txtmark/Processor.java
@ -23,20 +23,21 @@ public class Processor
    /** The reader. */
    private final Reader reader;
    /** The emitter. */
-    private Emitter emitter = new Emitter();
+    private final Emitter emitter;

    /**
     * Constructor.
     * 
     * @param reader The input reader.
     */
-    private Processor(Reader reader)
+    private Processor(Reader reader, Decorator decorator)
    {
        this.reader = reader;
+        this.emitter = new Emitter(decorator);
    }

    /**
-     * Transforms an input String into XHTML.
+     * Transforms an input String into XHTML using the default Decorator.
     * 
     * @param input The String to process. 
     * @return The processed String.
@ -48,7 +49,19 @@ public class Processor
    }

    /**
-     * Transforms an input file into XHTML using UTF-8 encoding.
+     * Transforms an input String into XHTML using the given Decorator.
+     * 
+     * @param input The String to process. 
+     * @param decorator The Decorator to use.
+     * @return The processed String.
+     * @throws IOException if an IO error occurs
+     */
+    public static String process(final String input, final Decorator decorator) throws IOException
+    {
+        return process(new StringReader(input), decorator);
+    }
+
+    /**
+     * Transforms an input file into XHTML using UTF-8 encoding and the default Decorator.
     * 
     * @param file The File to process. 
     * @return The processed String.
@ -60,7 +73,19 @@ public class Processor
    }

    /**
-     * Transforms an input file into XHTML.
+     * Transforms an input file into XHTML using UTF-8 encoding and the given Decorator.
+     * 
+     * @param file The File to process. 
+     * @param decorator The Decorator to use.
+     * @return The processed String.
+     * @throws IOException if an IO error occurs
+     */
+    public static String process(final File file, final Decorator decorator) throws IOException
+    {
+        return process(file, "UTF-8", decorator);
+    }
+
+    /**
+     * Transforms an input file into XHTML using the default Decorator.
     * 
     * @param file The File to process. 
     * @param encoding The encoding to use. 
@ -69,13 +94,37 @@ public class Processor
     */
    public static String process(final File file, final String encoding) throws IOException
    {
-        final Reader r = new BufferedReader(new InputStreamReader(new FileInputStream(file), encoding));
-        final Processor p = new Processor(r);
-        final String ret = p.process();
-        r.close();
+        return process(file, encoding, new DefaultDecorator());
+    }
+
+    /**
+     * Transforms an input file into XHTML.
+     * 
+     * @param file The File to process. 
+     * @param encoding The encoding to use. 
+     * @param decorator The Decorator to use.
+     * @return The processed String.
+     * @throws IOException if an IO error occurs
+     */
+    public static String process(final File file, final String encoding, final Decorator decorator) throws IOException
+    {
+        final FileInputStream input = new FileInputStream(file);
+        final String ret;
+        try
+        {
+            ret = process(input, encoding, decorator);
+        }
+        finally
+        {
+            // Release the file handle even if reading or processing fails.
+            input.close();
+        }
        return ret;
    }

+    /**
+     * Transforms an input stream into XHTML using UTF-8 encoding and the default Decorator.
+     * 
+     * @param input The InputStream to process. 
+     * @return The processed String.
+     * @throws IOException if an IO error occurs
+     */
+    public static String process(final InputStream input) throws IOException
+    {
+        return process(input, "UTF-8", new DefaultDecorator());
+    }
+
    /**
     * Transforms an input stream into XHTML using UTF-8 encoding.
     * 
@ -83,9 +132,23 @@ public class Processor
     * @return The processed String.
     * @throws IOException if an IO error occurs
     */
-    public static String process(final InputStream input) throws IOException
+    public static String process(final InputStream input, final Decorator decorator) throws IOException
    {
-        return process(input, "UTF-8");
+        return process(input, "UTF-8", decorator);
+    }
+
+    /**
+     * Converts the content of an input stream into XHTML, rendering with the default Decorator.
+     * 
+     * @param input The InputStream to process. 
+     * @param encoding The encoding to use. 
+     * @return The processed String.
+     * @throws IOException if an IO error occurs
+     */
+    public static String process(final InputStream input, final String encoding) throws IOException
+    {
+        // Same work as the three argument variant, just with a fresh DefaultDecorator.
+        return process(input, encoding, new DefaultDecorator());
    }

    /**
@ -96,9 +159,24 @@ public class Processor
     * @return The processed String.
     * @throws IOException if an IO error occurs
     */
-    public static String process(final InputStream input, final String encoding) throws IOException
+    public static String process(final InputStream input, final String encoding, final Decorator decorator) throws IOException
    {
-        final Processor p = new Processor(new BufferedReader(new InputStreamReader(input, encoding)));
+        final Processor p = new Processor(new BufferedReader(new InputStreamReader(input, encoding)), decorator);
+        return p.process();
+    }
+
+    /**
+     * Converts the characters supplied by a Reader into XHTML, rendering with the default Decorator.
+     * 
+     * @param reader The Reader to process. 
+     * @return The processed String.
+     * @throws IOException if an IO error occurs
+     */
+    public static String process(final Reader reader) throws IOException
+    {
+        // Only readers that are not buffered already get wrapped.
+        final Reader source = reader instanceof BufferedReader ? reader : new BufferedReader(reader);
+        final Processor p = new Processor(source, new DefaultDecorator());
        return p.process();
    }

@ -109,9 +187,11 @@ public class Processor
     * @return The processed String.
     * @throws IOException if an IO error occurs
     */
-    public static String process(final Reader reader) throws IOException
+    public static String process(final Reader reader, final Decorator decorator) throws IOException
    {
-        final Processor p = new Processor(!(reader instanceof BufferedReader) ? new BufferedReader(reader) : reader);
+        final Processor p = new Processor(
+                !(reader instanceof BufferedReader) ? new BufferedReader(reader) : reader, 
+                        decorator);
        return p.process();
    }

@ -319,7 +399,9 @@ public class Processor
                        final LineType t = line.getLineType();
                        if(listMode && (t == LineType.OLIST || t == LineType.ULIST))
                            break;
-                        if(t == LineType.HEADLINE || t == LineType.HEADLINE1 || t == LineType.HEADLINE2 || t == LineType.HR || t == LineType.BQUOTE)
+                        if(t == LineType.HEADLINE || t == LineType.HEADLINE1 || t == LineType.HEADLINE2 
+                                || t == LineType.HR || t == LineType.BQUOTE
+                                || t == LineType.XML)
                            break;
                        line = line.next;
                    }
@ -349,6 +431,16 @@ public class Processor
                block.type = BlockType.CODE;
                block.removeSurroundingEmptyLines();
                break;
+            case XML:
+                if(line.previous != null)
+                {
+                    // FIXME ... this looks wrong
+                    root.split(line.previous);
+                }
+                root.split(line.xmlEndLine).type = BlockType.XML;
+                root.removeLeadingEmptyLines();
+                line = root.lines;
+                break;
            case BQUOTE:
                while(line != null)
                {
@ -366,6 +458,7 @@ public class Processor
            case HR:
                if(line.previous != null)
                {
+                    // FIXME ... this looks wrong
                    root.split(line.previous);
                }
                root.split(line).type = BlockType.RULER;
@ -442,8 +535,6 @@ public class Processor
    {
        final StringBuilder out = new StringBuilder();

-//        long t0 = System.nanoTime();
-
        final Block parent = this.readLines();
        parent.removeSurroundingEmptyLines();

@ -455,9 +546,6 @@ public class Processor
            block = block.next;
        }

-//        t0 = System.nanoTime() - t0;
-//        out.append(String.format("\n<!-- Processing time: %dms -->\n", (int)(t0 * 1e-6)));
-
        return out.toString();
    }
 }
--- a/src/java/txtmark/Utils.java
+++ b/src/java/txtmark/Utils.java
@ -428,4 +428,98 @@ class Utils
            }
        }
    }
+    
+    /**
+     * Extracts the tag from an XML element.
+     * <p>
+     * Skips the opening '&lt;' (and a following '/' of a closing element) and
+     * copies the letters and digits that follow. Stops at the first other
+     * character or at the end of the input.
+     * </p>
+     * 
+     * @param out The StringBuilder to write to.
+     * @param in Input StringBuilder.
+     */
+    public static void getXMLTag(final StringBuilder out, final StringBuilder in)
+    {
+        int pos = 1;
+        if(in.length() > 1 && in.charAt(1) == '/')
+            pos++;
+        // The length check keeps truncated input such as "<div" from running past the end.
+        while(pos < in.length() && Character.isLetterOrDigit(in.charAt(pos)))
+        {
+            out.append(in.charAt(pos++));
+        }
+    }
+    
+    /**
+     * Extracts the tag from an XML element.
+     * <p>
+     * Skips the opening '&lt;' (and a following '/' of a closing element) and
+     * copies the letters and digits that follow. Stops at the first other
+     * character or at the end of the input.
+     * </p>
+     * 
+     * @param out The StringBuilder to write to.
+     * @param in Input String.
+     */
+    public static void getXMLTag(final StringBuilder out, final String in)
+    {
+        int pos = 1;
+        if(in.length() > 1 && in.charAt(1) == '/')
+            pos++;
+        // The length check keeps truncated input such as "<div" from running past the end.
+        while(pos < in.length() && Character.isLetterOrDigit(in.charAt(pos)))
+        {
+            out.append(in.charAt(pos++));
+        }
+    }
+
+    /**
+     * Reads an XML element.
+     * <p>
+     * The element is written in a normalized form: attributes are separated by
+     * a single space and written as <code>name=</code> followed by the quoted
+     * value. Nothing is written to <code>out</code> unless the whole element
+     * could be read.
+     * </p>
+     * 
+     * @param out The StringBuilder to write to.
+     * @param in Input String.
+     * @param start Starting position (index of the opening '&lt;').
+     * @return The new position (index of the closing '&gt;') or -1 if this is no valid XML element.
+     */
+    public static int readXML(final StringBuilder out, final String in, final int start)
+    {
+        // There has to be at least one character behind the '<'.
+        if(start + 1 >= in.length())
+            return -1;
+        
+        // Collect into a scratch buffer so a failed attempt leaves 'out' untouched.
+        final StringBuilder temp = new StringBuilder();
+        int pos;
+        if(in.charAt(start + 1) == '/')
+        {
+            temp.append("</");
+            pos = start + 2;
+        }
+        else
+        {
+            temp.append('<');
+            pos = start + 1;
+        }
+        pos = readRawUntil(temp, in, pos, ' ', '/', '>');
+        if(pos == -1) return -1;
+        pos = skipSpaces(in, pos);
+        // skipSpaces reports -1 like everywhere below; never index with it.
+        if(pos == -1 || pos >= in.length()) return -1;
+        if(Character.isLetter(in.charAt(pos)))
+        {
+            while(in.charAt(pos) != '/' && in.charAt(pos) != '>')
+            {
+                temp.append(' ');
+                pos = readRawUntil(temp, in, pos, ' ', '=');
+                if(pos == -1) return -1;
+                pos = skipSpaces(in, pos);
+                // An attribute name must be followed by '='.
+                if(pos == -1 || pos >= in.length() || in.charAt(pos) != '=') return -1;
+                temp.append('=');
+                pos = skipSpaces(in, pos + 1);
+                if(pos == -1 || pos >= in.length()) return -1;
+                final char lim = in.charAt(pos);
+                if(lim != '\'' && lim != '"') return -1;
+                temp.append(lim);
+                pos = readRawUntil(temp, in, pos + 1, lim);
+                if(pos == -1) return -1;
+                temp.append(lim);
+                pos = skipSpaces(in, pos + 1);
+                if(pos == -1 || pos >= in.length()) return -1;
+            }
+        }
+        if(in.charAt(pos) == '/')
+        {
+            temp.append('/');
+            pos++;
+        }
+        // The '/' may have been the last character of the input.
+        if(pos < in.length() && in.charAt(pos) == '>')
+        {
+            temp.append('>');
+            out.append(temp);
+            return pos;
+        }
+        return -1;
+    }
 }