mirror of
https://github.com/lucaspalomodevelop/txtmark.git
synced 2026-03-13 07:39:37 +00:00
Version 0.5
Added devdoc ant target, added HTML block processing. Implemented setting of user Decorator.
This commit is contained in:
parent
e08ff546e5
commit
f977df2c97
216
README.md
216
README.md
@ -5,58 +5,33 @@ See LICENSE.txt for licensing information.
|
||||
***
|
||||
|
||||
txtmark is yet another markdown processor for the JVM.
|
||||
... and is *damn* fast^^
|
||||
|
||||
Again this is a WIP release.
|
||||
* It is easy to use:
|
||||
|
||||
TODO:
|
||||
String result = txtmark.Processor.process("This is ***TXTMARK***");
|
||||
|
||||
* It is fast (see below)
|
||||
... well, it is the fastest markdown processor on the JVM right now.
|
||||
|
||||
- block-level HTML element processing
|
||||
- code clean-ups
|
||||
- see below (markdown test suite)
|
||||
This is a RC version, tagged v0.5
|
||||
|
||||
### MarkdownTest results so far
|
||||
For an in-depth explanation of the markdown syntax have a look at [daringfireball.net](http://daringfireball.net/projects/markdown/syntax).
|
||||
|
||||
|
||||
### Markdown conformity
|
||||
|
||||
***
|
||||
|
||||
Based on [MarkdownTest\_1.0\_2007-05-09](http://daringfireball.net/projects/downloads/MarkdownTest_1.0_2007-05-09.tgz)
|
||||
Txtmark passes all tests inside [MarkdownTest\_1.0\_2007-05-09](http://daringfireball.net/projects/downloads/MarkdownTest_1.0_2007-05-09.tgz)
|
||||
except for two:
|
||||
|
||||
* Amps and angle encoding ... OK
|
||||
* Auto links ... OK
|
||||
* Backslash escapes ... OK
|
||||
* Blockquotes with code blocks ... OK
|
||||
* Code Blocks ... OK
|
||||
* Code Spans ... OK
|
||||
* Hard-wrapped paragraphs with list-like lines ... OK
|
||||
* Horizontal rules ... OK
|
||||
* Images ... FAILED (see [Note 1](#note0))
|
||||
* Inline HTML (Advanced) ... FAILED (see [Note 2](#note1))
|
||||
* Inline HTML (Simple) ... FAILED (see [Note 2](#note1))
|
||||
* Inline HTML comments ... FAILED (see [Note 2](#note1))
|
||||
* Links, inline style ... OK
|
||||
* Links, reference style ... OK
|
||||
* Links, shortcut references ... OK
|
||||
* Literal quotes in titles ... FAILED (see [Note 3](#note2))
|
||||
* Markdown Documentation - Basics ... OK
|
||||
* Markdown Documentation - Syntax ... FAILED (see [Note 2](#note1))
|
||||
* Nested blockquotes ... OK
|
||||
* Ordered and unordered lists ... OK
|
||||
* Strong and em together ... OK
|
||||
* Tabs ... OK
|
||||
* Tidyness ... OK
|
||||
1. **Images.text**
|
||||
|
||||
17 passed; 6 failed.
|
||||
|
||||
***
|
||||
|
||||
1. <h4 id="note0">Note:</h4>
|
||||
Fails because Txtmark doesn't produce empty 'title' image attributes.
|
||||
(IMHO: Images ... OK)
|
||||
|
||||
2. <h4 id="note1">Note:</h4>
|
||||
Fails because of currently missing block-level HTML identification.
|
||||
2. **Literal quotes in titles.text**
|
||||
|
||||
3. <h4 id="note2">Note:</h4>
|
||||
What the frell ... this test will continue to FAIL.
|
||||
Sorry, but using unescaped `"` in a title which should be surrounded
|
||||
by `"` is unacceptable for me ;)
|
||||
@ -74,142 +49,55 @@ Based on [MarkdownTest\_1.0\_2007-05-09](http://daringfireball.net/projects/down
|
||||
and Txtmark will produce the correct result.
|
||||
(IMHO: Literal quotes in titles ... OK)
|
||||
|
||||
|
||||
### Performance comparison of markdown processors for the JVM
|
||||
|
||||
***
|
||||
---
|
||||
|
||||
Based on [this](http://henkelmann.eu/2011/01/10/performance_comparison_of_markdown_processor_for_the_jvm).
|
||||
Txtmark's results should not be considered final, they may change in either direction
|
||||
during the upcoming releases.
|
||||
But I think you get the point.
|
||||
Based on [this benchmark suite](http://henkelmann.eu/2011/01/10/performance_comparison_of_markdown_processor_for_the_jvm).
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<th>Test</th>
|
||||
<th colspan="2">Actuarius</th>
|
||||
<th colspan="2">PegDown</th>
|
||||
<th colspan="2">Knockoff</th>
|
||||
<th colspan="2">Txtmark</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td></td>
|
||||
<td>1st Run (ms)</td><td>2nd Run (ms)</td>
|
||||
<td>1st Run (ms)</td><td>2nd Run (ms)</td>
|
||||
<td>1st Run (ms)</td><td>2nd Run (ms)</td>
|
||||
<td>1st Run (ms)</td><td>2nd Run (ms)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Plain Paragraphs</td>
|
||||
<td>969</td><td>300</td>
|
||||
<td>1468</td><td>956</td>
|
||||
<td>564</td><td>362</td>
|
||||
<td>114</td><td>45</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Every Word Emphasized</td>
|
||||
<td>1409</td><td>884</td>
|
||||
<td>1435</td><td>1417</td>
|
||||
<td>13161</td><td>12921</td>
|
||||
<td>52</td><td>44</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Every Word Strong</td>
|
||||
<td>1087</td><td>978</td>
|
||||
<td>1125</td><td>1100</td>
|
||||
<td>9717</td><td>9586</td>
|
||||
<td>40</td><td>46</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Every Word Inline Code</td>
|
||||
<td>351</td><td>278</td>
|
||||
<td>1047</td><td>1037</td>
|
||||
<td>9499</td><td>9245</td>
|
||||
<td>45</td><td>35</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Every Word a Fast Link</td>
|
||||
<td>2123</td><td>1580</td>
|
||||
<td>523</td><td>512</td>
|
||||
<td>4086</td><td>3470</td>
|
||||
<td>78</td><td>50</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Every Word Consisting of Special XML Chars</td>
|
||||
<td>3981</td><td>3973</td>
|
||||
<td>3341</td><td>3055</td>
|
||||
<td>372</td><td>319</td>
|
||||
<td>1842</td><td>1841</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Every Word wrapped in manual HTML tags</td>
|
||||
<td>3073</td><td>2907</td>
|
||||
<td>901</td><td>888</td>
|
||||
<td>3826</td><td>3529</td>
|
||||
<td>492</td><td>453</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Every Line with a manual line break</td>
|
||||
<td>437</td><td>583</td>
|
||||
<td>1370</td><td>1363</td>
|
||||
<td>1352</td><td>957</td>
|
||||
<td>42</td><td>44</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Every word with a full link</td>
|
||||
<td>398</td><td>266</td>
|
||||
<td>1057</td><td>1014</td>
|
||||
<td>1755</td><td>1689</td>
|
||||
<td>88</td><td>47</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Every word with a full image</td>
|
||||
<td>228</td><td>139</td>
|
||||
<td>1110</td><td>1101</td>
|
||||
<td>1917</td><td>1773</td>
|
||||
<td>37</td><td>33</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Every word with a reference link</td>
|
||||
<td>9726</td><td>9146</td>
|
||||
<td>19019</td><td>20044</td>
|
||||
<td>117632</td><td>118306</td>
|
||||
<td>1431</td><td>1240</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Every block a quote</td>
|
||||
<td>431</td><td>205</td>
|
||||
<td>1366</td><td>1328</td>
|
||||
<td>474</td><td>464</td>
|
||||
<td>35</td><td>36</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Every block a codeblock</td>
|
||||
<td>68</td><td>84</td>
|
||||
<td>387</td><td>377</td>
|
||||
<td>161</td><td>169</td>
|
||||
<td>61</td><td>19</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Every block a list</td>
|
||||
<td>863</td><td>912</td>
|
||||
<td>1735</td><td>1762</td>
|
||||
<td>602</td><td>686</td>
|
||||
<td>46</td><td>36</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>All tests together</td>
|
||||
<td>3319</td><td>2959</td>
|
||||
<td>5245</td><td>5305</td>
|
||||
<td>10252</td><td>9751</td>
|
||||
<td>222</td><td>173</td>
|
||||
</tr>
|
||||
<tr><th>Test</th><th colspan="2">Actuarius</th><th colspan="2">PegDown</th><th colspan="2">Knockoff</th><th colspan="2">Txtmark</th></tr>
|
||||
<tr><td></td><td>1st Run (ms)</td><td>2nd Run (ms)</td><td>1st Run (ms)</td><td>2nd Run (ms)</td><td>1st Run (ms)</td><td>2nd Run (ms)</td><td>1st Run (ms)</td><td>2nd Run (ms)</td></tr>
|
||||
<tr><td>Plain Paragraphs</td><td>887</td><td>461</td><td>2455</td><td>2236</td><td>764</td><td>568</td><td>89</td><td>47</td></tr>
|
||||
<tr><td>Every Word Emphasized</td><td>2220</td><td>2077</td><td>3411</td><td>3406</td><td>30503</td><td>30514</td><td>72</td><td>66</td></tr>
|
||||
<tr><td>Every Word Strong</td><td>2384</td><td>2270</td><td>2456</td><td>2466</td><td>23639</td><td>23577</td><td>62</td><td>57</td></tr>
|
||||
<tr><td>Every Word Inline Code</td><td>824</td><td>804</td><td>2337</td><td>2237</td><td>23506</td><td>23622</td><td>54</td><td>55</td></tr>
|
||||
<tr><td>Every Word a Fast Link</td><td>3942</td><td>3738</td><td>1164</td><td>1159</td><td>8621</td><td>8595</td><td>89</td><td>68</td></tr>
|
||||
<tr><td>Every Word Consisting of Special XML Chars</td><td>9393</td><td>9312</td><td>7544</td><td>7314</td><td>801</td><td>608</td><td>3587</td><td>3614</td></tr>
|
||||
<tr><td>Every Word wrapped in manual HTML tags</td><td>6843</td><td>6828</td><td>1850</td><td>1859</td><td>8699</td><td>8692</td><td>1169</td><td>1154</td></tr>
|
||||
<tr><td>Every Line with a manual line break</td><td>859</td><td>724</td><td>2968</td><td>2946</td><td>2171</td><td>1990</td><td>58</td><td>56</td></tr>
|
||||
<tr><td>Every word with a full link</td><td>528</td><td>501</td><td>2252</td><td>2280</td><td>3513</td><td>3512</td><td>66</td><td>60</td></tr>
|
||||
<tr><td>Every word with a full image</td><td>395</td><td>374</td><td>2463</td><td>2569</td><td>3757</td><td>3726</td><td>56</td><td>55</td></tr>
|
||||
<tr><td>Every word with a reference link</td><td>19208</td><td>19035</td><td>39183</td><td>38710</td><td>243450</td><td>244943</td><td>1826</td><td>1798</td></tr>
|
||||
<tr><td>Every block a quote</td><td>465</td><td>449</td><td>2687</td><td>2684</td><td>978</td><td>977</td><td>48</td><td>48</td></tr>
|
||||
<tr><td>Every block a codeblock</td><td>151</td><td>134</td><td>597</td><td>601</td><td>270</td><td>262</td><td>36</td><td>27</td></tr>
|
||||
<tr><td>Every block a list</td><td>1209</td><td>1106</td><td>3448</td><td>3432</td><td>1411</td><td>1368</td><td>52</td><td>60</td></tr>
|
||||
<tr><td>All tests together</td><td>6062</td><td>6042</td><td>11556</td><td>11589</td><td>19827</td><td>19637</td><td>452</td><td>448</td></tr>
|
||||
</table>
|
||||
|
||||
* Q: Why is Txtmark so slow when it comes to XML entities?
|
||||
* A: Because Txtmark does some sanity checks on XML entities to make sure
|
||||
it outputs valid XML. For example:
|
||||
|
||||
&cutie;
|
||||
|
||||
will produce (when processed with Markdown and most other markdown processors):
|
||||
|
||||
&cutie;
|
||||
|
||||
and
|
||||
|
||||
&amp;cutie;
|
||||
|
||||
when processed with Txtmark.
|
||||
|
||||
Tested versions:
|
||||
[Actuarius] version: 0.2
|
||||
[PegDown] version: 0.8.5.4
|
||||
[Knockoff] version: 0.7.3-15
|
||||
|
||||
***
|
||||
---
|
||||
|
||||
[Markdown] is copyright (c) 2004 by John Gruber
|
||||
[Markdown]: http://daringfireball.net/projects/markdown/
|
||||
|
||||
20
build.xml
20
build.xml
@ -19,8 +19,26 @@
|
||||
<javac srcdir="src/java" destdir="build/classes" target="1.6" includeAntRuntime="false"/>
|
||||
</target>
|
||||
|
||||
<target name="doc" description="Generates the JavaDoc">
|
||||
<target name="doc" description="Generates the user JavaDoc">
|
||||
<mkdir dir="doc"/>
|
||||
<delete>
|
||||
<fileset dir="doc" includes="**/*.*"/>
|
||||
</delete>
|
||||
<javadoc
|
||||
sourcepath="src/java"
|
||||
access="public"
|
||||
author="true"
|
||||
destdir="doc"
|
||||
encoding="UTF-8"
|
||||
charset="UTF-8"
|
||||
link="http://download.oracle.com/javase/6/docs/api/"/>
|
||||
</target>
|
||||
|
||||
<target name="devdoc" description="Generates the developer JavaDoc">
|
||||
<mkdir dir="doc"/>
|
||||
<delete>
|
||||
<fileset dir="doc" includes="**/*.*"/>
|
||||
</delete>
|
||||
<javadoc
|
||||
sourcepath="src/java"
|
||||
access="private"
|
||||
|
||||
@ -16,12 +16,12 @@ class Emitter
|
||||
/** Link references. */
|
||||
private final HashMap<String, LinkRef> linkRefs = new HashMap<String, LinkRef>();
|
||||
/** The Decorator. */
|
||||
private final Decorator decorator = new DefaultDecorator();
|
||||
private Decorator decorator;
|
||||
|
||||
/** Constructor. */
|
||||
public Emitter()
|
||||
public Emitter(final Decorator decorator)
|
||||
{
|
||||
//
|
||||
this.decorator = decorator;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -360,62 +360,7 @@ class Emitter
|
||||
if(start + 2 < in.length())
|
||||
{
|
||||
temp.setLength(0);
|
||||
temp.append('<');
|
||||
pos = start + 1;
|
||||
if(in.charAt(pos) == '/')
|
||||
{
|
||||
temp.append('/');
|
||||
pos++;
|
||||
}
|
||||
if(pos < in.length() && Character.isLetter(in.charAt(pos)))
|
||||
{
|
||||
pos = Utils.readUntil(temp, in, pos, ' ', '/', '>');
|
||||
if(pos > 0)
|
||||
{
|
||||
while(pos < in.length() && in.charAt(pos) == ' ')
|
||||
{
|
||||
pos = Utils.skipSpaces(in, pos);
|
||||
if(pos == -1)
|
||||
break;
|
||||
if(in.charAt(pos) == '/')
|
||||
{
|
||||
temp.append(" /");
|
||||
pos++;
|
||||
break;
|
||||
}
|
||||
if(in.charAt(pos) == '>')
|
||||
{
|
||||
break;
|
||||
}
|
||||
temp.append(' ');
|
||||
if(!Character.isLetter(in.charAt(pos)))
|
||||
{
|
||||
pos = -1;
|
||||
break;
|
||||
}
|
||||
pos = Utils.readUntil(temp, in, pos, '=');
|
||||
if(pos == -1)
|
||||
break;
|
||||
pos = Utils.readUntil(temp, in, pos, '\'', '"');
|
||||
if(pos == -1)
|
||||
break;
|
||||
final char lim = in.charAt(pos);
|
||||
temp.append(lim);
|
||||
pos++;
|
||||
pos = Utils.readRawUntil(temp, in, pos, lim);
|
||||
if(pos == -1)
|
||||
break;
|
||||
temp.append(lim);
|
||||
pos++;
|
||||
}
|
||||
if(pos > 0 && pos < in.length() && in.charAt(pos) == '>')
|
||||
{
|
||||
temp.append('>');
|
||||
out.append(temp);
|
||||
return pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
return Utils.readXML(out, in, start);
|
||||
}
|
||||
|
||||
return -1;
|
||||
@ -712,8 +657,7 @@ class Emitter
|
||||
{
|
||||
out.append(line.value);
|
||||
}
|
||||
if(line.next != null)
|
||||
out.append('\n');
|
||||
out.append('\n');
|
||||
line = line.next;
|
||||
}
|
||||
}
|
||||
|
||||
@ -11,6 +11,7 @@ package txtmark;
|
||||
*/
|
||||
enum HTMLElement
|
||||
{
|
||||
NONE,
|
||||
a, abbr, acronym, address, applet, area,
|
||||
b, base, basefont, bdo, big, blockquote, body, br, button,
|
||||
caption, cite, code, col, colgroup,
|
||||
|
||||
@ -4,6 +4,8 @@
|
||||
*/
|
||||
package txtmark;
|
||||
|
||||
import java.util.LinkedList;
|
||||
|
||||
/**
|
||||
* This class represents a text line.
|
||||
*
|
||||
@ -26,7 +28,8 @@ class Line
|
||||
public Line previous = null, next = null;
|
||||
/** Is previous/next line empty? */
|
||||
public boolean prevEmpty, nextEmpty;
|
||||
|
||||
/** Final line of an XML block. */
|
||||
public Line xmlEndLine;
|
||||
/** Constructor. */
|
||||
public Line()
|
||||
{
|
||||
@ -243,6 +246,12 @@ class Line
|
||||
return LineType.OLIST;
|
||||
}
|
||||
|
||||
if(this.value.charAt(this.leading) == '<')
|
||||
{
|
||||
if(this.checkHTML())
|
||||
return LineType.XML;
|
||||
}
|
||||
|
||||
if(this.next != null && !this.next.isEmpty)
|
||||
{
|
||||
if((this.next.value.charAt(0) == '-') && (this.next.countChars('-') > 0))
|
||||
@ -253,4 +262,133 @@ class Line
|
||||
|
||||
return LineType.OTHER;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads an XML comment. Sets <code>xmlEndLine</code>.
|
||||
*
|
||||
* @param firstLine The Line to start reading from.
|
||||
* @param start The starting position.
|
||||
* @return The new position or -1 if it is no valid comment.
|
||||
*/
|
||||
private int readXMLComment(final Line firstLine, final int start)
|
||||
{
|
||||
Line line = firstLine;
|
||||
if(start + 3 < line.value.length())
|
||||
{
|
||||
if(line.value.charAt(2) == '-' && line.value.charAt(3) == '-')
|
||||
{
|
||||
int pos = start + 4;
|
||||
while(line != null)
|
||||
{
|
||||
while(pos < line.value.length() && line.value.charAt(pos) != '-')
|
||||
{
|
||||
pos++;
|
||||
}
|
||||
if(pos == line.value.length())
|
||||
{
|
||||
line = line.next;
|
||||
pos = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(pos + 2 < line.value.length())
|
||||
{
|
||||
if(line.value.charAt(pos + 1) == '-' && line.value.charAt(pos + 2) == '>')
|
||||
{
|
||||
this.xmlEndLine = line;
|
||||
return pos + 3;
|
||||
}
|
||||
}
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks for a valid HTML block. Sets <code>xmlEndLine</code>.
|
||||
*
|
||||
* @return <code>true</code> if it is a valid block.
|
||||
*/
|
||||
private boolean checkHTML()
|
||||
{
|
||||
final LinkedList<String> tags = new LinkedList<String>();
|
||||
final StringBuilder temp = new StringBuilder();
|
||||
int pos = this.leading;
|
||||
if(this.value.charAt(this.leading + 1) == '!')
|
||||
{
|
||||
if(this.readXMLComment(this, this.leading) > 0)
|
||||
return true;
|
||||
}
|
||||
pos = Utils.readXML(temp, this.value, this.leading);
|
||||
String element, tag;
|
||||
if(pos > -1)
|
||||
{
|
||||
element = temp.toString();
|
||||
temp.setLength(0);
|
||||
Utils.getXMLTag(temp, element);
|
||||
tag = temp.toString().toLowerCase();
|
||||
if(!HTML.isHtmlBlockElement(tag))
|
||||
return false;
|
||||
if(tag.equals("hr"))
|
||||
{
|
||||
this.xmlEndLine = this;
|
||||
return true;
|
||||
}
|
||||
tags.add(tag);
|
||||
|
||||
Line line = this;
|
||||
while(line != null)
|
||||
{
|
||||
while(pos < line.value.length() && line.value.charAt(pos) != '<')
|
||||
{
|
||||
pos++;
|
||||
}
|
||||
if(pos >= line.value.length())
|
||||
{
|
||||
line = line.next;
|
||||
pos = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
temp.setLength(0);
|
||||
final int newPos = Utils.readXML(temp, line.value, pos);
|
||||
if(newPos > 0)
|
||||
{
|
||||
element = temp.toString();
|
||||
temp.setLength(0);
|
||||
Utils.getXMLTag(temp, element);
|
||||
tag = temp.toString().toLowerCase();
|
||||
if(HTML.isHtmlBlockElement(tag) && !tag.equals("hr"))
|
||||
{
|
||||
if(element.charAt(1) == '/')
|
||||
{
|
||||
if(!tags.getLast().equals(tag))
|
||||
return false;
|
||||
tags.removeLast();
|
||||
}
|
||||
else
|
||||
{
|
||||
tags.addLast(tag);
|
||||
}
|
||||
}
|
||||
if(tags.size() == 0)
|
||||
{
|
||||
this.xmlEndLine = line;
|
||||
break;
|
||||
}
|
||||
pos = newPos;
|
||||
}
|
||||
else
|
||||
{
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return tags.size() == 0;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@ -24,5 +24,7 @@ enum LineType
|
||||
/** A block quote. */
|
||||
BQUOTE,
|
||||
/** A horizontal ruler. */
|
||||
HR
|
||||
HR,
|
||||
/** Start of an XML block. */
|
||||
XML
|
||||
}
|
||||
|
||||
@ -23,20 +23,21 @@ public class Processor
|
||||
/** The reader. */
|
||||
private final Reader reader;
|
||||
/** The emitter. */
|
||||
private Emitter emitter = new Emitter();
|
||||
private final Emitter emitter;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param reader The input reader.
|
||||
*/
|
||||
private Processor(Reader reader)
|
||||
private Processor(Reader reader, Decorator decorator)
|
||||
{
|
||||
this.reader = reader;
|
||||
this.emitter = new Emitter(decorator);
|
||||
}
|
||||
|
||||
/**
|
||||
* Transforms an input String into XHTML.
|
||||
* Transforms an input String into XHTML using the default Decorator.
|
||||
*
|
||||
* @param input The String to process.
|
||||
* @return The processed String.
|
||||
@ -48,7 +49,19 @@ public class Processor
|
||||
}
|
||||
|
||||
/**
|
||||
* Transforms an input file into XHTML using UTF-8 encoding.
|
||||
* Transforms an input String into XHTML.
|
||||
*
|
||||
* @param input The String to process.
|
||||
* @return The processed String.
|
||||
* @throws IOException if an IO error occurs
|
||||
*/
|
||||
public static String process(final String input, final Decorator decorator) throws IOException
|
||||
{
|
||||
return process(new StringReader(input), decorator);
|
||||
}
|
||||
|
||||
/**
|
||||
* Transforms an input file into XHTML using UTF-8 encoding and the default Decorator.
|
||||
*
|
||||
* @param file The File to process.
|
||||
* @return The processed String.
|
||||
@ -60,7 +73,19 @@ public class Processor
|
||||
}
|
||||
|
||||
/**
|
||||
* Transforms an input file into XHTML.
|
||||
* Transforms an input file into XHTML using UTF-8 encoding.
|
||||
*
|
||||
* @param file The File to process.
|
||||
* @return The processed String.
|
||||
* @throws IOException if an IO error occurs
|
||||
*/
|
||||
public static String process(final File file, final Decorator decorator) throws IOException
|
||||
{
|
||||
return process(file, "UTF-8", decorator);
|
||||
}
|
||||
|
||||
/**
|
||||
* Transforms an input file into XHTML using the default Decorator.
|
||||
*
|
||||
* @param file The File to process.
|
||||
* @param encoding The encoding to use.
|
||||
@ -69,13 +94,37 @@ public class Processor
|
||||
*/
|
||||
public static String process(final File file, final String encoding) throws IOException
|
||||
{
|
||||
final Reader r = new BufferedReader(new InputStreamReader(new FileInputStream(file), encoding));
|
||||
final Processor p = new Processor(r);
|
||||
final String ret = p.process();
|
||||
r.close();
|
||||
return process(file, encoding, new DefaultDecorator());
|
||||
}
|
||||
|
||||
/**
|
||||
* Transforms an input file into XHTML.
|
||||
*
|
||||
* @param file The File to process.
|
||||
* @param encoding The encoding to use.
|
||||
* @return The processed String.
|
||||
* @throws IOException if an IO error occurs
|
||||
*/
|
||||
public static String process(final File file, final String encoding, final Decorator decorator) throws IOException
|
||||
{
|
||||
final FileInputStream input = new FileInputStream(file);
|
||||
final String ret = process(input, encoding, decorator);
|
||||
input.close();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Transforms an input stream into XHTML using UTF-8 encoding and the default Decorator.
|
||||
*
|
||||
* @param input The InputStream to process.
|
||||
* @return The processed String.
|
||||
* @throws IOException if an IO error occurs
|
||||
*/
|
||||
public static String process(final InputStream input) throws IOException
|
||||
{
|
||||
return process(input, "UTF-8", new DefaultDecorator());
|
||||
}
|
||||
|
||||
/**
|
||||
* Transforms an input stream into XHTML using UTF-8 encoding.
|
||||
*
|
||||
@ -83,9 +132,23 @@ public class Processor
|
||||
* @return The processed String.
|
||||
* @throws IOException if an IO error occurs
|
||||
*/
|
||||
public static String process(final InputStream input) throws IOException
|
||||
public static String process(final InputStream input, final Decorator decorator) throws IOException
|
||||
{
|
||||
return process(input, "UTF-8");
|
||||
return process(input, "UTF-8", decorator);
|
||||
}
|
||||
|
||||
/**
|
||||
* Transforms an input stream into XHTML using the default Decorator.
|
||||
*
|
||||
* @param input The InputStream to process.
|
||||
* @param encoding The encoding to use.
|
||||
* @return The processed String.
|
||||
* @throws IOException if an IO error occurs
|
||||
*/
|
||||
public static String process(final InputStream input, final String encoding) throws IOException
|
||||
{
|
||||
final Processor p = new Processor(new BufferedReader(new InputStreamReader(input, encoding)), new DefaultDecorator());
|
||||
return p.process();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -96,9 +159,24 @@ public class Processor
|
||||
* @return The processed String.
|
||||
* @throws IOException if an IO error occurs
|
||||
*/
|
||||
public static String process(final InputStream input, final String encoding) throws IOException
|
||||
public static String process(final InputStream input, final String encoding, final Decorator decorator) throws IOException
|
||||
{
|
||||
final Processor p = new Processor(new BufferedReader(new InputStreamReader(input, encoding)));
|
||||
final Processor p = new Processor(new BufferedReader(new InputStreamReader(input, encoding)), decorator);
|
||||
return p.process();
|
||||
}
|
||||
|
||||
/**
|
||||
* Transforms the input of a Reader into XHTML using the default Decorator.
|
||||
*
|
||||
* @param reader The Reader to process.
|
||||
* @return The processed String.
|
||||
* @throws IOException if an IO error occurs
|
||||
*/
|
||||
public static String process(final Reader reader) throws IOException
|
||||
{
|
||||
final Processor p = new Processor(
|
||||
!(reader instanceof BufferedReader) ? new BufferedReader(reader) : reader,
|
||||
new DefaultDecorator());
|
||||
return p.process();
|
||||
}
|
||||
|
||||
@ -109,9 +187,11 @@ public class Processor
|
||||
* @return The processed String.
|
||||
* @throws IOException if an IO error occurs
|
||||
*/
|
||||
public static String process(final Reader reader) throws IOException
|
||||
public static String process(final Reader reader, final Decorator decorator) throws IOException
|
||||
{
|
||||
final Processor p = new Processor(!(reader instanceof BufferedReader) ? new BufferedReader(reader) : reader);
|
||||
final Processor p = new Processor(
|
||||
!(reader instanceof BufferedReader) ? new BufferedReader(reader) : reader,
|
||||
decorator);
|
||||
return p.process();
|
||||
}
|
||||
|
||||
@ -319,7 +399,9 @@ public class Processor
|
||||
final LineType t = line.getLineType();
|
||||
if(listMode && (t == LineType.OLIST || t == LineType.ULIST))
|
||||
break;
|
||||
if(t == LineType.HEADLINE || t == LineType.HEADLINE1 || t == LineType.HEADLINE2 || t == LineType.HR || t == LineType.BQUOTE)
|
||||
if(t == LineType.HEADLINE || t == LineType.HEADLINE1 || t == LineType.HEADLINE2
|
||||
|| t == LineType.HR || t == LineType.BQUOTE
|
||||
|| t == LineType.XML)
|
||||
break;
|
||||
line = line.next;
|
||||
}
|
||||
@ -349,6 +431,16 @@ public class Processor
|
||||
block.type = BlockType.CODE;
|
||||
block.removeSurroundingEmptyLines();
|
||||
break;
|
||||
case XML:
|
||||
if(line.previous != null)
|
||||
{
|
||||
// FIXME ... this looks wrong
|
||||
root.split(line.previous);
|
||||
}
|
||||
root.split(line.xmlEndLine).type = BlockType.XML;
|
||||
root.removeLeadingEmptyLines();
|
||||
line = root.lines;
|
||||
break;
|
||||
case BQUOTE:
|
||||
while(line != null)
|
||||
{
|
||||
@ -366,6 +458,7 @@ public class Processor
|
||||
case HR:
|
||||
if(line.previous != null)
|
||||
{
|
||||
// FIXME ... this looks wrong
|
||||
root.split(line.previous);
|
||||
}
|
||||
root.split(line).type = BlockType.RULER;
|
||||
@ -442,8 +535,6 @@ public class Processor
|
||||
{
|
||||
final StringBuilder out = new StringBuilder();
|
||||
|
||||
// long t0 = System.nanoTime();
|
||||
|
||||
final Block parent = this.readLines();
|
||||
parent.removeSurroundingEmptyLines();
|
||||
|
||||
@ -455,9 +546,6 @@ public class Processor
|
||||
block = block.next;
|
||||
}
|
||||
|
||||
// t0 = System.nanoTime() - t0;
|
||||
// out.append(String.format("\n<!-- Processing time: %dms -->\n", (int)(t0 * 1e-6)));
|
||||
|
||||
return out.toString();
|
||||
}
|
||||
}
|
||||
|
||||
@ -428,4 +428,98 @@ class Utils
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts the tag from an XML element.
|
||||
*
|
||||
* @param out The StringBuilder to write to.
|
||||
* @param in Input StringBuilder.
|
||||
*/
|
||||
public static void getXMLTag(final StringBuilder out, final StringBuilder in)
|
||||
{
|
||||
int pos = 1;
|
||||
if(in.charAt(1) == '/')
|
||||
pos++;
|
||||
while(Character.isLetterOrDigit(in.charAt(pos)))
|
||||
{
|
||||
out.append(in.charAt(pos++));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts the tag from an XML element.
|
||||
*
|
||||
* @param out The StringBuilder to write to.
|
||||
* @param in Input String.
|
||||
*/
|
||||
public static void getXMLTag(final StringBuilder out, final String in)
|
||||
{
|
||||
int pos = 1;
|
||||
if(in.charAt(1) == '/')
|
||||
pos++;
|
||||
while(Character.isLetterOrDigit(in.charAt(pos)))
|
||||
{
|
||||
out.append(in.charAt(pos++));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads an XML element.
|
||||
*
|
||||
* @param out The StringBuilder to write to.
|
||||
* @param in Input String.
|
||||
* @param start Starting position.
|
||||
* @return The new position or -1 if this is no valid XML element.
|
||||
*/
|
||||
public static int readXML(final StringBuilder out, final String in, final int start)
|
||||
{
|
||||
int pos;
|
||||
if(in.charAt(start + 1) == '/')
|
||||
{
|
||||
out.append("</");
|
||||
pos = start + 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
out.append('<');
|
||||
pos = start + 1;
|
||||
}
|
||||
pos = readRawUntil(out, in, pos, ' ', '/', '>');
|
||||
if(pos == -1) return -1;
|
||||
pos = skipSpaces(in, pos);
|
||||
if(Character.isLetter(in.charAt(pos)))
|
||||
{
|
||||
while(in.charAt(pos) != '/' && in.charAt(pos) != '>')
|
||||
{
|
||||
out.append(' ');
|
||||
pos = readRawUntil(out, in, pos, ' ', '=');
|
||||
if(pos == -1) return -1;
|
||||
pos = skipSpaces(in, pos);
|
||||
if(pos == -1) return -1;
|
||||
out.append('=');
|
||||
pos = skipSpaces(in, pos + 1);
|
||||
if(pos == -1) return -1;
|
||||
final char lim = in.charAt(pos);
|
||||
if(lim != '\'' && lim != '"') return -1;
|
||||
out.append(lim);
|
||||
pos = readRawUntil(out, in, pos + 1, lim);
|
||||
if(pos == -1) return -1;
|
||||
out.append(lim);
|
||||
pos = skipSpaces(in, pos + 1);
|
||||
if(pos == -1) return -1;
|
||||
}
|
||||
|
||||
}
|
||||
if(in.charAt(pos) == '/')
|
||||
{
|
||||
out.append('/');
|
||||
pos++;
|
||||
}
|
||||
if(in.charAt(pos) == '>')
|
||||
{
|
||||
out.append('>');
|
||||
return pos;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user