commit 6543a1a55b89dd1031f9db68b5ee4c099c5ea9f0 Author: Rene' Jeschke Date: Thu Apr 14 20:18:51 2011 +0200 Initial commit. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b34e8a6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +build/ +doc/ +release/ +.settings +.classpath +.project \ No newline at end of file diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..65c5ca8 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. 
+ + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. 
+ + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. 
(If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. 
+ + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/README b/README new file mode 100644 index 0000000..aa1c4ee --- /dev/null +++ b/README @@ -0,0 +1,8 @@ +txtmark - Java markdown processor +Copyright (C) 2011 René Jeschke +See LICENSE.txt for licensing information. +------------------------------------------------------- + + +--- +Project link: https://github.com/rjeschke/txtmark \ No newline at end of file diff --git a/build.xml b/build.xml new file mode 100644 index 0000000..4ad3bfe --- /dev/null +++ b/build.xml @@ -0,0 +1,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/java/txtmark/Block.java b/src/java/txtmark/Block.java new file mode 100644 index 0000000..f7fc187 --- /dev/null +++ b/src/java/txtmark/Block.java @@ -0,0 +1,187 @@ +/* +* Copyright (C) 2011 René Jeschke +* See LICENSE.txt for licensing information. 
+*/ +package txtmark; + +class Block +{ + public BlockType type = BlockType.NONE; + public Line lines = null, lineTail = null; + public Block blocks = null, blockTail = null, blockParent = null; + public Block next = null, previous = null; + public int hlDepth = 0; + + public Block() + { + // + } + + public boolean hasLines() + { + return this.lines != null; + } + + public void removeSurroundingEmptyLines() + { + if(this.lines != null) + { + this.removeTrailingEmptyLines(); + this.removeLeadingEmptyLines(); + } + } + + public void transfromHeadline() + { + if(this.hlDepth > 0) + return; + int level = 0; + final Line line = this.lines; + if(line.isEmpty) + return; + int start = line.leading; + while(start < line.value.length() && line.value.charAt(start) == '#') + { + level++; + start++; + } + while(start < line.value.length() && line.value.charAt(start) == ' ') + start++; + if(start >= line.value.length()) + { + line.setEmpty(); + } + else + { + int end = line.value.length() - line.trailing - 1; + while(line.value.charAt(end) == '#') + end--; + while(line.value.charAt(end) == ' ') + end--; + line.value = line.value.substring(start, end + 1); + line.leading = line.trailing = 0; + } + this.hlDepth = level; + } + + public void removeListIndent() + { + Line line = this.lines; + while(line != null) + { + if(!line.isEmpty) + { + switch(line.getLineType()) + { + case ULIST: + line.value = line.value.substring(line.leading + 2); + break; + case OLIST: + line.value = line.value.substring(line.value.indexOf('.') + 2); + break; + default: + line.value = line.value.substring(Math.min(line.leading, 4)); + break; + } + line.initLeading(); + } + line = line.next; + } + } + + public void removeBlockQuotePrefix() + { + Line line = this.lines; + while(line != null) + { + if(!line.isEmpty) + { + if(line.value.charAt(line.leading) == '>') + { + int rem = line.leading + 1; + if(line.leading + 1 < line.value.length() && line.value.charAt(line.leading + 1) == ' ') + rem++; + line.value = 
line.value.substring(rem); + line.initLeading(); + } + } + line = line.next; + } + } + + public boolean removeLeadingEmptyLines() + { + boolean wasEmpty = false; + Line line = this.lines; + while(line != null && line.isEmpty) + { + this.removeLine(line); + line = this.lines; + wasEmpty = true; + } + return wasEmpty; + } + + public void removeTrailingEmptyLines() + { + Line line = this.lineTail; + while(line != null && line.isEmpty) + { + this.removeLine(line); + line = this.lineTail; + } + } + + public Block split(final Line line) + { + final Block block = new Block(); + + block.lines = this.lines; + block.lineTail = line; + this.lines = line.next; + line.next = null; + if(this.lines == null) + this.lineTail = null; + else + this.lines.previous = null; + + block.blockParent = this; + if(this.blocks == null) + this.blocks = this.blockTail = block; + else + { + block.previous = this.blockTail; + this.blockTail.next = block; + this.blockTail = block; + } + + return block; + } + + public void removeLine(final Line line) + { + if(line.previous == null) + this.lines = line.next; + else + line.previous.next = line.next; + if(line.next == null) + this.lineTail = line.previous; + else + line.next.previous = line.previous; + line.previous = line.next = null; + } + + public void appendLine(final Line line) + { + if(this.lineTail == null) + this.lines = this.lineTail = line; + else + { + this.lineTail.nextEmpty = line.isEmpty; + line.prevEmpty = this.lineTail.isEmpty; + line.previous = this.lineTail; + this.lineTail.next = line; + this.lineTail = line; + } + } +} diff --git a/src/java/txtmark/BlockType.java b/src/java/txtmark/BlockType.java new file mode 100644 index 0000000..46efca2 --- /dev/null +++ b/src/java/txtmark/BlockType.java @@ -0,0 +1,19 @@ +/* +* Copyright (C) 2011 René Jeschke +* See LICENSE.txt for licensing information. 
+*/ +package txtmark; + +enum BlockType +{ + NONE, + BLOCKQUOTE, + CODE, + HEADLINE, + LIST_ITEM, + ORDERED_LIST, + PARAGRAPH, + RULER, + UNORDERED_LIST, + XML +} diff --git a/src/java/txtmark/Emitter.java b/src/java/txtmark/Emitter.java new file mode 100644 index 0000000..5666bc2 --- /dev/null +++ b/src/java/txtmark/Emitter.java @@ -0,0 +1,537 @@ +/* +* Copyright (C) 2011 René Jeschke +* See LICENSE.txt for licensing information. +*/ +package txtmark; + +import java.util.HashMap; + +class Emitter +{ + private final HashMap linkRefs = new HashMap(); + + public Emitter() + { + // + } + + public void addLinkRef(final String key, final LinkRef linkRef) + { + this.linkRefs.put(key.toLowerCase(), linkRef); + } + + public void emit(final StringBuilder out, final Block root) + { + root.removeSurroundingEmptyLines(); + + switch(root.type) + { + case RULER: + out.append("
"); + return; + case NONE: + case XML: + break; + case HEADLINE: + out.append("'); + break; + case PARAGRAPH: + out.append("

"); + break; + case CODE: + out.append("

");
+            break;
+        case BLOCKQUOTE:
+            out.append("
"); + break; + case UNORDERED_LIST: + out.append("
    \n"); + break; + case ORDERED_LIST: + out.append("
      \n"); + break; + case LIST_ITEM: + out.append("
    1. "); + break; + } + + if(root.hasLines()) + { + this.emitLines(out, root); + } + else + { + Block block = root.blocks; + while(block != null) + { + this.emit(out, block); + block = block.next; + } + } + + switch(root.type) + { + case RULER: + case NONE: + case XML: + break; + case HEADLINE: + out.append("\n"); + break; + case PARAGRAPH: + out.append("

      \n"); + break; + case CODE: + out.append("
\n"); + break; + case BLOCKQUOTE: + out.append("\n"); + break; + case UNORDERED_LIST: + out.append("\n"); + break; + case ORDERED_LIST: + out.append("\n"); + break; + case LIST_ITEM: + out.append("\n"); + break; + } + } + + private void emitLines(final StringBuilder out, final Block block) + { + switch(block.type) + { + case CODE: + this.emitCodeLines(out, block.lines); + break; + case XML: + this.emitRawLines(out, block.lines); + break; + default: + this.emitMarkedLines(out, block.lines); + break; + } + } + + private void appendCode(final StringBuilder out, final String in, int start, int end) + { + for(int i = start; i < end; i++) + { + final char c; + switch(c = in.charAt(i)) + { + case '&': + out.append("&"); + break; + case '<': + out.append("<"); + break; + case '>': + out.append(">"); + break; + default: + out.append(c); + break; + } + } + } + + private int findToken(final String in, int start, MarkToken token) + { + int pos = start; + while(pos < in.length()) + { + if(this.getToken(in, pos) == token) + return pos; + pos++; + } + return -1; + } + + private int skipSpaces(final String in, int start) + { + int pos = start; + while(pos < in.length() && in.charAt(pos) == ' ' && in.charAt(pos) != '\n') + pos++; + return pos < in.length() && in.charAt(pos) != '\n' ? pos : -1; + } + + private int readUntil(final StringBuilder out, final String in, int start, char... 
end) + { + int pos = start; + while(pos < in.length() && in.charAt(pos) != '\n') + { + final char ch = in.charAt(pos); + if(ch == '\\' && pos + 1 < in.length()) + { + final char c; + switch(c = in.charAt(pos + 1)) + { + case '\\': + case '[': + case ']': + case '(': + case ')': + case '{': + case '}': + case '#': + case '"': + case '\'': + case '.': + case '>': + case '*': + case '+': + case '-': + case '_': + case '!': + case '`': + out.append(c); + pos++; + break; + default: + out.append(ch); + break; + } + } + else + { + boolean endReached = false; + for(int n = 0; n < end.length; n++) + { + if(ch == end[n]) + { + endReached = true; + break; + } + } + if(endReached) + break; + out.append(ch); + } + pos++; + } + + final char ch = pos < in.length() ? in.charAt(pos) : '\n'; + for(int n = 0; n < end.length; n++) + { + if(ch == end[n]) + return pos; + } + return -1; + } + + private int checkLink(final StringBuilder out, final String in, int start, MarkToken token) + { + int pos = start + (token == MarkToken.LINK ? 
1 : 2); + final StringBuilder temp = new StringBuilder(); + + temp.setLength(0); + pos = this.readUntil(temp, in, pos, ']'); + if(pos < start) + return -1; + + String name = temp.toString(), link = null, comment = null; + + pos++; + pos = this.skipSpaces(in, pos); + if(pos < start) + return -1; + if(in.charAt(pos) == '(') + { + pos++; + temp.setLength(0); + pos = this.readUntil(temp, in, pos, ' ', ')'); + if(pos < start) + return -1; + link = temp.toString(); + + if(in.charAt(pos) == ' ') + { + pos = this.skipSpaces(in, pos); + if(pos > start && in.charAt(pos) == '"') + { + pos++; + temp.setLength(0); + pos = this.readUntil(temp, in, pos, '"'); + if(pos < start) + return -1; + comment = temp.toString(); + pos++; + this.skipSpaces(link, pos); + } + } + if(in.charAt(pos) != ')') + return -1; + } + else if(in.charAt(pos) == '[') + { + pos++; + temp.setLength(0); + pos = this.readUntil(temp, in, pos, ']'); + if(pos < start) + return -1; + final String id = temp.length() > 0 ? temp.toString() : name; + final LinkRef lr = this.linkRefs.get(id.toLowerCase()); + if(lr != null) + { + link = lr.link; + comment = lr.title; + } + } + + if(link == null) + return -1; + + if(token == MarkToken.LINK) + { + out.append("'); + out.append(name); + out.append(""); + } + else + { + out.append("\"");"); + } + + return pos; + } + + private int recursiveEmitLine(final StringBuilder out, final String in, int start, MarkToken token) + { + int pos = start, a, b; + final StringBuilder temp = new StringBuilder(); + while(pos < in.length()) + { + final MarkToken mt = this.getToken(in, pos); + if(token != MarkToken.NONE && (mt == token || token == MarkToken.EM_STAR && mt == MarkToken.STRONG_STAR || token == MarkToken.EM_UNDERSCORE && mt == MarkToken.STRONG_UNDERSCORE)) + return pos; + + switch(mt) + { + case IMAGE: + case LINK: + temp.setLength(0); + b = this.checkLink(temp, in, pos, mt); + if(b > 0) + { + out.append(temp); + pos = b; + } + else + { + out.append(in.charAt(pos)); + } + break; + 
case EM_STAR: + case EM_UNDERSCORE: + temp.setLength(0); + b = this.recursiveEmitLine(temp, in, pos + 1, mt); + if(b > 0) + { + out.append(""); + out.append(temp); + out.append(""); + pos = b; + } + else + { + out.append(in.charAt(pos)); + } + break; + case STRONG_STAR: + case STRONG_UNDERSCORE: + temp.setLength(0); + b = this.recursiveEmitLine(temp, in, pos + 2, mt); + if(b > 0) + { + out.append(""); + out.append(temp); + out.append(""); + pos = b + 1; + } + else + { + out.append(in.charAt(pos)); + } + break; + case CODE_SINGLE: + case CODE_DOUBLE: + a = pos + (mt == MarkToken.CODE_DOUBLE ? 2 : 1); + b = this.findToken(in, a, mt); + if(b > 0) + { + out.append(""); + this.appendCode(out, in, a, b); + out.append(""); + pos = b + (mt == MarkToken.CODE_DOUBLE ? 1 : 0); + } + else + { + out.append(in.charAt(pos)); + } + break; + case ESCAPE: + pos++; + //$FALL-THROUGH$ + default: + out.append(in.charAt(pos)); + break; + } + pos++; + } + return -1; + } + + private MarkToken getToken(final String in, final int pos) + { + final char c0 = pos > 0 ? in.charAt(pos - 1) : ' '; + final char c = in.charAt(pos); + final char c1 = pos + 1 < in.length() ? in.charAt(pos + 1) : ' '; + final char c2 = pos + 2 < in.length() ? in.charAt(pos + 2) : ' '; + + switch(c) + { + case '*': + if(c1 == '*') + { + return c0 != ' ' || c2 != ' ' ? MarkToken.STRONG_STAR : MarkToken.EM_STAR; + } + return c0 != ' ' || c1 != ' ' ? MarkToken.EM_STAR : MarkToken.NONE; + case '_': + if(c1 == '_') + { + return c0 != ' ' || c2 != ' ' ? MarkToken.STRONG_UNDERSCORE : MarkToken.EM_UNDERSCORE; + } + return c0 != ' ' || c1 != ' ' ? MarkToken.EM_UNDERSCORE : MarkToken.NONE; + case '!': + if(c1 == '[') + return MarkToken.IMAGE; + return MarkToken.NONE; + case '[': + return MarkToken.LINK; + case '`': + return c1 == '`' ? 
MarkToken.CODE_DOUBLE : MarkToken.CODE_SINGLE; + case '\\': + switch(c1) + { + case '\\': + case '[': + case ']': + case '(': + case ')': + case '{': + case '}': + case '#': + case '"': + case '\'': + case '.': + case '>': + case '*': + case '+': + case '-': + case '_': + case '!': + case '`': + return MarkToken.ESCAPE; + default: + return MarkToken.NONE; + } + default: + return MarkToken.NONE; + } + } + + private void emitMarkedLines(final StringBuilder out, final Line lines) + { + final StringBuilder in = new StringBuilder(); + Line line = lines; + while(line != null) + { + if(!line.isEmpty) + { + in.append(line.value.substring(line.leading, line.value.length() - line.trailing)); + if(line.trailing >= 2) + in.append("
"); + } + if(line.next != null) + in.append('\n'); + line = line.next; + } + + this.recursiveEmitLine(out, in.toString(), 0, MarkToken.NONE); + } + + private void emitRawLines(final StringBuilder out, final Line lines) + { + Line line = lines; + while(line != null) + { + if(!line.isEmpty) + { + out.append(line.value); + } + if(line.next != null) + out.append('\n'); + line = line.next; + } + } + + private void emitCodeLines(final StringBuilder out, final Line lines) + { + Line line = lines; + while(line != null) + { + if(!line.isEmpty) + { + for(int i = 4; i < line.value.length() - line.trailing; i++) + { + final char c; + switch(c = line.value.charAt(i)) + { + case '&': + out.append("&"); + break; + case '<': + out.append("<"); + break; + case '>': + out.append(">"); + break; + default: + out.append(c); + break; + } + } + } + if(line.next != null) + out.append('\n'); + line = line.next; + } + } +} diff --git a/src/java/txtmark/HTML.java b/src/java/txtmark/HTML.java new file mode 100644 index 0000000..25a044b --- /dev/null +++ b/src/java/txtmark/HTML.java @@ -0,0 +1,54 @@ +/* +* Copyright (C) 2011 René Jeschke +* See LICENSE.txt for licensing information. 
+*/ +package txtmark; + +import java.util.HashMap; +import java.util.HashSet; + +class HTML +{ + private final static String[] names = new String[] {"Â", "â", "´", "Æ", "æ", "À", "à", "ℵ", "Α", "α", "&", "∧", "∠", "'", "Å", "å", "≈", "Ã", "ã", "Ä", "ä", "„", "Β", "β", "¦", "•", "∩", "Ç", "ç", "¸", "¢", "Χ", "χ", "ˆ", "♣", "≅", "©", "↵", "∪", "¤", "‡", "†", "⇓", "↓", "°", "Δ", "δ", "♦", "÷", "É", "é", "Ê", "ê", "È", "è", "∅", " ", " ", "Ε", "ε", "≡", "Η", "η", "Ð", "ð", "Ë", "ë", "€", "∃", "ƒ", "∀", "½", "¼", "¾", "⁄", "Γ", "γ", "≥", ">", "⇔", "↔", "♥", "…", "Í", "í", "Î", "î", "¡", "Ì", "ì", "ℑ", "∞", "∫", "Ι", "ι", "¿", "∈", "Ï", "ï", "Κ", "κ", "Λ", "λ", "⟨", "«", "⇐", "←", "⌈", "“", "≤", "⌊", "∗", "◊", "‎", "‹", "‘", "<", "¯", "—", "µ", "·", "−", "Μ", "μ", "∇", " ", "–", "≠", "∋", "¬", "∉", "⊄", "Ñ", "ñ", "Ν", "ν", "Ó", "ó", "Ô", "ô", "Œ", "œ", "Ò", "ò", "‾", "Ω", "ω", "Ο", "ο", "⊕", "∨", "ª", "º", "Ø", "ø", "Õ", "õ", "⊗", "Ö", "ö", "¶", "∂", "‰", "⊥", "Φ", "φ", "Π", "π", "ϖ", "±", "£", "″", "′", "∏", "∝", "Ψ", "ψ", """, "√", "⟩", "»", "⇒", "→", "⌉", "”", "ℜ", "®", "⌋", "Ρ", "ρ", "‏", "›", "’", "‚", "Š", "š", "⋅", "§", "­", "Σ", "σ", "ς", "∼", "♠", "⊂", "⊆", "∑", "⊃", "¹", "²", "³", "⊇", "ß", "Τ", "τ", "∴", "Θ", "θ", "ϑ", " ", "þ", "˜", "×", "™", "Ú", "ú", "⇑", "↑", "Û", "û", "Ù", "ù", "¨", "ϒ", "Υ", "υ", "Ü", "ü", "℘", "Ξ", "ξ", "Ý", "ý", "¥", "Ÿ", "ÿ", "Ζ", "ζ", "‍", "‌"}; + private final static char[] chars = new char[] {'\u00C2', '\u00E2', '\u00B4', '\u00C6', '\u00E6', '\u00C0', '\u00E0', '\u2135', '\u0391', '\u03B1', '\u0026', '\u2227', '\u2220', '\'', '\u00C5', '\u00E5', '\u2248', '\u00C3', '\u00E3', '\u00C4', '\u00E4', '\u201E', '\u0392', '\u03B2', '\u00A6', '\u2022', '\u2229', '\u00C7', '\u00E7', '\u00B8', '\u00A2', '\u03A7', '\u03C7', '\u02C6', '\u2663', '\u2245', '\u00A9', '\u21B5', '\u222A', '\u00A4', '\u2021', '\u2020', '\u21D3', '\u2193', '\u00B0', '\u0394', '\u03B4', '\u2666', '\u00F7', '\u00C9', '\u00E9', '\u00CA', '\u00EA', '\u00C8', '\u00E8', 
'\u2205', '\u2003', '\u2002', '\u0395', '\u03B5', '\u2261', '\u0397', '\u03B7', '\u00D0', '\u00F0', '\u00CB', '\u00EB', '\u20AC', '\u2203', '\u0192', '\u2200', '\u00BD', '\u00BC', '\u00BE', '\u2044', '\u0393', '\u03B3', '\u2265', '\u003E', '\u21D4', '\u2194', '\u2665', '\u2026', '\u00CD', '\u00ED', '\u00CE', '\u00EE', '\u00A1', '\u00CC', '\u00EC', '\u2111', '\u221E', '\u222B', '\u0399', '\u03B9', '\u00BF', '\u2208', '\u00CF', '\u00EF', '\u039A', '\u03BA', '\u039B', '\u03BB', '\u2329', '\u00AB', '\u21D0', '\u2190', '\u2308', '\u201C', '\u2264', '\u230A', '\u2217', '\u25CA', '\u200E', '\u2039', '\u2018', '\u003C', '\u00AF', '\u2014', '\u00B5', '\u00B7', '\u2212', '\u039C', '\u03BC', '\u2207', '\u00A0', '\u2013', '\u2260', '\u220B', '\u00AC', '\u2209', '\u2284', '\u00D1', '\u00F1', '\u039D', '\u03BD', '\u00D3', '\u00F3', '\u00D4', '\u00F4', '\u0152', '\u0153', '\u00D2', '\u00F2', '\u203E', '\u03A9', '\u03C9', '\u039F', '\u03BF', '\u2295', '\u2228', '\u00AA', '\u00BA', '\u00D8', '\u00F8', '\u00D5', '\u00F5', '\u2297', '\u00D6', '\u00F6', '\u00B6', '\u2202', '\u2030', '\u22A5', '\u03A6', '\u03C6', '\u03A0', '\u03C0', '\u03D6', '\u00B1', '\u00A3', '\u2033', '\u2032', '\u220F', '\u221D', '\u03A8', '\u03C8', '\u0022', '\u221A', '\u232A', '\u00BB', '\u21D2', '\u2192', '\u2309', '\u201D', '\u211C', '\u00AE', '\u230B', '\u03A1', '\u03C1', '\u200F', '\u203A', '\u2019', '\u201A', '\u0160', '\u0161', '\u22C5', '\u00A7', '\u00AD', '\u03A3', '\u03C3', '\u03C2', '\u223C', '\u2660', '\u2282', '\u2286', '\u2211', '\u2283', '\u00B9', '\u00B2', '\u00B3', '\u2287', '\u00DF', '\u03A4', '\u03C4', '\u2234', '\u0398', '\u03B8', '\u03D1', '\u00DE', '\u00FE', '\u02DC', '\u00D7', '\u2122', '\u00DA', '\u00FA', '\u21D1', '\u2191', '\u00DB', '\u00FB', '\u00D9', '\u00F9', '\u00A8', '\u03D2', '\u03A5', '\u03C5', '\u00DC', '\u00FC', '\u2118', '\u039E', '\u03BE', '\u00DD', '\u00FD', '\u00A5', '\u0178', '\u00FF', '\u0396', '\u03B6', '\u200D', '\u200C'}; + private final static HashMap encodeMap = new 
HashMap(); + private final static HashMap decodeMap = new HashMap(); + private final static HashSet HTML_ELEMENTS = new HashSet(); + private final static HashSet HTML_BLOCK_ELEMENTS = new HashSet(); + + private final static HTMLElement[] BLOCK_ELEMENTS = new HTMLElement[] { + HTMLElement.address, + HTMLElement.blockquote, + HTMLElement.del, HTMLElement.div, HTMLElement.dl, + HTMLElement.fieldset, HTMLElement.form, + HTMLElement.h1, HTMLElement.h2, HTMLElement.h3, HTMLElement.h4, HTMLElement.h5, HTMLElement.h6, HTMLElement.hr, + HTMLElement.ins, + HTMLElement.noscript, + HTMLElement.ol, + HTMLElement.p, HTMLElement.pre, + HTMLElement.table, + HTMLElement.ul + }; + + static + { + for(final HTMLElement h : HTMLElement.values()) + { + HTML_ELEMENTS.add(h.toString()); + } + for(final HTMLElement h : BLOCK_ELEMENTS) + { + HTML_BLOCK_ELEMENTS.add(h.toString()); + } + for(int i = 0; i < names.length; i++) + { + encodeMap.put(chars[i], names[i]); + decodeMap.put(names[i], chars[i]); + } + } + + private HTML() + { + // + } +} diff --git a/src/java/txtmark/HTMLElement.java b/src/java/txtmark/HTMLElement.java new file mode 100644 index 0000000..fea6c92 --- /dev/null +++ b/src/java/txtmark/HTMLElement.java @@ -0,0 +1,28 @@ +/* +* Copyright (C) 2011 René Jeschke +* See LICENSE.txt for licensing information. 
/**
 * HTML element names, spelled exactly like the tag (lowercase).
 */
enum HTMLElement
{
    a, abbr, acronym, address, applet, area,
    b, base, basefont, bdo, big, blockquote, body, br, button,
    caption, cite, code, col, colgroup,
    dd, del, dfn, div, dl, dt,
    em,
    fieldset, font, form, frame, frameset,
    h1, h2, h3, h4, h5, h6, head, hr, html,
    i, iframe, img, input, ins,
    kbd,
    label, legend, li, link,
    map, meta,
    noscript,
    object, ol, optgroup, option,
    p, param, pre,
    q,
    s, samp, script, select, small, span, strike, strong, style, sub, sup,
    table, tbody, td, textarea, tfoot, th, thead, title, tr, tt,
    u, ul,
    var
}

/**
 * A single line of input text, kept as a node of a doubly linked list.
 */
class Line
{
    /** Read cursor used by {@link #skipSpaces()} and {@link #readUntil(char...)}. */
    public int pos;
    /** Number of leading and trailing spaces of {@link #value}. */
    public int leading = 0, trailing = 0;
    /** {@code true} while the line has no content; cleared by {@link #init()}. */
    public boolean isEmpty = true;
    /** Text of this line. */
    public String value = null;
    /** Neighbouring lines. */
    public Line previous = null, next = null;
    /** Whether the neighbouring line is empty; {@link #setEmpty()} updates these on the neighbours. */
    public boolean prevEmpty, nextEmpty;

    public Line()
    {
        //
    }

    /**
     * Computes {@link #leading} and {@link #trailing} for the current
     * {@link #value}. A value consisting of spaces only marks the line as empty.
     */
    public void init()
    {
        this.leading = this.countLeadingSpaces();

        if(this.leading == this.value.length())
        {
            this.setEmpty();
        }
        else
        {
            this.isEmpty = false;
            this.trailing = 0;
            // Safe without a bounds check: there is at least one non-space character.
            while(this.value.charAt(this.value.length() - this.trailing - 1) == ' ')
                this.trailing++;
        }
    }

    /**
     * Recomputes {@link #leading} only. A value consisting of spaces only marks
     * the line as empty.
     */
    public void initLeading()
    {
        this.leading = this.countLeadingSpaces();

        if(this.leading == this.value.length())
        {
            this.setEmpty();
        }
    }

    /**
     * @return Number of space characters at the start of {@link #value}.
     */
    private int countLeadingSpaces()
    {
        int count = 0;
        while(count < this.value.length() && this.value.charAt(count) == ' ')
            count++;
        return count;
    }

    /**
     * Advances {@link #pos} past any spaces.
     *
     * @return {@code false} if the end of the line was reached.
     */
    public boolean skipSpaces()
    {
        while(this.pos < this.value.length() && this.value.charAt(this.pos) == ' ')
            this.pos++;
        return this.pos < this.value.length();
    }

    /**
     * Reads from {@link #pos} up to (not including) the first unescaped
     * occurrence of one of the given characters. A backslash followed by one of
     * <code>\ [ ] ( ) { } # " ' . &gt; * + - _ ! `</code> is replaced by that
     * character. The end of the line counts as {@code '\n'}.
     *
     * @param end
     *            Characters that terminate the read.
     * @return The text read, with {@link #pos} moved onto the terminating
     *         character, or {@code null} (with {@link #pos} unchanged) if the
     *         read stopped on a character that is not in {@code end}.
     */
    public String readUntil(char... end)
    {
        final StringBuilder sb = new StringBuilder();
        int cursor = this.pos;
        while(cursor < this.value.length())
        {
            final char ch = this.value.charAt(cursor);
            if(ch == '\\' && cursor + 1 < this.value.length())
            {
                final char c;
                switch(c = this.value.charAt(cursor + 1))
                {
                case '\\':
                case '[':
                case ']':
                case '(':
                case ')':
                case '{':
                case '}':
                case '#':
                case '"':
                case '\'':
                case '.':
                case '>':
                case '*':
                case '+':
                case '-':
                case '_':
                case '!':
                case '`':
                    sb.append(c);
                    cursor++;
                    break;
                default:
                    sb.append(ch);
                    break;
                }
            }
            else
            {
                if(contains(end, ch))
                    break;
                sb.append(ch);
            }
            cursor++;
        }

        final char ch = cursor < this.value.length() ? this.value.charAt(cursor) : '\n';
        if(contains(end, ch))
        {
            this.pos = cursor;
            return sb.toString();
        }
        return null;
    }

    /**
     * @return {@code true} if {@code ch} is one of {@code chars}.
     */
    private static boolean contains(final char[] chars, final char ch)
    {
        for(int n = 0; n < chars.length; n++)
        {
            if(ch == chars[n])
                return true;
        }
        return false;
    }

    /**
     * Clears this line and flags it as empty on itself and on both neighbours.
     */
    public void setEmpty()
    {
        this.value = "";
        this.leading = this.trailing = 0;
        this.isEmpty = true;
        if(this.previous != null)
            this.previous.nextEmpty = true;
        if(this.next != null)
            this.next.prevEmpty = true;
    }

    /**
     * Counts the occurrences of {@code ch} in {@link #value}, ignoring spaces.
     *
     * @return The count, or 0 if the line contains any character other than
     *         {@code ch} and space.
     */
    private int countCharsWs(char ch)
    {
        int count = 0;
        for(int i = 0; i < this.value.length(); i++)
        {
            final char c = this.value.charAt(i);
            if(c == ' ')
                continue;
            if(c == ch)
            {
                count++;
                continue;
            }
            count = 0;
            break;
        }
        return count;
    }

    /**
     * Classifies this line. Checks are made in this order: empty, code (more
     * than three leading spaces), {@code '#'} headline, block quote, horizontal
     * rule, unordered list, ordered list, underlined headline (decided by the
     * next line), other.
     *
     * @return The type of this line.
     */
    public LineType getLineType()
    {
        if(this.isEmpty)
            return LineType.EMPTY;

        if(this.leading > 3)
            return LineType.CODE;

        final char first = this.value.charAt(this.leading);

        if(first == '#')
            return LineType.HEADLINE;

        if(first == '>')
            return LineType.BQUOTE;

        // Markdown rules are three or more '*', '-' or '_' with optional spaces in between.
        if(this.leading == 0 && this.value.length() > 2 && (first == '*' || first == '-' || first == '_'))
        {
            if(this.countCharsWs(first) >= 3)
                return LineType.HR;
        }

        if(this.value.length() - this.leading >= 2 && this.value.charAt(this.leading + 1) == ' ')
        {
            switch(first)
            {
            case '*':
            case '-':
            case '+':
                return LineType.ULIST;
            }
        }

        if(this.value.length() - this.leading >= 3 && Character.isDigit(first))
        {
            int i = this.leading + 1;
            while(i < this.value.length() && Character.isDigit(this.value.charAt(i)))
                i++;
            if(i + 1 < this.value.length() && this.value.charAt(i) == '.' && this.value.charAt(i + 1) == ' ')
                return LineType.OLIST;
        }

        if(this.next != null && !this.next.isEmpty)
        {
            if((this.next.value.charAt(0) == '-') && (this.next.countCharsWs('-') > 0))
                return LineType.HEADLINE2;
            if((this.next.value.charAt(0) == '=') && (this.next.countCharsWs('=') > 0))
                return LineType.HEADLINE1;
        }

        return LineType.OTHER;
    }
}

/**
 * Line classifications returned by {@link Line#getLineType()}.
 */
enum LineType
{
    EMPTY,
    OTHER,
    HEADLINE, HEADLINE1, HEADLINE2,
    CODE,
    ULIST, OLIST,
    BQUOTE,
    HR
}
+*/ +package txtmark; + +class LinkRef +{ + public final String link; + public String title; + + public LinkRef(final String link, final String title) + { + this.link = link; + this.title = title; + } + + @Override + public String toString() + { + return this.link + " \"" + this.title + "\""; + } +} diff --git a/src/java/txtmark/MarkToken.java b/src/java/txtmark/MarkToken.java new file mode 100644 index 0000000..be1280e --- /dev/null +++ b/src/java/txtmark/MarkToken.java @@ -0,0 +1,21 @@ +/* +* Copyright (C) 2011 René Jeschke +* See LICENSE.txt for licensing information. +*/ +package txtmark; + +enum MarkToken +{ + NONE, + EM_STAR, // x*x + EM_UNDERSCORE, // x_x + STRONG_STAR, // x**x + STRONG_UNDERSCORE, // x__x + CODE_SINGLE, // ` + CODE_DOUBLE, // `` + LINK, // [ + HTML, // < + IMAGE, // ![ + ENTITY, // & + ESCAPE // \x +} diff --git a/src/java/txtmark/Processor.java b/src/java/txtmark/Processor.java new file mode 100644 index 0000000..47bccaa --- /dev/null +++ b/src/java/txtmark/Processor.java @@ -0,0 +1,384 @@ +/* +* Copyright (C) 2011 René Jeschke +* See LICENSE.txt for licensing information. 
*/
package txtmark;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;

/**
 * Entry point of txtmark: reads Markdown-style text, splits it into a tree of
 * {@link Block}s and lets an {@link Emitter} write the result into a string.
 *
 * <p>Instances are created internally by the static {@code process(...)}
 * methods; each instance is used for exactly one input.</p>
 */
public class Processor
{
    /** Source of the text to process. */
    private final Reader reader;
    /** Receives link reference definitions while reading and emits the block tree afterwards. */
    private final Emitter emitter = new Emitter();

    /**
     * Creates a processor for a single input.
     *
     * @param reader the reader to consume; it is read until end of input
     */
    private Processor(Reader reader)
    {
        this.reader = reader;
    }

    /**
     * Processes the given string.
     *
     * @param input the text to process
     * @return the emitted output
     * @throws IOException if reading fails
     */
    public static String process(final String input) throws IOException
    {
        return process(new StringReader(input));
    }

    /**
     * Processes the given file, decoding it as UTF-8.
     *
     * @param file the file to read
     * @return the emitted output
     * @throws IOException if the file cannot be opened or read
     */
    public static String process(final File file) throws IOException
    {
        return process(file, "UTF-8");
    }

    /**
     * Processes the given file using the given character encoding. The file
     * is always closed before this method returns, even if processing fails.
     *
     * @param file the file to read
     * @param encoding name of the charset used to decode the file
     * @return the emitted output
     * @throws IOException if the file cannot be opened or read, or the
     *             encoding is not supported
     */
    public static String process(final File file, final String encoding) throws IOException
    {
        final InputStream in = new FileInputStream(file);
        try
        {
            // The reader is created inside the try block so that the stream is
            // released even when the encoding name is rejected.
            final Reader r = new BufferedReader(new InputStreamReader(in, encoding));
            return new Processor(r).process();
        }
        finally
        {
            in.close();
        }
    }

    /**
     * Processes the given stream, decoding it as UTF-8. The stream is not
     * closed by this method.
     *
     * @param input the stream to read
     * @return the emitted output
     * @throws IOException if reading fails
     */
    public static String process(final InputStream input) throws IOException
    {
        return process(input, "UTF-8");
    }

    /**
     * Processes the given stream using the given character encoding. The
     * stream is not closed by this method.
     *
     * @param input the stream to read
     * @param encoding name of the charset used to decode the stream
     * @return the emitted output
     * @throws IOException if reading fails or the encoding is not supported
     */
    public static String process(final InputStream input, final String encoding) throws IOException
    {
        final Processor p = new Processor(new BufferedReader(new InputStreamReader(input, encoding)));
        return p.process();
    }

    /**
     * Processes the text supplied by the given reader. The reader is wrapped
     * in a {@link BufferedReader} unless it already is one; it is not closed
     * by this method.
     *
     * @param reader the reader to consume
     * @return the emitted output
     * @throws IOException if reading fails
     */
    public static String process(final Reader reader) throws IOException
    {
        final Processor p = new Processor(!(reader instanceof BufferedReader) ? new BufferedReader(reader) : reader);
        return p.process();
    }

    /**
     * Reads the whole input line by line and collects the lines in a new
     * {@link Block}.
     *
     * <p>While reading, tabs are expanded with spaces up to the next multiple
     * of four columns, and {@code \n}, {@code \r}, {@code \r\n} and
     * {@code \n\r} each end a line. Lines of the form
     * {@code [id]: link "title"} are registered with the emitter as link
     * references and are not added to the block; the same holds for a line
     * that only supplies the title of the preceding reference.</p>
     *
     * @return the block holding all remaining lines
     * @throws IOException if reading fails
     */
    private Block readLines() throws IOException
    {
        final Block block = new Block();
        final StringBuilder sb = new StringBuilder(80);
        // 'c' is always the next unprocessed character (one character of look-ahead).
        int c = this.reader.read();
        // Last link reference that was defined without a title; the next line may supply it.
        LinkRef lastLinkRef = null;
        while(c != -1)
        {
            sb.setLength(0);
            int pos = 0;
            boolean eol = false;
            while(!eol)
            {
                switch(c)
                {
                case -1:
                    eol = true;
                    break;
                case '\n':
                    c = this.reader.read();
                    if(c == '\r')
                        c = this.reader.read();
                    eol = true;
                    break;
                case '\r':
                    c = this.reader.read();
                    if(c == '\n')
                        c = this.reader.read();
                    eol = true;
                    break;
                case '\t':
                    {
                        // Pad with spaces up to the next multiple of four columns.
                        final int np = pos + (4 - (pos & 3));
                        while(pos < np)
                        {
                            sb.append(' ');
                            pos++;
                        }
                        c = this.reader.read();
                    }
                    break;
                default:
                    pos++;
                    sb.append((char)c);
                    c = this.reader.read();
                    break;
                }
            }

            final Line line = new Line();
            line.value = sb.toString();
            line.init();

            // Check for link definitions
            boolean isLinkRef = false;
            String id = null, link = null, comment = null;
            if(!line.isEmpty && line.leading < 4 && line.value.charAt(line.leading) == '[')
            {
                line.pos = line.leading + 1;
                // Read ID up to ']' (readUntil leaves line.pos on the ']')
                id = line.readUntil(']');
                // Is ID valid and are there any more characters?
                if(id != null && line.pos + 2 < line.value.length())
                {
                    // Check for ':' ([...]:...)
                    if(line.value.charAt(line.pos + 1) == ':')
                    {
                        line.pos += 2;
                        line.skipSpaces();
                        // Nothing but spaces may follow the ':'; without this
                        // guard the charAt below would read past the end.
                        if(line.pos < line.value.length())
                        {
                            // Check for link syntax
                            if(line.value.charAt(line.pos) == '<')
                            {
                                line.pos++;
                                link = line.readUntil('>');
                                line.pos++;
                            }
                            else
                                link = line.readUntil(' ', '\n');
                        }

                        // Is link valid?
                        if(link != null)
                        {
                            // Any non-whitespace characters following?
                            if(line.skipSpaces())
                            {
                                final char ch = line.value.charAt(line.pos);
                                // Read comment
                                if(ch == '\"' || ch == '\'' || ch == '(')
                                {
                                    line.pos++;
                                    comment = line.readUntil(ch == '(' ? ')' : ch);
                                    // Valid linkRef only if comment is valid
                                    if(comment != null)
                                        isLinkRef = true;
                                }
                            }
                            else
                                isLinkRef = true;
                        }
                    }
                }
            }

            if(isLinkRef)
            {
                // Store linkRef and skip line
                final LinkRef lr = new LinkRef(link, comment);
                this.emitter.addLinkRef(id, lr);
                // Only a definition without an inline title may take its title
                // from the following line. Forget any earlier candidate so a
                // later quoted line is not attached to an unrelated reference.
                lastLinkRef = comment == null ? lr : null;
            }
            else
            {
                comment = null;
                // Check for multi-line linkRef
                // NOTE(review): an empty line does not clear lastLinkRef, so a
                // title is still accepted after blank lines -- confirm intended.
                if(!line.isEmpty && lastLinkRef != null)
                {
                    line.pos = line.leading;
                    final char ch = line.value.charAt(line.pos);
                    if(ch == '\"' || ch == '\'' || ch == '(')
                    {
                        line.pos++;
                        comment = line.readUntil(ch == '(' ? ')' : ch);
                    }
                    if(comment != null)
                        lastLinkRef.title = comment;

                    lastLinkRef = null;
                }

                // No multi-line linkRef, store line
                if(comment == null)
                {
                    line.pos = 0;
                    block.appendLine(line);
                }
            }
        }

        return block;
    }

    /**
     * Splits the lines of a list block into child blocks of type
     * {@link BlockType#LIST_ITEM}.
     *
     * <p>Starting at the second line, a split is made in front of every line
     * that is an ordered or unordered list line, and in front of every
     * non-empty, unindented line that follows an empty line. The remaining
     * lines form the last item.</p>
     *
     * @param root the list block; must contain at least one line
     */
    private void initListBlock(final Block root)
    {
        Line line = root.lines;
        // The first line always belongs to the first item, start with the second.
        line = line.next;
        while(line != null)
        {
            final LineType t = line.getLineType();
            if(
                (t == LineType.OLIST || t == LineType.ULIST) ||
                (!line.isEmpty && (line.prevEmpty && line.leading == 0 && !(t == LineType.OLIST || t == LineType.ULIST))))
            {
                root.split(line.previous).type = BlockType.LIST_ITEM;
            }
            line = line.next;
        }
        root.split(root.lineTail).type = BlockType.LIST_ITEM;
    }

    /**
     * Turns the lines of {@code root} into typed child blocks, descending into
     * block quotes and list items.
     *
     * <p>The first non-empty line's {@link LineType} selects the kind of
     * block; lines are then consumed up to the end of that block and split off
     * via {@code root.split(...)}.</p>
     *
     * @param root the block whose lines are to be partitioned
     * @param listMode {@code true} if {@code root} is a list item; the list
     *            indent is then removed first and plain text may become a
     *            {@link BlockType#NONE} block instead of a paragraph
     */
    private void recurse(final Block root, boolean listMode)
    {
        Block block;
        Line line = root.lines;
        while(line != null && line.isEmpty) line = line.next;
        if(line == null)
            return;

        if(listMode)
            root.removeListIndent();

        // Whether the most recently created plain-text block was a PARAGRAPH.
        boolean hasParagraph = false;

        while(line != null)
        {
            final LineType type = line.getLineType();
            switch(type)
            {
            case OTHER:
                {
                    final boolean wasEmpty = line.prevEmpty;
                    // Consume plain text up to an empty line or a line that starts another block.
                    while(line != null && !line.isEmpty)
                    {
                        final LineType t = line.getLineType();
                        if(listMode && (t == LineType.OLIST || t == LineType.ULIST))
                            break;
                        if(t == LineType.HEADLINE || t == LineType.HEADLINE1 || t == LineType.HEADLINE2 || t == LineType.HR || t == LineType.BQUOTE)
                            break;
                        line = line.next;
                    }
                    final BlockType bt;
                    if(line != null && !line.isEmpty)
                    {
                        // Stopped at the first line of another block: split in front of it.
                        bt = (listMode && root.blocks == null && !wasEmpty) ? BlockType.NONE : BlockType.PARAGRAPH;
                        root.split(line.previous).type = bt;
                        root.removeLeadingEmptyLines();
                    }
                    else
                    {
                        // Stopped at an empty line or at the end of the block.
                        bt = (listMode && (line == null || !line.isEmpty) && !wasEmpty) ? BlockType.NONE : BlockType.PARAGRAPH;
                        root.split(line == null ? root.lineTail : line).type = bt;
                        root.removeLeadingEmptyLines();
                    }
                    hasParagraph = bt == BlockType.PARAGRAPH;
                    line = root.lines;
                }
                break;
            case CODE:
                // A code block runs over empty lines and lines indented by more than three columns.
                while(line != null && (line.isEmpty || line.leading > 3))
                {
                    line = line.next;
                }
                block = root.split(line != null ? line.previous : root.lineTail);
                block.type = BlockType.CODE;
                block.removeSurroundingEmptyLines();
                break;
            case BQUOTE:
                // A quote ends in front of an unindented non-quote line that follows an empty line.
                while(line != null)
                {
                    if(!line.isEmpty && (line.prevEmpty && line.leading == 0 && line.getLineType() != LineType.BQUOTE))
                        break;
                    line = line.next;
                }
                block = root.split(line != null ? line.previous : root.lineTail);
                block.type = BlockType.BLOCKQUOTE;
                block.removeSurroundingEmptyLines();
                block.removeBlockQuotePrefix();
                this.recurse(block, false);
                line = root.lines;
                break;
            case HR:
                if(line.previous != null)
                {
                    // Split off whatever precedes the ruler line.
                    root.split(line.previous);
                }
                root.split(line).type = BlockType.RULER;
                root.removeLeadingEmptyLines();
                line = root.lines;
                break;
            case HEADLINE:
            case HEADLINE1:
            case HEADLINE2:
                if(line.previous != null)
                {
                    // Split off whatever precedes the headline.
                    root.split(line.previous);
                }
                if(type != LineType.HEADLINE)
                {
                    // HEADLINE1/HEADLINE2 are recognised by the '=' / '-' line
                    // that follows; blank that line out.
                    line.next.setEmpty();
                }
                block = root.split(line);
                block.type = BlockType.HEADLINE;
                if(type != LineType.HEADLINE)
                    block.hlDepth = type == LineType.HEADLINE1 ? 1 : 2;
                block.transfromHeadline();
                root.removeLeadingEmptyLines();
                line = root.lines;
                break;
            case OLIST:
            case ULIST:
                // A list ends in front of an unindented non-list line that follows an empty line.
                while(line != null)
                {
                    final LineType t = line.getLineType();
                    if(!line.isEmpty && (line.prevEmpty && line.leading == 0 && !(t == LineType.OLIST || t == LineType.ULIST)))
                        break;
                    line = line.next;
                }
                block = root.split(line != null ? line.previous : root.lineTail);
                block.type = type == LineType.OLIST ? BlockType.ORDERED_LIST : BlockType.UNORDERED_LIST;
                block.lines.prevEmpty = false;
                block.lineTail.nextEmpty = false;
                block.removeSurroundingEmptyLines();
                // Clear the flags again on the new first and last line.
                block.lines.prevEmpty = block.lineTail.nextEmpty = false;
                this.initListBlock(block);
                block = block.blocks;
                while(block != null)
                {
                    this.recurse(block, true);
                    block = block.next;
                }
                break;
            default:
                line = line.next;
                break;
            }
        }

        if(listMode && hasParagraph)
        {
            // Promote the untyped blocks of this list item to paragraphs as well.
            block = root.blocks;
            while(block != null)
            {
                if(block.type == BlockType.NONE)
                    block.type = BlockType.PARAGRAPH;
                block = block.next;
            }
        }
    }

    /**
     * Runs the complete pipeline on this processor's reader: reads all lines,
     * builds the block tree and emits every top-level block.
     *
     * @return the emitted output followed by two line feeds
     * @throws IOException if reading fails
     */
    private String process() throws IOException
    {
        final StringBuilder out = new StringBuilder();

        long t0 = System.nanoTime();

        final Block parent = this.readLines();
        parent.removeSurroundingEmptyLines();

        this.recurse(parent, false);
        Block block = parent.blocks;
        while(block != null)
        {
            this.emitter.emit(out, block);
            block = block.next;
        }

        t0 = System.nanoTime() - t0;

        // NOTE(review): the format string has no conversion, so the elapsed
        // milliseconds argument is ignored and only "\n\n" is appended. The
        // template may have lost text upstream -- confirm against the source.
        out.append(String.format("\n\n", (int)(t0 * 1e-6)));

        return out.toString();
    }
}