Revision 479 tmp/org.txm.groovy.core/src/groovy/org/txm/tokenizer/TokenizerClasses.groovy

TokenizerClasses.groovy (revision 479)
21 21
//
22 22
//
23 23
//
24
// $LastChangedDate: 2016-10-03 15:30:36 +0200 (Mon, 03 Oct 2016) $
25
// $LastChangedRevision: 3313 $
24
// $LastChangedDate: 2017-04-06 09:11:32 +0200 (jeu. 06 avril 2017) $
25
// $LastChangedRevision: 3425 $
26 26
// $LastChangedBy: mdecorde $
27 27
//
28 28
package org.txm.tokenizer
29 29

  
30
import java.util.regex.Pattern
30
import java.io.FileWriter;
31
import java.util.regex.Pattern;
31 32

  
32
import javax.xml.stream.*
33

  
34
import org.txm.utils.xml.DomUtils
35
import org.w3c.dom.Document
33
import org.txm.utils.xml.DomUtils;
34
import org.w3c.dom.Document;
36 35
import org.w3c.dom.Element
37
import org.w3c.dom.Node
38
import org.w3c.dom.NodeList
36
import org.w3c.dom.Node;
37
import org.w3c.dom.NodeList;
39 38

  
39
import javax.xml.stream.*;
40
import java.net.URL;
41

  
40 42
// TODO: Auto-generated Javadoc
41 43
/**
42 44
 * The Class TokenizerClasses.
43 45
 */
44
public class TokenizerClasses {
46
class TokenizerClasses {
45 47

  
46 48
	//// ROOTS ? ////
47 49
	public static boolean debug = false;
......
72 74
	public static String corr_tags_no_seg = "expan|unclear|choice|corr|sic|reg|orig|foreign|hi|title|name|supplied|subst|add|del|damage|date|idno|surplus";
73 75

  
74 76
	/** The word_tags. */
75
	public static String word_tags = "w|abbr|num";
77
	public static String word_tags = "w";
76 78

  
77 79
	/** The intraword_tags. */
78 80
	public static String intraword_tags = "c|ex|caesura";
......
191 193
		q_tags = "q|quote|item|stage|cit";
192 194
		extraword1_tags = "expan|pb|lb|milestone|gap|note|s|locus|title|ref|hi|witDetail";
193 195
		corr_tags_no_seg = "expan|unclear|choice|corr|sic|reg|orig|foreign|hi|title|name|supplied|subst|add|del|damage|date|idno|surplus";
194
		word_tags = "w|abbr|num";
196
		word_tags = "w";
195 197
		intraword_tags = "c|ex|caesura";
196 198
		punct_quotes = "'‘’’"
197 199
		punct_strong1 = ".!?";

Also available in: Unified diff