/ - Diff - Plateforme TXM - Forge du Centre Blaise Pascal

     				if (outSideTextTagsAndKeepContentRegex != null && outSideTextTagsAndKeepContentRegex.trim().length() > 0) {
     					tokenizer.setOutSideTextTagsAndKeepContent(outSideTextTagsAndKeepContentRegex)
+    				}
     				if (noteRegex != null && noteRegex.trim().length() > 0) {
     					tokenizer.setNote(noteRegex)
+    				}
     				// tokenize !
     				if (!tokenizer.process()) {
     					println("Failed to process "+f)

+    	}
     	/**
     	 * Set element content to NOT tokenize
+    	 *
     	 * @param regexp
     	 */
     	public void setNote(String regexp) {
     		this.note_content = regexp;
     		this.reg_note_content = Pattern.compile(note_content);
+    	}
     	/**
     	 * Set the element to ignore but not their content
+    	 *
     	 * @param regexp
-...
     		ChunkTokenizerXml tokenizer = new ChunkTokenizerXml(inFile, lang)
     		tokenizer.setRetokenize(false)
     		tokenizer.setNote("note")
     		//tokenizer.setOutSideTextTags("outsideToEdit")
     		tokenizer.setOutSideTextTagsAndKeepContent("outsideToEdit")
     		//tokenizer.setDEBUG false

     		this.outside_text_tags = regexp;
     		this.reg_outside_text_tags = Pattern.compile(outside_text_tags);
+    	}
     	/**
     	 * Set element content to NOT tokenize
+    	 *
     	 * @param regexp
     	 */
     	public void setNote(String regexp) {
     		this.note_content = regexp;
     		this.reg_note_content = Pattern.compile(note_content);
+    	}
     	/**
     	 * Set the element to ignore but not their content
+    	 *
     	 * @param regexp
-...
     	 * @param str the str
     	 * @return the list
     	 */
     	public List<String> tokenize(String str)
+    	{
     	public List<String> tokenize(String str) {
     		return str.tokenize()	// cut by whitespace
+    	}
-...
+    	 *
     	 * @param args the arguments
     	 */
     	public static void main(String[] args)
+    	{
     	public static void main(String[] args) {
     		//		File SVNDIR = new File(System.getProperty("user.home"), "xml/tokenizernum");
     		//		Runtime.getRuntime().exec("svn", "update", SVNDIR.getAbsolutePath());
     		//		File inputFile = new File(SVNDIR, "baye1.xml")
     		//		File outputFile = new File(SVNDIR, "baye1-t.xml")
     		//		SimpleTokenizerXml tokenizer = new SimpleTokenizerXml(inputFile, outputFile, "fr")
     		//		tokenizer.DEBUG = true
     		//		tokenizer.setNote("note")
     		//		//tokenizer.setOutSideTextTags("teiHeader")
     		//		tokenizer.setOutSideTextTagsAndKeepContent("teiHeader")
     		//		println outputFile.toString() + " : "+tokenizer.process();
-...
     		SimpleTokenizerXml tokenizer = new SimpleTokenizerXml(inFile, outFile, lang)
     		tokenizer.setRetokenize(false)
     		tokenizer.setNote("note")
     		//tokenizer.setOutSideTextTags("outsideToEdit")
     		tokenizer.setOutSideTextTagsAndKeepContent("outsideToEdit")
     		//tokenizer.setDEBUG false

Laboratoire ICAR » Plateforme TXM

Révision 3706