Révision 3281

TXM/main/org.txm.groovy.core/src/groovy/org/txm/scripts/filters/Tokeniser/SimpleTokenizerXml.groovy (revision 3281)
523 523
				writer.writeCharacters("\n");
524 524
			}
525 525
			if (stringTokenizer.doSentences())  {
526
				writer.writeProcessingInstruction("txm", "</s>")
526
				writer.writeProcessingInstruction("txm", "</s>\n")
527 527
			}
528 528
		}
529 529
	}
TXM/main/org.txm.tokenizer.core/src/org/txm/tokenizer/StringTokenizer.java (revision 3281)
2 2

  
3 3
import java.util.List;
4 4

  
5
/**
6
 * Interface of a String tokenizer
7
 * 
8
 * @author mdecorde
9
 *
10
 */
5 11
public interface StringTokenizer {
6 12
	
13
	/**
14
	 * 
15
	 * @param text the text to tokenize
16
	 * @return the tokenized text: a list of sentences, each sentence being a list of tokens
17
	 */
7 18
	List<List<String>> processText(String text);
8 19
	
20
	/**
21
	 * 
22
	 * @return true if the String tokenizer detects sentences
23
	 */
9 24
	boolean doSentences();
10 25
}

Formats disponibles : Unified diff