Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / scripts / clix / tnt-wrapper-definition.xml @ 187

History | View | Annotate | Download (3.7 kB)

1
<?xml version="1.0" encoding="UTF-8"?>
2
<application name="TnT" version="0.0.0" desc="Trigrams'n'Tags : part-of-speech tagger">
3
        <progs>
4
        
5
                <!--
                  Wrapper definition for the "tnt" executable (the tagger itself).
                  Each <arg> declares one parameter: "state" is either "optional" or "must",
                  "type" is one of int, File, String or none, "name" is the parameter name
                  and "desc" is its help text.
                  Words such as "len", "fil" and "num" inside the desc texts are value
                  placeholders, not typos.
                  NOTE(review): type="none" presumably marks a flag that takes no value, and
                  the two state="must" entries (model, corpus) are presumably positional
                  operands rather than named options; confirm against the code that consumes
                  this definition.
                -->
                <prog exec="tnt" version="0.0.0" desc="tagging. the language model is loaded from model.lex and model.123. if model.map exists, it is used for output mapping compressed files are recognized by suffix .gz, .bz2, or .Z">
6
                        <args>
7
                                <arg state="optional" type="int" name="a" desc="use suffix trie with max. suffix length = len; default = 10"/>
8
                                <arg state="optional" type="File" name="b" desc="use fil as backup lexicon"/>
9
                                <arg state="optional" type="int" name="B" desc="backup mode : 0=main only, 1=mix, 2=backup only (default: 1)"/>
10
                                <arg state="optional" type="String" name="d" desc="sparse data mode"/>
11
                                <arg state="optional" type="none" name="H" desc="copy HTML tags to output (without tagging)"/>
12
                                <arg state="optional" type="none" name="m" desc="mark unknown words in output with an asterisk (*)"/>
13
                                <arg state="optional" type="int" name="n" desc="use num-grams, num = 1, 2, 3, default = 3"/>
14
                                <arg state="optional" type="int" name="u" desc="unknown word count, default = 3"/>
15
                                <arg state="optional" type="int" name="v" desc="set verbosity level num: (default = 3)"/>
16
                                <arg state="optional" type="int" name="z" desc="output tag if prob is in beam num, default =0 (means infinity)"/>
17
                                <arg state="optional" type="int" name="Z" desc="cut off path if prob is not in beam num (default 1000)"/>
18
                                <arg state="must" type="String" name="model" desc="model name, will be searched in current dir or in the path specified in the env var TNT_MODELS"/>
19
                                <arg state="must" type="File" name="corpus" desc="corpus file ?"/>
20
                        </args>
21
                </prog>
22
                
23
                <!--
                  Wrapper definition for the "tnt-diff" executable ("counting differences").
                  Five optional parameters (a, f, i, l, m) are followed by the two mandatory
                  files that are compared: originalfile and resultfile.
                  NOTE(review): the desc texts of originalfile and resultfile are derived
                  from the parameter names; confirm the exact wording against the TnT manual.
                -->
                <prog exec="tnt-diff" version="0.0.0" desc="counting differences">
24
                        <args>
25
                                <arg state="optional" type="none" name="a" desc="print accuracy vs. ambiguity"/>
26
                                <arg state="optional" type="int" name="f" desc="print accuracy vs. frequency, max freq = max"/>
27
                                <arg state="optional" type="none" name="i" desc="ignore upper/lower case of tokens"/>
28
                                <arg state="optional" type="File" name="l" desc="lexicon to account for known/unknown words"/>
29
                                <arg state="optional" type="File" name="m" desc="use tagset mapping in file"/>
30
                                <arg state="must" type="File" name="originalfile" desc="original (reference) file to compare against"/>
31
                                <arg state="must" type="File" name="resultfile" desc="result file whose differences from the original file are counted"/>
32
                        </args>
33
                </prog>
34
                
35
                <!--
                  Wrapper definition for the "tnt-para" executable ("parameters generation").
                  Declares seven optional parameters (c, H, i, l, n, o, v) and one mandatory
                  File parameter, corpus. Only "o" carries a value (String); the others are
                  declared with type="none".
                  NOTE(review): type="none" presumably marks a flag that takes no value;
                  confirm against the code that consumes this definition.
                -->
                <prog exec="tnt-para" version="0.0.0" desc="parameters generation">
36
                        <args>
37
                                <arg state="optional" type="none" name="c" desc="encode capitalization in tag"/>
38
                                <arg state="optional" type="none" name="H" desc="ignore HTML tags for parameter generation"/>
39
                                <arg state="optional" type="none" name="i" desc="ignore case"/>
40
                                <arg state="optional" type="none" name="l" desc="generate lexicon only"/>
41
                                <arg state="optional" type="none" name="n" desc="generate ngrams only"/>
42
                                <arg state="optional" type="String" name="o" desc="base name for output files, default=basename of corpus"/>
43
                                <arg state="optional" type="none" name="v" desc="generate verbose ngrams"/>
44
                                <arg state="must" type="File" name="corpus" desc="corpus file ?"/>
45
                        </args>
46
                </prog>
47
                
48
                <!--
                  Wrapper definition for the "tnt-wc" executable ("counting tokens and types").
                  Declares five optional parameters (H, i, l, t, w), all with type="none",
                  and one mandatory File parameter, corpus.
                  NOTE(review): the desc of H mentions "parameter generation", which does not
                  match a counting tool and looks copied from another prog entry; confirm
                  and reword if so.
                -->
                <prog exec="tnt-wc" version="0.0.0" desc="counting tokens and types">
49
                        <args>
50
                                <arg state="optional" type="none" name="H" desc="ignore HTML tags for parameter generation"/>
51
                                <arg state="optional" type="none" name="i" desc="ignore case"/>
52
                                <arg state="optional" type="none" name="l" desc="count word types"/>
53
                                <arg state="optional" type="none" name="t" desc="count tags"/>
54
                                <arg state="optional" type="none" name="w" desc="count words tokens"/>
55
                                <arg state="must" type="File" name="corpus" desc="corpus file ?"/>
56
                        </args>
57
                </prog>
58
        </progs>
59
</application>
60