Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / clix / tnt-wrapper-definition.xml @ 489

History | View | Annotate | Download (3.7 kB)

1
<application name="TnT" version="0.0.0" desc="Trigrams'n'Tags : part-of-speech tagger">
        <!--
          Wrapper definition for the TnT command-line tools.
          Each <prog> describes one executable (attribute "exec"); each <arg>
          inside its <args> list declares one command-line argument:
            state : "optional" or "must"
                    (NOTE(review): "must" appears to mean required; confirm with the consumer)
            type  : "int", "String", "File" or "none"
                    (NOTE(review): "none" presumably marks a bare flag that takes no value; confirm)
            name  : argument name
                    (NOTE(review): single-letter names presumably map to dash options and the
                     longer names to positional arguments; confirm with the consumer)
            desc  : human-readable description of the argument
        -->
        <progs>

                <!-- tnt: the tagger itself -->
                <prog exec="tnt"
                      version="0.0.0"
                      desc="tagging. the language model is loaded from model.lex and model.123. if model.map exists, it is used for output mapping. compressed files are recognized by suffix .gz, .bz2, or .Z">
                        <args>
                                <arg state="optional" type="int" name="a" desc="use suffix trie with max. suffix length = len; default = 10"/>
                                <arg state="optional" type="File" name="b" desc="use fil as backup lexicon"/>
                                <arg state="optional" type="int" name="B" desc="backup mode : 0=main only, 1=mix, 2=backup only (default: 1)"/>
                                <arg state="optional" type="String" name="d" desc="sparse data mode"/>
                                <arg state="optional" type="none" name="H" desc="copy HTML tags to output (without tagging)"/>
                                <arg state="optional" type="none" name="m" desc="mark unknown words in output with an asterisk (*)"/>
                                <arg state="optional" type="int" name="n" desc="use num-grams, num = 1, 2, 3, default = 3"/>
                                <arg state="optional" type="int" name="u" desc="unknown word count, default = 3"/>
                                <arg state="optional" type="int" name="v" desc="set verbosity level num: (default = 3)"/>
                                <arg state="optional" type="int" name="z" desc="output tag if prob is in beam num, default =0 (means infinity)"/>
                                <arg state="optional" type="int" name="Z" desc="cut off path if prob is not in beam num (default 1000)"/>
                                <arg state="must" type="String" name="model" desc="model name, will be searched in current dir or in the path specified in the env var TNT_MODELS"/>
                                <arg state="must" type="File" name="corpus" desc="corpus file ?"/>
                        </args>
                </prog>

                <!-- tnt-diff: counts differences between two files -->
                <prog exec="tnt-diff" version="0.0.0" desc="counting differences">
                        <args>
                                <arg state="optional" type="none" name="a" desc="print accuracy vs. ambiguity"/>
                                <arg state="optional" type="int" name="f" desc="print accuracy vs. frequency, max freq = max"/>
                                <arg state="optional" type="none" name="i" desc="ignore upper/lower case of tokens"/>
                                <arg state="optional" type="File" name="l" desc="lexicon to account for known/unknown words"/>
                                <arg state="optional" type="File" name="m" desc="use tagset mapping in file"/>
                                <!-- NOTE(review): the two desc values below look copy-pasted from the other
                                     progs ("model file", "corpus file ?") and do not match the argument
                                     names originalfile / resultfile; confirm the intended wording. -->
                                <arg state="must" type="File" name="originalfile" desc="model file"/>
                                <arg state="must" type="File" name="resultfile" desc="corpus file ?"/>
                        </args>
                </prog>

                <!-- tnt-para: parameter generation from a corpus -->
                <prog exec="tnt-para" version="0.0.0" desc="parameters generation">
                        <args>
                                <arg state="optional" type="none" name="c" desc="encode capitalization in tag"/>
                                <arg state="optional" type="none" name="H" desc="ignore HTML tags for parameter generation"/>
                                <arg state="optional" type="none" name="i" desc="ignore case"/>
                                <arg state="optional" type="none" name="l" desc="generate lexicon only"/>
                                <arg state="optional" type="none" name="n" desc="generate ngrams only"/>
                                <arg state="optional" type="String" name="o" desc="base name for output files, default=basename of corpus"/>
                                <arg state="optional" type="none" name="v" desc="generate verbose ngrams"/>
                                <arg state="must" type="File" name="corpus" desc="corpus file ?"/>
                        </args>
                </prog>

                <!-- tnt-wc: counts tokens and types in a corpus -->
                <prog exec="tnt-wc" version="0.0.0" desc="counting tokens and types">
                        <args>
                                <!-- NOTE(review): this desc is identical to the one used for tnt-para's H
                                     argument and mentions "parameter generation"; possibly a copy-paste,
                                     confirm the intended wording for tnt-wc. -->
                                <arg state="optional" type="none" name="H" desc="ignore HTML tags for parameter generation"/>
                                <arg state="optional" type="none" name="i" desc="ignore case"/>
                                <arg state="optional" type="none" name="l" desc="count word types"/>
                                <arg state="optional" type="none" name="t" desc="count tags"/>
                                <arg state="optional" type="none" name="w" desc="count words tokens"/>
                                <arg state="must" type="File" name="corpus" desc="corpus file ?"/>
                        </args>
                </prog>
        </progs>
</application>