Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / importer / bvh / annotate.groovy @ 479

History | View | Annotate | Download (5.6 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2013-11-08 13:38:06 +0100 (ven. 08 nov. 2013) $
25
// $LastChangedRevision: 2569 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.importer.bvh
29

    
30
import org.txm.importer.*;
31
import org.txm.scripts.*;
32
import org.txm.importer.cwb.CwbEncode
33
import org.txm.importer.cwb.CwbMakeAll
34
import org.txm.scripts.teitxm.BuildTTSrc;
35
import org.txm.scripts.teitxm.*;
36
import org.txm.utils.treetagger.TreeTagger;
37
import org.txm.Toolbox;
38

    
39
// TODO: Auto-generated Javadoc
40
/**
 * Adds TreeTagger annotations to the TEI-TXM files of a corpus.<br/>
 * If a word is unknown, the lemma is set to the word's form.<br/>
 * The &lt;s&gt; tags are used as sentence boundaries.<br/>
 *
 * The work is done in four stages, each reading the directory written by the
 * previous one (all directories are children of the root directory):
 * <ol>
 * <li>txm/ -> ptreetagger/ : build the TreeTagger input files</li>
 * <li>ptreetagger/ -> treetagger/ : run TreeTagger</li>
 * <li>treetagger/ -> annotations/ : build the stand-off annotation files</li>
 * <li>annotations/ -> txm/ : inject the annotations back into the TEI-TXM files</li>
 * </ol>
 *
 * @author mdecorde
 */
class annotate {

	/** Name of the TreeTagger model file, resolved against Toolbox.TREETAGGER_MODELS_PATH. */
	private static final String MODEL_FILE_NAME = "afr.par";

	/** Suffix appended to a TEI-TXM file name to name its TreeTagger input/output file. */
	private static final String TT_SUFFIX = ".tt";

	/** Suffix appended to a TreeTagger output file name to name its stand-off file. */
	private static final String STANDOFF_SUFFIX = "STOFF.xml";

	/**
	 * Annotates every file found in the "txm" directory of the root directory.
	 *
	 * The "annotations", "treetagger" and "ptreetagger" directories of the root
	 * directory are deleted and recreated, and the files of "txm" are replaced
	 * by their annotated version.
	 *
	 * @param rootDirFile the directory that contains the "txm" directory of TEI-TXM files
	 * @param model the name of the model file to use.
	 *        NOTE(review): this parameter is currently ignored, the model file
	 *        is always "afr.par" - confirm whether this is intended.
	 * @param pos the name of the pos attribute added
	 * @param lemme the name of the lemma attribute added
	 */
	public static void run(File rootDirFile,String model,String pos, String lemme)
	{
		String rootDir = rootDirFile.getAbsolutePath()+"/";

		File txmDir = new File(rootDir,"txm");

		//cleaning
		File annotationsDir = resetDir(new File(rootDir,"annotations"));
		File treetaggerDir = resetDir(new File(rootDir,"treetagger"));
		File ptreetaggerDir = resetDir(new File(rootDir,"ptreetagger"));

		buildTreeTaggerSources(txmDir, ptreetaggerDir);
		applyTreeTagger(ptreetaggerDir, treetaggerDir);
		buildStandOffFiles(treetaggerDir, annotationsDir, new File(rootDir,"NLPToolsParameters.xml"), pos, lemme);
		injectAnnotations(txmDir, annotationsDir, new File(rootDir,"temp"));
	}

	/** Deletes the directory with its content, recreates it empty and returns it. */
	private static File resetDir(File dir)
	{
		dir.deleteDir();
		dir.mkdir();
		return dir;
	}

	/** Lists the content of a directory; returns an empty list when it cannot be listed. */
	private static List<File> listDir(File dir)
	{
		List<File> result = new ArrayList<File>();
		File[] found = dir.listFiles(); // null when dir does not exist or is not readable
		if (found != null) result.addAll(Arrays.asList(found));
		return result;
	}

	/** Stage 1: builds, for each file of txmDir, a "name.tt" file ready to be tagged in ptreetaggerDir. */
	private static void buildTreeTaggerSources(File txmDir, File ptreetaggerDir)
	{
		for (File srcfile : listDir(txmDir))
		{
			File resultfile = new File(ptreetaggerDir, srcfile.getName()+TT_SUFFIX);
			new BuildTTSrc(srcfile.toURL()).process(resultfile)
		}
	}

	/** Stage 2: runs TreeTagger on each file of ptreetaggerDir, writing a file of the same name in treetaggerDir. */
	private static void applyTreeTagger(File ptreetaggerDir, File treetaggerDir)
	{
		File modelfile = new File(Toolbox.getParam(Toolbox.TREETAGGER_MODELS_PATH), MODEL_FILE_NAME);

		for (File infile : listDir(ptreetaggerDir))
		{
			File outfile = new File(treetaggerDir, infile.getName());
			println("3- APPLY TT on : "+infile+" with : "+modelfile +" >>  "+outfile);

			// a new TreeTagger instance is configured for each file
			TreeTagger tt = new TreeTagger(Toolbox.getParam(Toolbox.TREETAGGER_INSTALL_PATH)+"/bin/");
			tt.settoken();
			tt.setlemma();
			tt.setquiet();
			tt.setsgml();
			tt.setnounknown();
			tt.seteostag("<s>");
			tt.treetagger( modelfile.getAbsolutePath(), infile.getAbsolutePath(), outfile.getAbsolutePath())
		}
	}

	/**
	 * Stage 3: builds, for each TreeTagger result of treetaggerDir, a
	 * "nameSTOFF.xml" stand-off file in annotationsDir.
	 *
	 * @param reportFile passed to CSV2W_ANA.setTarget with the path of the tagged file
	 * @param pos the name of the pos attribute added
	 * @param lemme the name of the lemma attribute added
	 */
	private static void buildStandOffFiles(File treetaggerDir, File annotationsDir, File reportFile, String pos, String lemme)
	{
		String respPerson = System.getProperty("user.name");
		String respId = "txm";
		String respDesc = "NLP annotation tool";
		String respDate = "";
		String respWhen = "";

		String appIdent = "TreeTagger";
		String appVersion = "3.2";

		String distributor = "";
		String publiStmt = "";
		String sourceStmt = "";

		// one entry per annotation type, in the same order in every list
		def types = [pos,lemme];
		def typesDesc = ["",""];
		def typesTAGSET = ["",""];
		def typesWEB = ["",""];
		String idform ="w_";

		String encoding = "UTF-8";

		for (File ttfile : listDir(treetaggerDir))
		{
			File posfile = new File(annotationsDir, ttfile.getName()+STANDOFF_SUFFIX);

			def transfo = new CSV2W_ANA();
			println("build w-interp "+ttfile.getName()+ ">>"+posfile.getName())
			transfo.setAnnotationTypes( types, typesDesc, typesTAGSET, typesWEB, idform);
			transfo.setResp(respId, respDesc,respDate, respPerson, respWhen);
			transfo.setApp(appIdent, appVersion);
			transfo.setTarget(ttfile.getAbsolutePath(), reportFile);
			transfo.setInfos(distributor,  publiStmt, sourceStmt);
			transfo.process( ttfile, posfile, encoding );
		}
	}

	/**
	 * Stage 4: injects each stand-off file of annotationsDir into the TEI-TXM
	 * file of txmDir it was built from, then replaces the TEI-TXM file with the
	 * result.
	 *
	 * The stand-off file of a source is found by name (source name + ".tt" +
	 * "STOFF.xml", as produced by the previous stages) rather than by position
	 * in the sorted directory listings, so that an extra or missing file cannot
	 * shift the pairing.
	 *
	 * @param temp the file the injection result is written to before it replaces the source
	 */
	private static void injectAnnotations(File txmDir, File annotationsDir, File temp)
	{
		ArrayList<String> milestones = new ArrayList<String>();//the tags that must stay milestones
		milestones.add("tagUsage");
		milestones.add("pb");
		milestones.add("lb");

		List<File> txmfiles = listDir(txmDir);
		Collections.sort(txmfiles);

		for (File srcfile : txmfiles)
		{
			File pos1file = new File(annotationsDir, srcfile.getName()+TT_SUFFIX+STANDOFF_SUFFIX);
			if (!pos1file.exists()) {
				println "Warning no annotation file "+pos1file+" for "+srcfile
				continue;
			}

			println("5- inject annotation in file : "+srcfile+" with : "+pos1file );

			// remove a result left over by a previous failed replacement
			if (temp.exists()) temp.delete();

			def builder = new org.txm.scripts.teitxm.AnnotationInjection(srcfile.toURL(), pos1file.toURL(), milestones);
			builder.transfomFile(temp.getParent(),temp.getName());

			// never delete the source when the injection produced nothing
			if (!temp.exists()) {
				println "Warning injection produced no file for "+srcfile
				continue;
			}

			if (!(srcfile.delete() && temp.renameTo(srcfile))) println "Warning can't rename file "+temp+" to "+srcfile
		}
	}

	/**
	 * The main method.
	 *
	 * @param args optional arguments, in this order: root directory (default:
	 *        "xml/bvh" in the user home directory), model file name (default:
	 *        "afr.par"), pos attribute name (default: "pos"), lemma attribute
	 *        name (default: "lemme")
	 */
	public static void main(String[] args)
	{
		// "~" is not expanded by the JVM, the home directory must be resolved explicitly
		File rootDir = args.length > 0 ? new File(args[0]) : new File(System.getProperty("user.home"), "xml/bvh");
		String model = args.length > 1 ? args[1] : MODEL_FILE_NAME;
		String pos = args.length > 2 ? args[2] : "pos";
		String lemme = args.length > 3 ? args[3] : "lemme";

		run(rootDir, model, pos, lemme);
	}
}