Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / perrault / annotate.groovy @ 1000

History | View | Annotate | Download (5.4 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2013-11-08 13:38:06 +0100 (ven. 08 nov. 2013) $
25
// $LastChangedRevision: 2569 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.scripts.importer.perrault
29

    
30
import org.txm.scripts.importer.*;
31
import org.txm.scripts.*;
32
import org.txm.importer.cwb.CwbEncode
33
import org.txm.importer.cwb.CwbMakeAll
34
import org.txm.importer.scripts.xmltxm.BuildTTSrc;
35
import org.txm.importer.scripts.xmltxm.*;
36
import org.txm.utils.treetagger.TreeTagger;
37
import org.txm.Toolbox;
38

    
39
// TODO: Auto-generated Javadoc
40
/**
 * Annotation step of the "perrault" import.
 *
 * Working below a root directory, it:
 * 1. builds one TreeTagger input file per file found in "txm",
 * 2. runs TreeTagger on each of them with the "fr.par" model,
 * 3. converts each TreeTagger output into a stand-off file in "annotations",
 * 4. injects each stand-off file back into the matching "txm" file.
 */
class annotate {

        /** Suffix added to a "txm" file name to name its TreeTagger input file. */
        private static final String TT_SRC_SUFFIX = ".tt";

        /** Suffix added to a TreeTagger input file name to name its output file. */
        private static final String TT_OUT_SUFFIX = "-out.tt";

        /** Suffix added to a TreeTagger output file name to name its stand-off file. */
        private static final String STANDOFF_SUFFIX = "-STOFF.xml";

        /**
         * Runs the whole annotation chain on the given import directory.
         *
         * The "annotations" and "treetagger" sub-directories are deleted and
         * recreated first. The chain stops after a message is printed if the
         * TreeTagger model file or install path cannot be found.
         *
         * @param rootDirFile the root directory of the import; its "txm"
         *        sub-directory holds the files to annotate
         */
        public static void run(File rootDirFile)
        {
                String rootDir = rootDirFile.getAbsolutePath()+"/";

                //cleaning
                new File(rootDir,"annotations").deleteDir();
                new File(rootDir,"annotations").mkdir();
                new File(rootDir,"treetagger").deleteDir();
                new File(rootDir,"treetagger").mkdir();

                buildTreeTaggerSources(rootDir);
                if (!applyTreeTagger(rootDir)) return;
                buildStandOffFiles(rootDir);
                injectAnnotations(rootDir);
        }

        /**
         * Lists the content of a directory without ever returning null.
         *
         * @param dir the directory to list
         * @return the sorted children of dir, or an empty list when
         *         File.listFiles() returns null (missing or unreadable directory)
         */
        private static List<File> listFilesOf(File dir)
        {
                File[] children = dir.listFiles();
                if (children == null) return new ArrayList<File>();
                List<File> result = new ArrayList<File>(Arrays.asList(children));
                result.sort();
                return result;
        }

        /**
         * Step 1: builds, for every file of "txm", a file ready to be tagged
         * in "treetagger".
         *
         * @param rootDir the absolute root directory path, ending with "/"
         */
        private static void buildTreeTaggerSources(String rootDir)
        {
                //BUILD TT FILE READY TO BE TAGGED
                for (File srcfile : listFilesOf(new File(rootDir,"txm")))
                {
                        File resultfile = new File(rootDir+"treetagger/",srcfile.getName()+TT_SRC_SUFFIX);
                        new BuildTTSrc(srcfile.toURL()).process(resultfile)
                }
        }

        /**
         * Step 2: runs TreeTagger on every file of "treetagger", then deletes
         * the input file so that only the "-out.tt" result remains.
         *
         * @param rootDir the absolute root directory path, ending with "/"
         * @return false when the model file or the TreeTagger install path is
         *         missing (a message is printed), true otherwise
         */
        private static boolean applyTreeTagger(String rootDir)
        {
                //APPLY TREETAGGER
                List<File> files = listFilesOf(new File(rootDir,"treetagger"));
                // Nothing to tag: do not fail on the TreeTagger configuration.
                if (files.isEmpty()) return true;

                // The model and the install path do not depend on the file being
                // tagged, so they are resolved and validated once.
                File modelfile = new File(Toolbox.getPreference(Toolbox.TREETAGGER_MODELS_PATH),"/fr.par");
                if (!modelfile.exists())
                {
                        println "Skipping ANNOTATE: Incorrect modelfile path: "+modelfile;
                        return false;
                }
                String ttBinPath = Toolbox.getPreference(Toolbox.TREETAGGER_INSTALL_PATH)+"/bin/";
                if (!new File(ttBinPath).exists())
                {
                        println("Path to TreeTagger is wrong "+ttBinPath)
                        return false;
                }

                for (File infile : files)
                {
                        File outfile = new File(infile.getParent(),infile.getName()+TT_OUT_SUFFIX);
                        println("3- APPLY TT on : "+infile+" with : "+modelfile +" >>  "+outfile);

                        TreeTagger tt = new TreeTagger(ttBinPath);
                        tt.settoken();
                        tt.setlemma();
                        tt.setquiet();
                        tt.setsgml();
                        tt.setnounknown();
                        tt.seteostag("<s>");
                        tt.treetagger( modelfile.getAbsolutePath(), infile.getAbsolutePath(), outfile.getAbsolutePath())
                        infile.delete();
                }
                return true;
        }

        /**
         * Step 3: converts every file of "treetagger" into a stand-off file
         * written in "annotations".
         *
         * @param rootDir the absolute root directory path, ending with "/"
         */
        private static void buildStandOffFiles(String rootDir)
        {
                //BUILD STAND-OFF FILES
                //contains txm:application/txm:commandLine
                File reportFile = new File(rootDir,"NLPToolsParameters.xml");

                String respPerson = System.getProperty("user.name");
                String respId = "txm";
                String respDesc = "NLP annotation tool";
                String respDate = "";
                String respWhen = ""

                String appIdent = "TreeTagger";
                String appVersion = "3.2";

                String distributor = "";
                String publiStmt = "";
                String sourceStmt = "";

                def types = ["pos","lemme"];
                def typesDesc = ["",""];
                def typesTAGSET = ["",""];
                def typesWEB = ["",""];
                String idform ="w_c_";

                for (File ttfile : listFilesOf(new File(rootDir,"treetagger")))
                {
                        String target = ttfile.getAbsolutePath();
                        File posfile = new File(rootDir+"annotations/",ttfile.getName()+STANDOFF_SUFFIX);

                        def encoding ="UTF-8";
                        def transfo = new CSV2W_ANA();
                        println("build w-interp "+ttfile.getName()+ ">>"+posfile.getName())
                        transfo.setAnnotationTypes( types, typesDesc, typesTAGSET, typesWEB, idform);
                        transfo.setResp(respId, respDesc,respDate, respPerson, respWhen);
                        transfo.setApp(appIdent, appVersion);
                        transfo.setTarget(target, reportFile);
                        transfo.setInfos(distributor,  publiStmt, sourceStmt);
                        transfo.process( ttfile, posfile, encoding );
                }
        }

        /**
         * Step 4: injects each stand-off file into the "txm" file it was
         * built from, then replaces that file with the result.
         *
         * The stand-off file is looked up by name (source name plus the
         * suffixes added by the previous steps) rather than by position in a
         * sorted listing: appending a suffix can change the relative order of
         * two names when one is a prefix of the other, and any extra or missing
         * file would shift the pairing.
         *
         * @param rootDir the absolute root directory path, ending with "/"
         */
        private static void injectAnnotations(String rootDir)
        {
                ArrayList<String> milestones = new ArrayList<String>();//the tags that must stay milestones
                milestones.add("tagUsage");
                milestones.add("pb");
                milestones.add("lb");

                File temp = new File(rootDir,"temp");
                for (File srcfile : listFilesOf(new File(rootDir,"txm")))
                {
                        File pos1file = new File(rootDir+"annotations/",
                                        srcfile.getName()+TT_SRC_SUFFIX+TT_OUT_SUFFIX+STANDOFF_SUFFIX);
                        if (!pos1file.exists())
                        {
                                println "Warning no annotation file "+pos1file+" for "+srcfile
                                continue;
                        }

                        println("5- inject annotation in file : "+srcfile+" with : "+pos1file );

                        def builder = new org.txm.scripts.teitxm.AnnotationInjection(srcfile.toURL(), pos1file.toURL(), milestones);
                        builder.transfomFile(temp.getParent(),temp.getName());

                        if (!(srcfile.delete() && temp.renameTo(srcfile))) println "Warning can't rename file "+temp+" to "+srcfile
                }
        }

        /**
         * Command line entry point.
         *
         * @param args optional: args[0] is the root directory to annotate;
         *        defaults to "xml/perrault/" in the user's home directory
         */
        public static void main(String[] args)
        {
                File rootDir;
                if (args != null && args.length > 0)
                {
                        rootDir = new File(args[0]);
                }
                else
                {
                        // "~" is not expanded by java.io.File: resolve the home directory explicitly.
                        rootDir = new File(System.getProperty("user.home"), "xml/perrault/");
                }
                run(rootDir);
        }
}