Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / bvh / annotate.groovy @ 1000

History | View | Annotate | Download (5.6 kB)

1 321 mdecorde
// Copyright © 2010-2013 ENS de Lyon.
2 321 mdecorde
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3 321 mdecorde
// Lyon 2, University of Franche-Comté, University of Nice
4 321 mdecorde
// Sophia Antipolis, University of Paris 3.
5 321 mdecorde
//
6 321 mdecorde
// The TXM platform is free software: you can redistribute it
7 321 mdecorde
// and/or modify it under the terms of the GNU General Public
8 321 mdecorde
// License as published by the Free Software Foundation,
9 321 mdecorde
// either version 2 of the License, or (at your option) any
10 321 mdecorde
// later version.
11 321 mdecorde
//
12 321 mdecorde
// The TXM platform is distributed in the hope that it will be
13 321 mdecorde
// useful, but WITHOUT ANY WARRANTY; without even the implied
14 321 mdecorde
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 321 mdecorde
// PURPOSE. See the GNU General Public License for more
16 321 mdecorde
// details.
17 321 mdecorde
//
18 321 mdecorde
// You should have received a copy of the GNU General
19 321 mdecorde
// Public License along with the TXM platform. If not, see
20 321 mdecorde
// http://www.gnu.org/licenses.
21 321 mdecorde
//
22 321 mdecorde
//
23 321 mdecorde
//
24 479 mdecorde
// $LastChangedDate: 2013-11-08 13:38:06 +0100 (ven. 08 nov. 2013) $
25 321 mdecorde
// $LastChangedRevision: 2569 $
26 321 mdecorde
// $LastChangedBy: mdecorde $
27 321 mdecorde
//
28 986 mdecorde
package org.txm.scripts.importer.bvh
29 321 mdecorde
30 986 mdecorde
import org.txm.scripts.importer.*;
31 321 mdecorde
import org.txm.scripts.*;
32 1000 mdecorde
import org.txm.importer.cwb.CwbEncode
33 1000 mdecorde
import org.txm.importer.cwb.CwbMakeAll
34 1000 mdecorde
import org.txm.importer.scripts.xmltxm.BuildTTSrc;
35 1000 mdecorde
import org.txm.importer.scripts.xmltxm.*;
36 321 mdecorde
import org.txm.utils.treetagger.TreeTagger;
37 321 mdecorde
import org.txm.Toolbox;
38 321 mdecorde
39 321 mdecorde
// TODO: Auto-generated Javadoc
40 321 mdecorde
/**
41 321 mdecorde
 * Add TreeTagger annotations with a chosen model file<br/>
42 321 mdecorde
 * If a word is unknown, the lemma will be set to the word's form <br/>
43 321 mdecorde
 * Use the &lt;s> tags to build sentence boundaries <br/>
44 321 mdecorde
 *
45 321 mdecorde
 * @author mdecorde
46 321 mdecorde
 */
47 321 mdecorde
class annotate {

	/**
	 * Annotates every file of the "txm" sub-directory of the given root directory
	 * with TreeTagger and injects the result back into the source files.
	 *
	 * Steps, all relative to rootDirFile:
	 * <ol>
	 * <li>delete and recreate the "annotations", "treetagger" and "ptreetagger" directories;</li>
	 * <li>for each file of "txm", build "ptreetagger/&lt;name&gt;.tt" with BuildTTSrc;</li>
	 * <li>run TreeTagger on each "ptreetagger" file, writing to "treetagger/&lt;name&gt;";</li>
	 * <li>convert each "treetagger" file to "annotations/&lt;name&gt;STOFF.xml" with CSV2W_ANA;</li>
	 * <li>inject each annotation file into its "txm" source file (written to a "temp"
	 *     file first, which then replaces the source file).</li>
	 * </ol>
	 *
	 * NOTE(review): the model argument is currently not used; the model file is always
	 * "afr.par" in the TreeTagger models directory. Kept as is so existing callers see
	 * no behavior change - confirm whether the parameter should be honored.
	 *
	 * @param rootDirFile the directory that contains the "txm" directory of TEI-TXM files
	 * @param model the name of the model file to use (ignored, see note above)
	 * @param pos the name of the pos attribute added
	 * @param lemme the name of the lemma attribute added
	 * @throws IllegalArgumentException if one of the working directories cannot be listed
	 */
	public static void run(File rootDirFile,String model,String pos, String lemme)
	{
		File root = rootDirFile.getAbsoluteFile();

		// Fail fast, before anything is deleted, when there is no readable "txm" directory.
		List<File> txmFiles = listSorted(new File(root, "txm"));

		//cleaning
		File annotationsDir = new File(root, "annotations");
		File treetaggerDir = new File(root, "treetagger");
		File ptreetaggerDir = new File(root, "ptreetagger");
		resetDir(annotationsDir);
		resetDir(treetaggerDir);
		resetDir(ptreetaggerDir);

		// the tags that must stay milestones
		List<String> milestones = ["tagUsage", "pb", "lb"];

		//BUILD TT FILE READY TO BE TAGGED
		for (File srcfile : txmFiles)
		{
			File resultfile = new File(ptreetaggerDir, srcfile.getName()+".tt");
			// NOTE(review): File.toURL() is deprecated in the JDK; kept because the URL
			// handling inside BuildTTSrc is not visible from here.
			new BuildTTSrc(srcfile.toURL()).process(resultfile)
		}

		//APPLY TREETAGGER
		for (File infile : listSorted(ptreetaggerDir))
		{
			File modelfile = new File(Toolbox.getPreference(Toolbox.TREETAGGER_MODELS_PATH),"/afr.par");
			File outfile = new File(treetaggerDir, infile.getName());
			println("3- APPLY TT on : "+infile+" with : "+modelfile +" >>  "+outfile);

			// a fresh tagger per file, as in the original code
			TreeTagger tt = new TreeTagger(Toolbox.getPreference(Toolbox.TREETAGGER_INSTALL_PATH)+"/bin/");
			tt.settoken();
			tt.setlemma();
			tt.setquiet();
			tt.setsgml();
			tt.setnounknown();
			tt.seteostag("<s>");
			tt.treetagger( modelfile.getAbsolutePath(), infile.getAbsolutePath(), outfile.getAbsolutePath())
		}

		//BUILD STAND-OFF FILES
		//contains txm:application/txm:commandLine
		File reportFile = new File(root, "NLPToolsParameters.xml");

		String respPerson = System.getProperty("user.name");
		String respId = "txm";
		String respDesc = "NLP annotation tool";
		String respDate = "";
		String respWhen = "";

		String appIdent = "TreeTagger";
		String appVersion = "3.2";

		String distributor = "";
		String publiStmt = "";
		String sourceStmt = "";

		def types = [pos, lemme];
		def typesDesc = ["", ""];
		def typesTAGSET = ["", ""];
		def typesWEB = ["", ""];
		String idform = "w_";
		String encoding = "UTF-8";

		for (File ttfile : listSorted(treetaggerDir))
		{
			File posfile = new File(annotationsDir, ttfile.getName()+"STOFF.xml");

			def transfo = new CSV2W_ANA();
			println("build w-interp "+ttfile.getName()+ ">>"+posfile.getName())
			transfo.setAnnotationTypes( types, typesDesc, typesTAGSET, typesWEB, idform);
			transfo.setResp(respId, respDesc,respDate, respPerson, respWhen);
			transfo.setApp(appIdent, appVersion);
			transfo.setTarget(ttfile.getAbsolutePath(), reportFile);
			transfo.setInfos(distributor,  publiStmt, sourceStmt);
			transfo.process( ttfile, posfile, encoding );
		}

		//INJECT ANNOTATIONS
		// Each source file is paired with its annotation file by name
		// ("<name>.tt" from the TT build step + "STOFF.xml" from the stand-off step)
		// instead of by position in two sorted listings: a missing or extra file in
		// either directory would otherwise shift the pairing onto the wrong files.
		File temp = new File(root, "temp");
		for (File srcfile : txmFiles)
		{
			File pos1file = new File(annotationsDir, srcfile.getName()+".tt"+"STOFF.xml");
			if (!pos1file.exists()) {
				println "Warning no annotation file "+pos1file+" for "+srcfile+", file left unchanged"
				continue;
			}

			println("5- inject annotation in file : "+srcfile+" with : "+pos1file );

			def builder = new org.txm.scripts.teitxm.AnnotationInjection(srcfile.toURL(), pos1file.toURL(), milestones);
			builder.transfomFile(temp.getParent(),temp.getName());

			// replace the source file with the annotated result
			if (!(srcfile.delete() && temp.renameTo(srcfile))) println "Warning can't rename file "+temp+" to "+srcfile
		}
	}

	/**
	 * Deletes the given directory with its content, then recreates it empty.
	 * A warning is printed when the directory does not exist afterwards.
	 *
	 * @param dir the directory to reset
	 */
	private static void resetDir(File dir)
	{
		dir.deleteDir();
		if (!dir.mkdir() && !dir.isDirectory()) println "Warning can't create directory "+dir
	}

	/**
	 * Lists the entries of a directory, sorted with the natural order of File.
	 *
	 * @param dir the directory to list
	 * @return a new sorted list of the directory entries
	 * @throws IllegalArgumentException if listFiles() returns null for dir
	 */
	private static List<File> listSorted(File dir)
	{
		File[] entries = dir.listFiles();
		if (entries == null) throw new IllegalArgumentException("Cannot list directory: "+dir);
		List<File> sorted = new ArrayList<File>(Arrays.asList(entries));
		Collections.sort(sorted);
		return sorted;
	}

	/**
	 * The main method.
	 *
	 * Optional arguments, in order: root directory, model name, pos attribute name,
	 * lemma attribute name. Without arguments the root directory is "xml/bvh" under
	 * the user home directory ("~" is not expanded by java.io.File).
	 *
	 * NOTE(review): the original call passed only the root directory and did not
	 * compile; the default model and attribute names below are placeholders to confirm.
	 *
	 * @param args the arguments
	 */
	public static void main(String[] args)
	{
		File rootDir = args.length > 0 ? new File(args[0]) : new File(System.getProperty("user.home"), "xml/bvh");
		String model = args.length > 1 ? args[1] : "afr";
		String pos = args.length > 2 ? args[2] : "pos";
		String lemme = args.length > 3 ? args[3] : "lemme";
		annotate.run(rootDir, model, pos, lemme);
	}
}