Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / importer / perrault / annotate.groovy @ 966

History | View | Annotate | Download (5.4 kB)

1 321 mdecorde
// Copyright © 2010-2013 ENS de Lyon.
2 321 mdecorde
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3 321 mdecorde
// Lyon 2, University of Franche-Comté, University of Nice
4 321 mdecorde
// Sophia Antipolis, University of Paris 3.
5 321 mdecorde
//
6 321 mdecorde
// The TXM platform is free software: you can redistribute it
7 321 mdecorde
// and/or modify it under the terms of the GNU General Public
8 321 mdecorde
// License as published by the Free Software Foundation,
9 321 mdecorde
// either version 2 of the License, or (at your option) any
10 321 mdecorde
// later version.
11 321 mdecorde
//
12 321 mdecorde
// The TXM platform is distributed in the hope that it will be
13 321 mdecorde
// useful, but WITHOUT ANY WARRANTY; without even the implied
14 321 mdecorde
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 321 mdecorde
// PURPOSE. See the GNU General Public License for more
16 321 mdecorde
// details.
17 321 mdecorde
//
18 321 mdecorde
// You should have received a copy of the GNU General
19 321 mdecorde
// Public License along with the TXM platform. If not, see
20 321 mdecorde
// http://www.gnu.org/licenses.
21 321 mdecorde
//
22 321 mdecorde
//
23 321 mdecorde
//
24 479 mdecorde
// $LastChangedDate: 2013-11-08 13:38:06 +0100 (ven. 08 nov. 2013) $
25 321 mdecorde
// $LastChangedRevision: 2569 $
26 321 mdecorde
// $LastChangedBy: mdecorde $
27 321 mdecorde
//
28 321 mdecorde
package org.txm.importer.perrault
29 321 mdecorde
30 321 mdecorde
import org.txm.importer.*;
31 321 mdecorde
import org.txm.scripts.*;
32 321 mdecorde
import org.txm.importer.cwb.CwbEncode
33 321 mdecorde
import org.txm.importer.cwb.CwbMakeAll
34 927 mdecorde
import org.txm.importer.xmltxm.BuildTTSrc;
35 927 mdecorde
import org.txm.importer.xmltxm.*;
36 321 mdecorde
import org.txm.utils.treetagger.TreeTagger;
37 321 mdecorde
import org.txm.Toolbox;
38 321 mdecorde
39 321 mdecorde
// TODO: Auto-generated Javadoc
40 321 mdecorde
/**
 * Annotation step of the Perrault import.
 *
 * Starting from a corpus directory that holds a "txm" sub-directory, it:
 * builds one TreeTagger input file per TXM file, runs TreeTagger on each of
 * them with the "fr.par" model, converts every TreeTagger result into a
 * stand-off annotation file, and injects those annotations back into the
 * TXM files.
 */
class annotate {

        /** Suffix added to a TXM file name to name its TreeTagger input file. */
        private static final String TT_SOURCE_SUFFIX = ".tt";

        /** Suffix added to a TreeTagger input file name to name the TreeTagger result file. */
        private static final String TT_RESULT_SUFFIX = "-out.tt";

        /** Suffix added to a TreeTagger result file name to name its stand-off annotation file. */
        private static final String STANDOFF_SUFFIX = "-STOFF.xml";

        /**
         * Runs the whole annotation pipeline on a corpus directory.
         *
         * The "annotations" and "treetagger" sub-directories are wiped and
         * recreated first. The method prints a message and returns early when a
         * working directory cannot be created, when the TreeTagger model file is
         * missing, or when the TreeTagger "bin" directory is missing.
         *
         * @param rootDirFile the corpus root directory; its "txm" sub-directory
         *        holds the files to annotate
         */
        public static void run(File rootDirFile)
        {
                File rootDir = rootDirFile.getAbsoluteFile();
                File txmDir = new File(rootDir, "txm");
                File annotationsDir = new File(rootDir, "annotations");
                File treetaggerDir = new File(rootDir, "treetagger");

                // cleaning
                if (!resetDirectory(annotationsDir) || !resetDirectory(treetaggerDir)) return;

                // the tags that must stay milestones during the injection
                ArrayList<String> milestones = new ArrayList<String>();
                milestones.add("tagUsage");
                milestones.add("pb");
                milestones.add("lb");

                buildTreeTaggerSources(txmDir, treetaggerDir);
                if (!applyTreeTagger(treetaggerDir)) return;
                buildStandOffFiles(rootDir, treetaggerDir, annotationsDir);
                injectAnnotations(rootDir, txmDir, annotationsDir, milestones);
        }

        /**
         * Deletes a directory with its content, then recreates it empty.
         *
         * @param dir the directory to reset
         * @return true if the directory exists afterwards, false (after printing
         *         a message) otherwise
         */
        private static boolean resetDirectory(File dir)
        {
                dir.deleteDir();
                if (dir.mkdir() || dir.isDirectory()) return true;
                println "Skipping ANNOTATE: can't create directory: "+dir;
                return false;
        }

        /**
         * Lists the entries of a directory in their natural (path) order.
         *
         * @param dir the directory to list
         * @return the sorted entries; an empty list when File.listFiles()
         *         returns null (missing or unreadable directory)
         */
        private static List<File> listSortedFiles(File dir)
        {
                File[] found = dir.listFiles();
                if (found == null) return new ArrayList<File>();
                List<File> sorted = new ArrayList<File>(Arrays.asList(found));
                Collections.sort(sorted);
                return sorted;
        }

        /**
         * Step 1: builds, with BuildTTSrc, one TreeTagger input file per TXM file.
         *
         * @param txmDir the directory holding the TXM files
         * @param treetaggerDir the directory receiving the "*.tt" files
         */
        private static void buildTreeTaggerSources(File txmDir, File treetaggerDir)
        {
                //BUILD TT FILE READY TO BE TAGGED
                for (File srcfile : listSortedFiles(txmDir))
                {
                        File resultfile = new File(treetaggerDir, srcfile.getName()+TT_SOURCE_SUFFIX);
                        new BuildTTSrc(srcfile.toURL()).process(resultfile)
                }
        }

        /**
         * Step 2: runs TreeTagger on every file of the treetagger directory and
         * deletes each input file once it has been processed.
         *
         * The model file and the TreeTagger "bin" directory do not depend on the
         * file being tagged, so they are checked once before the loop.
         *
         * @param treetaggerDir the directory holding the TreeTagger input files
         * @return false (after printing a message) when the model file or the
         *         TreeTagger "bin" directory does not exist, true otherwise
         */
        private static boolean applyTreeTagger(File treetaggerDir)
        {
                //APPLY TREETAGGER
                File modelfile = new File(Toolbox.getPreference(Toolbox.TREETAGGER_MODELS_PATH),"/fr.par");
                if (!modelfile.exists())
                {
                        println "Skipping ANNOTATE: Incorrect modelfile path: "+modelfile;
                        return false;
                }

                String treetaggerBin = Toolbox.getPreference(Toolbox.TREETAGGER_INSTALL_PATH)+"/bin/";
                if (!new File(treetaggerBin).exists())
                {
                        println("Path to TreeTagger is wrong "+treetaggerBin)
                        return false;
                }

                // The listing is taken before the loop, so the result files
                // written below are not themselves fed to TreeTagger.
                for (File infile : listSortedFiles(treetaggerDir))
                {
                        File outfile = new File(infile.getParent(), infile.getName()+TT_RESULT_SUFFIX);
                        println("3- APPLY TT on : "+infile+" with : "+modelfile +" >>  "+outfile);

                        TreeTagger tt = new TreeTagger(treetaggerBin);
                        tt.settoken();
                        tt.setlemma();
                        tt.setquiet();
                        tt.setsgml();
                        tt.setnounknown();
                        tt.seteostag("<s>");
                        tt.treetagger( modelfile.getAbsolutePath(), infile.getAbsolutePath(), outfile.getAbsolutePath())
                        infile.delete();
                }
                return true;
        }

        /**
         * Step 3: converts, with CSV2W_ANA, every file of the treetagger
         * directory into a stand-off annotation file.
         *
         * @param rootDir the corpus root directory; "NLPToolsParameters.xml" in
         *        it is passed to the converter as report file
         * @param treetaggerDir the directory holding the TreeTagger result files
         * @param annotationsDir the directory receiving the "*-STOFF.xml" files
         */
        private static void buildStandOffFiles(File rootDir, File treetaggerDir, File annotationsDir)
        {
                //BUILD STAND-OFF FILES
                //contains txm:application/txm:commandLine
                File reportFile = new File(rootDir,"NLPToolsParameters.xml");

                String respPerson = System.getProperty("user.name");
                String respId = "txm";
                String respDesc = "NLP annotation tool";
                String respDate = "";
                String respWhen = ""

                String appIdent = "TreeTagger";
                String appVersion = "3.2";

                String distributor = "";
                String publiStmt = "";
                String sourceStmt = "";

                def types = ["pos","lemme"];
                def typesDesc = ["",""];
                def typesTAGSET = ["",""];
                def typesWEB = ["",""];
                String idform ="w_c_";
                def encoding ="UTF-8";

                for (File ttfile : listSortedFiles(treetaggerDir))
                {
                        String target = ttfile.getAbsolutePath();
                        File posfile = new File(annotationsDir, ttfile.getName()+STANDOFF_SUFFIX);

                        def transfo = new CSV2W_ANA();
                        println("build w-interp "+ttfile.getName()+ ">>"+posfile.getName())
                        transfo.setAnnotationTypes( types, typesDesc, typesTAGSET, typesWEB, idform);
                        transfo.setResp(respId, respDesc,respDate, respPerson, respWhen);
                        transfo.setApp(appIdent, appVersion);
                        transfo.setTarget(target, reportFile);
                        transfo.setInfos(distributor,  publiStmt, sourceStmt);
                        transfo.process( ttfile, posfile, encoding );
                }
        }

        /**
         * Step 4: injects each stand-off annotation file into the TXM file it
         * was computed from, then replaces the TXM file with the result.
         *
         * The annotation file is looked up by name (TXM file name plus the three
         * suffixes added by the previous steps) rather than by position in a
         * sorted listing, so a missing annotation file cannot shift the pairing
         * and inject annotations into the wrong TXM file.
         *
         * @param rootDir the corpus root directory; the injection result is
         *        written to a file named "temp" in it
         * @param txmDir the directory holding the TXM files
         * @param annotationsDir the directory holding the stand-off files
         * @param milestones the tags passed to AnnotationInjection as milestones
         */
        private static void injectAnnotations(File rootDir, File txmDir, File annotationsDir, List<String> milestones)
        {
                File temp = new File(rootDir,"temp");
                for (File srcfile : listSortedFiles(txmDir))
                {
                        File pos1file = new File(annotationsDir, srcfile.getName()+TT_SOURCE_SUFFIX+TT_RESULT_SUFFIX+STANDOFF_SUFFIX);
                        if (!pos1file.exists())
                        {
                                println "Warning no annotation file "+pos1file+" for "+srcfile+": file left unchanged"
                                continue;
                        }

                        println("5- inject annotation in file : "+srcfile+" with : "+pos1file );

                        // Drop a leftover of a previous failed rename, so that the
                        // existence test below proves THIS injection wrote the file.
                        temp.delete();

                        def builder = new org.txm.scripts.teitxm.AnnotationInjection(srcfile.toURL(), pos1file.toURL(), milestones);
                        builder.transfomFile(temp.getParent(),temp.getName());

                        // Never delete the source file before its replacement exists.
                        if (!temp.exists())
                        {
                                println "Warning injection produced no file "+temp+" for "+srcfile+": file left unchanged"
                                continue;
                        }

                        if (!(srcfile.delete() && temp.renameTo(srcfile))) println "Warning can't rename file "+temp+" to "+srcfile
                }
        }

        /**
         * The main method.
         *
         * @param args the arguments; args[0], when given, is the corpus root
         *        directory, otherwise "xml/perrault" under the user home
         *        directory is used
         */
        public static void main(String[] args)
        {
                File rootDir;
                if (args != null && args.length > 0)
                {
                        rootDir = new File(args[0]);
                }
                else
                {
                        // "~" is not expanded by java.io.File: resolve the home directory explicitly
                        rootDir = new File(System.getProperty("user.home"), "xml/perrault/");
                }
                run(rootDir);
        }
}