Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / nlp / testTnT.groovy @ 1000

History | View | Annotate | Download (17.6 kB)

1 321 mdecorde
/**
2 321 mdecorde
 * Main.
3 321 mdecorde
 *
4 321 mdecorde
 * @param args the args
5 321 mdecorde
 */
6 321 mdecorde
// Copyright © 2010-2013 ENS de Lyon.
7 321 mdecorde
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
8 321 mdecorde
// Lyon 2, University of Franche-Comté, University of Nice
9 321 mdecorde
// Sophia Antipolis, University of Paris 3.
10 321 mdecorde
//
11 321 mdecorde
// The TXM platform is free software: you can redistribute it
12 321 mdecorde
// and/or modify it under the terms of the GNU General Public
13 321 mdecorde
// License as published by the Free Software Foundation,
14 321 mdecorde
// either version 2 of the License, or (at your option) any
15 321 mdecorde
// later version.
16 321 mdecorde
//
17 321 mdecorde
// The TXM platform is distributed in the hope that it will be
18 321 mdecorde
// useful, but WITHOUT ANY WARRANTY; without even the implied
19 321 mdecorde
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
20 321 mdecorde
// PURPOSE. See the GNU General Public License for more
21 321 mdecorde
// details.
22 321 mdecorde
//
23 321 mdecorde
// You should have received a copy of the GNU General
24 321 mdecorde
// Public License along with the TXM platform. If not, see
25 321 mdecorde
// http://www.gnu.org/licenses.
26 321 mdecorde
//
27 321 mdecorde
//
28 321 mdecorde
//
29 479 mdecorde
// $LastChangedDate: 2016-03-29 09:51:35 +0200 (mar. 29 mars 2016) $
30 321 mdecorde
// $LastChangedRevision: 3185 $
31 321 mdecorde
// $LastChangedBy: mdecorde $
32 321 mdecorde
//
33 1000 mdecorde
package org.txm.scripts.scripts;
34 321 mdecorde
35 321 mdecorde
/**
36 321 mdecorde
 * test tnt Wrapper :
37 321 mdecorde
 *  add NLP annotations
38 321 mdecorde
 *  build result matrix
39 321 mdecorde
 *
40 321 mdecorde
 */
41 321 mdecorde
42 321 mdecorde
import org.txm.utils.*;
43 986 mdecorde
import org.txm.scripts.importer.*;
44 986 mdecorde
import org.txm.scripts.importer.RGAQCJ.*;
45 321 mdecorde
46 321 mdecorde
// TODO: Auto-generated Javadoc
47 321 mdecorde
/* (non-Javadoc)
48 321 mdecorde
 * @see groovy.lang.Script#run()
49 321 mdecorde
 */
50 321 mdecorde
// Working directories of the TnT experiment.
// java.io.File does not expand "~", so the home directory is resolved
// explicitly instead of relying on a shell-style path.
String rootDir = System.getProperty("user.home")+"/xml/rgaqcj/TnT/";
String modelsDir = rootDir+"models/";
String textsDir = rootDir+"texts/";
String projsDir = rootDir+"proj/";
String srcDir = rootDir+"src/"
String xslDir = rootDir+"xsl/"
String anaDir = rootDir+"anainline/"
// mkdirs() also creates the missing parent directories; a failure is
// reported instead of being silently ignored.
for(String dirPath : [modelsDir, textsDir, projsDir])
{
    File dir = new File(dirPath);
    if (!dir.isDirectory() && !dir.mkdirs()) println "Warning can't create directory "+dir
}

// Names of the texts to process; the last entry is the only one flagged
// "Z" in initiales (the flag is tested by the decoding loop).
def texts =["roland","artu","qjm","commyn1","jehpar","rgaqcj"]
//def texts =["commyn1","jehpar"]
// One letter per entry of texts, in the same order.
String initiales = "RAQCJZ"

//import src
// Source, annotated-output and stylesheet file names, index-aligned; they
// are only referenced by the commented-out XSL import step that follows.
def srcfiles = ["roland.xml","artu.xml","qjm.xml","commyn1.xml","jehpar.xml"];
def anafiles = ["roland-ana.xml","artu-ana.xml","qjm-ana.xml","commyn1-ana.xml","jehpar-ana.xml"];
def xslfiles = ["bfm2txm-w.xsl","bfm2txm-w.xsl","bfm2txm-w.xsl","bfm2txm-w.xsl","bfm2txm-w.xsl"];
69 321 mdecorde
/*
70 321 mdecorde
 for(int i=0 ; i < xslfiles.size() ; i++)
71 321 mdecorde
 {
72 321 mdecorde
 String xslfile = xslDir+xslfiles[i];
73 321 mdecorde
 String infile = srcDir+srcfiles[i];
74 321 mdecorde
 String outfile = anaDir+anafiles[i];
75 321 mdecorde
 println("create xml-txm from "+srcfiles[i]);
76 321 mdecorde
 ApplyXsl a = new ApplyXsl(xslfile);
77 321 mdecorde
 a.process(infile,outfile);
78 321 mdecorde
 }
79 321 mdecorde
 //Import to CWB
80 714 mdecorde
 BuildXmlRGAQCJ.process( anafiles,  rootDir)
81 321 mdecorde
 */
82 321 mdecorde
//build CWBDecode wrapper
// The wrapper source is generated and then compiled from the SAME file, so
// that the class loaded below is always the one that was just built.
File cwbDecodeSource = new File("src/groovy/org/txm/scripts/CwbDecode.groovy");
ProcessBuilderBuilder.build(new File("src/groovy/org/txm/scripts/cwb-decode-wrapper-definition.xml"), cwbDecodeSource);
GroovyClassLoader gcl = new GroovyClassLoader();
gcl.addClasspath(".");

// rootDir already ends with a separator
String registryPath = rootDir+"registry";
// "~" is not expanded by the JVM: resolve the home directory explicitly
String cwbdecodeexecDir = System.getProperty("user.home")+"/Bureau/trunkCWB/cwb-3.0/utils/"

Class clazz = gcl.parseClass(cwbDecodeSource);
def cwbdecodewrapper = clazz.newInstance(cwbdecodeexecDir);
// positional attributes decoded for the training files and for the files to tag
def pAttributesTrain = ["word","cat"];
def pAttributesTag = ["word"];
def sAttributes = ["s"];
// NOTE(review): the setters below presumably map to the cwb-decode
// command line options of the same name - confirm in the wrapper definition.
cwbdecodewrapper.setC();
cwbdecodewrapper.setP(pAttributesTag);
cwbdecodewrapper.setS(sAttributes);
cwbdecodewrapper.setr(registryPath);
cwbdecodewrapper.debug(true);
// Bounds used by the decoding loop: entry i is decoded from nbmots[i]
// (+1 when i > 0) to nbmots[i+1].
int[] nbmots = [0,35306,134170,173526,198929,228149];
101 321 mdecorde
102 321 mdecorde
//create file to proj on
// Decodes the RGAQCJ corpus into the given file. System.out is redirected
// for the duration of the call (presumably the wrapper prints the decoder
// output on System.out - confirm against the generated CwbDecode wrapper).
// The console stream is always restored and the file is always closed, so
// that the patch steps that follow can delete and rename it.
def decodeToFile = { String path ->
    PrintStream console = System.out;
    PrintStream sink = new PrintStream(new FileOutputStream(path));
    try {
        System.setOut(sink);
        if(System.getProperty("os.name").contains("Windows"))
            cwbdecodewrapper.cwbdecodeexe("RGAQCJ")
        else
            cwbdecodewrapper.cwbdecode("RGAQCJ")
    } finally {
        System.setOut(console);
        sink.close();
    }
}

for(int i = 0 ; i < texts.size() ; i++)
{
    String text = texts[i];
    println("creation TTsrc "+text);

    //build src files
    cwbdecodewrapper.setP(pAttributesTag);

    // entries flagged "Z" in initiales are decoded without start/end
    // bounds; the others are limited to their own slice of nbmots
    if(!(initiales[i]+"").equals("Z"))
    {
        cwbdecodewrapper.sets(i > 0 ? nbmots[i]+1 : nbmots[i])
        cwbdecodewrapper.sete(nbmots[i+1])
    }
    else
    {
        cwbdecodewrapper.unsets();
        cwbdecodewrapper.unsete();
    }
    decodeToFile(textsDir+text+".t");

    //build train files
    cwbdecodewrapper.setP(pAttributesTrain);
    decodeToFile(modelsDir+text+".t");
}
149 321 mdecorde
150 321 mdecorde
//need to replace <s> by nothing and </s> by \n
// Charset used to read AND write every patched file of this script.
String encoding = "UTF-8"
for(String text : texts)
{
    //patch src files
    File f = new File(textsDir,text+".t");
    File temp = new File("tempFileCVScleaner")
    println("patch texts files "+f+": rmv <s> and replace </s>");
    Reader reader = new InputStreamReader(new FileInputStream(f),encoding);
    // written with the same charset as the input (FileWriter would use the
    // platform default and corrupt non-ASCII characters)
    Writer writer = new OutputStreamWriter(new FileOutputStream(temp),encoding);
    try {
        reader.eachLine { String line ->
            String trimmed = line.trim();
            if(trimmed.startsWith("</s"))
                writer.write("\n")      // closing tag: one empty line
            else if(!trimmed.startsWith("<s"))
                writer.write(line+"\n") // regular line: copied as is
            // opening tag: dropped
        }
    } finally {
        reader.close();
        writer.close();
    }
    if (!(f.delete() && temp.renameTo(f))) println "Warning can't rename file "+temp+" to "+f
}
173 321 mdecorde
//need to replace <s> by nothing and </s> by \n
for(String text : texts)
{
    //patch training files
    File f = new File(modelsDir,text+".t");
    File temp = new File("tempFileCVScleaner")
    println("patch models files "+f+": rmv <s> and replace </s>");
    Reader reader = new InputStreamReader(new FileInputStream(f),encoding);
    // written with the same charset as the input (FileWriter would use the
    // platform default and corrupt non-ASCII characters)
    Writer writer = new OutputStreamWriter(new FileOutputStream(temp),encoding);
    try {
        reader.eachLine { String line ->
            String trimmed = line.trim();
            if(trimmed.startsWith("</s"))
                writer.write("\n")      // closing tag: one empty line
            else if(!trimmed.startsWith("<s"))
                writer.write(line+"\n") // regular line: copied as is
            // opening tag: dropped
        }
    } finally {
        reader.close();
        writer.close();
    }
    if (!(f.delete() && temp.renameTo(f))) println "Warning can't rename file "+temp+" to "+f
}
195 321 mdecorde
196 321 mdecorde
//build Tnt Wrapper
// The generation step is disabled: the wrapper is compiled from the
// existing TnT.groovy source.
//ProcessBuilderBuilder.build(new File("src/groovy/org/txm/scripts/tnt-wrapper-definition.xml"), new File("src/groovy/org/textometrie/scripts/TnT.groovy"));
gcl = new GroovyClassLoader();
gcl.addClasspath(".");
// "~" is not expanded by the JVM: resolve the home directory explicitly
String tntexecDir = System.getProperty("user.home")+"/Bureau/tnt/"
clazz = gcl.parseClass(new File("src/groovy/org/txm/scripts/TnT.groovy"));
def tntwrapper = clazz.newInstance(tntexecDir);
203 321 mdecorde
204 321 mdecorde
// Names of the projection files written in projsDir, in creation order.
def ttrezfiles = [];
for(String text : texts)
{
    // train on the model file of this text
    println "Apprentissage de "+text+"..."
    tntwrapper.tntpara(new File(modelsDir,text+".t"));
    //new File(text+".123").renameTo(new File(modelsDir,text+".123"));
    //new File(text+".lex").renameTo(new File(modelsDir,text+".lex"));
    for(String target : texts)
    {
        // the model trained on "text" is applied to "target"
        println "Projection de "+text+" sur "+target+"..."
        for(int mode=3 ; mode <=3 ; mode++)
        {
            println("Mode "+mode+"...")
            tntwrapper.setu(2);

            String projName = "model_"+text+"_target_"+target+"_mode"+mode+".t";
            PrintStream console = System.out;
            PrintStream sink = new PrintStream(new FileOutputStream(projsDir+projName));
            ttrezfiles.add(projName)
            // System.out is redirected to the projection file while the
            // tagger runs; it is always restored and the file always
            // closed so that it can be patched afterwards.
            try {
                System.setOut(sink);
                tntwrapper.tnt(text,new File(textsDir,target+".t"));
            } finally {
                System.setOut(console);
                sink.close();
            }
        }
    }
}
231 321 mdecorde
232 321 mdecorde
//def ttrezfiles = ["model_roland_targetroland_mode2.t", "model_roland_targetroland_mode3.t", "model_roland_targetartu_mode2.t", "model_roland_targetartu_mode3.t", "model_roland_targetqjm_mode2.t", "model_roland_targetqjm_mode3.t", "model_roland_targetcommyn1_mode2.t", "model_roland_targetcommyn1_mode3.t", "model_roland_targetjehpar_mode2.t", "model_roland_targetjehpar_mode3.t", "model_roland_targetrgaqcj_mode2.t", "model_roland_targetrgaqcj_mode3.t", "model_artu_targetroland_mode2.t","model_artu_targetroland_mode3.t", "model_artu_targetartu_mode2.t", "model_artu_targetartu_mode3.t", "model_artu_targetqjm_mode2.t", "model_artu_targetqjm_mode3.t", "model_artu_targetcommyn1_mode2.t", "model_artu_targetcommyn1_mode3.t", "model_artu_targetjehpar_mode2.t", "model_artu_targetjehpar_mode3.t", "model_artu_targetrgaqcj_mode2.t", "model_artu_targetrgaqcj_mode3.t", "model_qjm_targetroland_mode2.t, model_qjm_targetroland_mode3.t, model_qjm_targetartu_mode2.t, model_qjm_targetartu_mode3.t, model_qjm_targetqjm_mode2.t, model_qjm_targetqjm_mode3.t, model_qjm_targetcommyn1_mode2.t, model_qjm_targetcommyn1_mode3.t, model_qjm_targetjehpar_mode2.t, model_qjm_targetjehpar_mode3.t, model_qjm_targetrgaqcj_mode2.t, model_qjm_targetrgaqcj_mode3.t, model_commyn1_targetroland_mode2.t, model_commyn1_targetroland_mode3.t, model_commyn1_targetartu_mode2.t, model_commyn1_targetartu_mode3.t, model_commyn1_targetqjm_mode2.t, model_commyn1_targetqjm_mode3.t, model_commyn1_targetcommyn1_mode2.t, model_commyn1_targetcommyn1_mode3.t, model_commyn1_targetjehpar_mode2.t, model_commyn1_targetjehpar_mode3.t, model_commyn1_targetrgaqcj_mode2.t, model_commyn1_targetrgaqcj_mode3.t, model_jehpar_targetroland_mode2.t, model_jehpar_targetroland_mode3.t, model_jehpar_targetartu_mode2.t, model_jehpar_targetartu_mode3.t, model_jehpar_targetqjm_mode2.t, model_jehpar_targetqjm_mode3.t, model_jehpar_targetcommyn1_mode2.t, model_jehpar_targetcommyn1_mode3.t, model_jehpar_targetjehpar_mode2.t, 
//model_jehpar_targetjehpar_mode3.t, model_jehpar_targetrgaqcj_mode2.t, model_jehpar_targetrgaqcj_mode3.t, model_rgaqcj_targetroland_mode2.t, model_rgaqcj_targetroland_mode3.t, model_rgaqcj_targetartu_mode2.t, model_rgaqcj_targetartu_mode3.t, model_rgaqcj_targetqjm_mode2.t, model_rgaqcj_targetqjm_mode3.t, model_rgaqcj_targetcommyn1_mode2.t, model_rgaqcj_targetcommyn1_mode3.t", "model_rgaqcj_targetjehpar_mode2.t", "model_rgaqcj_targetjehpar_mode3.t", "model_rgaqcj_targetrgaqcj_mode2.t", "model_rgaqcj_targetrgaqcj_mode3.t"];
233 321 mdecorde
println ttrezfiles;
//Build process infos
//remove lines which starts with %%
for(String text : ttrezfiles)
{
    //def encoding ="UTF-8"
    println "patch proj file"+text+" : remove %% lines and blank lines AND replace n\t by only one \t";
    File f = new File(projsDir,text);
    File temp = new File("tempFileCVScleaner")
    Reader reader = new InputStreamReader(new FileInputStream(f),encoding);
    // written with the same charset as the input (FileWriter would use the
    // platform default and corrupt non-ASCII characters)
    Writer writer = new OutputStreamWriter(new FileOutputStream(temp),encoding);
    try {
        reader.eachLine { String line ->
            // "%%" lines and empty lines are dropped; on the other lines
            // every run of tabs is collapsed into a single tab
            if(!(line.trim().startsWith("%%") || line.length() == 0))
                writer.write(line.replaceAll("(\t)+","\t")+"\n")
        }
    } finally {
        reader.close();
        writer.close();
    }
    if (!(f.delete() && temp.renameTo(f))) println "Warning can't rename file "+temp+" to "+f
}
255 321 mdecorde
256 321 mdecorde
// Rewrites every model file without its empty lines.
for(String text : texts)
{
    //def encoding ="UTF-8"
    println "patch model file"+text+" : remove blank lines";
    File f = new File(modelsDir,text+".t");
    File temp = new File("tempFileCVScleaner")
    Reader reader = new InputStreamReader(new FileInputStream(f),encoding);
    // written with the same charset as the input (FileWriter would use the
    // platform default and corrupt non-ASCII characters)
    Writer writer = new OutputStreamWriter(new FileOutputStream(temp),encoding);
    try {
        reader.eachLine { String line ->
            if(line.length() != 0)
                writer.write(line+"\n")
        }
    } finally {
        reader.close();
        writer.close();
    }
    if (!(f.delete() && temp.renameTo(f))) println "Warning can't rename file "+temp+" to "+f
}
275 321 mdecorde
276 321 mdecorde
HSQLFunctions.clearAll();
//import proj table into hsql
// One table per projection file; the table name is the file name without dots.
ttrezfiles.each { String projFile ->
    String tableName = projFile.replace(".","");
    File csv = new File(rootDir+"proj/"+projFile);
    int linenumber = HSQLFunctions.ImportOrderedCSVTable(tableName, ["form","cat"], ["VARCHAR(30)","VARCHAR(30)"], csv, "\t", "UTF-8");
    println("create Table "+tableName+" : "+linenumber+" lines");
}

// One "lexbrut_<text>" table per model file.
texts.each { String text ->
    String tableName = "lexbrut_"+text.replace(".","");
    File modelFile = new File(modelsDir,text+".t");
    int linenumber = HSQLFunctions.ImportOrderedCSVTable(tableName, ["form","cat"], ["VARCHAR(30)","VARCHAR(30)"], modelFile, "\t", "UTF-8");
    println("create Table "+tableName+" : "+linenumber+" lines");
}
295 321 mdecorde
296 321 mdecorde
//calc richesses lexicales
// Four result tables sharing the same layout: a "Text" header row followed
// by one (initially empty) row per text, in the order of texts.
LinkedHashMap<String,ArrayList<String>> richesses = new LinkedHashMap<String,ArrayList<String>>();
LinkedHashMap<String,ArrayList<String>> richessesCat = new LinkedHashMap<String,ArrayList<String>>();
LinkedHashMap<String,ArrayList<String>> richessesocc = new LinkedHashMap<String,ArrayList<String>>();
LinkedHashMap<String,ArrayList<String>> richessesoccCat = new LinkedHashMap<String,ArrayList<String>>();

[richesses, richessesCat, richessesocc, richessesoccCat].each { table ->
    // every table gets its own header list
    table.put("Text", ["TTrola","TTartu","TTqjm","TTcomm","TTjehpar","TTrgaqcj"]);
    texts.each { String text ->
        table.put(text, new ArrayList<String>());
    }
}
315 321 mdecorde
316 321 mdecorde
// Runs a count query and appends each returned value, as text, to the
// given row; the value is also logged with the given label.
def collectCount = { String sql, String label, List<String> cells ->
    HSQLFunctions.getGroovySql().eachRow(sql) { row ->
        def rich = row.getAt(0);
        println(label+ rich);
        cells.add( ""+rich ) ;
    }
}

// For every (text, model) pair: what does "text" contain that "model" does not.
for(String text : texts)
{
    for(String model : texts)
    {
        // plain equality: a text name must not be interpreted as a regex
        if(text.equals(model))
        {
            // a text compared with itself: nothing is missing
            richesses.get(text).add( "0" ) ;
            richessesCat.get(text).add( "0") ;
            richessesocc.get(text).add( "0" ) ;
            richessesoccCat.get(text).add( "0" ) ;
        }
        else
        {
            // distinct forms of text that are absent from model
            String query = "SELECT count(*) FROM ((SELECT DISTINCT form FROM lexbrut_"+text+" ) MINUS (SELECT DISTINCT form FROM lexbrut_"+model+"))"
            println(query);
            // distinct categories of text that are absent from model
            String query2 = "SELECT count(*) FROM ((SELECT DISTINCT cat FROM lexbrut_"+text+" ) MINUS (SELECT DISTINCT cat FROM lexbrut_"+model+"))"
            println(query2);
            // rows of text whose form is absent from model
            String query3 = "SELECT count(form) FROM lexbrut_"+text+" WHERE form NOT IN (SELECT form FROM lexbrut_"+model+")"
            println(query3);
            // rows of text whose category is absent from model
            String query4 = "SELECT count(cat) FROM lexbrut_"+text+" WHERE cat NOT IN (SELECT cat FROM lexbrut_"+model+")"
            println(query4);

            collectCount(query, "rich form :", richesses.get(text));
            collectCount(query2, "rich cat :", richessesCat.get(text));
            collectCount(query3, "richocc form :", richessesocc.get(text));
            collectCount(query4, "richocc cat :", richessesoccCat.get(text));
        }
    }
}
364 321 mdecorde
365 321 mdecorde
// Result matrix: a "Text" header row, then one row per text.
LinkedHashMap<String,ArrayList<String>> matrix = new LinkedHashMap<String,ArrayList<String>>();

matrix.put("Text", ["T    ","Vf    ","Vc    ","TTrola","TTartu","TTqjm","TTcomm","TTjehpar","TTrgaqcj"]);
for(String text : texts)
{
    matrix.put(text, new ArrayList<String>());
}

// Per-text counts, index-aligned with texts. The values are stored as text
// (they are read back with Integer.parseInt when the matrix is filled), so
// the lists are declared as lists of String.
ArrayList<String> countOccForm = new ArrayList<String>();
ArrayList<String> countOccCat = new ArrayList<String>();
ArrayList<String> countLexForm = new ArrayList<String>();
ArrayList<String> countLexCat = new ArrayList<String>();

// Runs a count query and appends each returned value, as text, to the
// given list; the value is also logged with the given label.
def collectCount = { String sql, String label, List<String> counts ->
    HSQLFunctions.getGroovySql().eachRow(sql) { row ->
        def rich = row.getAt(0);
        println(label+ rich);
        counts.add( ""+rich ) ;
    }
}

for(String text : texts)
{
    println("count occ and voc : lexbrut_"+text)
    // row counts (occurrences) and distinct counts (vocabulary) of the table
    String query1 = "SELECT count(*) FROM (SELECT form from lexbrut_"+text+")";
    String query2 = "SELECT count(*) FROM (SELECT cat from lexbrut_"+text+")";
    String query3 = "SELECT count(*) FROM (SELECT DISTINCT form from lexbrut_"+text+")";
    String query4 = "SELECT count(*) FROM (SELECT DISTINCT cat from lexbrut_"+text+")";

    collectCount(query1, "count occ form :", countOccForm);
    collectCount(query2, "count occ cat :", countOccCat);
    collectCount(query3, "count lex form :", countLexForm);
    collectCount(query4, "count lex cat :", countLexCat);

    //println("col1 et 2 : "+nbOccurencesTxt.get("lexbrut_"+text)+" "+nbFormLexique.get("lex_"+text))
    // NOTE(review): the first three columns (T, Vf, Vc) are filled with "0"
    // placeholders - confirm whether the counts computed above belong here.
    matrix.get(text).add( "0") ;
    matrix.get(text).add( "0") ;
    matrix.get(text).add( "0") ;
}
411 321 mdecorde
412 321 mdecorde
// For every model ("text") and every target: compare the ordered rows of
// the reference table of the target with the rows of the projection table,
// and append the percentage of differing rows to the matrix row of the target.
for(String text : texts)
{
    for(int textindex = 0 ; textindex < texts.size() ; textindex++)
    {
        String target = texts[textindex];
        String proj = "model_"+text+"_target_"+target+"_mode"+3+"t"
        println("comp "+proj+" VS lexbrut_"+target);
        // rows of the reference table that have no identical row in the projection
        String query = "SELECT count(*) FROM ((SELECT n,form,cat FROM lexbrut_"+target+" ) MINUS (SELECT n,form,cat FROM "+proj+"))"
        HSQLFunctions.getGroovySql().eachRow(query) { row ->
            def dif = row.getAt(0);
            println("textindex : "+textindex);
            // number of rows of the target, as counted earlier
            int total = Integer.parseInt(countOccForm[textindex]);
            def ratio = ((float)dif/(float)total);
            println("dif "+dif+"/ tot "+total+" = "+ratio);
            Float perf = ratio*100f;
            matrix.get(target).add( ""+perf ) ;
        }
    }
}
449 321 mdecorde
450 321 mdecorde
// Prints a titled table: one line per key, the cells separated by tabs.
def dumpTable = { String title, Map table ->
    println(title);
    table.each { key, cells ->
        print(key)
        cells.each { cell -> print("\t"+cell) }
        println()
    }
}

// Prints a list of counts on the current line, each one preceded by a tab.
def dumpCounts = { String label, List counts ->
    print(label)
    counts.each { count -> print("\t"+count) }
}

dumpTable("Matrice d'erreur : ", matrix);
dumpTable("richesse lexiques en Form : ", richesses);
dumpTable("richesse lexiques en Cat : ", richessesCat);

dumpTable("richesse occurance en Form : ", richessesocc);
dumpTable("richesse occurance en Cat : ", richessesoccCat);

println("RichBrutes        roland        artu        qjm        comm        jehpar        rgaqcj");
dumpCounts("Focc", countOccForm);
dumpCounts("\nCocc", countOccCat);
dumpCounts("\nFvoc", countLexForm);
dumpCounts("\nCvoc", countLexCat);
println()