Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / RGAQCJ / importer.groovy @ 1000

History | View | Annotate | Download (7.7 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2013-05-06 17:38:43 +0200 (lun. 06 mai 2013) $
25
// $LastChangedRevision: 2386 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.scripts.importer.RGAQCJ
29

    
30
import org.txm.scripts.importer.*;
31
import org.txm.importer.scripts.filters.*;
32
import org.txm.scripts.*;
33
import org.txm.importer.scripts.xmltxm.*;
34
import org.txm.utils.treetagger.TreeTagger;
35
import javax.xml.stream.*;
36
import java.net.URL;
37
import java.util.Properties;
38

    
39
import org.txm.scripts.filters.CutHeader.*;
40
import org.txm.scripts.filters.Tokeniser.*;
41
import org.txm.scripts.filters.FusionHeader.*;
42

    
43
import java.io.BufferedReader;
44
import java.io.FileInputStream;
45
import java.io.InputStreamReader;
46

    
47
import org.apache.commons.lang.StringUtils;
48
import org.txm.*;
49
import org.txm.core.engines.*;
50

    
51
/**
 * Import script that converts the XML files of a source directory into
 * XML-TEI-TXM files, driven by a "key=value" parameter file.
 *
 * The parameter file declares:
 * <ul>
 * <li>nbP, nbTaxo, nbResp: the number of P*, T* and R* entries to read;</li>
 * <li>R&lt;n&gt; entries: responsibility statements (id, description, who,
 * when, day) and the application that produced them;</li>
 * <li>T&lt;n&gt; entries: taxonomies (name, tagset, web site, responsible R entry);</li>
 * <li>P&lt;n&gt;_taxo entries: the taxonomy each word property p&lt;n&gt; refers to.</li>
 * </ul>
 * A complete example is given at the end of this file.
 */
class importer {

	/** The tokenize flag. It is not read by the code of this class. */
	boolean tokenize = true;

	/**
	 * Reads the parameter file, prepares the binary directory of the corpus
	 * and runs Xml2Ana on every file listed in the source directory.
	 *
	 * @param dir the directory containing the source files
	 * @param paramfile the "key=value" parameter file, read as UTF-8
	 * @param basename the corpus name, used as the name of the binary directory
	 *        created under "corpora" in the TXM home directory
	 * @return true if all files were handed to Xml2Ana; false if the parameter
	 *         file is missing, lacks nbP/nbTaxo/nbResp, contains a non-numeric
	 *         count, or declares no responsibility statement
	 */
	public boolean run(File dir, File paramfile, String basename)
	{
		if (!paramfile.exists())
		{
			System.err.println("Parameter file does not exists: "+paramfile.getAbsolutePath());
			return false;
		}
		Properties properties = loadProperties(paramfile);

		if (!(properties.containsKey("nbP") && properties.containsKey("nbTaxo") && properties.containsKey("nbResp")))
		{
			System.err.println("Missing property: nbP or nbTaxo or nbResp");
			return false;
		}

		int nbP;
		int nbTaxo;
		int nbResp;
		try
		{
			nbP = Integer.parseInt(properties.getProperty("nbP"));
			nbTaxo = Integer.parseInt(properties.getProperty("nbTaxo"));
			nbResp = Integer.parseInt(properties.getProperty("nbResp"));
		}
		catch (NumberFormatException e)
		{
			// report malformed counts the same way as the other validation failures
			System.err.println("Invalid number for property nbP, nbTaxo or nbResp: "+e.getMessage());
			return false;
		}

		if (nbResp == 0)
		{
			System.err.println("No resp ");
			return false;
		}
		String rootDir = dir.getAbsolutePath()+"/";

		// where the binaries will be created
		File binDir = new File(Toolbox.getTxmHomePath(),"corpora/"+basename);
		binDir.deleteDir();
		// mkdirs: also creates the "corpora" parent when it does not exist yet
		binDir.mkdirs();

		File txmDir = new File(binDir,"txm");
		txmDir.deleteDir();
		txmDir.mkdir();

		// list the source files before rootDir is redirected to the binary directory
		List<File> files = new File(rootDir,"").listFiles();

		// set working directory
		rootDir = binDir.getAbsolutePath()+"/";

		// Set import parameters

		// correspType: (wlx word attribute, "type" attribute of the "ana" property of the TXM w element)
		// p<n> -> value of the T entry named by P<n>_taxo
		def correspType = new HashMap<String,String>();
		for (int p = 1 ; p <= nbP ; p++)
		{
			correspType.put("p"+p, properties.get(properties.get("P"+p+"_taxo")));
		}

		// correspRef: (wlx word attribute, "ref" attribute of the "ana" property of the TXM w element);
		// ref points to the identifier of the respStmt of the TEI header
		// p<n> -> value of the R entry responsible for the taxonomy named by P<n>_taxo
		def correspRef = new HashMap<String,String>();
		for (int p = 1 ; p <= nbP ; p++)
		{
			String taxo = properties.get("P"+p+"_taxo");
			String resp = properties.get(taxo+"_resp");
			correspRef.put("p"+p, properties.get(resp));
		}

		// the ids of all the respStmt must be listed
		def respId = [];
		for (int r = 1 ; r <= nbResp ; r++)
		{
			respId << properties.get("R"+r);
		}

		// maps each respId to the execution report of the tool:
		// [application id, application version, report file path]
		def applications = new HashMap<String,List<String>>();
		for (int r = 1 ; r <= nbResp ; r++)
		{
			List<String> application = new ArrayList<String>();
			application.add(properties.get("R"+r+"_app_id")); // app ident
			application.add(properties.get("R"+r+"_app_version")); // app version
			application.add(properties.get("R"+r+"_app_reportfile")); // app report file path
			applications.put(properties.get("R"+r), application);
		}

		// maps each respId to the taxonomies it is responsible for,
		// used to build the refs to the taxonomies
		def taxonomiesUtilisees = new HashMap<String,List<String>>();
		for (int t = 1 ; t <= nbTaxo ; t++)
		{
			String resp_id = properties.get(properties.get("T"+t+"_resp"));
			if (!taxonomiesUtilisees.containsKey(resp_id))
			{
				taxonomiesUtilisees.put(resp_id, []);
			}
			taxonomiesUtilisees.get(resp_id) << properties.get("T"+t);
		}

		// associates each taxonomy with its tagset and its web site
		def itemsURI = new HashMap<String,HashMap<String,String>>();
		for (int t = 1 ; t <= nbTaxo ; t++)
		{
			String taxo = properties.get("T"+t);
			def uris = new HashMap<String,String>();
			uris.put("tagset", properties.get("T"+t+"_tagset"));
			uris.put("website", properties.get("T"+t+"_web"));
			itemsURI.put(taxo, uris);
		}

		// respStmt information
		// resps: (respId <see above>, [description, person, date, day])
		def resps = new HashMap<String,List<String>>();
		for (int r = 1 ; r <= nbResp ; r++)
		{
			resps.put(properties.get("R"+r), [properties.get("R"+r+"_desc"),properties.get("R"+r+"_who"),properties.get("R"+r+"_when"),properties.get("R"+r+"_day")]);
		}

		println("Weblex import parameters : ")
		println("resps id "+respId);
		println("resps infos"+resps);
		println("applications "+applications);

		println("correspType "+correspType)
		println("correspRef "+correspRef)

		println("taxonomiesUtilisees "+taxonomiesUtilisees)
		println("itemsURI "+itemsURI)

		// TRANSFORM INTO XML-TEI-TXM
		for (File f : files)
		{
			String txmfile = f.getName();
			println("Building xml-tei-txm "+f+ " >> "+rootDir+"txm/"+txmfile)

			// run the processing
			def builder3 = new Xml2Ana(f);
			builder3.setCorrespondances(correspRef, correspType);
			builder3.setHeaderInfos(respId, resps, applications, taxonomiesUtilisees, itemsURI)
			builder3.transformFile(rootDir+"txm/", txmfile);
		}
		return true;
	}

	/**
	 * Reads the "key=value" lines of the parameter file.
	 *
	 * A line is split on "=": the first part is the key and the remaining
	 * parts, re-joined with "=", are the value. Lines without a value are
	 * ignored. Keys and values are trimmed because values are reused as keys
	 * of further lookups, so stray whitespace would make these lookups fail.
	 *
	 * @param paramfile the parameter file, read as UTF-8
	 * @return the properties read from the file
	 */
	private Properties loadProperties(File paramfile)
	{
		Properties properties = new Properties();
		BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(paramfile) , "UTF-8"));
		try
		{
			String line = input.readLine();
			while (line != null)
			{
				String[] split = line.split("="); //$NON-NLS-1$
				if (split.length == 2)
				{
					properties.put(split[0].trim(), split[1].trim());
				}
				else if (split.length > 2)
				{
					// the value itself contains "=": re-join everything after the key
					String[] subsplit = new String[split.length - 1];
					System.arraycopy(split, 1, subsplit, 0, split.length - 1);
					properties.put(split[0].trim(), StringUtils.join(subsplit, "=").trim()); //$NON-NLS-1$
				}
				line = input.readLine();
			}
		}
		finally
		{
			// always release the file handle, even when reading fails
			input.close();
		}
		return properties;
	}

	/**
	 * The main method.
	 *
	 * @param args the arguments: source directory, parameter file, corpus base name
	 */
	public static void main(String[] args)
	{
		if (args == null || args.length < 3)
		{
			System.err.println("Usage: importer <source directory> <parameter file> <corpus base name>");
			return;
		}
		File dir = new File(args[0]);
		File paramfile = new File(args[1]);
		if (!new importer().run(dir, paramfile, args[2]))
		{
			System.err.println("Import failed");
		}
	}
}
225

    
226
/* PARAM FILE EXAMPLE
nbP=3
nbTaxo=2
nbResp=2

R1=init
R1_desc=initial tagging
R1_who=al
R1_when=2010
R1_day=Tue Mar  2 21:02:55 Paris, Madrid 2010
R1_app_id=appR1
R1_app_version=app1V
R1_app_reportfile=

R2=apinit
R2_desc=second tagging
R2_who=slh
R2_when=2010
R2_day=Tue Mar  2 21:02:55 Paris, Madrid 2010
R2_app_id=appR2
R2_app_version=app2V
R2_app_reportfile=

P1=truc1
P1_taxo=T1

P2=bidul2
P2_taxo=T1

P3=machin3
P3_taxo=T2

T1=CATTEX
T1_web=www.google.fr
T1_tagset=www.google.fr
T1_resp=R1

T2=TTFR
T2_web=www.bing.fr
T2_tagset=www.bing.fr
T2_resp=R2
*/