Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / importer / RGAQCJ / importer.groovy @ 187

History | View | Annotate | Download (7.6 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2013-05-06 17:38:43 +0200 (lun., 06 mai 2013) $
25
// $LastChangedRevision: 2386 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.importer.RGAQCJ
29

    
30
import org.txm.importer.*;
31
import org.txm.importer.filters.*;
32
import org.txm.scripts.*;
33
import org.txm.scripts.teitxm.*;
34
import org.txm.utils.treetagger.TreeTagger;
35
import javax.xml.stream.*;
36
import java.net.URL;
37
import java.util.Properties;
38

    
39
import filters.CutHeader.*;
40
import filters.Tokeniser.*;
41
import filters.FusionHeader.*;
42

    
43
import java.io.BufferedReader;
44
import java.io.FileInputStream;
45
import java.io.InputStreamReader;
46

    
47
import org.apache.commons.lang.StringUtils;
48
import org.txm.*;
49

    
50
// TODO: Auto-generated Javadoc
51
/**
 * Import driver that reads a key=value parameter file, builds the
 * correspondence and header tables expected by {@code Xml2Ana}, and runs
 * {@code Xml2Ana} on every entry of a source directory to produce
 * XML-TEI-TXM files in the "txm" folder of the corpus binary directory.
 */
class importer {

	/** Tokenization flag. It is not read by any code in this class. */
	boolean tokenize = true;

	/**
	 * Reads the parameter file into a Properties table.
	 *
	 * Each line is cut at its FIRST "=" so that values may themselves
	 * contain "=". Keys and values are trimmed, because values are reused
	 * as keys for indirect lookups and a stray trailing blank would make
	 * those lookups miss. Lines without "=", with an empty key or with an
	 * empty value are ignored, so an entry such as "R1_app_reportfile="
	 * stays absent from the table.
	 *
	 * @param paramfile the UTF-8 encoded parameter file
	 * @return the parsed parameters
	 */
	private static Properties loadParameters(File paramfile)
	{
		Properties properties = new Properties();
		BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(paramfile), "UTF-8"));
		try
		{
			String line = input.readLine();
			while (line != null)
			{
				int idx = line.indexOf("="); //$NON-NLS-1$
				if (idx > 0)
				{
					String key = line.substring(0, idx).trim();
					String value = line.substring(idx + 1).trim();
					if (key.length() > 0 && value.length() > 0)
					{
						properties.put(key, value);
					}
				}
				line = input.readLine();
			}
		}
		finally
		{
			// always release the file handle, even if reading fails
			input.close();
		}
		return properties;
	}

	/**
	 * Runs the import.
	 *
	 * The binary directory "corpora/" + basename under the TXM user home is
	 * deleted and recreated with a "txm" sub-directory, then every entry of
	 * {@code dir} is transformed by {@code Xml2Ana} into that sub-directory.
	 *
	 * @param dir the directory containing the source files
	 * @param paramfile the parameter file (see the example at the end of this file)
	 * @param basename the corpus name, used as the binary directory name
	 * @return true if all entries were submitted to Xml2Ana; false if the
	 * parameter file is missing or incomplete, a counter is not a number,
	 * nbResp is 0, the output directory cannot be created or the source
	 * directory cannot be listed
	 */
	public boolean run(File dir, File paramfile, String basename)
	{
		if (!paramfile.exists())
		{
			System.err.println("Parameter file does not exists: "+paramfile.getAbsolutePath());
			return false;
		}
		Properties properties = loadParameters(paramfile);

		if (!(properties.containsKey("nbP") && properties.containsKey("nbTaxo") && properties.containsKey("nbResp")))
		{
			System.err.println("Missing property: nbP or nbTaxo or nbResp");
			return false;
		}
		int nbP = 0;
		int nbTaxo = 0;
		int nbResp = 0;
		try
		{
			nbP = Integer.parseInt(properties.getProperty("nbP"));
			nbTaxo = Integer.parseInt(properties.getProperty("nbTaxo"));
			nbResp = Integer.parseInt(properties.getProperty("nbResp"));
		}
		catch (NumberFormatException e)
		{
			System.err.println("Property nbP, nbTaxo or nbResp is not a number: "+e.getMessage());
			return false;
		}

		if (nbResp == 0)
		{
			System.err.println("No resp ");
			return false;
		}

		// where the binaries will be created
		File binDir = new File(Toolbox.getParam(Toolbox.USER_TXM_HOME), "corpora/"+basename);
		binDir.deleteDir();
		File txmDir = new File(binDir, "txm");
		// mkdirs also creates binDir and any missing parent such as "corpora"
		txmDir.mkdirs();
		if (!txmDir.isDirectory())
		{
			System.err.println("Could not create directory: "+txmDir.getAbsolutePath());
			return false;
		}

		// the source entries are listed after the binary directory has been reset
		File[] files = dir.getAbsoluteFile().listFiles();
		if (files == null)
		{
			System.err.println("Could not list source directory: "+dir.getAbsolutePath());
			return false;
		}

		// working directory: everything below is written under the binary directory
		String rootDir = binDir.getAbsolutePath()+"/";

		// correspType: word attribute "pN" -> value stored under the key named by PN_taxo
		// (presumably the type attribute of the TXM word's ana property - confirm against Xml2Ana)
		def correspType = new HashMap<String,String>();
		// correspRef: word attribute "pN" -> value stored under the key named by <PN_taxo>_resp
		// (presumably the id of the respStmt of the TEI header - confirm against Xml2Ana)
		def correspRef = new HashMap<String,String>();
		for (int p = 1 ; p <= nbP ; p++)
		{
			String taxo = properties.get("P"+p+"_taxo");
			correspType.put("p"+p, properties.get(taxo));
			String resp = properties.get(taxo+"_resp");
			correspRef.put("p"+p, properties.get(resp));
		}

		// ids of all the respStmt, in declaration order
		def respId = [];
		// respId -> [application id, application version, application report file path]
		def applications = new HashMap<String,List<String>>();
		// respId -> [description, who, when, day]
		def resps = new HashMap<String,List<String>>();
		for (int r = 1 ; r <= nbResp ; r++)
		{
			def id = properties.get("R"+r);
			respId << id;

			def application = new ArrayList<String>();
			application.add(properties.get("R"+r+"_app_id")); // app ident
			application.add(properties.get("R"+r+"_app_version")); // app version
			application.add(properties.get("R"+r+"_app_reportfile")); // app report file path
			applications.put(id, application);

			resps.put(id, [properties.get("R"+r+"_desc"), properties.get("R"+r+"_who"), properties.get("R"+r+"_when"), properties.get("R"+r+"_day")]);
		}

		// respId -> names of the taxonomies declared with that resp
		def taxonomiesUtilisees = new HashMap<String,List<String>>();
		// taxonomy name -> its "tagset" and "website" URIs
		def itemsURI = new HashMap<String,HashMap<String,String>>();
		for (int t = 1 ; t <= nbTaxo ; t++)
		{
			String taxo = properties.get("T"+t);

			String resp_id = properties.get(properties.get("T"+t+"_resp"));
			if (!taxonomiesUtilisees.containsKey(resp_id))
			{
				taxonomiesUtilisees.put(resp_id, []);
			}
			taxonomiesUtilisees.get(resp_id) << taxo;

			def uris = new HashMap<String,String>();
			uris.put("tagset", properties.get("T"+t+"_tagset"));
			uris.put("website", properties.get("T"+t+"_web"));
			itemsURI.put(taxo, uris);
		}

		println("Weblex import parameters : ")
		println("resps id "+respId);
		println("resps infos"+resps);
		println("applications "+applications);

		println("correspType "+correspType)
		println("correspRef "+correspRef)

		println("taxonomiesUtilisees "+taxonomiesUtilisees)
		println("itemsURI "+itemsURI)

		// TRANSFORM INTO XML-TEI-TXM
		for (File f : files)
		{
			String txmfile = f.getName();
			println("Building xml-tei-txm "+f+ " >> "+rootDir+"txm/"+txmfile)

			// run the transformation of this entry
			def builder3 = new Xml2Ana(f);
			builder3.setCorrespondances(correspRef, correspType);
			builder3.setHeaderInfos(respId, resps, applications, taxonomiesUtilisees, itemsURI)
			builder3.transformFile(rootDir+"txm/", txmfile);
		}
		return true;
	}

	/**
	 * Command line entry point.
	 *
	 * @param args the source directory, the parameter file and the corpus
	 * name, in that order
	 */
	public static void main(String[] args)
	{
		if (args == null || args.length != 3)
		{
			System.err.println("Usage: importer <source directory> <parameter file> <corpus name>");
			return;
		}
		new importer().run(new File(args[0]), new File(args[1]), args[2]);
	}
}
224

    
225
/* PARAM FILE EXAMPLE
226
nbP=3
227
nbTaxo=2
228
nbResp=2
229

230
R1=init
231
R1_desc=initial tagging
232
R1_who=al
233
R1_when=2010
234
R1_day=Tue Mar  2 21:02:55 Paris, Madrid 2010
235
R1_app_id=appR1
236
R1_app_version=app1V
237
R1_app_reportfile=
238

239
R2=apinit
240
R2_desc=second tagging
241
R2_who=slh
242
R2_when=2010
243
R2_day=Tue Mar  2 21:02:55 Paris, Madrid 2010
244
R2_app_id=appR2
245
R2_app_version=app2V
246
R2_app_reportfile=
247

248
P1=truc1
249
P1_taxo=T1
250

251
P2=bidul2
252
P2_taxo=T1
253

254
P3=machin3
255
P3_taxo=T2
256

257
T1=CATTEX
258
T1_web=www.google.fr
259
T1_tagset=www.google.fr
260
T1_resp=R1
261

262
T2=TTFR
263
T2_web=www.bing.fr
264
T2_tagset=www.bing.fr
265
T2_resp=R2 
266
*/