Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / importer / fleurs / importer.groovy @ 479

History | View | Annotate | Download (7.1 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2013-05-06 17:38:43 +0200 (lun. 06 mai 2013) $
25
// $LastChangedRevision: 2386 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.importer.fleurs
29

    
30
import org.txm.importer.*;
31
import org.txm.importer.filters.*;
32
import org.txm.scripts.*;
33
import org.txm.scripts.teitxm.BuildTTSrc;
34
import org.txm.scripts.teitxm.*;
35
import javax.xml.stream.*;
36
import java.io.BufferedWriter;
37
import java.io.FileOutputStream;
38
import java.io.OutputStreamWriter;
39
import java.io.PrintStream;
40
import java.net.URL;
41
import java.util.List;
42

    
43
import org.txm.*;
44

    
45
// TODO: Auto-generated Javadoc
46
/**
 * Converts the XML source file of the "fleurs" corpus into the XML file
 * stored in the "txm" directory of a TXM binary corpus.
 *
 * Only element structure and attributes are converted: character data of
 * the source document is ignored, word forms are read from the "f"
 * attribute of the word elements.
 */
class importer
{
    /**
     * Attributes read on a source POEME element, mapped to the attribute
     * names written on the resulting poeme element. The iteration order of
     * this map is the order in which the attributes are written.
     */
    private static final Map<String, String> POEME_ATTRIBUTES = [
        "TITRE": "titre",
        "TYPE": "type",
        "GENRE": "genre",
        "SECTION": "section",
        "TRANCHE_CHRONO": "tranche_chrono",
        "DATE": "date",
        "ANNEE": "annee",
        "TITRE_ABR": "titre_abr",
        "PREORIG": "preorig",
        "SCHEMA1": "schema1",
        "SCHEMA2": "schema2"
    ];

    /**
     * Rebuilds the binary directory of the corpus and converts one source
     * file of the source directory into its "txm" sub-directory.
     *
     * The existing "corpora/basename" directory of the TXM home is deleted
     * and recreated. The source file is the first regular, non hidden file
     * of the source directory, in pathname order.
     *
     * @param dir the directory containing the source XML file
     * @param basename the name of the corpus directory to (re)create
     * @throws IllegalArgumentException if dir cannot be listed or contains
     * no regular file
     */
    public void run(File dir, String basename)
    {
        // look for the source file first, so that an unusable source
        // directory does not wipe an existing binary corpus
        File[] children = dir.getAbsoluteFile().listFiles();
        if (children == null)
            throw new IllegalArgumentException("Source directory does not exist or cannot be read: "+dir.getAbsolutePath());

        // listFiles() gives no ordering guarantee and also returns
        // directories: keep the regular files only and sort them
        List<File> candidates = new ArrayList<File>();
        for (File child : children)
            if (child.isFile() && !child.isHidden())
                candidates.add(child);
        if (candidates.isEmpty())
            throw new IllegalArgumentException("No source file found in: "+dir.getAbsolutePath());
        Collections.sort(candidates);
        File srcfile = candidates.get(0);

        //cleaning
        File binDir = new File(Toolbox.getParam(Toolbox.USER_TXM_HOME),"corpora/"+basename);
        binDir.deleteDir();
        File txmDir = new File(binDir,"txm");
        // mkdirs() also creates "corpora" when it does not exist yet
        txmDir.mkdirs();

        File resultfile = new File(txmDir, srcfile.getName());

        this.process(srcfile, resultfile);
    }

    /**
     * Converts a source file into a result file encoded in UTF-8.
     *
     * Elements are converted as follows:
     * M, MC and PC become a numbered w element,
     * RECUEIL becomes recueil,
     * POEME attributes are stored and written on the poeme element opened
     * by the next ASTX_ATTR element (and closed with it),
     * HUITAIN and SIZAIN become a div element typed with the source name,
     * STROPHE and VERS are copied with lower-cased names.
     * Every other element and all character data are dropped.
     *
     * The streams are closed even when the conversion fails.
     *
     * @param srcfile the XML file to read
     * @param resultfile the file to write, overwritten if it exists
     */
    public void process(File srcfile, File resultfile)
    {
        InputStream inputData = srcfile.toURI().toURL().openStream();
        XMLStreamReader parser = null;
        Writer output = null;
        try
        {
            parser = XMLInputFactory.newInstance().createXMLStreamReader(inputData);
            output = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(resultfile) , "UTF-8"));

            println resultfile.getAbsolutePath()

            convert(parser, output);
        }
        finally
        {
            // same closing order as a successful run: writer, parser, stream
            try
            {
                if (output != null)
                    output.close();
            }
            finally
            {
                try
                {
                    if (parser != null)
                        parser.close();
                }
                finally
                {
                    inputData.close();
                }
            }
        }
    }

    /**
     * Reads the parser events until the end of the document and writes the
     * converted elements.
     *
     * @param parser the reader positioned at the start of the document
     * @param output the writer receiving the result
     */
    private static void convert(XMLStreamReader parser, Writer output)
    {
        // values of the last POEME element seen, indexed by source attribute name
        Map<String, String> poeme = new HashMap<String, String>();
        int wordid = 1;

        for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next())
        {
            if (event == XMLStreamConstants.START_ELEMENT)
            {
                String localname = parser.getLocalName();
                switch (localname) {
                    case "M":
                    case "MC":
                    case "PC":
                        writeWord(parser, output, wordid++);
                        break;

                    case "RECUEIL":
                        output.write("<recueil>\n");
                        break;

                    case "POEME":
                        // getAttributeValue returns null for a missing
                        // attribute: store "" instead, like the initial state
                        for (String name : POEME_ATTRIBUTES.keySet())
                            poeme.put(name, parser.getAttributeValue(null, name) ?: "");
                        break;

                    case "ASTX_ATTR":
                        writePoemeStart(output, poeme);
                        break;

                    case "HUITAIN":
                    case "SIZAIN":
                        output.write("<div type=\""+localname.toLowerCase(Locale.ROOT)+"\"");
                        writeAttributes(parser, output);
                        output.write(">\n");
                        break;

                    case "STROPHE":
                    case "VERS":
                        output.write("<"+localname.toLowerCase(Locale.ROOT));
                        writeAttributes(parser, output);
                        output.write(">\n");
                        break;
                }
            }
            else if (event == XMLStreamConstants.END_ELEMENT)
            {
                String localname = parser.getLocalName();
                switch (localname) {
                    case "RECUEIL":
                        output.write("</recueil>\n");
                        break;

                    case "ASTX_ATTR":
                        output.write("</poeme>\n");
                        break;

                    case "HUITAIN":
                    case "SIZAIN":
                        output.write("</div>\n");
                        break;

                    case "STROPHE":
                    case "VERS":
                        output.write("</"+localname.toLowerCase(Locale.ROOT)+">\n");
                        break;
                }
            }
            // every other event, character data included, is ignored
        }
    }

    /**
     * Writes the w element of the word element the parser is positioned on.
     *
     * The "l" attribute is split on "_" into lemma and part of speech when
     * it has exactly two parts, both are "none" otherwise. A missing "x" is
     * written "n/a" and a missing "z" is written "1".
     *
     * @param parser the reader positioned on a M, MC or PC start tag
     * @param output the writer receiving the result
     * @param wordid the number used to build the word identifier
     */
    private static void writeWord(XMLStreamReader parser, Writer output, int wordid)
    {
        String x = parser.getAttributeValue(null,"x");
        String z = parser.getAttributeValue(null,"z");
        String l = parser.getAttributeValue(null,"l");
        String f = parser.getAttributeValue(null,"f");
        String pos = "none";
        String lemme = "none";
        if (l != null)
        {
            String[] split = l.split("_");
            if (split.length == 2)
            {
                pos = split[1];
                lemme = split[0];
            }
        }
        else
        {
            // NOTE(review): a missing "l" used to be written as the text
            // "null"; "none" is used to agree with lemme and pos - confirm
            l = "none";
        }

        if (x == null)
            x = "n/a";
        if (z == null)
            z = "1";
        // NOTE(review): a missing "f" used to be written as the text
        // "null"; it is now an empty word form - confirm
        if (f == null)
            f = "";

        output.write("<w id=\"w_"+wordid+"\" l=\""+escapeXml(l)+"\" lemme=\""+escapeXml(lemme)+"\" pos=\""+escapeXml(pos)+"\" x=\""+escapeXml(x)+"\" z=\""+escapeXml(z)+"\">"+escapeXml(f)+"</w>\n")
    }

    /**
     * Writes the start tag of a poeme element from the stored POEME values.
     *
     * An empty type or genre is written "v".
     *
     * @param output the writer receiving the result
     * @param poeme the stored values, indexed by source attribute name
     */
    private static void writePoemeStart(Writer output, Map<String, String> poeme)
    {
        output.write("<poeme");
        for (Map.Entry<String, String> entry : POEME_ATTRIBUTES.entrySet())
        {
            String name = entry.getValue();
            String value = poeme.get(entry.getKey()) ?: "";
            if (value == "" && (name == "type" || name == "genre"))
                value = "v";
            output.write(" "+name+"=\""+escapeXml(value)+"\"");
        }
        // NOTE(review): "n" repeats the SCHEMA2 value, as the previous
        // revision did - confirm this is the intended numbering
        output.write(" n=\""+escapeXml(poeme.get("SCHEMA2") ?: "")+"\"");
        output.write(">\n");
    }

    /**
     * Copies the attributes of the current start tag, with lower-cased names.
     *
     * @param parser the reader positioned on a start tag
     * @param output the writer receiving the result
     */
    private static void writeAttributes(XMLStreamReader parser, Writer output)
    {
        for (int i = 0; i < parser.getAttributeCount() ; i++)
            output.write(" "+parser.getAttributeLocalName(i).toLowerCase(Locale.ROOT)+"=\""+escapeXml(parser.getAttributeValue(i))+"\"")
    }

    /**
     * Escapes the characters which cannot be written as is in XML text or
     * in a double-quoted XML attribute value. The parser returns decoded
     * values, so they must be encoded again before being written.
     *
     * @param text the decoded value, may be null
     * @return the escaped value, "" if text is null
     */
    private static String escapeXml(String text)
    {
        if (text == null)
            return "";
        // "&" first, so that the entities added afterwards are not escaped twice
        return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace("\"", "&quot;");
    }

    /**
     * Imports the source file found in the "xml/fleurs" directory of the
     * user home.
     *
     * @param args the arguments, not used
     */
    public static void main(String[] args)
    {
        File homedir = new File(System.getProperty("user.home"),"xml/fleurs/").getAbsoluteFile();
        def imp = new importer();
        // run() needs the corpus name: the call with the directory alone
        // matched no method.
        // NOTE(review): "fleurs" is taken from the source directory name - confirm
        imp.run(homedir, "fleurs");
    }
}