Révision 2358

tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/FixINAXML.groovy (revision 2358)
1 1
import org.txm.utils.ConsoleProgressBar
2 2

  
3
def directory = new File("/home/mdecorde/TEMP/ANTRACT/AF/trs")
3
def directory = new File("C:\\Users\\mdecorde\\xml\\trs")
4 4
println "Fixing $directory"
5 5
def files = directory.listFiles()
6 6
ConsoleProgressBar cpb = new ConsoleProgressBar(files.size())
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/transcriberLoader.groovy (revision 2358)
118 118
}
119 119

  
120 120
final HashMap<String, String> textordersInfo = new HashMap<String, String>();
121
for (String t : metadatas.keySet()) {
122
	def ti = metadatas.get(t)
123
	for (org.txm.metadatas.Entry e : ti) {
124
		if ("textorder".equals(e.getId())) {
125
			textordersInfo[t+".trs"] = ti.value()
121
if (metadatas != null) {
122
	for (String t : metadatas.keySet()) {
123
		def ti = metadatas.get(t)
124
		for (org.txm.metadatas.Entry e : ti) {
125
			if ("textorder".equals(e.getId())) {
126
				String k = ""+t+".xml" // the sort test will use the xml-txm file names
127
				textordersInfo[k] = e.value
128
			}
126 129
		}
127 130
	}
128
}
129

  
131
}
130 132
File propertyFile = new File(srcDir, "import.properties")//default
131 133
Properties props = new Properties();
132 134
String[] metadatasToKeep;
......
134 136
	FileInputStream input = new FileInputStream(propertyFile);
135 137
	props.load(input);
136 138
	input.close();
137
	
139

  
138 140
	if (props.getProperty("removeInterviewer") != null)
139 141
		removeInterviewer = Boolean.parseBoolean(props.get("removeInterviewer").toString());
140 142
	if (props.getProperty("ignoreTranscriberMetadata") != null)
......
145 147
		csvHeaderNumber = props.get("csvHeaderNumber").toString().split("|");
146 148
	//if (props.getProperty("includeComments") != null)
147 149
	//	includeComments = props.get("includeComments").toString();
148
	
150

  
149 151
	println "import properties: "
150 152
	println " removeInterviewer: "+removeInterviewer
151 153
	println " ignoreTranscriberMetadata: "+ignoreTranscriberMetadata
......
168 170
				srcDir = new File(binDir, "src");
169 171
			println ""
170 172
		}
171
		
173

  
172 174
		// select only trs files
173 175
		String ext = "trs";
174 176
		ArrayList<File> trsfiles = srcDir.listFiles(); //find all trs files
......
183 185
				i--;
184 186
			}
185 187
		}
186
		
188

  
187 189
		if (trsfiles.size() == 0) {
188 190
			println ("No transcription file (*.trs) found in "+srcDir.getAbsolutePath()+". Aborting.")
189 191
			return false;
190 192
		}
191
		
193

  
192 194
		if (MONITOR != null) MONITOR.worked(1, "IMPORTER")
193 195
		if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
194 196
		println "-- IMPORTER"
......
198 200
			return;
199 201
		}
200 202
		if (MONITOR != null) MONITOR.worked(20)
201
		
203

  
202 204
		println "-- Xml Validation"
203 205
		if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
204 206
		for (File infile : txmDir.listFiles()) {
......
207 209
				infile.delete();
208 210
			}
209 211
		}
210
		
212

  
211 213
		if (MONITOR != null) MONITOR.worked(5)
212 214
		if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
213 215
		println "-- Remove interviewer: "+removeInterviewer
......
221 223
					int idx = filename.indexOf(".xml");
222 224
					if (idx > 0)
223 225
						filename = filename.substring(0, idx);
224
					
226

  
225 227
					ArrayList<Pair<String, String>> metas = metadatas.get(filename)
226 228
					//println "filename=$filename metas= $metas"
227 229
					for (Pair p : metas) {
......
232 234
				}
233 235
			}
234 236
		}
235
		
237

  
236 238
		if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
237 239
		if (MONITOR != null) MONITOR.worked(20, "ANNOTATE")
238
		
240

  
239 241
		boolean annotationSuccess = false;
240 242
		if (annotate) {
241 243
			println "-- ANNOTATE - Running NLP tools"
......
244 246
				annotationSuccess = true;
245 247
			}
246 248
		}
247
	}
248
	trsfiles = new ArrayList<File>(Arrays.asList(txmDir.listFiles()));
249
	} // end of importer and annotate steps
249 250
	
251
	xmltxmFiles = new ArrayList<File>(Arrays.asList(txmDir.listFiles()));
250 252
	if (metadatas != null && metadatas.getPropertyNames().contains("textorder")) {
251
		Collections.sort(trsfiles, new Comparator<File>() {
253
		Collections.sort(xmltxmFiles, new Comparator<File>() {
252 254
					public int compare(File f1, File f2) {
253
						String o1 = textorder[f1.getName()];
254
						String o2 = textorder[f2.getName()];
255
						String o1 = textordersInfo[f1.getName()];
256
						String o2 = textordersInfo[f2.getName()];
255 257
						if (o1 == null && o2 == null) {
256 258
							return f1.compareTo(f2);
257 259
						} else if (o1 == null) {
......
266 268
					}
267 269
				});
268 270
	} else {
269
		Collections.sort(trsfiles);
271
		Collections.sort(xmltxmFiles);
270 272
	}
271
	
273

  
272 274
	if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
273 275
	if (MONITOR != null) MONITOR.worked(25, "COMPILING")
274 276
	println "--COMPILING - Building Search Engine indexes"
275
	
277

  
276 278
	def comp = new compiler()
277 279
	if(debug) comp.setDebug();
278 280
	comp.removeInterviewers(removeInterviewer);
279 281
	comp.setIgnoreTranscriberMetadata(ignoreTranscriberMetadata);
280
	if (!comp.run(project, trsfiles, corpusname, "default", binDir)) {
282
	if (!comp.run(project, xmltxmFiles, corpusname, "default", binDir)) {
281 283
		println "Failed to compile files";
282 284
		return;
283 285
	}
284
	
286

  
285 287
	if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
286
	
288

  
287 289
	File htmlDir = new File(binDir,"HTML/$corpusname");
288 290
	htmlDir.deleteDir()
289 291
	htmlDir.mkdirs();
290 292
	if (build_edition) {
291
		
293

  
292 294
		if (MONITOR != null) MONITOR.worked(20, "EDITION")
293 295
		println "-- EDITION - Building editions"
294
		
296

  
295 297
		def second = 0
296
		
297
		println "Paginating "+trsfiles.size()+" texts"
298
		ConsoleProgressBar cpb = new ConsoleProgressBar(trsfiles.size());
299
		for (File txmFile : trsfiles) {
298

  
299
		println "Paginating "+xmltxmFiles.size()+" texts"
300
		ConsoleProgressBar cpb = new ConsoleProgressBar(xmltxmFiles.size());
301
		for (File txmFile : xmltxmFiles) {
300 302
			cpb.tick()
301 303
			String txtname = txmFile.getName();
302 304
			int i = txtname.lastIndexOf(".");
303 305
			if(i > 0) txtname = txtname.substring(0, i);
304
			
306

  
305 307
			List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang);
306 308
			List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang);
307
			
309

  
308 310
			Text t = project.getText(txtname)
309 311
			if (t == null) {
310
				new Text(project);
312
				t = new Text(project);
311 313
				t.setName(txtname);
312
				t.setSourceFile(txmFile)
313
				t.setTXMFile(txmFile)
314
			}
315
			t.setSourceFile(txmFile)
316
			t.setTXMFile(txmFile)
317
			
318
			Edition edition = t.getEdition("default")
319
			if (edition != null) {
320
				edition.delete();
321
				edition = null;
314 322
			}
315
			def ed = new pager(txmFile, htmlDir, txtname, NoSpaceBefore, NoSpaceAfter, wordsPerPage, basename, page_element, metadatas);
316
			Edition edition = new Edition(t);
323
			def ed = new pager(txmFile, htmlDir, txtname, NoSpaceBefore, NoSpaceAfter, wordsPerPage, basename, page_element, metadatas);
324
			edition = t.getEdition("default")
325
			edition = new Edition(t);
317 326
			edition.setName("default");
327
			
318 328
			edition.setIndex(htmlDir.getAbsolutePath());
319 329
			for (i = 0 ; i < ed.getPageFiles().size();) {
320 330
				File f = ed.getPageFiles().get(i);
......
324 334
			}
325 335
		}
326 336
		cpb.done()
327
		
337

  
328 338
		//copy transcriber.css
329 339
		File cssfile = new File(Toolbox.getTxmHomePath(), "css/transcriber.css")
330 340
		File cssTXMFile = new File(Toolbox.getTxmHomePath(), "css/txm.css")
......
333 343
			FileCopy.copy(cssfile, new File(htmlDir, "default/txm.css"));
334 344
			FileCopy.copy(cssfile, new File(htmlDir, "default/transcriber.css"));
335 345
		}
336
		
346

  
337 347
		//copy media files
338
		println "Copying media files if any (mp3, wav, mp4 or avi) "+trsfiles.size()+" texts"
339
		cpb = new ConsoleProgressBar(trsfiles.size());
340
		for (File txmFile : trsfiles) {
348
		println "Copying media files if any (mp3, wav, mp4 or avi) "+xmltxmFiles.size()+" texts"
349
		cpb = new ConsoleProgressBar(xmltxmFiles.size());
350
		for (File txmFile : xmltxmFiles) {
341 351
			cpb.tick()
342 352
			String txtname = txmFile.getName();
343 353
			int i = txtname.lastIndexOf(".");
......
346 356
			if (!mediaFile.exists()) mediaFile = new File(project.getSrcdir(), txtname + ".wav")
347 357
			if (!mediaFile.exists()) mediaFile = new File(project.getSrcdir(), txtname + ".mp4")
348 358
			if (!mediaFile.exists()) mediaFile = new File(project.getSrcdir(), txtname + ".avi")
349
			
359

  
350 360
			if (mediaFile.exists()) {
351 361
				File copy = new File(binDir, "media/"+mediaFile.getName())
352 362
				copy.getParentFile().mkdirs()
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/compiler.groovy (revision 2358)
118 118
	 * @return true, if successful
119 119
	 */
120 120
	public boolean run(Project project, List<File> xmlfiles, String corpusname, String projectname, File binDir) {
121
		Collections.sort(xmlfiles);
121
		
122 122
		//println "run compiler with $xmlfiles, $basename and $outdir"
123 123
		this.outdir = binDir;
124 124
		this.corpusname = corpusname;
......
144 144
		corpus.setDescription("Built with the XML-TRS import module");
145 145
		
146 146
		cqpFile = new File(binDir,"cqp/"+corpusname+".cqp");
147
		cqpFile.delete()
147 148
		new File(binDir,"cqp").mkdirs()
148 149
		new File(binDir,"data").mkdirs()
149 150
		new File(binDir,"registry").mkdirs()
......
157 158
		output.write("<txmcorpus lang=\"fr\">\n")
158 159
		output.close();
159 160

  
161
		println "TEXTS="+xmlfiles
162
		
160 163
		println("Compiling "+xmlfiles.size()+" files")
161 164
		ConsoleProgressBar cpb = new ConsoleProgressBar(xmlfiles.size())
162 165
		for (File txmFile :xmlfiles) {

Formats disponibles : Unified diff