/ - Diff - Plateforme TXM - Forge du Centre Blaise Pascal

Révision 2358

tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/FixINAXML.groovy (revision 2358)
1	1	import org.txm.utils.ConsoleProgressBar
2	2
3		def directory = new File("/home/mdecorde/TEMP/ANTRACT/AF/trs")
	3	def directory = new File("C:\\Users\\mdecorde\\xml\\trs")
4	4	println "Fixing $directory"
5	5	def files = directory.listFiles()
6	6	ConsoleProgressBar cpb = new ConsoleProgressBar(files.size())

+    }
     final HashMap<String, String> textordersInfo = new HashMap<String, String>();
     for (String t : metadatas.keySet()) {
     	def ti = metadatas.get(t)
     	for (org.txm.metadatas.Entry e : ti) {
     		if ("textorder".equals(e.getId())) {
     			textordersInfo[t+".trs"] = ti.value()
     if (metadatas != null) {
     	for (String t : metadatas.keySet()) {
     		def ti = metadatas.get(t)
     		for (org.txm.metadatas.Entry e : ti) {
     			if ("textorder".equals(e.getId())) {
     				String k = ""+t+".xml" // the sort test will use the xml-txm file names
     				textordersInfo[k] = e.value
+    			}
+    		}
+    	}
+    }
+    }
     File propertyFile = new File(srcDir, "import.properties")//default
     Properties props = new Properties();
     String[] metadatasToKeep;
-...
     	FileInputStream input = new FileInputStream(propertyFile);
     	props.load(input);
     	input.close();
     	if (props.getProperty("removeInterviewer") != null)
     		removeInterviewer = Boolean.parseBoolean(props.get("removeInterviewer").toString());
     	if (props.getProperty("ignoreTranscriberMetadata") != null)
-...
     		csvHeaderNumber = props.get("csvHeaderNumber").toString().split("|");
     	//if (props.getProperty("includeComments") != null)
     	//	includeComments = props.get("includeComments").toString();
     	println "import properties: "
     	println " removeInterviewer: "+removeInterviewer
     	println " ignoreTranscriberMetadata: "+ignoreTranscriberMetadata
-...
     				srcDir = new File(binDir, "src");
     			println ""
+    		}
     		// select only trs files
     		String ext = "trs";
     		ArrayList<File> trsfiles = srcDir.listFiles(); //find all trs files
-...
     				i--;
+    			}
+    		}
     		if (trsfiles.size() == 0) {
     			println ("No transcription file (*.trs) found in "+srcDir.getAbsolutePath()+". Aborting.")
     			return false;
+    		}
     		if (MONITOR != null) MONITOR.worked(1, "IMPORTER")
     		if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
     		println "-- IMPORTER"
-...
     			return;
+    		}
     		if (MONITOR != null) MONITOR.worked(20)
     		println "-- Xml Validation"
     		if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
     		for (File infile : txmDir.listFiles()) {
-...
     				infile.delete();
+    			}
+    		}
     		if (MONITOR != null) MONITOR.worked(5)
     		if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
     		println "-- Remove interviewer: "+removeInterviewer
-...
     					int idx = filename.indexOf(".xml");
     					if (idx > 0)
     						filename = filename.substring(0, idx);
     					ArrayList<Pair<String, String>> metas = metadatas.get(filename)
     					//println "filename=$filename metas= $metas"
     					for (Pair p : metas) {
-...
+    				}
+    			}
+    		}
     		if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
     		if (MONITOR != null) MONITOR.worked(20, "ANNOTATE")
     		boolean annotationSuccess = false;
     		if (annotate) {
     			println "-- ANNOTATE - Running NLP tools"
-...
     				annotationSuccess = true;
+    			}
+    		}
+    	}
     	trsfiles = new ArrayList<File>(Arrays.asList(txmDir.listFiles()));
     	} // end of importer and annotate steps
     	xmltxmFiles = new ArrayList<File>(Arrays.asList(txmDir.listFiles()));
     	if (metadatas != null && metadatas.getPropertyNames().contains("textorder")) {
     		Collections.sort(trsfiles, new Comparator<File>() {
     		Collections.sort(xmltxmFiles, new Comparator<File>() {
     					public int compare(File f1, File f2) {
     						String o1 = textorder[f1.getName()];
     						String o2 = textorder[f2.getName()];
     						String o1 = textordersInfo[f1.getName()];
     						String o2 = textordersInfo[f2.getName()];
     						if (o1 == null && o2 == null) {
     							return f1.compareTo(f2);
     						} else if (o1 == null) {
-...
+    					}
     				});
     	} else {
     		Collections.sort(trsfiles);
     		Collections.sort(xmltxmFiles);
+    	}
     	if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
     	if (MONITOR != null) MONITOR.worked(25, "COMPILING")
     	println "--COMPILING - Building Search Engine indexes"
     	def comp = new compiler()
     	if(debug) comp.setDebug();
     	comp.removeInterviewers(removeInterviewer);
     	comp.setIgnoreTranscriberMetadata(ignoreTranscriberMetadata);
     	if (!comp.run(project, trsfiles, corpusname, "default", binDir)) {
     	if (!comp.run(project, xmltxmFiles, corpusname, "default", binDir)) {
     		println "Failed to compile files";
     		return;
+    	}
     	if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
     	File htmlDir = new File(binDir,"HTML/$corpusname");
     	htmlDir.deleteDir()
     	htmlDir.mkdirs();
     	if (build_edition) {
     		if (MONITOR != null) MONITOR.worked(20, "EDITION")
     		println "-- EDITION - Building editions"
     		def second = 0
     		println "Paginating "+trsfiles.size()+" texts"
     		ConsoleProgressBar cpb = new ConsoleProgressBar(trsfiles.size());
     		for (File txmFile : trsfiles) {
     		println "Paginating "+xmltxmFiles.size()+" texts"
     		ConsoleProgressBar cpb = new ConsoleProgressBar(xmltxmFiles.size());
     		for (File txmFile : xmltxmFiles) {
     			cpb.tick()
     			String txtname = txmFile.getName();
     			int i = txtname.lastIndexOf(".");
     			if(i > 0) txtname = txtname.substring(0, i);
     			List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang);
     			List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang);
     			Text t = project.getText(txtname)
     			if (t == null) {
     				new Text(project);
     				t = new Text(project);
     				t.setName(txtname);
     				t.setSourceFile(txmFile)
     				t.setTXMFile(txmFile)
+    			}
     			t.setSourceFile(txmFile)
     			t.setTXMFile(txmFile)
     			Edition edition = t.getEdition("default")
     			if (edition != null) {
     				edition.delete();
     				edition = null;
+    			}
     			def ed = new pager(txmFile, htmlDir, txtname, NoSpaceBefore, NoSpaceAfter, wordsPerPage, basename, page_element, metadatas);
     			Edition edition = new Edition(t);
     			def ed = new pager(txmFile, htmlDir, txtname, NoSpaceBefore, NoSpaceAfter, wordsPerPage, basename, page_element, metadatas);
     			edition = t.getEdition("default")
     			edition = new Edition(t);
     			edition.setName("default");
     			edition.setIndex(htmlDir.getAbsolutePath());
     			for (i = 0 ; i < ed.getPageFiles().size();) {
     				File f = ed.getPageFiles().get(i);
-...
+    			}
+    		}
     		cpb.done()
     		//copy transcriber.css
     		File cssfile = new File(Toolbox.getTxmHomePath(), "css/transcriber.css")
     		File cssTXMFile = new File(Toolbox.getTxmHomePath(), "css/txm.css")
-...
     			FileCopy.copy(cssfile, new File(htmlDir, "default/txm.css"));
     			FileCopy.copy(cssfile, new File(htmlDir, "default/transcriber.css"));
+    		}
     		//copy media files
     		println "Copying media files if any (mp3, wav, mp4 or avi) "+trsfiles.size()+" texts"
     		cpb = new ConsoleProgressBar(trsfiles.size());
     		for (File txmFile : trsfiles) {
     		println "Copying media files if any (mp3, wav, mp4 or avi) "+xmltxmFiles.size()+" texts"
     		cpb = new ConsoleProgressBar(xmltxmFiles.size());
     		for (File txmFile : xmltxmFiles) {
     			cpb.tick()
     			String txtname = txmFile.getName();
     			int i = txtname.lastIndexOf(".");
-...
     			if (!mediaFile.exists()) mediaFile = new File(project.getSrcdir(), txtname + ".wav")
     			if (!mediaFile.exists()) mediaFile = new File(project.getSrcdir(), txtname + ".mp4")
     			if (!mediaFile.exists()) mediaFile = new File(project.getSrcdir(), txtname + ".avi")
     			if (mediaFile.exists()) {
     				File copy = new File(binDir, "media/"+mediaFile.getName())
     				copy.getParentFile().mkdirs()

     	 * @return true, if successful
     	 */
     	public boolean run(Project project, List<File> xmlfiles, String corpusname, String projectname, File binDir) {
     		Collections.sort(xmlfiles);
     		//println "run compiler with $xmlfiles, $basename and $outdir"
     		this.outdir = binDir;
     		this.corpusname = corpusname;
-...
     		corpus.setDescription("Built with the XML-TRS import module");
     		cqpFile = new File(binDir,"cqp/"+corpusname+".cqp");
     		cqpFile.delete()
     		new File(binDir,"cqp").mkdirs()
     		new File(binDir,"data").mkdirs()
     		new File(binDir,"registry").mkdirs()
-...
     		output.write("<txmcorpus lang=\"fr\">\n")
     		output.close();
     		println "TEXTS="+xmlfiles
     		println("Compiling "+xmlfiles.size()+" files")
     		ConsoleProgressBar cpb = new ConsoleProgressBar(xmlfiles.size())
     		for (File txmFile :xmlfiles) {

Formats disponibles : Unified diff

Laboratoire ICAR » Plateforme TXM

Révision 2358