/ - Diff - Plateforme TXM - Forge du Centre Blaise Pascal

Révision 2792

     import java.io.File;
     import org.txm.*
     import org.txm.scripts.importer.SAttributesListener
     import org.txm.importer.SAttributesListener
     import org.txm.importer.cwb.*
     import org.txm.utils.ConsoleProgressBar
     import org.txm.utils.logger.Log
-...
     class XTZCompiler extends Compiler {
     	SAttributesListener sattrsListener; // store scanned structures
     	private def anatypes = new HashSet<String>() // store scanned word attributes
     	String regPath;
     	String corpusname;
-...
     		doNormalizeAnaValues = "true".equals(module.getProject().getPreferencesScope().getNode("import").get(TBXPreferences.NORMALISEANAVALUES, "false"))
+    	}
     	/**
     	 * the Text list
     	 */
     	def texts;
     	/**
     	 * the Text to process (dirty or newer than the cqp files) list
     	 */
     	def textsToProcess;
     	@Override
     	public void process(List<String> orderedTextIDs) {
     		super.process(orderedTextIDs); // set member
-...
     		if (orderedTextIDs == null) orderedTextIDs = module.getProject().getTextsID() ;
     		Project project = module.getProject();
     		texts = orderedTextIDs.collect() { id -> module.getProject().getText(id) }
     		textsToProcess = getTextsToProcess(texts)
     		// get all structures
     		sattrsListener = new SAttributesListener() // will store the structure and properties declaration
     		sattrsListener.W = wtag
     //		File regFile = new File(regPath) // The properties recovery must be done using each Texts property declarations
     //		if (project.getDoUpdate() && regFile.exists() ) { // this optimisation must be done before clearing the corpus files
     //			println "Recovering structures&properties declaration from previous import registry file $regFile..."
     //			ReadRegistryFile rrf = new ReadRegistryFile(regFile);
     //
     //			sattrsListener.initialize(rrf.getPAttributes(), rrf.getSAttributesMap(), rrf.getSAttributesProfs())
     //
     //			System.out.println("	pAttributes: "+sattrsListener.getAnatypes());
     //			System.out.println("	sAttributes: "+sattrsListener.getStructs());
     //		}
     		CorpusBuild corpus = project.getCorpusBuild(project.getName(), MainCorpus.class);
     		if (corpus != null) {
     			if (project.getDoUpdate()) {
-...
     	 * Scan all XML-TXM files to find out structures and word properties
     	 */
     	public boolean doScanStep() {
     		// Scan step: collects the structures and word properties declared in the XML-TXM files.
     		// Returns true when every text was scanned, false as soon as one text raises an exception.
     		// get all anatypes
     		// (re)creates the listener from a scan of the files found in inputDirectory
     		sattrsListener = SAttributesListener.scanFiles(inputDirectory, wtag)
     		// NOTE(review): this local variable hides the 'texts' member of the class inside this method
     		def texts = module.getProject().getTexts()
     		println "-- Scanning structures&properties to create for "+texts.size()+" texts..."
     //		def initialTypesValues = new HashSet<String>()
     //		initialTypesValues.addAll(sattrsListener.getAnatypes())
     		// get all word properties
     		// the progress bar is sized with the number of texts and ticked once per text
     		ConsoleProgressBar cpb = new ConsoleProgressBar(texts.size())
     		for (Text t : texts) {
     			try {
     				cpb.tick();
     				// fills the 'anatypes' member with the ana/@type values found in the file
     				// NOTE(review): the CQP step reads sattrsListener.getAnatypes() -- confirm this call is still needed
     				getAnaTypes(t.getXMLTXMFile())
     				sattrsListener.scanFile(t.getXMLTXMFile()); // results saved in the 'sattrsListener' data
     				//				println "LISTENER RESULT with ${xmlFile.getName()}: "+listener
     				//				println " prof: "+listener.getStructs()
     				//				println " prof: "+listener.getProfs()
     				//				println " path: "+listener.structPath
     			} catch (Exception e) {
     				// any failure on one text aborts the whole scan: remaining texts are not scanned
     				println "Error while processing $t text XML-TXM file : "+t.getSource()+". Error: "+e
     				e.printStackTrace();
     				return false;
+    			}
+    		}
     //		if (initialTypesValues != sattrsListener.getAnatypes()) { // the word properties changed all CQP files must be recreated
     //			textsToProcess.clear()
     //			textsToProcess.addAll(texts)
     //		}
     		// presumably terminates the console line used by the progress bar -- TODO confirm
     		println ""
     		return true;
+    	}
     	/**
     	 * Collects the word property names declared in one XML file.
     	 *
     	 * Streams through the file with StAX and, for every "ana" element found
     	 * inside a word element (local name equal to 'wtag'), adds the value of its
     	 * "type" attribute, minus its first character, to the 'anatypes' member.
     	 *
     	 * The input stream and the StAX reader are always released, even when the
     	 * file is malformed and the parser throws.
     	 *
     	 * @param xmlFile the XML file to scan
     	 */
     	private void getAnaTypes(File xmlFile) {
     		String ANA = "ana"
     		String TYPE = "type"

     		def inputData = xmlFile.toURI().toURL().openStream();
     		def parser = null;
     		try {
     			parser = XMLInputFactory.newInstance().createXMLStreamReader(inputData);

     			boolean start = false; // true while the parser is inside a word element
     			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
     				if (event == XMLStreamConstants.START_ELEMENT) { // start elem
     					if (wtag.equals(parser.getLocalName())) {
     						start = true;
     					} else if (start && ANA.equals(parser.getLocalName())) { // ana elem
     						for (int i = 0 ; i < parser.getAttributeCount(); i++) { // find @type
     							if (TYPE.equals(parser.getAttributeLocalName(i))) { // @type
     								// NOTE(review): assumes the value always starts with '#'; an empty @type would throw here -- confirm
     								anatypes.add(parser.getAttributeValue(i).substring(1)); // remove the #
     								break;
     							}
     						}
     					}
     				} else if (event == XMLStreamConstants.END_ELEMENT) { // end elem
     					if (wtag.equals(parser.getLocalName())) {
     						start = false;
     					}
     				}
     			}
     		} finally {
     			// XMLStreamReader.close() does not close the underlying stream: both must be closed,
     			// and the stream must be closed even if closing the reader fails
     			try {
     				if (parser != null) parser.close();
     			} finally {
     				inputData.close();
     			}
     		}
     	}
     	def cqpFiles = [] // ordered cqp files to concat before calling cwb-encode
     	int cqpFilesUpdated = 0;
     	public boolean doCQPStep() {
     		cqpDirectory.mkdir(); // if not created
     		def texts = orderedTextIDs.collect() { id -> module.getProject().getText(id) }
     	def getTextsToProcess(def texts) {
     		def textsToProcess = texts.findAll() { text ->
     			File xmlFile = text.getXMLTXMFile()
     			String textname = text.getName()
-...
     			return true
+    		}
     		return textsToProcess
+    	}
     	def cqpFiles = [] // ordered cqp files to concat before calling cwb-encode
     	int cqpFilesUpdated = 0;
     	public boolean doCQPStep() {
     		cqpDirectory.mkdir(); // if not created
     		println "-- Building CQP files ${textsToProcess.size()}/${texts.size()}..."
     		ConsoleProgressBar cpb = new ConsoleProgressBar(textsToProcess.size())
-...
     			cqpFilesUpdated++
     			XTZCompilerStep step = new XTZCompilerStep(xmlFile, cqpFile, textname, corpusname, "default", anatypes, wtag)
     			XTZCompilerStep step = new XTZCompilerStep(xmlFile, cqpFile, textname, corpusname, "default", sattrsListener.getAnatypes(), wtag)
     			step.setNormalizeAnaValues(doNormalizeAnaValues)
     			step.setNormalizeAttributeValues(doNormalizeAttributeValues)
     			if (!step.process()) {
-...
     		CwbEncode cwbEn = new CwbEncode()
     		cwbEn.setDebug(debug)
     		List<String> pargs = []
     		pargs.add("id")
     		for (String ana : anatypes) {
     		List<String> pargs = ["id"]
     		for (String ana : sattrsListener.getAnatypes()) {
     			if (ana == "word") continue; // no need to be added, cwb will declared it automatically
     			if (ana == "id") continue; // no need to be added, we did it already
     			pargs.add(ana)
+    		}
     		String[] pAttrs = pargs
     		def structs = sattrsListener.getStructs()
-...
     		List<String> sargs = new ArrayList<String>()
     		def tmpTextAttrs = []
     		for (String name : structs.keySet()) {
     			if (name == "txmcorpus") continue;
     			if (name == "text") {
     				for (String value : structs.get(name)) // append the attributes
     					tmpTextAttrs << value // added after
-...
     				return false;
+    			}
     			new File(regPath).delete()// ensure the registry file is deleted
     			if (!cwbEn.run(outputDirectory.getAbsolutePath() + "/$corpusname",
     				allcqpFile.getAbsolutePath(), regPath, pAttributes, sAttributes, false)) {
     				println "** cwb-encode did not ends well. Activate finer logs to see details."
     				println "** cwb-encode did not ends well. Please activate a finer log level to see more details."
     				return false;
+    			}

     package org.txm.scripts.importer.xml;
     import java.util.ArrayList
     import java.util.Collections
     import org.txm.importer.SAttributesListener
     import org.txm.importer.cwb.BuildCwbEncodeArgs
     import org.txm.importer.cwb.CwbEncode
     import org.txm.importer.cwb.CwbMakeAll

     //
     package org.txm.scripts.importer.bfm
     import org.txm.Toolbox;
     import org.txm.Toolbox;
     import org.txm.importer.SAttributesListener
     import org.txm.importer.cwb.*
     import org.txm.scripts.importer.*;
     import org.txm.scripts.*;

tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/tmx/compiler.groovy (revision 2792)
32	32	import org.txm.importer.cwb.CwbEncode
33	33	import org.txm.importer.cwb.CwbMakeAll
34	34	import org.txm.importer.cwb.PatchCwbRegistry;
	35	import org.txm.importer.SAttributesListener
35	36	import org.txm.scripts.importer.*;
36	37	import org.txm.scripts.*;
37	38	import org.txm.importer.scripts.xmltxm.*;

Formats disponibles : Unified diff

Laboratoire ICAR » Plateforme TXM

Révision 2792