/ - Diff - Plateforme TXM - Forge du Centre Blaise Pascal

Révision 3364

     // Copyright © 2020 ENS de Lyon, CNRS, University of Franche-Comté
     // Copyright © 2020-2022 ENS de Lyon, CNRS, University of Franche-Comté
     // @author mdecorde
     // @author sheiden
     // STANDARD DECLARATIONS
     package org.txm.macro.export
-...
     // PARAMETERS
     // Each @Field @Option pair below declares one script-level field of the macro;
     // the script then passes itself to ParametersDialog.open(this).
     // NOTE(review): the option attributes (usage, widget, required, def) are presumably
     // interpreted by ParametersDialog to build the dialog — confirm.
     // NOTE(review): this listing comes from a revision diff page, so exportDirectory and
     // outputDirectory are presumably the removed and the added form of the same option
     // (both names are used further down) — confirm against the actual revision.
     @Field @Option(name="exportDirectory", usage="results directory", widget="Folder", required=true, def="export")
     File exportDirectory
     @Field @Option(name="outputDirectory", usage="results output directory", widget="Folder", required=true, def="")
     File outputDirectory
     // Name of the structure whose ends delimit the output lines.
     @Field @Option(name="lineSeparatorStructureName", usage="name of the structure to use to produce the output lines", widget="String", required=false, def="p")
     def lineSeparatorStructureName
     // Name of the word property whose values are written to the output files.
     @Field @Option(name="wordProperty", usage="word property to export", widget="String", required=true, def="word")
     def wordProperty
     // When set, each buffered word is written on its own line instead of a space-joined line.
     @Field @Option(name="oneWordPerLine", usage="output one word per line", widget="Boolean", required=false, def="false")
     def oneWordPerLine
     // When set, sentenceStructureName is required and its structure ends are used as line breaks.
     @Field @Option(name="oneSentencePerLine", usage="output one sentence per line", widget="Boolean", required=false, def="true")
     def oneSentencePerLine
     @Field @Option(name="sentenceStructureName", usage="name of the structure encoding sentences", widget="String", required=false, def="")
     def sentenceStructureName
     // Stop the macro when ParametersDialog.open(this) returns a false value
     // (NOTE(review): presumably the user cancelled the dialog — confirm).
     if (!ParametersDialog.open(this)) return
     // BEGINNING
     // Create each target directory (including missing parents) when it does not exist yet.
     if (!exportDirectory.exists()) exportDirectory.mkdirs()
     if (!outputDirectory.exists()) outputDirectory.mkdirs()
     // corpusViewSelection is not defined in this listing — presumably supplied by the macro environment (TODO confirm).
     def corpus = corpusViewSelection
     // NOTE(review): corpusName is not read anywhere in the visible lines.
     def corpusName = corpus.getName()
     // CQI client used below for the cpos2Struc / struc2Str lookups.
     def CQI = CQPSearchEngine.getCqiClient()
     // Structure named by the lineSeparatorStructureName parameter; null-checked below.
     def lineSeparatorStructure = corpus.getStructuralUnit(lineSeparatorStructureName)
     // Guard: a null or empty wordProperty name aborts the macro with return value 1.
     if (wordProperty == null || !(wordProperty.length() > 0)) {
     	println "** Please provide a word property name in parameter 'wordProperty', for example 'word'. Aborting..."
     	return 1
+    }
     // Guard: abort (bare return, no value) when the line separator structure was not found.
     // NOTE(review): no closing brace for this block is visible before the next 'if' —
     // likely an artifact of the diff rendering; confirm against the actual revision.
     if (lineSeparatorStructure == null) {
     	println "** The $lineSeparatorStructureName structure does not exist in the $corpus corpus"
     	return
     // Guard: sentence-per-line output requires a non-empty sentence structure name.
     if (oneSentencePerLine && (sentenceStructureName == null || !(sentenceStructureName.length() > 0))) {
     	println "** Please provide a name for the structure encoding sentences in parameter 'sentenceStructureName', or uncheck parameter 'oneSentencePerLine'. Aborting..."
     	return 1
+    }
     // Run the CQL query "[]</STRUCT>" on the corpus and keep getEnds() of the result as a list.
     // NOTE(review): presumably the corpus positions of the last token of each STRUCT occurrence — confirm.
     // NOTE(review): if getEnds() returns a primitive int[], check that Arrays.asList yields a list of
     // positions rather than a one-element list, since breaks_pos.contains(p) is tested further down.
     def breaks_pos = Arrays.asList(corpus.query(new CQLQuery("[]</"+lineSeparatorStructureName+">"),"test", false).getEnds())
     println "End of structure positions = "+breaks_pos
     // Sentence mode: switch the separator structure and the break positions to the sentence structure.
     if (oneSentencePerLine) {
     println "Exporting $corpus text content to $exportDirectory..."
     	lineSeparatorStructure = corpus.getStructuralUnit(sentenceStructureName)
     // NOTE(review): this re-declares wordProperty, already declared as an @Field parameter;
     // presumably a removed line of the diff — confirm.
     def wordProperty = corpus.getWordProperty()
     	if (lineSeparatorStructure == null) {
     		println "** No $sentenceStructureName structure in the $corpus corpus. Aborting..."
     		return 1
+    	}
     	breaks_pos = Arrays.asList(corpus.query(new CQLQuery("[]</"+sentenceStructureName+">"),"test", false).getEnds())
+    }
     println "Exporting $corpus text content to $outputDirectory..."
     // Resolve the property object from the wordProperty name; abort with 1 when the corpus has none.
     def wordPropertyI = corpus.getProperty(wordProperty)
     if (wordPropertyI == null) {
     	println "** No '$wordProperty' word property in the $corpus corpus. Aborting..."
     	return 1
+    }
     // "id" property of the "text" structure, and the text start/end limits of the corpus.
     def textidProperty = corpus.getStructuralUnit("text").getProperty("id")
     def textStartBoundaries = corpus.getTextStartLimits()
     def textEndBoundaries = corpus.getTextEndLimits()
     // Map each text start position to a structure number, then each number to its string value;
     // textids[i] is used further down as the base name of the output file.
     int[] struct_pos = CQI.cpos2Struc(textidProperty.getQualifiedName(), textStartBoundaries)
     String[] textids =  CQI.struc2Str(textidProperty.getQualifiedName(), struct_pos)
     // Report the number of texts; only the single-text branch is visible here,
     // the rest of the 'else' branch is hidden by the "-..." elision.
     if (textStartBoundaries.size() == 1) {
     	println "1 text"
     	} else {
-...
     	// NOTE(review): index i is declared in the elided lines — presumably a loop over the texts; confirm.
     	// First and last corpus position of text i.
     	int start = textStartBoundaries[i]
     	int end = textEndBoundaries[i]
     	// Output file named after the text id, with a ".txt" extension.
     	// NOTE(review): txtFile is declared twice (exportDirectory, then outputDirectory);
     	// presumably the removed and the added version of one diff line — confirm.
     	File txtFile = new File(exportDirectory, textids[i]+".txt")
     	File txtFile = new File(outputDirectory, textids[i]+".txt")
     	// Print one dot per processed text.
     	print "."
     	def writer = txtFile.newWriter("UTF-8")
     	// One slot per corpus position from start to end, both included.
     	int[] positions = new int[end - start + 1]
-...
     	// NOTE(review): counter c is declared in the elided lines.
     	for (int p : start..end) {
     		positions[c++] = p
+    	}
     	// Translate the positions to property value ids, then the ids to strings.
     	// NOTE(review): idx and words are each declared twice (wordProperty, then wordPropertyI);
     	// presumably the removed and the added version of the same two lines — confirm.
     	int[] idx = CQI.cpos2Id(wordProperty.getQualifiedName(), positions)
     	def words = CQI.id2Str(wordProperty.getQualifiedName(), idx)
     	int[] idx = CQI.cpos2Id(wordPropertyI.getQualifiedName(), positions)
     	def words = CQI.id2Str(wordPropertyI.getQualifiedName(), idx)
     	// Buffer of the words collected since the last written line.
     	def tmp = []
     	for (int j = 0 ; j < positions.length ; j++) {
     		int p = positions[j]
     		tmp << words[j]
     		// NOTE(review): this 'if' has no visible closing brace and is followed by a second test of the
     		// same position; presumably the removed form of the flush, the next 'if' being its replacement.
     		if (breaks_pos.contains(p)) {
     			writer.println LangFormater.format(tmp.join(" "), corpus.getLang())
     		// At a break position in sentence mode: write the buffered words, either one per line
     		// or joined by spaces and passed through LangFormater.format with the corpus language,
     		// then start a new buffer.
     		if (oneSentencePerLine && breaks_pos.contains(p)) {
     			if (oneWordPerLine) {
     				tmp.each { word -> writer.println word }
     			} else {
     				writer.println LangFormater.format(tmp.join(" "), corpus.getLang())
+    			}
     			tmp = []
+    		}
+    	}
     	// Write the words still buffered after the last break position.
     	// NOTE(review): the first println below duplicates the 'else' branch that follows;
     	// presumably the removed form of this flush — confirm.
     	if (tmp.size() > 0) {
     		writer.println LangFormater.format(tmp.join(" "), corpus.getLang())
     		if (oneWordPerLine) {
     			tmp.each { word -> writer.println word }
     		} else {
     			writer.println LangFormater.format(tmp.join(" "), corpus.getLang())
+    		}
+    	}
     	// NOTE(review): the writer is closed only on this path; no try/finally is visible around the writes.
     	writer.close()
+    }
     // Final message with the absolute path of the result directory (both the exportDirectory
     // and outputDirectory forms appear in this listing).
     println "\nDone, result saved in "+exportDirectory.getAbsolutePath()
     println "\nDone, result saved in "+outputDirectory.getAbsolutePath()

Formats disponibles : Unified diff

Laboratoire ICAR » Plateforme TXM

Révision 3364