Révision 3364

TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/export/ExportTextContentMacro.groovy (revision 3364)
1
// Copyright © 2020 ENS de Lyon, CNRS, University of Franche-Comté
1
// Copyright © 2020-2022 ENS de Lyon, CNRS, University of Franche-Comté
2 2
// @author mdecorde
3
// @author sheiden
3 4

  
4 5
// STANDARD DECLARATIONS
5 6
package org.txm.macro.export
......
21 22

  
22 23
// PARAMETERS
23 24

  
24
@Field @Option(name="exportDirectory", usage="results directory", widget="Folder", required=true, def="export")
25
File exportDirectory
25
@Field @Option(name="outputDirectory", usage="results output directory", widget="Folder", required=true, def="")
26
File outputDirectory
26 27

  
27
@Field @Option(name="lineSeparatorStructureName", usage="name of the structure to use to produce the output lines", widget="String", required=false, def="p")
28
def lineSeparatorStructureName
28
@Field @Option(name="wordProperty", usage="word property to export", widget="String", required=true, def="word")
29
def wordProperty
29 30

  
31
@Field @Option(name="oneWordPerLine", usage="output one word per line", widget="Boolean", required=false, def="false")
32
def oneWordPerLine
33

  
34
@Field @Option(name="oneSentencePerLine", usage="output one sentence per line", widget="Boolean", required=false, def="true")
35
def oneSentencePerLine
36

  
37
@Field @Option(name="sentenceStructureName", usage="name of the structure encoding sentences", widget="String", required=false, def="")
38
def sentenceStructureName
39

  
30 40
if (!ParametersDialog.open(this)) return
31 41

  
32 42
// BEGINNING
33 43

  
34
if (!exportDirectory.exists()) exportDirectory.mkdirs()
44
if (!outputDirectory.exists()) outputDirectory.mkdirs()
35 45

  
36 46
def corpus = corpusViewSelection
47
def corpusName = corpus.getName()
37 48
def CQI = CQPSearchEngine.getCqiClient()
38 49

  
39
def lineSeparatorStructure = corpus.getStructuralUnit(lineSeparatorStructureName)
// Guard on the 'wordProperty' macro parameter: when it is null or has no
// characters, print a hint and stop the script before any corpus access.
50
if (wordProperty == null || !(wordProperty.length() > 0)) {
51
	println "** Please provide a word property name in parameter 'wordProperty', for example 'word'. Aborting..."
52
	// NOTE(review): 1 is presumably a failure status; confirm how the macro runner interprets the return value.
	return 1
53
}
40 54

  
41
if (lineSeparatorStructure == null) {
42
	println "** The $lineSeparatorStructureName structure does not exist in the $corpus corpus"
43
	return
55
if (oneSentencePerLine && (sentenceStructureName == null || !(sentenceStructureName.length() > 0))) {
56
	println "** Please provide a name for the structure encoding sentences in parameter 'sentenceStructureName', or uncheck parameter 'oneSentencePerLine'. Aborting..."
57
	return 1
44 58
}
45 59

  
46
def breaks_pos = Arrays.asList(corpus.query(new CQLQuery("[]</"+lineSeparatorStructureName+">"),"test", false).getEnds())
47
println "End of structure positions = "+breaks_pos
60
if (oneSentencePerLine) {
48 61

  
49
println "Exporting $corpus text content to $exportDirectory..."
62
	lineSeparatorStructure = corpus.getStructuralUnit(sentenceStructureName)
50 63

  
51
def wordProperty = corpus.getWordProperty()
64
	if (lineSeparatorStructure == null) {
65
		println "** No $sentenceStructureName structure in the $corpus corpus. Aborting..."
66
		return 1
67
	}
68
	
69
	breaks_pos = Arrays.asList(corpus.query(new CQLQuery("[]</"+sentenceStructureName+">"),"test", false).getEnds())
70
}
71

  
// Announce on the console which corpus is exported and into which directory.
72
println "Exporting $corpus text content to $outputDirectory..."
73

  
// Look up the corpus property whose name was given in the 'wordProperty'
// parameter; its qualified name is used further down for the
// CQI.cpos2Id / CQI.id2Str calls that fetch the word values.
74
def wordPropertyI = corpus.getProperty(wordProperty)
75

  
// A null lookup result is treated as "the corpus has no such word property":
// report it and stop the script.
76
if (wordPropertyI == null) {
77
	println "** No '$wordProperty' word property in the $corpus corpus. Aborting..."
78
	return 1
79
}
80

  
52 81
def textidProperty = corpus.getStructuralUnit("text").getProperty("id")
53 82
def textStartBoundaries = corpus.getTextStartLimits()
54 83
def textEndBoundaries = corpus.getTextEndLimits()
55 84
int[] struct_pos = CQI.cpos2Struc(textidProperty.getQualifiedName(), textStartBoundaries)
56 85
String[] textids =  CQI.struc2Str(textidProperty.getQualifiedName(), struct_pos)
86

  
57 87
if (textStartBoundaries.size() == 1) {
58 88
	println "1 text"
59 89
	} else {
......
64 94
	int start = textStartBoundaries[i]
65 95
	int end = textEndBoundaries[i]
66 96

  
67
	File txtFile = new File(exportDirectory, textids[i]+".txt")
97
	File txtFile = new File(outputDirectory, textids[i]+".txt")
68 98
	print "."
69 99
	def writer = txtFile.newWriter("UTF-8")
70 100
	int[] positions = new int[end - start + 1]
......
72 102
	for (int p : start..end) {
73 103
		positions[c++] = p
74 104
	}
75
	int[] idx = CQI.cpos2Id(wordProperty.getQualifiedName(), positions)
76
	def words = CQI.id2Str(wordProperty.getQualifiedName(), idx)
105
	int[] idx = CQI.cpos2Id(wordPropertyI.getQualifiedName(), positions)
106
	def words = CQI.id2Str(wordPropertyI.getQualifiedName(), idx)
77 107
	def tmp = []
78 108
	for (int j = 0 ; j < positions.length ; j++) {
79 109
		int p = positions[j]
80 110
		tmp << words[j]
81
		if (breaks_pos.contains(p)) {
82
			writer.println LangFormater.format(tmp.join(" "), corpus.getLang())
111
		if (oneSentencePerLine && breaks_pos.contains(p)) {
112
			if (oneWordPerLine) {
113
				tmp.each { word -> writer.println word }
114
			} else {
115
				writer.println LangFormater.format(tmp.join(" "), corpus.getLang())
116
			}
83 117
			tmp = []
84 118
		} 
85 119
	}
86 120
	if (tmp.size() > 0) {
87
		writer.println LangFormater.format(tmp.join(" "), corpus.getLang())
121
		if (oneWordPerLine) {
122
			tmp.each { word -> writer.println word }
123
		} else {
124
			writer.println LangFormater.format(tmp.join(" "), corpus.getLang())
125
		}
88 126
	} 
89 127
	writer.close()
90 128
}  
91 129

  
92
println "\nDone, result saved in "+exportDirectory.getAbsolutePath()
130
println "\nDone, result saved in "+outputDirectory.getAbsolutePath()

Formats disponibles : Unified diff