Révision 3624

TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/export/ExportTextsContentMacro.groovy (revision 3624)
16 16
import org.txm.rcp.swt.widget.parameters.*
17 17

  
18 18
if (!(corpusViewSelection instanceof CQPCorpus)) {
19
	println "Please select a corpus"
20
	return
21
}
19
	monitorShowError("Erreur : la sélection dans la vue Corpus n'est pas un corpus ou un sous-corpus ($corpusViewSelection).")
20
	return false
21
} 
22 22

  
23 23
// PARAMETERS
24 24

  
......
31 31
@Field @Option(name="oneWordPerLine", usage="output one word per line", widget="Boolean", required=false, def="false")
32 32
def oneWordPerLine
33 33

  
34
@Field @Option(name="oneSentencePerLine", usage="output one sentence per line", widget="Boolean", required=false, def="true")
34
@Field @Option(name="oneSentencePerLine", usage="output one sentence per line", widget="Boolean", required=false, def="false")
35 35
def oneSentencePerLine
36 36

  
37 37
@Field @Option(name="sentenceStructureName", usage="name of the structure encoding sentences", widget="String", required=false, def="")
......
44 44
if (!outputDirectory.exists()) outputDirectory.mkdirs()
45 45

  
46 46
def corpus = corpusViewSelection
47
corpus.compute()
48
def mainCorpus = corpus.getMainCorpus()
47 49
def corpusName = corpus.getName()
48 50
def CQI = CQPSearchEngine.getCqiClient()
49 51

  
......
78 80
	return 1
79 81
}
80 82

  
81
def textidProperty = corpus.getStructuralUnit("text").getProperty("id")
82
def textStartBoundaries = corpus.getTextStartLimits()
83
def textEndBoundaries = corpus.getTextEndLimits()
83
def textidProperty = mainCorpus.getStructuralUnit("text").getProperty("id")
84
def textStartBoundaries = mainCorpus.getTextStartLimits()
85
def textEndBoundaries = mainCorpus.getTextEndLimits()
86

  
84 87
int[] struct_pos = CQI.cpos2Struc(textidProperty.getQualifiedName(), textStartBoundaries)
85
String[] textids =  CQI.struc2Str(textidProperty.getQualifiedName(), struct_pos)
88
String[] allTextIds =  CQI.struc2Str(textidProperty.getQualifiedName(), struct_pos)
86 89

  
87
if (textStartBoundaries.size() == 1) {
88
	println "1 text"
90
corpusTextIds = new HashSet<String>(corpus.getStructuralUnit("text").getProperty("id").getValues(corpus))
91

  
92
// https://stackoverflow.com/questions/150750/hashset-vs-list-performance
93

  
94
if (corpusTextIds.size() == 1) {
95
	println "1 text ("+corpusTextIds+")"
89 96
	} else {
90
	println ""+textStartBoundaries.size()+" texts"
97
	println ""+corpusTextIds.size()+" texts ("+corpusTextIds+")"
91 98
}
92 99

  
93
for (int i = 0 ; i < textStartBoundaries.size() ; i++) {
94
	int start = textStartBoundaries[i]
95
	int end = textEndBoundaries[i]
100
for (int i = 0; i < textStartBoundaries.size(); i++) {
96 101

  
97
	File txtFile = new File(outputDirectory, textids[i]+".txt")
98
	print "."
99
	def writer = txtFile.newWriter("UTF-8")
100
	int[] positions = new int[end - start + 1]
101
	int c = 0
102
	for (int p : start..end) {
103
		positions[c++] = p
104
	}
105
	int[] idx = CQI.cpos2Id(wordPropertyI.getQualifiedName(), positions)
106
	def words = CQI.id2Str(wordPropertyI.getQualifiedName(), idx)
107
	def tmp = []
108
	for (int j = 0 ; j < positions.length ; j++) {
109
		int p = positions[j]
110
		tmp << words[j]
111
		if (oneSentencePerLine && breaks_pos.contains(p)) {
112
			if (oneWordPerLine) {
113
				tmp.each { word -> writer.println word }
114
			} else {
115
				writer.println LangFormater.format(tmp.join(" "), corpus.getLang())
116
			}
117
			tmp = []
118
		} 
119
	}
120
	if (tmp.size() > 0) {
121
		if (oneWordPerLine) {
122
			tmp.each { word -> writer.println word }
123
		} else {
124
			writer.println LangFormater.format(tmp.join(" "), corpus.getLang())
125
		}
126
	} 
127
	writer.close()
102
    // Export text number i only when its id is one of the ids collected in corpusTextIds;
    // every other entry of allTextIds is skipped without creating a file.
    if (corpusTextIds.contains(allTextIds[i])) {
103
        
104
        int start = textStartBoundaries[i]
105
        int end = textEndBoundaries[i]
106
        // One output file per text, named "<text id>.txt" inside outputDirectory.
        File txtFile = new File(outputDirectory, allTextIds[i] + ".txt")
107

  
108
        // NOTE(review): the writer is only closed by the explicit close() at the end of this
        // block, so it stays open if any call in between throws.
        def writer = txtFile.newWriter("UTF-8")
109
        // Build the array of every position from start to end, both bounds included.
        int [] positions = new int [end - start + 1]
110
        int c = 0
111
        for (int p: start..end) {
112
            positions[c++] = p
113
        }
114
        // Resolve positions to ids, then ids to strings, for the property held by wordPropertyI
        // (declared outside this excerpt).
        int [] idx = CQI.cpos2Id(wordPropertyI.getQualifiedName(), positions)
115
        def words = CQI.id2Str(wordPropertyI.getQualifiedName(), idx)
116
        // tmp accumulates the words of the line currently being built.
        def tmp = []
117
        for (int j = 0; j < positions.length; j++) {
118
            int p = positions[j]
119
            tmp << words[j]
120
            // Flush the accumulated words when oneSentencePerLine is set and p is listed in
            // breaks_pos (computed outside this excerpt; presumably sentence end positions — TODO confirm).
            if (oneSentencePerLine && breaks_pos.contains(p)) {
121
                if (oneWordPerLine) {
122
                    tmp.each {
123
                        word -> writer.println word
124
                    }
125
                } else {
126
                    // Words are joined with single spaces and passed through LangFormater.format
                    // together with the corpus language before being written.
                    writer.println LangFormater.format(tmp.join(" "), corpus.getLang())
127
                }
128
                tmp = []
129
            }
130
        }
131
        // Write whatever is left after the last flush (everything, when no flush happened).
        if (tmp.size() > 0) {
132
            if (oneWordPerLine) {
133
                tmp.each {
134
                    word -> writer.println word
135
                }
136
            } else {
137
                writer.println LangFormater.format(tmp.join(" "), corpus.getLang())
138
            }
139
        }
140
        writer.close()
141
    }
128 142
}  
129 143

  
130
println "\nDone, result saved in "+outputDirectory.getAbsolutePath()
144
println "Done, result saved in "+outputDirectory.getAbsolutePath()
145

  
146
/**
 * Opens an error dialog titled "Erreur" that displays the given message.
 *
 * The dialog is opened from a Runnable handed to monitor.syncExec(); `monitor` is not
 * declared in this excerpt (presumably supplied to the macro by its host — TODO confirm).
 *
 * @param message the text shown in the error dialog
 */
def monitorShowError(String message) {
147
	monitor.syncExec(new Runnable() {
148
		public void run() {
149
			// The dialog's parent is the active shell of the Display returned by Display.getCurrent().
			org.eclipse.jface.dialogs.MessageDialog.openError(org.eclipse.swt.widgets.Display.getCurrent().getActiveShell(), "Erreur", message)
150
		}
151
	})
152
}

Formats disponibles : Unified diff