Révision 3624
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/export/ExportTextsContentMacro.groovy (revision 3624) | ||
---|---|---|
16 | 16 |
import org.txm.rcp.swt.widget.parameters.* |
17 | 17 |
|
18 | 18 |
if (!(corpusViewSelection instanceof CQPCorpus)) { |
19 |
println "Please select a corpus"
|
|
20 |
return |
|
21 |
} |
|
19 |
monitorShowError("Erreur : la sélection dans la vue Corpus n'est pas un corpus ou un sous-corpus ($corpusViewSelection).")
|
|
20 |
return false
|
|
21 |
}
|
|
22 | 22 |
|
23 | 23 |
// PARAMETERS |
24 | 24 |
|
... | ... | |
31 | 31 |
@Field @Option(name="oneWordPerLine", usage="output one word per line", widget="Boolean", required=false, def="false") |
32 | 32 |
def oneWordPerLine |
33 | 33 |
|
34 |
@Field @Option(name="oneSentencePerLine", usage="output one sentence per line", widget="Boolean", required=false, def="true")
|
|
34 |
@Field @Option(name="oneSentencePerLine", usage="output one sentence per line", widget="Boolean", required=false, def="false")
|
|
35 | 35 |
def oneSentencePerLine |
36 | 36 |
|
37 | 37 |
@Field @Option(name="sentenceStructureName", usage="name of the structure encoding sentences", widget="String", required=false, def="") |
... | ... | |
44 | 44 |
if (!outputDirectory.exists()) outputDirectory.mkdirs() |
45 | 45 |
|
46 | 46 |
def corpus = corpusViewSelection |
47 |
corpus.compute() |
|
48 |
def mainCorpus = corpus.getMainCorpus() |
|
47 | 49 |
def corpusName = corpus.getName() |
48 | 50 |
def CQI = CQPSearchEngine.getCqiClient() |
49 | 51 |
|
... | ... | |
78 | 80 |
return 1 |
79 | 81 |
} |
80 | 82 |
|
81 |
def textidProperty = corpus.getStructuralUnit("text").getProperty("id") |
|
82 |
def textStartBoundaries = corpus.getTextStartLimits() |
|
83 |
def textEndBoundaries = corpus.getTextEndLimits() |
|
83 |
def textidProperty = mainCorpus.getStructuralUnit("text").getProperty("id") |
|
84 |
def textStartBoundaries = mainCorpus.getTextStartLimits() |
|
85 |
def textEndBoundaries = mainCorpus.getTextEndLimits() |
|
86 |
|
|
84 | 87 |
int[] struct_pos = CQI.cpos2Struc(textidProperty.getQualifiedName(), textStartBoundaries) |
85 |
String[] textids = CQI.struc2Str(textidProperty.getQualifiedName(), struct_pos)
|
|
88 |
String[] allTextIds = CQI.struc2Str(textidProperty.getQualifiedName(), struct_pos)
|
|
86 | 89 |
|
87 |
if (textStartBoundaries.size() == 1) { |
|
88 |
println "1 text" |
|
90 |
corpusTextIds = new HashSet<String>(corpus.getStructuralUnit("text").getProperty("id").getValues(corpus)) |
|
91 |
|
|
92 |
// https://stackoverflow.com/questions/150750/hashset-vs-list-performance |
|
93 |
|
|
94 |
if (corpusTextIds.size() == 1) { |
|
95 |
println "1 text ("+corpusTextIds+")" |
|
89 | 96 |
} else { |
90 |
println ""+textStartBoundaries.size()+" texts"
|
|
97 |
println ""+corpusTextIds.size()+" texts ("+corpusTextIds+")"
|
|
91 | 98 |
} |
92 | 99 |
|
93 |
for (int i = 0 ; i < textStartBoundaries.size() ; i++) { |
|
94 |
int start = textStartBoundaries[i] |
|
95 |
int end = textEndBoundaries[i] |
|
100 |
for (int i = 0; i < textStartBoundaries.size(); i++) { |
|
96 | 101 |
|
97 |
File txtFile = new File(outputDirectory, textids[i]+".txt") |
|
98 |
print "." |
|
99 |
def writer = txtFile.newWriter("UTF-8") |
|
100 |
int[] positions = new int[end - start + 1] |
|
101 |
int c = 0 |
|
102 |
for (int p : start..end) { |
|
103 |
positions[c++] = p |
|
104 |
} |
|
105 |
int[] idx = CQI.cpos2Id(wordPropertyI.getQualifiedName(), positions) |
|
106 |
def words = CQI.id2Str(wordPropertyI.getQualifiedName(), idx) |
|
107 |
def tmp = [] |
|
108 |
for (int j = 0 ; j < positions.length ; j++) { |
|
109 |
int p = positions[j] |
|
110 |
tmp << words[j] |
|
111 |
if (oneSentencePerLine && breaks_pos.contains(p)) { |
|
112 |
if (oneWordPerLine) { |
|
113 |
tmp.each { word -> writer.println word } |
|
114 |
} else { |
|
115 |
writer.println LangFormater.format(tmp.join(" "), corpus.getLang()) |
|
116 |
} |
|
117 |
tmp = [] |
|
118 |
} |
|
119 |
} |
|
120 |
if (tmp.size() > 0) { |
|
121 |
if (oneWordPerLine) { |
|
122 |
tmp.each { word -> writer.println word } |
|
123 |
} else { |
|
124 |
writer.println LangFormater.format(tmp.join(" "), corpus.getLang()) |
|
125 |
} |
|
126 |
} |
|
127 |
writer.close() |
|
102 |
if (corpusTextIds.contains(allTextIds[i])) { |
|
103 |
|
|
104 |
int start = textStartBoundaries[i] |
|
105 |
int end = textEndBoundaries[i] |
|
106 |
File txtFile = new File(outputDirectory, allTextIds[i] + ".txt") |
|
107 |
|
|
108 |
def writer = txtFile.newWriter("UTF-8") |
|
109 |
int [] positions = new int [end - start + 1] |
|
110 |
int c = 0 |
|
111 |
for (int p: start..end) { |
|
112 |
positions[c++] = p |
|
113 |
} |
|
114 |
int [] idx = CQI.cpos2Id(wordPropertyI.getQualifiedName(), positions) |
|
115 |
def words = CQI.id2Str(wordPropertyI.getQualifiedName(), idx) |
|
116 |
def tmp = [] |
|
117 |
for (int j = 0; j < positions.length; j++) { |
|
118 |
int p = positions[j] |
|
119 |
tmp << words[j] |
|
120 |
if (oneSentencePerLine && breaks_pos.contains(p)) { |
|
121 |
if (oneWordPerLine) { |
|
122 |
tmp.each { |
|
123 |
word -> writer.println word |
|
124 |
} |
|
125 |
} else { |
|
126 |
writer.println LangFormater.format(tmp.join(" "), corpus.getLang()) |
|
127 |
} |
|
128 |
tmp = [] |
|
129 |
} |
|
130 |
} |
|
131 |
if (tmp.size() > 0) { |
|
132 |
if (oneWordPerLine) { |
|
133 |
tmp.each { |
|
134 |
word -> writer.println word |
|
135 |
} |
|
136 |
} else { |
|
137 |
writer.println LangFormater.format(tmp.join(" "), corpus.getLang()) |
|
138 |
} |
|
139 |
} |
|
140 |
writer.close() |
|
141 |
} |
|
128 | 142 |
} |
129 | 143 |
|
130 |
println "\nDone, result saved in "+outputDirectory.getAbsolutePath() |
|
144 |
println "Done, result saved in "+outputDirectory.getAbsolutePath() |
|
145 |
|
|
146 |
def monitorShowError(String message) { |
|
147 |
monitor.syncExec(new Runnable() { |
|
148 |
public void run() { |
|
149 |
org.eclipse.jface.dialogs.MessageDialog.openError(org.eclipse.swt.widgets.Display.getCurrent().getActiveShell(), "Erreur", message) |
|
150 |
} |
|
151 |
}) |
|
152 |
} |
Formats disponibles : Unified diff