Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / macro / export / ExportTextContentMacro.groovy @ 499

History | View | Annotate | Download (2.6 kB)

1
// STANDARD DECLARATIONS
2
package org.txm.macro.export
3

    
4
import org.txm.searchengine.cqp.corpus.*
5
import org.txm.searchengine.cqp.corpus.query.Query
6
import org.txm.Toolbox
7
import org.txm.utils.i18n.LangFormater;
8
import org.apache.commons.lang.StringUtils;
9
import org.kohsuke.args4j.*
10
import groovy.transform.Field
11
import org.txm.rcp.swt.widget.parameters.*
12

    
13
// Echo what is currently selected, then refuse to go further unless the
// selection is a Corpus.
println "corpora selection: "+corpusViewSelection

def selectionIsCorpus = corpusViewSelection instanceof Corpus
if (!selectionIsCorpus) {
        println "Please select a corpus"
        return
}
19

    
20
// PARAMETERS
21

    
22
// Folder that receives the exported .txt files (required, folder widget, default "export").
@Field @Option(name="exportDir", usage="Result directory ", widget="Folder", required=true, def="export")
File exportDir

// Name of the structure whose closing tags are used as line breaks (optional, default "p").
@Field @Option(name="lineSeparatorStructureName", usage="line separator structure", widget="String", required=false, def="p")
def lineSeparatorStructureName

// Show the parameters dialog; stop the macro when open() returns a false value.
def parametersAccepted = ParametersDialog.open(this)
if (!parametersAccepted) {
        return
}
29

    
30
// BEGINNING
31

    
32
// Make sure the export directory is usable before doing any corpus work.
// mkdirs() returns false when the directory could not be created, and an
// existing path that is not a directory cannot receive the exported files,
// so both cases are reported here instead of failing later on the first write.
if (!exportDir.isDirectory() && !exportDir.mkdirs()) {
        println "Could not create the export directory: "+exportDir.getAbsolutePath()
        return;
}

Corpus corpus = corpusViewSelection
// Assigned without a declaration, exactly as before, so the name keeps its script-wide visibility.
CQI = Toolbox.getCqiClient();

// Look up the structure used as line separator; stop when the corpus has no structure with that name.
def lineSeparatorStructure = corpus.getStructuralUnit(lineSeparatorStructureName)

if (lineSeparatorStructure == null) {
        println "The $lineSeparatorStructureName structure does not exists in the $corpus corpus"
        return;
}
43

    
44
// End positions of the matches of the query "[]</STRUCTURE>", i.e. the places
// where a line break is inserted in the exported text.
// Stored in a hash-based set because the export loop tests membership once per
// token; LinkedHashSet keeps the original order for the print just below.
def breaks_pos = new LinkedHashSet(Arrays.asList(corpus.query(new Query("[]</"+lineSeparatorStructureName+">"),"test", false).getEnds()))
println breaks_pos

println "Exporting $corpus text content to $exportDir"

def wordProperty = corpus.getWordProperty()
def textidProperty = corpus.getStructuralUnit("text").getProperty("id")
def textStartBoundaries = corpus.getTextStartLimits();
def textEndBoundaries = corpus.getTextEndLimits();

// Resolve the id of each text: start position -> structure number -> id string.
def textidPropertyName = textidProperty.getQualifiedName()
int[] struct_pos = CQI.cpos2Struc(textidPropertyName, textStartBoundaries)
String[] textids =  CQI.struc2Str(textidPropertyName, struct_pos)
println ""+textStartBoundaries.size()+" texts"
56

    
57
// Write one UTF-8 file per text of the corpus, named "<text id>.txt", into exportDir.
def wordPropertyName = wordProperty.getQualifiedName()
for (int i = 0 ; i < textStartBoundaries.size() ; i++) {
        int start = textStartBoundaries[i];
        int end = textEndBoundaries[i]

        File txtFile = new File(exportDir, textids[i]+".txt")
        print ".."

        // Every corpus position of the text, from start to end inclusive.
        int[] positions = new int[end - start + 1]
        for (int k = 0 ; k < positions.length ; k++) {
                positions[k] = start + k
        }

        // Fetch the word forms BEFORE opening the file: if a CQI call fails,
        // no writer is left open and no truncated file is left behind.
        int[] idx = CQI.cpos2Id(wordPropertyName, positions)
        def words = CQI.id2Str(wordPropertyName, idx)

        // Append a newline to each word sitting on a line-break position.
        for (int j = 0 ; j < positions.length ; j++) {
                if (breaks_pos.contains(positions[j])) words[j] = words[j] +"\n"
        }

        def content = LangFormater.format(StringUtils.join(words, " "),
                                corpus.getAttribute("lang"))

        // withWriter closes the writer whether or not the closure throws.
        txtFile.withWriter("UTF-8") { writer ->
                writer.println content
        }
}
79

    
80
// Final status line: tell the user where the exported files are.
def resultPath = exportDir.absolutePath
println "\nDone, result saved in "+resultPath