Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / macroproto / export / ExportTextsMetadataMacro.groovy @ 479

History | View | Annotate | Download (2.3 kB)

1
// STANDARD DECLARATIONS
2
package org.txm.macroproto.export
3

    
4
import org.kohsuke.args4j.*
5
import groovy.transform.Field
6
import org.txm.rcpapplication.swt.widget.parameters.*
7
import org.txm.searchengine.cqp.corpus.*
8
import org.txm.Toolbox
9

    
10
// This macro only works on a corpus: stop right away when the current
// selection is anything else.
def selectionIsCorpus = corpusViewSelection instanceof Corpus
if (!selectionIsCorpus) {
        println "Please select a corpus first"
        return
}
14

    
15
// PARAMETERS
16

    
17
// Output file the metadata table is written to.
@Field @Option(name="tsvFile", usage="CSV file path", widget="File", required=true, def="/tmp/metadata.csv")
def File tsvFile

// String written between two columns of the table (optional, a tab by default).
@Field @Option(name="columnSeparator", usage="column separator", widget="String", required=false, def="\t")
def columnSeparator

// Let the parameters dialog fill the fields above; stop the macro when it
// reports failure (presumably when the user cancels the dialog).
if (!ParametersDialog.open(this)) return
24

    
25
// BEGINNING
// Exports the properties of the "text" structure of the selected corpus to
// tsvFile: one header line with the property names, then one line per text.
// The "id" property comes first; the internal properties listed below are
// never exported.
def Corpus corpus = corpusViewSelection
def CQI = Toolbox.getCqiClient()
def internalTextProperties = ["project", "base", "path"]
// the separator parameter is optional: fall back to a tab when it is null or empty
def separator = (columnSeparator ?: "\t").toString()

println "Exporting $corpus metadata to $tsvFile file."

def text = corpus.getStructuralUnit("text")
// work on a copy, in case getProperties() exposes a list owned by the corpus:
// the reordering below must not leak outside of this macro
def properties = text.getProperties().collect { it }

// putting the "id" property in the first position
def idi = properties.findIndexOf { prop -> prop.toString() == "id" }
if (idi == -1) {
        // checked before the output file is opened, so that aborting neither
        // creates/truncates the file nor leaves a writer open
        println sprintf("** Warning: incoherent metadata content found for %s corpus - no 'id' metadata found", corpus)
        println "** Aborting"
        return
}
if (idi > 0) {
        Collections.swap(properties, 0, idi)
}

// only these properties are written, so only their values are fetched
def exportedProperties = properties.findAll { property ->
        !internalTextProperties.contains(property.getName())
}

// getting values for all texts and all exported text metadata
def numberOfTexts = CQI.attributeSize(properties[0].getQualifiedName())
int[] struct_pos = new int[numberOfTexts]
for (int i = 0; i < numberOfTexts; i++) {
        struct_pos[i] = i
}
// one array of values per exported property, in column order
def columns = exportedProperties.collect { property ->
        CQI.struc2Str(property.getQualifiedName(), struct_pos)
}

// writing result to tsvFile; withWriter closes the file even when a write fails
tsvFile.withWriter("UTF-8") { writer ->
        writer.println(exportedProperties.collect { property -> property.toString() }.join(separator))
        numberOfTexts.times { textIndex ->
                writer.println(columns.collect { values -> values[textIndex] }.join(separator))
        }
}

println sprintf("Wrote %d different metadata for %d texts.", exportedProperties.size(), numberOfTexts)
83