Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / macro / annotation / BuildWordPropTableMacro.groovy @ 345

History | View | Annotate | Download (5.9 kB)

// Copyright © 2010-2013 ENS de Lyon.
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
// Lyon 2, University of Franche-Comté, University of Nice
// Sophia Antipolis, University of Paris 3.
//
// The TXM platform is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation,
// either version 2 of the License, or (at your option) any
// later version.
//
// The TXM platform is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the GNU General Public License for more
// details.
//
// You should have received a copy of the GNU General
// Public License along with the TXM platform. If not, see
// http://www.gnu.org/licenses.
//
//
// $LastChangedDate: 2012-10-15 17:35:36 +0200 (lun., 15 oct. 2012) $
// $LastChangedRevision: 2279 $
// $LastChangedBy: mdecorde $
//
27
package org.txm.macro.annotation
28

    
29
import org.txm.Toolbox
30
import org.txm.searchengine.cqp.corpus.*
31
import org.txm.searchengine.cqp.corpus.query.*
32
import org.txm.concordances.functions.*
33
import org.txm.concordances.functions.comparators.*
34
import org.txm.functions.ReferencePattern
35
import org.kohsuke.args4j.*
36
import groovy.transform.Field
37
import org.txm.rcp.swt.widget.parameters.*
38

    
39
// The macro works on whatever is bound to corpusViewSelection; anything
// that is not a Corpus instance is rejected before any parameter is asked.
def corpus = corpusViewSelection
boolean isCorpusSelection = (corpus instanceof Corpus)
if (!isCorpusSelection) {
	println "Error: you must select a corpus or a subcorpus to export properties"
	return false
}
44

    
45
// ---- Macro parameters -------------------------------------------------
// Each parameter is a script field annotated with @Option; the fields are
// handed to ParametersDialog.open(this) below.

// Comma-separated names of the word properties to export as columns.
@Field
@Option(name="properties", usage="columns to inject separated by commas", widget="String", required=true, def="p1, p2, ... , pn")
def properties = "pos"

// Query selecting the words to annotate.
@Field
@Option(name="query", usage="The query to select words to annotate", widget="Query", required=true, def="[]")
def query

// Context size passed to the concordance for the left side.
@Field
@Option(name="leftcontextsize", usage="The context sizes", widget="Integer", required=true, def="10")
def leftcontextsize = 10

// Context size passed to the concordance for the right side.
@Field
@Option(name="rightcontextsize", usage="The context sizes", widget="Integer", required=true, def="10")
def rightcontextsize = 10

// Comma-separated references written in the references column of each line.
@Field
@Option(name="references", usage="references to show", widget="String", required=true, def="text_id")
def references = "text_id, p_id, s_id"

// Destination of the exported table.
@Field
@Option(name="tsvFile", usage="The result TSV file", widget="File", required=true, def="export.tsv")
def tsvFile

// Stop silently when the parameters dialog does not report success.
boolean parametersAccepted = ParametersDialog.open(this)
if (!parametersAccepted) return
64

    
65
// Turn the comma-separated "properties" parameter into a list of names.
// Each entry is trimmed and empty entries are dropped: String.split(",")
// returns [""] for an empty string and keeps blank tokens such as the one
// in "pos, ,lemma", so testing the raw array length is not enough to detect
// that no usable property name was given.
def split = (properties ?: "").split(",")
properties = []
for (def p : split) {
	def name = p.trim()
	if (name.length() > 0) properties << name
}
if (properties.size() == 0) {
	println "ERROR: no property given"
	return false
}
72

    
73
// Echo the parameters the export is going to use.
println "Building Annotation Table with the following parameters: "
println "- corpus: $corpus"
println "- query: $query"
println "- annotation properties: $properties"
println "- TSV file: $tsvFile"

// Check that the result file can be created before doing any work.
// NOTE(review): assumes tsvFile is a java.io.File (widget="File") - confirm.
if (tsvFile == null) {
	println "Error: no TSV file given."
	return;
}
// File.getParentFile() returns null when the path has no parent component
// (e.g. "export.tsv"), so the directory is taken from the absolute path.
def tsvDirectory = tsvFile.getAbsoluteFile().getParentFile()
if (tsvDirectory == null || !tsvDirectory.canWrite()) {
	println "Error: "+(tsvDirectory != null ? tsvDirectory : tsvFile)+" is not writable."
	return;
}
84

    
85
// Resolve every requested property name against the corpus; the macro
// stops at the first name the corpus does not know.
def annots = []
for (def p : properties) {
	def prop = corpus.getProperty(p)
	if (prop != null) {
		annots << prop
		continue
	}
	println "No such property $p in the $corpus corpus"
	return false
}

// Properties and structure used by the export: the "id" and "word" word
// properties, the "id" property of the "text" structure, and the raw
// (untrimmed) reference names.
def id = corpus.getProperty("id")
def word = corpus.getProperty("word")
def text = corpus.getStructuralUnit("text")
def text_id = text.getProperty("id")
def refs = references.split(",")

// Nothing to export when no property was resolved (null or empty list).
if (!annots) {
	println "No such property given"
	return false
}
104

    
105
println "Exporting ..."
def start = System.currentTimeMillis()

// Reference pattern of each concordance line: it always holds the text id
// and the word id, followed by every property to export.
def referencePattern = new ReferencePattern()
referencePattern.addProperty(text_id)
referencePattern.addProperty(id)
for (def annot : annots) referencePattern.addProperty(annot)

// Add the user-requested references. A name containing "_" is read as
// <structure>_<property>; any other name is looked up as a word property.
// Only structure properties are collected in refProperties.
def refProperties = []
for (def ref : refs) {
	ref = ref.trim()
	if (ref.length() == 0) continue // blank entry, e.g. "text_id, ,p_id"
	try {
		if (ref.contains("_")) {
			def split2 = ref.split("_",2)
			// "?." keeps an unresolved (null) structure from raising an
			// uninformative NullPointerException
			def refp = corpus.getStructuralUnit(split2[0].trim())?.getProperty(split2[1].trim())
			if (refp == null) {
				// a null property must reach neither the pattern nor refProperties:
				// it would only fail later, while the lines are being written
				println "Error while parsing references: "+ref+" no such structure property"
				continue
			}
			// text_id is already part of the pattern (added above)
			if (ref != "text_id")
				referencePattern.addProperty(refp)
			refProperties << refp
		} else {
			def p = corpus.getProperty(ref)
			if (p != null) referencePattern.addProperty(p)
		}
	} catch(Exception e) { println "Error while parsing references: "+ref+" "+e}
}
130

    
131
// Compute the concordance of the query on the selected corpus. The context
// sizes are the leftcontextsize and rightcontextsize parameters, and the
// same reference pattern is passed for both reference arguments.
// Left disabled from a previous revision:
//query = new Query(Query.fixQuery(query))
def viewprops = [word] + annots
def concordance = new Concordance(
		corpus, query, word, viewprops,
		referencePattern, referencePattern,
		leftcontextsize, rightcontextsize)
136

    
137
//println "Conc done "+(System.currentTimeMillis()-start)
138

    
139
// Write the concordance to the TSV file, one row per concordance line.
// withWriter closes the writer when the closure ends, including when an
// exception is thrown while the lines are fetched or written, so the file
// handle is never leaked.
tsvFile.withWriter("UTF-8") { writer ->
	// header: fixed columns, with one column per exported property after "Pivot"
	StringBuilder annotHeader = new StringBuilder()
	for (def annot : annots) annotHeader.append("\t").append(annot)
	writer.write("N"+"\t"+"Références"+"\t"+"ContexteGauche"+"\t"+"Pivot"+annotHeader+"\t"+"ContexteDroit"+"\t"+"id"+"\t"+"text_id\n")

	// define which occurrence properties will be displayed
	concordance.setViewProperties([word])
	int NLines = concordance.getNLines()
	int chunk = 1000
	// fetch the lines chunk by chunk instead of requesting them all at once
	for (int i = 0 ; i < NLines ; i += chunk) {
		// inclusive index of the last line of the chunk, e.g. 0 to 999,
		// clamped to the last line of the concordance
		int last = Math.min(i + chunk, NLines) - 1
		List<Line> lines = concordance.getLines(i, last)

		for (Line l : lines) {
			def viewRef = l.getViewRef()
			String ntext = viewRef.getValue(text_id)
			String lcontext = l.leftContextToString()
			String pivot = l.keywordToString()
			String rcontext = l.rightContextToString()
			String ids = viewRef.getValue(id)

			// references column: the values are each prefixed with a space
			StringBuilder refValue = new StringBuilder()
			for (def refp : refProperties) refValue.append(" ").append(viewRef.getValue(refp))

			// one tab-prefixed value per exported property
			StringBuilder poss = new StringBuilder()
			for (def annot : annots) poss.append("\t").append(viewRef.getValue(annot))

			writer.write(""+(l.getKeywordPosition())+"\t"+refValue+"\t"+lcontext+"\t"+pivot+poss+"\t"+rcontext+"\t"+ids+"\t"+ntext+"\n")
		}
		writer.flush()
	}
}
// reported only once the file has been completely written and closed
println "Saved in "+tsvFile.getAbsolutePath()