Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / macro / annotation / BuildWordPropTableMacro.groovy @ 499

History | View | Annotate | Download (6 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
//
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
//
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
//
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
//
22
//
23
// $LastChangedDate: 2012-10-15 17:35:36 +0200 (lun., 15 oct. 2012) $
24
// $LastChangedRevision: 2279 $
25
// $LastChangedBy: mdecorde $
26
//
27
package org.txm.macro.annotation
28

    
29
import org.txm.Toolbox
30
import org.txm.searchengine.cqp.corpus.*
31
import org.txm.searchengine.cqp.corpus.query.*
32
import org.txm.concordance.core.functions.Concordance
33
import org.txm.concordance.core.functions.Line
34
import org.txm.functions.concordances.*
35
import org.txm.functions.concordances.comparators.*
36
import org.txm.searchengine.cqp.ReferencePattern
37
import org.kohsuke.args4j.*
38
import groovy.transform.Field
39
import org.txm.rcp.swt.widget.parameters.*
40

    
// The export works on a corpus or subcorpus only. corpusViewSelection is not
// defined in this script; it is expected to be supplied by the macro environment.
def corpus = corpusViewSelection
boolean corpusSelected = corpus instanceof Corpus
if (!corpusSelected) {
	println "Error: you must select a corpus or a subcorpus to export properties"
	return false
}
46

    
// ---- Macro parameters (script fields annotated for ParametersDialog) ----

// Comma-separated names of the word properties exported as annotation columns.
@Field @Option(name="properties", usage="columns to inject separated by commas", widget="String", required=true, def="p1, p2, ... , pn")
def properties = "pos"

// Query used to build the concordance whose lines are exported.
@Field @Option(name="query", usage="The query to select words to annotate", widget="Query", required=true, def="[]")
def query

// Left context size passed to the concordance.
@Field @Option(name="leftcontextsize", usage="The context sizes", widget="Integer", required=true, def="10")
def leftcontextsize = 10

// Right context size passed to the concordance.
@Field @Option(name="rightcontextsize", usage="The context sizes", widget="Integer", required=true, def="10")
def rightcontextsize = 10

// Comma-separated references, e.g. structure_property names such as text_id.
@Field @Option(name="references", usage="references to show", widget="String", required=true, def="text_id")
def references = "text_id, p_id, s_id"

// Destination file of the exported table.
@Field @Option(name="tsvFile", usage="The result TSV file", widget="File", required=true, def="export.tsv")
def tsvFile

// Stop without doing anything when the parameters dialog does not return true.
if (!ParametersDialog.open(this)) {
	return
}
66

    
// Turn the comma-separated "properties" parameter into a list of trimmed,
// non-empty property names.
// String.split(",") returns [""] for an empty string, so testing the length of
// the split result does not detect "nothing given". Blank entries (empty input,
// "pos,,lemma", whitespace only) are dropped here so they are reported by this
// check instead of being used later as a property with an empty name.
def requestedNames = (properties ?: "").toString().split(",").collect { it.trim() }.findAll { !it.isEmpty() }
if (requestedNames.isEmpty()) {
	println "ERROR: no property given"
	return false
}
properties = requestedNames
74

    
// Echo the effective parameters on the console before starting the export.
println "Building Annotation Table with the following parameters: "
[
	"- corpus: $corpus",
	"- query: $query",
	"- annotation properties: $properties",
	"- TSV file: $tsvFile"
].each { summaryLine -> println summaryLine }
80

    
// Resolve the output file and check that its directory is writable before any
// corpus work is done. Every failure returns false, like the other error paths
// of this macro.
if (tsvFile == null) {
	println "Error: no TSV file given"
	return false
}
tsvFile = tsvFile.getAbsoluteFile()
def outputDir = tsvFile.getParentFile()
// getParentFile() returns null when the path has no parent directory
if (outputDir == null || !outputDir.canWrite()) {
	println "Error: "+outputDir+" is not writable."
	return false
}
86

    
// Look up each requested annotation name as a property of the corpus; the macro
// stops at the first name for which the corpus returns no property.
def annots = []
for (def propName in properties) {
	def resolved = corpus.getProperty(propName)
	if (resolved == null) {
		println "No such property $propName in the $corpus corpus"
		return false
	}
	annots.add(resolved)
}

// Properties used for the fixed columns of the table.
def id = corpus.getProperty("id")
def word = corpus.getProperty("word")
def text = corpus.getStructuralUnit("text")
def text_id = text.getProperty("id")
// Raw reference names; they are trimmed when they are parsed.
def refs = references.split(",")

// Nothing to export without at least one annotation property.
if (!annots) {
	println "No such property given"
	return false
}
106

    
println "Exporting ..."
def start = System.currentTimeMillis()

// Reference pattern given to the concordance: text id, word id, then every
// annotation property, in that order.
def referencePattern = new ReferencePattern()
([text_id, id] + annots).each { patternProperty ->
	referencePattern.addProperty(patternProperty)
}
115

    
// Resolve the user-supplied reference names.
// - "structure_property" names (containing "_") are looked up as a property of a
//   structural unit; they are collected in refProperties and added to the
//   reference pattern (text_id is not added again here).
// - other names are looked up as corpus properties and only added to the pattern.
//   NOTE(review): these are not added to refProperties — confirm this is intended.
// A name that cannot be resolved is reported and skipped, so no null property
// reaches the reference pattern or refProperties.
def refProperties = []
for (def ref : refs) {
	ref = ref.trim()
	try {
		if (ref.contains("_")) {
			def nameParts = ref.split("_", 2)
			def structure = corpus.getStructuralUnit(nameParts[0].trim())
			def refp = structure?.getProperty(nameParts[1].trim())
			if (refp == null) {
				println "Error while parsing references: "+ref+" is not a structure property of the $corpus corpus"
				continue
			}
			if (ref != "text_id") {
				referencePattern.addProperty(refp)
			}
			refProperties << refp
		} else {
			def p = corpus.getProperty(ref)
			if (p != null) {
				referencePattern.addProperty(p)
			} else {
				println "Error while parsing references: no such property "+ref+" in the $corpus corpus"
			}
		}
	} catch (Exception e) {
		// Best effort: a reference that fails to resolve must not abort the export.
		println "Error while parsing references: "+ref+" "+e
	}
}
132

    
// Build the concordance of the query. The context sizes are the leftcontextsize
// and rightcontextsize parameters, and the same reference pattern is passed twice.
//query = new Query(Query.fixQuery(query))
def viewprops = [word] + annots
def concordance = new Concordance(
		corpus, query, word, viewprops,
		referencePattern, referencePattern,
		leftcontextsize, rightcontextsize)
138

    
// Write the concordance to the TSV file, one row per concordance line:
//   N, references, left context, pivot, one column per annotation property,
//   right context, word id, text id.
// withWriter closes the file even when reading the concordance or writing a row
// throws, so the writer is never leaked and "Saved in" is only printed once the
// file has been written and closed.
int chunk = 1000 // number of concordance lines fetched per getLines() call
tsvFile.withWriter("UTF-8") { writer ->
	// Header row; annotation columns are labelled with each property's string form.
	String annotHeader = annots.collect { "\t$it" }.join("")
	writer.write("N"+"\t"+"Références"+"\t"+"ContexteGauche"+"\t"+"Pivot"+annotHeader+"\t"+"ContexteDroit"+"\t"+"id"+"\t"+"text_id\n")

	// define which occurrence properties will be displayed
	concordance.setViewProperties([word])
	int NLines = concordance.getNLines()

	for (int from = 0 ; from < NLines ; from += chunk) {
		// inclusive index of the last line of this chunk, e.g. 0 to 999,
		// clamped to the last concordance line
		int to = Math.min(from + chunk, NLines) - 1
		List<Line> lines = concordance.getLines(from, to)

		for (Line l : lines) {
			def viewRef = l.getViewRef()
			String ntext = viewRef.getValue(text_id)
			String lcontext = l.leftContextToString()
			String pivot = l.keywordToString()
			String rcontext = l.rightContextToString()
			String ids = viewRef.getValue(id)

			// each reference value is prefixed with a space, each annotation value with a tab
			String refValue = refProperties.collect { " "+viewRef.getValue(it) }.join("")
			String poss = annots.collect { "\t"+viewRef.getValue(it) }.join("")

			writer.write(""+(l.getKeywordPosition())+"\t"+refValue+"\t"+lcontext+"\t"+pivot+poss+"\t"+rcontext+"\t"+ids+"\t"+ntext+"\n")
		}
		writer.flush()
	}
}
println "Saved in "+tsvFile.getAbsolutePath()