Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / export / ConcordancesExport_discours.groovy @ 399

History | View | Annotate | Download (6.4 kB)

1
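/**
 * Concordance export script.
 *
 * Asks the user for a query, computes the concordance of that query on the
 * DISCOURS corpus and writes one tab-separated line per concordance line to
 * the file ExportConcordanceVOEUX.csv.
 */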
6
// Copyright © 2010-2013 ENS de Lyon.
7
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
8
// Lyon 2, University of Franche-Comté, University of Nice
9
// Sophia Antipolis, University of Paris 3.
10
// 
11
// The TXM platform is free software: you can redistribute it
12
// and/or modify it under the terms of the GNU General Public
13
// License as published by the Free Software Foundation,
14
// either version 2 of the License, or (at your option) any
15
// later version.
16
// 
17
// The TXM platform is distributed in the hope that it will be
18
// useful, but WITHOUT ANY WARRANTY; without even the implied
19
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
20
// PURPOSE. See the GNU General Public License for more
21
// details.
22
// 
23
// You should have received a copy of the GNU General
24
// Public License along with the TXM platform. If not, see
25
// http://www.gnu.org/licenses.
26
// 
27
// 
28
// 
29
// $LastChangedDate: 2012-10-15 17:35:36 +0200 (lun., 15 oct. 2012) $
30
// $LastChangedRevision: 2279 $
31
// $LastChangedBy: mdecorde $ 
32
//
33

    
34
// WARNING: when this script is executed a second time, the window asking
// for the query parameter is displayed behind all the other windows,
// which can give the impression that TXM is frozen.
// Move the other windows aside to reach the hidden query window and
// answer it: the script then carries on normally.
39

    
40
package org.txm.export
41

    
42
// import the packages containing the functions we are going to use
43
import org.txm.concordance.core.functions.*
44
import org.txm.concordance.core.functions.comparators.*
45
import org.txm.searchengine.cqp.ReferencePattern
46
import org.txm.searchengine.cqp.corpus.*
47
import org.txm.searchengine.cqp.corpus.query.*
48

    
49
// TODO: Auto-generated Javadoc
50
/* (non-Javadoc)
51
 * @see groovy.lang.Script#run()
52
 */
53
//"ExportConcordanceBFM.csv"
54
// Destination file of the export (a relative path, resolved against the
// current working directory).
def outfile = new File("ExportConcordanceVOEUX.csv")

// **PARAMETER**: name of the corpus to query (here DISCOURS; an earlier
// version of this script used "QUETE").
def discours = CorpusManager.getCorpusManager().getCorpus("DISCOURS")

// **PARAMETER**: selection and names of the properties.
// According to the original author, "word", "id", "text" and "text_id" are
// always available. Earlier versions read the part of speech from "ttpos"
// or "frpos" instead of "pos", and also fetched a "pb" property.
// Each property is printed right after it is fetched so that a missing one
// is easy to spot in the console.
def pos = discours.getProperty("pos")
println "pos: ${pos}"

def id = discours.getProperty("id")
println "id: ${id}"

def word = discours.getProperty("word")
println "word: ${word}"

// properties attached to the "text" structural unit
def textUnit = discours.getStructuralUnit("text")
println "text: ${textUnit}"

def text_id = textUnit.getProperty("id")
println "text_id: ${text_id}"

def text_date = textUnit.getProperty("date")
println "text_date: ${text_date}"

def text_loc = textUnit.getProperty("loc")
println "text_loc: ${text_loc}"
81

    
82
// Build the query from what the user types in an input dialog.
// showInputDialog returns null when the dialog is cancelled; in that case
// the script stops here without computing anything.
def userInput = javax.swing.JOptionPane.showInputDialog(null, "query ")
if (userInput == null) {
    return
}
// the raw input goes through Query.fixQuery before the Query is built
def fixedInput = Query.fixQuery(userInput)
def query = new Query(fixedInput)
87

    
88
println "Compute concordance "
def start = System.currentTimeMillis()

// Reference pattern shown for each concordance line (first column).
// **PARAMETER**: pick properties among the ones fetched above; text_id, pb,
// pos and id were used in earlier versions of this script.
def referencePattern = new ReferencePattern()
for (property in [text_loc, text_date]) {
    referencePattern.addProperty(property)
}

// Reference pattern used to set the sorting order of the first column.
// **PARAMETER**: pick properties among the ones fetched above (pb and pos
// were used in earlier versions of this script).
def sortReferencePattern = new ReferencePattern()
for (property in [text_id, id]) {
    sortReferencePattern.addProperty(property)
}

// Compute the concordance with contexts of 10 words on each side of the keyword.
// **PARAMETER**: [word, pos] are, per the original author, the properties
// according to which the pivot is sorted.
// An earlier version used the constructor without sortReferencePattern.
int leftContextSize = 10
int rightContextSize = 10
def pivotProperties = [word, pos]
def concordance = new Concordance(
        discours, query, word, pivotProperties,
        referencePattern, sortReferencePattern,
        leftContextSize, rightContextSize)

// A builtin keyword comparator is prepared here, but the sort itself is
// currently disabled; uncomment the last line to sort the concordance.
def comparator = new LexicographicKeywordComparator()
comparator.initialize(discours)
//concordance.sort(comparator)

// elapsed time of the computation, in milliseconds
println "done ${System.currentTimeMillis() - start}"
124

    
125
// **PARAMETER** name of the output file: it is taken from `outfile`
// (an earlier version wrote directly to "ExportConcordanceBFM3TT.csv").
println "write lines"
start = System.currentTimeMillis()

// define which occurrence properties will be displayed
concordance.setViewProperties([word])

// The concordance lines are fetched and written page by page.
// NOTE(review): getLines(from, to) is assumed to take an INCLUSIVE upper
// bound, as implied by the original last-page call
// getLines(i, getNLines()-1) -- confirm against the Concordance API.
// With an inclusive bound the previous paging (getLines(i, i+1000) with a
// step of 1000) requested the boundary line of every page twice, and asked
// for an index one past the end when i+1000 was exactly getNLines().
int pageSize = 1000
int nLines = concordance.getNLines()

// withWriter closes the writer when the closure returns or throws, so the
// file handle is no longer leaked if a line cannot be fetched or written.
// NOTE(review): FileWriter encodes with the platform default charset; this
// is left unchanged here -- confirm whether an explicit charset is wanted.
new FileWriter(outfile).withWriter { writer ->
    for (int from = 0; from < nLines; from += pageSize) {
        // last index of this page, clamped to the last line of the concordance
        int to = Math.min(from + pageSize, nLines) - 1
        List<Line> lines = concordance.getLines(from, to)

        for (Line l : lines) {
            /*
             Columns currently written:
               A : reference (text "loc" + ", " + text "date")
               B : left context
               C : pivot
               D : pos of the pivot
               E : right context
             An earlier layout used "text id_page" as the reference, had an
             empty column for comments before the right context, and ended
             with the identifier of the occurrence (to inject corrections
             afterwards).
             */

            // **PARAMETERS** strings used to print the result
            String loc = l.getViewRef().getValue(text_loc)
            String date = l.getViewRef().getValue(text_date)
            String lcontext = l.leftContextToString()
            String pivot = l.keywordToString()
            String rcontext = l.rightContextToString()

            // pos values of the pivot, each one followed by a space
            StringBuilder poss = new StringBuilder()
            for (String s : l.getKeywordsViewProperties().get(pos)) {
                poss.append(s).append(" ")
            }

            // **PARAMETERS** composition of the printed line, cf. strings defined above
            writer.write(loc+", "+date+"\t"+lcontext+"\t"+pivot+"\t"+poss+"\t"+rcontext+"\n")
        }
        // push each page to disk before fetching the next one
        writer.flush()
    }
}

// the number printed here is the elapsed writing time in milliseconds
println "done "+(System.currentTimeMillis()-start)+" results"

// BP: file name changed
println("Concordance "+query.getQueryString()+" saved in file "+outfile.getAbsolutePath())