Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / export / ConcordancesExport.groovy @ 1000

History | View | Annotate | Download (4.8 kB)

1
/**
2
 * Main.
3
 *
4
 * @param args the args
5
 */
6
// Copyright © 2010-2013 ENS de Lyon.
7
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
8
// Lyon 2, University of Franche-Comté, University of Nice
9
// Sophia Antipolis, University of Paris 3.
10
// 
11
// The TXM platform is free software: you can redistribute it
12
// and/or modify it under the terms of the GNU General Public
13
// License as published by the Free Software Foundation,
14
// either version 2 of the License, or (at your option) any
15
// later version.
16
// 
17
// The TXM platform is distributed in the hope that it will be
18
// useful, but WITHOUT ANY WARRANTY; without even the implied
19
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
20
// PURPOSE. See the GNU General Public License for more
21
// details.
22
// 
23
// You should have received a copy of the GNU General
24
// Public License along with the TXM platform. If not, see
25
// http://www.gnu.org/licenses.
26
//
27
//
28
//
29
// $LastChangedDate: 2017-04-19 16:23:38 +0200 (mer. 19 avril 2017) $
30
// $LastChangedRevision: 3430 $
31
// $LastChangedBy: mdecorde $
32
//
33
package org.txm.scripts.export
34

    
35
// import the packages containing the functions we are going to use
36
import org.txm.Toolbox
37
import org.txm.searchengine.cqp.corpus.*
38
import org.txm.searchengine.cqp.corpus.query.*
39
import org.txm.concordance.core.functions.Concordance
40
import org.txm.concordance.core.functions.Line
41
import org.txm.concordance.core.functions.comparators.LexicographicKeywordComparator
42
import org.txm.functions.concordances.*
43
import org.txm.functions.concordances.comparators.*
44
import org.txm.searchengine.cqp.ReferencePattern
45

    
46
// TODO: Auto-generated Javadoc
47
/* (non-Javadoc)
48
 * @see groovy.lang.Script#run()
49
 */
50
// Remember whether this script had to start the Toolbox itself, so that it
// only shuts down what it started.
def manualInit = false
if (!org.txm.Toolbox.isInitialized()) {
    manualInit = true
    // The JVM does not expand "~": build the preferences path from the
    // user's home directory instead of a literal "~" path component.
    def installPrefs = new File(System.getProperty("user.home"), "TXM/install.prefs")
    org.txm.Toolbox.initialize(installPrefs)
}
56

    
57
// get the VOEUX corpus
def discours = CorpusManager.getCorpusManager().getCorpus("VOEUX")

// get the word properties and the structural unit property used below,
// printing each one as it is resolved
def pos = discours.getProperty("frpos")
println "pos: "+pos
def id = discours.getProperty("id")
println "id: "+id
def word = discours.getProperty("word")
println "word: "+word
def text = discours.getStructuralUnit("text")
println "text: "+text
def text_id = text.getProperty("id")
println "text_id: "+text_id

// ask the user for the query to run
def querys = javax.swing.JOptionPane.showInputDialog(null, "query ")
if (querys == null) {
    // Dialog cancelled: stop here, but still shut down the Toolbox if this
    // script was the one that initialized it.
    if (manualInit) {
        org.txm.Toolbox.shutdown()
    }
    return
}
def query = new Query(Query.fixQuery(querys))
77

    
78
println "Compute concordance "
def start = System.currentTimeMillis()

// The reference pattern attached to each concordance line and the one passed
// for sorting are both built from the same three properties, in this order.
def referencePattern = new ReferencePattern()
[text_id, pos, id].each { referencePattern.addProperty(it) }

def sortReferencePattern = new ReferencePattern()
[text_id, pos, id].each { sortReferencePattern.addProperty(it) }

// Build the concordance; the last two arguments are both 10
// (presumably the left and right context sizes in words - confirm against
// the Concordance constructor).
int contextSize = 10
def concordance = new Concordance(discours, query, word, [word, pos],
        referencePattern, sortReferencePattern, contextSize, contextSize)

// A builtin keyword comparator is prepared, but the sort itself is disabled.
def comparator = new LexicographicKeywordComparator()
comparator.initialize(discours)
//concordance.sort(comparator)

println "done "+(System.currentTimeMillis()-start)
103

    
104
// Export the concordance as tab-separated lines, fetched page by page.
def outputFile = new File("ExportConcordanceBFM3TT.csv")
int pageSize = 1000

println "write lines"
start = System.currentTimeMillis()

// define which occurrence properties will be displayed
concordance.setViewProperties([word])

// withWriter closes the writer even when fetching or writing a line fails.
new FileWriter(outputFile).withWriter { writer ->
    int total = concordance.getNLines()
    for (int i = 0 ; i < total ; i += pageSize) {
        // The original last page ended at getNLines()-1, i.e. the end index is
        // treated as inclusive. Use the same convention for every page so that
        // consecutive pages do not overlap and the end never reaches getNLines().
        // NOTE(review): confirm that Concordance.getLines(from, to) is inclusive.
        int last = Math.min(i + pageSize, total) - 1
        List<Line> lines = concordance.getLines(i, last)

        for (Line l : lines) {
            /*
             Columns:
             A : reference (text name + page)
             B : left context (10 words)
             C : keyword (see the remark about "ce que")
             D : pos of the keyword
             E : empty column for comments
             F : right context (10 words)
             G : occurrence identifier (to inject the corrections afterwards)
             */
            String ntext = l.getViewRef().getValue(text_id)
            String lcontext = l.leftContextToString()
            String pivot = l.keywordToString()
            String rien = ""
            String rcontext = l.rightContextToString()
            String ids = l.getViewRef().getValue(id)

            // pos values of the keyword tokens, each followed by a space
            StringBuilder poss = new StringBuilder()
            for (String s : l.getKeywordsViewProperties().get(pos)) {
                poss.append(s).append(" ")
            }

            writer.write([ntext, lcontext, pivot, poss.toString(), rien, rcontext, ids].join("\t")+"\n")
        }
        // push each page to disk as soon as it is written
        writer.flush()
    }
}

println "done "+(System.currentTimeMillis()-start)+" results"

println("Concordance "+query.getQueryString()+" saved in file "+outputFile.getAbsolutePath())
156

    
157
// Shut the Toolbox down only when this script was the one that initialized it.
if (manualInit) {
    org.txm.Toolbox.shutdown()
}
159