Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / export / ParaConc.groovy @ 187

History | View | Annotate | Download (4.6 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
//
22
//
23
//
24
// $LastChangedDate: 2011-09-27 16:44:37 +0200 (mar., 27 sept. 2011) $
25
// $LastChangedRevision: 2008 $
26
// $LastChangedBy: mdecorde $
27
//
28
package org.txm.export
29

    
30
import org.txm.Toolbox
31
import org.txm.searchengine.cqp.corpus.*
32
import org.txm.searchengine.cqp.corpus.query.*
33
import org.txm.functions.concordances.*
34
import org.txm.functions.concordances.comparators.*
35
import org.txm.functions.ReferencePattern
36

    
37
////// PARAMETERS //////

// Path of the tab-separated file the export is written to.
def output = "nomfichier"

// Names of the two corpora; each one is queried and named as the
// alignment target in the other one's query.
def tgtCorpusName = "BLLATLAT"
def srcCorpusName = "BLLATFRO"

// Alternations of word forms: searched for by the queries and, further
// down, used as the regex that highlights matches in the exported text.
def tgtWords = "sum|ego" // forms looked up in the target corpus
def srcWords = "je|j'" // forms looked up in the source corpus

// Builds the query for one corpus: a <seg> containing one of `words`,
// followed by the ":CORPUS" clause naming the other corpus and its own
// word forms (presumably a CQP alignment constraint -- confirm against
// the CQP documentation).
def alignedQuery = { String words, String alignedCorpus, String alignedWords ->
    '<seg> []* "' + words + '" []* </seg> :' + alignedCorpus + ' "' + alignedWords + '"'
}

def tgtQuery = alignedQuery(tgtWords, srcCorpusName, srcWords) // default target query
def srcQuery = alignedQuery(srcWords, tgtCorpusName, tgtWords) // default source query

// Echo both queries so the user can check them before the export runs.
println "${tgtCorpusName}: ${tgtQuery}"
println "${srcCorpusName}: ${srcQuery}"
52

    
53
// Runs the target and source queries, pairs the resulting concordance lines
// by their reference (the "seg" id) and writes one tab-separated row per
// segment to `output`: segment reference, source keyword, target keyword.
// A reference suffixed with "*" marks a segment that matched in only one of
// the two corpora.
new File(output).withWriter("UTF-8") { writer ->

    // Builds the concordance of `query` on the corpus named `corpusName`.
    // Lines are sorted and displayed on the "word" property and referenced
    // by the id of the "seg" structure only (the text id is not part of the
    // reference), with both context sizes set to 0.
    def buildConcordance = { String corpusName, String query ->
        def corpus = CorpusManager.getCorpusManager().getCorpus(corpusName)
        def seg_id = corpus.getStructuralUnit("seg").getProperty("id")
        def word = corpus.getProperty("word")
        def referencePattern = new ReferencePattern()
        referencePattern.addProperty(seg_id)
        return new Concordance(corpus, new Query(query), word, [word], referencePattern, referencePattern, 0, 0)
    }

    // Surrounds every match of the `words` alternation in `text` with "*".
    def highlight = { String text, String words ->
        text.replaceAll("(" + words + ")", '*$1*')
    }

    // Writes one result row; highlighting is applied to both text columns.
    def writeRow = { String label, String srcString, String tgtString ->
        writer.println label + "\t" + highlight(srcString, srcWords) + "\t" + highlight(tgtString, tgtWords)
    }

    //// TARGET CONCORDANCE ////
    def tgtConcordance = buildConcordance(tgtCorpusName, tgtQuery)

    //// SOURCE CONCORDANCE ////
    def srcConcordance = buildConcordance(srcCorpusName, srcQuery)

    //// MERGE CONCORDANCE RESULT ////
    // Both maps are keyed by the line reference rendered as a string.
    Set<String> allKeys = new HashSet<String>()
    Map<String, Line> srcLines = [:]
    Map<String, Line> tgtLines = [:]

    println "nb tgt result: " + tgtConcordance.getNLines()
    for (Line line : tgtConcordance.getLines(0, tgtConcordance.getNLines())) {
        String key = line.getViewRef().toString()
        allKeys.add(key)
        tgtLines.put(key, line)
    }

    println "nb src result: " + srcConcordance.getNLines()
    // Fetched once: the same list feeds the index and the export loop below.
    def srcResultLines = srcConcordance.getLines(0, srcConcordance.getNLines())
    for (Line line : srcResultLines) {
        String key = line.getViewRef().toString()
        allKeys.add(key)
        srcLines.put(key, line)
    }
    println "src seg: " + srcLines.keySet()
    println "tgt seg: " + tgtLines.keySet()
    println "nb segments: " + allKeys.size()

    writer.println "segment\tsource\ttarget"

    // First pass: one row per source line, paired with the target line that
    // has the same reference when there is one.
    for (Line srcLine : srcResultLines) {
        String key = srcLine.getViewRef().toString()
        // remove() both looks the target line up and takes it out of the
        // map, so that only unmatched target lines are left afterwards.
        def tgtLine = tgtLines.remove(key)
        String tgtString = ""
        String label = key
        if (tgtLine != null) {
            tgtString = tgtLine.keywordToString()
        } else {
            label = key + "*" // no target line for this segment
        }
        writeRow(label, srcLine.keywordToString(), tgtString)
    }

    println "\n"

    // Second pass: target lines that no source line consumed above.
    for (Map.Entry<String, Line> entry : tgtLines.entrySet()) {
        String key = entry.getKey()
        def srcLine = srcLines.get(key)
        String srcString = ""
        String label = key
        if (srcLine != null) {
            srcString = srcLine.keywordToString()
        } else {
            label = key + "*" // no source line for this segment
        }
        writeRow(label, srcString, entry.getValue().keywordToString())
    }
}

println "Done : written in " + new File(output).getAbsolutePath()