Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macroprototypes / urs / cqp / WordUnitsInserter.groovy @ 2087

History | View | Annotate | Download (3.7 kB)

1
package org.txm.macroprototypes.urs.cqp
2

    
3
import java.io.IOException
4
import java.util.Date
5
import java.util.LinkedHashMap
6

    
7
import javax.xml.stream.XMLStreamException
8

    
9
import org.txm.Toolbox
10
import org.txm.importer.StaxIdentityParser
11
import org.txm.macro.urs.AnalecUtils
12
import org.txm.scripts.importer.GetAttributeValue
13
import org.txm.searchengine.cqp.CQPSearchEngine
14
import org.txm.searchengine.cqp.corpus.MainCorpus
15
import visuAnalec.elements.Unite
16

    
17
/**
 * StAX identity parser that copies an XML file while injecting the properties
 * of URS units into the words they cover.
 *
 * For each {@code w} element whose {@code id} is covered by at least one unit,
 * the unit properties (except {@code type}, {@code written} and
 * {@code milestone}) are written as {@code txm:ana} children. When the word
 * already has an {@code ana} child of the same type, the unit value(s) and the
 * existing value are merged (space separated) and the {@code resp} attribute is
 * set to {@code #txm}.
 */
public class WordUnitsInserter extends StaxIdentityParser {

    /** Units to insert, sorted by start then end position by the constructor. */
    List<Unite> units
    /** Word ids of the last unit processed by the constructor. */
    String[] ids
    /** The XML file being rewritten. */
    File inputFile
    /** Word id -> list of the units covering that word. */
    def id2Units = [:]

    /** Annotation values still to be written for the current word, keyed by annotation type (without '#'). */
    LinkedHashMap<String, String> anaValues = new LinkedHashMap<String, String>();

    /**
     * Builds the word id -> units index used while parsing.
     *
     * @param corpus the corpus whose "id" word property is used to translate unit positions into word ids
     * @param inputFile the XML file to process
     * @param units the units to insert; NOTE: this list is sorted in place
     */
    public WordUnitsInserter(MainCorpus corpus, File inputFile, List<Unite> units) {
        super(inputFile)
        this.inputFile = inputFile

        this.units = units
        // order by start position, then by end position
        this.units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }

        for (Unite u : this.units) {
            // every position from the unit start to the unit end, both included
            int[] positions = u.getDeb()..u.getFin()

            ids = CQPSearchEngine.getCqiClient().cpos2Str(corpus.getProperty("id").getQualifiedName(), positions)

            for (String id : ids) {
                if (!id2Units.containsKey(id)) {
                    id2Units[id] = []
                }
                id2Units[id] << u
            }
        }
    }

    /** True while the parser is inside an {@code ana} element of a word. */
    boolean inAna = false
    /** Type (without '#'), resp and accumulated text of the {@code ana} element being read. */
    String ana_type, ana_resp, ana_value
    /** True while the parser is inside a {@code w} element. */
    boolean inW = false
    /** Id of the current {@code w} element. */
    String word_id = null

    /**
     * On {@code w}: collects the property values of the units covering the word,
     * then lets the parent write the tag. On {@code ana} inside a word: starts
     * buffering the annotation instead of writing it. Anything else is delegated
     * to the parent.
     *
     * @throws XMLStreamException if an {@code ana} element has no {@code type} attribute
     */
    @Override
    protected void processStartElement() throws XMLStreamException, IOException {

        if ("w".equals(localname)) {

            inW = true
            word_id = this.getParserAttributeValue("id")

            if (id2Units.containsKey(word_id)) {
                for (Unite u : id2Units[word_id]) {
                    def props = u.getProps();
                    for (String p : props.keySet()) {
                        // these unit properties are not exported as word annotations
                        if ("type".equals(p)) continue;
                        if ("written".equals(p)) continue;
                        if ("milestone".equals(p)) continue;

                        if (!anaValues.containsKey(p)) {
                            anaValues[p] = ""
                            ana_resp = "#txm"
                        }
                        // several units may cover the same word: values are space separated
                        anaValues[p] = (anaValues[p]+" "+props.get(p)).trim()
                    }
                }
                if (anaValues.size() > 0) println anaValues
            }

            super.processStartElement(); // write the tag

        } else if ("ana".equals(localname) && inW) {

            inAna = true
            String type = this.getParserAttributeValue("type")
            if (type == null) {
                // fail with an explicit message instead of a bare NullPointerException
                throw new XMLStreamException("Missing 'type' attribute on an 'ana' element of word '" + word_id + "'")
            }
            // only strip the leading '#' when there is one, so other values are not truncated
            ana_type = type.startsWith("#") ? type.substring(1) : type
            ana_resp = this.getParserAttributeValue("resp")
            ana_value = ""

        } else {
            super.processStartElement()
        }
    }

    /**
     * Buffers the text of the current {@code ana} element; any other text is
     * delegated to the parent.
     */
    @Override
    public void processCharacters() throws XMLStreamException {
        if (inAna) {
            // Keep the raw text: one text node may arrive as several character
            // events, and trimming each chunk would glue words together.
            // The value is trimmed once, when the 'ana' element ends.
            ana_value += parser.getText()
        } else {
            super.processCharacters()
        }
    }

    /**
     * On {@code ana} inside a word: writes the buffered annotation, merged with
     * the unit value of the same type if there is one. On {@code w}: writes the
     * unit values that did not match any existing annotation, then closes the
     * word. Anything else is delegated to the parent.
     */
    @Override
    protected void processEndElement() throws XMLStreamException {

        if ("w".equals(localname)) {

            // write the values not consumed by an existing 'ana' element
            for (Map.Entry<String, String> remaining : anaValues.entrySet()) {
                writer.writeStartElement("txm:ana")
                writer.writeAttribute("type", "#" + remaining.getKey())
                writer.writeAttribute("resp", "#txm") // change
                writer.writeCharacters(remaining.getValue())
                writer.writeEndElement()
            }

            anaValues.clear()
            super.processEndElement() // finally write word then close annotations
            inW = false

        } else if ("ana".equals(localname) && inW) {

            String value = ana_value.trim()
            if (anaValues.containsKey(ana_type)) {
                ana_resp = "#txm" // set the resp to txm since anaValues update the ana value
                value = (anaValues[ana_type]+" "+value).trim()
            }

            writer.writeStartElement("txm:ana")
            writer.writeAttribute("type", "#" + ana_type)
            writer.writeAttribute("resp", ana_resp) // change
            writer.writeCharacters(value)
            writer.writeEndElement()

            // this type is now written: do not write it again when the word ends
            anaValues.remove(ana_type)

            inAna = false
            ana_type = null
            ana_resp = null
            ana_value = null

        } else {
            super.processEndElement()
        }
    }
}