Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macroprototypes / urs / cqp / UnitsInserter.groovy @ 2087

History | View | Annotate | Download (4.3 kB)

1
package org.txm.macroprototypes.urs.cqp
2

    
3
import java.io.IOException
4

    
5
import javax.xml.stream.XMLStreamException
6

    
7
import org.txm.importer.StaxIdentityParser
8
import org.txm.macro.urs.AnalecUtils
9
import org.txm.searchengine.cqp.CQPSearchEngine
10
import org.txm.searchengine.cqp.corpus.MainCorpus
11
import visuAnalec.elements.Unite
12

    
13
/**
 * StAX pass over an XML file that writes one extra element around the words
 * covered by each {@link Unite}.
 *
 * The element name is the unit's "type" property and its other properties are
 * written as attributes. Units are matched to {@code <w>} elements through the
 * word ids that CQP returns for the units' start and end corpus positions.
 *
 * NOTE(review): everything not intercepted here is delegated to
 * {@link StaxIdentityParser}, which presumably copies the input unchanged -
 * confirm against that class.
 */
public class UnitsInserter extends StaxIdentityParser {

        /** Units to insert; sorted in place by the constructor. */
        List<Unite> units
        /** Result of the last cpos2Str call made by the constructor, i.e. the word ids of the units' end positions. */
        String[] ids
        /** File being processed; only used in warning messages. */
        File inputFile
        /** word id -> list of the units that start on that word. */
        def open_id2Units = [:]
        /** word id -> set of the units that must be closed after that word. */
        def close_id2Units = [:]
        /** Units whose start element has been written and whose end element has not. */
        def writing_units = []
        /** Kept index-aligned with writing_units (entries are added and removed together). */
        def writing_stacks = []

        /** Path of the element currently being parsed, e.g. "/a/b/w". */
        def stack = "";

        // Never assigned in this class; kept because they are part of the class's properties.
        def writing_start, writing_end;

        def positions2id = [:] // used to relocate end of units

        /** True once a "text" start element has been seen; words before it are ignored. */
        boolean start = false;
        /** Id of the {@code <w>} element being processed, or null outside of an identified word. */
        String word_id = null;

        /**
         * Indexes the units by the id of their first and last word.
         *
         * @param corpus corpus whose "id" property is used to translate positions into word ids
         * @param inputFile XML file to process
         * @param units units to insert; this list is sorted in place (start ascending, then end descending)
         * @param type not used by this class
         */
        public UnitsInserter(MainCorpus corpus, File inputFile, List<Unite> units, String type) {
                super(inputFile);
                this.inputFile = inputFile;
                this.units = units;

                // outermost unit first when several units start at the same position
                this.units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: b.getFin() <=> a.getFin() }

                String idProperty = corpus.getProperty("id").getQualifiedName();

                // word ids of the start positions
                int[] positions = new int[units.size()];
                for (int i = 0 ; i < units.size() ; i++) {
                        positions[i] = units.get(i).getDeb();
                }
                ids = CQPSearchEngine.getCqiClient().cpos2Str(idProperty, positions);
                for (int i = 0 ; i < ids.length ; i++) {
                        String id = ids[i]

                        if (id != null) {
                                positions2id[positions[i]] = id
                                if (!open_id2Units.containsKey(id)) open_id2Units[id] = []
                                open_id2Units[id] << units[i]
                        }
                }

                // word ids of the end positions
                positions = new int[units.size()];
                for (int i = 0 ; i < units.size() ; i++) {
                        positions[i] = units.get(i).getFin();
                }
                ids = CQPSearchEngine.getCqiClient().cpos2Str(idProperty, positions);
                for (int i = 0 ; i < ids.length ; i++) {
                        String id = ids[i]
                        if (id != null) {
                                positions2id[positions[i]] = id
                                if (!close_id2Units.containsKey(id)) close_id2Units[id] = new HashSet<Unite>()
                                close_id2Units[id] << units[i]
                        }
                }
        }

        /**
         * Tracks the element path and, for each identified {@code <w>} found after
         * the "text" element, writes the start elements of the units beginning on
         * that word before the word itself is written.
         */
        @Override
        protected void processStartElement() throws XMLStreamException, IOException {

                stack += "/"+localname

                if ("text".equals(localname)) {
                        start = true;
                } else if ("w".equals(localname) && start) {
                        word_id = getParserAttributeValue("id");
                        if (word_id == null) {
                                println "Warning: found <w> without id at line "+parser.getLocation().getLineNumber()+" in "+inputFile
                        } else {
                                writeOpenUnits()
                        }
                }

                super.processStartElement();
        }

        /**
         * Writes the start element of every unit that begins on the current word.
         *
         * A new unit that ends after an already open unit is also registered to
         * be closed together with that open unit.
         */
        protected void writeOpenUnits() {

                def toWrite = open_id2Units[word_id]
                if (toWrite == null) return

                for (Unite unite : toWrite) {

                        for (int i = 0 ; i < writing_units.size() ; i++) {
                                Unite u = writing_units.get(i);
                                if (unite.getFin() > u.getFin()) {
                                        String id = positions2id[u.getFin()]
                                        // no id was resolved for that end position: nothing to register
                                        // (the unguarded access used to throw a NullPointerException)
                                        def closing = close_id2Units[id]
                                        if (closing != null) {
                                                closing << unite // close the unite at the same moment
                                        }
                                }
                        }

                        writeUnit(unite);
                }
        }

        /**
         * Writes one end element per open unit that has to be closed now.
         *
         * Inside an identified word, these are the open units registered in
         * close_id2Units for that word; otherwise they are the entries of
         * writing_stacks that are equal to the current path.
         */
        protected void writeCloseUnits() {
                if (word_id != null) {
                        def toClose = close_id2Units[word_id]
                        if (toClose != null) {
                                for (int i = 0 ; i < writing_units.size() ; i++) {
                                        Unite u = writing_units.get(i);
                                        if (toClose.contains(u)) {
                                                writing_stacks.remove(i)
                                                writing_units.remove(i)
                                                writer.writeEndElement();
                                                i--; // the next entry moved into slot i
                                        }
                                }
                        }
                } else {
                        for (int i = 0 ; i < writing_stacks.size() ; i++) {
                                if (writing_stacks[i].equals(stack)) {
                                        writing_stacks.remove(i)
                                        writing_units.remove(i)
                                        writer.writeEndElement();
                                        i-- // the next entry moved into slot i
                                }
                        }
                }
        }

        /**
         * Registers the unit as open and writes its start element: the tag name is
         * the unit's "type" property, every other property becomes an attribute.
         *
         * @param currentUnit unit to open
         */
        protected void writeUnit(Unite currentUnit) {

                writing_units << currentUnit
                // NOTE(review): the unit itself is stored here, while the readers of
                // writing_stacks compare its entries with the 'stack' path string -
                // confirm whether 'stack' was meant. Left unchanged on purpose.
                writing_stacks << currentUnit

                // A body-less "if (currentUnit.getDeb() > writing_start)" used to guard only
                // this call. writing_start is never assigned in this class and Groovy treats a
                // non-null value as greater than null, so it always passed; had it failed,
                // the attributes below would have been written without their start element.
                writer.writeStartElement(currentUnit.getProp("type"));
                HashMap props = currentUnit.getProps();
                for (String p : props.keySet()) {
                        if (p.equals("type")) continue; // ignore the type since written in tag name
                        writer.writeAttribute(p, ""+props.get(p));
                }
        }

        /**
         * Closes the units that end with the current element, writes the element's
         * own end through the parent class and pops it from the path.
         */
        @Override
        protected void processEndElement() throws XMLStreamException {

                if (writing_stacks.size() > 0 && writing_stacks[-1].equals(stack)) {
                        writeCloseUnits()
                }

                super.processEndElement();

                // drop the trailing "/" + localname
                stack = stack.substring(0, stack.length() - localname.length() - 1);

                if ("w".equals(localname)) {
                        if (start && word_id != null) {
                                // the word is written: close the units that end on it
                                writeCloseUnits()
                        }
                        word_id = null;
                }
        }
}