Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macroprototypes / urs / cqp / MileStoneInserter.groovy @ 2087

History | View | Annotate | Download (2.4 kB)

1
package org.txm.macroprototypes.urs.cqp
2

    
3
import java.io.IOException
4

    
5
import javax.xml.stream.XMLStreamException
6

    
7
import org.txm.importer.StaxIdentityParser
8
import org.txm.macro.urs.AnalecUtils
9
import org.txm.searchengine.cqp.CQPSearchEngine
10
import org.txm.searchengine.cqp.corpus.MainCorpus
11
import visuAnalec.elements.Unite
12

    
13
/**
 * Identity StAX transformation that additionally writes one empty "milestone"
 * element per annotation unit next to the <w> element the unit is attached to.
 *
 * Each unit is attached to the word whose "id" property value is found at the
 * unit's getDeb() corpus position. The unit's "position" property ("before" or
 * "after") selects whether the milestone is written just before the word's
 * start tag or just after its end tag. Only <w> elements encountered after a
 * <text> start tag are considered.
 */
public class MileStoneInserter extends StaxIdentityParser {

    /** Units to insert; sorted in place by the constructor. */
    List<Unite> units

    /** Word ids resolved from the corpus, index-aligned with {@link #units}. */
    String[] ids

    /** The XML file being processed (only used for warning messages here). */
    File inputFile

    /** Maps a word id to the list of units attached to that word. */
    def id2Units = [:]

    /** Becomes true once a <text> start tag has been seen. */
    boolean start = false;

    /** Id of the <w> element currently open, or null when none (or when it has no id). */
    String word_id = null;

    /**
     * Builds the word id -> units index used while parsing.
     *
     * NOTE: the given list is sorted in place (by getDeb(), then getFin()),
     * so the caller's list order is modified.
     *
     * @param corpus the corpus whose "id" property is queried through CQP
     * @param inputFile the XML file to process
     * @param units the units to write as milestones
     */
    public MileStoneInserter(MainCorpus corpus, File inputFile, List<Unite> units) {
        super(inputFile);
        this.inputFile = inputFile;

        this.units = units;
        this.units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }

        // one corpus position per unit, in the sorted order
        int[] positions = new int[units.size()];
        for (int i = 0 ; i < units.size() ; i++) {
            positions[i] = units.get(i).getDeb();
        }

        // ask CQP for the "id" property value at each of these positions
        ids = CQPSearchEngine.getCqiClient().cpos2Str(corpus.getProperty("id").getQualifiedName(), positions);

        // group the units by word id; ids[i] corresponds to units[i]
        for (int i = 0 ; i < ids.length ; i++) {
            String id = ids[i]
            if (id != null) {
                if (!id2Units.containsKey(id)) id2Units[id] = []

                id2Units[id] << units[i]
            }
        }
    }

    /**
     * Writes the "before" milestones of a <w> element, then delegates to the
     * parent implementation so the element itself is processed afterwards.
     */
    @Override
    protected void processStartElement() throws XMLStreamException, IOException {

        if ("text".equals(localname)) {
            start = true;
        } else if ("w".equals(localname) && start) {
            word_id = getParserAttributeValue("id");
            if (word_id == null) {
                println "Warning: found <w> without id at line "+parser.getLocation().getLineNumber()+" in "+inputFile
            } else {
                writeAllUnits(word_id, "before")
            }
        }

        super.processStartElement();
    }

    /**
     * Writes one empty element for every unit attached to the given word id
     * whose "position" property equals the requested position.
     *
     * The element name is the unit's "type" property; every other property of
     * the unit is written as an attribute.
     *
     * @param id the word id to look up
     * @param position the position to write, "before" or "after"
     */
    protected void writeAllUnits(String id, String position) {

        def unitsForWord = id2Units[id]
        if (unitsForWord == null) return; // no units to write

        for (Unite currentUnit : unitsForWord) {

            // Skip only this unit: a word may carry both "before" and "after"
            // units, so a mismatch must not stop the remaining ones from being written.
            if (!position.equals(currentUnit.getProp("position"))) continue;

            writer.writeStartElement(currentUnit.getProp("type"));
            HashMap props = currentUnit.getProps();
            for (String p : props.keySet()) {
                if (p.equals("type")) continue; // ignore the type since written in tag name
                writer.writeAttribute(p, ""+props.get(p));
            }
            writer.writeEndElement();
        }
    }

    /**
     * Delegates to the parent implementation first, then writes the "after"
     * milestones of the <w> element and forgets the current word id.
     */
    @Override
    protected void processEndElement() throws XMLStreamException {
        super.processEndElement();

        if ("w".equals(localname) && start && word_id != null) {
            writeAllUnits(word_id, "after")
        }

        if ("w".equals(localname)) {
            word_id = null;
        }
    }
}