Statistics
| Revision:

root / tmp / org.txm.core / src / java / org / txm / scripts / importer / SAttributesListener.groovy @ 1000

History | View | Annotate | Download (6.3 kB)

1
package org.txm.scripts.importer
2

    
3
import java.util.ArrayList;
4
import java.util.HashMap;
5

    
6
import javax.xml.stream.*;
7

    
8
/**
 * Listens to the StAX events of XML files and records, for every element met,
 * the names of its attributes and how deeply the element is nested in itself.
 * Element and attribute names are stored lowercased.
 *
 * Each listener owns its results: scanning with one listener never alters the
 * results held by another one.
 *
 * @author mdecorde
 */
class SAttributesListener {

	/**
	 * Element name -> attribute names seen on that element.
	 * NOTE: the values are HashSet instances at runtime (see startElement), not
	 * ArrayList as the declared type says; the declaration is kept so that the
	 * signature of getStructs() does not change.
	 */
	private HashMap<String, ArrayList<String>> structs = new HashMap<String, ArrayList<String>>();
	/** Element name -> number of currently open elements with that name. */
	private HashMap<String, Integer> structsCountProf = new HashMap<String, Integer>();
	/** Element name -> highest value ever reached in structsCountProf. */
	private HashMap<String, Integer> structsMaxProf = new HashMap<String, Integer>();
	/** Path of the currently open elements, always starting and ending with "/". */
	private String structPath = "/";
	/** Reader positioned on the current event; used to read the attributes in startElement. */
	private XMLStreamReader parser;

	/** Creates a listener with empty results and no parser. */
	SAttributesListener() {
	}

	/**
	 * Creates a listener with empty results.
	 * @param parser the reader the attributes are read from
	 */
	SAttributesListener(XMLStreamReader parser) {
		this();
		this.parser = parser;
	}

	/**
	 * Merges the results of this listener into another one: attribute names are
	 * added to the other listener's sets and the greater maximum depth is kept.
	 * This listener is left unchanged.
	 *
	 * @param another the listener receiving the results
	 */
	public void appendResultsTo(SAttributesListener another) {
		if (another.is(this)) return;

		// '.@' reads the field itself; 'another.structs' could go through getStructs() and its side effects
		for (String name : structs.keySet()) {
			def target = another.@structs.get(name)
			if (target == null) {
				target = new HashSet();
				another.@structs.put(name, target)
			}
			target.addAll(structs.get(name))
		}

		for (Map.Entry<String, Integer> entry : structsMaxProf.entrySet()) {
			String name = entry.getKey()
			Integer otherMax = another.@structsMaxProf.get(name)
			if (otherMax == null || otherMax < entry.getValue()) {
				another.@structsMaxProf.put(name, entry.getValue())
			}
			// startElement expects a counter for every known element
			if (!another.@structsCountProf.containsKey(name)) {
				another.@structsCountProf.put(name, 0)
			}
		}
	}

	/**
	 * Sets the reader the attributes are read from (same effect as setParser).
	 * @param parser the reader
	 */
	public void start(def parser) {
		this.parser = parser;
	}

	/** Name of the word element; elements with this name are ignored. */
	def W = "w";
	/** Elements with this name are ignored. */
	def ANA = "ana";
	/** Elements with this name are ignored. */
	def FORM = "form";

	/**
	 * Tells if a tag name designates the given element name, ignoring case.
	 * The element name is lowercased by the callers while the tag is stored as
	 * given, hence the case-insensitive comparison.
	 */
	private static boolean sameTag(String lowercasedName, def tag) {
		return tag != null && lowercasedName.equalsIgnoreCase(tag.toString())
	}

	/** Tells if the element is one of the W, ANA or FORM elements, which are not recorded. */
	private boolean isIgnored(String lowercasedName) {
		return sameTag(lowercasedName, W) || sameTag(lowercasedName, ANA) || sameTag(lowercasedName, FORM)
	}

	/**
	 * Call this method for each START_ELEMENT stax event.
	 * Registers the element, updates its nesting depth and collects the names
	 * of the attributes of the parser's current element.
	 *
	 * @param localname the element localname
	 */
	public void startElement(String localname) {
		// Locale.ROOT: the result must not depend on the default locale
		localname = localname.toLowerCase(Locale.ROOT);
		if (isIgnored(localname)) return;

		structPath += localname + "/"

		def attrs = structs.get(localname)
		if (attrs == null) { // first time this element is met
			attrs = new HashSet();
			structs.put(localname, attrs);
			structsCountProf.put(localname, 0)
			structsMaxProf.put(localname, 0)
		}

		// get structure recursion
		int prof = structsCountProf.get(localname) + 1
		structsCountProf.put(localname, prof)
		if (structsMaxProf.get(localname) < prof) {
			structsMaxProf.put(localname, prof)
		}

		// get the structure attributes
		int count = parser.getAttributeCount()
		for (int i = 0; i < count; i++) {
			attrs << parser.getAttributeLocalName(i).toLowerCase(Locale.ROOT);
		}
	}

	/**
	 * Call this method for each END_ELEMENT stax event.
	 * Removes the last segment of the current path and decrements the number
	 * of open elements with that name.
	 *
	 * @param localname the element localname
	 */
	public void endElement(String localname) {
		localname = localname.toLowerCase(Locale.ROOT);
		if (isIgnored(localname)) return;

		if (structPath.length() > 1) {
			// the path ends with "/": search the separator that precedes the last segment
			int idx = structPath.lastIndexOf("/", structPath.length() - 2);
			structPath = structPath.substring(0, idx + 1)
		}

		// an element that was never started has no counter: nothing to decrement
		Integer prof = structsCountProf.get(localname)
		if (prof != null && prof > 0) {
			structsCountProf.put(localname, prof - 1)
		}
	}

	boolean firstGetStructs = true;

	/**
	 * Returns the attributes found per element. This is the internal map, not a copy.
	 * The "div1" to "div6" entries are removed from it according to the number
	 * of "div" elements currently open.
	 *
	 * NOTE(review): the test reads the count of currently open "div" elements,
	 * which looks like it is 0 again once a balanced document has been read;
	 * the maximum depth may have been intended. Left as is, to be confirmed.
	 *
	 * @return element name -> attribute names (HashSet values at runtime)
	 */
	public HashMap<String,ArrayList<String>> getStructs() {
		Integer openDivs = structsCountProf.get("div")
		if (openDivs != null) {
			for (int level = 1; level <= 6; level++) {
				if (openDivs >= level) {
					structs.remove("div" + level)
				}
			}
		}

		if (firstGetStructs) {
			firstGetStructs = false;
			// make sure every key is lowercased; iterate on a copy since the map is modified
			for (String key : new ArrayList<String>(structs.keySet())) {
				String lowercased = key.toLowerCase(Locale.ROOT)
				if (!lowercased.equals(key)) {
					structs.put(lowercased, structs.remove(key));
				}
			}
		}

		return structs;
	}

	boolean firstGetstructsCountProf = true;

	/**
	 * Returns, for each element, its maximum nesting depth minus one: 0 for an
	 * element never nested in itself, 1 for an element nested once in itself, etc.
	 *
	 * @return a new map, element name -> recursion level
	 */
	public HashMap<String, Integer> getProfs() {
		HashMap<String, Integer> profs = new HashMap<String, Integer>();
		for (Map.Entry<String, Integer> entry : structsMaxProf.entrySet()) {
			int max = entry.getValue()
			profs.put(entry.getKey(), max > 0 ? max - 1 : 0)
		}
		return profs;
	}

	/**
	 * Scans a file with a new listener.
	 *
	 * @param xmlFile the XML file to read
	 * @return the listener holding the results
	 */
	public static SAttributesListener scanFile(File xmlFile) {
		return scanFile(xmlFile, null)
	}

	/**
	 * Sets the reader the attributes are read from.
	 * @param parser the reader
	 */
	public void setParser(def parser) {
		this.parser = parser;
	}

	/**
	 * Scans the elements of a file, from the start of each "text" element to
	 * its end (both included). Elements outside of "text" are not recorded.
	 * Merge results in the parentListener.
	 *
	 * @param xmlFile the XML file to read
	 * @param parentListener results are appended to the parentListener if any
	 * @return the parentListener if any, a new listener otherwise
	 */
	public static SAttributesListener scanFile(File xmlFile, SAttributesListener parentListener) {

		boolean inText = false;
		InputStream inputData = xmlFile.toURI().toURL().openStream();
		XMLStreamReader parser = null;
		try {
			parser = XMLInputFactory.newInstance().createXMLStreamReader(inputData);

			SAttributesListener listener;
			if (parentListener != null) {
				listener = parentListener;
				listener.setParser(parser)
			} else {
				listener = new SAttributesListener(parser);
			}

			String TEXT = "text";
			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
				if (event == XMLStreamConstants.START_ELEMENT) {
					String name = parser.getLocalName()
					if (TEXT.equals(name)) inText = true; // before the call: "text" itself is recorded
					if (inText) listener.startElement(name)
				} else if (event == XMLStreamConstants.END_ELEMENT) {
					String name = parser.getLocalName()
					if (inText) listener.endElement(name)
					if (TEXT.equals(name)) inText = false; // after the call: "text" itself is closed
				}
			}

			return listener
		} finally {
			// closing the reader does not close its input stream: both are closed,
			// even when the parsing failed
			try {
				if (parser != null) parser.close()
			} finally {
				inputData.close()
			}
		}
	}

	/**
	 * Scans the ".xml" files (hidden files excluded) found directly in a
	 * directory and gathers their results in a single listener.
	 *
	 * @param xmlDirectory the directory containing the XML files
	 * @param wordTag the name of the word element, which is not recorded
	 * @return the listener holding the results of all the files
	 */
	public static SAttributesListener scanFiles(File xmlDirectory, String wordTag) {
		SAttributesListener listener = new SAttributesListener()
		listener.W = wordTag

		File[] files = xmlDirectory.listFiles()
		if (files != null) { // null when xmlDirectory is not a readable directory
			for (File xmlFile : files) {
				if (xmlFile.isFile() && !xmlFile.isHidden() && xmlFile.getName().toLowerCase(Locale.ROOT).endsWith(".xml")) {
					scanFile(xmlFile, listener); // results saved in 'listener' data
				}
			}
		}

		return listener;
	}
}