Statistics
| Revision:

root / tmp / org.txm.core / src / java / org / txm / scripts / importer / SAttributesListener.groovy @ 1688

History | View | Annotate | Download (6.4 kB)

1
package org.txm.scripts.importer
2

    
3
import java.util.ArrayList;
4
import java.util.HashMap;
5

    
6
import javax.xml.stream.*;
7

    
8
import org.txm.utils.io.IOUtils
9

    
10
/**
 * Reads an XML stream and records, for every structural element met:
 * its attribute names and its recursion (self-nesting) level.
 * Element and attribute names are lowercased before being stored.
 *
 * The elements whose lowercased name equals {@link #W}, {@link #ANA} or
 * {@link #FORM} are ignored.
 *
 * NOTE(review): the result maps and the current path are <b>static</b> and are
 * re-created by every constructor call, so creating a new listener discards
 * the results of any previous one. Use a single listener at a time.
 *
 * @author mdecorde
 */
class SAttributesListener {

        /** element name -> attribute names (the stored values are HashSet instances at runtime) */
        private static HashMap<String,ArrayList<String>> structs = new HashMap<String, ArrayList<String>>();
        /** element name -> number of currently opened elements of that name */
        private static HashMap<String, Integer> structsCountProf = new HashMap<String, Integer>();
        /** element name -> highest value ever reached in structsCountProf */
        private static HashMap<String, Integer> structsMaxProf = new HashMap<String, Integer>();
        /** path of the currently opened elements: always starts and ends with "/" */
        private static String structPath = "/";
        /** the StAX reader the attributes are read from in startElement */
        private XMLStreamReader parser;

        /**
         * Creates a listener without parser and resets the (static) results.
         * A parser must be given with {@link #start} or {@link #setParser}
         * before {@link #startElement} is called.
         */
        SAttributesListener() {
                structs = new HashMap<String, ArrayList<String>>();
                structsCountProf = new HashMap<String, Integer>();
                structsMaxProf = new HashMap<String, Integer>();
                structPath = "/";
        }

        /**
         * Creates a listener reading the attributes from the given parser
         * and resets the (static) results.
         *
         * @param parser the StAX reader positioned by the caller
         */
        SAttributesListener(XMLStreamReader parser) {
                this();
                this.parser = parser;
        }

        /**
         * Makes this listener use the result maps reached through <code>another</code>.
         *
         * NOTE(review): despite the method name, the assignments go from
         * <code>another</code> to this listener. As the three fields are static,
         * both sides designate the same class-level maps: confirm the intended
         * behavior before relying on this method.
         *
         * @param another the listener whose result maps are taken
         */
        public void appendResultsTo(SAttributesListener another) {
                structs = another.structs;
                structsCountProf = another.structsCountProf;
                structsMaxProf = another.structsMaxProf;
        }

        /**
         * Sets the parser the attributes are read from.
         *
         * @param parser the StAX reader
         */
        public void start(def parser) {
                this.parser = parser;
        }

        /** ignored element names; they are compared with the lowercased element name */
        def W = "w";
        def ANA = "ana";
        def FORM = "form";

        /**
         * @param lowercasedName an element name already lowercased
         * @return true if the element must not be recorded as a structure
         */
        private boolean isIgnored(String lowercasedName) {
                return lowercasedName.equals(W) || lowercasedName.equals(ANA) || lowercasedName.equals(FORM);
        }

        /**
         * Call this method for each START_ELEMENT stax event.
         * The attributes are read from the current parser, which must be
         * positioned on the START_ELEMENT event.
         *
         * @param localname the element localname
         */
        public void startElement(String localname) {
                localname = localname.toLowerCase();
                if (isIgnored(localname)) return;

                structPath += localname+"/"

                def attrs = structs.get(localname)
                if (attrs == null) { // first time this element is met (or it was removed by getStructs)
                        attrs = new HashSet();
                        structs.put(localname, attrs);
                        structsCountProf.put(localname, 0)
                        structsMaxProf.put(localname, 0)
                }

                // update the structure recursion: one more opened element of this name
                int prof = structsCountProf.get(localname)+1
                structsCountProf.put(localname, prof)
                if (structsMaxProf.get(localname) < prof) {
                        structsMaxProf.put(localname, prof)
                }

                // register the structure attributes
                int nAttributes = parser.getAttributeCount()
                for (int i = 0 ; i < nAttributes ; i++) {
                        attrs << parser.getAttributeLocalName(i).toLowerCase();
                }
        }

        /**
         * Call this method for each END_ELEMENT stax event.
         * Removes the last segment of the current path and decrements the
         * number of opened elements of that name.
         *
         * @param localname the element localname
         */
        public void endElement(String localname) {
                localname = localname.toLowerCase();
                if (isIgnored(localname)) return;

                if (structPath.length() > 1) {
                        // the path ends with "/": search the "/" that precedes the
                        // last segment so the whole "name/" segment is removed
                        int idx = structPath.lastIndexOf("/", structPath.length() - 2);
                        structPath = structPath.substring(0, idx + 1)
                }

                // an end tag without registered start tag must not fail nor
                // produce a negative depth
                Integer prof = structsCountProf.get(localname)
                if (prof != null && prof > 0) {
                        structsCountProf.put(localname, prof - 1)
                }
        }

        boolean firstGetStructs = true;

        /**
         * Returns the element name -> attribute names map. It is the internal
         * map, not a copy.
         *
         * Before returning, "div1" ... "div6" entries are removed depending on
         * the number of currently opened "div" elements ("divN" is removed
         * when that number is greater than N-1).
         * NOTE(review): the test uses the current depth counter, which is back
         * to 0 once a well-formed document is fully read; the maximum depth
         * may have been intended - confirm with the callers.
         *
         * On the first call the keys are re-inserted lowercased.
         *
         * @return the structures and their attribute names
         */
        public HashMap<String,ArrayList<String>> getStructs() {
                Integer divDepth = structsCountProf.get("div")
                if (divDepth != null) {
                        for (int level = 1 ; level <= 6 ; level++) {
                                if (divDepth > level - 1) {
                                        structs.remove("div"+level)
                                }
                        }
                }

                if (firstGetStructs) {
                        firstGetStructs = false;
                        // iterate over a copy of the keys since the map is modified
                        def keys = new ArrayList(structs.keySet())
                        for (String key : keys) {
                                def value = structs.remove(key)
                                structs.put(key.toLowerCase(), value);
                        }
                }

                return structs;
        }

        boolean firstGetstructsCountProf = true;

        /**
         * Returns a new map giving, for each element name, its maximum
         * recursion level: the maximum number of simultaneously opened
         * elements of that name minus one, and never less than 0.
         *
         * @return a new element name -> recursion level map
         */
        public HashMap<String, Integer> getProfs() {
                HashMap<String, Integer> profs = new HashMap<String, Integer>();
                for (String key : structsMaxProf.keySet()) {
                        int max = structsMaxProf.get(key)
                        profs.put(key, max > 0 ? max - 1 : 0)
                }
                return profs;
        }

        /**
         * Scans a file with a new listener.
         *
         * @param xmlFile the XML file to read
         * @return the listener containing the results
         */
        public static SAttributesListener scanFile(File xmlFile) {
                return scanFile(xmlFile, null)
        }

        /**
         * Sets the parser the attributes are read from.
         *
         * @param parser the StAX reader
         */
        public void setParser(def parser) {
                this.parser = parser;
        }

        /**
         * Scans the elements of a file, from each "text" start tag to the next
         * "text" end tag (both included), and merges the results in the
         * parentListener if any.
         *
         * The stream and the parser are closed even if the parsing fails.
         *
         * @param xmlFile the XML file to read
         * @param parentListener results are appended to the parentListener if any
         * @return the parentListener if any, a new listener otherwise
         */
        public static SAttributesListener scanFile(File xmlFile, SAttributesListener parentListener) {

                String TEXT = "text";
                boolean insideText = false;
                def inputData = xmlFile.toURI().toURL().openStream();
                def parser = null;
                try {
                        parser = XMLInputFactory.newInstance().createXMLStreamReader(inputData);

                        SAttributesListener listener;
                        if (parentListener != null) {
                                listener = parentListener;
                                listener.setParser(parser)
                        } else {
                                listener = new SAttributesListener(parser);
                        }

                        for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                                if (event == XMLStreamConstants.START_ELEMENT) {
                                        if (TEXT.equals(parser.getLocalName())) insideText = true;
                                        if (insideText) listener.startElement(parser.getLocalName())
                                } else if (event == XMLStreamConstants.END_ELEMENT) {
                                        if (insideText) listener.endElement(parser.getLocalName())
                                        if (TEXT.equals(parser.getLocalName())) insideText = false;
                                }
                        }

                        return listener
                } finally {
                        // always release the resources, the stream last
                        try {
                                if (parser != null) parser.close();
                        } finally {
                                inputData.close();
                        }
                }
        }

        /**
         * Scans all the visible "*.xml" files directly contained in a directory
         * and gathers the results in one listener.
         *
         * @param xmlDirectory the directory containing the XML files
         * @param wordTag the name of the word element to ignore; it replaces {@link #W}
         * @return the listener containing the results of all the files
         */
        public static SAttributesListener scanFiles(File xmlDirectory, String wordTag) {
                SAttributesListener listener = new SAttributesListener()
                listener.W = wordTag

                def files = xmlDirectory.listFiles(IOUtils.HIDDENFILE_FILTER)
                if (files == null) return listener; // not a directory or not readable

                for (File xmlFile : files) {
                        if (xmlFile.isFile() && !xmlFile.isHidden() && xmlFile.getName().toLowerCase().endsWith(".xml")) {
                                scanFile(xmlFile, listener); // results saved in 'listener' data
                        }
                }

                return listener;
        }
}