Statistics
| Revision:

root / tmp / org.txm.core / src / java / org / txm / scripts / importer / SAttributesListener.groovy @ 1688

History | View | Annotate | Download (6.4 kB)

1 1000 mdecorde
package org.txm.scripts.importer
2 881 mdecorde
3 881 mdecorde
import java.util.ArrayList;
4 881 mdecorde
import java.util.HashMap;
5 881 mdecorde
6 881 mdecorde
import javax.xml.stream.*;
7 881 mdecorde
8 1370 mdecorde
import org.txm.utils.io.IOUtils
9 1370 mdecorde
10 881 mdecorde
/**
11 881 mdecorde
 * Reads an XML file and collects its XML elements, their attributes and their recursion depth.
12 881 mdecorde
 * Element and attribute names are lowercased.
13 881 mdecorde
 *
14 881 mdecorde
 * @author mdecorde
15 881 mdecorde
 *
16 881 mdecorde
 */
17 881 mdecorde
class SAttributesListener {

        /*
         * Collected results. These are instance fields: each listener owns its
         * own results, so creating a new listener never resets another one.
         *
         * structs:          element name -> attribute names seen on that element.
         *                   NOTE(review): the values are HashSet instances even though
         *                   the declared value type is ArrayList<String>; this only works
         *                   because Groovy does not enforce the generic type here.
         * structsCountProf: element name -> number of currently open elements of that name
         * structsMaxProf:   element name -> highest value reached by structsCountProf
         * structPath:       "/"-separated path of the currently open elements,
         *                   always starting and ending with "/"
         */
        private HashMap<String,ArrayList<String>> structs = new HashMap<String, ArrayList<String>>();
        private HashMap<String, Integer> structsCountProf = new HashMap<String, Integer>();
        private HashMap<String, Integer> structsMaxProf = new HashMap<String, Integer>();
        private String structPath = "/";
        private XMLStreamReader parser;

        /** Lowercased element names that startElement/endElement ignore. */
        def W = "w";
        def ANA = "ana";
        def FORM = "form";

        /** True until getStructs() has been called once. */
        boolean firstGetStructs = true;
        /** Not read by this class; kept so existing accessors keep working. */
        boolean firstGetstructsCountProf = true;

        /**
         * Creates a listener with empty results and no parser.
         */
        SAttributesListener() {
        }

        /**
         * Creates a listener with empty results.
         * @param parser the stax parser the attributes are read from
         */
        SAttributesListener(XMLStreamReader parser) {
                this();
                this.parser = parser;
        }

        /**
         * Merges the results of this listener into another listener.
         * This listener is left unchanged.
         *
         * @param another the listener receiving the results; nothing is done if it is null or this listener
         */
        public void appendResultsTo(SAttributesListener another) {
                if (another == null || another.is(this)) return;

                for (String name : structs.keySet()) {
                        def targetAttrs = another.structs.get(name)
                        if (targetAttrs == null) {
                                targetAttrs = new HashSet();
                                another.structs.put(name, targetAttrs);
                        }
                        targetAttrs.addAll(structs.get(name));
                }

                // keep the deepest recursion seen by either listener
                for (String name : structsMaxProf.keySet()) {
                        Integer mine = structsMaxProf.get(name)
                        Integer theirs = another.structsMaxProf.get(name)
                        if (theirs == null || theirs < mine) {
                                another.structsMaxProf.put(name, mine)
                        }
                }

                // startElement expects a counter for every known structure
                for (String name : structsCountProf.keySet()) {
                        if (!another.structsCountProf.containsKey(name)) {
                                another.structsCountProf.put(name, structsCountProf.get(name))
                        }
                }
        }

        /**
         * Sets the parser read by startElement. Same effect as setParser.
         * @param parser the stax parser
         */
        public void start(def parser) {
                this.parser = parser;
        }

        /**
         * Sets the parser read by startElement.
         * @param parser the stax parser
         */
        public void setParser(def parser) {
                this.parser = parser;
        }

        /**
         * @param localname a lowercased element name
         * @return true if the name equals W, ANA or FORM
         */
        private boolean isIgnored(String localname) {
                return localname.equals(W) || localname.equals(ANA) || localname.equals(FORM);
        }

        /**
         * Call this method for each START_ELEMENT stax event.
         * Registers the element, updates its recursion counters and stores the
         * lowercased local names of the attributes of the parser current element.
         *
         * @param localname the element localname
         */
        public void startElement(String localname) {
                localname = localname.toLowerCase();
                if (isIgnored(localname)) return;

                structPath += localname + "/"

                def attrs = structs.get(localname)
                if (!structs.containsKey(localname)) {
                        attrs = new HashSet();
                        structs.put(localname, attrs);
                        structsCountProf.put(localname, 0)
                        structsMaxProf.put(localname, 0)
                }

                // get structure recursion
                int prof = structsCountProf.get(localname) + 1
                structsCountProf.put(localname, prof)
                if (structsMaxProf.get(localname) < prof) {
                        structsMaxProf.put(localname, prof)
                }

                // get the structure attributes
                for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
                        attrs << parser.getAttributeLocalName(i).toLowerCase();
                }
        }

        /**
         * Call this method for each END_ELEMENT stax event.
         * Does nothing when no element is currently open.
         *
         * @param localname the element localname
         */
        public void endElement(String localname) {
                localname = localname.toLowerCase();
                if (isIgnored(localname)) return;

                // unmatched end event: no element is open
                if (structPath.length() <= 1) return;

                // structPath ends with "/": search from before that slash so the
                // whole last "name/" segment is dropped, not only the slash
                int idx = structPath.lastIndexOf("/", structPath.length() - 2);
                structPath = structPath.substring(0, idx + 1)

                // the counter is missing if this name was never passed to startElement
                Integer prof = structsCountProf.get(localname)
                if (prof != null) {
                        structsCountProf.put(localname, prof - 1)
                }
        }

        /**
         * Returns the structures found so far. The returned map is the internal
         * map of the listener, not a copy.
         *
         * Before returning, "div1" to "divN" are removed from the map when at
         * least N "div" elements are currently open.
         * NOTE(review): this tests the number of open "div" elements, not the
         * maximum depth reached - confirm this is intended.
         *
         * @return element name -> attribute names
         */
        public HashMap<String,ArrayList<String>> getStructs() {
                Integer openDivs = structsCountProf.get("div")
                if (openDivs != null) {
                        for (int level = 1 ; level <= 6 ; level++) {
                                if (openDivs >= level) {
                                        structs.remove("div" + level)
                                }
                        }
                }

                if (firstGetStructs) {
                        firstGetStructs = false;
                        // re-register every structure under its lowercased name
                        def keys = []
                        keys.addAll(structs.keySet());
                        for (String key : keys) {
                                def value = structs.remove(key)
                                structs.put(key.toLowerCase(), value);
                        }
                }

                return structs;
        }

        /**
         * Returns a new map giving, for each structure, its maximum number of
         * simultaneously open elements minus one (0 when it was never opened).
         *
         * @return element name -> recursion level
         */
        public HashMap<String, Integer> getProfs() {
                HashMap<String, Integer> profs = new HashMap<String, Integer>();
                for (String key : structsMaxProf.keySet()) {
                        int max = structsMaxProf.get(key)
                        profs.put(key, max > 0 ? max - 1 : 0)
                }
                return profs;
        }

        /**
         * Scans a file with a new listener.
         *
         * @param xmlFile the XML file to read
         * @return the listener holding the results
         */
        public static SAttributesListener scanFile(File xmlFile) {
                return scanFile(xmlFile, null)
        }

        /**
         * Scans the elements found between the start and the end of each "text"
         * element (included) of a file.
         *
         * @param xmlFile the XML file to read
         * @param parentListener if not null, receives the parser and the results; else a new listener is created
         * @return the listener holding the results
         */
        public static SAttributesListener scanFile(File xmlFile, SAttributesListener parentListener) {
                String TEXT = "text";
                def inputData = xmlFile.toURI().toURL().openStream();
                try {
                        def parser = XMLInputFactory.newInstance().createXMLStreamReader(inputData);
                        try {
                                SAttributesListener listener = parentListener;
                                if (listener != null) {
                                        listener.setParser(parser)
                                } else {
                                        listener = new SAttributesListener(parser);
                                }

                                def start = false; // true while inside a "text" element
                                for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                                        if (event == XMLStreamConstants.START_ELEMENT) {
                                                if (TEXT.equals(parser.getLocalName())) start = true;
                                                if (start) listener.startElement(parser.getLocalName())
                                        } else if (event == XMLStreamConstants.END_ELEMENT) {
                                                if (start) listener.endElement(parser.getLocalName())
                                                if (TEXT.equals(parser.getLocalName())) start = false;
                                        }
                                }
                                return listener
                        } finally {
                                // also reached when parsing fails
                                parser.close();
                        }
                } finally {
                        // closing the parser does not close the stream it reads
                        inputData.close();
                }
        }

        /**
         * Scans the ".xml" files of a directory with a single listener.
         *
         * @param xmlDirectory the directory containing the XML files
         * @param wordTag the element name assigned to the W property of the listener
         * @return the listener holding the results of all the scanned files
         */
        public static SAttributesListener scanFiles(File xmlDirectory, String wordTag) {
                SAttributesListener listener = new SAttributesListener()
                listener.W = wordTag

                def files = xmlDirectory.listFiles(IOUtils.HIDDENFILE_FILTER)
                if (files == null) return listener; // listFiles found nothing to list

                for (File xmlFile : files) {
                        if (xmlFile.isFile() && !xmlFile.isHidden() && xmlFile.getName().toLowerCase().endsWith(".xml")) {
                                scanFile(xmlFile, listener); // results saved in 'listener' data
                        }
                }

                return listener;
        }
}