Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / xmltxmpara / ValidateXmlTXM.groovy @ 1688

History | View | Annotate | Download (5.6 kB)

1
package org.txm.scripts.importer.xmltxmpara
2

    
3
import javax.xml.stream.*;
4
import java.net.URL;
5

    
6
/**
 * Structural sanity check for the XML files of a source directory.
 *
 * Each "*.xml" file (except "import.xml" and "align.xml") is streamed with
 * StAX and a few facts are recorded in the {@code has*} fields; the
 * {@code check*} flags decide which of those facts make a file invalid.
 *
 * The per-file result fields are reset at the start of every file, so after a
 * call they describe the LAST file that was parsed.
 *
 * NOTE(review): {@link #checkW} / {@link #hasTXMW} are recorded but no check
 * fails on them; confirm whether a word-id check was intended.
 */
class ValidateXmlTXM {

    /** Namespace URI of the reserved "xml" prefix; used to read {@code xml:id} attributes. */
    public static final String XMLNS = "http://www.w3.org/XML/1998/namespace";
    /** TXM namespace URI (not referenced by the checks below). */
    public static final String TXMNS = "http://textometrie.org/1.0";
    /** TEI namespace URI (not referenced by the checks below). */
    public static final String TEINS = "http://www.tei-c.org/ns/1.0";

    // ---- which checks are enforced -------------------------------------
    /** Require a teiCorpus element with xml:id, a teiHeader and a "version" metadata. Turned on by validate() when align.xml exists. */
    boolean checkteiCorpus = false;
    /** Require every TEI element to carry an xml:id. */
    boolean checkTEI = true;
    /** Require every {@link #alignStruct} element to carry an "align" attribute. */
    boolean checkAlignStruct = false;
    /** Currently not consulted by any check. */
    boolean checkW = true;

    // ---- facts recorded for the file being validated -------------------
    boolean hasTeiCorpus = false;
    boolean hasTeiCorpusID = false;
    boolean hasTeiCorpusHeader = false;
    /** Never set by this class. */
    boolean hasTeiCorpusHeaderAppDesc = false;
    boolean hasTeiCorpusHeaderVersion = false;
    /** Number of TEI elements seen. */
    int hasTei = 0;
    /** Number of TEI elements seen that have an xml:id. */
    int hasTeiID = 0;
    /** True until an {@link #alignStruct} element without an "align" attribute is seen. */
    boolean hasAlignStruct = false
    /** Local name of the element used for alignment; the empty default matches no element. */
    String alignStruct = "";
    /** True until a w element whose xml:id does not match "w_.+_.+" is seen. */
    boolean hasTXMW = false;

    /** Content of align.xml: link target -> [alignElement, alignLevel]. */
    HashMap<String, ArrayList<String>> links = [:];

    // ---- parsing state, shared by the two parsing methods --------------
    def url;
    def inputData;
    def factory;
    XMLStreamReader parser;
    /** Slash-separated local names of the currently open elements. */
    String path = ""

    /**
     * Validates every "*.xml" file directly inside sourceDir.
     *
     * If the directory contains "align.xml", its links are loaded first and
     * the teiCorpus check is switched on for this instance.
     *
     * @param sourceDir directory holding the source files
     * @return true only if sourceDir is a directory, align.xml (when present)
     *         could be read, and every candidate file passed the enabled checks
     */
    public boolean validate(File sourceDir)
    {
        if (sourceDir == null || !sourceDir.isDirectory()) {
            println "not a directory: $sourceDir"
            return false;
        }

        boolean ret = true;
        // get infos from align.xml if any
        File alignxml = sourceDir.listFiles()?.find { it.getName() == "align.xml" }
        if (alignxml != null) {
            ret = infosFromImportXML(alignxml)
            checkteiCorpus = true;
            // checkAlignStruct = true;
            // mainLang = ...
            // corpusnames = ...
            // alignStructs =
        }

        // the dot before the extension is escaped so that only real ".xml" names match
        sourceDir.eachFileMatch(~/.+\.xml/) { f ->
            if (f.getName() != "import.xml" && f.getName() != "align.xml" && f.isFile()) {
                // validate first, combine second: every file is checked even after a failure
                ret = validateXMLTXM(f) && ret;
            }
        }

        return ret;
    }

    /**
     * Reads the link elements of align.xml into {@link #links}, keyed by their
     * "target" attribute with [alignElement, alignLevel] as value.
     *
     * @param alignxml the align.xml file
     * @return true when the file was read completely, false on an XML error
     */
    protected boolean infosFromImportXML(File alignxml)
    {
        parser = null;
        inputData = null;
        try {
            this.url = alignxml.toURI().toURL();
            inputData = url.openStream();

            factory = XMLInputFactory.newInstance();
            parser = factory.createXMLStreamReader(inputData);
            for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                if (event == XMLStreamConstants.START_ELEMENT && parser.getLocalName() == "link") {
                    links.put(parser.getAttributeValue(null, "target"),
                            [parser.getAttributeValue(null, "alignElement"), parser.getAttributeValue(null, "alignLevel")])
                }
            }
        } catch (XMLStreamException ex) {
            System.out.println(ex);
            return false;
        } finally {
            closeParsing()
        }
        println "get infos from $alignxml"
        return true;
    }

    /** Debug helper: prints every attribute of the parser's current element, prefixed with {@link #path}. */
    private void printAttributes(def parser)
    {
        for (int i = 0; i < parser.getAttributeCount(); i++) {
            println("$path " + parser.getAttributeLocalName(i) +
                    " = " + parser.getAttributeValue(i))
        }
    }

    /** Closes the current parser and input stream; close failures are ignored because the result is already decided. */
    private void closeParsing()
    {
        try {
            if (parser != null) parser.close();
        } catch (XMLStreamException ignored) {
        }
        try {
            if (inputData != null) inputData.close();
        } catch (IOException ignored) {
        }
    }

    /** Puts the per-file facts back to their starting values so one file cannot influence the next. */
    private void resetFileState()
    {
        parser = null;
        inputData = null;
        path = "";
        hasTeiCorpus = false;
        hasTeiCorpusID = false;
        hasTeiCorpusHeader = false;
        hasTeiCorpusHeaderVersion = false;
        hasTei = 0;
        hasTeiID = 0;
        // these two are only ever falsified while parsing, so they start out true
        hasAlignStruct = true;
        hasTXMW = true;
    }

    /**
     * Streams one XML file, records the {@code has*} facts and applies the
     * enabled checks.
     *
     * @param xmlfile file to validate
     * @return false if the file is not well-formed or an enabled check fails,
     *         true otherwise
     */
    protected boolean validateXMLTXM(File xmlfile)
    {
        println "validate $xmlfile"
        resetFileState()
        boolean inTeiCorpus = false;

        try {
            this.url = xmlfile.toURI().toURL();
            inputData = url.openStream();

            factory = XMLInputFactory.newInstance();
            parser = factory.createXMLStreamReader(inputData);

            for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                if (event == XMLStreamConstants.START_ELEMENT) {
                    String localname = parser.getLocalName();
                    path += "/" + localname
                    switch (localname) {
                        case "teiCorpus":
                            inTeiCorpus = true;
                            hasTeiCorpus = true;
                            if (parser.getAttributeValue(XMLNS, "id") != null) {
                                hasTeiCorpusID = true;
                            }
                            break;
                        case "teiHeader":
                            // only a header located inside teiCorpus counts
                            if (inTeiCorpus) {
                                hasTeiCorpusHeader = true;
                            }
                            break;
                        case "metadata":
                            if (parser.getAttributeValue(null, "name") == "version") {
                                hasTeiCorpusHeaderVersion = true;
                            }
                            break;
                        case "TEI":
                            hasTei++;
                            if (parser.getAttributeValue(XMLNS, "id") != null) {
                                hasTeiID++;
                            }
                            break;
                        case "w":
                            String wordId = parser.getAttributeValue(XMLNS, "id");
                            if (wordId == null || !wordId.matches("w_.+_.+")) {
                                hasTXMW = false;
                            }
                            break;
                        case alignStruct:
                            if (parser.getAttributeValue(null, "align") == null) {
                                hasAlignStruct = false;
                            }
                            break;
                    }
                } else if (event == XMLStreamConstants.END_ELEMENT) {
                    // drop the last path segment, including the root element's
                    int cut = path.lastIndexOf("/")
                    if (cut >= 0) {
                        path = path.substring(0, cut)
                    }
                    if (parser.getLocalName() == "teiCorpus") {
                        inTeiCorpus = false;
                    }
                }
            }
        } catch (XMLStreamException ex) {
            System.out.println(ex);
            return false;
        } finally {
            closeParsing()
        }

        if (checkteiCorpus &&
                !(hasTeiCorpus && hasTeiCorpusID && hasTeiCorpusHeader && hasTeiCorpusHeaderVersion)) {
            println "wrong <teiCorpus> format: $hasTeiCorpus & $hasTeiCorpusID & $hasTeiCorpusHeader & $hasTeiCorpusHeaderVersion"
            return false;
        }

        if (checkAlignStruct && !hasAlignStruct) {
            println "wrong align structure format: $hasAlignStruct"
            return false;
        }

        if (checkTEI && hasTei != hasTeiID) {
            println "wrong <TEI> format: $hasTei == $hasTeiID"
            return false;
        }

        return true;
    }

    /**
     * Command-line entry point.
     *
     * @param args optional: args[0] is the directory to validate; without it
     *             the original hard-coded development directory is used
     */
    public static void main(String[] args)
    {
        String dirName = (args != null && args.length > 0) ? args[0] : "/home/mdecorde/xml/xmltxmpara"
        File srcdir = new File(dirName);
        def checker = new ValidateXmlTXM();
        println checker.validate(srcdir)
    }
}
223

    
224