Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / importer / xmltxmpara / ValidateXmlTXM.groovy @ 187

History | View | Annotate | Download (5.4 kB)

1
package org.txm.importer.xmltxmpara
2

    
3
import javax.xml.stream.*;
4
import java.net.URL;
5

    
6
/**
 * Structural checker for the XML files of an import source directory.
 *
 * Each "*.xml" file of the directory (except "import.xml" and "align.xml")
 * is streamed with StAX and tested against the enabled checks:
 * <ul>
 * <li>checkteiCorpus: a teiCorpus element with an xml:id, a teiHeader inside
 *     it, and a metadata element whose name attribute is "version";</li>
 * <li>checkTEI: every TEI element carries an xml:id;</li>
 * <li>checkAlignStruct: every element named like {@link #alignStruct} has an
 *     "align" attribute;</li>
 * <li>checkW: every w element has an xml:id matching "w_.+_.+" (reported as
 *     a warning only, see {@link #validateXMLTXM(File)}).</li>
 * </ul>
 *
 * The instance keeps parsing state in its fields, so a single instance must
 * not be used to validate several files at the same time.
 */
class ValidateXmlTXM {

        public static String XMLNS = "http://www.w3.org/XML/1998/namespace";
        public static String TXMNS = "http://textometrie.org/1.0";
        public static String TEINS = "http://www.tei-c.org/ns/1.0";

        /** Shape expected for the xml:id of a w element; compiled once instead of once per word. */
        private static final java.util.regex.Pattern TXM_W_ID = ~/w_.+_.+/

        // ---- which checks are applied to each file ----
        boolean checkteiCorpus = false;
        boolean checkTEI = true;
        boolean checkAlignStruct = false;
        boolean checkW = true;

        // ---- findings for the file currently (or last) validated; reset for every file ----
        boolean hasTeiCorpus = false;
        boolean hasTeiCorpusID = false;
        boolean hasTeiCorpusHeader = false;
        boolean hasTeiCorpusHeaderAppDesc = false; // kept for compatibility; nothing sets it yet
        boolean hasTeiCorpusHeaderVersion = false;
        int hasTei = 0;   // number of TEI elements seen
        int hasTeiID = 0; // number of TEI elements seen that carry an xml:id
        boolean hasAlignStruct = false
        String alignStruct = "";
        boolean hasTXMW = false;

        /** Content of align.xml: link target -> [alignElement, alignLevel]. */
        HashMap<String, ArrayList<String>> links = [:];

        // ---- StAX plumbing shared by the parsing methods ----
        def url;
        def inputData;
        def factory;
        XMLStreamReader parser;
        /** Slash-separated local names of the elements currently open. */
        String path = ""

        /**
         * Validates every XML file found directly in a directory.
         *
         * When the directory contains a file named "align.xml", its links are
         * loaded and the teiCorpus check is switched on. All files are checked
         * even after a first failure, so that every problem gets reported.
         *
         * @param sourceDir the directory holding the XML files
         * @return true when all checked files pass; false when at least one
         *         fails or when sourceDir is not a directory
         */
        public boolean validate(File sourceDir)
        {
                if (sourceDir == null || !sourceDir.isDirectory()) {
                        println "not a directory: $sourceDir"
                        return false;
                }

                // get infos from align.xml if any
                File alignxml = sourceDir.listFiles()?.find { it.isFile() && it.getName() == "align.xml" }
                if (alignxml != null) {
                        infosFromImportXML(alignxml)
                        checkteiCorpus = true;
                        // checkAlignStruct = true;
                        // mainLang = ...
                        // corpusnames = ...
                        // alignStructs =
                }

                boolean allValid = true;
                // the dot is escaped so that only real ".xml" names are selected
                sourceDir.eachFileMatch(~/.+\.xml/) { f ->
                        if (f.isFile() && f.getName() != "import.xml" && f.getName() != "align.xml") {
                                // call first, combine second: a previous failure must not skip this file
                                allValid = validateXMLTXM(f) && allValid;
                        }
                }
                return allValid;
        }

        /**
         * Loads the link elements of an alignment file into {@link #links}.
         *
         * For each link element, the "target" attribute becomes the key and the
         * pair ["alignElement", "alignLevel"] becomes the value.
         *
         * @param alignxml the alignment file to read
         * @return true when the file was read entirely, false when it could not
         *         be opened or is not well-formed
         */
        protected boolean infosFromImportXML(File alignxml)
        {
                try {
                        openParser(alignxml);
                        for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                                if (event == XMLStreamConstants.START_ELEMENT && parser.getLocalName() == "link") {
                                        links.put(parser.getAttributeValue(null, "target"),
                                                [parser.getAttributeValue(null, "alignElement"), parser.getAttributeValue(null, "alignLevel")])
                                }
                        }
                } catch (XMLStreamException ex) {
                        System.out.println(ex);
                        return false;
                } catch (IOException ex) {
                        System.err.println("IOException while parsing " + alignxml);
                        return false;
                } finally {
                        closeParser();
                }
                println "get infos from $alignxml"
                return true;
        }

        /**
         * Debug helper: prints every attribute of the current element of a
         * stream reader, prefixed with the current element path.
         *
         * @param parser a reader positioned on a start element
         */
        private void printAttributes(def parser)
        {
                for (int i = 0; i < parser.getAttributeCount(); i++) {
                        println("$path " + parser.getAttributeLocalName(i) +
                                " = " + parser.getAttributeValue(i))
                }
        }

        /**
         * Validates one XML file against the enabled checks.
         *
         * All findings are reset first, so the verdict depends on this file
         * only. A file that cannot be opened or is not well-formed is invalid.
         * Malformed w identifiers are only reported as a warning when checkW
         * is set: they are recorded in hasTXMW but do not change the result.
         *
         * @param xmlfile the file to check
         * @return true when the file passes the teiCorpus, align structure and
         *         TEI checks that are enabled
         */
        protected boolean validateXMLTXM(File xmlfile)
        {
                println "validate $xmlfile"
                resetFileState();
                boolean inTeiCorpus = false;

                try {
                        openParser(xmlfile);
                        for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                                if (event == XMLStreamConstants.START_ELEMENT) {
                                        String localname = parser.getLocalName();
                                        path += "/" + localname
                                        switch (localname) {
                                                case "teiCorpus":
                                                        inTeiCorpus = true;
                                                        hasTeiCorpus = true;
                                                        if (parser.getAttributeValue(XMLNS, "id") != null)
                                                                hasTeiCorpusID = true;
                                                        break;
                                                case "teiHeader":
                                                        if (inTeiCorpus)
                                                                hasTeiCorpusHeader = true;
                                                        break;
                                                case "metadata":
                                                        if (parser.getAttributeValue(null, "name") == "version")
                                                                hasTeiCorpusHeaderVersion = true;
                                                        break;
                                                case "TEI":
                                                        hasTei++;
                                                        if (parser.getAttributeValue(XMLNS, "id") != null)
                                                                hasTeiID++;
                                                        break;
                                                case "w":
                                                        String id = parser.getAttributeValue(XMLNS, "id");
                                                        if (id == null || !TXM_W_ID.matcher(id).matches())
                                                                hasTXMW = false;
                                                        break;
                                        }
                                        // tested outside the switch so that an align structure named like
                                        // one of the elements above is not shadowed by its case
                                        if (alignStruct && localname == alignStruct
                                                && parser.getAttributeValue(null, "align") == null) {
                                                hasAlignStruct = false;
                                        }
                                }
                                else if (event == XMLStreamConstants.END_ELEMENT) {
                                        // drop the last segment, including the root one (index 0)
                                        int cut = path.lastIndexOf("/")
                                        if (cut >= 0)
                                                path = path.substring(0, cut)
                                        if (parser.getLocalName() == "teiCorpus")
                                                inTeiCorpus = false;
                                }
                        }
                } catch (XMLStreamException ex) {
                        System.out.println(ex);
                        return false;
                } catch (IOException ex) {
                        System.err.println("IOException while parsing " + xmlfile);
                        return false
                } finally {
                        closeParser();
                }

                if (checkteiCorpus
                        && !(hasTeiCorpus && hasTeiCorpusID && hasTeiCorpusHeader && hasTeiCorpusHeaderVersion)) {
                        println "wrong <teiCorpus> format: $hasTeiCorpus & $hasTeiCorpusID & $hasTeiCorpusHeader & $hasTeiCorpusHeaderVersion"
                        return false;
                }

                if (checkAlignStruct && !hasAlignStruct) {
                        println "wrong align structure format: $hasAlignStruct"
                        return false;
                }

                if (checkTEI && hasTei != hasTeiID) {
                        println "wrong <TEI> format: $hasTei == $hasTeiID"
                        return false;
                }

                if (checkW && !hasTXMW) {
                        // informational only: this condition has never made a file invalid
                        println "warning: <w> without an xml:id matching w_.+_.+ in $xmlfile"
                }

                return true;
        }

        /**
         * Clears the findings of the previous file. The two "all elements are
         * fine" flags start at true and are lowered by the first offending element.
         */
        private void resetFileState()
        {
                hasTeiCorpus = false;
                hasTeiCorpusID = false;
                hasTeiCorpusHeader = false;
                hasTeiCorpusHeaderAppDesc = false;
                hasTeiCorpusHeaderVersion = false;
                hasTei = 0;
                hasTeiID = 0;
                hasAlignStruct = true;
                hasTXMW = true;
                path = "";
        }

        /**
         * Opens a file and positions a new stream reader at its start, storing
         * the url, stream, factory and reader in the instance fields.
         */
        private void openParser(File xmlfile) throws IOException, XMLStreamException
        {
                // forget the previous file so that closeParser() never touches stale handles
                parser = null;
                inputData = null;

                this.url = xmlfile.toURI().toURL();
                inputData = url.openStream();

                factory = XMLInputFactory.newInstance();
                parser = factory.createXMLStreamReader(inputData);
        }

        /**
         * Releases the reader and the underlying stream. The stream is closed
         * explicitly because XMLStreamReader.close() leaves the input source open.
         */
        private void closeParser()
        {
                try {
                        if (parser != null)
                                parser.close();
                } catch (XMLStreamException ignored) {
                        // the verdict is already known; a failing close cannot change it
                }
                try {
                        if (inputData != null)
                                inputData.close();
                } catch (IOException ignored) {
                        // same as above
                }
        }

        /**
         * Command line entry point: validates the directory given as first
         * argument, or the historical default directory when no argument is given,
         * and prints the result.
         */
        public static void main(String[] args)
        {
                String dirname = (args != null && args.length > 0) ? args[0] : "/home/mdecorde/xml/xmltxmpara";
                File srcdir = new File(dirname);
                def checker = new ValidateXmlTXM();
                println checker.validate(srcdir)
        }
}
218

    
219