Révision 2
projets/CMC2ELAN/.settings/org.eclipse.jdt.core.prefs (revision 2) | ||
---|---|---|
1 |
eclipse.preferences.version=1 |
|
2 |
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled |
|
3 |
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6 |
|
4 |
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve |
|
5 |
org.eclipse.jdt.core.compiler.compliance=1.6 |
|
6 |
org.eclipse.jdt.core.compiler.debug.lineNumber=generate |
|
7 |
org.eclipse.jdt.core.compiler.debug.localVariable=generate |
|
8 |
org.eclipse.jdt.core.compiler.debug.sourceFile=generate |
|
9 |
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error |
|
10 |
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error |
|
11 |
org.eclipse.jdt.core.compiler.source=1.6 |
projets/CMC2ELAN/.classpath (revision 2) | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<classpath> |
|
3 |
<classpathentry kind="src" path="src"/> |
|
4 |
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/> |
|
5 |
<classpathentry exported="true" kind="con" path="GROOVY_SUPPORT"/> |
|
6 |
<classpathentry exported="true" kind="con" path="GROOVY_DSL_SUPPORT"/> |
|
7 |
<classpathentry kind="lib" path="saxon9he.jar"/> |
|
8 |
<classpathentry kind="output" path="bin"/> |
|
9 |
</classpath> |
projets/CMC2ELAN/.project (revision 2) | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<projectDescription> |
|
3 |
<name>CMC2ELAN</name> |
|
4 |
<comment></comment> |
|
5 |
<projects> |
|
6 |
</projects> |
|
7 |
<buildSpec> |
|
8 |
<buildCommand> |
|
9 |
<name>org.eclipse.jdt.core.javabuilder</name> |
|
10 |
<arguments> |
|
11 |
</arguments> |
|
12 |
</buildCommand> |
|
13 |
</buildSpec> |
|
14 |
<natures> |
|
15 |
<nature>org.eclipse.jdt.groovy.core.groovyNature</nature> |
|
16 |
<nature>org.eclipse.jdt.core.javanature</nature> |
|
17 |
</natures> |
|
18 |
</projectDescription> |
projets/CMC2ELAN/src/FixMissingTimings.groovy (revision 2) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
|
|
22 |
import java.text.SimpleDateFormat; |
|
23 |
import java.util.Date; |
|
24 |
|
|
25 |
/**
 * Fills in the timing attributes that are missing from a transcription file
 * made of Episode/Section/Turn/Sync elements, then writes the fixed document
 * with a "trans-14.dtd" DOCTYPE.
 *
 * Missing Sync times are interpolated linearly between the surrounding timed
 * Syncs; Turn and Section bounds are then derived from the Sync times.
 */
class FixMissingTimings {

	/** Transcription file to read. */
	File trsInFile
	/** File the fixed transcription is written to. */
	File trsOutFile

	/**
	 * @param trsInFile the transcription to fix
	 * @param trsOutFile the file that receives the fixed transcription
	 */
	public FixMissingTimings(File trsInFile, File trsOutFile) {
		this.trsInFile = trsInFile;
		this.trsOutFile = trsOutFile;
	}

	/**
	 * Runs every fixing pass on the input file and writes the result.
	 *
	 * Passes, in order: Turn bounds from their first/last Sync, interpolation
	 * of untimed Syncs, Turn start times, Turn end times, Section bounds.
	 *
	 * @return true once the output file has been written
	 */
	public boolean process() {
		def trs = new XmlParser().parse(trsInFile)

		fixTurnBoundsFromChildren(trs)
		interpolateSyncTimes(collectSyncs(trs))

		def turns = trs.Episode.Section.Turn
		fixTurnStartTimes(turns)
		fixTurnEndTimes(turns)
		fixSectionBounds(trs.Episode.Section)

		trsOutFile.withWriter("UTF-8") { writer ->
			writer.write('<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE Trans SYSTEM "trans-14.dtd">\n')
			new groovy.util.XmlNodePrinter(new PrintWriter(writer)).print(trs)
		}

		return true;
	}

	/** Tells whether an attribute value is absent (null) or empty. */
	private boolean isMissing(def value) {
		return value == null || value.toString().length() == 0
	}

	/**
	 * First pass: gives each Turn a start time (end of the previous Turn, or
	 * the time of its first child element) and an end time (time of its last
	 * child when that child is an element).
	 */
	private void fixTurnBoundsFromChildren(def trs) {
		String currentEnd = null;
		for (def turn : trs.Episode.Section.Turn) {
			def children = turn.children()
			// a Turn without children leaves currentEnd untouched for the next Turn
			if (children.size() == 0) continue;

			def first = children[0]
			def last = children[children.size() - 1]

			if (!isMissing(currentEnd) && isMissing(turn.@startTime)) { // using last Sync
				turn.@startTime = currentEnd
				// children are either Nodes or text: only a Node can carry a time
				if (first instanceof Node) setStartTime(first, currentEnd)
			} else if (first instanceof Node) { // no previous Sync, using next Sync
				def start = getStartTime(first)
				if (!isMissing(start)) turn.@startTime = start
			}
			currentEnd = null

			if (last instanceof Node) { // this is the last Sync
				def end = getStartTime(last)
				if (!isMissing(end)) {
					turn.@endTime = end
					currentEnd = end;
				}
			}
		}
	}

	/**
	 * Collects, in document order, the Sync elements found directly in a
	 * Section or directly in a Turn of a Section.
	 */
	private List collectSyncs(def trs) {
		def nodes = []
		for (def section : trs.Episode.Section) {
			for (def child : section.children()) {
				if (child instanceof String) continue; // text, not a tag
				switch(child.name()) {
					case "Turn":
						for (def sync : child.Sync)
							nodes << sync
						break;
					case "Sync":
						nodes << child;
						break;
					default: break;
				}
			}
		}
		return nodes
	}

	/**
	 * Gives a time to every run of untimed Syncs by interpolating between the
	 * timed Sync before the run ("0.0" when the run opens the document) and
	 * the timed Sync after it. A run with no timed Sync after it cannot be
	 * interpolated: an error is printed and the run is left untouched.
	 */
	private void interpolateSyncTimes(List nodes) {
		int i = 0
		while (i < nodes.size()) {
			if (!isMissing(getStartTime(nodes[i]))) {
				i++
				continue;
			}

			String previous = (i > 0) ? getStartTime(nodes[i-1]) : "0.0"

			def run = []
			while (i < nodes.size() && isMissing(getStartTime(nodes[i]))) {
				run << nodes[i]
				i++
			}

			if (i < nodes.size()) {
				// the closing timed Sync is part of the list so that the last
				// interpolation step lands on its time
				String next = getStartTime(nodes[i])
				run << nodes[i]
				fixSyncTimes(run, previous, next)
				i++
			} else {
				println "ERROR: no end time in the transcription"
			}
		}
	}

	/** Sets the start time of each Turn to the time of its first Sync. */
	private void fixTurnStartTimes(def turns) {
		for (int i = 0 ; i < turns.size() ; i++) {
			def turn = turns[i]
			def syncs = turn.Sync
			if (syncs.size() > 0) {
				turn.@startTime = syncs[0].@time
			} else {
				println "Error: Turn with no Sync milestone"
			}
		}
	}

	/**
	 * Fills the empty end time of each Turn with the start time of the next
	 * Turn. The last Turn has no successor: its end time is set to its start
	 * time plus one unit per character of its text.
	 */
	private void fixTurnEndTimes(def turns) {
		for (int i = 0 ; i < turns.size() ; i++) {
			def turn = turns[i]
			if (i < turns.size() - 1) {
				if (isMissing(turn.@endTime) && !isMissing(turns[i+1].@startTime)) {
					turn.@endTime = turns[i+1].@startTime
				}
			} else if (isMissing(turn.@startTime)) {
				// nothing to add the text length to
				println "Error: last Turn has no start time, its end time cannot be computed"
			} else {
				Float e = Float.parseFloat(turn.@startTime)
				e += turn.text().length()*1.0f
				turn.@endTime = e.toString()
			}
		}
	}

	/**
	 * Sets the bounds of each Section from its first Turn start time and its
	 * last Turn end time; Sections without Turn are left as they are.
	 */
	private void fixSectionBounds(def sections) {
		for (int i = 0 ; i < sections.size() ; i++) {
			def t = sections[i].Turn
			if (t.size() == 0) continue;
			sections[i].@startTime = t[0].@startTime
			sections[i].@endTime = t[-1].@endTime
		}
	}

	/**
	 * Reads the time carried by a node.
	 *
	 * @param node a Turn, Section or Sync node; may be null
	 * @return the startTime attribute of a Turn/Section, the time attribute of
	 *         a Sync, "0.0" for a null node or a node without name, null for
	 *         any other element
	 */
	def getStartTime(def node) {
		def ret = null;
		if (node == null) return "0.0"
		if (node.name() == null) return "0.0"
		switch(node.name()) {
			case "Turn":
			case "Section":
				ret = node.@startTime
				break
			case "Sync":
				ret = node.@time
				break
			default: break;
		}
		return ret
	}

	/**
	 * Writes a time on a node: startTime attribute for a Turn/Section, time
	 * attribute for a Sync; other elements are left untouched.
	 *
	 * @param node the node to update
	 * @param value the time, stored as a String
	 */
	def setStartTime(def node, def value) {
		switch(node.name()) {
			case "Turn":
			case "Section":
				node.@startTime = ""+value
				break;

			case "Sync":
				return node.@time = ""+value
			default: break;
		}
	}

	/**
	 * Spreads the nodes of the list evenly between two times: node k (1-based)
	 * receives start + k * (end - start) / list.size(), so the last node of
	 * the list receives the end time.
	 *
	 * @param list the Sync nodes to time, in document order
	 * @param start the time before the first node, as a decimal String
	 * @param end the time of the last node, as a decimal String
	 */
	def fixSyncTimes(def list, def start, def end) {
		if (list == null || list.size() == 0) return // nothing to spread

		def startf = Double.parseDouble(start)
		def endf = Double.parseDouble(end)
		def delta = (endf-startf)/list.size()
		float c = startf;
		for (int i = 0 ; i < list.size() ; i++) {
			c += delta;
			list[i].@time = ""+c
		}
	}
}
projets/CMC2ELAN/src/CheckMultipleVideoInXMLTEICMC.groovy (revision 2) | ||
---|---|---|
1 |
|
|
2 |
// Lists the files of the split directory whose postings reference more than
// one distinct value of //tei:dateline/tei:name/text().
File splitDir = new File("/home/mdecorde/xml/comere/split")
File[] candidates = splitDir.listFiles()
if (candidates == null) {
	// listFiles() returns null when the path is not a readable directory
	println "Cannot list directory: $splitDir"
} else {
	for (File f : candidates) {
		if (!f.isFile()) continue; // only regular files can be parsed

		XPathResult xpathProcessor = new XPathResult(f);
		def files = xpathProcessor.getXpathResponses("//tei:dateline/tei:name/text()")
		files = new HashSet(files) // keep distinct values only
		if (files.size() > 1) println "$f $files"
	}
}
projets/CMC2ELAN/src/ApplyXsl2.groovy (revision 2) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
|
|
22 |
// |
|
23 |
// This file is part of the TXM platform. |
|
24 |
// |
|
25 |
// The TXM platform is free software: you can redistribute it and/or modify |
|
26 |
// it under the terms of the GNU General Public License as published by |
|
27 |
// the Free Software Foundation, either version 3 of the License, or |
|
28 |
// (at your option) any later version. |
|
29 |
// |
|
30 |
// The TXM platform is distributed in the hope that it will be useful, |
|
31 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
32 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
33 |
// GNU General Public License for more details. |
|
34 |
// |
|
35 |
// You should have received a copy of the GNU General Public License |
|
36 |
// along with the TXM platform. If not, see <http://www.gnu.org/licenses/>. |
|
37 |
// |
|
38 |
// |
|
39 |
// |
|
40 |
// $LastChangedDate: 2011-11-03 17:59:23 +0100 (jeu., 03 nov. 2011) $ |
|
41 |
// $LastChangedRevision: 2051 $ |
|
42 |
// $LastChangedBy: mdecorde $ |
|
43 |
// |
|
44 |
|
|
45 |
|
|
46 |
/** |
|
47 |
* @author mdecorde |
|
48 |
*/ |
|
49 |
|
|
50 |
import java.io.File; |
|
51 |
import java.io.FileNotFoundException; |
|
52 |
import java.io.FileOutputStream; |
|
53 |
import java.util.HashMap; |
|
54 |
|
|
55 |
import javax.xml.transform.Transformer; |
|
56 |
import javax.xml.transform.TransformerConfigurationException; |
|
57 |
import javax.xml.transform.TransformerException; |
|
58 |
import javax.xml.transform.stream.StreamResult; |
|
59 |
import javax.xml.transform.stream.StreamSource; |
|
60 |
|
|
61 |
import net.sf.saxon.TransformerFactoryImpl; |
|
62 |
|
|
63 |
|
|
64 |
// TODO: Auto-generated Javadoc |
|
65 |
/**
 * Applies an XSLT stylesheet to XML files with the Saxon transformer factory.
 *
 * Load the stylesheet with one of the constructors, optionally set stylesheet
 * parameters with SetParam(name, value), then call one of the process(...)
 * methods. The transformer is rebuilt after each transformation and the
 * parameters set so far are applied again to the new transformer.
 *
 * @author mdecorde
 */
public class ApplyXsl2 {

	net.sf.saxon.TransformerFactoryImpl tFactory;
	Transformer transformer;
	/** Parameters set so far, kept as given so they can be re-applied after a reload. */
	private HashMap<String, Object> params = new HashMap<String, Object>();
	private File xsltfile;

	/**
	 * Initializes with the path of the XSLT file.
	 *
	 * @param xsltfile path of the stylesheet
	 * @throws TransformerConfigurationException if the stylesheet cannot be compiled
	 */
	public ApplyXsl2(String xsltfile) throws TransformerConfigurationException {
		System.setProperty("javax.xml.transform.TransformerFactory", "net.sf.saxon.TransformerFactoryImpl");

		this.xsltfile = new File(xsltfile);
		tFactory = new TransformerFactoryImpl();
		transformer = tFactory.newTransformer(new StreamSource(this.xsltfile));
	}

	/**
	 * Initializes with the XSLT file. A stylesheet that cannot be compiled is
	 * reported on the standard output and leaves the transformer null, in
	 * which case SetParam and process return false.
	 *
	 * @param xsltfile the stylesheet
	 */
	public ApplyXsl2(File xsltfile) {
		this.xsltfile = xsltfile;

		System.setProperty("javax.xml.transform.TransformerFactory", "net.sf.saxon.TransformerFactoryImpl");

		tFactory = new net.sf.saxon.TransformerFactoryImpl();
		println("new tFactory: "+tFactory);
		println("ApplyXsl2 from file: "+xsltfile);
		reload();
		println("new transformer: "+transformer);
	}

	/**
	 * Placeholder kept from a previous implementation; does nothing.
	 *
	 * @param xmlinfile the input file (unused)
	 * @param xmloutfile the output file (unused)
	 * @return always true
	 */
	private boolean SetInOutSource(File xmlinfile, File xmloutfile) {
		return true;
	}

	/**
	 * Sets a stylesheet parameter. The value is remembered as given (not as a
	 * String) so that it keeps its type when re-applied after a reload.
	 *
	 * @param name the parameter name
	 * @param value the parameter value, must not be null
	 * @return true if the parameter was set, false if there is no transformer
	 */
	public boolean SetParam(String name, Object value) {
		if (transformer == null)
			return false;

		transformer.setParameter(name, value);
		params.put(name, value);
		return true;
	}

	/**
	 * Resets the transformer and forgets every parameter set so far.
	 *
	 * @return true if done, false if there is no transformer
	 */
	public boolean resetParams() {
		if (transformer == null)
			return false;

		transformer.reset();
		params.clear();
		return true;
	}

	/**
	 * Rebuilds the transformer after a transformation, to release the memory
	 * it holds, then re-applies the remembered parameters.
	 */
	private void cleanMemory() {
		if (!reload())
			return; // reload() reported the error and left the transformer null

		for (String name : params.keySet()) { // reload parameters
			transformer.setParameter(name, params.get(name));
		}
	}

	/**
	 * Builds a new transformer from the stylesheet file.
	 *
	 * @return true if the transformer was built; false (transformer is then
	 *         null) if the stylesheet could not be compiled
	 */
	private boolean reload() {
		try {
			transformer = null;
			transformer = tFactory.newTransformer(new StreamSource(xsltfile));
		} catch (TransformerConfigurationException e) {
			e.printStackTrace();
			println("Error while reloading transformer: "+e);
			return false;
		}
		return true;
	}

	/**
	 * Runs the transformation and always closes the output stream, whether
	 * the transformation succeeds or fails.
	 */
	private void transformTo(StreamSource source, File outfile) throws FileNotFoundException, TransformerException {
		FileOutputStream out = new FileOutputStream(outfile);
		try {
			transformer.transform(source, new StreamResult(out));
		} finally {
			out.close();
		}
	}

	/**
	 * Processes a file with stylesheet parameters.
	 *
	 * @param xmlinfile system identifier (path) of the file to process
	 * @param xmloutfile path of the output file
	 * @param args stylesheet parameters ["arg1","arg1value","arg2","arg2value"...];
	 *        null means no parameter
	 * @return true if successful, false if there is no transformer
	 * @throws IllegalArgumentException if args does not hold name/value pairs
	 * @throws TransformerException if the transformation fails
	 * @throws FileNotFoundException if the output file cannot be opened
	 */
	public boolean process(String xmlinfile, String xmloutfile, String[] args) throws Exception {
		File infile = new File(xmlinfile);
		File outfile = new File(xmloutfile);
		if (!this.SetInOutSource(infile, outfile))
			return false;
		if (transformer == null)
			return false;

		if (args != null) {
			if (args.length % 2 != 0)
				throw new IllegalArgumentException("XSLT args must be name/value pairs, got "+args.length+" values");
			for (int i = 0; i < args.length; i = i + 2) {
				if (!this.SetParam(args[i], args[i + 1]))
					return false;
			}
		}

		// the input is given to the transformer as a system identifier, as is
		transformTo(new StreamSource(xmlinfile), outfile);
		cleanMemory(); // save memory
		return true;
	}

	/**
	 * Processes a file with the parameters set so far.
	 *
	 * @param xmlinfile the file to process
	 * @param xmloutfile the output file
	 * @return true if successful, false if there is no transformer
	 * @throws TransformerException if the transformation fails
	 * @throws FileNotFoundException if the output file cannot be opened
	 */
	public boolean process(File xmlinfile, File xmloutfile) throws FileNotFoundException, TransformerException {
		if (!this.SetInOutSource(xmlinfile, xmloutfile))
			return false;
		if (transformer == null)
			return false;

		transformTo(new StreamSource(xmlinfile), xmloutfile);
		cleanMemory();
		return true;
	}

	/**
	 * Applies a stylesheet, without parameters, to the files of a directory.
	 *
	 * @see #processImportSources(File, File, File, HashMap)
	 */
	public static boolean processImportSources(File xslFile, File srcdir, File outdir) throws FileNotFoundException, TransformerException
	{
		HashMap<String, String> params = new HashMap<String, String>();
		return processImportSources(xslFile, srcdir, outdir, params);
	}

	/**
	 * Applies a stylesheet to the files of a directory and writes each result,
	 * under the same file name, in the output directory. Hidden files,
	 * directories, "import.xml" and the .properties, .csv, .dtd and .xsl files
	 * are skipped. The output of a file that could not be processed is deleted.
	 *
	 * @param xslFile the stylesheet
	 * @param srcdir the directory of the files to process
	 * @param outdir the output directory, created if needed
	 * @param params the stylesheet parameters
	 * @return false if the stylesheet file does not exist, true otherwise
	 * @throws TransformerException if a transformation fails
	 * @throws FileNotFoundException if an output file cannot be opened
	 */
	public static boolean processImportSources(File xslFile, File srcdir, File outdir, HashMap<String, String> params) throws FileNotFoundException, TransformerException
	{
		outdir.mkdirs(); // also creates the missing parent directories

		if (!xslFile.exists()) {
			System.out.println("XSL file does not exists: "+ xslFile);
			return false;
		}

		ApplyXsl2 builder = new ApplyXsl2(xslFile);
		for (String name : params.keySet())
			builder.SetParam(name, params.get(name));
		System.out.println("-- Apply xsl "+xslFile+" with parameters: "+params);

		File[] files = srcdir.listFiles();
		if (files != null) {
			for (File f : files) {
				if (isNotASource(f))
					continue;

				System.out.print(".");
				File outfile = new File(outdir, f.getName());
				if (!builder.process(f, outfile)) {
					System.out.println("Failed with file "+ f);
					outfile.delete();
				}
			}
		}
		System.out.println("");
		return true;
	}

	/** Tells whether a file of the source directory must be skipped. */
	private static boolean isNotASource(File f) {
		if (f.isHidden() || f.isDirectory())
			return true;

		String name = f.getName();
		return name.equals("import.xml") || name.endsWith(".properties") || name.endsWith(".csv") || name.endsWith(".dtd") || name.endsWith(".xsl");
	}
}
projets/CMC2ELAN/src/SplitTEICMRPerLocutor.groovy (revision 2) | ||
---|---|---|
1 |
|
|
2 |
|
|
3 |
import java.io.File; |
|
4 |
import java.net.URL; |
|
5 |
|
|
6 |
import javax.xml.stream.*; |
|
7 |
|
|
8 |
|
|
9 |
/**
 * Splits a document into one file per locutor: the content of each "posting"
 * element is written only to the writer registered for the value of its "who"
 * attribute, everything else is written to every writer.
 *
 * NOTE(review): parser, writer, localname and Nscontext are not declared here;
 * they are presumably inherited from StaxIdentityParser, as is process(...) —
 * confirm against that class.
 */
public class SplitTEICMRPerLocutor extends StaxIdentityParser {

	/** Directory that receives one "<locutor>-teicmr.xml" file per locutor. */
	File outputDirectory;
	/** Writer of each locutor, by locutor identifier. */
	def writers = [:];

	/**
	 * @param inputFile the document to split
	 * @param outputDirectory the directory of the result files, created if needed
	 */
	public SplitTEICMRPerLocutor(File inputFile, File outputDirectory) {
		super(inputFile);
		this.outputDirectory = outputDirectory;
		outputDirectory.mkdirs() // also creates the missing parent directories
	}

	/**
	 * The writers the current event goes to: the writer of the current
	 * posting when there is one, every registered writer otherwise.
	 */
	private def currentTargets() {
		return (writer != null) ? [writer] : writers.values()
	}

	/**
	 * Selects the writer of the locutor when a posting starts, then writes the
	 * start tag. NOTE(review): a posting whose "who" value has no registered
	 * writer leaves the writer null and is therefore written to every file.
	 */
	protected void processStartElement() {
		if (parser.getLocalName() == "posting") {
			for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
				if (parser.getAttributeLocalName(i) == "who") {
					String loc = parser.getAttributeValue(i)
					writer = writers.get(loc) // switch writer on locutor
					break;
				}
			}
		}

		for (def target : currentTargets()) processStartElement(target);
	}

	/**
	 * Writes the current start tag, its namespace declarations and its
	 * attributes with the given writer.
	 *
	 * @param swriter the writer to use
	 */
	protected void processStartElement(def swriter)
	{
		String prefix = parser.getPrefix();

		if (prefix != null && prefix.length() > 0)
			swriter.writeStartElement(Nscontext.getNamespaceURI(prefix), localname)
		else
			swriter.writeStartElement(localname);

		for (int i = 0 ; i < parser.getNamespaceCount() ; i++) {
			swriter.writeNamespace(parser.getNamespacePrefix(i), parser.getNamespaceURI(i));
		}

		writeAttributes(swriter);
	}

	/** Same as processStartElement(def) with the current writer; not called in this class. */
	private void _processStartElement() {
		String prefix = parser.getPrefix();

		if (prefix != null && prefix.length() > 0)
			writer.writeStartElement(Nscontext.getNamespaceURI(prefix), localname)
		else
			writer.writeStartElement(localname);

		for (int i = 0 ; i < parser.getNamespaceCount() ; i++) {
			writer.writeNamespace(parser.getNamespacePrefix(i), parser.getNamespaceURI(i));
		}

		writeAttributes();
	}

	/** Writes the current namespace declaration to the current target(s). */
	protected void processNamespace() {
		for (def target : currentTargets()) target.writeNamespace(parser.getPrefix(), parser.getNamespaceURI());
	}

	/** Writes the attributes of the current element to the current target(s). */
	protected void writeAttributes() {
		for (def target : currentTargets()) writeAttributes(target);
	}

	/**
	 * Writes the attributes of the current element with the given writer;
	 * a prefixed attribute is written as "prefix:localname".
	 *
	 * @param swriter the writer to use
	 */
	protected void writeAttributes(def swriter) {
		for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
			String attrPrefix = parser.getAttributePrefix(i);
			if (attrPrefix != null && attrPrefix.length() > 0)
				swriter.writeAttribute(attrPrefix+":"+parser.getAttributeLocalName(i), parser.getAttributeValue(i));
			else
				swriter.writeAttribute(parser.getAttributeLocalName(i), parser.getAttributeValue(i));
		}
	}

	/** Writes the current text to the current target(s). */
	protected void processCharacters()
	{
		for (def target : currentTargets()) target.writeCharacters(parser.getText());
	}

	/** Writes the current processing instruction to the current target(s). */
	protected void processProcessingInstruction()
	{
		for (def target : currentTargets()) target.writeProcessingInstruction(parser.getPITarget(), parser.getPIData());
	}

	/** Writes the current DTD to the current target(s). */
	protected void processDTD()
	{
		for (def target : currentTargets()) target.writeDTD(parser.getText());
	}

	/** Writes the current CDATA section to the current target(s). */
	protected void processCDATA()
	{
		for (def target : currentTargets()) target.writeCData(parser.getText())
	}

	/** Writes the current comment to the current target(s). */
	protected void processComment()
	{
		for (def target : currentTargets()) target.writeComment(parser.getText());
	}

	/**
	 * Writes the end tag to the current target(s); the end of a posting also
	 * ends the selection of a single writer.
	 */
	protected void processEndElement()
	{
		for (def target : currentTargets()) target.writeEndElement();
		if (parser.getLocalName() == "posting") writer = null;
	}

	/** Ends the document of the current target(s). */
	protected void processEndDocument() {
		for (def target : currentTargets()) target.writeEndDocument();
	}

	/** Writes the current entity reference to the current target(s). */
	protected void processEntityReference() {
		for (def target : currentTargets()) target.writeEntityRef(parser.getLocalName());
	}

	/**
	 * Creates one "<locutor>-teicmr.xml" writer per locutor in the output
	 * directory, processes the input document, then closes the writers and
	 * the parser — also when the processing fails.
	 *
	 * @param locutors the identifiers of the locutors to extract
	 * @return the result of process(writer)
	 */
	public boolean processLocutors(def locutors) {
		for (String loc : locutors) {
			println "Create writer for $loc"
			writers.put(loc, new StaxStackWriter(new File(outputDirectory, loc+"-teicmr.xml").newWriter("UTF-8")))
		}
		println "writers: $writers"

		boolean ret = false;
		try {
			for (def swriter : writers.values()) {
				swriter.writeStartDocument("UTF-8", "1.0");
				swriter.writeCharacters("\n");
			}

			ret = process(writer);
		} finally {
			// release every resource, even if the processing threw
			for (def swriter : writers.values()) {
				try {swriter.close();} catch(Exception e){println "close writer exep: "+e}
			}

			if (parser != null)
				try {parser.close()} catch(Exception e){println "parser exep: "+e}
		}

		return ret;
	}

	public static void main(String[] args) {
		File inputFile = new File("/home/mdecorde/xml/comere/ismael-textchat.xml")
		File outputDirectory = new File("/home/mdecorde/xml/comere/split_out")

		def p = new SplitTEICMRPerLocutor(inputFile, outputDirectory)
		println p.processLocutors(["s_1", "s_10", "s_11", "s_12", "s_13", "s_14", "s_3", "s_4", "s_5", "s_6", "s_7", "s_8", "s_9", "tt_1", "tt_12", "tt_2", "tt_3", "tt_4", "tt_5", "tt_6", "tt_7", "tt_8"])
	}
}
projets/CMC2ELAN/src/TEICMCPerLocPerDate.groovy (revision 2) | ||
---|---|---|
1 |
|
|
2 |
|
|
3 |
import java.io.File; |
|
4 |
import java.net.URL; |
|
5 |
|
|
6 |
import javax.xml.stream.*; |
|
7 |
|
|
8 |
|
|
9 |
public class TEICMCPerLocPerDate extends StaxIdentityParser { |
|
10 |
|
|
11 |
File outputDirectory; |
|
12 |
def writers = [:]; |
|
13 |
def counts = [:]; |
|
14 |
|
|
15 |
/**
 * @param inputFile the document to split, given to the parent constructor
 * @param outputDirectory the directory of the result files, created if needed
 */
public TEICMCPerLocPerDate(File inputFile, File outputDirectory) {
	super(inputFile);
	this.outputDirectory = outputDirectory;
	outputDirectory.mkdirs() // also creates the missing parent directories
}
|
20 |
|
|
21 |
boolean inName = false; |
|
22 |
String filenameValue = ""; |
|
23 |
boolean inDate = false; |
|
24 |
String dateValue = ""; |
|
25 |
boolean inTime = false; |
|
26 |
String timeValue = ""; |
|
27 |
boolean inPosting = false; |
|
28 |
String postingID = "" |
|
29 |
String postingWho = "" |
|
30 |
boolean inP; |
|
31 |
String content = "" |
|
32 |
|
|
33 |
/**
 * Handles a start tag.
 *
 * A "posting" start tag is not written: its "who" and "id" attributes are
 * recorded and the posting is buffered until its end tag. Inside a posting,
 * the start of a name, date, time or p element starts the capture of its
 * text. Any other start tag is written to the current writer, or to every
 * writer when there is no current writer.
 */
protected void processStartElement() {
	String name = parser.getLocalName()

	if (name == "posting") {
		inPosting = true
		// forget the previous posting, so that a missing attribute or
		// element does not silently reuse its values
		postingWho = ""
		postingID = ""
		filenameValue = ""
		dateValue = ""
		timeValue = ""
		content = ""

		for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
			String attribute = parser.getAttributeLocalName(i)
			if (attribute == "who") {
				postingWho = parser.getAttributeValue(i)
			} else if (attribute == "id") {
				postingID = parser.getAttributeValue(i)
			}
		}
		return;
	}

	if (inPosting) {
		// the tags of a posting are not copied, only their text is captured
		switch (name) {
			case "name":
				inName = true
				filenameValue = ""
				break;
			case "date":
				inDate = true;
				dateValue = ""
				break;
			case "time":
				inTime = true;
				timeValue = ""
				break;
			case "p":
				inP = true;
				content = ""
				break;
			default: break; // dateline and any other element: nothing to capture
		}
		return;
	}

	if (writer != null) {
		processStartElement(writer);
	} else {
		for (def swriter : writers.values()) processStartElement(swriter);
	}
}
|
68 |
|
|
69 |
/**
 * Handles a text event: inside a posting, the text of the element being
 * captured (name, date, time or p, in that order of priority) is appended to
 * the matching buffer; any other text is written to the current writer, or
 * to every writer when there is no current writer.
 */
protected void processCharacters() {
	boolean capturing = inPosting && (inName || inDate || inTime || inP)

	if (capturing) {
		String text = parser.getText()
		if (inName) {
			filenameValue += text;
		} else if (inDate) {
			dateValue += text;
		} else if (inTime) {
			timeValue += text;
		} else {
			content += text;
		}
		return;
	}

	def targets = (writer != null) ? [writer] : writers.values()
	for (def target : targets) {
		target.writeCharacters(parser.getText());
	}
}
|
92 |
|
|
93 |
/**
 * Handles an end tag.
 *
 * Inside a posting: the end of a name, date, time or p element stops the
 * capture of its text; the end of the time element also selects the writer
 * with getWriter(dateValue+postingWho); the end of the posting writes the
 * buffered posting (id, who, dateline with name/date/time, p) with that
 * writer. Outside a posting the end tag is written to the current writer, or
 * to every writer when there is no current writer.
 */
protected void processEndElement()
{
	if (inPosting) {
		switch (parser.getLocalName()) {
			case "name":
				inName = false;
				break;
			case "date":
				inDate = false;
				break;
			case "p":
				inP = false;
				break;
			case "time":
				inTime = false;
				writer = getWriter(dateValue+postingWho)
				break;
			case "posting":
				// the posting is over whatever happens: staying "in posting"
				// would swallow the rest of the document
				inPosting = false

				if (writer == null) {
					println "Error Houston !!! with loc=$postingWho date=$dateValue at="+parser.getLocation()
					return;
				}

				// write Turn
				writer.writeStartElement("posting");
				writer.writeAttribute("xml:id",postingID);
				writer.writeAttribute("who",postingWho);
				writer.writeStartElement("dateline");
				writer.writeStartElement("name");
				writer.writeAttribute("type","file");
				writer.writeCharacters(filenameValue)
				writer.writeEndElement(); //name
				writer.writeStartElement("date");
				writer.writeCharacters(dateValue)
				writer.writeEndElement(); //date
				writer.writeStartElement("time");
				writer.writeCharacters(timeValue)
				writer.writeEndElement(); //time
				writer.writeEndElement(); //dateline
				writer.writeStartElement("p");
				writer.writeCharacters(content)
				writer.writeEndElement(); //p
				writer.writeEndElement(); //posting

				writer = null;
				break;
			default: break;
		}
		return;
	}

	if (writer != null) {
		writer.writeEndElement();
	} else {
		for (def swriter : writers.values()) swriter.writeEndElement();
	}
}
|
144 |
|
|
145 |
|
|
146 |
/**
 * Copies the current start element to the given writer: the element itself
 * (namespace-qualified when the parser reports a non-empty prefix), then its
 * namespace declarations, then its attributes.
 *
 * @param swriter the writer receiving the element
 */
protected void processStartElement(def swriter)
{
	String elementPrefix = parser.getPrefix()

	// Groovy truth: a null or empty prefix means "no namespace prefix"
	if (elementPrefix) {
		swriter.writeStartElement(Nscontext.getNamespaceURI(elementPrefix), localname)
	} else {
		swriter.writeStartElement(localname)
	}

	int nsIndex = 0
	while (nsIndex < parser.getNamespaceCount()) {
		swriter.writeNamespace(parser.getNamespacePrefix(nsIndex), parser.getNamespaceURI(nsIndex))
		nsIndex++
	}

	writeAttributes(swriter)
}
|
161 |
|
|
162 |
/**
 * Copies the current start element to the currently selected writer: the
 * element itself (namespace-qualified when the parser reports a non-empty
 * prefix), then its namespace declarations, then its attributes.
 */
private void _processStartElement() {
	String elementPrefix = parser.getPrefix()

	// Groovy truth: a null or empty prefix means "no namespace prefix"
	if (elementPrefix) {
		writer.writeStartElement(Nscontext.getNamespaceURI(elementPrefix), localname)
	} else {
		writer.writeStartElement(localname)
	}

	int nsIndex = 0
	while (nsIndex < parser.getNamespaceCount()) {
		writer.writeNamespace(parser.getNamespacePrefix(nsIndex), parser.getNamespaceURI(nsIndex))
		nsIndex++
	}

	writeAttributes()
}
|
176 |
|
|
177 |
|
|
178 |
/**
 * Forwards the current namespace declaration to the selected writer, or to
 * every registered writer when none is selected.
 */
protected void processNamespace() {
	if (writer == null) {
		for (def target : writers.values()) {
			target.writeNamespace(parser.getPrefix(), parser.getNamespaceURI())
		}
		return
	}
	writer.writeNamespace(parser.getPrefix(), parser.getNamespaceURI())
}
|
185 |
|
|
186 |
/**
 * Writes the attributes of the current element to the selected writer, or to
 * every registered writer when none is selected.
 */
protected void writeAttributes() {
	if (writer == null) {
		for (def target : writers.values()) {
			writeAttributes(target)
		}
		return
	}
	writeAttributes(writer)
}
|
193 |
|
|
194 |
/**
 * Writes every attribute of the current element to the given writer.
 * A prefixed attribute is written under the literal name "prefix:localName".
 *
 * @param swriter the writer receiving the attributes
 */
protected void writeAttributes(def swriter) {
	for (int idx = 0 ; idx < parser.getAttributeCount() ; idx++) {
		String prefixPart = parser.getAttributePrefix(idx)
		// Groovy truth: a null or empty prefix means the attribute is unprefixed
		String attributeName = prefixPart ?
				prefixPart + ":" + parser.getAttributeLocalName(idx) :
				parser.getAttributeLocalName(idx)
		swriter.writeAttribute(attributeName, parser.getAttributeValue(idx))
	}
}
|
203 |
|
|
204 |
/**
 * Forwards the current processing instruction (target and data) to the
 * selected writer, or to every registered writer when none is selected.
 */
protected void processProcessingInstruction() {
	if (writer == null) {
		for (def target : writers.values()) {
			target.writeProcessingInstruction(parser.getPITarget(), parser.getPIData())
		}
		return
	}
	writer.writeProcessingInstruction(parser.getPITarget(), parser.getPIData())
}
|
211 |
|
|
212 |
/**
 * Forwards the current DTD text to the selected writer, or to every
 * registered writer when none is selected.
 */
protected void processDTD()
{
	if (writer == null) {
		for (def target : writers.values()) {
			target.writeDTD(parser.getText())
		}
		return
	}
	writer.writeDTD(parser.getText())
}
|
220 |
|
|
221 |
/**
 * Forwards the current CDATA section to the selected writer, or to every
 * registered writer when none is selected.
 */
protected void processCDATA()
{
	if (writer == null) {
		for (def target : writers.values()) {
			target.writeCData(parser.getText())
		}
		return
	}
	writer.writeCData(parser.getText())
}
|
229 |
|
|
230 |
/**
 * Forwards the current comment to the selected writer, or to every
 * registered writer when none is selected.
 */
protected void processComment()
{
	if (writer == null) {
		for (def target : writers.values()) {
			target.writeComment(parser.getText())
		}
		return
	}
	writer.writeComment(parser.getText())
}
|
238 |
|
|
239 |
/**
 * Looks up the writer registered under the given key and counts the lookup
 * as one use of that writer.
 *
 * FIX: for a key that was never registered, the original evaluated
 * counts.get(key)+1 on a null count and threw, so the caller's
 * "writer == null" error branch could never be reached for an unknown
 * date+speaker pair. Unknown keys now simply yield null and leave the
 * counters untouched.
 *
 * @param key the writer key (callers in this file pass date + speaker id)
 * @return the registered writer, or null when the key is unknown
 */
protected getWriter(String key) {
	def w = writers.get(key)
	if (w == null) {
		return null
	}
	// Elvis keeps a missing counter from breaking the increment
	counts.put(key, (counts.get(key) ?: 0) + 1)
	return w
}
|
244 |
|
|
245 |
/**
 * Ends the document on the selected writer, or on every registered writer
 * when none is selected.
 */
protected void processEndDocument() {
	if (writer == null) {
		for (def target : writers.values()) {
			target.writeEndDocument()
		}
		return
	}
	writer.writeEndDocument()
}
|
252 |
|
|
253 |
/**
 * Forwards the current entity reference (by local name) to the selected
 * writer, or to every registered writer when none is selected.
 */
protected void processEntityReference() {
	if (writer == null) {
		for (def target : writers.values()) {
			target.writeEntityRef(parser.getLocalName())
		}
		return
	}
	writer.writeEntityRef(parser.getLocalName())
}
|
260 |
|
|
261 |
/**
 * Splits the input document into one output file per (date, speaker) pair.
 *
 * One StaxStackWriter is created for every combination, writing to
 * "<loc>-<date>-teicmr.xml" in outputDirectory, and its use counter starts
 * at 0. After processing, files whose counter is still 0 are deleted.
 *
 * FIX: writers and parser are now closed in a finally block, so they are
 * released even when creating a writer, writing the document header or
 * process() throws. Previously such a failure leaked every open file.
 *
 * @param dates    the dates to create output files for
 * @param locutors the speaker ids to create output files for
 * @return the value returned by process()
 */
public boolean processDatesAndLocs(def dates, def locutors) {
	boolean ret = false
	try {
		for (String date : dates) {
			for (String loc : locutors) {
				println "Create writer for $loc"
				writers.put(date+loc, new StaxStackWriter(new File(outputDirectory, "${loc}-${date}-teicmr.xml")))
				counts.put(date+loc, 0)
			}
		}
		println "writers: $writers"

		for (def swriter : writers.values()) {
			swriter.writeStartDocument("UTF-8", "1.0");
			swriter.writeCharacters("\n");
		}

		ret = process(writer);
	} finally {
		// best-effort cleanup: one failing close must not prevent the others
		for (def swriter : writers.values()) {
			try {swriter.close();} catch(Exception e){println "close writer exep: "+e}
		}

		if (parser != null)
			try {parser.close()} catch(Exception e){println "parser exep: "+e}
	}

	//delete file (date+loc) not used
	for (def key : counts.keySet()) {
		if (counts.get(key) == 0) { // never used for a couple date+loc
			(writers.get(key)).getInfile().delete()
		}
	}

	return ret;
}
|
295 |
|
|
296 |
/**
 * Development entry point: empties and recreates the hard-coded output
 * directory, collects the distinct dates and the speaker ids of the
 * hard-coded input file through XPath, then splits the file per speaker and
 * per date and prints the result.
 */
public static void main(String[] args) {
	File source = new File("/home/mdecorde/xml/comere/ismael-textchat.xml")
	File targetDir = new File("/home/mdecorde/xml/comere/split_out")
	// start from an empty output directory
	targetDir.deleteDir()
	targetDir.mkdir()

	XPathResult xpath = new XPathResult(source);
	def foundDates = xpath.getXpathResponses("//tei:dateline/tei:date/text()")
	def speakerIds = xpath.getXpathResponses("//tei:listPerson/tei:person/@xml:id")

	// drop duplicate dates; sort() below orders the list in place before printing it
	def uniqueDates = new ArrayList(new HashSet(foundDates))
	println uniqueDates.sort()
	println speakerIds

	def splitter = new TEICMCPerLocPerDate(source, targetDir)
	println splitter.processDatesAndLocs(uniqueDates, speakerIds)
}
|
314 |
} |
projets/CMC2ELAN/src/TEICMC2TRS.groovy (revision 2) | ||
---|---|---|
1 |
|
|
2 |
|
|
3 |
import java.io.File; |
|
4 |
import java.net.URL; |
|
5 |
|
|
6 |
import javax.xml.stream.*; |
|
7 |
import java.text.SimpleDateFormat; |
|
8 |
import java.util.Date; |
|
9 |
|
|
10 |
/**
 * Rewrites a TEI CMC document as a Trans/Episode/Section/Turn document.
 *
 * The "body" element is replaced by the Trans header (with a single Speaker
 * built from loc), and every "posting" element becomes one Turn whose start
 * time is the posting time relative to the start time read from the first
 * posting's file name.
 */
public class TEICMC2TRS extends StaxIdentityParser {

	File outputDirectory;
	/** Speaker id written as both id and name of the Speaker element. */
	String loc;

	/**
	 * @param inputFile the TEI CMC file to read
	 * @param loc       the speaker id to declare in the output
	 */
	public TEICMC2TRS(File inputFile, String loc) {
		super(inputFile);
		this.loc = loc;
	}

	// Parsing state: one flag per element being read, one buffer per collected value.
	boolean inBody = false
	boolean inName = false;
	String filenameValue = "";
	boolean inDate = false;
	String dateValue = "";
	boolean inTime = false;
	String timeValue = "";
	boolean inPosting = false;
	String postingID = ""
	String postingWho = ""
	boolean inP;
	String content = ""

	/**
	 * Handles a start-element event: writes the Trans header when "body"
	 * opens, records the attributes of a "posting", and inside a posting
	 * resets the buffer of the element being entered. Other elements are
	 * not written.
	 */
	protected void processStartElement() {
		String name = parser.getLocalName()
		if (name == "body") { // write TRS HEADER
			inBody = true
			writer.writeStartElement("Trans");
			writer.writeAttribute("version","10");
			writer.writeAttribute("audio_filename","");
			writer.writeAttribute("scribe","");
			writer.writeAttribute("version_date","");

			writer.writeStartElement("Topics");
			writer.writeEndElement(); //Topics
			writer.writeStartElement("Speakers");
			writer.writeStartElement("Speaker");
			writer.writeAttribute("id",loc);
			writer.writeAttribute("name",loc);
			writer.writeAttribute("check","");
			writer.writeAttribute("dialect","");
			writer.writeAttribute("scope","");
			writer.writeEndElement(); //Speaker
			writer.writeEndElement(); //Speakers

			// Episode and Section stay open until the end of "body"
			writer.writeStartElement("Episode");
			writer.writeStartElement("Section");
			writer.writeAttribute("type","report");
			writer.writeAttribute("startTime","");
			writer.writeAttribute("endTime","");

		} else if (name == "posting") {
			inPosting = true
			for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
				if (parser.getAttributeLocalName(i) == "who") {
					postingWho = parser.getAttributeValue(i)
				} else if (parser.getAttributeLocalName(i) == "type") {
					// NOTE(review): the id is read from the "type" attribute — confirm this is intended
					postingID = parser.getAttributeValue(i)
				}
			}
		} else if (inPosting) {
			// "dateline" and any other element need no state change
			if (name == "name") {
				inName = true
				filenameValue = ""
			} else if (name == "date") {
				inDate = true;
				dateValue = ""
			} else if (name == "time") {
				inTime = true;
				timeValue = ""
			} else if (name == "p") {
				inP = true;
				content = ""
			}
		}
	}

	/**
	 * Accumulates text into the buffer of the element currently being read
	 * inside a posting. Text found anywhere else is dropped.
	 */
	protected void processCharacters() {
		if (!inPosting) {
			return;
		}
		if (inName) {
			filenameValue += parser.getText();
		} else if (inDate) {
			dateValue += parser.getText();
		} else if (inTime) {
			timeValue += parser.getText();
		} else if (inP) {
			content += parser.getText();
		}
	}

	SimpleDateFormat formatter = new SimpleDateFormat("HHmmss");
	Date from = null// use only the first from

	/**
	 * Handles an end-element event: closes Section, Episode and Trans when
	 * "body" ends, clears the state flags inside a posting, and writes one
	 * Turn when a "posting" ends.
	 */
	protected void processEndElement()
	{
		String name = parser.getLocalName()
		if (name == "body") { // close TRS HEADER
			writer.writeEndElement(); //Section
			writer.writeEndElement(); //Episode
			writer.writeEndElement(); //Trans
		} else if (inPosting) {
			if (name == "name") {
				inName = false;
			} else if (name == "date") {
				inDate = false;
			} else if (name == "p") {
				inP = false;
			} else if (name == "time") {
				inTime = false;
			} else if (name == "posting") {
				if (writer == null) {
					// FIX: the original was missing the '+' before parser.getLocation(),
					// so reporting the error failed instead of printing the location.
					println "Error Houston !!! with loc=$postingWho date=$dateValue at=" + parser.getLocation()
					return;
				}
				// write Turn

				if (from == null) {
					// The reference time is the six characters (HHmmss) right before ".flv".
					// assumes the file name contains ".flv" preceded by at least six
					// characters; substring() throws otherwise — TODO confirm
					int i1 = filenameValue.indexOf(".flv")
					int i2 = i1-6
					String date1 = filenameValue.substring(i2, i1)
					from = formatter.parse(date1);
				}

				Date to = formatter.parse(timeValue.replace(":", ""));
				// offset of the posting from the reference time, in seconds (ms / 1000)
				float diff = ((float)((to.getTime() - from.getTime()))) / 1000.0f
				// every turn is given a fixed span of 10 after its start
				float diff2 = diff + 10
				writer.writeStartElement("Turn");
				writer.writeAttribute("startTime","$diff");
				writer.writeAttribute("endTime","$diff2");
				writer.writeAttribute("speaker",postingWho);
				writer.writeStartElement("Sync");
				writer.writeAttribute("time","$diff");
				writer.writeEndElement(); //Sync
				writer.writeCharacters(content)
				writer.writeEndElement(); //Turn
				writer.writeCharacters("\n")
				inPosting = false
			}
		}
	}

	/**
	 * Development entry point working on hard-coded files.
	 */
	public static void main(String[] args) {
		File inputFile = new File("/home/mdecorde/xml/comere/split_out/tt_5-2013-11-12-teicmr.xml")
		File outputFile = new File("/home/mdecorde/xml/comere/split_out/tt_5-2013-11-12-teicmr-broken.trs")
		File outputFixedFile = new File("/home/mdecorde/xml/comere/split_out/tt_5-2013-11-12-teicmr.trs")

		XPathResult xpathProcessor = new XPathResult(inputFile);
		def loc = xpathProcessor.getXpathResponse("//tei:posting/@who")

		def p1 = new TEICMC2TRS(inputFile, loc)
		def p2 = new FixMissingTimings(outputFile, outputFixedFile)

		// NOTE(review): p1 and p2 are only constructed here; no processing method
		// is visibly called on them, and the condition tests the objects
		// themselves, so the intermediate file is always deleted. Presumably the
		// two conversions were meant to be run and their results tested — confirm
		// against the StaxIdentityParser / FixMissingTimings API.
		if (p1 && p2) {
			outputFile.delete()
		} else {
			println "Failed to convert $inputFile to TRS"
		}
	}
}
projets/CMC2ELAN/src/StaxStackWriter.groovy (revision 2) | ||
---|---|---|
1 |
|
|
2 |
|
|
3 |
import javax.xml.namespace.NamespaceContext; |
|
4 |
import javax.xml.stream.*; |
|
5 |
|
|
6 |
public class StaxStackWriter implements XMLStreamWriter { |
|
7 |
|
|
8 |
public XMLStreamWriter writer; |
|
9 |
public Writer output; |
|
10 |
public File infile; |
|
11 |
public def events = []; |
|
12 |
|
|
13 |
/**
 * Wraps a new XMLStreamWriter created on the given output.
 *
 * @param output the destination; it is also kept in the output field
 */
public StaxStackWriter(def output) {
	this.output = output
	this.writer = XMLOutputFactory.newInstance().createXMLStreamWriter(output) //create a new file
}
|
18 |
|
|
19 |
/**
 * Creates a writer on the given file, encoded in UTF-8, and remembers the file.
 *
 * @param infile the file to write to
 */
public StaxStackWriter(File infile) {
	this(infile.newWriter("UTF-8"))
	this.infile = infile
}
|
23 |
|
|
24 |
/**
 * @return the file given to the File constructor, or null when this writer
 *         was built from another kind of output
 */
public File getInfile() {
	return this.infile
}
|
27 |
|
|
28 |
/**
 * Writes raw text directly to the underlying output, bypassing the XML writer.
 *
 * @param txt the text to write as is
 */
public void write(String txt) {
	this.output.write(txt)
}
|
31 |
|
|
32 |
/**
 * Closes as many elements as the tag stack currently holds, then resets the
 * stack to an empty list.
 */
public void writeEndElements() {
	// the count is taken once, before writeEndElement() starts popping the stack
	int remaining = events.size()
	while (remaining > 0) {
		writeEndElement()
		remaining--
	}
	events = []
}
|
39 |
|
|
40 |
/**
 * Opens one element per given tag name, in iteration order.
 *
 * @param tagsToWrite the tag names to open
 */
public void writeStartElements(def tagsToWrite) {
	for (String tagName : tagsToWrite) {
		writeStartElement(tagName)
	}
}
|
44 |
|
|
45 |
/**
 * @return the live tag stack held in the events field (not a copy)
 */
public def getTagStack() {
	return this.events
}
|
48 |
|
|
49 |
/**
 * Closes the XML writer, then the underlying output.
 *
 * FIX: the output is now closed in a finally block. Previously an exception
 * thrown by writer.close() skipped output.close() and leaked the underlying
 * stream.
 */
@Override
public void close () throws XMLStreamException {
	try {
		writer.close();
	} finally {
		// XMLStreamWriter.close() does not close the underlying output itself
		output.close();
	}
}
|
54 |
|
|
55 |
/**
 * Flushes the wrapped XML writer.
 */
@Override
public void flush () throws XMLStreamException {
	this.writer.flush()
}
|
60 |
|
|
61 |
/**
 * @return the namespace context of the wrapped XML writer
 */
@Override
public NamespaceContext getNamespaceContext() {
	NamespaceContext context = writer.getNamespaceContext()
	return context
}
|
65 |
|
|
66 |
/**
 * @param uri a namespace URI
 * @return the prefix the wrapped XML writer reports for that URI
 */
@Override
public String getPrefix (String uri) throws XMLStreamException {
	String boundPrefix = writer.getPrefix(uri)
	return boundPrefix
}
|
70 |
|
|
71 |
/**
 * Returns the named property of the wrapped XML writer.
 *
 * NOTE(review): this signature matches Groovy's own getProperty(String)
 * hook, so dynamic property access on an instance may be routed here —
 * confirm callers only use explicit getters.
 *
 * @param name the property name
 * @return the value reported by the wrapped XML writer
 */
@Override
public Object getProperty (String name) throws IllegalArgumentException {
	Object value = writer.getProperty(name)
	return value
}
|
75 |
|
|
76 |
/**
 * Delegates to the wrapped XML writer.
 *
 * @param uri the URI to bind as default namespace
 */
@Override
public void setDefaultNamespace (String uri) throws XMLStreamException {
	this.writer.setDefaultNamespace(uri)
}
|
80 |
|
|
81 |
/**
 * Delegates to the wrapped XML writer.
 *
 * @param context the namespace context to install
 */
@Override
public void setNamespaceContext(NamespaceContext context) throws XMLStreamException {
	this.writer.setNamespaceContext(context)
}
|
86 |
|
|
87 |
/**
 * Delegates to the wrapped XML writer.
 *
 * @param prefix the prefix to bind
 * @param uri    the namespace URI bound to the prefix
 */
@Override
public void setPrefix (String prefix, String uri) throws XMLStreamException {
	this.writer.setPrefix(prefix, uri)
}
|
91 |
|
|
92 |
/**
 * Writes an attribute on the element currently being started.
 *
 * @param localName the attribute name
 * @param value     the attribute value
 */
@Override
public void writeAttribute (String localName, String value) throws XMLStreamException {
	this.writer.writeAttribute(localName, value)
}
|
96 |
|
|
97 |
/**
 * Writes a namespaced attribute on the element currently being started.
 *
 * @param namespaceURI the attribute namespace URI
 * @param localName    the attribute local name
 * @param value        the attribute value
 */
@Override
public void writeAttribute (String namespaceURI, String localName, String value) throws XMLStreamException {
	this.writer.writeAttribute(namespaceURI, localName, value)
}
|
101 |
|
|
102 |
/**
 * Writes a prefixed, namespaced attribute on the element currently being started.
 *
 * @param prefix       the attribute prefix
 * @param namespaceURI the attribute namespace URI
 * @param localName    the attribute local name
 * @param value        the attribute value
 */
@Override
public void writeAttribute (String prefix, String namespaceURI, String localName, String value) throws XMLStreamException {
	this.writer.writeAttribute(prefix, namespaceURI, localName, value)
}
|
106 |
|
|
107 |
/**
 * Writes a CDATA section through the wrapped XML writer.
 *
 * @param data the CDATA content
 */
@Override
public void writeCData (String data) throws XMLStreamException {
	this.writer.writeCData(data)
}
|
111 |
|
|
112 |
/**
 * Writes text through the wrapped XML writer.
 *
 * @param text the text to write
 */
@Override
public void writeCharacters (String text) throws XMLStreamException {
	this.writer.writeCharacters(text)
}
|
116 |
|
|
117 |
/**
 * Writes a slice of a character array through the wrapped XML writer.
 *
 * @param text  the source characters
 * @param start the offset of the first character to write
 * @param len   the number of characters to write
 */
@Override
public void writeCharacters (char[] text, int start, int len) throws XMLStreamException {
	this.writer.writeCharacters(text, start, len)
}
|
121 |
|
|
122 |
/**
 * Writes a comment through the wrapped XML writer.
 *
 * @param data the comment text
 */
@Override
public void writeComment (String data) throws XMLStreamException {
	this.writer.writeComment(data)
}
|
126 |
|
|
127 |
/**
 * Writes a DTD section through the wrapped XML writer.
 *
 * @param dtd the DTD text
 */
@Override
public void writeDTD (String dtd) throws XMLStreamException {
	this.writer.writeDTD(dtd)
}
|
131 |
|
|
132 |
/**
 * Writes a default namespace declaration through the wrapped XML writer.
 *
 * @param namespaceURI the URI of the default namespace
 */
@Override
public void writeDefaultNamespace(String namespaceURI) throws XMLStreamException {
	this.writer.writeDefaultNamespace(namespaceURI)
}
|
137 |
|
|
138 |
/**
 * Writes a complete element holding only text: start tag, text, end tag.
 * The wrapped writer is used directly, so the tag stack is left untouched.
 *
 * @param localName the element name
 * @param text      the element text
 */
public void writeElement (String localName, String text) throws XMLStreamException {
	this.writer.writeStartElement(localName)
	this.writer.writeCharacters(text)
	this.writer.writeEndElement()
}
|
143 |
|
|
144 |
/**
 * Writes a complete element with attributes and text: start tag, one
 * attribute per map key, text, end tag. The wrapped writer is used directly
 * for the tags, so the tag stack is left untouched.
 *
 * @param localName the element name
 * @param map       the attributes; keys and values are written via toString()
 * @param text      the element text
 */
public void writeElement (String localName, def map, String text) throws XMLStreamException {
	this.writer.writeStartElement(localName)
	for (def attName : map.keySet()) {
		writeAttribute(attName.toString(), map[attName].toString())
	}
	this.writer.writeCharacters(text)
	this.writer.writeEndElement()
}
|
150 |
|
|
151 |
/**
 * Writes an empty element through the wrapped XML writer.
 *
 * @param localName the element name
 */
@Override
public void writeEmptyElement (String localName) throws XMLStreamException {
	this.writer.writeEmptyElement(localName)
}
|
155 |
|
|
156 |
/**
 * Writes an empty element carrying one attribute per map key.
 *
 * @param localName the element name
 * @param map       the attributes; keys and values are written via toString()
 */
public void writeEmptyElement (String localName, def map) throws XMLStreamException {
	this.writer.writeEmptyElement(localName)
	for (def attName : map.keySet()) {
		writeAttribute(attName.toString(), map[attName].toString())
	}
}
|
161 |
|
|
162 |
/**
 * Writes a namespaced empty element through the wrapped XML writer.
 *
 * @param namespaceURI the element namespace URI
 * @param localName    the element local name
 */
@Override
public void writeEmptyElement(String namespaceURI, String localName) throws XMLStreamException {
	this.writer.writeEmptyElement(namespaceURI, localName)
}
|
167 |
|
|
168 |
/**
 * Writes a prefixed, namespaced empty element through the wrapped XML writer.
 *
 * @param prefix       the element prefix
 * @param localName    the element local name
 * @param namespaceURI the element namespace URI
 */
@Override
public void writeEmptyElement (String prefix, String localName, String namespaceURI) throws XMLStreamException {
	this.writer.writeEmptyElement(prefix, localName, namespaceURI)
}
|
172 |
|
|
173 |
/**
 * Ends the document through the wrapped XML writer.
 */
@Override
public void writeEndDocument () throws XMLStreamException {
	this.writer.writeEndDocument()
}
|
177 |
|
|
178 |
/**
 * Closes the current element, then pops one entry off the tag stack.
 */
@Override
public void writeEndElement () throws XMLStreamException {
	this.writer.writeEndElement()
	// keep the tag stack in step with the elements still open
	this.events.pop()
}
|
184 |
|
|
185 |
/**
 * Writes an entity reference through the wrapped XML writer.
 *
 * @param name the entity name
 */
@Override
public void writeEntityRef (String name) throws XMLStreamException {
	this.writer.writeEntityRef(name)
}
|
189 |
|
|
190 |
/**
 * Writes a namespace declaration through the wrapped XML writer.
 *
 * @param prefix       the prefix to declare
 * @param namespaceURI the namespace URI bound to the prefix
 */
@Override
public void writeNamespace(String prefix, String namespaceURI) throws XMLStreamException {
	this.writer.writeNamespace(prefix, namespaceURI)
}
|
195 |
|
|
196 |
/**
 * Writes a processing instruction without data through the wrapped XML writer.
 *
 * @param target the processing instruction target
 */
@Override
public void writeProcessingInstruction(String target) throws XMLStreamException {
	this.writer.writeProcessingInstruction(target)
}
|
201 |
|
|
202 |
/**
 * Writes a processing instruction with data through the wrapped XML writer.
 *
 * @param target the processing instruction target
 * @param data   the processing instruction data
 */
@Override
public void writeProcessingInstruction(String target, String data) throws XMLStreamException {
	this.writer.writeProcessingInstruction(target, data)
}
|
207 |
|
|
208 |
/**
 * Writes the XML declaration with the wrapped writer's defaults.
 */
@Override
public void writeStartDocument () throws XMLStreamException {
	this.writer.writeStartDocument()
}
|
213 |
|
Formats disponibles : Unified diff