Statistics
| Revision:

ccc / projets / CMC2ELAN / src / TRS2CSVELAN.groovy @ 2

History | View | Annotate | Download (1001 Bytes)

1

    
2
// Converts every ".trs" file found directly in trsDir into a tab-separated
// file in outDir, one row per Turn element (Episode/Section/Turn), with the
// columns: actor, start, end, duration, annotation.
File trsDir = new File("/home/mdecorde/xml/comere/forElan")
File outDir = new File("/home/mdecorde/xml/comere/CSVforELAN")

// Start from an empty output directory. mkdirs() also creates missing parent
// directories, and a failure is reported here instead of surfacing later as
// a less explicit error when the first output file is opened.
outDir.deleteDir()
if (!outDir.mkdirs() && !outDir.isDirectory()) {
    throw new IOException("Could not create output directory: " + outDir)
}

// File.listFiles() returns null when the directory is missing or unreadable;
// report it rather than silently converting nothing.
File[] trsFiles = trsDir.listFiles()
if (trsFiles == null) {
    System.err.println("Cannot list input directory: " + trsDir)
    trsFiles = new File[0]
}

for (File f : trsFiles) {
    if (!f.getName().endsWith(".trs")) continue

    // The output keeps the ".csv" extension although the content is tab-separated.
    File tsvFile = new File(outDir, f.getName() + ".csv")
    println "$f -> $tsvFile"

    // Parse before opening the output so that an unreadable input does not
    // leave a header-only file behind.
    def parser = new XmlParser()
    // DOCTYPE declarations are explicitly allowed and namespace processing is off.
    // NOTE(review): presumably because the .trs inputs declare a DTD - confirm.
    parser.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false)
    parser.setFeature("http://xml.org/sax/features/namespaces", false)
    def trs = parser.parse(f)

    // withWriter closes the writer even when a row fails to convert.
    tsvFile.withWriter("UTF-8") { writer ->
        writer.println("actor\tstart\tend\tduration\tannotation")
        for (def turn : trs.Episode.Section.Turn) {
            // A Turn without a speaker attribute is written as the text "null".
            String actor = turn.@speaker
            String start = turn.@startTime
            String end = turn.@endTime

            // Exact decimal subtraction: float arithmetic adds binary rounding
            // noise to the timestamps (e.g. 4.4440002 instead of 4.444).
            String duration = (new BigDecimal(end.trim()) - new BigDecimal(start.trim())).toPlainString()

            // Collapse line breaks and tabs inside the turn text so that each
            // turn stays on a single row with exactly five columns.
            String annotation = turn.text().replaceAll(/\s+/, ' ').trim()

            writer.println("${actor}\t${start}\t${end}\t${duration}\t${annotation}")
        }
    }
}