Révision 3030
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/nov13/PrepareTranscriptionsMacro.groovy (revision 3030) | ||
---|---|---|
20 | 20 |
|
21 | 21 |
@Field @Option(name="nonPrimarySpeakerRegex", usage="other non primary id of the other turns", widget="String", required=false, def="") |
22 | 22 |
String nonPrimarySpeakerRegex |
23 |
|
|
24 |
@Field @Option(name="newSectionMarker", usage="section marker", widget="String", required=false, def="*#") |
|
25 |
String newSectionMarker |
|
23 | 26 |
|
24 | 27 |
@Field @Option(name="debug", usage="speaker ID of the primary speaker", widget="Boolean", required=false, def="false") |
25 | 28 |
Boolean debug |
... | ... | |
39 | 42 |
gse.runMacro(org.txm.macro.transcription.Vocapia2TranscriberMacro, ["vocapiaDirectory":vocapiaDirectory, "resultDirectory":trsDirectory]) |
40 | 43 |
|
41 | 44 |
|
45 |
println "DETECTING MARKED SECTIONS..." |
|
42 | 46 |
|
43 | 47 |
trsFiles = trsDirectory.listFiles().findAll(){it.getName().toLowerCase().endsWith(".trs")} |
44 | 48 |
if (trsFiles.size() == 0) { |
... | ... | |
46 | 50 |
return false |
47 | 51 |
} |
48 | 52 |
|
53 |
File sectionsDirectory = new File(resultDirectory, "sections") |
|
54 |
sectionsDirectory.mkdir() |
|
55 |
gse.runMacro(org.txm.macro.transcription.SegmentTRSInSectionFromMarkerMacro, ["trsDirectory":trsDirectory, "resultDirectory":sectionsDirectory, "newSectionMarker":newSectionMarker]) |
|
56 |
|
|
57 |
|
|
49 | 58 |
println "CREATE THE 'OTHER' TURNS..." |
59 |
|
|
60 |
trsFiles = sectionsDirectory.listFiles().findAll(){it.getName().toLowerCase().endsWith(".trs")} |
|
61 |
if (trsFiles.size() == 0) { |
|
62 |
println "No XML file found in $sectionsDirectory" |
|
63 |
return false |
|
64 |
} |
|
65 |
|
|
50 | 66 |
File otherDirectory = new File(resultDirectory, "otherturns") |
51 | 67 |
otherDirectory.mkdir() |
52 |
ConsoleProgressBar cpb = new ConsoleProgressBar(trsFiles.size())
|
|
68 |
cpb = new ConsoleProgressBar(trsFiles.size()) |
|
53 | 69 |
for (File file : trsFiles) { |
54 | 70 |
cpb.tick() |
55 | 71 |
|
... | ... | |
63 | 79 |
} |
64 | 80 |
cpb.done() |
65 | 81 |
|
82 |
|
|
83 |
println "NORMALIZING LOCUTORS..." |
|
84 |
|
|
66 | 85 |
trsFiles = otherDirectory.listFiles().findAll(){it.getName().toLowerCase().endsWith(".trs")} |
67 | 86 |
if (trsFiles.size() == 0) { |
68 | 87 |
println "No XML file found in $otherDirectory" |
69 | 88 |
return false |
70 | 89 |
} |
71 | 90 |
|
72 |
// FIX LOCUTORS |
|
73 |
println "NORMALIZING LOCUTORS..." |
|
74 | 91 |
cpb = new ConsoleProgressBar(trsFiles.size()) |
75 | 92 |
for (File file : trsFiles) { |
76 | 93 |
cpb.tick() |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/SegmentTRSInSectionFromMarkerMacro.groovy (revision 3030) | ||
---|---|---|
10 | 10 |
@Field @Option(name="trsDirectory", usage="Dossier qui contient les fichiers TRS", widget="Folder", required=true, def="") |
11 | 11 |
File trsDirectory; |
12 | 12 |
|
13 |
@Field @Option(name="resultDirectory", usage="Dossier résultat TRS", widget="Folder", required=true, def="") |
|
14 |
File resultDirectory; |
|
15 |
|
|
13 | 16 |
@Field @Option(name="newSectionMarker", usage="The marker, spaces included", widget="String", required=true, def=" *#") |
14 | 17 |
def newSectionMarker |
15 | 18 |
|
19 |
|
|
16 | 20 |
if (!ParametersDialog.open(this)) return; |
17 | 21 |
|
18 | 22 |
if (!trsDirectory.exists()) { |
... | ... | |
20 | 24 |
return |
21 | 25 |
} |
22 | 26 |
|
23 |
File outputDirectory = new File(trsDirectory, "out") |
|
24 |
println "Writing result to $outputDirectory..." |
|
27 |
println "Writing result to $resultDirectory..." |
|
25 | 28 |
|
26 | 29 |
dateTimeFormatter = DateTimeFormatter.ISO_LOCAL_TIME |
27 | 30 |
def strTotime(def str) { |
... | ... | |
44 | 47 |
} |
45 | 48 |
|
46 | 49 |
try { |
47 |
def trsFiles = trsDirectory.list(new FilenameFilter() { public boolean accept(File dir, String name) { return name.toLowerCase().endsWith(".trs"); } });
|
|
50 |
def trsFiles = trsDirectory.listFiles().findAll() { it.getName().toLowerCase().endsWith(".trs") }
|
|
48 | 51 |
|
49 | 52 |
if (trsFiles.size() == 0) { |
50 | 53 |
println "No TRS file to process in $trsDirectory" |
51 | 54 |
return; |
52 | 55 |
} |
53 | 56 |
|
54 |
ConsoleProgressBar cpb = new ConsoleProgressBar(sectionGroupsToInsert.keySet().size())
|
|
57 |
ConsoleProgressBar cpb = new ConsoleProgressBar(trsFiles.size())
|
|
55 | 58 |
for (File trsFile : trsFiles) { |
56 | 59 |
cpb.tick() |
57 | 60 |
|
... | ... | |
68 | 71 |
def trsEpisode = trsEpisodes[0] |
69 | 72 |
def trsSections = trs.Episode.Section // 1 |
70 | 73 |
if (trsSections.size() > 1) { |
71 |
println "multiple Section node in $trsFile" |
|
74 |
println "Error: multiple Section node in $trsFile"
|
|
72 | 75 |
continue |
73 | 76 |
} |
74 | 77 |
def trsSection = trsSections[0] |
... | ... | |
76 | 79 |
def turns = trsSection.Turn |
77 | 80 |
def newSections = [] |
78 | 81 |
def iSection = 0; |
79 |
def currentSection = null
|
|
80 |
def currentNode = null
|
|
82 |
def previousSection = trsSection
|
|
83 |
def currentSection = trsSection
|
|
81 | 84 |
|
82 |
for (def turn : turns) { |
|
85 |
for (int iTurn = 0 ; iTurn < turns.size() ; iTurn++) { |
|
86 |
def turn = turns[iTurn] |
|
83 | 87 |
def start = Float.parseFloat(turn.@startTime) |
84 | 88 |
def end = Float.parseFloat(turn.@endTime) |
85 | 89 |
|
86 |
def found = null |
|
87 |
for (int i = iSection ; i < sections.size() ; i++) { |
|
88 |
if (end < sections[i][0]) { // Turn is before section |
|
89 |
|
|
90 |
} else if (sections[i][1] < start) { // Turn is after section |
|
91 |
|
|
92 |
} else { |
|
93 |
found = sections[i] |
|
94 |
iSection = i |
|
95 |
break // stop searching and set iSection to accelerate next search |
|
96 |
} |
|
97 |
} |
|
98 |
|
|
99 |
if (found == null) { |
|
100 |
if (currentSection != null || currentNode == null) { |
|
101 |
currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":turn.@startTime, "endTime":"", "synchronized":"false"] ) |
|
102 |
currentSection = null; |
|
103 |
} |
|
104 |
} else { |
|
105 |
if (found != currentSection) { |
|
106 |
if (currentNode != null && currentNode.@synchronized == "false") { |
|
107 |
def tmp = currentNode.Turn |
|
108 |
currentNode.@endTime = tmp[-1].@endTime |
|
90 |
String content = turn.text().trim() |
|
91 |
println "TURN: "+content |
|
92 |
if (content.startsWith(newSectionMarker)) { |
|
93 |
println "NEW SECTION !" |
|
94 |
previousSection = currentSection |
|
95 |
currentSection = new Node(trsEpisode, "Section", new LinkedHashMap(["type":newSectionMarker, "startTime":turn.@startTime, "endTime":previousSection.attributes()["endTime"]])) |
|
96 |
|
|
97 |
previousSection.attributes()["endTime"] = turn.@startTime |
|
98 |
turns[iTurn-1].attributes()["endTime"] = turn.@startTime |
|
99 |
|
|
100 |
def children = turn.children() |
|
101 |
for (int i = 0 ; i < children.size() ; i++) { |
|
102 |
def node = children[i] |
|
103 |
if (node instanceof String) { |
|
104 |
println "FIXING: $node " |
|
105 |
children.remove(i) |
|
106 |
i-- |
|
107 |
} else if (node instanceof groovy.util.Node && node.name() == "w" && node.text().contains(newSectionMarker)) { |
|
108 |
println "FIXING: w $node " |
|
109 |
children.remove(node) |
|
110 |
} |
|
109 | 111 |
} |
110 |
|
|
111 |
currentSection = found |
|
112 |
currentNode = new Node(trsEpisode, "Section", currentSection[2]) |
|
113 |
} |
|
114 | 112 |
} |
113 |
|
|
115 | 114 |
trsSection.remove(turn) |
116 |
currentNode.append(turn)
|
|
115 |
currentSection.append(turn)
|
|
117 | 116 |
} |
118 | 117 |
|
119 |
//remove the initial section |
|
120 |
trsEpisode.remove(trsSection) |
|
121 |
|
|
122 |
outputDirectory.mkdir() |
|
123 |
File outfile = new File(outputDirectory, trsFile.getName()) |
|
118 |
resultDirectory.mkdir() |
|
119 |
File outfile = new File(resultDirectory, trsFile.getName()) |
|
124 | 120 |
outfile.withWriter("UTF-8") { writer -> |
125 | 121 |
writer.write('<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE Trans SYSTEM "trans-14.dtd">\n') |
126 | 122 |
def printer = new groovy.util.XmlNodePrinter(new PrintWriter(writer)) |
... | ... | |
129 | 125 |
} |
130 | 126 |
} |
131 | 127 |
cpb.done() |
132 |
reader.close() |
|
128 |
|
|
133 | 129 |
println "Done." |
134 | 130 |
|
135 | 131 |
} catch(Exception e) { |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/Vocapia2Transcriber.groovy (revision 3030) | ||
---|---|---|
40 | 40 |
def turninfos = new LinkedHashMap() |
41 | 41 |
boolean other = false; |
42 | 42 |
String word = "" |
43 |
String duration = "0.0" |
|
43 | 44 |
try { |
44 | 45 |
|
45 | 46 |
for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) { |
... | ... | |
84 | 85 |
writer.writeEndElement() |
85 | 86 |
writer.writeCharacters("\n") |
86 | 87 |
break; |
88 |
|
|
89 |
case "Channel": |
|
90 |
duration = parser.getAttributeValue(null, "sigdur") |
|
91 |
break; |
|
87 | 92 |
|
88 | 93 |
case "SegmentList": |
89 | 94 |
writer.writeStartElement("Episode") |
90 | 95 |
//<Section type="report" startTime="0" endTime="3617.593"> |
96 |
|
|
91 | 97 |
writer.writeStartElement("Section") |
98 |
writer.writeAttribute("startTime", "0.0") |
|
99 |
writer.writeAttribute("endTime", duration) |
|
92 | 100 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) { |
93 | 101 |
writer.writeAttribute(parser.getAttributeLocalName(i), parser.getAttributeValue(i)) |
94 | 102 |
} |
Formats disponibles : Unified diff