Révision 3036
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/nov13/CreateTheOtherTurns.groovy (revision 3036) | ||
---|---|---|
127 | 127 |
|
128 | 128 |
inW = false |
129 | 129 |
String word = wordBuffer.toString().trim() |
130 |
if (!other && word.startsWith("*")) { |
|
131 |
//close current Turn and start a 'other' Turn |
|
132 |
writer.writeEndElement() // current Turn |
|
133 |
writer.writeCharacters("\n") |
|
134 |
|
|
135 |
def tmpInfos = new LinkedHashMap() |
|
136 |
for (String attr : turnInfos.keySet()) tmpInfos[attr] = turnInfos[attr] |
|
137 |
tmpInfos["orig-speaker"] = turnInfos["speaker"] |
|
138 |
|
|
139 |
if (primarySpeakerIdRegex == null || turnInfos["speaker"] ==~ primarySpeakerIdRegex) { // the current speaker is not the primary speaker |
|
140 |
tmpInfos["speaker"] = otherNonPrimarySpeakerId |
|
130 |
if (word.startsWith("*")) { |
|
131 |
if (other) { |
|
132 |
println "Warning: found a starting * when one 'other' is already started at "+getLocation() |
|
141 | 133 |
} else { |
142 |
tmpInfos["speaker"] = primarySpeakerId |
|
134 |
//close current Turn and start a 'other' Turn |
|
135 |
writer.writeEndElement() // current Turn |
|
136 |
writer.writeCharacters("\n") |
|
137 |
|
|
138 |
def tmpInfos = new LinkedHashMap() |
|
139 |
for (String attr : turnInfos.keySet()) tmpInfos[attr] = turnInfos[attr] |
|
140 |
tmpInfos["orig-speaker"] = turnInfos["speaker"] |
|
141 |
|
|
142 |
if (primarySpeakerIdRegex == null || turnInfos["speaker"] ==~ primarySpeakerIdRegex) { // the current speaker is not the primary speaker |
|
143 |
tmpInfos["speaker"] = otherNonPrimarySpeakerId |
|
144 |
} else { |
|
145 |
tmpInfos["speaker"] = primarySpeakerId |
|
146 |
} |
|
147 |
tmpInfos["startTime"] = currentTime |
|
148 |
writer.writeStartElement("Turn") |
|
149 |
for (String attr : tmpInfos.keySet()) { |
|
150 |
writer.writeAttribute(attr, tmpInfos[attr]) |
|
151 |
} |
|
152 |
|
|
153 |
other = true |
|
154 |
word = word.substring(1) |
|
143 | 155 |
} |
144 |
tmpInfos["startTime"] = currentTime |
|
145 |
writer.writeStartElement("Turn") |
|
146 |
for (String attr : tmpInfos.keySet()) { |
|
147 |
writer.writeAttribute(attr, tmpInfos[attr]) |
|
148 |
} |
|
149 |
|
|
150 |
other = true |
|
151 |
word = word.substring(1) |
|
152 | 156 |
} |
153 | 157 |
|
154 | 158 |
boolean shouldCloseOtherTurn = false; |
155 |
if (other && word.endsWith("*")) { |
|
156 |
shouldCloseOtherTurn = true; |
|
157 |
|
|
158 |
word = word.substring(0, word.length()-1) |
|
159 |
other = false |
|
159 |
if (word.endsWith("*")) { |
|
160 |
if (other) { |
|
161 |
shouldCloseOtherTurn = true; |
|
162 |
|
|
163 |
word = word.substring(0, word.length()-1) |
|
164 |
other = false |
|
165 |
} else { |
|
166 |
println "Warning: found a ending * when one 'other' is not started at "+getLocation() |
|
167 |
} |
|
160 | 168 |
} |
161 | 169 |
|
162 | 170 |
if ("XXX".equals(word)) { // <Event desc="XXX" type="unknown" extent="next"/> |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/nov13/PrepareTranscriptionsMacro.groovy (revision 3036) | ||
---|---|---|
18 | 18 |
@Field @Option(name="otherNonPrimarySpeakerId", usage="other non primary id of the other turns", widget="String", required=false, def="") |
19 | 19 |
String otherNonPrimarySpeakerId |
20 | 20 |
|
21 |
@Field @Option(name="nonPrimarySpeakerRegex", usage="other non primary id of the other turns", widget="String", required=false, def="") |
|
22 |
String nonPrimarySpeakerRegex |
|
21 |
@Field @Option(name="nonPrimarySpeakerIdRegex", usage="other non primary id of the other turns", widget="String", required=false, def="")
|
|
22 |
String nonPrimarySpeakerIdRegex
|
|
23 | 23 |
|
24 | 24 |
@Field @Option(name="newSectionMarker", usage="section marker", widget="String", required=false, def="*#") |
25 | 25 |
String newSectionMarker |
... | ... | |
52 | 52 |
|
53 | 53 |
File sectionsDirectory = new File(resultDirectory, "sections") |
54 | 54 |
sectionsDirectory.mkdir() |
55 |
gse.runMacro(org.txm.macro.transcription.SegmentTRSInSectionFromMarkerMacro, ["trsDirectory":trsDirectory, "resultDirectory":sectionsDirectory, "newSectionMarker":newSectionMarker]) |
|
55 |
gse.runMacro(org.txm.macro.transcription.SegmentTRSInSectionFromMarkerMacro, ["trsDirectory":trsDirectory, "resultDirectory":sectionsDirectory, "newSectionMarker":newSectionMarker, "debug":debug])
|
|
56 | 56 |
|
57 | 57 |
|
58 |
println "CREATE THE 'OTHER' TURNS..."
|
|
58 |
println "CREATING THE 'OTHER' TURNS..."
|
|
59 | 59 |
|
60 | 60 |
trsFiles = sectionsDirectory.listFiles().findAll(){it.getName().toLowerCase().endsWith(".trs")} |
61 | 61 |
if (trsFiles.size() == 0) { |
... | ... | |
75 | 75 |
|
76 | 76 |
if (!fixer.process(outFile)) { |
77 | 77 |
println "WARNING: ERROR WHILE PROCESSING: "+file |
78 |
if (debug) {println "DEBUG ACTIVATED -> STOP"; return;} |
|
78 | 79 |
} |
79 | 80 |
} |
80 | 81 |
cpb.done() |
... | ... | |
93 | 94 |
cpb.tick() |
94 | 95 |
|
95 | 96 |
File outFile = new File(resultDirectory, file.getName()) |
96 |
RecodeSpeakers fixer = new RecodeSpeakers(file, outFile, nonPrimarySpeakerRegex, null, otherNonPrimarySpeakerId, otherNonPrimarySpeakerId) |
|
97 |
RecodeSpeakers fixer = new RecodeSpeakers(file, outFile, nonPrimarySpeakerIdRegex, null, otherNonPrimarySpeakerId, otherNonPrimarySpeakerId)
|
|
97 | 98 |
|
98 | 99 |
if (!fixer.process()) { |
99 | 100 |
println "WARNING: ERROR WHILE PROCESSING: "+file |
101 |
if (debug) {println "DEBUG ACTIVATED -> STOP"; return;} |
|
100 | 102 |
} |
101 | 103 |
} |
102 | 104 |
cpb.done() |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/SegmentTRSInSectionFromMarkerMacro.groovy (revision 3036) | ||
---|---|---|
8 | 8 |
import org.txm.utils.logger.* |
9 | 9 |
|
10 | 10 |
@Field @Option(name="trsDirectory", usage="Dossier qui contient les fichiers TRS", widget="Folder", required=true, def="") |
11 |
File trsDirectory; |
|
11 |
File trsDirectory;
|
|
12 | 12 |
|
13 | 13 |
@Field @Option(name="resultDirectory", usage="Dossier résultat TRS", widget="Folder", required=true, def="") |
14 |
File resultDirectory; |
|
14 |
File resultDirectory;
|
|
15 | 15 |
|
16 | 16 |
@Field @Option(name="newSectionMarker", usage="The marker, spaces included", widget="String", required=true, def=" *#") |
17 |
def newSectionMarker |
|
17 |
def newSectionMarker
|
|
18 | 18 |
|
19 |
@Field @Option(name="debug", usage="activate debug messages", widget="Boolean", required=true, def="false") |
|
20 |
boolean debug |
|
19 | 21 |
|
20 | 22 |
if (!ParametersDialog.open(this)) return; |
21 | 23 |
|
... | ... | |
57 | 59 |
ConsoleProgressBar cpb = new ConsoleProgressBar(trsFiles.size()) |
58 | 60 |
for (File trsFile : trsFiles) { |
59 | 61 |
cpb.tick() |
60 |
|
|
62 |
|
|
61 | 63 |
// Open input file |
62 | 64 |
def slurper = new groovy.util.XmlParser(false, true, true); |
63 | 65 |
slurper.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false) // allow DTD declaration |
... | ... | |
84 | 86 |
|
85 | 87 |
for (int iTurn = 0 ; iTurn < turns.size() ; iTurn++) { |
86 | 88 |
def turn = turns[iTurn] |
89 |
def previousTurn = turn |
|
90 |
def newTurn = null |
|
87 | 91 |
def start = Float.parseFloat(turn.@startTime) |
88 | 92 |
def end = Float.parseFloat(turn.@endTime) |
89 | 93 |
|
90 |
String content = turn.text().trim() |
|
91 |
println "TURN: "+content |
|
92 |
if (content.startsWith(newSectionMarker)) { |
|
93 |
println "NEW SECTION !" |
|
94 |
previousSection = currentSection |
|
95 |
currentSection = new Node(trsEpisode, "Section", new LinkedHashMap(["type":newSectionMarker, "startTime":turn.@startTime, "endTime":previousSection.attributes()["endTime"]])) |
|
94 |
if (debug) println "TURN: " |
|
95 |
|
|
96 |
def children = turn.children() |
|
97 |
for (int i = 0 ; i < children.size() ; i++) { |
|
98 |
def node = children[i] |
|
96 | 99 |
|
97 |
previousSection.attributes()["endTime"] = turn.@startTime |
|
98 |
turns[iTurn-1].attributes()["endTime"] = turn.@startTime |
|
100 |
String content = null; |
|
101 |
if (node instanceof String) { |
|
102 |
content = node |
|
103 |
} else if (node instanceof groovy.util.Node && node.name() == "w" && node.text().contains(newSectionMarker)) { |
|
104 |
content = node.text().trim() |
|
105 |
start = Float.parseFloat(node.@time) |
|
106 |
} |
|
99 | 107 |
|
100 |
def children = turn.children() |
|
101 |
for (int i = 0 ; i < children.size() ; i++) { |
|
102 |
def node = children[i] |
|
103 |
if (node instanceof String) { |
|
104 |
println "FIXING: $node " |
|
105 |
children.remove(i) |
|
106 |
i-- |
|
107 |
} else if (node instanceof groovy.util.Node && node.name() == "w" && node.text().contains(newSectionMarker)) { |
|
108 |
println "FIXING: w $node " |
|
109 |
children.remove(node) |
|
108 |
if (content.equals(newSectionMarker)) { |
|
109 |
if (debug) println "NEW SECTION !" |
|
110 |
previousSection = currentSection |
|
111 |
currentSection = new Node(trsEpisode, "Section", new LinkedHashMap(["type":newSectionMarker, "startTime":turn.@startTime, "endTime":previousSection.attributes()["endTime"]])) |
|
112 |
|
|
113 |
previousSection.attributes()["endTime"] = start |
|
114 |
previousTurn.attributes()["endTime"] = turn.@startTime |
|
115 |
|
|
116 |
if (i > 0) { // The mark is not at the begining of the turn |
|
117 |
|
|
118 |
newTurn = new Node(currentSection, "Turn", new LinkedHashMap()) |
|
119 |
for (def att : turn.attributes()) { |
|
120 |
newTurn.attributes()[att.getKey()] = att.getValue() |
|
110 | 121 |
} |
122 |
newTurn.attributes()["type"] = newSectionMarker |
|
123 |
newTurn.attributes()["startTime"] = start |
|
124 |
turn.attributes()["endTime"] = start |
|
125 |
|
|
111 | 126 |
} |
127 |
|
|
128 |
children.remove(i) // remove the mark |
|
129 |
i-- |
|
130 |
|
|
131 |
} else if (newTurn != null) { |
|
132 |
turn.children().remove(i) |
|
133 |
i-- |
|
134 |
newTurn.children().add(node) |
|
135 |
} |
|
136 |
previousTurn = turn |
|
112 | 137 |
} |
113 | 138 |
|
114 | 139 |
trsSection.remove(turn) |
Formats disponibles : Unified diff