27 |
27 |
|
28 |
28 |
// Speaker id given to a generated 'other' Turn; defaults to "other".
// processEndElement() stores it in the "speaker" attribute of the new Turn when
// primarySpeakerIdRegex is null or the current Turn's speaker matches that regex.
String otherNonPrimarySpeakerId = "other"
|
29 |
29 |
|
|
30 |
// Warning messages collected while processing (unbalanced '*' markers are appended
// here with their location instead of being printed immediately).
def warnings = []
|
|
31 |
|
30 |
32 |
public CreateTheOtherTurns(File xmlfile, String primarySpeakerIdRegexString, String otherNonPrimarySpeakerId, Boolean debug) {
|
31 |
33 |
super(xmlfile)
|
32 |
34 |
this.debug = debug
|
... | ... | |
120 |
122 |
super.processCharacters();
|
121 |
123 |
}
|
122 |
124 |
}
|
123 |
|
|
|
125 |
|
124 |
126 |
protected void writeWord(String word) {
|
125 |
127 |
writer.writeStartElement("w") // start the initial word
|
126 |
128 |
for (String attr : wInfos.keySet() ) {
|
... | ... | |
129 |
131 |
writer.writeCharacters(word)
|
130 |
132 |
writer.writeEndElement() // w
|
131 |
133 |
}
|
132 |
|
|
133 |
|
def startOtherReg = /^(.*)\*([^\p{Zs}]+.*)$/
|
134 |
|
def endOtherReg = /^(.*[^\p{Zs}]+)\*(.*)$/
|
|
134 |
|
|
135 |
// Word that OPENS an 'other' passage: the whole word must start with a literal '*'.
//   group 1: optional single character that is not '*', located right AFTER the '*'
//   group 2: the remainder, starting with at least one character that is not a
//            Unicode space separator (\p{Zs})
// NOTE(review): processEndElement() writes group 1 as its own word under a comment
// saying "heading chars before the marker", but in this pattern group 1 follows the
// marker — confirm which position is intended.
def startOtherReg = /^\*([^*])?([^\p{Zs}]+.*)$/
|
|
136 |
// Word that CLOSES an 'other' passage: a literal '*' at the end of the word,
// optionally followed by one trailing character.
//   group 1: the text before the '*', ending with at least one character that is
//            not a Unicode space separator (\p{Zs}); becomes the word to write
//   group 2: optional single character that is not '*', located after the '*';
//            written as a separate word once the 'other' Turn has been closed
def endOtherReg = /^(.*[^\p{Zs}]+)\*([^*])?$/
|
|
137 |
// Word that both OPENS and CLOSES an 'other' passage (e.g. "*word*").
//   group 1: optional single character that is not '*', before the opening '*'
//   group 2: the text between the two '*', ending with at least one character that
//            is not a Unicode space separator (\p{Zs})
//   group 3: optional single character that is not '*', after the closing '*'
// Tested before startOtherReg and endOtherReg; those two are only applied when this
// pattern does not match.
def startAndEndOtherReg = /^([^*])?\*(.*[^\p{Zs}]+)\*([^*])?$/
|
135 |
138 |
// Description (word + location) of the most recently seen 'other' marker, quoted in
// warning messages about unbalanced '*'. "<none>" until a marker has been seen.
// NOTE(review): later assignments use a one-element list literal; presumably Groovy
// coerces it to its String form because of the declared type — confirm.
String previousOtherStarting = "<none>"
|
136 |
139 |
@Override
|
137 |
140 |
protected void processEndElement() throws XMLStreamException {
|
... | ... | |
139 |
142 |
|
140 |
143 |
inW = false
|
141 |
144 |
String word = wordBuffer.toString().trim()
|
|
145 |
String wordToWrite = word
|
|
146 |
boolean shouldCloseOtherTurn = false;
|
|
147 |
|
|
148 |
def m0 = word =~ startAndEndOtherReg
|
|
149 |
if (m0.matches()) {
|
|
150 |
if (other) {
|
|
151 |
warnings << getLocation(true, false, false)+" with $word: Found a starting&ending * when one 'other' have been started at "+previousOtherStarting
|
|
152 |
}
|
|
153 |
// else {
|
|
154 |
|
|
155 |
if (debug) println "- ligne "+parser.getLocation().getLineNumber()+" : ouverture&fermeture de other avec '$word' -> tours '$turnInfos'"
|
|
156 |
|
|
157 |
writer.writeEndElement() // current Turn
|
|
158 |
writer.writeCharacters("\n")
|
|
159 |
|
|
160 |
def tmpInfos = new LinkedHashMap()
|
|
161 |
for (String attr : turnInfos.keySet()) tmpInfos[attr] = turnInfos[attr]
|
|
162 |
tmpInfos["orig-speaker"] = turnInfos["speaker"]
|
|
163 |
|
|
164 |
if (primarySpeakerIdRegex == null || turnInfos["speaker"] ==~ primarySpeakerIdRegex) { // the current speaker is not the primary speaker
|
|
165 |
tmpInfos["speaker"] = otherNonPrimarySpeakerId
|
|
166 |
} else {
|
|
167 |
tmpInfos["speaker"] = primarySpeakerId
|
|
168 |
}
|
|
169 |
tmpInfos["startTime"] = currentTime
|
|
170 |
writer.writeStartElement("Turn")
|
|
171 |
for (String attr : tmpInfos.keySet()) {
|
|
172 |
writer.writeAttribute(attr, tmpInfos[attr])
|
|
173 |
}
|
|
174 |
writer.writeCharacters("\n")
|
|
175 |
|
|
176 |
writer.writeStartElement("Sync")
|
|
177 |
writer.writeAttribute("time", tmpInfos["startTime"])
|
|
178 |
writer.writeCharacters("\n")
|
|
179 |
writer.writeEndElement()
|
|
180 |
|
|
181 |
String group1 = m0.group(1)
|
|
182 |
if (group1 != null && group1.length() > 0) { // write heading chars before the marker
|
|
183 |
writeWord(group1)
|
|
184 |
writer.writeCharacters("\n")
|
|
185 |
}
|
|
186 |
|
|
187 |
shouldCloseOtherTurn = true;
|
|
188 |
wordToWrite = m0.group(2)
|
|
189 |
other = false
|
|
190 |
//}
|
|
191 |
}
|
|
192 |
|
142 |
193 |
def m1 = word =~ startOtherReg
|
143 |
194 |
|
144 |
|
if (m1.matches()) {
|
|
195 |
if (!m0.matches() && m1.matches()) { // not and start&end but only a start
|
|
196 |
|
145 |
197 |
if (other) {
|
146 |
|
println "Warning at "+getLocation(true, false, false)+" with $word: Found a starting * when one 'other' have been started at "+previousOtherStarting
|
147 |
|
} else {
|
148 |
|
if (debug) println "- ligne "+parser.getLocation().getLineNumber()+" : ouverture de other avec '$word' -> tours '$turnInfos'"
|
149 |
|
//close current Turn and start a 'other' Turn
|
150 |
|
previousOtherStarting = ["word="+word+ " location="+getLocation(true, false, false)]
|
151 |
|
String group1 = m1.group(1)
|
152 |
|
if (group1.length() > 0) {
|
153 |
|
writeWord(group1)
|
154 |
|
writer.writeCharacters("\n")
|
155 |
|
}
|
156 |
|
|
|
198 |
warnings << getLocation(true, false, false)+" with $word: Found a starting * when one 'other' have been started at "+previousOtherStarting
|
|
199 |
}
|
|
200 |
// else {
|
|
201 |
if (debug) println "- ligne "+parser.getLocation().getLineNumber()+" : ouverture de other avec '$word' -> tours '$turnInfos'"
|
|
202 |
//close current Turn and start a 'other' Turn
|
|
203 |
previousOtherStarting = ["word="+word+ " location="+getLocation(true, false, false)]
|
|
204 |
String group1 = m1.group(1)
|
|
205 |
if (group1 != null && group1.length() > 0) { // write heading chars before the marker
|
|
206 |
writeWord(group1)
|
|
207 |
writer.writeCharacters("\n")
|
|
208 |
}
|
|
209 |
|
|
210 |
//if (other) { // don't restart a Turn if already in a Other Turn
|
157 |
211 |
writer.writeEndElement() // current Turn
|
158 |
212 |
writer.writeCharacters("\n")
|
159 |
213 |
|
... | ... | |
177 |
231 |
writer.writeAttribute("time", tmpInfos["startTime"])
|
178 |
232 |
writer.writeCharacters("\n")
|
179 |
233 |
writer.writeEndElement()
|
180 |
|
|
181 |
|
other = true
|
182 |
|
word = m1.group(2)
|
183 |
|
}
|
|
234 |
//}
|
|
235 |
|
|
236 |
other = true
|
|
237 |
wordToWrite = m1.group(2)
|
|
238 |
//}
|
184 |
239 |
}
|
185 |
240 |
|
186 |
|
boolean shouldCloseOtherTurn = false;
|
187 |
241 |
def m2 = word =~ endOtherReg
|
188 |
|
if (m2.matches()) {
|
|
242 |
if (!m1.matches() && !m0.matches() && m2.matches()) {
|
189 |
243 |
if (debug) println "- ligne "+parser.getLocation().getLineNumber()+" : fermeture de other avec '$word' -> tours '$turnInfos'"
|
190 |
|
previousOtherStarting = ["word="+word+ " location="+getLocation(true, false, false)]
|
191 |
|
if (other) {
|
192 |
|
shouldCloseOtherTurn = true;
|
193 |
|
|
194 |
|
word = m2.group(1)
|
195 |
|
other = false
|
196 |
|
} else {
|
197 |
|
println "Warning at "+getLocation(true, false, false)+" with $word: Found a closing * when one 'other' have been closed at "+previousOtherStarting
|
|
244 |
|
|
245 |
if (!other) {
|
|
246 |
warnings << getLocation(true, false, false)+" with '$word': Found a closing * when one 'other' have been closed at "+previousOtherStarting
|
198 |
247 |
}
|
|
248 |
|
|
249 |
previousOtherStarting = ["word='"+wordToWrite+ "' location="+getLocation(true, false, false)]
|
|
250 |
|
|
251 |
|
|
252 |
|
|
253 |
shouldCloseOtherTurn = true;
|
|
254 |
|
|
255 |
wordToWrite = m2.group(1)
|
199 |
256 |
}
|
200 |
257 |
|
201 |
|
if (word.length() > 0) {
|
202 |
|
writeWord(word)
|
|
258 |
if (wordToWrite.length() > 0) {
|
|
259 |
writeWord(wordToWrite)
|
203 |
260 |
}
|
204 |
261 |
|
205 |
262 |
if (shouldCloseOtherTurn) {
|
... | ... | |
222 |
279 |
writer.writeCharacters("\n")
|
223 |
280 |
writer.writeEndElement()
|
224 |
281 |
|
225 |
|
if (m2.group(2).length() > 0) {
|
|
282 |
if (m0.matches() && m0.group(3) != null && m0.group(3).length() > 0) {
|
|
283 |
writeWord(m0.group(3))
|
|
284 |
} else if (m2.matches() && m2.group(2) != null && m2.group(2).length() > 0) {
|
226 |
285 |
writeWord(m2.group(2))
|
227 |
286 |
}
|
228 |
287 |
|