Révision 3485
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/compiler.groovy (revision 3485) | ||
---|---|---|
585 | 585 |
break; |
586 | 586 |
case "w": |
587 | 587 |
// concat spk id and ref |
588 |
String isEnq = (interviewers != null && interviewers.matches(u_name))?"*":"";
|
|
588 |
String isEnq = (interviewers != null && interviewers.contains(u_name))?"*":"";
|
|
589 | 589 |
String ref = (u_name+", "+formatedTime+""+isEnq) |
590 | 590 |
if (ADD_TEXTID_TO_REF) ref = textid+", "+ref |
591 | 591 |
vForm +="\t"+u_name+"\t"+ref |
... | ... | |
615 | 615 |
vForm = vForm.replaceAll("\n", "").replaceAll("&", "&amp;").replaceAll("<", "&lt;"); |
616 | 616 |
|
617 | 617 |
if (interviewers != null && !indexInterviewer) { // we must remove some words |
618 |
if (!interviewers.matches(u_name)) { // keep what is now an interviewer
|
|
618 |
if (!interviewers.contains(u_name)) { // keep what is now an interviewer
|
|
619 | 619 |
output.write(vForm+"\t"+wordid+vAna+"\n"); |
620 | 620 |
} |
621 | 621 |
} else { |
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/pager.groovy (revision 3485) | ||
---|---|---|
922 | 922 |
|
923 | 923 |
writer.writeStartElement("span"); |
924 | 924 |
writer.writeAttribute("class", "spk"); |
925 |
bold = interviewers != null && interviewers.matches(spk)
|
|
925 |
bold = interviewers != null && interviewers.contains(spk)
|
|
926 | 926 |
spk = spk.replaceAll('^([^0-9]*)([0-9]+)$', '$1 $2'); |
927 | 927 |
if (overlapping) { |
928 | 928 |
writer.writeCharacters("// ") |
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/txt/SearchInDirectoryMacro.groovy (revision 3485) | ||
---|---|---|
21 | 21 |
String regexp; |
22 | 22 |
@Field @Option(name="encoding", usage="File encoding", widget="String", required=false, def="UTF-8") |
23 | 23 |
String encoding; |
24 |
@Field @Option(name="showLines", usage="File encoding", widget="Boolean", required=false, def="true") |
|
25 |
Boolean showLines; |
|
24 | 26 |
|
25 | 27 |
if (!ParametersDialog.open(this)) return; |
26 | 28 |
|
... | ... | |
38 | 40 |
} |
39 | 41 |
} |
40 | 42 |
|
41 |
println inputfile.getName() + " "+lines.size() + " match" +((lines.size() > 1)?"s":"") |
|
42 | 43 |
if (lines.size() > 0) { |
43 |
for (String s : lines ) println s |
|
44 |
println inputfile.getName() + " "+lines.size() + " match" +((lines.size() > 1)?"s":"") |
|
45 |
if (showLines && lines.size() > 0) { |
|
46 |
for (String s : lines ) println s |
|
47 |
} |
|
44 | 48 |
} |
49 |
|
|
45 | 50 |
} |
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/projects/nov13/PrepareTranscriptionsMacro.groovy (revision 3485) | ||
---|---|---|
11 | 11 |
|
12 | 12 |
@Field @Option(name="resultDirectory", usage="The result directory", widget="Folder", required=false, def="") |
13 | 13 |
File resultDirectory |
14 |
|
|
14 |
|
|
15 | 15 |
@Field @Option(name="primarySpeakerIdRegex", usage="speaker ID of the primary speaker", widget="String", required=false, def="") |
16 | 16 |
String primarySpeakerIdRegex |
17 | 17 |
|
18 | 18 |
@Field @Option(name="otherNonPrimarySpeakerId", usage="other non primary id of the other turns", widget="String", required=false, def="") |
19 | 19 |
String otherNonPrimarySpeakerId |
20 |
|
|
20 |
|
|
21 | 21 |
@Field @Option(name="nonPrimarySpeakerIdRegex", usage="other non primary id of the other turns", widget="String", required=false, def="") |
22 | 22 |
String nonPrimarySpeakerIdRegex |
23 |
|
|
24 |
@Field @Option(name="supplementarySpeakerIdRegex", usage="supplementary locutor ids regex", widget="String", required=false, def="UX.+") |
|
25 |
String supplementarySpeakerIdRegex |
|
26 |
|
|
27 |
@Field @Option(name="supplementarySpeakerId", usage="supplementary locutor id to normalize", widget="String", required=false, def="") |
|
28 |
String supplementarySpeakerId |
|
29 |
|
|
23 |
|
|
24 |
//@Field @Option(name="supplementarySpeakerIdRegex", usage="supplementary locutor ids regex", widget="String", required=false, def="UX.+") |
|
25 |
// String supplementarySpeakerIdRegex |
|
26 |
// |
|
27 |
//@Field @Option(name="supplementarySpeakerId", usage="supplementary locutor id to normalize", widget="String", required=false, def="") |
|
28 |
// String supplementarySpeakerId |
|
29 |
|
|
30 |
@Field @Option(name="spearkerReplacementTableFile", usage="2 columns (from, to) table file", widget="File", required=false, def="replacements.tsv") |
|
31 |
File spearkerReplacementTableFile |
|
32 |
|
|
30 | 33 |
@Field @Option(name="newSectionMarker", usage="section marker", widget="String", required=false, def="*#") |
31 | 34 |
String newSectionMarker |
32 | 35 |
|
33 | 36 |
@Field @Option(name="debug", usage="speaker ID of the primary speaker", widget="Boolean", required=false, def="false") |
34 | 37 |
Boolean debug |
35 |
|
|
38 |
|
|
36 | 39 |
@Field @Option(name="cleanWorkingDirectories", usage="Clean working directories is selected", widget="Boolean", required=false, def="true") |
37 | 40 |
Boolean cleanWorkingDirectories |
38 |
|
|
41 |
|
|
39 | 42 |
if (!ParametersDialog.open(this)) return; |
40 | 43 |
|
41 | 44 |
if (resultDirectory.equals(vocapiaDirectory)) { |
... | ... | |
124 | 127 |
return false |
125 | 128 |
} |
126 | 129 |
|
130 |
def replacements = [] |
|
131 |
if (spearkerReplacementTableFile.exists()) { |
|
132 |
for (String line : spearkerReplacementTableFile.readLines("UTF-8")) { |
|
133 |
def split = line.split("\t", 2) |
|
134 |
if (split.length == 2) { |
|
135 |
replacements << [split[0].replaceAll("<SPACE>", " ").replaceAll("<BOM>", "\uFEFF"), split[1]] |
|
136 |
} |
|
137 |
} |
|
138 |
} |
|
139 |
|
|
127 | 140 |
cpb = new ConsoleProgressBar(trsFiles.size()) |
128 | 141 |
for (File file : trsFiles) { |
129 | 142 |
if (debug) println "== $file ==" |
... | ... | |
140 | 153 |
if (debug) {println "DEBUG ACTIVATED -> STOP"; return;} |
141 | 154 |
} |
142 | 155 |
|
156 |
|
|
157 |
File tmpFile2 = new File(resultDirectory, "tmp2.xml") |
|
158 |
for (def replacement : replacements) { |
|
159 |
|
|
160 |
RecodeSpeakers fixer2 = new RecodeSpeakers(tmpFile, tmpFile2, replacement[0], null, replacement[1], replacement[1]) |
|
161 |
fixer2.debug = debug |
|
162 |
|
|
163 |
if (!fixer2.process()) { |
|
164 |
println "WARNING: ERROR WHILE PROCESSING: "+file |
|
165 |
if (debug) {println "DEBUG ACTIVATED -> STOP"; return;} |
|
166 |
} |
|
167 |
tmpFile.delete(); // This tmp file must be removed for the next operation |
|
168 |
tmpFile2.renameTo(tmpFile) |
|
169 |
} |
|
170 |
|
|
143 | 171 |
File outFile2 = new File(resultDirectory, file.getName()) |
144 |
RecodeSpeakers fixer2 = new RecodeSpeakers(tmpFile, outFile2, supplementarySpeakerIdRegex, null, supplementarySpeakerId, supplementarySpeakerId) |
|
145 |
fixer2.debug = debug |
|
172 |
tmpFile.renameTo(outFile2) |
|
146 | 173 |
|
147 |
if (!fixer2.process()) { |
|
148 |
println "WARNING: ERROR WHILE PROCESSING: "+file |
|
149 |
if (debug) {println "DEBUG ACTIVATED -> STOP"; return;} |
|
150 |
} |
|
151 |
tmpFile.delete(); // This tmp file must be removed for the next operation |
|
152 | 174 |
} |
153 | 175 |
cpb.done() |
154 | 176 |
|
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/Vocapia2Transcriber.groovy (revision 3485) | ||
---|---|---|
81 | 81 |
break; |
82 | 82 |
case "Speaker": // <Speaker ch="1" dur="531.38" gender="X" spkid="Enquêtrice" lang="fre" lconf="1.00" nw="1586" tconf="0.95"/> -> <Speaker id="spk1" name="enq4" check="no" dialect="native" accent="" scope="local"/> |
83 | 83 |
|
84 |
String spkid = parser.getAttributeValue(null, "spkid").trim().replaceAll("[\\s()]", "")
|
|
84 |
String spkid = parser.getAttributeValue(null, "spkid").trim().replaceAll("[\\s\\uFEFF]", "")
|
|
85 | 85 |
|
86 | 86 |
writer.writeStartElement("Speaker") |
87 | 87 |
writer.writeAttribute("id", spkid) |
Formats disponibles : Unified diff