Révision 3028
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/nov13/CreateTheOtherTurns.groovy (revision 3028) | ||
---|---|---|
15 | 15 |
import java.util.Map.Entry |
16 | 16 |
import java.util.regex.Pattern |
17 | 17 |
|
18 |
class FixTranscription extends XMLProcessor {
|
|
18 |
class CreateTheOtherTurns extends XMLProcessor {
|
|
19 | 19 |
|
20 | 20 |
LocalNamesHookActivator activator; |
21 | 21 |
IdentityHook hook; |
... | ... | |
27 | 27 |
|
28 | 28 |
String otherNonPrimaryLocutor = "other" |
29 | 29 |
|
30 |
public FixTranscription(File xmlfile, String primarySpeakerIdPrefix, String otherNonPrimaryLocutor, Boolean debug) {
|
|
30 |
public CreateTheOtherTurns(File xmlfile, String primarySpeakerIdRegexString, String otherNonPrimaryLocutor, Boolean debug) {
|
|
31 | 31 |
super(xmlfile) |
32 | 32 |
this.debug = debug |
33 | 33 |
|
34 | 34 |
this.otherNonPrimaryLocutor = otherNonPrimaryLocutor |
35 |
if (primarySpeakerIdPrefix != null && primarySpeakerIdPrefix.length() > 0) {
|
|
35 |
if (primarySpeakerIdRegexString != null && primarySpeakerIdRegexString.length() > 0) {
|
|
36 | 36 |
String id = FileUtils.stripExtension(xmlfile) |
37 | 37 |
|
38 |
this.primarySpeakerIdRegex = /$primarySpeakerIdPrefix.*/
|
|
38 |
this.primarySpeakerIdRegex = /$primarySpeakerIdRegexString/
|
|
39 | 39 |
|
40 | 40 |
def rez = (id =~ primarySpeakerIdRegex).findAll() |
41 |
def rez2 = (id =~ /$primarySpeakerIdPrefix/).findAll()
|
|
41 |
def rez2 = (id =~ /$primarySpeakerIdRegex/).findAll()
|
|
42 | 42 |
if (rez2.size() != 1) { |
43 | 43 |
if (debug) println "WARNING: found the ${rez2.size()} matches of primary speaker prefix in the '$id' file name" |
44 | 44 |
this.primarySpeakerIdRegex = null |
... | ... | |
48 | 48 |
} |
49 | 49 |
} |
50 | 50 |
|
51 |
activator = new LocalNamesHookActivator<>(hook, ["w", "Turn", "Sync"]); |
|
51 |
activator = new LocalNamesHookActivator<>(hook, ["Speaker", "w", "Turn", "Sync"]);
|
|
52 | 52 |
|
53 | 53 |
hook = new IdentityHook("word_hook", activator, this) { |
54 | 54 |
|
... | ... | |
74 | 74 |
|
75 | 75 |
@Override |
76 | 76 |
protected void processStartElement() throws XMLStreamException, IOException { |
77 |
if (localname.equals("Turn")) { |
|
77 |
if (localname.equals("Speaker")) { // find out the main speaker |
|
78 |
String id = parser.getAttributeValue(null, "id")// id |
|
79 |
if (id ==~ primarySpeakerIdRegex) { |
|
80 |
primarySpeakerId = id |
|
81 |
} |
|
82 |
super.processStartElement(); |
|
83 |
} else if (localname.equals("Turn")) { |
|
78 | 84 |
// store values |
79 | 85 |
inTurn = true; |
80 | 86 |
turnInfos.clear() |
... | ... | |
130 | 136 |
for (String attr : turnInfos.keySet()) tmpInfos[attr] = turnInfos[attr] |
131 | 137 |
tmpInfos["orig-speaker"] = turnInfos["speaker"] |
132 | 138 |
|
133 |
if (primarySpeakerIdRegex == null || turnInfos["speaker"] ==~ primarySpeakerIdRegex) { |
|
134 |
tmpInfos["speaker"] = "other"
|
|
139 |
if (primarySpeakerIdRegex == null || turnInfos["speaker"] ==~ primarySpeakerIdRegex) { // no primary speaker regex is set, or the current speaker matches it: use the 'other' locutor id
|
|
140 |
tmpInfos["speaker"] = otherNonPrimaryLocutor
|
|
135 | 141 |
} else { |
136 | 142 |
tmpInfos["speaker"] = primarySpeakerId |
137 | 143 |
} |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/nov13/PrepareTranscriptionsMacro.groovy (revision 3028) | ||
---|---|---|
4 | 4 |
import java.time.format.DateTimeFormatter |
5 | 5 |
import org.txm.utils.* |
6 | 6 |
import org.txm.utils.logger.* |
7 |
import org.txm.macro.transcriber.RenameSpeaker
|
|
7 |
import org.txm.macro.transcription.RenameSpeaker
|
|
8 | 8 |
|
9 |
@Field @Option(name="trsDirectory", usage="A Vocapia XML files directory to process", widget="Folder", required=false, def="")
|
|
10 |
File trsDirectory
|
|
9 |
@Field @Option(name="vocapiaDirectory", usage="A Vocapia XML files directory to process", widget="Folder", required=false, def="")
|
|
10 |
File vocapiaDirectory
|
|
11 | 11 |
|
12 | 12 |
@Field @Option(name="resultDirectory", usage="The result directory", widget="Folder", required=false, def="") |
13 | 13 |
File resultDirectory |
14 | 14 |
|
15 |
@Field @Option(name="primarySpeakerIdPrefix", usage="speaker ID of the primary speaker", widget="String", required=false, def="") |
|
16 |
String primarySpeakerIdPrefix |
|
15 |
@Field @Option(name="primarySpeakerIdRegex", usage="speaker ID of the primary speaker", widget="String", required=false, def="") |
|
16 |
String primarySpeakerIdRegex |
|
17 |
|
|
17 | 18 |
@Field @Option(name="otherNonPrimaryLocutor", usage="other non primary id of the other turns", widget="String", required=false, def="") |
18 | 19 |
String otherNonPrimaryLocutor |
19 | 20 |
|
... | ... | |
25 | 26 |
|
26 | 27 |
if (!ParametersDialog.open(this)) return; |
27 | 28 |
|
28 |
if (resultDirectory.equals(trsDirectory)) {
|
|
29 |
println "Result directory must differs from trsDirectory: "+trsDirectory
|
|
29 |
if (resultDirectory.equals(vocapiaDirectory)) {
|
|
30 |
println "Result directory must differs from vocapiaDirectory: "+vocapiaDirectory
|
|
30 | 31 |
return false; |
31 | 32 |
} |
32 | 33 |
|
33 | 34 |
resultDirectory.mkdirs(); |
34 | 35 |
|
35 |
def trsFiles = [] |
|
36 |
|
|
37 |
println "Processing TRS directory: $trsDirectory" |
|
38 |
for (File file : trsDirectory.listFiles()) { |
|
39 |
if (file.getName().toLowerCase().endsWith(".trs")) { |
|
40 |
trsFiles << file |
|
41 |
} |
|
42 |
} |
|
36 |
println "PREPARING VOCAPIA FILES TO TRS FILES..." |
|
37 |
File trsDirectory = new File(resultDirectory, "vocapia2trs") |
|
38 |
trsDirectory.mkdir() |
|
39 |
gse.runMacro(org.txm.macro.transcription.Vocapia2TranscriberMacro, ["vocapiaDirectory":vocapiaDirectory, "resultDirectory":trsDirectory]) |
|
43 | 40 |
|
41 |
|
|
42 |
|
|
43 |
trsFiles = trsDirectory.listFiles().findAll(){it.getName().toLowerCase().endsWith(".trs")} |
|
44 | 44 |
if (trsFiles.size() == 0) { |
45 |
println "No XML file found for parameters trsDirectory=$trsDirectory"
|
|
45 |
println "No XML file found in $trsDirectory"
|
|
46 | 46 |
return false |
47 | 47 |
} |
48 | 48 |
|
49 | 49 |
println "CREATE THE 'OTHER' TURNS..." |
50 |
|
|
50 |
File otherDirectory = new File(resultDirectory, "otherturns") |
|
51 |
otherDirectory.mkdir() |
|
51 | 52 |
ConsoleProgressBar cpb = new ConsoleProgressBar(trsFiles.size()) |
52 | 53 |
for (File file : trsFiles) { |
53 | 54 |
cpb.tick() |
54 | 55 |
|
55 |
CreateTheOtherTurns fixer = new CreateTheOtherTurns(file, primarySpeakerIdPrefix, otherNonPrimaryLocutor, debug)
|
|
56 |
CreateTheOtherTurns fixer = new CreateTheOtherTurns(file, primarySpeakerIdRegex, otherNonPrimaryLocutor, debug)
|
|
56 | 57 |
String name = FileUtils.stripExtension(file) |
57 |
File outFile = new File(resultDirectory, name+".trs")
|
|
58 |
File outFile = new File(otherDirectory, name+".trs")
|
|
58 | 59 |
|
59 | 60 |
if (!fixer.process(outFile)) { |
60 | 61 |
println "WARNING: ERROR WHILE PROCESSING: "+file |
... | ... | |
62 | 63 |
} |
63 | 64 |
cpb.done() |
64 | 65 |
|
66 |
trsFiles = otherDirectory.listFiles().findAll(){it.getName().toLowerCase().endsWith(".trs")} |
|
67 |
if (trsFiles.size() == 0) { |
|
68 |
println "No XML file found in $otherDirectory" |
|
69 |
return false |
|
70 |
} |
|
71 |
|
|
65 | 72 |
// FIX LOCUTORS |
66 | 73 |
println "NORMALIZING LOCUTORS..." |
67 | 74 |
cpb = new ConsoleProgressBar(trsFiles.size()) |
68 |
for (File file : resultDirectory.listFiles()) {
|
|
75 |
for (File file : trsFiles) {
|
|
69 | 76 |
cpb.tick() |
70 | 77 |
|
71 |
RenameSpeaker fixer = new RenameSpeaker(file, file, nonPrimaryLocutorReplaceRegex, null, otherNonPrimaryLocutor, otherNonPrimaryLocutor) |
|
78 |
File outFile = new File(resultDirectory, file.getName()) |
|
79 |
RenameSpeaker fixer = new RenameSpeaker(file, outFile, nonPrimaryLocutorReplaceRegex, null, otherNonPrimaryLocutor, otherNonPrimaryLocutor) |
|
72 | 80 |
|
73 | 81 |
if (!fixer.process()) { |
74 | 82 |
println "WARNING: ERROR WHILE PROCESSING: "+file |
... | ... | |
76 | 84 |
} |
77 | 85 |
cpb.done() |
78 | 86 |
|
87 |
if (!debug) { |
|
88 |
otherDirectory.deleteDir() |
|
89 |
trsDirectory.deleteDir() |
|
90 |
} |
|
91 |
|
|
79 | 92 |
println "Done: "+trsFiles.size()+" files processed. Result files in $resultDirectory" |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/RenameSpeaker.groovy (revision 3028) | ||
---|---|---|
18 | 18 |
/** |
19 | 19 |
* Renames speakers given an id or name regex |
20 | 20 |
**/ |
21 |
public class ChangeSpeaker {
|
|
21 |
public class RenameSpeaker {
|
|
22 | 22 |
File outfile, transcriptionfile |
23 | 23 |
|
24 | 24 |
/** The doc. */ |
... | ... | |
35 | 35 |
* @param id the id |
36 | 36 |
* @param newid the newid |
37 | 37 |
*/ |
38 |
public ChangeSpeaker(File transcriptionfile, File outfile, String idRegexString, String nameRegexString, String newId, String newName) {
|
|
38 |
public RenameSpeaker(File transcriptionfile, File outfile, String idRegexString, String nameRegexString, String newId, String newName) {
|
|
39 | 39 |
|
40 | 40 |
this.transcriptionfile = transcriptionfile |
41 | 41 |
this.outfile = outfile; |
... | ... | |
93 | 93 |
} |
94 | 94 |
} |
95 | 95 |
|
96 |
|
|
97 | 96 |
def expr = XPathFactory.newInstance().newXPath().compile("//Turn"); |
98 | 97 |
def nodes = expr.evaluate(doc, XPathConstants.NODESET); |
99 | 98 |
|
Formats disponibles : Unified diff