Révision 3017
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/nov13/FixTranscription.groovy (revision 3017) | ||
---|---|---|
3 | 3 |
import javax.xml.stream.* |
4 | 4 |
|
5 | 5 |
import org.txm.importer.PersonalNamespaceContext |
6 |
import org.txm.utils.FileUtils |
|
6 | 7 |
import org.txm.xml.IdentityHook |
7 | 8 |
import org.txm.xml.* |
8 | 9 |
|
... | ... | |
19 | 20 |
LocalNamesHookActivator activator; |
20 | 21 |
IdentityHook hook; |
21 | 22 |
|
22 |
public FixTranscription(File xmlfile) { |
|
23 |
def primarySpeakerIdRegex |
|
24 |
String primarySpeakerId |
|
25 |
|
|
26 |
Boolean debug |
|
27 |
|
|
28 |
String otherNonPrimaryLocutor = "other" |
|
29 |
|
|
30 |
public FixTranscription(File xmlfile, String primarySpeakerIdPrefix, String otherNonPrimaryLocutor, Boolean debug) { |
|
23 | 31 |
super(xmlfile) |
32 |
this.debug = debug |
|
24 | 33 |
|
34 |
this.otherNonPrimaryLocutor = otherNonPrimaryLocutor |
|
35 |
if (primarySpeakerIdPrefix != null && primarySpeakerIdPrefix.length() > 0) { |
|
36 |
String id = FileUtils.stripExtension(xmlfile) |
|
37 |
|
|
38 |
this.primarySpeakerIdRegex = /$primarySpeakerIdPrefix.*/ |
|
39 |
|
|
40 |
def rez = (id =~ primarySpeakerIdRegex).findAll() |
|
41 |
def rez2 = (id =~ /$primarySpeakerIdPrefix/).findAll() |
|
42 |
if (rez2.size() != 1) { |
|
43 |
if (debug) println "WARNING: found the ${rez2.size()} matches of primary speaker prefix in the '$id' file name" |
|
44 |
this.primarySpeakerIdRegex = null |
|
45 |
} else { |
|
46 |
primarySpeakerId = rez[0] |
|
47 |
//if (debug) println "Detected primary speaker: $primarySpeakerId" |
|
48 |
} |
|
49 |
} |
|
50 |
|
|
25 | 51 |
activator = new LocalNamesHookActivator<>(hook, ["w", "Turn", "Sync"]); |
26 | 52 |
|
27 | 53 |
hook = new IdentityHook("word_hook", activator, this) { |
... | ... | |
103 | 129 |
def tmpInfos = new LinkedHashMap() |
104 | 130 |
for (String attr : turnInfos.keySet()) tmpInfos[attr] = turnInfos[attr] |
105 | 131 |
tmpInfos["orig-speaker"] = turnInfos["speaker"] |
106 |
tmpInfos["speaker"] = "other" |
|
132 |
|
|
133 |
if (primarySpeakerIdRegex == null || turnInfos["speaker"] ==~ primarySpeakerIdRegex) { |
|
134 |
tmpInfos["speaker"] = "other" |
|
135 |
} else { |
|
136 |
tmpInfos["speaker"] = primarySpeakerId |
|
137 |
} |
|
107 | 138 |
tmpInfos["startTime"] = currentTime |
108 | 139 |
writer.writeStartElement("Turn") |
109 | 140 |
for (String attr : tmpInfos.keySet()) { |
... | ... | |
130 | 161 |
writer.writeEndElement() // event |
131 | 162 |
word = "" // don't write the word |
132 | 163 |
} |
133 |
|
|
164 |
|
|
134 | 165 |
if (word.length() > 0) { |
135 | 166 |
|
136 | 167 |
writer.writeStartElement("w") // start the initial word |
... | ... | |
140 | 171 |
writer.writeCharacters(word) |
141 | 172 |
writer.writeEndElement() // w |
142 | 173 |
} |
143 |
|
|
174 |
|
|
144 | 175 |
if (shouldCloseOtherTurn) { |
145 | 176 |
shouldCloseOtherTurn = false; |
146 | 177 |
//close the current 'other' Turn and restart the actual Turn |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/nov13/FixTranscriptionsMacro.groovy (revision 3017) | ||
---|---|---|
6 | 6 |
import org.txm.utils.logger.* |
7 | 7 |
|
8 | 8 |
@Field @Option(name="trsFile", usage="A single vocapia XML file", widget="FileOpen", required=false, def="") |
9 |
File trsFile;
|
|
9 |
File trsFile |
|
10 | 10 |
|
11 | 11 |
@Field @Option(name="trsDirectory", usage="A Vocapia XML files directory to process", widget="Folder", required=false, def="") |
12 |
File trsDirectory;
|
|
12 |
File trsDirectory |
|
13 | 13 |
|
14 | 14 |
@Field @Option(name="resultDirectory", usage="The result directory", widget="Folder", required=false, def="") |
15 |
File resultDirectory; |
|
15 |
File resultDirectory |
|
16 |
|
|
17 |
@Field @Option(name="primarySpeakerIdPrefix", usage="speaker ID of the primary speaker", widget="String", required=false, def="") |
|
18 |
String primarySpeakerIdPrefix |
|
16 | 19 |
|
20 |
@Field @Option(name="otherNonPrimaryLocutor", usage="other non primary id of the other turns", widget="String", required=false, def="") |
|
21 |
String otherNonPrimaryLocutor |
|
22 |
|
|
23 |
@Field @Option(name="debug", usage="speaker ID of the primary speaker", widget="Boolean", required=false, def="false") |
|
24 |
Boolean debug |
|
25 |
|
|
17 | 26 |
if (!ParametersDialog.open(this)) return; |
18 | 27 |
|
19 | 28 |
if (resultDirectory.equals(trsDirectory) || (trsFile != null && trsFile.getParentFile().equals(resultDirectory))) { |
... | ... | |
44 | 53 |
ConsoleProgressBar cpb = new ConsoleProgressBar(trsFiles.size()) |
45 | 54 |
for (File file : trsFiles) { |
46 | 55 |
cpb.tick() |
47 |
FixTranscription fixer = new FixTranscription(file) |
|
56 |
|
|
57 |
FixTranscription fixer = new FixTranscription(file, primarySpeakerIdPrefix, otherNonPrimaryLocutor, debug) |
|
48 | 58 |
String name = FileUtils.stripExtension(file) |
49 | 59 |
File outFile = new File(resultDirectory, name+".trs") |
50 | 60 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/RenameSpeakerFromDirectoryMacro.groovy (revision 3017) | ||
---|---|---|
1 |
package org.txm.macro.transcription |
|
2 |
|
|
3 |
import java.nio.charset.Charset |
|
4 |
|
|
5 |
import java.time.LocalTime |
|
6 |
import java.time.format.DateTimeFormatter |
|
7 |
import org.txm.utils.* |
|
8 |
import org.txm.utils.logger.* |
|
9 |
|
|
10 |
// Macro parameters. Each field is bound by name, either from the parameters dialog
// or from the argument map given by a calling macro.

// Directory scanned for *.trs files
@Field @Option(name="trsDirectory", usage="Dossier qui contient les fichiers TRS", widget="Folder", required=true, def="")
File trsDirectory

// Directory receiving the patched copies (created when missing)
@Field @Option(name="outputTrsDirectory", usage="Dossier résultat qui contient les fichiers TRS", widget="Folder", required=true, def="")
File outputTrsDirectory

// The four values below are forwarded unchanged to RenameSpeakerMacro
@Field @Option(name="idRegex", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def idRegex

@Field @Option(name="nameRegex", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def nameRegex

@Field @Option(name="newID", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def newID

@Field @Option(name="newName", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def newName

// listFiles() yields null when the directory cannot be listed
def files = trsDirectory.listFiles()
if (files == null) {
	println "Error: no files in $trsDirectory"
	return false
}

// Make sure the output directory is available before processing anything
outputTrsDirectory.mkdirs()
if (!outputTrsDirectory.exists()) {
	println "Error: can't create $outputTrsDirectory"
	return false
}

// Run RenameSpeakerMacro on every *.trs file (case-insensitive extension test).
// Once a run has failed, the remaining files are no longer processed and the macro returns false.
boolean ok = true
for (File candidate : files) {
	String fileName = candidate.getName()
	if (fileName.toLowerCase().endsWith(".trs")) {
		File target = new File(outputTrsDirectory, fileName)
		if (ok) {
			ok = gse.runMacro(RenameSpeakerMacro, ["trsFile":candidate, "outputTrsFile":target, "idRegex":idRegex, "nameRegex":nameRegex, "newID":newID, "newName":newName]) as boolean
		}
	}
}

return ok
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/RenameSpeaker.groovy (revision 3017) | ||
---|---|---|
1 |
package org.txm.macro.transcription |
|
2 |
|
|
3 |
import org.txm.scripts.importer.*; |
|
4 |
import org.txm.utils.*; |
|
5 |
import org.txm.metadatas.*; |
|
6 |
|
|
7 |
import java.io.File; |
|
8 |
|
|
9 |
import org.w3c.dom.Document; |
|
10 |
import org.w3c.dom.Element; |
|
11 |
|
|
12 |
import javax.xml.parsers.*; |
|
13 |
import javax.xml.xpath.*; |
|
14 |
import javax.xml.transform.*; |
|
15 |
import javax.xml.transform.dom.DOMSource; |
|
16 |
import javax.xml.transform.stream.StreamResult; |
|
17 |
|
|
18 |
/**
 * Renames the speakers of a transcription file.
 *
 * The Speaker declarations whose id (or name) matches a regular expression are patched,
 * then the Turn elements referencing a replaced id are updated to the new id.
 * The class is named after its file and after the callers, which instantiate RenameSpeaker.
 **/
public class RenameSpeaker {
	/** The result file and the transcription file to read. */
	File outfile, transcriptionfile

	/** The parsed transcription, set by process(). */
	Document doc

	/** Patterns searched (partial "find" match) in Speaker@id and Speaker@name; null when not set. */
	def idRegex, nameRegex

	/** Replacement values; null means "do not patch this attribute". */
	String newId, newName

	/**
	 * Instantiates a new speaker renamer. Nothing is read or written until process() is called.
	 *
	 * @param transcriptionfile the transcription file to read
	 * @param outfile the file to write the patched transcription to
	 * @param idRegexString regular expression searched in Speaker@id; ignored when null or empty
	 * @param nameRegexString regular expression searched in Speaker@name; ignored when null or empty
	 * @param newId the new Speaker@id value; ignored when null or empty
	 * @param newName the new Speaker@name value; ignored when null or empty
	 */
	public RenameSpeaker(File transcriptionfile, File outfile, String idRegexString, String nameRegexString, String newId, String newName) {

		this.transcriptionfile = transcriptionfile
		this.outfile = outfile

		if (idRegexString != null && idRegexString.length() > 0) {
			this.idRegex = /$idRegexString/
		}
		if (nameRegexString != null && nameRegexString.length() > 0) {
			this.nameRegex = /$nameRegexString/
		}
		// An empty value (the default of the macro's String widgets) means "not set",
		// like for the regular expressions: never blank out an id or a name
		this.newId = (newId != null && newId.length() > 0) ? newId : null
		this.newName = (newName != null && newName.length() > 0) ? newName : null
	}

	/**
	 * Parses the transcription, patches the Speaker and Turn elements and writes the result to outfile.
	 *
	 * @return true, if successful; false if any exception occurred (the exception is logged)
	 */
	public boolean process() {
		try {
			DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
			domFactory.setNamespaceAware(true); // never forget this!
			domFactory.setXIncludeAware(true);
			DocumentBuilder builder = domFactory.newDocumentBuilder();
			doc = builder.parse(transcriptionfile);

			def expr2 = XPathFactory.newInstance().newXPath().compile("//Speaker");
			def nodes2 = expr2.evaluate(doc, XPathConstants.NODESET);

			// fix speaker declarations
			def replacedIds = [] // list of IDs replaced, the list is used later to update the Turn locutors ids

			for (def node : nodes2) {
				if (node == null) continue;
				Element elem = (Element)node;

				// original values, read before any patch
				String id = elem.getAttribute("id")
				String name = elem.getAttribute("name")

				if (idRegex != null && id =~ idRegex) { // patch Speaker@id
					if (newId != null) {
						elem.setAttribute("id", newId);
						replacedIds << id
					}
					if (newName != null) {
						elem.setAttribute("name", newName);
					}
				}

				// the name pattern is tested against the speaker NAME, not its id
				if (nameRegex != null && name =~ nameRegex) { // patch Speaker@name
					if (newName != null) {
						elem.setAttribute("name", newName);
					}
				}
			}

			def expr = XPathFactory.newInstance().newXPath().compile("//Turn");
			def nodes = expr.evaluate(doc, XPathConstants.NODESET);

			// fix speaker turns
			// NOTE(review): only turns whose speaker attribute is exactly a replaced id are updated;
			// confirm whether turns listing several speakers must be handled too
			for (def node : nodes) {
				if (node == null) continue;

				Element elem = (Element)node;
				String id = elem.getAttribute("speaker");

				if (replacedIds.contains(id)) {
					elem.setAttribute("speaker", newId);
				}
			}

			// Configure the transformer before opening the output, so a configuration
			// failure does not leave an open (and truncated) result file behind
			TransformerFactory fabrique = new net.sf.saxon.TransformerFactoryImpl();
			Transformer transformer = fabrique.newTransformer();
			transformer.setOutputProperty(OutputKeys.METHOD, "xml");
			transformer.setOutputProperty(OutputKeys.INDENT, "yes");
			transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");

			// Serialize the DOM to the result file; the writer is closed even if the transformation fails
			Source source = new DOMSource(doc);
			Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8"));
			try {
				transformer.transform(source, new StreamResult(writer));
			} finally {
				writer.close();
			}
			return true;
		} catch (Exception e) {
			org.txm.utils.logger.Log.printStackTrace(e);
			return false;
		}
	}

	/**
	 * The main method: renames the "spk2" speaker id to "spk4" in a sample file of the user home directory.
	 *
	 * @param args the arguments (not used)
	 */
	public static void main(String[] args) {
		String homedir = System.getProperty("user.home")
		File trs1 = new File(homedir, "xml/concattrs/int18_1.trs")
		File trs2 = new File(homedir, "xml/concattrs/int18_1-renamed.trs")

		new RenameSpeaker(trs1, trs2, "spk2", null, "spk4", null).process();
	}
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/RenameSpeakerMacro.groovy (revision 3017) | ||
---|---|---|
1 |
package org.txm.macro.transcription |
|
2 |
|
|
3 |
import java.nio.charset.Charset |
|
4 |
|
|
5 |
import java.time.LocalTime |
|
6 |
import java.time.format.DateTimeFormatter |
|
7 |
import org.txm.utils.* |
|
8 |
import org.txm.utils.logger.* |
|
9 |
|
|
10 |
// Macro parameters. Each option name must be unique and equal to its field name:
// the calling macro passes its arguments by these names ("newID", "newName", ...).

// The TRS file to read
@Field @Option(name="trsFile", usage="Fichier TRS à traiter", widget="FileOpen", required=true, def="")
File trsFile;

// The TRS file to write
@Field @Option(name="outputTrsFile", usage="Fichier TRS résultat", widget="FileSave", required=true, def="")
File outputTrsFile;

// Regular expression searched in the speaker ids
@Field @Option(name="idRegex", usage="Expression régulière des identifiants de locuteur à renommer", widget="String", required=true, def="")
def idRegex

// Regular expression searched in the speaker names
@Field @Option(name="nameRegex", usage="Expression régulière des noms de locuteur à renommer", widget="String", required=true, def="")
def nameRegex

// New speaker id (was wrongly declared with the "idRegex" option name)
@Field @Option(name="newID", usage="Nouvel identifiant du locuteur", widget="String", required=true, def="")
def newID

// New speaker name (was wrongly declared with the "nameRegex" option name)
@Field @Option(name="newName", usage="Nouveau nom du locuteur", widget="String", required=true, def="")
def newName


// Delegate the work to RenameSpeaker; the macro result is the success flag of process()
def cs = new RenameSpeaker(trsFile, outputTrsFile, idRegex, nameRegex, newID, newName);

return cs.process();
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/ChangeSpeaker.groovy (revision 3017) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate:$ |
|
25 |
// $LastChangedRevision:$ |
|
26 |
// $LastChangedBy:$ |
|
27 |
// |
|
28 |
package org.txm.scripts.importer.transcriber; |
|
29 |
|
|
30 |
import org.txm.scripts.importer.*; |
|
31 |
import org.txm.utils.*; |
|
32 |
import org.txm.metadatas.*; |
|
33 |
|
|
34 |
import java.io.File; |
|
35 |
|
|
36 |
import org.w3c.dom.Document; |
|
37 |
import org.w3c.dom.Element; |
|
38 |
|
|
39 |
import javax.xml.parsers.*; |
|
40 |
import javax.xml.xpath.*; |
|
41 |
import javax.xml.transform.*; |
|
42 |
import javax.xml.transform.dom.DOMSource; |
|
43 |
import javax.xml.transform.stream.StreamResult; |
|
44 |
|
|
45 |
// TODO: Auto-generated Javadoc |
|
46 |
/**
 * Replaces a speaker id by another one in a transcription file: the Turn@speaker
 * references and the Speaker@id declarations are updated, then the document is saved.
 * All the work is done by the constructor.
 */
public class ChangeSpeaker {
	/** The file the patched document is written to. */
	File outfile;

	/** The doc. */
	Document doc;

	/**
	 * Instantiates a new change speaker: parses the transcription, replaces the id and saves the result.
	 * A failure of the save step is logged, not thrown; parsing errors are propagated.
	 *
	 * @param transcriptionfile the transcription file to read
	 * @param outfile the file to write (may be the transcription file itself)
	 * @param id the speaker id to replace
	 * @param newid the replacement speaker id
	 */
	public ChangeSpeaker(File transcriptionfile, File outfile, String id, String newid)
	{
		this.outfile = outfile;

		DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
		domFactory.setNamespaceAware(true); // never forget this!
		domFactory.setXIncludeAware(true);
		DocumentBuilder builder = domFactory.newDocumentBuilder();
		doc = builder.parse(transcriptionfile);

		// The id is replaced as a whole whitespace-delimited token only, so that renaming
		// "spk2" does not corrupt "spk20". Both values are quoted: they are plain text, not regex.
		// NOTE(review): assumes Turn@speaker is a whitespace-separated list of ids (Transcriber IDREFS) - confirm
		String tokenRegex = '(?<!\\S)' + java.util.regex.Pattern.quote(id) + '(?!\\S)'
		String replacement = java.util.regex.Matcher.quoteReplacement(newid)

		def expr = XPathFactory.newInstance().newXPath().compile("//Turn");
		def nodes = expr.evaluate(doc, XPathConstants.NODESET);

		for (def node : nodes)
		{
			if (node == null) continue;

			Element elem = (Element)node;
			String value = elem.getAttribute("speaker");
			String patched = value.replaceAll(tokenRegex, replacement);
			if (!patched.equals(value))
			{
				elem.setAttribute("speaker", patched);
			}
		}

		// The id is compared in code instead of being concatenated into the XPath expression,
		// which produced an invalid expression for ids containing a quote
		def expr2 = XPathFactory.newInstance().newXPath().compile("//Speaker");
		def nodes2 = expr2.evaluate(doc, XPathConstants.NODESET);

		for (def node : nodes2)
		{
			if (node == null) continue;

			Element elem = (Element)node;
			if (id.equals(elem.getAttribute("id")))
			{
				elem.setAttribute("id", newid);
			}
		}
		save()
	}

	/**
	 * Save: serializes the document to outfile as indented UTF-8 XML.
	 *
	 * @return true, if successful; false if an exception occurred (the exception is logged)
	 */
	private boolean save()
	{
		try {
			// Configure the transformer before opening the output file
			TransformerFactory fabrique = new net.sf.saxon.TransformerFactoryImpl();
			Transformer transformer = fabrique.newTransformer();
			transformer.setOutputProperty(OutputKeys.METHOD, "xml");
			transformer.setOutputProperty(OutputKeys.INDENT, "yes");
			transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");

			// DOM source
			Source source = new DOMSource(doc);

			// Output file; the writer is closed even if the transformation fails
			Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8"));
			try {
				// Transformation
				transformer.transform(source, new StreamResult(writer));
			} finally {
				writer.close();
			}
			return true;
		} catch (Exception e) {
			org.txm.utils.logger.Log.printStackTrace(e);
			return false;
		}
	}

	/**
	 * The main method: rotates the spk2, spk3 and spk4 ids of a sample file, in place.
	 *
	 * @param args the arguments (not used)
	 */
	public static void main(String[] args)
	{
		String homedir = System.getProperty("user.home")
		File trs1 = new File(homedir, "xml/concattrs/int18_1.trs")
		new ChangeSpeaker(trs1, trs1, "spk2", "spk4");
		new ChangeSpeaker(trs1, trs1, "spk3", "spk2");
		new ChangeSpeaker(trs1, trs1, "spk4", "spk3");
	}
}
Formats disponibles : Unified diff