Révision 3017

tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/nov13/FixTranscription.groovy (revision 3017)
3 3
import javax.xml.stream.*
4 4

  
5 5
import org.txm.importer.PersonalNamespaceContext
6
import org.txm.utils.FileUtils
6 7
import org.txm.xml.IdentityHook
7 8
import org.txm.xml.*
8 9

  
......
19 20
	LocalNamesHookActivator activator;
20 21
	IdentityHook hook;
21 22
	
22
	public FixTranscription(File xmlfile) {
23
	def primarySpeakerIdRegex
24
	String primarySpeakerId
25
	
26
	Boolean debug
27
	
28
	String otherNonPrimaryLocutor = "other"
29
	
30
	public FixTranscription(File xmlfile, String primarySpeakerIdPrefix, String otherNonPrimaryLocutor, Boolean debug) {
23 31
		super(xmlfile)
32
		this.debug = debug
24 33
		
34
		this.otherNonPrimaryLocutor = otherNonPrimaryLocutor
35
		if (primarySpeakerIdPrefix != null && primarySpeakerIdPrefix.length() > 0) {
36
			String id = FileUtils.stripExtension(xmlfile)
37
			
38
			this.primarySpeakerIdRegex = /$primarySpeakerIdPrefix.*/
39
			
40
			def rez = (id =~ primarySpeakerIdRegex).findAll()
41
			def rez2 = (id =~ /$primarySpeakerIdPrefix/).findAll()
42
			if (rez2.size() != 1) {
43
				if (debug) println "WARNING: found the ${rez2.size()} matches of primary speaker prefix in the '$id' file name"
44
				this.primarySpeakerIdRegex = null
45
			} else {
46
				primarySpeakerId = rez[0]
47
				//if (debug) println "Detected primary speaker: $primarySpeakerId"
48
			}
49
		}
50
		
25 51
		activator = new LocalNamesHookActivator<>(hook, ["w", "Turn", "Sync"]);
26 52
		
27 53
		hook = new IdentityHook("word_hook", activator, this) {
......
103 129
								def tmpInfos = new LinkedHashMap()
104 130
								for (String attr : turnInfos.keySet()) tmpInfos[attr] = turnInfos[attr]
105 131
								tmpInfos["orig-speaker"] = turnInfos["speaker"]
106
								tmpInfos["speaker"] = "other"
132
								
133
								if (primarySpeakerIdRegex == null || turnInfos["speaker"] ==~ primarySpeakerIdRegex) {
134
									tmpInfos["speaker"] = "other"
135
								} else {
136
									tmpInfos["speaker"] = primarySpeakerId
137
								}
107 138
								tmpInfos["startTime"] = currentTime
108 139
								writer.writeStartElement("Turn")
109 140
								for (String attr : tmpInfos.keySet()) {
......
130 161
								writer.writeEndElement() // event
131 162
								word = "" // don't write the word
132 163
							}
133
														
164
							
134 165
							if (word.length() > 0) {
135 166
								
136 167
								writer.writeStartElement("w") // start the initial word
......
140 171
								writer.writeCharacters(word)
141 172
								writer.writeEndElement() // w
142 173
							}
143
						
174
							
144 175
							if (shouldCloseOtherTurn) {
145 176
								shouldCloseOtherTurn = false;
146 177
								//close the current 'other' Turn and restart the actual Turn
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/nov13/FixTranscriptionsMacro.groovy (revision 3017)
6 6
import org.txm.utils.logger.*
7 7

  
8 8
@Field @Option(name="trsFile", usage="A single vocapia XML file", widget="FileOpen", required=false, def="")
9
		File trsFile;
9
		File trsFile
10 10

  
11 11
@Field @Option(name="trsDirectory", usage="A Vocapia XML files directory to process", widget="Folder", required=false, def="")
12
		File trsDirectory;
12
		File trsDirectory
13 13

  
14 14
@Field @Option(name="resultDirectory", usage="The result directory", widget="Folder", required=false, def="")
15
		File resultDirectory;
15
		File resultDirectory
16
		
17
@Field @Option(name="primarySpeakerIdPrefix", usage="speaker ID of the primary speaker", widget="String", required=false, def="")
18
		String primarySpeakerIdPrefix
16 19

  
20
		@Field @Option(name="otherNonPrimaryLocutor", usage="other non primary id of the other turns", widget="String", required=false, def="")
21
		String otherNonPrimaryLocutor
22
		
23
@Field @Option(name="debug", usage="speaker ID of the primary speaker", widget="Boolean", required=false, def="false")
24
		Boolean debug
25

  
17 26
if (!ParametersDialog.open(this)) return;
18 27

  
19 28
if (resultDirectory.equals(trsDirectory) || (trsFile != null && trsFile.getParentFile().equals(resultDirectory))) {
......
44 53
ConsoleProgressBar cpb = new ConsoleProgressBar(trsFiles.size())
45 54
for (File file : trsFiles) {
46 55
	cpb.tick()
47
	FixTranscription fixer = new FixTranscription(file)
56
	
57
	FixTranscription fixer = new FixTranscription(file, primarySpeakerIdPrefix, otherNonPrimaryLocutor, debug)
48 58
	String name = FileUtils.stripExtension(file)
49 59
	File outFile = new File(resultDirectory, name+".trs")
50 60
	
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/RenameSpeakerFromDirectoryMacro.groovy (revision 3017)
1
package org.txm.macro.transcription
2

  
3
import java.nio.charset.Charset
4

  
5
import java.time.LocalTime
6
import java.time.format.DateTimeFormatter
7
import org.txm.utils.*
8
import org.txm.utils.logger.*
9

  
10
// Batch version of RenameSpeakerMacro: runs RenameSpeakerMacro on every "*.trs" file
// (case-insensitive extension) found directly in trsDirectory and writes each result
// under the same file name in outputTrsDirectory.
// Returns true only when every file has been processed successfully.

@Field @Option(name="trsDirectory", usage="Dossier qui contient les fichiers TRS", widget="Folder", required=true, def="")
File trsDirectory;

@Field @Option(name="outputTrsDirectory", usage="Dossier résultat qui contient les fichiers TRS", widget="Folder", required=true, def="")
File outputTrsDirectory;

// NOTE(review): the usage text of the four options below looks copy-pasted from another
// macro and does not describe them; they are simply forwarded to RenameSpeakerMacro.
@Field @Option(name="idRegex", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def idRegex

@Field @Option(name="nameRegex", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def nameRegex

@Field @Option(name="newID", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def newID

@Field @Option(name="newName", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def newName

// listFiles() returns null when the path is not a readable directory
def files = trsDirectory.listFiles()
if (files == null) {
	println "Error: no files in $trsDirectory"
	return false
}

// mkdirs() result is not trusted: the directory may already exist, so test existence instead
outputTrsDirectory.mkdirs()
if (!outputTrsDirectory.exists()) {
	println "Error: can't create $outputTrsDirectory"
	return false;
}

boolean ok = true
for (File trsFile : files) {
	
	if (!trsFile.getName().toLowerCase().endsWith(".trs")) continue;
	
	File outputTrsFile = new File(outputTrsDirectory, trsFile.getName());
	
	// Process every file even after a failure: the previous "ok = ok && runMacro(...)"
	// short-circuited and silently skipped all the files following the first failure.
	def params = ["trsFile":trsFile, "outputTrsFile":outputTrsFile, "idRegex":idRegex, "nameRegex":nameRegex, "newID":newID, "newName":newName]
	if (!gse.runMacro(RenameSpeakerMacro, params)) {
		println "Error: failed to rename speakers of $trsFile"
		ok = false
	}
}

return ok
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/RenameSpeaker.groovy (revision 3017)
1
package org.txm.macro.transcription
2

  
3
import org.txm.scripts.importer.*;
4
import org.txm.utils.*;
5
import org.txm.metadatas.*;
6

  
7
import java.io.File;
8

  
9
import org.w3c.dom.Document;
10
import org.w3c.dom.Element;
11

  
12
import javax.xml.parsers.*;
13
import javax.xml.xpath.*;
14
import javax.xml.transform.*;
15
import javax.xml.transform.dom.DOMSource;
16
import javax.xml.transform.stream.StreamResult;
17

  
18
/**
 * Renames the speakers of an XML transcription file.
 *
 * Speaker declarations ("Speaker" elements) are selected with a regular expression
 * applied to their "id" attribute and/or to their "name" attribute. When an id is
 * replaced, the "speaker" attribute of the "Turn" elements that is equal to the old
 * id is updated as well.
 **/
public class RenameSpeaker {
	File outfile, transcriptionfile
	
	/** The DOM document, set by process(). */
	Document doc
	
	/** Regular expressions selecting the speakers; null means "no selection on this attribute". */
	def idRegex, nameRegex
	/** Replacement values; a null value leaves the corresponding attribute unchanged. */
	String newId, newName
	
	/**
	 * Instantiates a new speaker renamer. Nothing is read or written until process() is called.
	 *
	 * @param transcriptionfile the transcription file to read
	 * @param outfile the file to write the result to
	 * @param idRegexString regular expression searched in Speaker@id; ignored when null or empty
	 * @param nameRegexString regular expression searched in Speaker@name; ignored when null or empty
	 * @param newId the new speaker id, or null to keep the ids
	 * @param newName the new speaker name, or null to keep the names
	 */
	public RenameSpeaker(File transcriptionfile, File outfile, String idRegexString, String nameRegexString, String newId, String newName) {
		
		this.transcriptionfile = transcriptionfile
		this.outfile = outfile;
		
		if (idRegexString != null && idRegexString.length() > 0) {
			this.idRegex = /$idRegexString/
		}
		if (nameRegexString != null && nameRegexString.length() > 0) {
			this.nameRegex = /$nameRegexString/
		}
		this.newId = newId
		this.newName = newName
	}
	
	/**
	 * Parses the transcription, patches the Speaker and Turn elements and writes the
	 * result to the output file as indented UTF-8 XML.
	 *
	 * @return true if successful, false if any exception occurred (the exception is logged)
	 */
	public boolean process() {
		try {
			DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
			domFactory.setNamespaceAware(true); // never forget this!
			domFactory.setXIncludeAware(true);
			DocumentBuilder builder = domFactory.newDocumentBuilder();
			doc = builder.parse(transcriptionfile);
			
			def speakersExpr = XPathFactory.newInstance().newXPath().compile("//Speaker");
			def speakers = speakersExpr.evaluate(doc, XPathConstants.NODESET);
			
			// fix speaker declarations
			def replacedIds = [] // list of IDs replaced, the list is used later to update the Turn locutors ids
			
			for (def node : speakers) {
				if (node == null) continue;
				Element elem = (Element)node;
				
				// original values, read before any modification
				String id = elem.getAttribute("id")
				String name = elem.getAttribute("name")
				
				// "=~" in a boolean context is a find(): the regex may match a part of the value
				if (idRegex != null && id =~ idRegex) { // patch Speaker@id
					if (newId != null) {
						elem.setAttribute("id", newId);
						replacedIds << id
					}
					if (newName != null) {
						elem.setAttribute("name", newName);
					}
				}
				
				// the name regex must be tested against the name (it was tested against the id)
				if (nameRegex != null && name =~ nameRegex) { // patch Speaker@name
					if (newName != null) {
						elem.setAttribute("name", newName);
					}
				}
			}
			
			def turnsExpr = XPathFactory.newInstance().newXPath().compile("//Turn");
			def turns = turnsExpr.evaluate(doc, XPathConstants.NODESET);
			
			// fix speaker turns: only the turns whose speaker is exactly a replaced id
			for (def node : turns) {
				if (node == null) continue;
				
				Element elem = (Element)node;
				String id = elem.getAttribute("speaker");
				
				if (replacedIds.contains(id)) {
					elem.setAttribute("speaker", newId);
				}
			}
			
			// DOM source
			Source source = new DOMSource(doc);
			
			// Transformer configuration, done before the output file is opened
			TransformerFactory factory = new net.sf.saxon.TransformerFactoryImpl();
			Transformer transformer = factory.newTransformer();
			transformer.setOutputProperty(OutputKeys.METHOD, "xml");
			transformer.setOutputProperty(OutputKeys.INDENT, "yes");
			transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
			
			// Output file; the writer is closed even if the transformation fails
			Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8"));
			try {
				transformer.transform(source, new StreamResult(writer));
			} finally {
				writer.close();
			}
			return true;
		} catch (Exception e) {
			org.txm.utils.logger.Log.printStackTrace(e);
			return false;
		}
	}
	
	/**
	 * The main method: sample run on a file of the user home directory.
	 *
	 * @param args the arguments (not used)
	 */
	public static void main(String[] args) {
		String homedir = System.getProperty("user.home")
		File trs1 = new File(homedir, "xml/concattrs/int18_1.trs")
		File trs2 = new File(homedir, "xml/concattrs/int18_1-renamed.trs")
		
		new RenameSpeaker(trs1, trs2, "spk2", null, "spk4", null).process();
		
	}
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/RenameSpeakerMacro.groovy (revision 3017)
1
package org.txm.macro.transcription
2

  
3
import java.nio.charset.Charset
4

  
5
import java.time.LocalTime
6
import java.time.format.DateTimeFormatter
7
import org.txm.utils.*
8
import org.txm.utils.logger.*
9

  
10
// Renames the speakers of one TRS file: builds a RenameSpeaker with the parameters
// below and returns the result of its process() method.
//
// NOTE(review): the usage texts below look copy-pasted and do not describe the
// parameters (trsFile/outputTrsFile are files, not folders); they are left unchanged here.

@Field @Option(name="trsFile", usage="Dossier qui contient les fichiers TRS", widget="FileOpen", required=true, def="")
File trsFile;

@Field @Option(name="outputTrsFile", usage="Dossier qui contient les fichiers TRS", widget="FileSave", required=true, def="")
File outputTrsFile;

@Field @Option(name="idRegex", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def idRegex

@Field @Option(name="nameRegex", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def nameRegex

// The option name must be the field name: it was "idRegex", a duplicate of the option above.
// "newID" is also the key used by RenameSpeakerFromDirectoryMacro when it calls this macro.
@Field @Option(name="newID", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def newID

// Same fix: the option name was "nameRegex", a duplicate of the nameRegex option.
@Field @Option(name="newName", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def newName

def cs = new RenameSpeaker(trsFile, outputTrsFile, idRegex, nameRegex, newID, newName);

return cs.process();
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/ChangeSpeaker.groovy (revision 3017)
1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate:$
25
// $LastChangedRevision:$
26
// $LastChangedBy:$ 
27
//
28
package org.txm.scripts.importer.transcriber;
29

  
30
import org.txm.scripts.importer.*;
31
import org.txm.utils.*;
32
import org.txm.metadatas.*;
33

  
34
import java.io.File;
35

  
36
import org.w3c.dom.Document;
37
import org.w3c.dom.Element;
38

  
39
import javax.xml.parsers.*;
40
import javax.xml.xpath.*;
41
import javax.xml.transform.*;
42
import javax.xml.transform.dom.DOMSource;
43
import javax.xml.transform.stream.StreamResult;
44

  
45
/**
 * Replaces a speaker id by another one in an XML transcription file: in the "speaker"
 * attribute of the "Turn" elements and in the "id" attribute of the "Speaker" elements.
 * All the work is done by the constructor, which also writes the result file.
 */
public class ChangeSpeaker {
	File outfile;
	
	/** The DOM document of the transcription. */
	Document doc;
	
	/**
	 * Instantiates a new change speaker: reads the transcription, replaces the id and
	 * saves the result. The result of the save is not reported to the caller.
	 *
	 * @param transcriptionfile the transcription file to read
	 * @param outfile the file to write (may be the transcription file itself)
	 * @param id the speaker id to replace
	 * @param newid the replacement speaker id
	 */
	public ChangeSpeaker(File transcriptionfile, File outfile, String id, String newid)
	{
		this.outfile = outfile;
		
		DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
		domFactory.setNamespaceAware(true); // never forget this!
		domFactory.setXIncludeAware(true);
		DocumentBuilder builder = domFactory.newDocumentBuilder();
		doc = builder.parse(transcriptionfile);
		
		// Matches the id only as a whole whitespace-delimited token: a plain substring
		// replace of "spk2" also rewrote "spk20" into "<newid>0".
		// NOTE(review): the original contains()/replace() code suggests that Turn@speaker
		// may list several ids separated by spaces - confirm against the TRS format.
		def wholeId = java.util.regex.Pattern.compile("(?<!\\S)" + java.util.regex.Pattern.quote(id) + "(?!\\S)");
		String replacement = java.util.regex.Matcher.quoteReplacement(newid);
		
		def turnsExpr = XPathFactory.newInstance().newXPath().compile("//Turn");
		def turns = turnsExpr.evaluate(doc, XPathConstants.NODESET);
		
		for (def node : turns)
		{
			if (node == null) continue;
			
			Element elem = (Element)node;
			def matcher = wholeId.matcher(elem.getAttribute("speaker"));
			if (matcher.find())
			{
				elem.setAttribute("speaker", matcher.replaceAll(replacement));
			}
		}
		
		// The id is compared here instead of being concatenated into the XPath expression,
		// which produced an invalid or different query when the id contained a quote.
		def speakersExpr = XPathFactory.newInstance().newXPath().compile("//Speaker");
		def speakers = speakersExpr.evaluate(doc, XPathConstants.NODESET);
		
		for (def node : speakers)
		{
			if (node == null) continue;
			
			Element elem = (Element)node;
			if (id.equals(elem.getAttribute("id")))
			{
				elem.setAttribute("id", newid);
			}
		}
		save()
	}
	
	/**
	 * Writes the document to the output file as indented UTF-8 XML.
	 *
	 * @return true, if successful; false if an exception occurred (the exception is logged)
	 */
	private boolean save()
	{
		try {
			// DOM source
			Source source = new DOMSource(doc);
			
			// Transformer configuration, done before the output file is opened
			TransformerFactory factory = new net.sf.saxon.TransformerFactoryImpl();
			Transformer transformer = factory.newTransformer();
			transformer.setOutputProperty(OutputKeys.METHOD, "xml");
			transformer.setOutputProperty(OutputKeys.INDENT, "yes");
			transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
			
			// Output file; the writer is closed even if the transformation fails
			Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8"));
			try {
				transformer.transform(source, new StreamResult(writer));
			} finally {
				writer.close();
			}
			return true;
		} catch (Exception e) {
			org.txm.utils.logger.Log.printStackTrace(e);
			return false;
		}
	}
	
	/**
	 * The main method: swaps the spk2 and spk3 ids of a sample file, using spk4 as a
	 * temporary id.
	 *
	 * @param args the arguments (not used)
	 */
	public static void main(String[] args)
	{
		String homedir = System.getProperty("user.home")
		File trs1 = new File(homedir, "xml/concattrs/int18_1.trs")
		new ChangeSpeaker(trs1, trs1, "spk2", "spk4");
		new ChangeSpeaker(trs1, trs1, "spk3", "spk2");
		new ChangeSpeaker(trs1, trs1, "spk4", "spk3");
	}
}

Formats disponibles : Unified diff