Révision 3029

tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/nov13/CreateTheOtherTurns.groovy (revision 3029)
25 25
	
26 26
	Boolean debug
27 27
	
28
	String otherNonPrimaryLocutor = "other"
28
	String otherNonPrimarySpeakerId = "other"
29 29
	
30
	public CreateTheOtherTurns(File xmlfile, String primarySpeakerIdRegexString, String otherNonPrimaryLocutor, Boolean debug) {
30
	public CreateTheOtherTurns(File xmlfile, String primarySpeakerIdRegexString, String otherNonPrimarySpeakerId, Boolean debug) {
31 31
		super(xmlfile)
32 32
		this.debug = debug
33 33
		
34
		this.otherNonPrimaryLocutor = otherNonPrimaryLocutor
34
		this.otherNonPrimarySpeakerId = otherNonPrimarySpeakerId
35 35
		if (primarySpeakerIdRegexString != null && primarySpeakerIdRegexString.length() > 0) {
36 36
			String id = FileUtils.stripExtension(xmlfile)
37 37
			
......
137 137
								tmpInfos["orig-speaker"] = turnInfos["speaker"]
138 138
								
139 139
								if (primarySpeakerIdRegex == null || turnInfos["speaker"] ==~ primarySpeakerIdRegex) { // the current speaker is not the primary speaker
140
									tmpInfos["speaker"] = otherNonPrimaryLocutor
140
									tmpInfos["speaker"] = otherNonPrimarySpeakerId
141 141
								} else {
142 142
									tmpInfos["speaker"] = primarySpeakerId
143 143
								}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/nov13/PrepareTranscriptionsMacro.groovy (revision 3029)
4 4
import java.time.format.DateTimeFormatter
5 5
import org.txm.utils.*
6 6
import org.txm.utils.logger.*
7
import org.txm.macro.transcription.RenameSpeaker
7
import org.txm.macro.transcription.RecodeSpeakers
8 8

  
9 9
@Field @Option(name="vocapiaDirectory", usage="A Vocapia XML files directory to process", widget="Folder", required=false, def="")
10 10
		File vocapiaDirectory
......
15 15
@Field @Option(name="primarySpeakerIdRegex", usage="speaker ID of the primary speaker", widget="String", required=false, def="")
16 16
		String primarySpeakerIdRegex
17 17

  
18
@Field @Option(name="otherNonPrimaryLocutor", usage="other non primary id of the other turns", widget="String", required=false, def="")
19
		String otherNonPrimaryLocutor
18
@Field @Option(name="otherNonPrimarySpeakerId", usage="other non primary id of the other turns", widget="String", required=false, def="")
19
		String otherNonPrimarySpeakerId
20 20
		
21
@Field @Option(name="nonPrimaryLocutorReplaceRegex", usage="other non primary id of the other turns", widget="String", required=false, def="")
22
		String nonPrimaryLocutorReplaceRegex
21
@Field @Option(name="nonPrimarySpeakerRegex", usage="other non primary id of the other turns", widget="String", required=false, def="")
22
		String nonPrimarySpeakerRegex
23 23

  
24 24
@Field @Option(name="debug", usage="speaker ID of the primary speaker", widget="Boolean", required=false, def="false")
25 25
		Boolean debug
......
53 53
for (File file : trsFiles) {
54 54
	cpb.tick()
55 55
	
56
	CreateTheOtherTurns fixer = new CreateTheOtherTurns(file, primarySpeakerIdRegex, otherNonPrimaryLocutor, debug)
56
	CreateTheOtherTurns fixer = new CreateTheOtherTurns(file, primarySpeakerIdRegex, otherNonPrimarySpeakerId, debug)
57 57
	String name = FileUtils.stripExtension(file)
58 58
	File outFile = new File(otherDirectory, name+".trs")
59 59
	
......
76 76
	cpb.tick()
77 77
	
78 78
	File outFile = new File(resultDirectory, file.getName())
79
	RenameSpeaker fixer = new RenameSpeaker(file, outFile, nonPrimaryLocutorReplaceRegex, null, otherNonPrimaryLocutor, otherNonPrimaryLocutor)
79
	RecodeSpeakers fixer = new RecodeSpeakers(file, outFile, nonPrimarySpeakerRegex, null, otherNonPrimarySpeakerId, otherNonPrimarySpeakerId)
80 80
	
81 81
	if (!fixer.process()) {
82 82
		println "WARNING: ERROR WHILE PROCESSING: "+file
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/RenameSpeakerFromDirectoryMacro.groovy (revision 3029)
1
package org.txm.macro.transcription
2

  
3
import java.nio.charset.Charset
4

  
5
import java.time.LocalTime
6
import java.time.format.DateTimeFormatter
7
import org.txm.utils.*
8
import org.txm.utils.logger.*
9

  
10
// Batch macro: runs RenameSpeakerMacro on every ".trs" file found directly in
// trsDirectory and writes each result under the same file name in outputTrsDirectory.
// Returns true only when every file was processed successfully.

@Field @Option(name="trsDirectory", usage="Dossier qui contient les fichiers TRS", widget="Folder", required=true, def="")
File trsDirectory;

@Field @Option(name="outputTrsDirectory", usage="Dossier résultat qui contient les fichiers TRS", widget="Folder", required=true, def="")
File outputTrsDirectory;

@Field @Option(name="idRegex", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def idRegex

@Field @Option(name="nameRegex", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def nameRegex

@Field @Option(name="newID", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def newID

@Field @Option(name="newName", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def newName

def files = trsDirectory.listFiles()
if (files == null) { // listFiles() yields null when the path cannot be listed as a directory
	println "Error: no files in $trsDirectory"
	return false
}

outputTrsDirectory.mkdirs()
if (!outputTrsDirectory.exists()) {
	println "Error: can't create $outputTrsDirectory"
	return false;
}

boolean ok = true
for (File trsFile : files) {
	
	// only Transcriber files are processed; the extension test is case-insensitive
	if (!trsFile.getName().toLowerCase().endsWith(".trs")) continue;
	
	File outputTrsFile = new File(outputTrsDirectory, trsFile.getName());
	
	// Run the macro BEFORE combining with 'ok': writing "ok && runMacro(...)" would
	// short-circuit after the first failure and silently skip all remaining files.
	boolean done = gse.runMacro(RenameSpeakerMacro, ["trsFile":trsFile, "outputTrsFile":outputTrsFile, "idRegex":idRegex, "nameRegex":nameRegex, "newID":newID, "newName":newName])
	if (!done) {
		println "WARNING: ERROR WHILE PROCESSING: "+trsFile
	}
	ok = ok && done
}

return ok
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/RenameSpeaker.groovy (revision 3029)
1
package org.txm.macro.transcription
2

  
3
import org.txm.scripts.importer.*;
4
import org.txm.utils.*;
5
import org.txm.metadatas.*;
6

  
7
import java.io.File;
8

  
9
import org.w3c.dom.Document;
10
import org.w3c.dom.Element;
11

  
12
import javax.xml.parsers.*;
13
import javax.xml.xpath.*;
14
import javax.xml.transform.*;
15
import javax.xml.transform.dom.DOMSource;
16
import javax.xml.transform.stream.StreamResult;
17

  
18
/**
 * Renames the speakers of a transcription file selected by an id regex or a name regex.
 * Speaker declarations are patched first, then the "speaker" attribute of the Turn
 * elements that referenced a replaced id.
 **/
public class RenameSpeaker {
	/** File the patched transcription is written to, and transcription file to read. */
	File outfile, transcriptionfile
	
	/** The DOM of the transcription, set by process() once the input file is parsed. */
	Document doc
	
	/** Patterns tested against Speaker@id and Speaker@name; each stays null when no regex was given. */
	def idRegex, nameRegex
	/** Replacement id and name; a null value leaves the corresponding attribute untouched. */
	String newId, newName
	
	/**
	 * Instantiates a new speaker renamer.
	 *
	 * @param transcriptionfile the transcription file to read
	 * @param outfile the file the result is written to
	 * @param idRegexString regex selecting speakers by their id, ignored when null or empty
	 * @param nameRegexString regex selecting speakers by their name, ignored when null or empty
	 * @param newId the id set on the speakers selected by idRegexString, null to keep the ids
	 * @param newName the name set on the selected speakers, null to keep the names
	 */
	public RenameSpeaker(File transcriptionfile, File outfile, String idRegexString, String nameRegexString, String newId, String newName) {
		
		this.transcriptionfile = transcriptionfile
		this.outfile = outfile;
		
		if (idRegexString != null && idRegexString.length() > 0) {
			this.idRegex = /$idRegexString/
		}
		if (nameRegexString != null && nameRegexString.length() > 0) {
			this.nameRegex = /$nameRegexString/
		}
		this.newId = newId
		this.newName = newName
	}
	
	/**
	 * Parses the transcription, patches the Speaker and Turn elements and writes the
	 * result to the output file as indented UTF-8 XML.
	 *
	 * @return true, if successful; false when any exception was raised (it is logged)
	 */
	public boolean process() {
		try {
			DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
			domFactory.setNamespaceAware(true); // never forget this!
			domFactory.setXIncludeAware(true);
			DocumentBuilder builder = domFactory.newDocumentBuilder();
			doc = builder.parse(transcriptionfile);
			
			def expr2 = XPathFactory.newInstance().newXPath().compile("//Speaker");
			def nodes2 = expr2.evaluate(doc, XPathConstants.NODESET);
			
			// fix speaker declarations
			def replacedIds = [] // list of IDs replaced, the list is used later to update the Turn speaker ids
			
			for (def node : nodes2) {
				if (node == null) continue;
				Element elem = (Element)node;
				
				// both values are read before any patching, so the tests below see the original attributes
				String id = elem.getAttribute("id")
				String name = elem.getAttribute("name")
				
				// '=~' is a find: the regex only has to match a part of the value
				if (idRegex != null && id =~ idRegex) { // patch Speaker@id
					if (newId != null) {
						elem.setAttribute("id", newId);
						replacedIds << id
					}
					if (newName != null) {
						elem.setAttribute("name", newName);
					}
				}
				
				// the name regex is tested against the speaker NAME (it was tested against the id before)
				if (nameRegex != null && name =~ nameRegex) { // patch Speaker@name
					if (newName != null) {
						elem.setAttribute("name", newName);
					}
				}
			}
			
			def expr = XPathFactory.newInstance().newXPath().compile("//Turn");
			def nodes = expr.evaluate(doc, XPathConstants.NODESET);
			
			// fix speaker turns: a Turn is updated only when its whole speaker value equals a replaced id
			for (def node : nodes) {
				if (node == null) continue;
				
				Element elem = (Element)node;
				String id = elem.getAttribute("speaker");
				
				if (replacedIds.contains(id)) {
					elem.setAttribute("speaker", newId);
				}
			}
			
			// DOM source of the transformation
			Source source = new DOMSource(doc);
			
			// transformer configuration, done before the output file is opened
			TransformerFactory fabrique = new net.sf.saxon.TransformerFactoryImpl();
			Transformer transformer = fabrique.newTransformer();
			transformer.setOutputProperty(OutputKeys.METHOD, "xml");
			transformer.setOutputProperty(OutputKeys.INDENT, "yes");
			transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
			
			// output file; the writer is closed even when the transformation fails
			Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8"));
			try {
				Result resultat = new StreamResult(writer);
				transformer.transform(source, resultat);
			} finally {
				writer.close();
			}
			return true;
		} catch (Exception e) {
			org.txm.utils.logger.Log.printStackTrace(e);
			return false;
		}
	}
	
	/**
	 * The main method: renames the speaker "spk2" to "spk4" in a sample file
	 * located under the user home directory.
	 *
	 * @param args the arguments (not used)
	 */
	public static void main(String[] args) {
		String homedir = System.getProperty("user.home")
		File trs1 = new File(homedir, "xml/concattrs/int18_1.trs")
		File trs2 = new File(homedir, "xml/concattrs/int18_1-renamed.trs")
		
		new RenameSpeaker(trs1, trs2, "spk2", null, "spk4", null).process();
	}
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/RenameSpeakerMacro.groovy (revision 3029)
1
package org.txm.macro.transcription
2

  
3
import java.nio.charset.Charset
4

  
5
import java.time.LocalTime
6
import java.time.format.DateTimeFormatter
7
import org.txm.utils.*
8
import org.txm.utils.logger.*
9

  
10
// Single-file macro: builds a RenameSpeaker from the parameters below, runs it on
// trsFile and returns the boolean result of RenameSpeaker.process().

@Field @Option(name="trsFile", usage="Dossier qui contient les fichiers TRS", widget="FileOpen", required=true, def="")
File trsFile;

@Field @Option(name="outputTrsFile", usage="Dossier qui contient les fichiers TRS", widget="FileSave", required=true, def="")
File outputTrsFile;

@Field @Option(name="idRegex", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def idRegex

@Field @Option(name="nameRegex", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def nameRegex

// The option name now matches the field: it was declared a second time as "idRegex".
// "newID" is also the key passed by RenameSpeakerFromDirectoryMacro.
@Field @Option(name="newID", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def newID

// The option name now matches the field: it was declared a second time as "nameRegex".
// "newName" is also the key passed by RenameSpeakerFromDirectoryMacro.
@Field @Option(name="newName", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def newName

def cs = new RenameSpeaker(trsFile, outputTrsFile, idRegex, nameRegex, newID, newName);

return cs.process();
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/RecodeSpeakersFromDirectoryMacro.groovy (revision 3029)
1
package org.txm.macro.transcription
2

  
3
import java.nio.charset.Charset
4

  
5
import java.time.LocalTime
6
import java.time.format.DateTimeFormatter
7
import org.txm.utils.*
8
import org.txm.utils.logger.*
9

  
10
// Batch macro: runs RecodeSpeakersMacro on every ".trs" file found directly in
// trsDirectory and writes each result under the same file name in outputTrsDirectory.
// Returns true only when every file was processed successfully.

@Field @Option(name="trsDirectory", usage="Dossier qui contient les fichiers TRS", widget="Folder", required=true, def="")
File trsDirectory;

@Field @Option(name="outputTrsDirectory", usage="Dossier résultat qui contient les fichiers TRS", widget="Folder", required=true, def="")
File outputTrsDirectory;

@Field @Option(name="idRegex", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def idRegex

@Field @Option(name="nameRegex", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def nameRegex

@Field @Option(name="newID", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def newID

@Field @Option(name="newName", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def newName

def files = trsDirectory.listFiles()
if (files == null) { // listFiles() yields null when the path cannot be listed as a directory
	println "Error: no files in $trsDirectory"
	return false
}

outputTrsDirectory.mkdirs()
if (!outputTrsDirectory.exists()) {
	println "Error: can't create $outputTrsDirectory"
	return false;
}

boolean ok = true
for (File trsFile : files) {
	
	// only Transcriber files are processed; the extension test is case-insensitive
	if (!trsFile.getName().toLowerCase().endsWith(".trs")) continue;
	
	File outputTrsFile = new File(outputTrsDirectory, trsFile.getName());
	
	// Run the macro BEFORE combining with 'ok': writing "ok && runMacro(...)" would
	// short-circuit after the first failure and silently skip all remaining files.
	boolean done = gse.runMacro(RecodeSpeakersMacro, ["trsFile":trsFile, "outputTrsFile":outputTrsFile, "idRegex":idRegex, "nameRegex":nameRegex, "newID":newID, "newName":newName])
	if (!done) {
		println "WARNING: ERROR WHILE PROCESSING: "+trsFile
	}
	ok = ok && done
}

return ok
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/RecodeSpeakers.groovy (revision 3029)
1
package org.txm.macro.transcription
2

  
3
import org.txm.scripts.importer.*;
4
import org.txm.utils.*;
5
import org.txm.metadatas.*;
6

  
7
import java.io.File;
8

  
9
import org.w3c.dom.Document;
10
import org.w3c.dom.Element;
11

  
12
import javax.xml.parsers.*;
13
import javax.xml.xpath.*;
14
import javax.xml.transform.*;
15
import javax.xml.transform.dom.DOMSource;
16
import javax.xml.transform.stream.StreamResult;
17

  
18
/**
 * Recodes the speakers of a transcription file selected by an id regex or a name regex.
 * Speaker declarations are patched first, then the "speaker" attribute of the Turn
 * elements that referenced a replaced id.
 **/
public class RecodeSpeakers {
	/** File the patched transcription is written to, and transcription file to read. */
	File outfile, transcriptionfile
	
	/** The DOM of the transcription, set by process() once the input file is parsed. */
	Document doc
	
	/** Patterns tested against Speaker@id and Speaker@name; each stays null when no regex was given. */
	def idRegex, nameRegex
	/** Replacement id and name; a null value leaves the corresponding attribute untouched. */
	String newId, newName
	
	/**
	 * Instantiates a new speaker recoder.
	 *
	 * @param transcriptionfile the transcription file to read
	 * @param outfile the file the result is written to
	 * @param idRegexString regex selecting speakers by their id, ignored when null or empty
	 * @param nameRegexString regex selecting speakers by their name, ignored when null or empty
	 * @param newId the id set on the speakers selected by idRegexString, null to keep the ids
	 * @param newName the name set on the selected speakers, null to keep the names
	 */
	public RecodeSpeakers(File transcriptionfile, File outfile, String idRegexString, String nameRegexString, String newId, String newName) {
		
		this.transcriptionfile = transcriptionfile
		this.outfile = outfile;
		
		if (idRegexString != null && idRegexString.length() > 0) {
			this.idRegex = /$idRegexString/
		}
		if (nameRegexString != null && nameRegexString.length() > 0) {
			this.nameRegex = /$nameRegexString/
		}
		this.newId = newId
		this.newName = newName
	}
	
	/**
	 * Parses the transcription, patches the Speaker and Turn elements and writes the
	 * result to the output file as indented UTF-8 XML.
	 *
	 * @return true, if successful; false when any exception was raised (it is logged)
	 */
	public boolean process() {
		try {
			DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
			domFactory.setNamespaceAware(true); // never forget this!
			domFactory.setXIncludeAware(true);
			DocumentBuilder builder = domFactory.newDocumentBuilder();
			doc = builder.parse(transcriptionfile);
			
			def expr2 = XPathFactory.newInstance().newXPath().compile("//Speaker");
			def nodes2 = expr2.evaluate(doc, XPathConstants.NODESET);
			
			// fix speaker declarations
			def replacedIds = [] // list of IDs replaced, the list is used later to update the Turn speaker ids
			
			for (def node : nodes2) {
				if (node == null) continue;
				Element elem = (Element)node;
				
				// both values are read before any patching, so the tests below see the original attributes
				String id = elem.getAttribute("id")
				String name = elem.getAttribute("name")
				
				// '=~' is a find: the regex only has to match a part of the value
				if (idRegex != null && id =~ idRegex) { // patch Speaker@id
					if (newId != null) {
						elem.setAttribute("id", newId);
						replacedIds << id
					}
					if (newName != null) {
						elem.setAttribute("name", newName);
					}
				}
				
				// the name regex is tested against the speaker NAME (it was tested against the id before)
				if (nameRegex != null && name =~ nameRegex) { // patch Speaker@name
					if (newName != null) {
						elem.setAttribute("name", newName);
					}
				}
			}
			
			def expr = XPathFactory.newInstance().newXPath().compile("//Turn");
			def nodes = expr.evaluate(doc, XPathConstants.NODESET);
			
			// fix speaker turns: a Turn is updated only when its whole speaker value equals a replaced id
			for (def node : nodes) {
				if (node == null) continue;
				
				Element elem = (Element)node;
				String id = elem.getAttribute("speaker");
				
				if (replacedIds.contains(id)) {
					elem.setAttribute("speaker", newId);
				}
			}
			
			// DOM source of the transformation
			Source source = new DOMSource(doc);
			
			// transformer configuration, done before the output file is opened
			TransformerFactory fabrique = new net.sf.saxon.TransformerFactoryImpl();
			Transformer transformer = fabrique.newTransformer();
			transformer.setOutputProperty(OutputKeys.METHOD, "xml");
			transformer.setOutputProperty(OutputKeys.INDENT, "yes");
			transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
			
			// output file; the writer is closed even when the transformation fails
			Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8"));
			try {
				Result resultat = new StreamResult(writer);
				transformer.transform(source, resultat);
			} finally {
				writer.close();
			}
			return true;
		} catch (Exception e) {
			org.txm.utils.logger.Log.printStackTrace(e);
			return false;
		}
	}
	
	/**
	 * The main method: recodes the speaker "spk2" as "spk4" in a sample file
	 * located under the user home directory.
	 *
	 * @param args the arguments (not used)
	 */
	public static void main(String[] args) {
		String homedir = System.getProperty("user.home")
		File trs1 = new File(homedir, "xml/concattrs/int18_1.trs")
		File trs2 = new File(homedir, "xml/concattrs/int18_1-renamed.trs")
		
		new RecodeSpeakers(trs1, trs2, "spk2", null, "spk4", null).process();
	}
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/RecodeSpeakersMacro.groovy (revision 3029)
1
package org.txm.macro.transcription
2

  
3
import java.nio.charset.Charset
4

  
5
import java.time.LocalTime
6
import java.time.format.DateTimeFormatter
7
import org.txm.utils.*
8
import org.txm.utils.logger.*
9

  
10
// Single-file macro: builds a RecodeSpeakers from the parameters below, runs it on
// trsFile and returns the boolean result of RecodeSpeakers.process().

@Field @Option(name="trsFile", usage="Dossier qui contient les fichiers TRS", widget="FileOpen", required=true, def="")
File trsFile;

@Field @Option(name="outputTrsFile", usage="Dossier qui contient les fichiers TRS", widget="FileSave", required=true, def="")
File outputTrsFile;

@Field @Option(name="idRegex", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def idRegex

@Field @Option(name="nameRegex", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def nameRegex

// The option name now matches the field: it was declared a second time as "idRegex".
// "newID" is also the key passed by RecodeSpeakersFromDirectoryMacro.
@Field @Option(name="newID", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def newID

// The option name now matches the field: it was declared a second time as "nameRegex".
// "newName" is also the key passed by RecodeSpeakersFromDirectoryMacro.
@Field @Option(name="newName", usage="Colonne de jointure de transcription", widget="String", required=true, def="")
def newName

def cs = new RecodeSpeakers(trsFile, outputTrsFile, idRegex, nameRegex, newID, newName);

return cs.process();

Formats disponibles : Unified diff