Révision 3052

tmp/org.txm.groovy.core/src/groovy/org/txm/macro/cqp/CreateCQPListMacro.groovy (revision 3052)
1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
//
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
//
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
//
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21

  
22
package org.txm.macro.cqp
23

  
24
/*
25
This macro allows you to define CQL lists which you can call in CQP queries during the current session.
26
To use this macro for your own lists,
27
1. Find below an example of the type of list you want to create
28
   (case 1 = explicit list, or case 2 = list from file),
29
2. Copy then paste this example (5 lines) after the given examples
30
3. Give the name for your list as a string after list_name=
31
4. Give the content of the list as a string after
32
either list_value= (for case 1, explicit list)
33
or list_path= (for case 2, list from file).
34
5. Save this file (macro code)
35
6. Run the macro by double-clicking on it.
36
The lists are defined only for the current session.
37
To use the same lists in a new session, just run the macro to generate the lists again.
38
*/
39

  
40
import org.txm.Toolbox
41
import org.txm.searchengine.cqp.*
42

  
43
// The macro refuses to run when the CQi client is a NetCqiClient.
def CQI = CQPSearchEngine.getCqiClient();
if ((CQI instanceof NetCqiClient)) {
	println "Error: only available in CQP memory mode"
	return;
}

// State of the list currently being defined.
// These variables are also read by the error handler at the end of the script,
// so they are all declared here, before the try block.
def list_name=""
def list_value=""
def list_path=""
def cqp_statement=""
File f;
try {

// BEGIN CONFIGURATION HERE

// 1st case : explicit lists
list_name = "negatif"
list_value = "rien non ne pas sans"

cqp_statement = "define \$$list_name = \"$list_value\";"
println "CQP executes : $cqp_statement"
CQI.query(cqp_statement)

// two other examples of 1st case (explicit list)
// 1st explicit list
list_name = "evaluation"
list_value = "trop assez peu bien très beaucoup vraiment"

cqp_statement = "define \$$list_name = \"$list_value\";"
println "CQP executes : $cqp_statement"
CQI.query(cqp_statement)

// 2nd explicit list
list_name = "categoriesmotsvides"
list_value = "ABR ADV DET:ART DET:POS INT KON NUM PRO PRO:DEM PRO:IND PRO:PER PRO:POS PRO:REL PRP PRP:det PUN PUN:cit SENT SYM"

cqp_statement = "define \$$list_name = \"$list_value\";"
println "CQP executes : $cqp_statement"
CQI.query(cqp_statement)

// one example of regular expression list to be used with RE()
list_name = "regexp_categoriesmotsvides"
list_value = "ABR ADV DET.* INT KON NUM PRO.* PRP.* PUN.* SENT SYM"

cqp_statement = "define \$$list_name = \"$list_value\";"
println "CQP executes : $cqp_statement"
CQI.query(cqp_statement)

// 2nd case : list from a file

list_name = "motsvides"
list_path = System.getProperty("user.home")+"/TXM/cqp/listes/stopwords_fr_ranknl.txt"
// Windows users must replace "/" with "\\".
// for instance :
// list_path = System.getProperty("user.home")+"\\TXM\\cqp\\listes\\stopwords_fr_ranknl.txt"
f = new File(list_path);
if (f.exists()) {
	// the "<" form of define reads the list content from the file
	cqp_statement = "define \$$list_name < \"$list_path\";"
	println "CQP executes : $cqp_statement"
	CQI.query(cqp_statement)
} else {
	println "Error: \"$f\" file does not exists"
}

/*
You can comment a group of several lines of code
using stars and slash like this.
*/

// END OF CONFIGURATION

} catch (Exception e) {
	// Report the list being processed when the failure happened, then the cause itself.
	println "Error during the \"$list_name\" list creation with the \"$list_value\" value"
	println "CQP statement was \"$cqp_statement\""
	if (f != null) println "Last CQL file path was \"$f\""
	println "Cause: $e"
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/cqp/CreateCQPListFromGroovyMacro.groovy (revision 3052)
1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
//
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
//
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
//
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21

  
22
package org.txm.macro.cqp
23

  
24
/*
25
This macro allows you to define CQL lists which you can call in CQP queries during the current session.
26
To use this macro for your own lists,
27
1. Find below an example of the type of list you want to create
28
   (case 1 = explicit list, or case 2 = list from file),
29
2. Copy then paste this example (5 lines) after the given examples
30
3. Give the name for your list as a string after list_name=
31
4. Give the content of the list as a string after
32
either list_value= (for case 1, explicit list)
33
or list_path= (for case 2, list from file).
34
5. Save this file (macro code)
35
6. Run the macro by double-clicking on it.
36
The lists are defined only for the current session.
37
To use the same lists in a new session, just run the macro to generate the lists again.
38
*/
39

  
40
import org.txm.Toolbox
41
import org.txm.searchengine.cqp.*
42

  
43
// The macro refuses to run when the CQi client is a NetCqiClient.
def CQI = CQPSearchEngine.getCqiClient();
if ((CQI instanceof NetCqiClient)) {
	println "Error: only available in CQP memory mode"
	return;
}

// State of the list currently being defined.
// These variables are also read by the error handler at the end of the script,
// so they are all declared here, before the try block.
def list_name=""
def list_value=""
def list_path=""
def cqp_statement=""
File f;
try {

// BEGIN CONFIGURATION HERE

// 1st case : explicit lists
list_name = "negatif"
list_value = "rien non ne pas sans"

cqp_statement = "define \$$list_name = \"$list_value\";"
println "CQP executes : $cqp_statement"
CQI.query(cqp_statement)

// two other examples of 1st case (explicit list)
// 1st explicit list
list_name = "evaluation"
list_value = "trop assez peu bien très beaucoup vraiment"

cqp_statement = "define \$$list_name = \"$list_value\";"
println "CQP executes : $cqp_statement"
CQI.query(cqp_statement)

// 2nd explicit list
list_name = "categoriesmotsvides"
list_value = "ABR ADV DET:ART DET:POS INT KON NUM PRO PRO:DEM PRO:IND PRO:PER PRO:POS PRO:REL PRP PRP:det PUN PUN:cit SENT SYM"

cqp_statement = "define \$$list_name = \"$list_value\";"
println "CQP executes : $cqp_statement"
CQI.query(cqp_statement)

// one example of regular expression list to be used with RE()
list_name = "regexp_categoriesmotsvides"
list_value = "ABR ADV DET.* INT KON NUM PRO.* PRP.* PUN.* SENT SYM"

cqp_statement = "define \$$list_name = \"$list_value\";"
println "CQP executes : $cqp_statement"
CQI.query(cqp_statement)

// 2nd case : list from a file

list_name = "motsvides"
list_path = System.getProperty("user.home")+"/TXM/cqp/listes/stopwords_fr_ranknl.txt"
// Windows users must replace "/" with "\\".
// for instance :
// list_path = System.getProperty("user.home")+"\\TXM\\cqp\\listes\\stopwords_fr_ranknl.txt"
f = new File(list_path);
if (f.exists()) {
	// the "<" form of define reads the list content from the file
	cqp_statement = "define \$$list_name < \"$list_path\";"
	println "CQP executes : $cqp_statement"
	CQI.query(cqp_statement)
} else {
	println "Error: \"$f\" file does not exists"
}

/*
You can comment a group of several lines of code
using stars and slash like this.
*/

// END OF CONFIGURATION

} catch (Exception e) {
	// Report the list being processed when the failure happened, then the cause itself.
	println "Error during the \"$list_name\" list creation with the \"$list_value\" value"
	println "CQP statement was \"$cqp_statement\""
	if (f != null) println "Last CQL file path was \"$f\""
	println "Cause: $e"
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/cqp/CreateCQPListFromStringMacro.groovy (revision 3052)
1
// Copyright © 2021 ENS de Lyon
2
// Licensed under the terms of the GNU General Public License v3.0 (http://www.gnu.org/licenses)
3
// @author sheiden
4

  
5
package org.txm.macro.cqp
6

  
7
import org.kohsuke.args4j.*
8
import groovy.transform.Field
9
import org.txm.rcpapplication.swt.widget.parameters.*
10

  
11
import org.txm.Toolbox
12
import org.txm.searchengine.cqp.*
13

  
14
// parameters
15

  
16
@Field @Option(name="wordList", usage="list of words", widget="Text", required=true, def='')
def wordList

@Field @Option(name="separator", usage="character separating words", widget="String", required=true, def=',')
def separator

@Field @Option(name="name", usage="name of the list", widget="String", required=true, def='')
def name

// Ask the user for the parameters; stop if the dialog is not validated.
if (!ParametersDialog.open(this)) return

def CQI = CQPSearchEngine.getCqiClient()
def cqp_statement

// The macro refuses to run when the CQi client is a NetCqiClient.
if ((CQI instanceof NetCqiClient)) {

	println "** Error: only available in CQP memory mode"
	return
}

try {
	// String.split() interprets its argument as a regular expression. A single-character
	// separator such as "|", "." or "*" must therefore be quoted to be matched literally.
	// Longer separators are still passed through unchanged, so existing regex usages keep working.
	def separatorRegex = (separator.length() == 1) ? java.util.regex.Pattern.quote(separator) : separator

	// Build the space-separated list used in the define statement,
	// ignoring whitespace around the words and empty items.
	wordList = wordList.split(separatorRegex).collect { it.trim() }.findAll { it }.join(' ')
	cqp_statement = "define \$$name = \"$wordList\";"
	println "Executing CQP statement... $cqp_statement"
	CQI.query(cqp_statement)
	println "\$$name word list defined. You can now use it in queries, like: [word=\$$name]"

} catch (Exception e) {
	// Report what was being defined, then the cause itself.
	println "** Error during the creation of word list \"$name\" with the list \"$wordList\""
	println "CQP statement was \"$cqp_statement\""
	println "Cause: $e"
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/cqp/CreateCQPListFromFileMacro.groovy (revision 3052)
1
// Copyright © 2021 ENS de Lyon
2
// Licensed under the terms of the GNU General Public License v3.0 (http://www.gnu.org/licenses)
3
// @author sheiden
4

  
5
package org.txm.macro.cqp
6

  
7
import org.kohsuke.args4j.*
8
import groovy.transform.Field
9
import org.txm.rcpapplication.swt.widget.parameters.*
10

  
11
import org.txm.Toolbox
12
import org.txm.searchengine.cqp.*
13

  
14
// parameters
15

  
16
@Field @Option(name="inputFile", usage="fichier contenant la liste de mots (un mot par ligne)", widget="File", required=true, def='')
def inputFile

@Field @Option(name="name", usage="name of the list", widget="String", required=true, def='')
def name

// Ask the user for the parameters; stop if the dialog is not validated.
if (!ParametersDialog.open(this)) return

def CQI = CQPSearchEngine.getCqiClient()
def cqp_statement

// The macro refuses to run when the CQi client is a NetCqiClient.
if ((CQI instanceof NetCqiClient)) {

	println "** Error: only available in CQP memory mode"
	return
}

try {

  if (inputFile.exists()) {
	// the "<" form of define reads the list content from the file
	cqp_statement = "define \$$name < \"$inputFile\";"
	println "Executing CQP statement... $cqp_statement"
	CQI.query(cqp_statement)
	println "\$$name word list defined. You can now use it in queries, like: [word=\$$name]"

  } else {
	println "** Error: impossible to access \"$inputFile\""
  }

} catch (Exception e) {
	// Report what was being defined, then the cause itself.
	println "** Error during the creation of word list \"$name\" with the file \"$inputFile\""
	println "CQP statement was \"$cqp_statement\""
	println "Cause: $e"
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/nov13/PrepareTranscriptionsMacro.groovy (revision 3052)
21 21
@Field @Option(name="nonPrimarySpeakerIdRegex", usage="other non primary id of the other turns", widget="String", required=false, def="")
22 22
		String nonPrimarySpeakerIdRegex
23 23
		
24
@Field @Option(name="supplementarySpeakerIdRegex", usage="supplementary locutor ids regex", widget="String", required=false, def="UX.+")
25
		String supplementarySpeakerIdRegex
26
		
27
@Field @Option(name="supplementarySpeakerId", usage="supplementary locutor id to normalize", widget="String", required=false, def="")
28
		String supplementarySpeakerId
29
		
24 30
@Field @Option(name="newSectionMarker", usage="section marker", widget="String", required=false, def="*#")
25 31
		String newSectionMarker
26 32

  
......
42 48
println "CONVERTING VOCAPIA FILES TO TRS FILES..."
43 49
File trsDirectory = new File(resultDirectory, "vocapia2trs")
44 50
trsDirectory.mkdir()
45
gse.runMacro(org.txm.macro.transcription.Vocapia2TranscriberMacro, ["vocapiaDirectory":vocapiaDirectory, "resultDirectory":trsDirectory, "debug":debug])
51
gse.runMacro(org.txm.macro.transcription.Vocapia2TranscriberMacro, ["vocapiaDirectory":vocapiaDirectory, "resultDirectory":trsDirectory, "additionalSpeakers": "UX1:UX1", "debug":debug])
46 52

  
47 53

  
48 54
println "SPOTTING SECTION MARKS..."
......
111 117
	if (debug) println "== $file =="
112 118
	else cpb.tick()
113 119
	
114
	File outFile = new File(resultDirectory, file.getName())
115
	RecodeSpeakers fixer = new RecodeSpeakers(file, outFile, nonPrimarySpeakerIdRegex, null, otherNonPrimarySpeakerId, otherNonPrimarySpeakerId)
120
	File tmpFile = new File(resultDirectory, "tmp.xml")
121
	tmpFile.delete();
122
	
123
	RecodeSpeakers fixer = new RecodeSpeakers(file, tmpFile, nonPrimarySpeakerIdRegex, null, otherNonPrimarySpeakerId, otherNonPrimarySpeakerId)
116 124
	fixer.debug = debug
117 125
	
118 126
	if (!fixer.process()) {
119 127
		println "WARNING: ERROR WHILE PROCESSING: "+file
120 128
		if (debug) {println "DEBUG ACTIVATED -> STOP"; return;}
121 129
	}
130
	
131
	File outFile2 = new File(resultDirectory, file.getName())
132
	RecodeSpeakers fixer2 = new RecodeSpeakers(tmpFile, outFile2, supplementarySpeakerIdRegex, null, supplementarySpeakerId, supplementarySpeakerId)
133
	fixer2.debug = debug
134
	
135
	if (!fixer2.process()) {
136
		println "WARNING: ERROR WHILE PROCESSING: "+file
137
		if (debug) {println "DEBUG ACTIVATED -> STOP"; return;}
138
	}
139
	tmpFile.delete(); // This tmp file must be removed for the next operation
122 140
}
123 141
cpb.done()
124 142

  
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/Vocapia2TranscriberMacro.groovy (revision 3052)
13 13

  
14 14
@Field @Option(name="resultDirectory", usage="The result directory", widget="Folder", required=false, def="")
15 15
		File resultDirectory;
16

  
16
		
17
@Field @Option(name="additionalSpeakers", usage="The result directory", widget="String", required=false, def="")
18
		def additionalSpeakers;
19
		additionalSpeakers
20
		
17 21
@Field @Option(name="debug", usage="The result directory", widget="Boolean", required=false, def="false")
18 22
		boolean debug;
19 23

  
......
45 49
	
46 50
	if (debug) println "== $xmlFile =="
47 51
	else cpb.tick()
52
	
48 53
		
54
	// Parse the additionalSpeakers parameter: tab-separated "key:value" entries.
	// The parameter defaults to "" and is optional, so null and empty values yield an empty map.
	def map = [:]
	if (additionalSpeakers) {
		for (String spk : additionalSpeakers.split("\t")) {
			if (spk.isEmpty()) continue // nothing between two separators
			def split = spk.split(":", 2)
			// an entry without ":" has no second part: reuse the key instead of failing on split[1]
			map[split[0]] = (split.length > 1) ? split[1] : split[0]
		}
	}
49 59
	Vocapia2Transcriber v2t = new Vocapia2Transcriber(xmlFile)
60
	v2t.setAddtionalSpeakers(map)
50 61
	String name = FileUtils.stripExtension(xmlFile)
51 62
	File outFile = new File(resultDirectory, name+".trs")
52 63
	
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/RecodeSpeakers.groovy (revision 3052)
114 114
			}
115 115
			
116 116
			if (debug) println "id=$idRegex or name=$nameRegex -> $nReplace replacements"
117
			if (nReplace == 0) println "Warning found no replacement for id=$idRegex or name=$nameRegex"
117
			if (nReplace == 0) println "Warning: didn't found any occurrence of id=$idRegex or name=$nameRegex"
118 118
			
119 119
			// Création de la source DOM
120 120
			Source source = new DOMSource(doc);
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/Vocapia2Transcriber.groovy (revision 3052)
15 15
	protected BufferedOutputStream output;
16 16
	protected XMLStreamWriter writer;
17 17
	
18
	def additionalSpeakers = [:]
19
	
18 20
	public Vocapia2Transcriber(File xmlfile) {
19 21
		
20 22
		this.xmlfile = xmlfile;
21 23
	}
22 24
	
25
	/**
	 * Sets the speakers to write in addition to the ones declared in the source file.
	 *
	 * A private copy is kept because the conversion code removes entries from this map
	 * once the matching speaker has been written; the caller's map is left untouched.
	 *
	 * @param additionalSpeakers map keyed by speaker id; null is treated as an empty map
	 */
	public void setAddtionalSpeakers(def additionalSpeakers) {
		this.additionalSpeakers = (additionalSpeakers != null) ? new LinkedHashMap(additionalSpeakers) : [:]
	}
28
	
23 29
	public boolean process(File outfile) {
24 30
		
25 31
		if (!xmlfile.exists()) return false;
......
84 90
								writer.writeAttribute("scope", "local")
85 91
								writer.writeEndElement()
86 92
								writer.writeCharacters("\n")
93
								
94
								/**
95
								 * remove the additional speaker if already written
96
								 */
97
								if (additionalSpeakers.containsKey(parser.getAttributeValue(null, "spkid"))) {
98
									additionalSpeakers.remove(parser.getAttributeValue(null, "spkid"))
99
								}
87 100
								break;
88 101
								
89 102
							case "Channel":
......
155 168
							
156 169
							case "SpeakerList": // <SpeakerList> -> <Speakers>
157 170
							
171
								// write additional speakers 
172
								for (String spkid : additionalSpeakers.keySet()) {
173
									writer.writeStartElement("Speaker")
174
									writer.writeAttribute("id", spkid)
175
									writer.writeAttribute("name", spkid)
176
									writer.writeAttribute("check", "")
177
									writer.writeAttribute("dialect", "")
178
									writer.writeAttribute("accent", spkid)
179
									writer.writeAttribute("scope", spkid)
180
									writer.writeEndElement()
181
									writer.writeCharacters("\n")
182
								}
183
							
158 184
								writer.writeEndElement()
159 185
								writer.writeCharacters("\n")
160 186
								break

Formats disponibles : Unified diff