Révision 2996

tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/MainCorpus.java (revision 2996)
38 38
import java.util.Set;
39 39
import java.util.UUID;
40 40

  
41
import org.apache.commons.lang.StringUtils;
41 42
import org.eclipse.core.runtime.IProgressMonitor;
42 43
import org.eclipse.osgi.util.NLS;
43 44
import org.txm.Toolbox;
44 45
import org.txm.core.messages.TXMCoreMessages;
45 46
import org.txm.core.results.TXMParameters;
46 47
import org.txm.importer.cwb.PatchCwbRegistry;
48
import org.txm.importer.cwb.ReadRegistryFile;
47 49
import org.txm.objects.CorpusBuild;
48 50
import org.txm.objects.Project;
49 51
import org.txm.searchengine.core.messages.SearchEngineCoreMessages;
......
109 111
	 * 
110 112
	 * @param parametersNodePath the preference node path
111 113
	 * 
112
	 * @throws InvalidCqpIdException the invalid cqp id exception
114
	 * @throws InvalidCqpIdException the invalid CQP id exception
113 115
	 * @throws CqiClientException the cqi client exception
114 116
	 */
115 117
	public MainCorpus(String parametersNodePath) throws InvalidCqpIdException, CqiClientException {
......
117 119
		
118 120
		if (getID() != null && getProjectDirectory() != null) {
119 121
			try {
120
				compute(false);
122
				if (!compute(false)) {
123
					Log.warning(NLS.bind("Warning: the {0} corpus won't work correctly", this.getID()));
124
					throw new IllegalStateException(NLS.bind("{0} CQP MainCorpus not instanciate correctly.", this.getID()));
125
				}
121 126
			}
122 127
			catch (InterruptedException e) {
123
				// TODO Auto-generated catch block
124
				e.printStackTrace();
128
				throw new IllegalStateException(e);
125 129
			}
126 130
		}
127 131
	}
......
161 165
			return false;
162 166
		}
163 167
		try {
164
			try {
168
			try { // fix the absolute paths in the registry file
165 169
				PatchCwbRegistry.patch(this.registryFile, this.dataDirectory);
166 170
			}
167 171
			catch (IOException e) {
......
170 174
				return false;
171 175
			}
172 176
			
177
			// check if all corpus index files are present
178
			ReadRegistryFile rrf = new ReadRegistryFile(this.registryFile);
179
			ArrayList<String> errors = rrf.isCorpusBuildValid(this.dataDirectory);
180
			if (errors.size() > 0) {
181
				Log.warning(TXMCoreMessages.bind("Error: some {0} index files are missing : {1}.", this.getID(), StringUtils.join(errors, ", ")));
182
				return false;
183
			}
184
			
173 185
			Log.fine(NLS.bind("Call CQI: load_a_system_corpus with {0} and {1}", this.registryFile.getParent(), this.pID));
174 186
			CQPSearchEngine.getCqiClient().load_a_system_corpus(this.registryFile.getParent(), this.pID);
175 187
			
176
			List tmp = Arrays.asList(CQPSearchEngine.getCqiClient().listCorpora());
188
			List<String> tmp = Arrays.asList(CQPSearchEngine.getCqiClient().listCorpora());
177 189
			if (tmp.contains(this.pID)) {
178 190
				Log.fine("Corpus registered: " + pID);
179 191
				Log.fine(NLS.bind("Call CQI: corpusProperties with {0}.", this.pID));
tmp/org.txm.searchengine.cqp.core/src/org/txm/importer/cwb/ReadRegistryFile.java (revision 2996)
6 6
import java.util.HashSet;
7 7

  
8 8
import org.txm.utils.io.IOUtils;
9
import org.txm.utils.logger.Log;
9 10

  
10 11
/**
11 12
 * Read a registry file and retrieve the declared p-attributes and s-attributes information.
......
13 14
 * Call constructor then use : getPattributes and getSattributes for cwb-encode
14 15
 * 
15 16
 * or use getSattributesMap, getSattributeProfs and getAnatypes() to get the declared attributes
17
 * 
16 18
 * @author mdecorde
17 19
 *
18 20
 */
19 21
public class ReadRegistryFile {
22
	
20 23
	File registryFile;
24
	
21 25
	ArrayList<String> pAttributes;
26
	
22 27
	ArrayList<String> sAttributes;
28
	
23 29
	HashMap<String, HashSet<String>> sattrs;
30
	
24 31
	HashMap<String, Integer> sattrsProfs;
25 32
	
26 33
	public ReadRegistryFile(File registryFile) {
......
34 41
	public void read() {
35 42
		pAttributes = new ArrayList();
36 43
		sAttributes = new ArrayList();
37
		sattrs = new HashMap<String, HashSet<String>>();
38
		sattrsProfs = new HashMap<String, Integer>();
44
		sattrs = new HashMap<>();
45
		sattrsProfs = new HashMap<>();
39 46
		
40 47
		for (String line : IOUtils.getLines(registryFile, System.getProperty("file.encoding"))) {
41 48
			line = line.trim(); // remove first tab
42

  
49
			
43 50
			if (line.startsWith("ATTRIBUTE ")) {
44 51
				line = line.substring(10); // remove 'ATTRIBUTE '
45 52
				pAttributes.add(line);
46
			} else if (line.startsWith("STRUCTURE ")) {
53
			}
54
			else if (line.startsWith("STRUCTURE ")) {
47 55
				line = line.substring(9); // remove 'STRUCTURE '
48 56
				line = line.replaceAll("\\#.*", "");
49 57
				line = line.trim();
50 58
				String[] split = line.split("_", 2);
51 59
				String sname = split[0];
52
				//println split
60
				// println split
53 61
				if (split.length == 1) { // sattr decl
54
					if (sname.matches(".+[1-9]") && sattrs.containsKey(sname.substring(0, sname.length()-1))) { // recursive structure
55
						sname = sname.substring(0, sname.length()-1);
56
						sattrsProfs.put(sname, sattrsProfs.get(sname)+1);
57
					} else {
62
					if (sname.matches(".+[1-9]") && sattrs.containsKey(sname.substring(0, sname.length() - 1))) { // recursive structure
63
						sname = sname.substring(0, sname.length() - 1);
64
						sattrsProfs.put(sname, sattrsProfs.get(sname) + 1);
65
					}
66
					else {
58 67
						sattrs.put(sname, new HashSet<String>());
59 68
						sattrsProfs.put(sname, 0);
60 69
					}
61
				} else {
70
				}
71
				else {
62 72
					String satt = split[1];
63
					if (satt.matches(".+[1-9]") && sattrs.get(sname).contains(satt.substring(0, satt.length()-1))) {
73
					if (satt.matches(".+[1-9]") && sattrs.get(sname).contains(satt.substring(0, satt.length() - 1))) {
64 74
						// recursive attribute -> to be ignored
65
					} else {
75
					}
76
					else {
66 77
						sattrs.get(sname).add(satt);
67 78
					}
68 79
				}
......
70 81
		}
71 82
		
72 83
		for (String sattr : sattrs.keySet()) {
73
			String tmp = ""+sattr+":"+sattrsProfs.get(sattr);
84
			String tmp = "" + sattr + ":" + sattrsProfs.get(sattr);
74 85
			for (String attr : sattrs.get(sattr)) {
75
				tmp += "+"+attr;
86
				tmp += "+" + attr;
76 87
			}
77 88
			sAttributes.add(tmp);
78 89
		}
79 90
	}
80 91
	
81 92
	/**
93
	 * Test that the CQP index files of each p-attribute, s-attribute and s-attribute property are present
82 94
	 * 
95
	 * @param dataDirectory the directory where the binary files should be found
96
	 * @return the names of the missing CQP index files; the list is empty if all files are present
97
	 */
98
	public ArrayList<String> isCorpusBuildValid(File dataDirectory) {
99
		if (pAttributes == null) {
100
			read();
101
		}
102
		
103
		ArrayList<String> errors = new ArrayList<>();
104
		
105
		// test p-attributes
106
		String[] exts = { ".corpus", ".lexicon", ".corpus.cnt", ".corpus.rdx", ".corpus.rev", ".lexicon.idx", ".lexicon.srt" };
107
		for (String p : pAttributes) {
108
			for (String ext : exts) {
109
				File f = new File(dataDirectory, p + ext);
110
				if (!f.exists()) {
111
					// System.out.println("MISSING: " + f.exists() + " " + f.getAbsolutePath());
112
					errors.add(f.getName());
113
				}
114
			}
115
		}
116
		
117
		String[] sexts = { ".rng" };
118
		String[] spexts = { ".avs", ".avx", ".rng" };
119
		for (String s : sattrs.keySet()) {
120
			for (String ext : sexts) {
121
				File f = new File(dataDirectory, s + ext);
122
				if (!f.exists()) {
123
					// System.out.println("MISSING: " + f.exists() + " " + f.getAbsolutePath());
124
					errors.add(f.getName());
125
				}
126
			}
127
			
128
			for (String sp : sattrs.get(s)) {
129
				for (String ext : spexts) {
130
					File f = new File(dataDirectory, s + "_" + sp + ext);
131
					if (!f.exists()) {
132
						// System.out.println("MISSING: " + f.exists() + " " + f.getAbsolutePath());
133
						errors.add(f.getName());
134
					}
135
				}
136
			}
137
		}
138
		return errors;
139
	}
140
	
141
	/**
142
	 * 
83 143
	 * @return the cwb-encode arguments for p attributes
84 144
	 */
85 145
	public ArrayList<String> getPAttributes() {
......
111 171
	}
112 172
	
113 173
	public static void main(String[] args) {
114
		File registry = new File(System.getProperty("user.home"), "runtime-rcpapplication.product/corpora/ANNOTATIONCONC/registry/annotationconc");
174
		File registry = new File(System.getProperty("user.home"), "runtime-rcpapplication.product/corpora/VOEUX/registry/voeux");
175
		File data = new File(System.getProperty("user.home"), "runtime-rcpapplication.product/corpora/VOEUX/data/VOEUX");
115 176
		ReadRegistryFile reader = new ReadRegistryFile(registry);
116
		System.out.println("pAttributes: "+reader.getPAttributes());
117
		System.out.println("sAttributes Map: "+reader.getSAttributesMap());
118
		System.out.println("sAttributes: "+reader.getSAttributes());
177
		System.out.println("pAttributes: " + reader.getPAttributes());
178
		System.out.println("sAttributes Map: " + reader.getSAttributesMap());
179
		System.out.println("sAttributes: " + reader.getSAttributes());
180
		
181
		System.out.println("Validation: " + reader.isCorpusBuildValid(data));
119 182
	}
120 183
}
tmp/org.txm.core/src/java/org/txm/objects/Project.java (revision 2996)
567 567
					result.setUserPersistable(true);
568 568
				}
569 569
			}
570
			catch (Exception e) {
571
				// TODO Auto-generated catch block
572
				e.printStackTrace();
570
			catch (Throwable e) {
571
				Log.warning("Internal error: could not initialize result: " + resultNodePath + ": " + e);
572
				Log.printStackTrace(e);
573 573
			}
574 574
		}
575 575
		if (errors.size() > 0) {
tmp/org.txm.concordance.core/src/org/txm/concordance/core/functions/Concordance.java (revision 2996)
207 207
	public boolean loadParameters() {
208 208
		
209 209
		try {
210
			
210
			if (!getCorpus().hasBeenComputedOnce()) {
211
				Log.warning("Error: CQP corpus is not available: " + getCorpus());
212
				return false;
213
			}
211 214
			this.setQuery(this.getStringParameterValue(ConcordancePreferences.QUERY));
212 215
			
213 216
			this.setCQLSeparator(this.getCorpus().getCQLLimitQuery());
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/Vocapia2Transcriber.groovy (revision 2996)
1
package org.txm.macro.transcription
2

  
3
import javax.xml.stream.*
4

  
5
import org.txm.importer.PersonalNamespaceContext
6

  
7
import java.io.BufferedOutputStream
8
import java.io.FileOutputStream
9
import java.net.URL
10
import java.util.regex.Pattern
11

  
12
class Vocapia2Transcriber {
13
	
14
	File xmlfile;
15
	protected BufferedOutputStream output;
16
	protected XMLStreamWriter writer;
17
	
18
	public Vocapia2Transcriber(File xmlfile) {
19
		
20
		this.xmlfile = xmlfile;
21
	}
22
	
23
	public boolean process(File outfile, boolean retokenizeWords) {
24
		
25
		if (!xmlfile.exists()) return false;
26
		
27
		output = new BufferedOutputStream(new FileOutputStream(outfile), 16 * 1024);
28
		writer = XMLOutputFactory.newInstance().createXMLStreamWriter(output, "ISO-8859-1");// create a new file
29
		writer.setNamespaceContext(new PersonalNamespaceContext());
30
		
31
		URL url = xmlfile.toURI().toURL();
32
		String filename = outfile.getName()
33
		filename = filename.substring(0, filename.length()-4); // remove ".cqp"
34
		def inputData = url.openStream();
35
		XMLInputFactory factory = XMLInputFactory.newInstance();
36
		XMLStreamReader parser = factory.createXMLStreamReader(inputData);
37
		
38
		boolean flagWord = false
39
		def winfos = [:]
40
		boolean other = false;
41
		String word = ""
42
		try {
43
			
44
			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
45
				
46
				switch (event) {
47
					case XMLStreamConstants.START_ELEMENT:
48
						String localname = parser.getLocalName()
49
						switch(localname) {
50
							case "AudioDoc": // <AudioDoc name="xyz" path="xyz.flac"> -> <Trans scribe="see Proc elements" audio_filename="xyz.flac" version="see Proc elements" version_date="see Proc elements">
51
							
52
								writer.writeStartDocument("ISO-8859-1", "1.0")
53
							
54
								writer.writeStartElement("Trans")
55
								writer.writeAttribute("audio_filename", parser.getAttributeValue(null, "path"))
56
								break
57
							
58
							case "Proc": // <Proc name="scribe" version="date" editor="AAA"/>
59
							//continue writing the "Trans" element
60
								if ("scribe" == parser.getAttributeValue(null, "name")) {
61
									writer.writeAttribute("scribe", parser.getAttributeValue(null, "editor"))
62
									writer.writeAttribute("version", parser.getAttributeValue(null, "version"))
63
									writer.writeAttribute("version_date", parser.getAttributeValue(null, "version"))
64
								}
65
							
66
								break;
67
							
68
							case "SpeakerList": // <SpeakerList> -> <Speakers>
69
								writer.writeCharacters("\n") // after <Trans>
70
								writer.writeStartElement("Speakers")
71
								writer.writeCharacters("\n")
72
							
73
								break;
74
							case "Speaker": // <Speaker ch="1" dur="531.38" gender="X" spkid="Enquêtrice" lang="fre" lconf="1.00" nw="1586" tconf="0.95"/> -> <Speaker id="spk1" name="enq4" check="no" dialect="native" accent="" scope="local"/>
75
							
76
								writer.writeStartElement("Speaker")
77
								writer.writeAttribute("id", parser.getAttributeValue(null, "spkid"))
78
								writer.writeAttribute("name", parser.getAttributeValue(null, "spkid"))
79
								writer.writeAttribute("check", "")
80
								writer.writeAttribute("dialect", parser.getAttributeValue(null, "lang"))
81
								writer.writeAttribute("accent", parser.getAttributeValue(null, "gender"))
82
								writer.writeAttribute("scope", "local")
83
								writer.writeEndElement()
84
								writer.writeCharacters("\n")
85
								break;
86
							
87
							case "SegmentList":
88
								writer.writeStartElement("Episode")
89
							//<Section type="report" startTime="0" endTime="3617.593">
90
								writer.writeStartElement("Section")
91
								break;
92
							
93
							case "SpeechSegment": // <SpeechSegment ch="1" sconf="1.00" stime="9.94" etime="43.81" spkid="Enquêtrice" lang="fre" lconf="1.00" trs="1">
94
								writer.writeStartElement("Turn")
95
								writer.writeAttribute("speaker", parser.getAttributeValue(null, "spkid"))
96
								writer.writeAttribute("startTime", parser.getAttributeValue(null, "stime"))
97
								writer.writeAttribute("endTime", parser.getAttributeValue(null, "etime"))
98
								writer.writeCharacters("\n")
99
								writer.writeStartElement("Sync")
100
								writer.writeAttribute("time", parser.getAttributeValue(null, "stime"))
101
								writer.writeEndElement() // Sync
102
								writer.writeCharacters("\n")
103
								break;
104
							case "Word":
105
								flagWord = true
106
								word = ""
107
							
108
							// store the w infos in case the word must be split
109
								def endValue = String.format(Locale.US, "%.2f", (Double.parseDouble(parser.getAttributeValue(null, "stime")) + Double.parseDouble(parser.getAttributeValue(null, "dur"))))
110
								winfos = ["time":parser.getAttributeValue(null, "stime"), "start": parser.getAttributeValue(null, "stime"), "end":endValue]
111
								break
112
						}
113
						break;
114
					case XMLStreamConstants.END_ELEMENT:
115
						String localname = parser.getLocalName()
116
						switch(localname) {
117
							case "AudioDoc": // <AudioDoc name="xyz" path="xyz.flac"> -> <Trans scribe="see Proc elements" audio_filename="xyz.flac" version="see Proc elements" version_date="see Proc elements">
118
							
119
								writer.writeEndElement() // Trans
120
								break
121
							
122
							case "Proc": // <Proc name="scribe" version="date" editor="AAA"/>
123
							
124
								break
125
							
126
							case "SpeakerList": // <SpeakerList> -> <Speakers>
127
							
128
								writer.writeEndElement()
129
								writer.writeCharacters("\n")
130
								break
131
							
132
							case "Speaker": // <Speaker ch="1" dur="531.38" gender="X" spkid="Enquêtrice" lang="fre" lconf="1.00" nw="1586" tconf="0.95"/> -> <Speaker id="spk1" name="enq4" check="no" dialect="native" accent="" scope="local"/>
133
							//already closed
134
								break
135
							
136
							case "SegmentList":
137
								writer.writeEndElement() // Section
138
								writer.writeEndElement() // Episode
139
								writer.writeCharacters("\n")
140
								break
141
							
142
							case "SpeechSegment": // <SpeechSegment ch="1" sconf="1.00" stime="9.94" etime="43.81" spkid="Enquêtrice" lang="fre" lconf="1.00" trs="1">
143
								writer.writeEndElement() // Turn
144
								writer.writeCharacters("\n")
145
								break
146
							
147
							case "Word":
148
								flagWord = false
149
								word = word.trim()
150
								if (word.startsWith("*")) {
151
									other = true
152
									word = word.substring(1)
153
								}
154
							
155
								String otherAttributeValue = Boolean.toString(other) // set now
156
							
157
								if (other && word.endsWith("*")) {
158
									word = word.substring(0, word.length()-1)
159
									other = false
160
								}
161
							
162
							// split before the word
163
								def puncts = []
164
								if (retokenizeWords) {
165
									while (word.length() > 0 && word.matches("\\p{Punct}.+")) {
166
										puncts << word.substring(0, 1)
167
										word = word.substring(1, word.length())
168
									}
169
									
170
									Pattern reg = Pattern.compile("([^']+')(.+)")
171
									def m = reg.matcher(word)
172
									while (word.length() > 0 && m.matches()) {
173
										puncts << m.group(1)
174
										word = m.group(2)
175
										m = reg.matcher(word)
176
									}
177
								}
178
								
179
								for (def punct : puncts) { // pre-retokenize if any
180
									writer.writeStartElement("w")
181
									writer.writeAttribute("time", winfos["time"])
182
									writer.writeAttribute("start", winfos["start"])
183
									writer.writeAttribute("end", winfos["start"])
184
									writer.writeCharacters(punct)
185
									writer.writeEndElement() // w
186
									writer.writeCharacters("\n")
187
									
188
									
189
								}
190
							
191
								puncts = []
192
								if (retokenizeWords) {
193
									while (word.length() > 0 && word.matches(".+\\p{Punct}")) {
194
										puncts << word.substring(word.length()-1, word.length())
195
										word = word.substring(0, word.length()-1)
196
									}
197
								}
198
							
199
								writer.writeStartElement("w") // start the initial word
200
								writer.writeAttribute("time", winfos["time"])
201
								writer.writeAttribute("start", winfos["start"])
202
								writer.writeAttribute("end", winfos["end"])
203
								writer.writeAttribute("other", otherAttributeValue)
204
							
205
								writer.writeCharacters(word)
206
								writer.writeEndElement() // w
207
								writer.writeCharacters("\n")
208
							
209
								for (String punct : puncts) {  // post-retokenize if any
210
									writer.writeStartElement("w")
211
									writer.writeAttribute("time", winfos["time"])
212
									writer.writeAttribute("start", winfos["end"])
213
									writer.writeAttribute("end", winfos["end"])
214
									writer.writeCharacters(punct)
215
									writer.writeEndElement() // w
216
									writer.writeCharacters("\n")
217
								}
218
								break
219
						}
220
						break
221
					
222
					case XMLStreamConstants.CHARACTERS:
223
						if (flagWord) {
224
							word += parser.getText()
225
						}
226
						break
227
				}
228
			}
229
		} catch (Exception e) {
230
			println "Error while processing XML File "+xmlfile+": "
231
			e.printStackTrace();
232
			println "At: "+parser.getLocation();
233
			println "See: "+outfile.getAbsolutePath()
234
		}
235
		
236
		output.flush()
237
		writer.close()
238
		output.close()
239
		//writer.close()
240
		parser.close()
241
		//println "$xmlfile -> $outfile"
242
		return true;
243
	}
244
	
245
	public static void main(String[] args) {
246
		File infile = new File("/home/mdecorde/xml/vocapia","test.xml")
247
		File outfile = new File("/home/mdecorde/xml/vocapia","test.trs")
248
		def processor = new Vocapia2Transcriber(infile)
249
		println processor.process(outfile)
250
	}
251
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/Vocapia2TranscriberMacro.groovy (revision 2996)
1
package org.txm.macro.transcription
2

  
3
import java.time.LocalTime
4
import java.time.format.DateTimeFormatter
5
import org.txm.utils.*
6
import org.txm.utils.logger.*
7

  
8
@Field @Option(name="vocapiaFile", usage="A single vocapia XML file", widget="FileOpen", required=false, def="")
9
		File vocapiaFile;
10

  
11
@Field @Option(name="vocapiaDirectory", usage="A Vocapia XML files directory to process", widget="Folder", required=false, def="")
12
		File vocapiaDirectory;
13

  
14
@Field @Option(name="resultDirectory", usage="The result directory", widget="Folder", required=false, def="")
15
		File resultDirectory;
16
		
17
@Field @Option(name="retokenize_words", usage="retokenize words prefixed or postfixed with puunctuations", widget="Boolean", required=true, def="true")
18
		Boolean retokenize_words;
19

  
20
if (!ParametersDialog.open(this)) return;
21

  
22
resultDirectory.mkdirs();
23

  
24
def xmlFiles = []
25
if (vocapiaDirectory != null && vocapiaDirectory.exists()) {
26
	
27
	println "Processing directory: $vocapiaDirectory"
28
	for (File file : vocapiaDirectory.listFiles()) {
29
		if (file.getName().toLowerCase().endsWith(".xml")) {
30
			xmlFiles << file
31
		}
32
	}
33
} else if (vocapiaFile != null && vocapiaFile.exists()) {
34
	println "Processing file: $vocapiaFile"
35
	xmlFiles << vocapiaFile
36
}
37

  
38
if (xmlFiles.size() == 0) {
39
	println "No XML file found for parameters vocapiaFile=$vocapiaFile and vocapiaDirectory=$vocapiaDirectory"
40
	return false
41
}
42

  
43
ConsoleProgressBar cpb = new ConsoleProgressBar(xmlFiles.size())
44
for (File xmlFile : xmlFiles) {
45
	cpb.tick()
46
	Vocapia2Transcriber v2t = new Vocapia2Transcriber(xmlFile)
47
	String name = FileUtils.stripExtension(xmlFile)
48
	File outFile = new File(resultDirectory, name+".trs")
49
	
50
	if (!v2t.process(outFile, retokenize_words)) {
51
		println "WARNING: ERROR WHILE PROCESSING: "+xmlFile
52
		return false
53
	}
54
}
55
cpb.done()
56

  
57
println "Done: "+xmlFiles.size()+" files processed. Result files in $resultDirectory"
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/stats/InvertCAXAxisMacro.groovy (revision 2996)
1
// @author Sebastien Jacquot
2
// STANDARD DECLARATIONS
3
package org.txm.macro
4

  
5

  
6
import org.kohsuke.args4j.*
7
import groovy.transform.Field
8
import org.txm.rcpapplication.swt.widget.parameters.*
9
import org.txm.searchengine.cqp.clientExceptions.*
10
import org.txm.searchengine.cqp.corpus.*
11
import org.txm.searchengine.cqp.corpus.query.*
12
import org.apache.commons.lang.time.StopWatch
13
import java.util.Arrays
14
import org.jfree.chart.renderer.xy.*
15
import org.jfree.chart.renderer.*
16
import org.jfree.chart.plot.*
17
import org.jfree.data.xy.*
18
import org.jfree.chart.axis.*
19
import java.awt.*;
20
import java.awt.geom.*;
21
import org.jfree.chart.labels.*
22

  
23
import org.txm.ca.core.chartsengine.jfreechart.themes.highcharts.renderers.*
24
import org.txm.ca.rcp.editors.*
25
import org.txm.libs.office.ReadODS
26
import org.txm.ca.core.chartsengine.jfreechart.datasets.*
27
import org.jfree.chart.renderer.AbstractRenderer
28

  
29
println "editor: "+editor
30

  
31
if (!(editor instanceof CAEditor)) {
32
	println "editor is not a CA editor: $editor, Run the macro with F12 when the editor is selected :-)"
33
	return
34
}
35

  
36
ica = editor.getCA();
37
chart = ica.getChart();
38
plot = chart.getXYPlot();
39
dataset = plot.getDataset(); 
40

  
41
// overrides some dataset methods to return inverted X coordinates for columns and rows
42
plot.setDataset(new CAXYDataset(ica) {
43

  
44
        public Number getX(int series, int item) {
45
                if(item == -1)        {
46
                        System.out.println("CAXYDataset.getX()");
47
                }
48
                // Rows
49
                if(series == 0) {
50
                        return -this.rowCoordinates[item][this.axis1];
51
                }
52
                // Cols
53
                else {
54
                        return -this.columnCoordinates[item][this.axis1];
55
                }
56
        }
57
        
58
        
59
         
60
        /**
61
         * Gets the minimum value in the specified series according to the specified axis.
62
         * @param series
63
         * @param axis
64
         * @return
65
         */
66
        public double getMinValue(int series, int axis)        {
67
                double minValue = 0;
68
                double tmpMinValue;
69
                double[][] coordinates = this.rowCoordinates;
70
                if(series != 0)        {
71
                        coordinates = this.columnCoordinates;
72
                }
73
                
74
                for(int i = 0; i < coordinates.length; i++) {
75
                        tmpMinValue = coordinates[i][axis];
76
                        
77
						// invert X coordinate
78
                		if(axis == 0)        {
79
                			tmpMinValue = -tmpMinValue;
80
                		}
81
                        
82
                        if(tmpMinValue < minValue)        {
83
                                minValue = tmpMinValue;
84
                        }
85
                }
86
                
87
                return minValue;
88
        }
89
        
90
        /**
91
         * Gets the maximum value in the specified series according to the specified axis.
92
         * @param series
93
         * @param axis
94
         * @return
95
         */
96
        public double getMaxValue(int series, int axis)        {
97
                double maxValue = 0;
98
                double tmpMaxValue;
99
                double[][] coordinates = this.rowCoordinates;
100
                if(series != 0)        {
101
                        coordinates = this.columnCoordinates;
102
                }
103
                
104
                for(int i = 0; i < coordinates.length; i++) {
105
                        tmpMaxValue = coordinates[i][axis];
106
                        
107
						// invert X coordinate
108
                		if(axis == 0)        {
109
                			tmpMaxValue = -tmpMaxValue;
110
                		}
111
                        
112
                        if(tmpMaxValue > maxValue)        {
113
                                maxValue = tmpMaxValue;
114
                        }
115
                }
116
                
117
                return maxValue;
118
        }
119
}
120
);
121

  
122
// update the limits dotted borders
123
ica.getChartCreator().createCAFactorialMapChartLimitsBorder(chart);
124

  
125

  
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/TRSToTEI.groovy (revision 2996)
163 163
	}
164 164

  
165 165
	
166
	boolean ignoreFirstSync = false // need to skip fist Sync when multiple locutors in Turn
166
	boolean overlapingTurn = false // need to skip fist Sync when multiple locutors in Turn
167 167
	/**
168 168
	 * Process.
169 169
	 *
......
182 182
			case "Turn": // >> sp
183 183
				testCloseU();
184 184
				vSpeaker = parser.getAttributeValue(null, "speaker");
185
				overlapingTurn = false
185 186
				if (vSpeaker == null) { vSpeaker="N/A"	// no spk
186 187
				} else {
187
					localspeakers = vSpeaker.split(" ")
188
					if (localspeakers.size() == 0) { // only one speaker
189
						//println "FOUND ONE SPEAKER"
190
						if (speakersname.containsKey(vSpeaker)) {
191
							vSpeaker = speakersname.get(vSpeaker);
188
					
189
					if (speakersname.containsKey(vSpeaker)) {
190
						//vSpeaker = speakersname.get(vSpeaker);
191
					} else {
192
						localspeakers = vSpeaker.split(" ")
193
						if (localspeakers.size() > 1) { // only one speaker
194
							overlapingTurn = true
192 195
						}
193 196
					}
194 197
				}
195 198

  
196 199
				writer.writeStartElement("sp")
197 200
				writer.writeAttribute("n", Integer.toString(idturn++))
198
				ignoreFirstSync = vSpeaker.contains(" ") // need to skip fist Sync when multiple locutors in Turn
199
				writer.writeAttribute("overlap", ""+ignoreFirstSync)
201
				overlapingTurn = vSpeaker.contains(" ") // need to skip fist Sync when multiple locutors in Turn
202
				writer.writeAttribute("overlap", ""+overlapingTurn)
200 203
				
201 204
				String time = parser.getAttributeValue(null, "startTime");
202 205
				formatedTime = formatTime(time)
......
217 220
			case "Sync": // >> u
218 221
				lastTime = parser.getAttributeValue(null, "time")
219 222
				testCloseU();
220
				if (ignoreFirstSync) { // need to skip fist Sync when multiple locutors in Turn
221
					ignoreFirstSync = false;
222
				} else {
223
//				if (overlapingTurn) { // need to skip fist Sync when multiple locutors in Turn
224
//					overlapingTurn = false;
225
//				} else {
226
//					
227
//				}
228
				if (!overlapingTurn) {
223 229
					writeU()
224 230
				}
225 231
				break;
......
279 285
	private testCloseU() {
280 286
		if (uOpened) {
281 287
			super.processEndElement(); // u
288
			writer.writeCharacters("\n")
282 289
			uOpened = false;
283 290
		}
284 291
	}
......
295 302
//		println "getting spk name? ="+speakers.get(vSpeaker)
296 303
//		println "speakers: $speakers"
297 304
		def attributes = speakers.get(vSpeaker)
305
		//println "ATTRIBUTES="+attributes+" vSpeaker='$vSpeaker'"
298 306
		if (attributes == null) { // in case of Who@n wrong number
299
			if (vSpeaker.startsWith("#") && vSpeaker.endsWith("?")) { // don't show "N/A" vSpeaker
307
//			if (vSpeaker.startsWith("#") && vSpeaker.endsWith("?")) { // don't show "N/A" vSpeaker
300 308
				writer.writeAttribute("who", vSpeaker)
301 309
				writer.writeAttribute("spkid", vSpeaker)
302
				writeAttributes();
303
			}
310
				//writeAttributes();
311
//			} else {
312
//				
313
//			}
304 314
		} else {
305 315
			for (Pair p : attributes) {
306 316
//				println " write attribute "+p.getFirst()+" "+p.getSecond()
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/pager.groovy (revision 2996)
424 424
									def g = l2[i]
425 425
									metadata[m] = "" // forcing order of metadata by pre-declaring
426 426
									
427
									if (!metadataGroups.containsKey(g)) metadataGroups[g] = []
428
									
427
									if (!metadataGroups.containsKey(g)) {
428
										metadataGroups[g] = []
429
									}
429 430
									metadataGroups[g] << m // declaring a metadata type
430 431
								}
431 432
								metadataDeclared = true
432 433
							}
433 434
						
434
							//store attributes values in HashMap
435
						//store attributes values in HashMap
435 436
							for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
436 437
								String name = parser.getAttributeLocalName(i)
437 438
								if (!"type".equals(name)
......
441 442
								&& !"startTime".equals(name)
442 443
								&& !"endTime".equals(name)) {
443 444
									metadata[name] = parser.getAttributeValue(i)
444
								
445
									
445 446
									if (!metadataDeclared && !metadataGroups["metadata"].contains(name)) {
446 447
										metadataGroups["metadata"] << name
447 448
									}
448 449
								}
449 450
							}
450 451
						
451
							// write metadata HTML
452
						// write metadata HTML
452 453
							if (metadataGroups.keySet().size() > 0) {
453 454
								writer.writeStartElement("p")
454 455
								writer.writeAttribute("class", "section-all-metadata");
......
497 498
							writer.writeAttribute("class", "turn");
498 499
						
499 500
							overlapping = ("true" == parser.getAttributeValue(null,"overlap"))
500
							String spid = parser.getAttributeValue(null,"speaker");
501
							String spid = parser.getAttributeValue(null, "who");
501 502
						
502 503
							whos = []
503 504
							if (overlapping) {
504
								writer.writeEmptyElement("br");
505
								writeSpeaker(parser.getAttributeValue(null,"speaker"), false)
505
								//writer.writeEmptyElement("br"); // write all overlapping speakers
506
								//writeSpeaker(""+parser.getAttributeValue(null, "who"), false)
506 507
								
507 508
								writer.writeEmptyElement("br");
508 509
								whos = spid.split(" ")
......
521 522
								writer.writeEmptyElement("br");
522 523
							}
523 524
						
524
							String spk = parser.getAttributeValue(null, "spk")
525
							String spk = parser.getAttributeValue(null, "who")
525 526
							if (spk != null && spk != previousSPK) {
526 527
								endBoldIfNeeded()
527 528
								writer.writeEmptyElement("br");
528
								writeSpeaker(parser.getAttributeValue(null, "spk"), overlapping)
529
								writeSpeaker(parser.getAttributeValue(null, "who"), overlapping)
529 530
								startBoldIfNeeded()
530 531
							}
531 532
						
532 533
							writeCurrentTime()
533 534
							previousSPK = spk
535
							if (overlapping) previousSPK = null
534 536
						
535 537
						//							writenLength = 0;
536 538
						/*writer.writeStartElement("span");
......
546 548
							desc = translateEvent(desc);
547 549
							String type = parser.getAttributeValue(null,"type");
548 550
							if (desc.equals("paroles rapportées")) {
549
								if (parser.getAttributeValue(null, "extent") == "end")
551
								if (parser.getAttributeValue(null, "extent") == "end") {
550 552
									writer.writeCharacters("» ");
551
								else if (parser.getAttributeValue(null, "extent") == "begin")
553
								}
554
								else if (parser.getAttributeValue(null, "extent") == "begin") {
552 555
									writer.writeCharacters(" «");
556
								}
553 557
							} else {
554 558
								writer.writeStartElement("span");
555 559
								writer.writeAttribute("class", "event");
......
564 568
									events.add(desc)
565 569
								}
566 570
								else if (parser.getAttributeValue(null, "extent") == "previous") {
567
									if(parser.getAttributeValue(null, "type") == "pronounce")
571
									if (parser.getAttributeValue(null, "type") == "pronounce")
568 572
										writer.writeCharacters("_["+desc+"] ");
569 573
									else
570 574
										writer.writeCharacters("_["+desc+"] ");
......
574 578
									writer.writeCharacters(" ["+desc+"]_");
575 579
									nextEvent = desc
576 580
								}
577
								else
581
								else {
578 582
									writer.writeCharacters(" ["+desc+"] ");
583
								}
579 584
								writer.writeEndElement(); // span@class=event
580 585
							}
581 586
							break;
......
692 697
							if(l > 0)
693 698
								endOfLastWord = lastword.subSequence(l-1, l);
694 699
						
695
							if(interpvalue != null)
700
							if (interpvalue != null) {
696 701
								interpvalue = interpvalue.replace("\"","&quot;");
697
							if(events.size() > 0)
702
							}
703
							if (events.size() > 0) {
698 704
								interpvalue = interpvalue.replace("event=", "event="+events.toString().replace("\"","&quot;")); // remove ", "
699
						
700
							if(nextEvent.length() > 0)
701
							{
705
							}
706
							if (nextEvent.length() > 0) {
702 707
								interpvalue = interpvalue.replace("event=", "event="+nextEvent+", ")
703 708
								nextEvent = ""
704 709
							}
......
709 714
						//							println "NoSpaceAfter: "+NoSpaceAfter+" contains ? "+lastword
710 715
						//							println "wordvalue starts with '-' ? "+wordvalue
711 716
						//							println "NoSpaceAfter: "+NoSpaceAfter+" contains endOfLastWord ? "+endOfLastWord
712
							if(NoSpaceBefore.contains(wordvalue) ||
717
							if (NoSpaceBefore.contains(wordvalue) ||
713 718
							NoSpaceAfter.contains(lastword) ||
714 719
							wordvalue.startsWith("-") ||
715 720
							NoSpaceAfter.contains(endOfLastWord)) {
......
755 760
					break;
756 761
				
757 762
				case XMLStreamConstants.CHARACTERS:
758
					if(flagform)
759
						if(parser.getText().length() > 0)
763
					if (flagform) {
764
						if (parser.getText().length() > 0) {
760 765
							wordvalue+=(parser.getText().trim());
761
					if(flaginterp)
762
						if(parser.getText().length() > 0)
766
						}
767
					}
768
					if (flaginterp) {
769
						if (parser.getText().length() > 0) {
763 770
							interpvalue+=(parser.getText().trim());
771
						}
772
					}
764 773
					break;
765 774
			}
766 775
		}
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZDefaultPagerStep.groovy (revision 2996)
290 290
	public boolean process() {
291 291
		
292 292
		try {
293
			def anaValues = [:]
294
			def anaType = ""
295
			def anaResp = ""
296
			def anaValue = new StringBuilder()
297
			
293 298
			boolean flagNote = false;
294 299
			String noteContent = "";
295 300
			String rend = ""
......
446 451
								break;
447 452
							case wordTag:
448 453
								wordid = getAttributeValue(parser, null,"id");
449
							
454
								anaValues.clear()
450 455
								wordcount++;
451 456
								if (wordcount >= wordmax) {
452 457
									createNextOutput();
......
460 465
								break;
461 466
							case "ana":
462 467
								flaginterp=true;
463
								interpvalue+=" "+getAttributeValue(parser, null, "type").substring(1)+":"
468
								anaType = getAttributeValue(parser, null, "type").substring(1)
469
								anaResp = getAttributeValue(parser, null, "resp").substring(1)
470
								anaValue.setLength(0)
464 471
								break;
465 472
							case "form":
466 473
								wordvalue=""
467
								interpvalue =""
468 474
								flagform=true
469 475
								break;
470 476
							default:
......
540 546
								break;
541 547
							case "ana":
542 548
								flaginterp = false
549
								if (anaValues[anaType] == null || "src".equals(anaResp)) {
550
									anaValues[anaType] = anaValue.toString().trim()
551
								}
543 552
								break;
544 553
							case wordTag:
545 554
								int l = lastword.length();
......
548 557
									endOfLastWord = lastword.subSequence(l-1, l)
549 558
								}
550 559
							
551
								if (interpvalue != null) {
552
									interpvalue = interpvalue
553
								}
560
								String interpvalue = anaValues.entrySet().join(", ")
561
								
554 562
								if (NoSpaceBefore.contains(wordvalue) ||
555 563
								NoSpaceAfter.contains(lastword) ||
556 564
								wordvalue.startsWith("-") ||
......
591 599
								noteContent += parser.getText().replace("\n", " ")
592 600
							}
593 601
						} else	if (flaginterp && parser.getText().length() > 0) {
594
							interpvalue+=(parser.getText())
602
							anaValue.append(parser.getText())
595 603
						} else if (flagNote == parser.getText().length() > 0) {
596 604
							noteContent += parser.getText().replace("\n", " ")
597 605
						} else if (writeOutOfTextToEditText) {
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xml/pager.groovy (revision 2996)
72 72
	/** The wordvalue. */
73 73
	String wordvalue = "";
74 74

  
75
	/** The interpvalue. */
76
	String interpvalue = "";
77

  
78
	/** The lastword. */
75
		/** The lastword. */
79 76
	String lastword = " ";
80 77

  
81 78
	/** The wordtype. */
......
282 279

  
283 280
		String localname = "";
284 281
		createNextOutput();
282
		def anaValues = [:]
283
		def anaType = ""
284
		def anaResp = ""
285
		def anaValue =  new StringBuilder()
285 286
		for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
286 287
			switch (event) {
287 288
				case XMLStreamConstants.START_ELEMENT:
......
352 353
						case "w":
353 354
						
354 355
							wordid = parser.getAttributeValue(null,"id");
355
							
356
							anaValues.clear()
356 357
							wordcount++;
357 358
							if (wordcount >= wordmax) {
358 359
								createNextOutput();
......
366 367
							break;
367 368
						case "ana":
368 369
							flaginterp=true;
369
							interpvalue+=" "+parser.getAttributeValue(null,"type").substring(1)+":"
370
							anaType = parser.getAttributeValue(null, "type").substring(1)
371
							anaResp = parser.getAttributeValue(null, "resp").substring(1)
372
							anaValue.setLength(0)
370 373
							break;
371 374
						case "form":
372 375
							wordvalue="";
373
							interpvalue ="";
374 376
							flagform=true;
375 377
							break;
376 378
						//						default:
......
406 408
							break;
407 409
						case "ana":
408 410
							flaginterp = false
411
							if (anaValues[anaType] == null || "src".equals(anaResp)) {
412
								anaValues[anaType] = anaValue.toString().trim()
413
							}
409 414
							break;
410 415
						case "w":
411 416
							int l = lastword.length();
......
413 418
							if (l > 0)
414 419
								endOfLastWord = lastword.subSequence(l-1, l);
415 420

  
416
							if (interpvalue != null)
417
								interpvalue = interpvalue;
418

  
421
							String interpvalue = anaValues.entrySet().join(", ")
422
							
419 423
							if (NoSpaceBefore.contains(wordvalue) ||
420 424
							NoSpaceAfter.contains(lastword) ||
421 425
							wordvalue.startsWith("-") ||
......
441 445
						if (flagNote == parser.getText().length() > 0)
442 446
							noteContent += parser.getText().replace("\n", " ");
443 447
					} else	if (flaginterp && parser.getText().length() > 0) {
444
						interpvalue+=(parser.getText());
448
						anaValue.append(parser.getText());
445 449
					} else if (flagNote == parser.getText().length() > 0) {
446 450
						noteContent += parser.getText().replace("\n", " ");
447 451
					}
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xml/importer.groovy (revision 2996)
34 34

  
35 35
import org.txm.*
36 36
import org.txm.scripts.importer.*
37
import org.txm.utils.io.FileCopy
37 38
import org.txm.importer.scripts.filters.*
38 39
import org.txm.objects.*
39 40
import org.txm.scripts.*
......
144 145
		srcDirectory.mkdir()
145 146
		for (File f : okfiles) {
146 147
			File outputFile = new File (srcDirectory, f.getName())
148
//			println "TEMP REMOVED SURROGATE FIX"
149
//			FileCopy.copy(f, outputFile)
147 150
			CleanFile.removeSurrogateFromXmlFile(f, outputFile)
148 151
		}
149 152
		okfiles = srcDirectory.listFiles()

Formats disponibles : Unified diff