Révision 3466

TXM/trunk/org.txm.searchengine.cqp.core/src/org/txm/importer/cwb/CompressCQPIndexes.java (revision 3466)
87 87
		
88 88
		ArrayList<String> args = new ArrayList<>(Arrays.asList(huff.getAbsolutePath(), "-T", "-r", registryfile.getParent()));
89 89
		
90
//		ReadRegistryFile rrf = new ReadRegistryFile(registryfile);
91
//		rrf.read();
92
//		for (String p : rrf.pAttributes) {
93
//			args.add("-P");
94
//			args.add(p);
95
//		}
90
		//		ReadRegistryFile rrf = new ReadRegistryFile(registryfile);
91
		//		rrf.read();
92
		//		for (String p : rrf.pAttributes) {
93
		//			args.add("-P");
94
		//			args.add(p);
95
		//		}
96 96
		args.add("-A");
97 97
		args.add(corpusid);
98 98
		
......
108 108
		
109 109
		ArrayList<String> args2 = new ArrayList<>(Arrays.asList(rdxcompressor.getAbsolutePath(), "-T", "-r", registryfile.getParent()));
110 110
		
111
//		for (String p : rrf.pAttributes) {
112
//			File f = new File(dataDirectory, p+".corpus");
113
//			if (f.length() > 0) {
114
//				args2.add("-P");
115
//				args2.add(p);
116
//			}
117
//		}
111
		//		for (String p : rrf.pAttributes) {
112
		//			File f = new File(dataDirectory, p+".corpus");
113
		//			if (f.length() > 0) {
114
		//				args2.add("-P");
115
		//				args2.add(p);
116
		//			}
117
		//		}
118 118
		args2.add("-A");
119 119
		args2.add(corpusid);
120 120
		
......
128 128
			return false;
129 129
		}
130 130
		
131
		// remove .corpus .corpus.rdx and corpus.rev files
131
		// remove .corpus files if the compression was successful
132 132
		int s = 0;
133 133
		int a = 0;
134 134
		for (File f : dataDirectory.listFiles()) {
135
			if (f.getName().endsWith(".corpus") || f.getName().endsWith(".corpus.rdx") ||f.getName().endsWith(".corpus.rev")) {
136
				s += f.length();
137
				f.delete();
138
				if (txm081fix) f.createNewFile();
135
			if (f.getName().endsWith(".corpus")) {
136
				
137
				
138
				String path = f.getAbsolutePath();
139
				path = path.substring(0, path.length() - 7)+".huf";
140
				File cfile = new File(path);
141
				if (cfile.exists()) {
142
					s += f.length();
143
					f.delete();
144
					if (txm081fix) f.createNewFile();
145
				} else {
146
					continue;
147
				}
139 148
			}
140
			if (f.getName().matches(".+(\\.hcd|\\.huf|\\.huf\\.syn|\\.crc|\\.crx)")) {
149
			if (f.getName().matches(".+(\\.hcd|\\.huf|\\.huf\\.syn)")) {
141 150
				a += f.length();
142 151
			}
143 152
		}
144 153
		
154
		// remove .corpus.rdx and .corpus.rev files if the compression was successful
155
		for (File f : dataDirectory.listFiles()) {
156
			if (f.getName().endsWith(".corpus.rdx") ||f.getName().endsWith(".corpus.rev")) {
157
				
158
				String path = f.getAbsolutePath();
159
				path = path.substring(0, path.length() - 7)+".crc";
160
				File cfile = new File(path);
161
				if (cfile.exists()) {
162
					s += f.length();
163
					f.delete();
164
					if (txm081fix) f.createNewFile();
165
				} else {
166
					continue;
167
				}
168
			}
169
			if (f.getName().matches(".+(\\.crc|\\.crx)")) {
170
				a += f.length();
171
			}
172
		}
173
		
145 174
		System.out.println("cleared: "+s);
146 175
		System.out.println("created: "+a);
147 176
		System.out.println("diff="+(s-a));
148 177
		return true;
149 178
	}
150

  
179
	
151 180
	/**
152 181
	 * The main method.
153 182
	 *
......
156 185
	public static void main(String[] args) {
157 186
		try {
158 187
			String userdir = System.getProperty("user.home");
159
			File tools = new File(userdir, "workspace-cpp/CWB-lib/src/builds/linux-64"); //$NON-NLS-1$
188
			File tools = new File(userdir, "SVN/txm-sf/CWB/cwb-lib/src/builds/linux-64"); //$NON-NLS-1$
160 189
			File registry = new File(userdir, "runtime-rcpapplication.product/corpora/NOV13-P1/registry/nov13-p1"); //$NON-NLS-1$
161 190
			File data = new File(userdir, "runtime-rcpapplication.product/corpora/NOV13-P1/data/NOV13-P1"); //$NON-NLS-1$
162 191
			CompressCQPIndexes.compressAll(tools, registry, "NOV13-P1", data, true);
163

  
192
			
164 193
		} catch (Exception e) {
165 194
			e.printStackTrace();
166 195
		}
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/projects/nov13/CreateTheOtherTurns.groovy (revision 3466)
146 146
							boolean shouldCloseOtherTurn = false;
147 147
							
148 148
							def m0 = word =~ startAndEndOtherReg
149
							if (m0.matches()) {
149
							def m1 = word =~ startOtherReg
150
							def m2 = word =~ endOtherReg
151
							
152
							if (word.trim().equals("*")) {
153
								if (debug) println "- ligne "+parser.getLocation().getLineNumber()+" : ouverture|fermeture de other avec '$word' -> tours '$turnInfos'"
154
								
155
								if (other) { // closing *
156
									previousOtherStarting = ["word='*' location="+getLocation(true, false, false)]
157
									
158
									shouldCloseOtherTurn = true;
159
									
160
									wordToWrite = ""
161
								} else {
162
									previousOtherStarting = ["word='*' location="+getLocation(true, false, false)]
163
									
164
									//if (other) { // don't restart a Turn if already in a Other Turn
165
										writer.writeEndElement() // current Turn
166
										writer.writeCharacters("\n")
167
										
168
										def tmpInfos = new LinkedHashMap()
169
										for (String attr : turnInfos.keySet()) tmpInfos[attr] = turnInfos[attr]
170
										tmpInfos["orig-speaker"] = turnInfos["speaker"]
171
										
172
										if (primarySpeakerIdRegex == null || turnInfos["speaker"] ==~ primarySpeakerIdRegex) { // the current speaker is not the primary speaker
173
											tmpInfos["speaker"] = otherNonPrimarySpeakerId
174
										} else {
175
											tmpInfos["speaker"] = primarySpeakerId
176
										}
177
										tmpInfos["startTime"] = currentTime
178
										writer.writeStartElement("Turn")
179
										for (String attr : tmpInfos.keySet()) {
180
											writer.writeAttribute(attr, tmpInfos[attr])
181
										}
182
										writer.writeCharacters("\n")
183
										
184
										writer.writeStartElement("Sync")
185
										writer.writeAttribute("time", tmpInfos["startTime"])
186
										writer.writeCharacters("\n")
187
										writer.writeEndElement()
188
									//}
189
									
190
									other = true
191
									wordToWrite = ""
192
								}
193
							} else if (m0.matches()) {
150 194
								if (other) {
151 195
									warnings << getLocation(true, false, false)+" with $word: Found a starting&ending * when one 'other' have been started at "+previousOtherStarting
152 196
								}
......
188 232
								wordToWrite = m0.group(2)
189 233
								other = false
190 234
								//}
191
							}
192
							
193
							def m1 = word =~ startOtherReg
194
							
195
							if (!m0.matches() && m1.matches()) { // not and start&end but only a start
235
							} else if (m1.matches()) { // not and start&end but only a start
196 236
								
197 237
								if (other) {
198 238
									warnings << getLocation(true, false, false)+" with $word: Found a starting * when one 'other' have been started at "+previousOtherStarting
......
236 276
								other = true
237 277
								wordToWrite = m1.group(2)
238 278
								//}
239
							}
240
							
241
							def m2 = word =~ endOtherReg
242
							if (!m1.matches() && !m0.matches() && m2.matches()) {
279
							} else if (m2.matches()) {
243 280
								if (debug) println "- ligne "+parser.getLocation().getLineNumber()+" : fermeture de other avec '$word' -> tours '$turnInfos'"
244 281
								
245 282
								if (!other) {
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/SegmentTRSInSectionFromMarkerMacro.groovy (revision 3466)
105 105
				String content = null;
106 106
				if (node instanceof String) {
107 107
					content = node
108
				} else if (node instanceof groovy.util.Node && node.name() == "w" && node.text().contains(newSectionMarker)) {
108
				} else if (node instanceof groovy.util.Node && node.name() == "w") {
109 109
					content = node.text().trim()
110 110
					start = Float.parseFloat(node.@time)
111 111
				}
112 112
				
113
				if (content.equals(newSectionMarker)) {
113
				if (content != null && (content.equals(newSectionMarker) || content.startsWith(newSectionMarker) || content.endsWith(newSectionMarker))) {
114
					
114 115
					if (debug) println "New section at $turn with $node child node"
115 116
					previousSection = currentSection
116 117
					currentSection = new Node(trsEpisode, "Section", new LinkedHashMap(["type":newSectionMarker, "startTime":turn.@startTime, "endTime":previousSection.attributes()["endTime"]]))
......
128 129
						newTurn.attributes()["startTime"] = start
129 130
						turn.attributes()["endTime"] = start
130 131
						
132
						def syncNode = new Node(newTurn, "Sync", new LinkedHashMap())
133
						syncNode.attributes()["time"] = start
131 134
						nFound++
132 135
						
133 136
					}
134 137
					
135 138
					children.remove(i) // remove the mark
136 139
					i--
140
					if (content.startsWith(newSectionMarker)) { // remove the marker and keep the tail content
141
						node.value = node.text().substring(newSectionMarker.length())
142
						newTurn.children().add(node)
143
					} else if (content.endsWith(newSectionMarker)) { // remove the marker and keep the head content
144
						node.value = node.text().substring(0, node.text().length() - newSectionMarker.length())
145
						newTurn.children().add(node)
146
					}
137 147
					
148
					
138 149
				} else if (newTurn != null) {
139 150
					turn.children().remove(i)
140 151
					i--
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/Vocapia2Transcriber.groovy (revision 3466)
82 82
							case "Speaker": // <Speaker ch="1" dur="531.38" gender="X" spkid="Enquêtrice" lang="fre" lconf="1.00" nw="1586" tconf="0.95"/> -> <Speaker id="spk1" name="enq4" check="no" dialect="native" accent="" scope="local"/>
83 83
							
84 84
								writer.writeStartElement("Speaker")
85
								writer.writeAttribute("id", parser.getAttributeValue(null, "spkid"))
86
								writer.writeAttribute("name", parser.getAttributeValue(null, "spkid"))
85
								writer.writeAttribute("id", parser.getAttributeValue(null, "spkid").trim())
86
								writer.writeAttribute("name", parser.getAttributeValue(null, "spkid").trim())
87 87
								writer.writeAttribute("check", "")
88 88
								writer.writeAttribute("dialect", parser.getAttributeValue(null, "lang"))
89 89
								writer.writeAttribute("accent", parser.getAttributeValue(null, "gender"))
......
94 94
								/**
95 95
								 * remove the additional speaker if already written
96 96
								 */
97
								if (additionalSpeakers.containsKey(parser.getAttributeValue(null, "spkid"))) {
98
									additionalSpeakers.remove(parser.getAttributeValue(null, "spkid"))
97
								if (additionalSpeakers.containsKey(parser.getAttributeValue(null, "spkid").trim())) {
98
									additionalSpeakers.remove(parser.getAttributeValue(null, "spkid").trim())
99 99
								}
100 100
								break;
101 101
								
......
119 119
								writer.writeStartElement("Turn")
120 120
								for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
121 121
									String name = parser.getAttributeLocalName(i)
122
									String value = parser.getAttributeValue(i)
122 123
									if (name == "stime") name = "startTime"
123 124
									else if (name == "etime") name = "endTime"
124
									else if (name == "spkid") name = "speaker"
125
									
126
									writer.writeAttribute(name, parser.getAttributeValue(i))
125
									else if (name == "spkid") {
126
										name = "speaker"
127
										value = value.trim()
128
									}
129
									writer.writeAttribute(name, value)
127 130
								}
128 131
							
129 132
								writer.writeCharacters("\n")

Formats disponibles : Unified diff