Révision 3463

TXM/trunk/org.txm.searchengine.cqp.core/src/org/txm/importer/cwb/CwbProcess.java (revision 3463)
164 164
		return true;
165 165
	}
166 166
	
167
	public static String getOutputMessages(Process process) throws InterruptedException {
168
		final InputStream is = process.getInputStream();
169
		StringBuffer buffer = new StringBuffer();
170
		Thread t = new Thread() {
171
			@Override
172
			public void run() {
173
				try {
174
					InputStreamReader isr = new InputStreamReader(is);
175
					BufferedReader br = new BufferedReader(isr);
176
					String line;
177
					while ((line = br.readLine()) != null) {
178
						buffer.append(line+"\n");
179
					}
180
					isr.close();
181
				} catch (IOException e) {
182
					e.printStackTrace();
183
				}
184
			}
185
		};
186
		t.start();
187
		t.join();
188
		
189
		return buffer.toString();
190
		
191
	}
192
	
167 193
	public void endProcess() throws InterruptedException {
168 194
		this.process.waitFor();
169 195

  
TXM/trunk/org.txm.searchengine.cqp.core/src/org/txm/importer/cwb/CompressCQPIndexes.java (revision 3463)
1
// Copyright © 2010-2022 ENS de Lyon., University of Franche-Comté
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2016-05-24 10:43:03 +0200 (Tue, 24 May 2016) $
25
// $LastChangedRevision: 3216 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.importer.cwb;
29

  
30
import java.io.BufferedReader;
31
import java.io.File;
32
import java.io.FileInputStream;
33
import java.io.FileOutputStream;
34
import java.io.IOException;
35
import java.io.InputStreamReader;
36
import java.io.OutputStreamWriter;
37
import java.util.ArrayList;
38
import java.util.Arrays;
39

  
40
import org.txm.core.messages.TXMCoreMessages;
41
import org.txm.searchengine.cqp.core.messages.CQPSearchEngineCoreMessages;
42
import org.txm.utils.OSDetector;
43
import org.txm.utils.io.IOUtils;
44
import org.txm.utils.logger.Log;
45

  
46
/**
47
 * The Class CompressCQPIndexes.
48
 */
49
public class CompressCQPIndexes {
50
	
51
	
52
	public static boolean compressAll(File cqpToolsDirectory, File registryfile, String corpusid, File dataDirectory, boolean txm081fix)
53
			throws IOException, InterruptedException {
54
		//Runtime.getRuntime().
55
		
56
		if (!registryfile.exists()) {
57
			System.out.println("No registry file found: "+registryfile);
58
			return false;
59
		}
60
		
61
		if (!dataDirectory.exists()) {
62
			System.out.println("No data directory found: "+dataDirectory);
63
			return false;
64
		}
65
		
66
		File huff = new File(cqpToolsDirectory, "cwb-huffcode");
67
		if (OSDetector.isFamilyWindows()) {
68
			huff = new File(cqpToolsDirectory, "cwb-huffcode.exe");
69
		}
70
		
71
		File rdxcompressor = new File(cqpToolsDirectory, "cwb-compress-rdx");
72
		if (OSDetector.isFamilyWindows()) {
73
			rdxcompressor = new File(cqpToolsDirectory, "cwb-compress-rdx.exe");
74
		}
75
		
76
		if (!huff.exists()) {
77
			System.out.println("No huff executable found: "+huff);
78
			return false;
79
		}
80
		
81
		if (!rdxcompressor.exists()) {
82
			System.out.println("No rdxcompressor executable found: "+rdxcompressor);
83
			return false;
84
		}
85
		
86
		ArrayList<String> args = new ArrayList<>(Arrays.asList(huff.getAbsolutePath(), "-T", "-r", registryfile.getParent()));
87
		
88
//		ReadRegistryFile rrf = new ReadRegistryFile(registryfile);
89
//		rrf.read();
90
//		for (String p : rrf.pAttributes) {
91
//			args.add("-P");
92
//			args.add(p);
93
//		}
94
		args.add("-A");
95
		args.add(corpusid);
96
		
97
		ProcessBuilder processBuilder = new ProcessBuilder(args);
98
		processBuilder.redirectErrorStream(true);
99
		Process process = processBuilder.start();
100
		String messages = CwbProcess.getOutputMessages(process);
101
		System.out.println(messages);
102
		if (process.exitValue() != 0) {
103
			System.out.println("Error while compressing with huff");
104
			return false;
105
		}
106
		
107
		ArrayList<String> args2 = new ArrayList<>(Arrays.asList(rdxcompressor.getAbsolutePath(), "-T", "-r", registryfile.getParent()));
108
		
109
//		for (String p : rrf.pAttributes) {
110
//			File f = new File(dataDirectory, p+".corpus");
111
//			if (f.length() > 0) {
112
//				args2.add("-P");
113
//				args2.add(p);
114
//			}
115
//		}
116
		args2.add("-A");
117
		args2.add(corpusid);
118
		
119
		processBuilder = new ProcessBuilder(args2);
120
		processBuilder.redirectErrorStream(true);
121
		process = processBuilder.start();
122
		messages = CwbProcess.getOutputMessages(process);
123
		System.out.println(messages);
124
		if (process.exitValue() != 0) {
125
			System.out.println("Error while compressing rdx files");
126
			return false;
127
		}
128
		
129
		// remove .corpus .corpus.rdx and corpus.rev files
130
		int s = 0;
131
		int a = 0;
132
		for (File f : dataDirectory.listFiles()) {
133
			if (f.getName().endsWith(".corpus") || f.getName().endsWith(".corpus.rdx") ||f.getName().endsWith(".corpus.rev")) {
134
				s += f.length();
135
				f.delete();
136
				if (txm081fix) f.createNewFile();
137
			}
138
			if (f.getName().matches(".+(\\.hcd|\\.huf|\\.huf\\.syn|\\.crc|\\.crx)")) {
139
				a += f.length();
140
			}
141
		}
142
		
143
		System.out.println("cleared: "+s);
144
		System.out.println("created: "+a);
145
		System.out.println("diff="+(s-a));
146
		return true;
147
	}
148

  
149
	/**
150
	 * The main method.
151
	 *
152
	 * @param args the arguments
153
	 */
154
	public static void main(String[] args) {
155
		try {
156
			String userdir = System.getProperty("user.home");
157
			File tools = new File(userdir, "workspace-cpp/CWB-lib/src/builds/linux-64"); //$NON-NLS-1$
158
			File registry = new File(userdir, "runtime-rcpapplication.product/corpora/NOV13-P1/registry/nov13-p1"); //$NON-NLS-1$
159
			File data = new File(userdir, "runtime-rcpapplication.product/corpora/NOV13-P1/data/NOV13-P1"); //$NON-NLS-1$
160
			CompressCQPIndexes.compressAll(tools, registry, "NOV13-P1", data, true);
161

  
162
		} catch (Exception e) {
163
			e.printStackTrace();
164
		}
165
	}
166
}
0 167

  
TXM/trunk/org.txm.searchengine.cqp.core/src/org/txm/importer/cwb/ReadRegistryFile.java (revision 3463)
130 130
		ArrayList<String> errors = new ArrayList<>();
131 131
		
132 132
		// test p-attributes
133
		String[] exts = { ".corpus", ".lexicon", ".corpus.cnt", ".corpus.rdx", ".corpus.rev", ".lexicon.idx", ".lexicon.srt" };
133
		String[] exts = { ".lexicon", ".corpus.cnt", ".lexicon.idx", ".lexicon.srt" };
134 134
		for (String p : pAttributes) {
135 135
			for (String ext : exts) {
136 136
				File f = new File(dataDirectory, p + ext);
......
141 141
			}
142 142
		}
143 143
		
144
		// test p-attributes with optional compression files
145
		String[] extsCompressed = { ".corpus	.hcd	.huf	.huf.syn", ".corpus.rdx	.crc	.crx", ".corpus.rev	.crc	.crx"};
146
		for (String p : pAttributes) {
147
			for (String extsTabulated : extsCompressed) {
148
				String[] exts2 = extsTabulated.split("\t");
149
				String ext = exts2[0];
150
				File f = new File(dataDirectory, p + ext);
151
				if (!f.exists()) {
152
					// System.out.println("MISSING: " + f.exists() + " " + f.getAbsolutePath());
153
					for (int i = 1 ; i < exts2.length ; i++) {
154
						f = new File(dataDirectory, p + exts2[i]);
155
						if (!f.exists()) {
156
							errors.add(f.getName());
157
						}
158
					}
159
				}
160
			}
161
		}
162
		
144 163
		String[] sexts = { ".rng" };
145 164
		String[] spexts = { ".avs", ".avx", ".rng" };
146 165
		for (String s : sattrs.keySet()) {
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/projects/nov13/CreateTheOtherTurns.groovy (revision 3463)
132 132
						writer.writeEndElement() // w
133 133
					}
134 134
					
135
					def startOtherReg = /^\*([^*])?([^\p{Zs}]+.*)$/
135
					def startOtherReg = /^([^*])?\*([^\p{Zs}]+.*)$/
136 136
					def endOtherReg = /^(.*[^\p{Zs}]+)\*([^*])?$/
137 137
					def startAndEndOtherReg = /^([^*])?\*(.*[^\p{Zs}]+)\*([^*])?$/
138 138
					String previousOtherStarting = "<none>"

Formats disponibles : Unified diff