Revision 2288 tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZCompiler.groovy

XTZCompiler.groovy (revision 2288)
80 80
		// get all anatypes
81 81
		sattrsListener = SAttributesListener.scanFiles(inputDirectory, wtag)
82 82
		def texts = module.getProject().getTexts()
83
		println "-- Listing structures&properties to create for "+texts.size()+" texts..."
83
		println "-- Scanning structures&properties to create for "+texts.size()+" texts..."
84 84
		ConsoleProgressBar cpb = new ConsoleProgressBar(texts.size())
85 85
		for (Text t : texts) {
86 86
			try {
......
130 130
		//				anatypes << type
131 131
	}
132 132

  
133
	def cqpFiles = []
133
	def cqpFiles = [] // ordered cqp files to concat before calling cwb-encode
134 134
	int cqpFilesUpdated = 0;
135 135
	public boolean doCQPStep() {
136
		println "-- Building CQP files $inputDirectory..."
136
		
137 137
		cqpDirectory.mkdir(); // if not created
138 138

  
139
		def texts = module.getProject().getTexts()
140
		ConsoleProgressBar cpb = new ConsoleProgressBar(texts.size())
141
		cqpFilesUpdated = 0;
142
		for (Text text : texts) {
143
			cpb.tick();
144

  
139
		def texts = orderedTextIDs.collect() { id -> module.getProject().getText(id) }
140
		def textsToProcess = texts.findAll() { text ->
145 141
			File xmlFile = text.getXMLTXMFile()
146 142
			String textname = text.getName()
147 143

  
148 144
			File cqpFile = new File(cqpDirectory, textname + ".cqp")
149
			cqpFiles << cqpFile
150

  
145
			cqpFiles << cqpFile // insert cqp files to concat later
151 146
			// skip step if cqpFile exists AND is more recent than the XML-TXM File
152 147
			boolean mustBuild = false;
153 148
			if (!cqpFile.exists() || xmlFile.lastModified() >= cqpFile.lastModified()) {
154
				mustBuild = true
149
				return true
155 150
			}
156 151

  
157 152
			if (!text.isDirty() && !mustBuild) {
158 153
				Log.finer("skipping .cqp step of $text");
159
				continue
154
				return false
160 155
			}
161 156
			
157
			return true
158
		}
159
		println "-- Building CQP files ${textsToProcess.size()}/${texts.size()}..."
160
		
161
		ConsoleProgressBar cpb = new ConsoleProgressBar(textsToProcess.size())
162
		cqpFilesUpdated = 0;
163
		for (Text text : textsToProcess) {
164
			cpb.tick();
165

  
166
			File xmlFile = text.getXMLTXMFile()
167
			String textname = text.getName()
168

  
169
			File cqpFile = new File(cqpDirectory, textname + ".cqp")
170
			
162 171
			cqpFilesUpdated++
163 172

  
164 173
			XTZCompilerStep step = new XTZCompilerStep(xmlFile, cqpFile, textname, corpusname, "default", anatypes, wtag)
......
180 189

  
181 190
		List<String> pargs = []
182 191
		pargs.add("id")
183
		for (String ana : anatypes)
192
		for (String ana : anatypes) {
184 193
			pargs.add(ana)
185

  
194
		}
195
		
186 196
		String[] pAttrs = pargs
187 197

  
188 198
		def structs = sattrsListener.getStructs()
......
203 213
			}
204 214

  
205 215
			String concat = name+":"+structsProf.get(name); // append the depth
206
			for (String attributeName : structs.get(name)) // append the attributes
216
			for (String attributeName : structs.get(name)) { // append the attributes
207 217
				concat += "+"+attributeName.toLowerCase();
208

  
218
			}
219
			
209 220
			if (structs.get(name).size() == 0) {
210 221
				concat += "+n";
211 222
			} else {
212
				if (!structs.get(name).contains("n"))
223
				if (!structs.get(name).contains("n")) {
213 224
					concat += "+n"
225
				}
214 226
			}
215 227

  
216 228
			if ((name == "p" || name == "body" || name == "back" || name == "front")
217
			&& !concat.contains("+n+") && !concat.endsWith("+n"))
229
				&& !concat.contains("+n+") && !concat.endsWith("+n")) {
218 230
				concat += "+n"
219

  
231
			}
220 232
			sargs.add(concat)
221 233
		}
222 234

  
223 235
		String textSAttributes = "text:0+id+base+project";
224 236
		for (String name : tmpTextAttrs) {
225
			if (!("id".equals(name) || "base".equals(name) || "project".equals(name)))
237
			if (!("id".equals(name) || "base".equals(name) || "project".equals(name))) {
226 238
				textSAttributes += "+"+name.toLowerCase()
239
			}
227 240
		}
228 241

  
229 242
		sargs.add(textSAttributes)
......
233 246

  
234 247
		String[] sAttributes = sargs
235 248
		String[] pAttributes = pAttrs
236
		println " Word properties: "+pAttributes
237
		println " Structures: "+sargs
249
		println " Word properties: "+pAttributes.join(', ')
250
		println " Structures: "+sargs.join(', ')
238 251
		File allcqpFile = new File(cqpDirectory, "all.cqp");
239 252
		allcqpFile.delete()
240 253
		try {
......
244 257
			}
245 258

  
246 259
			if (!cwbEn.run(outputDirectory.getAbsolutePath() + "/$corpusname",
247
			allcqpFile.getAbsolutePath(),
248
			regPath, pAttributes, sAttributes, false)) {
260
				allcqpFile.getAbsolutePath(), regPath, pAttributes, sAttributes, false)) {
249 261
				println "** cwb-encode did not ends well. Activate finer logs to see details."
250 262
				return false;
251 263
			}

Also available in: Unified diff