Revision 2288

tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZCompiler.groovy (revision 2288)
80 80
		// get all anatypes
81 81
		sattrsListener = SAttributesListener.scanFiles(inputDirectory, wtag)
82 82
		def texts = module.getProject().getTexts()
83
		println "-- Listing structures&properties to create for "+texts.size()+" texts..."
83
		println "-- Scanning structures&properties to create for "+texts.size()+" texts..."
84 84
		ConsoleProgressBar cpb = new ConsoleProgressBar(texts.size())
85 85
		for (Text t : texts) {
86 86
			try {
......
130 130
		//				anatypes << type
131 131
	}
132 132

  
133
	def cqpFiles = []
133
	def cqpFiles = [] // ordered cqp files to concat before calling cwb-encode
134 134
	int cqpFilesUpdated = 0;
135 135
	public boolean doCQPStep() {
136
		println "-- Building CQP files $inputDirectory..."
136
		
137 137
		cqpDirectory.mkdir(); // if not created
138 138

  
139
		def texts = module.getProject().getTexts()
140
		ConsoleProgressBar cpb = new ConsoleProgressBar(texts.size())
141
		cqpFilesUpdated = 0;
142
		for (Text text : texts) {
143
			cpb.tick();
144

  
139
		def texts = orderedTextIDs.collect() { id -> module.getProject().getText(id) }
140
		def textsToProcess = texts.findAll() { text ->
145 141
			File xmlFile = text.getXMLTXMFile()
146 142
			String textname = text.getName()
147 143

  
148 144
			File cqpFile = new File(cqpDirectory, textname + ".cqp")
149
			cqpFiles << cqpFile
150

  
145
			cqpFiles << cqpFile // insert cqp files to concat later
151 146
			// skip step if cqpFile exists AND is more recent than the XML-TXM File
152 147
			boolean mustBuild = false;
153 148
			if (!cqpFile.exists() || xmlFile.lastModified() >= cqpFile.lastModified()) {
154
				mustBuild = true
149
				return true
155 150
			}
156 151

  
157 152
			if (!text.isDirty() && !mustBuild) {
158 153
				Log.finer("skipping .cqp step of $text");
159
				continue
154
				return false
160 155
			}
161 156
			
157
			return true
158
		}
159
		println "-- Building CQP files ${textsToProcess.size()}/${texts.size()}..."
160
		
161
		ConsoleProgressBar cpb = new ConsoleProgressBar(textsToProcess.size())
162
		cqpFilesUpdated = 0;
163
		for (Text text : textsToProcess) {
164
			cpb.tick();
165

  
166
			File xmlFile = text.getXMLTXMFile()
167
			String textname = text.getName()
168

  
169
			File cqpFile = new File(cqpDirectory, textname + ".cqp")
170
			
162 171
			cqpFilesUpdated++
163 172

  
164 173
			XTZCompilerStep step = new XTZCompilerStep(xmlFile, cqpFile, textname, corpusname, "default", anatypes, wtag)
......
180 189

  
181 190
		List<String> pargs = []
182 191
		pargs.add("id")
183
		for (String ana : anatypes)
192
		for (String ana : anatypes) {
184 193
			pargs.add(ana)
185

  
194
		}
195
		
186 196
		String[] pAttrs = pargs
187 197

  
188 198
		def structs = sattrsListener.getStructs()
......
203 213
			}
204 214

  
205 215
			String concat = name+":"+structsProf.get(name); // append the depth
206
			for (String attributeName : structs.get(name)) // append the attributes
216
			for (String attributeName : structs.get(name)) { // append the attributes
207 217
				concat += "+"+attributeName.toLowerCase();
208

  
218
			}
219
			
209 220
			if (structs.get(name).size() == 0) {
210 221
				concat += "+n";
211 222
			} else {
212
				if (!structs.get(name).contains("n"))
223
				if (!structs.get(name).contains("n")) {
213 224
					concat += "+n"
225
				}
214 226
			}
215 227

  
216 228
			if ((name == "p" || name == "body" || name == "back" || name == "front")
217
			&& !concat.contains("+n+") && !concat.endsWith("+n"))
229
				&& !concat.contains("+n+") && !concat.endsWith("+n")) {
218 230
				concat += "+n"
219

  
231
			}
220 232
			sargs.add(concat)
221 233
		}
222 234

  
223 235
		String textSAttributes = "text:0+id+base+project";
224 236
		for (String name : tmpTextAttrs) {
225
			if (!("id".equals(name) || "base".equals(name) || "project".equals(name)))
237
			if (!("id".equals(name) || "base".equals(name) || "project".equals(name))) {
226 238
				textSAttributes += "+"+name.toLowerCase()
239
			}
227 240
		}
228 241

  
229 242
		sargs.add(textSAttributes)
......
233 246

  
234 247
		String[] sAttributes = sargs
235 248
		String[] pAttributes = pAttrs
236
		println " Word properties: "+pAttributes
237
		println " Structures: "+sargs
249
		println " Word properties: "+pAttributes.join(', ')
250
		println " Structures: "+sargs.join(', ')
238 251
		File allcqpFile = new File(cqpDirectory, "all.cqp");
239 252
		allcqpFile.delete()
240 253
		try {
......
244 257
			}
245 258

  
246 259
			if (!cwbEn.run(outputDirectory.getAbsolutePath() + "/$corpusname",
247
			allcqpFile.getAbsolutePath(),
248
			regPath, pAttributes, sAttributes, false)) {
260
				allcqpFile.getAbsolutePath(), regPath, pAttributes, sAttributes, false)) {
249 261
				println "** cwb-encode did not ends well. Activate finer logs to see details."
250 262
				return false;
251 263
			}
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZImport.groovy (revision 2288)
8 8

  
9 9
import org.eclipse.core.runtime.IProgressMonitor;
10 10
import org.txm.utils.xml.DomUtils;
11
import org.txm.core.preferences.TBXPreferences;
11 12
import org.txm.metadatas.Metadatas
12 13
import org.txm.utils.io.FileCopy;
13 14
import org.txm.*
......
43 44
			return super.getTXMFilesOrder();
44 45
		}
45 46
		Metadatas metadata = importer.getMetadata();
47
		if (metadata == null) { // if metadata was not built, try building it
48
			File allMetadataFile = Metadatas.findMetadataFile(binaryDirectory);
49
			if (allMetadataFile.exists()) {
50
				metadata = new Metadatas(allMetadataFile,
51
						Toolbox.getPreference(TBXPreferences.METADATA_ENCODING),
52
						Toolbox.getPreference(TBXPreferences.METADATA_COLSEPARATOR),
53
						Toolbox.getPreference(TBXPreferences.METADATA_TXTSEPARATOR), 1)
54
			}
55
		}
56
		
46 57
		if (metadata == null) {
47 58
			println "no metadata, using default text order"
48 59
			return super.getTXMFilesOrder();
49 60
		}
61
		
50 62
		File txmDirectory = new File(binaryDirectory, "txm/"+corpusName);
51 63
		ArrayList<File> files = new ArrayList<File>(Arrays.asList(txmDirectory.listFiles(new FileFilter() {
52 64
			@Override
......
61 73
			def ti = metadata.get(t)
62 74
			for (org.txm.metadatas.Entry e : ti) {
63 75
				if ("textorder".equals(e.getId())) {
64
					textorder[t] = ti.get("textorder")
76
					textorder[t] = ti.value()
65 77
				}
66 78
			}
67 79
		}
......
86 98
			}
87 99
		});
88 100
		//println files
89
		return files;
101
		return texts;
90 102
	}
91 103
	
92 104
	public void start() throws InterruptedException {
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZPager.groovy (revision 2288)
74 74
		}
75 75

  
76 76
		def second = 0
77
		def texts = module.getProject().getTexts()
78
		println "-- Building 'default' edition of  ${texts.size()} texts..."
77
		def texts = orderedTextIDs.collect() { id -> module.getProject().getText(id) }
78
		def textsToProcess = texts.findAll() { text ->
79
			File txmFile = text.getXMLTXMFile()
80
			File firstHTMLPageFile = new File(outputDirectory, text.getName()+"_1.html");
81
			boolean mustBuild = false;
82
			if (!firstHTMLPageFile.exists() || txmFile.lastModified() >= firstHTMLPageFile.lastModified()) {
83
				return true
84
			}
85
				
86
			if (!text.isDirty() && !mustBuild) {
87
				Log.finer("skipping 'default html' step of $text");
88
				return false
89
			}
90
			
91
			return true
92
		}
93
		println "-- Building 'default' edition of ${textsToProcess.size()}/${texts.size()} texts..."
79 94

  
80 95
		def css = ["css/txm.css", "css/${corpusname}.css"] // default CSS inclusion
81 96

  
......
90 105
			}
91 106
		}
92 107

  
93
		ConsoleProgressBar cpb = new ConsoleProgressBar(texts.size())
94
		for (Text text : texts) {
108
		ConsoleProgressBar cpb = new ConsoleProgressBar(textsToProcess.size())
109
		for (Text text : textsToProcess) {
95 110

  
96 111
			File txmFile = text.getXMLTXMFile()
97 112
			try {
......
99 114

  
100 115
				String textname = text.getName()
101 116

  
102
				File firstHTMLPageFile = new File(outputDirectory, textname+"_1.html");
103
				boolean mustBuild = false;
104
				if (!firstHTMLPageFile.exists() || txmFile.lastModified() >= firstHTMLPageFile.lastModified()) {
105
					mustBuild = true
106
				}
107
				
108
				if (!text.isDirty() && !mustBuild) {
109
					Log.finer("skipping 'default html' step of $text");
110
					continue
111
				}
112

  
113 117
				Edition edition = text.getEdition("default")
114 118
				if (edition != null) {
115 119
					edition.delete()
......
161 165
		}
162 166

  
163 167
		// save changes
164
		println ""
165 168
		return true;
166 169
	}
167 170

  
......
183 186

  
184 187
		def second = 0
185 188

  
186
		def texts = module.getProject().getTexts()
189
		def texts = orderedTextIDs.collect() { id -> module.getProject().getText(id) }
187 190
		println "-- Building 'facs' edition of ${texts.size()} texts..."
188 191
		File newEditionDirectory = new File(htmlDirectory, "facs");
189 192
		newEditionDirectory.mkdir();
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZCompilerStep.groovy (revision 2288)
65 65
			inputData = xmlFile.toURI().toURL().openStream();
66 66
			factory = XMLInputFactory.newInstance();
67 67
			parser = factory.createXMLStreamReader(inputData);
68

  
69 68
		} catch (Exception ex) {
70 69
			System.err.println("Exception while parsing $xmlFile : "+ex);
71 70
		}
......
97 96
	 */
98 97
	public boolean process()
99 98
	{
100
		if (!createOutput(cqpFile))
99
		if (!createOutput(cqpFile)) {
101 100
			return false;
102

  
101
		}
102
		
103 103
		String headvalue=""
104 104
		String vAna = "";
105 105
		String vForm = "";
106 106
		String wordid= "";
107 107
		String vHead = "";
108 108

  
109

  
110 109
		int p_id = 0;
111 110
		int s_id = 0;
112 111

  

Also available in: Unified diff