Révision 3715

TXM/trunk/bundles/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZDefaultPagerStep.groovy (revision 3715)
13 13
import org.txm.importer.xtz.*
14 14

  
15 15
public class XTZDefaultPagerStep {
16
	
16

  
17 17
	List<String> NoSpaceBefore;
18
	
18

  
19 19
	/** The No space after. */
20 20
	List<String> NoSpaceAfter;
21
	
21

  
22 22
	/** The wordcount. */
23 23
	int wordcount = 0;
24
	
24

  
25 25
	/** The pagecount. */
26 26
	int pagecount = 0;
27
	
27

  
28 28
	/** The wordmax. */
29 29
	int wordmax = 0;
30
	
30

  
31 31
	/** The basename. */
32 32
	String basename = "";
33 33
	String txtname = "";
34 34
	File outdir;
35
	
35

  
36 36
	/** The wordid. */
37 37
	String wordid;
38
	
38

  
39 39
	/** The first word. */
40 40
	boolean firstWord = true;
41
	
41

  
42 42
	boolean enableCollapsibles = false;
43
	
43

  
44 44
	/** The wordvalue. */
45 45
	String wordvalue = "";
46
	
46

  
47 47
	/** The interpvalue. */
48 48
	String interpvalue = "";
49
	
49

  
50 50
	/** The lastword. */
51 51
	String lastword = " ";
52
	
52

  
53 53
	/** The wordtype. */
54 54
	String wordtype;
55
	
55

  
56 56
	/** The flagform. */
57 57
	boolean flagform = false;
58
	
58

  
59 59
	/** The flaginterp. */
60 60
	boolean flaginterp = false;
61
	
61

  
62 62
	/** The url. */
63 63
	private def url;
64
	
64

  
65 65
	/** The input data. */
66 66
	private def inputData;
67
	
67

  
68 68
	/** The factory. */
69 69
	private def factory;
70
	
70

  
71 71
	/** The parser. */
72 72
	private XMLStreamReader parser;
73
	
73

  
74 74
	/** The writer. */
75 75
	OutputStreamWriter writer;
76
	
76

  
77 77
	/** The pagedWriter. */
78 78
	StaxStackWriter pagedWriter = null;
79
	
79

  
80 80
	/** The infile. */
81 81
	File infile;
82
	
82

  
83 83
	/** The outfile. */
84 84
	File outfile;
85
	
85

  
86 86
	/** The pages. */
87 87
	//TODO enhance this to store the page name/id as well
88 88
	ArrayList<File> pages = new ArrayList<File>();
89
	
89

  
90 90
	/** The idxstart. */
91 91
	ArrayList<String> idxstart = new ArrayList<String>();
92 92
	String paginationElement;
......
96 96
	def noteElements = new HashSet<String>();
97 97
	def outOfTextElements = new HashSet<String>();
98 98
	XTZPager pager;
99
	
99

  
100 100
	/**
101 101
	 * Instantiates a new pager.
102 102
	 *
......
123 123
		this.wordTag= pager.wordTag;
124 124
		outdir.mkdirs()
125 125
		this.enableCollapsibles = pager.getImportModule().getProject().getEditionDefinition("default").getEnableCollapsibleMetadata();
126
		
126

  
127 127
		inputData = new BufferedInputStream(url.openStream());
128 128
		factory = XMLInputFactory.newInstance();
129 129
		parser = factory.createXMLStreamReader(inputData);
130
		
130

  
131 131
		String notesListString = pager.getImportModule().getProject().getTextualPlan("Note")
132 132
		if (notesListString != null) for (def s : notesListString.split(",")) noteElements << s;
133
		
133

  
134 134
		String elems = pager.getImportModule().getProject().getTextualPlan("OutSideTextTagsAndKeepContent")
135 135
		if (elems != null) for (def s : elems.split(",")) outOfTextElements << s;
136
		
136

  
137 137
		//process();
138 138
	}
139
	
139

  
140 140
	public String getAttributeValue(def parser, String ns, String name) {
141 141
		for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
142 142
			if (name == parser.getAttributeLocalName(i)) {
......
145 145
		}
146 146
		return null;
147 147
	}
148
	
148

  
149 149
	private def closeMultiWriter() {
150 150
		if (pagedWriter != null) {
151 151
			def tags = []
152 152
			tags.addAll(pagedWriter.getTagStack())
153
			
153

  
154 154
			//println "CLOSING... STACK="+pagedWriter.getTagStack()
155 155
			// def stack = Thread.currentThread().getStackTrace();
156 156
			// int m = Math.min(15, stack.size()-1)
157 157
			// for (def s : stack[1..m]) println s
158 158
			// println "FILE ="+outfile
159 159
			if (firstWord) { // there was no words
160
				pagedWriter.writeCharacters("");
160
				pagedWriter.writeCharacters("")
161 161
				this.idxstart.add("w_0")
162
				pagedWriter.write("<span id=\"w_0\"/>");
162
				pagedWriter.write("<span id=\"w_0\"/>")
163 163
			}
164
			
165
			// write notes before closing all tags 
164

  
165
			// write notes before closing all tags
166 166
			if (notes.size() > 0) {
167
				pagedWriter.writeStartElement("hr", ["id":"notes", "width":"20%", "align":"left"]);
167
				pagedWriter.writeStartElement("hr", ["id":"notes", "width":"20%", "align":"left"])
168 168
				pagedWriter.writeEndElement() // </hr>
169 169
				//pagedWriter.writeStartElement("ol");
170 170
				int i = 1;
171 171
				for (String note : notes) {
172 172
					//pagedWriter.writeStartElement("li");
173
					pagedWriter.writeStartElement("a", ["href":"#noteref_"+i, "name":"note_"+i]);
173
					pagedWriter.writeStartElement("a", ["href":"#noteref_"+i, "name":"note_"+i])
174 174
					pagedWriter.writeStartElement("sup")
175 175
					pagedWriter.writeCharacters(""+i)
176 176
					pagedWriter.writeEndElement() // </sub>
......
181 181
				}
182 182
				notes.clear()
183 183
			}
184
			
184

  
185 185
			pagedWriter.writeEndElements();
186
			
186

  
187 187
			pagedWriter.close();
188
			
188

  
189 189
			//println "STACK TO REWRITE: $tags"
190 190
			int removedDiv = 0;
191 191
			for (int i = 0 ; i < tags.size() ; i++) {
......
203 203
			return [];
204 204
		}
205 205
	}
206
	
206

  
207 207
	/**
208 208
	 * Creates the next output.
209 209
	 *
......
213 213
		wordcount = 0;
214 214
		try {
215 215
			def tags = closeMultiWriter()
216
			
216

  
217 217
			outfile = new File(outdir, txtname+"_"+(++pagecount)+".html")
218 218
			pages.add(outfile)
219 219
			firstWord = true; // waiting for next word
220
			
220

  
221 221
			pagedWriter = new StaxStackWriter(outfile, "UTF-8")
222
			
222

  
223 223
			//pagedWriter.writeStartDocument()
224 224
			pagedWriter.writeDTD("<!DOCTYPE html>")
225 225
			pagedWriter.writeCharacters("\n")
226 226
			pagedWriter.writeStartElement("html")
227
			
227

  
228 228
			pagedWriter.writeCharacters("\n\t")
229 229
			pagedWriter.writeEmptyElement("meta", ["http-equiv":"Content-Type", "content":"text/html","charset":"UTF-8"])
230
			
231
			
230

  
231

  
232 232
			for (String css : cssList) {
233 233
				pagedWriter.writeCharacters("\t\n")
234 234
				pagedWriter.writeEmptyElement("link", ["rel":"stylesheet", "type":"text/css","href":"$css"])
235
				
236 235
			}
236
			
237 237
			pagedWriter.writeCharacters("\t\n")
238 238
			pagedWriter.writeStartElement("head")
239 239
			pagedWriter.writeCharacters("\t\t\n")
......
252 252
			pagedWriter.writeCharacters("\t\t\n")
253 253
			pagedWriter.writeStartElement("div", ["class": "txmeditionpage"]) //<div>
254 254
			pagedWriter.writeCharacters("\n")
255
			
256
//			println "NEW HTML: "+outfile
257
//			println "TAGS: "+tags
255

  
256
			//			println "NEW HTML: "+outfile
257
			//			println "TAGS: "+tags
258 258
			pagedWriter.writeStartElements(tags)
259 259
			return true;
260 260
		} catch (Exception e) {
......
263 263
			return false;
264 264
		}
265 265
	}
266
	
266

  
267 267
	/**
268 268
	 * Creates the output.
269 269
	 *
......
278 278
			return false;
279 279
		}
280 280
	}
281
	
281

  
282 282
	/**
283 283
	 * Gets the page files.
284 284
	 *
......
287 287
	public ArrayList<File> getPageFiles() {
288 288
		return pages;
289 289
	}
290
	
290

  
291 291
	/**
292 292
	 * Gets the idx.
293 293
	 *
......
296 296
	public ArrayList<String> getIdx() {
297 297
		return idxstart;
298 298
	}
299
	
299

  
300 300
	/**
301 301
	 * Go to text.
302 302
	 */
303 303
	private void goToText() {
304
		
304 305
		for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
305
			if (event == XMLStreamConstants.END_ELEMENT)
306
				if (parser.getLocalName().matches("teiHeader"))
306
			if (event == XMLStreamConstants.END_ELEMENT) {
307
				if (parser.getLocalName().matches("teiHeader")) {
307 308
					return;
309
				}
310
			}
308 311
		}
309 312
	}
310
	
313

  
311 314
	def notes = []
312 315
	def currentOutOfTextElements = [] // stack of element with out of text to edit opened element
313 316
	def writeOutOfTextToEditText = false
......
315 318
	 * Process.
316 319
	 */
317 320
	public boolean process() {
318
		
321

  
319 322
		try {
320 323
			def anaValues = [:]
321 324
			def anaType = ""
322 325
			def anaResp = ""
323 326
			def anaValue = new StringBuilder()
324
			
327

  
325 328
			boolean flagNote = false
326 329
			boolean flagW = false
327
			
330

  
328 331
			boolean allTags = true
329 332
			boolean ignoreUnmanagedTags = true
330 333
			// unmanagedElementsPolicyCombo.setItems("ignore", "keep as is", "rename to span");
......
342 345
			String noteType = ""
343 346
			String rend = ""
344 347
			goToText();
345
			
348

  
346 349
			String localname = ""
347 350
			if (!createNextOutput()) {
348 351
				return false;
349 352
			}
350
			
353

  
351 354
			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
352 355
				rend = "";
353 356
				switch (event) {
......
359 362
						} else if (currentOutOfTextElements.size() > 0) {
360 363
							currentOutOfTextElements << localname
361 364
						}
362
					
365

  
363 366
						if (localname == paginationElement) {
364 367
							if (paginate) {
365 368
								createNextOutput()
366 369
							}
367
							
370

  
368 371
							wordcount = 0;
369 372
							pagedWriter.write("\n")
370 373
							if (getAttributeValue(parser, null,"n") != null) {
371 374
								pagedWriter.writeElement("p", ["class":"txmeditionpb", "align":"center"], getAttributeValue(parser, null,"n"))
372 375
							}
373 376
						}
374
					
377

  
375 378
						rend = getAttributeValue(parser, null, "rend")
376
						//if (rend == null) rend = localname;
377
					
379
					//if (rend == null) rend = localname;
380

  
378 381
						switch (localname) {
379 382
							case "text":
380 383
								LinkedHashMap attributes = new LinkedHashMap();
381 384
								for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
382 385
									attributes[parser.getAttributeLocalName(i)] = parser.getAttributeValue(i).toString()
383 386
								}
384
							
387

  
385 388
								pagedWriter.write("\n")
386 389
								pagedWriter.writeStartElement("p")
387 390
								pagedWriter.writeAttribute("class", rend)
388 391
								if (attributes.containsKey("id")) {
389 392
									pagedWriter.writeElement("h3", attributes["id"])
390 393
								}
391
							
392
							if (enableCollapsibles && parser.getAttributeCount() > 2) {
393
								pagedWriter.writeStartElement("button");
394
								pagedWriter.writeAttribute("class", "collapsible");
395
								pagedWriter.writeAttribute("onclick", "onCollapsibleClicked(this)");
396
								pagedWriter.writeCharacters("➕");
397
								pagedWriter.writeEndElement()
398
								pagedWriter.writeCharacters("\n")
399
							}
400
							pagedWriter.writeStartElement("table");
401
							if (enableCollapsibles && parser.getAttributeCount() > 2) {
402
								pagedWriter.writeAttribute("class", "collapsiblecontent")
403
								pagedWriter.writeAttribute("style", "display:none;")
404
							} else {
405
								pagedWriter.writeAttribute("class", "metadata");
406
							}
407
							
394

  
395
								if (enableCollapsibles && parser.getAttributeCount() > 2) {
396
									pagedWriter.writeStartElement("button");
397
									pagedWriter.writeAttribute("class", "collapsible");
398
									pagedWriter.writeAttribute("onclick", "onCollapsibleClicked(this)");
399
									pagedWriter.writeCharacters("➕");
400
									pagedWriter.writeEndElement()
401
									pagedWriter.writeCharacters("\n")
402
								}
403
								pagedWriter.writeStartElement("table");
404
								if (enableCollapsibles && parser.getAttributeCount() > 2) {
405
									pagedWriter.writeAttribute("class", "collapsiblecontent")
406
									pagedWriter.writeAttribute("style", "display:none;")
407
								} else {
408
									pagedWriter.writeAttribute("class", "metadata");
409
								}
410

  
408 411
								for (String k : attributes.keySet()) {
409 412
									if (k == "id") continue;
410 413
									if (k == "rend") continue;
411
									
414

  
412 415
									pagedWriter.writeStartElement("tr")
413 416
									pagedWriter.writeAttribute("class", "metadata-line")
414 417
									pagedWriter.writeElement("td", ["class": "metadata-cell"], k)
......
416 419
									pagedWriter.writeEndElement() //tr
417 420
								}
418 421
								pagedWriter.writeEndElement() // table
419
								
422

  
420 423
								pagedWriter.writeEndElement() // p
421 424
								pagedWriter.writeCharacters("\n")
422 425
								break;
......
425 428
								pagedWriter.writeAttribute("class", "sync")
426 429
								if (parser.getAttributeValue(null,"time") != null) {
427 430
									pagedWriter.writeCharacters(parser.getAttributeValue(null,"time"))
428
									
431

  
429 432
									writeMediaAccess(parser.getAttributeValue(null,"time"), corpus, txtname)
430 433
								}
431 434
								break;
......
438 441
								break;
439 442
							case "sp":
440 443
								pagedWriter.writeStartElement("p", ["class":"turn", "type":localname])
441
							
444

  
442 445
								if (parser.getAttributeValue(null,"speaker") != null) {
443 446
									pagedWriter.writeStartElement("span")
444 447
									pagedWriter.writeAttribute("class", "spk")
445 448
									pagedWriter.writeCharacters(parser.getAttributeValue(null,"speaker")+": ")
446 449
									pagedWriter.writeEndElement() // span@class=spk
447 450
								}
448
							
451

  
449 452
								break;
450 453
							case "cb":
451 454
								pagedWriter.write("\n")
......
545 548
								if (paginate && wordcount >= wordmax) {
546 549
									createNextOutput();
547 550
								}
548
							
551

  
549 552
								if (firstWord) {
550 553
									firstWord = false;
551 554
									this.idxstart.add(wordid);
......
573 576
						break;
574 577
					case XMLStreamConstants.END_ELEMENT:
575 578
						localname = parser.getLocalName();
576
						
579

  
577 580
						if (currentOutOfTextElements.size() > 0) currentOutOfTextElements.pop()
578
						
581

  
579 582
						writeOutOfTextToEditText = currentOutOfTextElements.size() > 0
580
					
583

  
581 584
						if (localname == paginationElement) {
582 585
							break; // element already processed in the START_ELEMENT event
583 586
						}
584
					
587

  
585 588
						switch (localname) {
586 589
							case "text":
587 590
								break;
......
600 603
								break;
601 604
							case "lb":
602 605
								break;
603
							
606

  
604 607
							case "body":
605 608
							case "div":
606 609
							case "div1":
......
659 662
								if (l > 0) {
660 663
									endOfLastWord = lastword.subSequence(l-1, l)
661 664
								}
662
							
665

  
663 666
								String interpvalue = null;
664 667
								def tooltipProperties = pager.project.getEditionDefinition("default").get(TBXPreferences.EDITION_DEFINITION_TOOLTIP_PROPERTIES, "*");
665 668
								if (tooltipProperties.equals("*")) {
......
671 674
									interpvalue += "- "+wordid
672 675
								}
673 676
								
674
								if (NoSpaceBefore.contains(wordvalue) ||
675
										NoSpaceAfter.contains(lastword) ||
676
										wordvalue.startsWith("-") ||
677
										NoSpaceAfter.contains(endOfLastWord)) {
678
									pagedWriter.writeStartElement("span", ["title":interpvalue, "id":wordid])
677
								if (!flagNote) { // don't write words of the note elements
678
									if (NoSpaceBefore.contains(wordvalue) ||
679
											NoSpaceAfter.contains(lastword) ||
680
											wordvalue.startsWith("-") ||
681
											NoSpaceAfter.contains(endOfLastWord)) {
682
										pagedWriter.writeStartElement("span", ["title":interpvalue, "id":wordid])
683
									} else {
684
										pagedWriter.writeCharacters("\n")
685
										pagedWriter.writeStartElement("span", ["title":interpvalue, "id":wordid])
686
									}
687

  
688
									pagedWriter.writeCharacters(wordvalue)
689
									pagedWriter.writeEndElement()
679 690
								} else {
680
									pagedWriter.writeCharacters("\n")
681
									pagedWriter.writeStartElement("span", ["title":interpvalue, "id":wordid])
691
									
682 692
								}
683
							
684
								pagedWriter.writeCharacters(wordvalue)
685
								pagedWriter.writeEndElement()
686 693
							//pagedWriter.writeComment("\n")
687 694
								lastword=wordvalue;
688 695
								wordvalue="" // reset
......
744 751
		}
745 752
		return true;
746 753
	}
747
	
754

  
748 755
	private void writeMediaAccess(def time) {
749
		
756

  
750 757
		pagedWriter.writeCharacters(" ");
751 758
		pagedWriter.writeStartElement("a");
752 759
		pagedWriter.writeAttribute("onclick", "txmcommand('id', 'org.txm.backtomedia.commands.function.BackToMedia', 'corpus', '"+basename+"', 'text', '"+txtname+"', 'time', '"+time+"')");

Formats disponibles : Unified diff