Révision 3147

tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/fleurs/fleursLoader.groovy (revision 3147)
126 126
		print(srcfile.getName());
127 127
		second++
128 128
	
129
	def ed = new pager(srcfile,resultfile, NoSpaceBefore, NoSpaceAfter,500,basename);
129
	def ed = new pager(srcfile,resultfile, NoSpaceBefore, NoSpaceAfter, p.getEditionDefinition("default").wordsPerPage,basename);
130 130
	
131 131
	Edition editionweb = text.addEdition("default","html",resultfile);
132 132
	for(int i = 0 ; i < ed.getPageFiles().size();i++)
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/factiva/factivamailLoader.groovy (revision 3147)
147 147
		t.setName(txtname);
148 148
		t.setSourceFile(txmFile)
149 149
		t.setTXMFile(txmFile)	
150
	def ed = new pager_old(srcfile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, 500, basename, "pb");
150
	def ed = new pager_old(srcfile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, 1000, basename, "pb");
151 151
	Edition edition = new Edition(t);
152 152
		edition.setName("default");
153 153
		edition.setIndex(outdir.getAbsolutePath());
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZDefaultPagerStep.groovy (revision 3147)
143 143
				return parser.getAttributeValue(i).toString()
144 144
			}
145 145
		}
146
		return "";
146
		return null;
147 147
	}
148 148
	
149 149
	private def closeMultiWriter() {
150 150
		if (pagedWriter != null) {
151
			def tags = pagedWriter.getTagStack().clone();
151
			def tags = pagedWriter.getTagStack()
152
			
152 153
			//			println "STACK="+pagedWriter.getTagStack()
153 154
			//			def stack = Thread.currentThread().getStackTrace();
154 155
			//			int m = Math.min(15, stack.size()-1)
......
158 159
				pagedWriter.writeCharacters("");
159 160
				this.idxstart.add("${wordTag}_0")
160 161
				pagedWriter.write("<span id=\"${wordTag}_0\"/>");
161
				//				}
162 162
			}
163 163
			pagedWriter.writeEndElements();
164 164
			// write notes
......
184 184
			pagedWriter.close();
185 185
			
186 186
			//			println "STACK TO REWRITE: $tags"
187
			int removedDiv = 0;
187 188
			for (int i = 0 ; i < tags.size() ; i++) {
188
				String tag = tags.remove(0)
189
				def tag = tags.remove(0)
189 190
				i--
190
				//				println "	tag=$tag"
191
				if (tag == "div") {
192
					break; // remove elements until first "div" tag
191

  
192
				if (tag[0] == "div" ) {
193
					removedDiv++
194
					if (removedDiv == 2) break; // remove elements until first "div" tag
193 195
				}
194 196
			}
195 197
			//			println "STACK TO REWRITE2: $tags"
196
			
197 198
			return tags;
198 199
		} else {
199 200
			return [];
......
236 237
			pagedWriter.writeStartElement("body") //<body>
237 238
			pagedWriter.writeStartElement("div", ["class": pager.getImportModule().getProject().getName()]) //<div> of the corpus
238 239
			pagedWriter.writeStartElement("div", ["class": "txmeditionpage"]) //<div>
239
			//			println "OPENING: $tags"
240
			
240 241
			pagedWriter.writeStartElements(tags)
241 242
			return true;
242 243
		} catch (Exception e) {
......
342 343
						}
343 344
					
344 345
						rend = getAttributeValue(parser, null, "rend")
345
						if (rend == null) rend = "none";
346
						//if (rend == null) rend = localname;
346 347
					
347 348
						switch (localname) {
348 349
							case "text":
......
365 366
									
366 367
									pagedWriter.writeStartElement("div", ["class":"collapsiblecontent"])
367 368
								}
368
								
369
							
369 370
								pagedWriter.writeStartElement("table", ["class":"metadata"]);
370
								
371
							
371 372
								for (String k : attributes.keySet()) {
372 373
									if (k == "id") continue;
373 374
									if (k == "rend") continue;
......
385 386
								pagedWriter.writeEndElement() // p
386 387
								pagedWriter.writeCharacters("\n")
387 388
								break;
388
							case "ref":
389
								pagedWriter.writeStartElement("a")
390
								pagedWriter.writeAttribute("href", getAttributeValue(parser, null, "target"))
391
								pagedWriter.writeAttribute("target", "_blank")
392
								pagedWriter.writeAttribute("class", rend)
389
							case "u":
390
								pagedWriter.writeStartElement("span")
391
								pagedWriter.writeAttribute("class", "sync")
392
								if (parser.getAttributeValue(null,"time") != null) {
393
									pagedWriter.writeCharacters(parser.getAttributeValue(null,"time"))
394
								}
393 395
								break;
396
							case "p":
397
							//case "lg":
398
								pagedWriter.write("\n")
399
								pagedWriter.writeStartElement("p", ["class":rend])
400
								break;
401
							case "sp":
402
								pagedWriter.writeStartElement("p", ["class":"turn"])
403
							
404
								if (parser.getAttributeValue(null,"speaker") != null) {
405
									pagedWriter.writeStartElement("span")
406
									pagedWriter.writeAttribute("class", "spk")
407
									pagedWriter.writeCharacters(parser.getAttributeValue(null,"speaker")+": ")
408
									pagedWriter.writeEndElement() // span@class=spk
409
								}
410
							
411
								break;
412
							case "cb":
413
								pagedWriter.write("\n")
414
								pagedWriter.writeElement("span", ["class":"txmeditioncb", "align":"center"], getAttributeValue(parser, null,"n")) // element ignored in the END_ELEMENT event
415
								break;
416
							case "lb":
417
							//case "l":
418
								pagedWriter.writeEmptyElement("br", ["class":rend])
419
								break;
420
							case "div":
421
							case "div1":
422
							case "div2":
423
							case "div3":
424
							case "div4":
425
							case "div5":
426
								pagedWriter.writeStartElement("div", ["class":rend, "type":localname])
427
								pagedWriter.write("\n");
428
								break;
394 429
							case "head":
395 430
								pagedWriter.write("\n")
396 431
								pagedWriter.writeStartElement("h2", ["class":rend])
......
405 440
									pagedWriter.writeEndElement() // center
406 441
								}
407 442
								break;
408
							case "table":
409
								pagedWriter.writeStartElement("table", ["class":rend])
410
								pagedWriter.write("\n");
411
								break;
412
							case "row":
413
								pagedWriter.writeStartElement("tr", ["class":rend])
414
								break;
415
							case "cell":
416
								pagedWriter.writeStartElement("td", ["class":rend])
417
								break;
418 443
							case "list":
419 444
								String type = getAttributeValue(parser, null,"type")
420 445
								if ("unordered" == type) {
......
440 465
									}
441 466
								}
442 467
								break;
443
							case "p":
444
							//case "lg":
445
								pagedWriter.write("\n")
446
								pagedWriter.writeStartElement("p", ["class":rend])
468
							case "table":
469
								pagedWriter.writeStartElement("table", ["class":rend])
470
								pagedWriter.write("\n");
447 471
								break;
448
							case "cb":
449
								pagedWriter.write("\n")
450
								pagedWriter.writeElement("span", ["class":"txmeditioncb", "align":"center"], getAttributeValue(parser, null,"n")) // element ignored in the END_ELEMENT event
472
							case "row":
473
								pagedWriter.writeStartElement("tr", ["class":rend])
451 474
								break;
452
							case "sp":
453
								pagedWriter.writeStartElement("p", ["class":"turn"])
454
							
455
								if (parser.getAttributeValue(null,"speaker") != null) {
456
									pagedWriter.writeStartElement("span")
457
									pagedWriter.writeAttribute("class", "spk")
458
									pagedWriter.writeCharacters(parser.getAttributeValue(null,"speaker")+": ")
459
									pagedWriter.writeEndElement() // span@class=spk
460
								}
461
							
475
							case "cell":
476
								pagedWriter.writeStartElement("td", ["class":rend])
462 477
								break;
463
							case "u":
464
							//pagedWriter.writeStartElement("p", ["class":"u"])
465
								if (parser.getAttributeValue(null,"time") != null) {
466
									pagedWriter.writeStartElement("span")
467
									pagedWriter.writeAttribute("class", "sync")
468
									pagedWriter.writeCharacters(parser.getAttributeValue(null,"time"))
469
								}
470
							//pagedWriter.writeEndElement(); // span@class=spk
478
							case "ref":
479
								pagedWriter.writeStartElement("a")
480
								pagedWriter.writeAttribute("href", getAttributeValue(parser, null, "target"))
481
								pagedWriter.writeAttribute("target", "_blank")
482
								pagedWriter.writeAttribute("class", rend)
471 483
								break;
472
							case "div":
473
							case "div1":
474
							case "div2":
475
							case "div3":
476
							case "div4":
477
							case "div5":
478
								pagedWriter.writeStartElement("div", ["class":rend, "type":localname])
479
								pagedWriter.write("\n");
484
							case "form":
485
								wordvalue=""
486
								flagform=true
480 487
								break;
481
							case "lb":
482
							//case "l":
483
								pagedWriter.writeEmptyElement("br", ["class":rend]) 
488
							case "ana":
489
								flaginterp=true;
490
								anaType = getAttributeValue(parser, null, "type").substring(1)
491
								anaResp = getAttributeValue(parser, null, "resp").substring(1)
492
								anaValue.setLength(0)
484 493
								break;
485 494
							case wordTag:
486 495
								wordid = getAttributeValue(parser, null,"id");
......
496 505
								}
497 506
								flagW = true
498 507
								break;
499
							case "ana":
500
								flaginterp=true;
501
								anaType = getAttributeValue(parser, null, "type").substring(1)
502
								anaResp = getAttributeValue(parser, null, "resp").substring(1)
503
								anaValue.setLength(0)
504
								break;
505
							case "form":
506
								wordvalue=""
507
								flagform=true
508
								break;
509 508
							default:
510 509
								if (noteElements.contains(localname)) {
511 510
									flagNote = true;
512 511
									noteContent = ""
513 512
									noteType = getAttributeValue(parser, null, "type")
514
								} else if (allTags && !flagW) {
513
								} else if (allTags && !flagW && localname != paginationElement) {
515 514
									pagedWriter.writeStartElement("span", ["class":localname])
516 515
								}
517 516
								break;
......
521 520
						localname = parser.getLocalName();
522 521
						if (currentOutOfTextElements.size() > 0) currentOutOfTextElements.pop()
523 522
						writeOutOfTextToEditText = currentOutOfTextElements.size() > 0
524
						
523
					
525 524
						if (localname == paginationElement) {
526 525
							break; // element already processed in the START_ELEMENT event
527 526
						}
......
601 600
								}
602 601
							
603 602
								String interpvalue = anaValues.entrySet().join(", ")
604
								
603
							
605 604
								if (NoSpaceBefore.contains(wordvalue) ||
606
								NoSpaceAfter.contains(lastword) ||
607
								wordvalue.startsWith("-") ||
608
								NoSpaceAfter.contains(endOfLastWord)) {
605
										NoSpaceAfter.contains(lastword) ||
606
										wordvalue.startsWith("-") ||
607
										NoSpaceAfter.contains(endOfLastWord)) {
609 608
									pagedWriter.writeStartElement("span", ["title":interpvalue, "id":wordid])
610 609
								} else {
611 610
									pagedWriter.writeCharacters("\n")
......
629 628
										pagedWriter.writeEndElement() // </sub>
630 629
										pagedWriter.writeEndElement() // </a>
631 630
									}
632
								} else if (allTags && !flagW) {
631
								} else if (allTags && !flagW && localname != paginationElement) {
633 632
									pagedWriter.writeEndElement() // </span@class=localname>
634 633
								}
635 634
							//							else {
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZImporter.groovy (revision 3147)
312 312
		for (File f : files) {
313 313
			cpb.tick()
314 314
			
315
			if (!ValidateXml.teiTest(f)) {
315
			if (!ValidateXml.test(f)) {
316 316
				println "Won't process XML file: "+f
317 317
				continue;
318 318
			} else {
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/hyperprince/pager.groovy (revision 3147)
438 438
			List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang);
439 439
			println("build hyperprince xml-tei file : "+srcfile+" to : "+resultfile );
440 440
			
441
			def ed = new pager(srcfile,resultfile, NoSpaceBefore, NoSpaceAfter,500);
441
			def ed = new pager(srcfile,resultfile, NoSpaceBefore, NoSpaceAfter,1000);
442 442
			
443 443
			Edition editionweb = text.addEdition("default","html",resultfile);
444 444
			//println("pages "+ed.getPageFiles())
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/hyperprince/hyperprinceLoader.groovy (revision 3147)
122 122
	List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang);
123 123
	println("build hyperprince xml-tei file : "+srcfile+" to : "+resultfile );
124 124
	
125
	def ed = new pager(srcfile,resultfile, NoSpaceBefore, NoSpaceAfter,500);
125
	def ed = new pager(srcfile,resultfile, NoSpaceBefore, NoSpaceAfter,1000);
126 126
	
127 127
	Edition editionweb = text.addEdition("default","html",resultfile);
128 128
	//println("pages "+ed.getPageFiles())
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/lasla/laslaLoader.groovy (revision 3147)
142 142
		print(srcfile.getName());
143 143
		second++
144 144
	
145
	def ed = new pager_old(srcfile,resultfile, NoSpaceBefore, NoSpaceAfter,500,basename, "br");
145
	def ed = new pager_old(srcfile,resultfile, NoSpaceBefore, NoSpaceAfter,1000,basename, "br");
146 146
	
147 147
	Edition editionweb = text.addEdition("default","html",resultfile);
148 148
//	println("pages "+ed.getPageFiles())
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xmltxmpara/xmltxmparaLoader.groovy (revision 3147)
225 225
	print(srcfile.getName());
226 226
	second++
227 227
	
228
	def ed = new pager_old(srcfile,resultfile, NoSpaceBefore, NoSpaceAfter, 500, basename, "pb");
228
	def ed = new pager_old(srcfile,resultfile, NoSpaceBefore, NoSpaceAfter, 1000, basename, "pb");
229 229
	
230 230
	Edition editionweb = text.addEdition("default", "html", resultfile);
231 231
	for(int i = 0 ; i < ed.getPageFiles().size();i++)
tmp/org.txm.core/src/java/org/txm/utils/ConsoleProgressBar.java (revision 3147)
9 9
	double progress_per_tick = 1.0d;
10 10
	int point_progress = 0;
11 11
	boolean done = false;
12
	String mode = "%%% ";
12
	String mode = "  % ";
13 13
	int percent = 1;
14 14
	
15 15
	public ConsoleProgressBar(long amount) {
......
24 24
		progress_per_tick = 100f / amount;
25 25
		
26 26
		if (amount <= 100L) {
27
			mode = String.format("%03d ", amount);
27
			mode = String.format("%d ", amount);
28
			if (mode.length() == 2) {
29
				mode = "  "+mode;
30
			} else if (mode.length() == 3) {
31
				mode = " "+mode;
32
			}
28 33
			progress_per_tick = 1;
29 34
		}
30 35
	}
tmp/org.txm.core/src/java/org/txm/importer/StaxIdentityParser.java (revision 3147)
282 282
			System.out.println("Unexpected error while parsing file " + inputurl + " : " + e);
283 283
			System.out.println("Location line: " + parser.getLocation().getLineNumber() + " character: " + parser.getLocation().getColumnNumber());
284 284
			org.txm.utils.logger.Log.printStackTrace(e);
285
			
285 286
			// e.printStackTrace();
286 287
			if (writer != null) writer.close();
287 288
			if (output != null) output.close();
tmp/org.txm.core/src/java/org/txm/scripts/importer/StaxStackWriter.groovy (revision 3147)
42 42
		for (int i = 0 ; i < size ; i++) {
43 43
			writeEndElement();
44 44
		}
45
		events = [];
45
		events.clear();
46 46
	}
47 47
	
48
	/**
49
	 * tagsToWrite: List<String> or List<[String, [String, String]]>
50
	 */
48 51
	public void writeStartElements(def tagsToWrite) {
49
		for (String tag : tagsToWrite)
50
			writeStartElement(tag);
52
		for (def tag : tagsToWrite) {
53
			if (tag instanceof String) {
54
				writeStartElement(tag);
55
			} else {
56
				writeStartElement(tag[0]);
57
				for (def att : tag[1]) {
58
					writeAttribute(att[0], att[1])
59
				}
60
			}
61
		}
51 62
	}
52 63
	
53 64
	public def getTagStack() {
......
98 109
	
99 110
	@Override
100 111
	public void writeAttribute (String localName, String value) throws XMLStreamException {
112
		if (value == null) return;
101 113
		writer.writeAttribute(localName, value);
114
		if (events.size() > 0) events[events.size()-1][1] << [localName, value]
102 115
	}
103 116
	
104 117
	@Override
105 118
	public void writeAttribute (String namespaceURI, String localName, String value) throws XMLStreamException {
119
		if (value == null) return;
106 120
		writer.writeAttribute(namespaceURI, localName, value);
121
		if (events.size() > 0) events[events.size()-1][1] << [localName, value]
107 122
	}
108 123
	
109 124
	@Override
110 125
	public void writeAttribute (String prefix, String namespaceURI, String localName, String value) throws XMLStreamException {
126
		if (value == null) return;
111 127
		writer.writeAttribute(prefix, namespaceURI, localName, value);
128
		if (events.size() > 0) events[events.size()-1][1] << [localName, value]
112 129
	}
113 130
	
114 131
	@Override
......
245 262
	@Override
246 263
	public void writeStartElement (String localName) throws XMLStreamException {
247 264
		writer.writeStartElement(localName);
248
		events << localName
265
		events << [localName, []]
249 266
		if (debug) println "START $localName $events"
250 267
	}
251 268
	
252 269
	public void writeStartElement (String localName, def map) throws XMLStreamException {
253 270
		writeStartElement(localName);
271
		
254 272
		for (def key : map.keySet()) {
255 273
			writeAttribute(key.toString(), map[key].toString());
274
			events[events.size()-1][1] << [key.toString(), map[key].toString()]
256 275
		}
257 276
	}
258 277
	
......
260 279
	public void writeStartElement(String namespaceURI, String localName)
261 280
	throws XMLStreamException {
262 281
		writer.writeStartElement(namespaceURI, localName);
263
		events << localName
282
		events << [localName, []]
264 283
		if (debug) println "START $localName $events"
265 284
	}
266 285
	
......
268 287
	public void writeStartElement(String prefix, String localName,
269 288
			String namespaceURI) throws XMLStreamException {
270 289
		writer.writeStartElement(prefix, localName, namespaceURI);
271
		events << localName
290
		events << [localName, []]
272 291
		if (debug) println "START $localName $prefix $events"
273 292
	}
274 293
}
tmp/org.txm.core/src/java/org/txm/objects/CorpusBuild.java (revision 3147)
552 552
		this.getProject().setEncoding(root.getAttribute("encoding"));
553 553
		this.getProject().setLang(root.getAttribute(LANG));
554 554
		String str = root.getAttribute(WORDSPERPAGE);
555
		if (str == null || str.length() == 0) str = "500";
555
		if (str == null || str.length() == 0) str = "1000";
556 556
		this.pageSize = Integer.parseInt(str);
557 557
		str = root.getAttribute(DOEDITIONSTEP);
558 558
		if (str == null || str.length() == 0) str = "true";
......
762 762
				if (built == null || built.length() == 0) built = "true";
763 763
				
764 764
				String pbElement = edition.getAttribute("page_break_tag");
765
				int wpp = 500;
765
				int wpp = 1000;
766 766
				try {
767 767
					wpp = Integer.parseInt(edition.getAttribute("words_per_page"));
768 768
				}
tmp/org.txm.core/src/java/org/txm/objects/BaseOldParameters.java (revision 3147)
147 147
	
148 148
	String DEFAULT_PAGEELEMENT = "pb"; //$NON-NLS-1$
149 149
	
150
	int DEFAULT_WORDSPERPAGE = 500;
150
	int DEFAULT_WORDSPERPAGE = 1000;
151 151
	
152 152
	public String description;
153 153
	
tmp/org.txm.core/src/java/org/txm/objects/EditionDefinition.java (revision 3147)
35 35
	}
36 36
	
37 37
	public int getWordsPerPage() {
38
		return node.getInt(TBXPreferences.EDITION_DEFINITION_WORDS_PER_PAGE, 500);
38
		return node.getInt(TBXPreferences.EDITION_DEFINITION_WORDS_PER_PAGE, 1000);
39 39
	}
40 40
	
41 41
	public boolean getEnableCollapsibleMetadata() {

Formats disponibles : Unified diff