Révision 2381

tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/compiler.groovy (revision 2381)
354 354
							output.write("<div");
355 355
							for (int i = 0 ; i < parser.getAttributeCount() ; i ++) {
356 356
								String name = parser.getAttributeLocalName(i).replace("_","").toLowerCase()
357
								output.write(" "+name+"=\""+parser.getAttributeValue(i)+"\"");
357
								output.write(" "+name+"=\""+parser.getAttributeValue(i).replace("\"", "&quot;")+"\"");
358 358
								sectionAttrs << name
359 359
							}
360 360
							output.write ">\n"
......
368 368
							output.write("<u");
369 369
							for (int i = 0 ; i < parser.getAttributeCount() ; i ++) {
370 370
								String name = parser.getAttributeLocalName(i).replace("_","").toLowerCase()
371
								output.write(" "+name+"=\""+parser.getAttributeValue(i)+"\"");
371
								output.write(" "+name+"=\""+parser.getAttributeValue(i).replace("\"", "&quot;")+"\"");
372 372
								if (name == "time") formatedTime = parser.getAttributeValue(i)
373 373
								else if (name == "spk") u_name = parser.getAttributeValue(i)
374 374
							}
......
539 539
	 */
540 540
	private void writeAttributes() {
541 541
		for (int i = 0 ; i < parser.getAttributeCount() ; i ++) {
542
			output.write(" "+parser.getAttributeLocalName(i).replace("_","").toLowerCase()+"=\""+parser.getAttributeValue(i)+"\"");
542
			output.write(" "+parser.getAttributeLocalName(i).replace("_","").toLowerCase()+"=\""+parser.getAttributeValue(i).replace("\"", "&quot;")+"\"");
543 543
		}
544 544
	}
545 545

  
......
564 564
		if (parser != null) parser.close();
565 565
		if (inputData != null) inputData.close();
566 566
		
567
		for (String type : types)
568
			if (!anatypes.contains(type))
567
		for (String type : types) {
568
			if (!anatypes.contains(type)) {
569 569
				anatypes << type
570
			}
571
		}
570 572
	}
571 573

  
572 574
	/**
......
677 679
	public void setDebug() {
678 680
		debug = true;
679 681
	}
680

  
681
}
682
}
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZCompilerStep.groovy (revision 2381)
16 16
 */
17 17
public class XTZCompilerStep extends Step {
18 18

  
19
	static String FORM = "form";
20
	static String ANA = "ana";
21
	static String ID = "id";
22
	static String TYPE = "type";
23
	static String TAB = "\t";
24
	static String QUOTE = "\"";
19
	static String FORM = "form"
20
	static String ANA = "ana"
21
	static String ID = "id"
22
	static String TYPE = "type"
23
	static String TAB = "\t"
24
	static String QUOTE = "\""
25 25

  
26 26
	File xmlFile
27 27
	File cqpFile
28
	String textname, corpusname, projectname;
29
	boolean normalizeAttributeValues = false;
30
	boolean normalizeAnaValues = true;
31
	boolean normalizeFormValues = true;
28
	String textname, corpusname, projectname
29
	boolean normalizeAttributeValues = false
30
	boolean normalizeAnaValues = true
31
	boolean normalizeFormValues = true
32 32

  
33
	def inputData;
34
	XMLInputFactory factory;
35
	XMLStreamReader parser;
36
	OutputStreamWriter output;
33
	def inputData
34
	XMLInputFactory factory
35
	XMLStreamReader parser
36
	OutputStreamWriter output
37 37

  
38
	def anavalues = [:];
39
	def anatypes;
38
	def anavalues = [:]
39
	def anatypes
40 40

  
41 41
	String WTAG = "w"
42 42

  
43 43
	public void setNormalizeAttributeValues(boolean n) {
44
		this.normalizeAttributeValues = n;
44
		this.normalizeAttributeValues = n
45 45
	}
46 46

  
47 47
	public void setNormalizeAnaValues(boolean n) {
48
		this.normalizeAnaValues = n;
48
		this.normalizeAnaValues = n
49 49
	}
50 50

  
51 51
	public void setNormalizeFormValues(boolean n) {
52
		this.normalizeFormValues = n;
52
		this.normalizeFormValues = n
53 53
	}
54 54

  
55 55
	public XTZCompilerStep(File xmlFile, File cqpFile, String textname, String corpusname, String projectname, def anatypes, def wtag) {
56
		this.xmlFile = xmlFile;
57
		this.cqpFile = cqpFile;
56
		this.xmlFile = xmlFile
57
		this.cqpFile = cqpFile
58 58
		this.textname = textname
59
		this.corpusname = corpusname;
60
		this.projectname = projectname;
61
		this.anatypes = anatypes;
59
		this.corpusname = corpusname
60
		this.projectname = projectname
61
		this.anatypes = anatypes
62 62
		this.WTAG = wtag
63 63

  
64 64
		try {
65
			inputData = xmlFile.toURI().toURL().openStream();
66
			factory = XMLInputFactory.newInstance();
67
			parser = factory.createXMLStreamReader(inputData);
65
			inputData = xmlFile.toURI().toURL().openStream()
66
			factory = XMLInputFactory.newInstance()
67
			parser = factory.createXMLStreamReader(inputData)
68 68
		} catch (Exception ex) {
69
			System.err.println("Exception while parsing $xmlFile : "+ex);
69
			System.err.println("Exception while parsing $xmlFile : "+ex)
70 70
		}
71 71
	}
72 72

  
......
79 79
	 */
80 80
	private boolean createOutput(File f) {
81 81
		try {
82
			output = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(f)) , "UTF-8");
83
			return true;
82
			output = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(f)) , "UTF-8")
83
			return true
84 84
		} catch (Exception e) {
85
			System.err.println(e);
86
			return false;
85
			System.err.println(e)
86
			return false
87 87
		}
88 88
	}
89 89

  
......
94 94
	 * @param fileName the file name
95 95
	 * @return true, if successful
96 96
	 */
97
	public boolean process()
98
	{
97
	public boolean process() {
99 98
		if (!createOutput(cqpFile)) {
100
			return false;
99
			return false
101 100
		}
102 101
		
103
		String headvalue=""
104
		String vAna = "";
105
		String vForm = "";
106
		String wordid= "";
107
		String vHead = "";
102
		String headvalue = ""
103
		String vAna = ""
104
		String vForm = ""
105
		String wordid= ""
106
		String vHead = ""
108 107

  
109 108
		int p_id = 0;
110 109
		int s_id = 0;
......
125 124
		int nWords = 0;
126 125
		try {
127 126
			String localname;
128
			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next())
129
			{
127
			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
130 128
				switch (event) {
131 129
					case XMLStreamConstants.START_ELEMENT:
132 130
						localname = parser.getLocalName().toLowerCase();
......
139 137
									String attrname = parser.getAttributeLocalName(i);
140 138
									String attrvalue = parser.getAttributeValue(i)
141 139

  
142
									if (normalizeAttributeValues)
140
									if (normalizeAttributeValues) {
143 141
										attrvalue = attrvalue.trim();
144

  
145
									if (attrname != ID)
146
										output.write(" "+attrname.toLowerCase()+"=\""+attrvalue+QUOTE)
142
									}
143
									if (attrname != ID) {
144
										output.write(" "+attrname.toLowerCase()+"=\""+attrvalue.replace("\"", "&quot;")+QUOTE)
145
									}
147 146
								}
148 147
								output.write(">\n");
149 148

  
......
180 179
							default:
181 180
								if (!foundtei || !foundtext) break;
182 181

  
183
								output.write("<"+localname);
182
								output.write("<"+localname)
184 183

  
185 184
								for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
186
									String attrname = parser.getAttributeLocalName(i);
185
									String attrname = parser.getAttributeLocalName(i)
187 186

  
188 187
									String attrvalue = parser.getAttributeValue(i)
189
									if (normalizeAttributeValues)
190
										attrvalue = attrvalue.trim();
191

  
192
									output.write(" "+attrname.toLowerCase()+"=\""+attrvalue+QUOTE)
188
									if (normalizeAttributeValues) {
189
										attrvalue = attrvalue.trim()
190
									}
191
									output.write(" "+attrname.toLowerCase()+"=\""+attrvalue.replace("\"", "&quot;")+QUOTE)
193 192
								}
194 193
								if (parser.getAttributeCount() == 0) { // add the n attribute
195
									if (!ncounts.containsKey(localname)) ncounts.put(localname, 0);
196
									int ncount = ncounts.get(localname);
197
									ncounts.put(localname, ncount+1);
194
									if (!ncounts.containsKey(localname)) ncounts.put(localname, 0)
195
									int ncount = ncounts.get(localname)
196
									ncounts.put(localname, ncount+1)
198 197
									output.write(" n=\""+ncount+QUOTE)
199 198
								}
200
								output.write(">\n");
199
								output.write(">\n")
201 200
						}
202 201
						break;
203 202

  
......
251 250
								}
252 251
							}
253 252
							if (flagAna) {
254
								if (normalizeAnaValues)
253
								if (normalizeAnaValues) {
255 254
									anavalue += parser.getText().trim();
256
								else
255
								} else {
257 256
									anavalue += parser.getText();
257
								}
258 258
							}
259 259
						}
260 260
						break;

Formats disponibles : Unified diff