Révision 3386

TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/scripts/filters/TagSentences/TagSentences.groovy (revision 3386)
115 115
		/** The corr_tags. */
116 116
		corr_tags = tc.corr_tags;
117 117
		
118
		strongPunct = tc.punct_strong;
118
		strongPunct = tc.punct_strong; // 
119 119
		
120 120
		reg_comment = /\A\s*<!--.*-->\s*\Z/
121 121
		reg_out_of_sentence = /^(.*)<s( [^>]*)?>(.*)$/
......
159 159
		reg_corr_tags_no_seg2 = /^(.*)<\/($corr_tags_no_seg)>(.*)$/
160 160
		reg_block_tags = /^(.*)<\/($div_tags|$q_tags)>(.*)$/
161 161
		reg_block_tag_alone = "<($div_tags|$q_tags)>"
162
		
163
		//tc.dump()
164
		//println "reg_punct=$reg_punct reg_strong_punct=$reg_strong_punct"
162 165
	}
163 166
	
164 167
	/* (non-Javadoc)
TXM/trunk/org.txm.tokenizer.core/src/org/txm/tokenizer/TokenizerClasses.java (revision 3386)
263 263
	}
264 264
	
265 265
	public void recombine() {
266
		corr_tags = "" + corr_tags_no_seg + "|" + seg_tags + "";
267
		extraword_tags = "" + div_tags + "|" + q_tags + "|" + extraword1_tags + "";
268
		punct_strong = "[" + punct_strong1 + "]|" + punct_strong2 + "";
269
		punct_paren_open = "" + punct_paren_open1 + "|" + punct_paren_open2 + "";
270
		punct_paren_close = "" + punct_paren_close1 + "|" + punct_paren_close2 + "";
271
		punct_paren = "" + punct_paren_open + "|" + punct_paren_close + "";
272
		punct_all = "" + punct_strong + "|" + punct_paren + "|[" + punct_weak + "]";
273
		word_chars = "[^ " + punct_quotes + "" + punct_strong1 + "" + punct_paren_open1 + "" + punct_paren_close1 + "" + punct_weak + "]+|" + entity + "";
266
		
267
		if (corr_tags == null) {
268
			corr_tags = "" + corr_tags_no_seg + "|" + seg_tags + "";
269
		}
270
		if (extraword_tags == null) {
271
			extraword_tags = "" + div_tags + "|" + q_tags + "|" + extraword1_tags + "";
272
		}
273
		if (punct_strong == null) {
274
			punct_strong = "[" + punct_strong1 + "]|" + punct_strong2 + "";
275
		}
276
		if (punct_paren_open == null) {
277
			punct_paren_open = "" + punct_paren_open1 + "|" + punct_paren_open2 + "";
278
		}
279
		if (punct_paren_close == null) {
280
			punct_paren_close = "" + punct_paren_close1 + "|" + punct_paren_close2 + "";
281
		}
282
		if (punct_paren == null) {
283
			punct_paren = "" + punct_paren_open + "|" + punct_paren_close + "";
284
		}
285
		if (punct_all == null) {
286
			punct_all = "" + punct_strong + "|" + punct_paren + "|[" + punct_weak + "]";
287
		}
288
		if (word_chars == null) {
289
			word_chars = "[^ " + punct_quotes + "" + punct_strong1 + "" + punct_paren_open1 + "" + punct_paren_close1 + "" + punct_weak + "]+|" + entity + "";
290
		}
274 291
	}
275 292
	
276 293
	/**
......
455 472
		System.out.println("whitespaces = " + whitespaces + "");
456 473
		System.out.println("regElision = " + regElision + "");
457 474
		System.out.println("regPunct = " + regPunct + "");
458
		System.out.println("TESTS");
475
		System.out.println("TESTS:");
459 476
		
460 477
		for (TTest test : tests) {
461 478
			System.out.println(" " + test + "");
......
847 864
			Field[] fields = TokenizerClasses.class.getFields();
848 865
			for (Field field : fields) {
849 866
				int m = field.getModifiers();
850
				if (Modifier.isStatic(m) && Modifier.isPublic(m) && field.getType().equals(String.class)) {
867
				if (!Modifier.isStatic(m) && Modifier.isPublic(m) && field.getType().equals(String.class)) {
851 868
					String name = field.getName();
852 869
					String value = params.get(name, null);
853 870
					if (value != null) {
854 871
						try {
855 872
							if (debug) System.out.println(" Tokenizer parametrized with " + name + "=" + value);
856
							field.set(field, value);
873
							field.set(this, value);
857 874
						}
858 875
						catch (Exception e) {
859 876
							Log.printStackTrace(e);

Formats disponibles : Unified diff