Révision 3130

tmp/org.txm.tokenizer.core/src/org/txm/tokenizer/TokenizerClasses.java (revision 3130)
249 249
		
250 250
		/** The TT enclitics. */
251 251
		FClitic_en = "'(s|re|ve|d|m|em|ll)|n['‘’]t";
252 (old line, removed)
		PClitic_fr = "[dcjlmnstDCJLNMST][\'‘’]|[Qq]u[\'‘’]|[Jj]usqu[\'‘’]|[Ll]orsqu[\'‘’]|[Pp]uisqu[\'‘’]|[Qq]uoiqu[\'‘’]";
252 (new line, added)
		PClitic_fr = "[dcjlmnstyDCJLNMSTY][\'‘’]|[Qq]u[\'‘’]|[Jj]usqu[\'‘’]|[Ll]orsqu[\'‘’]|[Pp]uisqu[\'‘’]|[Qq]uoiqu[\'‘’]";
253 253
		FClitic_fr = "-t-elles?|-t-ils?|-t-on|-ce|-elles?|-ils?|-je|-la|-les?|-leur|-lui|-mêmes?|-m[\'‘’]|-moi|-nous|-on|-toi|-tu|-t[\'‘’]|-vous|-en|-y|-ci|-là";
254 254
		PClitic_it = "[dD][ae]ll[\'‘’]|[nN]ell[\'‘’]|[Aa]ll[\'‘’]|[lLDd][\'‘’]|[Ss]ull[\'‘’]|[Qq]uest[\'‘’]|[Uu]n[\'‘’]|[Ss]enz[\'‘’]|[Tt]utt[\'‘’]";
255 255
		FClitic_gl = "-la|-las|-lo|-los|-nos";

Formats disponibles : Unified diff