Révision 2967

tmp/org.txm.utils/src/org/txm/utils/AsciiUtils.java (revision 2967)
127 127
		if (s.length() == 0) {
128 128
			return s;
129 129
		}
130
		
130

  
131 131
		// ensure the "w_" prefix presence
132 132
		if (s.startsWith("w")) {
133 133
			if (!s.startsWith("w_")) {
......
136 136
		} else {
137 137
			s = "w_" + s;
138 138
		}
139
		//System.out.println("first="+s);
139
		// System.out.println("first="+s);
140 140

  
141
		String rez = convertNonAscii(s);//.toLowerCase();
142
		//System.out.println("nonasscii="+rez);
141
		String rez = convertNonAscii(s);// .toLowerCase();
142
		// System.out.println("nonasscii="+rez);
143 143
		rez = rez.replaceAll("\\p{Space}++", "_");
144
		//System.out.println("spaces="+rez);
144
		// System.out.println("spaces="+rez);
145 145
		rez = rez.replaceAll("[¤€§µ£°().,;:/?!@§%\\\\\"’ʹ'*+\\-}\\]\\[{#~&]", ""); //$NON-NLS-1$ //$NON-NLS-2$ // "[^\\P{P}_]"
146
		//System.out.println("ponc="+rez);
146
		// System.out.println("ponc="+rez);
147 147

  
148 148
		return rez;
149 149
	}
......
172 172
			return s;
173 173
		}
174 174
		String rez = s.trim();
175
		s = s.replaceAll("\\p{Space}++", "_");
175
		rez = rez.replaceAll("\\p{Space}++", "_");
176 176
		rez = rez.replaceAll("_", "-");
177 177
		rez = convertNonAscii(rez).toLowerCase();
178
		
178

  
179 179
		rez = rez.replaceAll("[¤€§µ£°().,;:/?!@§%\\\\\"’ʹ'*+\\}\\]\\[{#~&]", ""); //$NON-NLS-1$ //$NON-NLS-2$
180 180
		// remove first chars if number
181 181
		char c = rez.charAt(0);
......
199 199
	public static void main(String args[]) {
200 200
		String s = "01The result : - - _ тврьдо È,É,Ê,Ë,Û,Ù,Ï,Î,À,Â,Ô,è,é,ê,ë,û,ù,ï,î,à,â,ô,ç  0 1 2 3 4 5 6 7 8 9 10"; //$NON-NLS-1$
201 201
		System.out.println(AsciiUtils.convertNonAscii(s));
202
		//System.out.println(AsciiUtils.buildId(s));
202
		// System.out.println(AsciiUtils.buildId(s));
203 203
		String s2 = "w_ТВРЬДОтврьдо_123&é\"'(-è_çà)=/*-+~#{[|`\\^@]}¤;:!§/.?µ%£°";
204
		System.out.println("nonascii="+AsciiUtils.convertNonAscii(s2));
205
		System.out.println("word_id="+AsciiUtils.buildWordId(s2));
206
		System.out.println("attribute_id="+AsciiUtils.buildAttributeId(s2));
204
		System.out.println("nonascii=" + AsciiUtils.convertNonAscii(s2));
205
		System.out.println("word_id=" + AsciiUtils.buildWordId(s2));
206
		System.out.println("attribute_id=" + AsciiUtils.buildAttributeId(s2));
207 207
		// output :
208 208
		// The result : E,E,E,E,U,U,I,I,A,A,O,e,e,e,e,u,u,i,i,a,a,o,c
209 209

  

Formats disponibles : Unified diff