Revision 2434

tmp/org.txm.utils/src/org/txm/utils/AsciiUtils.java (revision 2434)
26 26
// $LastChangedBy:$ 
27 27
//
28 28
package org.txm.utils;
29
// TODO: Auto-generated Javadoc
30 29

  
31 30
/**
32 31
 * The Class AsciiUtils.
......
38 37
 * 
39 38
 * AsciiUtils.removePunct(str) supprime les ponctuations
40 39
 * 
41
 * AsciiUtils.buildAttributeId(str) créé un identifiant CQP compatible
40
 * AsciiUtils.buildWordId(str) crée un identifiant de mot compatible CQP
42 41
 * 
43 42
 * AsciiUtils.buildId(str) crée un identifiant compatible corpus CQP
44 43
 *
......
101 100
	}
102 101

  
103 102
	/**
104
	 * Removes the punct.
103
	 * Removes the punctuation and empty spaces.
105 104
	 *
106 105
	 * @param s
107 106
	 *            the s
......
112 111
	}
113 112

  
114 113
	/**
115
	 * Builds the id.
114
	 * Builds the word id.
116 115
	 *
117 116
	 * @param s
118 117
	 *            the s
......
159 158
	 */
160 159
	public static String buildId(String s) {
161 160
		// TODO: replace this with a lib managing the ID attribute format
162
		if (s.length() == 0)
161
		if (s.length() == 0) {
163 162
			return s;
163
		}
164 164

  
165 165
		String rez = convertNonAscii(s).toLowerCase();
166 166
		rez = rez.replaceAll("\\p{Space}++", "_");
......
170 170
		char c = rez.charAt(0);
171 171
		while (c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' || c == '6' || c == '7' || c == '8' || c == '9') {
172 172
			rez = rez.substring(1);
173
			if (rez.length() == 0)
173
			if (rez.length() == 0) {
174 174
				return "";
175
			}
175 176
			c = rez.charAt(0);
176 177
		}
177 178

  

Also available in: Unified diff