Révision 2434
tmp/org.txm.utils/src/org/txm/utils/AsciiUtils.java (revision 2434) | ||
---|---|---|
26 | 26 |
// $LastChangedBy:$ |
27 | 27 |
// |
28 | 28 |
package org.txm.utils; |
29 |
// TODO: Auto-generated Javadoc |
|
30 | 29 |
|
31 | 30 |
/** |
32 | 31 |
* The Class AsciiUtils. |
... | ... | |
38 | 37 |
* |
39 | 38 |
* AsciiUtils.removePunct(str) supprime les ponctuations |
40 | 39 |
* |
41 |
* AsciiUtils.buildAttributeId(str) crée un identifiant compatible CQP
|
|
40 |
* AsciiUtils.buildWordId(str) crée un identifiant de mot compatible CQP
|
|
42 | 41 |
* |
43 | 42 |
* AsciiUtils.buildId(str) crée un identifiant compatible corpus CQP |
44 | 43 |
* |
... | ... | |
101 | 100 |
} |
102 | 101 |
|
103 | 102 |
/** |
104 |
* Removes the punct. |
|
103 |
* Removes punctuation and empty spaces.
|
|
105 | 104 |
* |
106 | 105 |
* @param s |
107 | 106 |
* the s |
... | ... | |
112 | 111 |
} |
113 | 112 |
|
114 | 113 |
/** |
115 |
* Builds the id. |
|
114 |
* Builds the word id.
|
|
116 | 115 |
* |
117 | 116 |
* @param s |
118 | 117 |
* the s |
... | ... | |
159 | 158 |
*/ |
160 | 159 |
public static String buildId(String s) { |
161 | 160 |
// TODO: replace this with a lib managing the ID attribute format |
162 |
if (s.length() == 0) |
|
161 |
if (s.length() == 0) {
|
|
163 | 162 |
return s; |
163 |
} |
|
164 | 164 |
|
165 | 165 |
String rez = convertNonAscii(s).toLowerCase(); |
166 | 166 |
rez = rez.replaceAll("\\p{Space}++", "_"); |
... | ... | |
170 | 170 |
char c = rez.charAt(0); |
171 | 171 |
while (c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' || c == '6' || c == '7' || c == '8' || c == '9') { |
172 | 172 |
rez = rez.substring(1); |
173 |
if (rez.length() == 0) |
|
173 |
if (rez.length() == 0) {
|
|
174 | 174 |
return ""; |
175 |
} |
|
175 | 176 |
c = rez.charAt(0); |
176 | 177 |
} |
177 | 178 |
|
Formats disponibles : Unified diff