Révision 2434
| tmp/org.txm.utils/src/org/txm/utils/AsciiUtils.java (revision 2434) | ||
|---|---|---|
| 26 | 26 |
// $LastChangedBy:$ |
| 27 | 27 |
// |
| 28 | 28 |
package org.txm.utils; |
| 29 |
// TODO: Auto-generated Javadoc |
|
| 30 | 29 |
|
| 31 | 30 |
/** |
| 32 | 31 |
* The Class AsciiUtils. |
| ... | ... | |
| 38 | 37 |
* |
| 39 | 38 |
* AsciiUtils.removePunct(str) supprime les ponctuations |
| 40 | 39 |
* |
| 41 |
* AsciiUtils.buildAttributeId(str) créé un identifiant CQP compatible
|
|
| 40 |
* AsciiUtils.buildWordId(str) crée un identifiant de mot compatible CQP
|
|
| 42 | 41 |
* |
| 43 | 42 |
* AsciiUtils.buildId(str) crée un identifiant compatible corpus CQP |
| 44 | 43 |
* |
| ... | ... | |
| 101 | 100 |
} |
| 102 | 101 |
|
| 103 | 102 |
/** |
| 104 |
* Removes the punct. |
|
| 103 |
* Removes punctuation and empty spaces.
|
|
| 105 | 104 |
* |
| 106 | 105 |
* @param s |
| 107 | 106 |
* the s |
| ... | ... | |
| 112 | 111 |
} |
| 113 | 112 |
|
| 114 | 113 |
/** |
| 115 |
* Builds the id. |
|
| 114 |
* Builds the word id.
|
|
| 116 | 115 |
* |
| 117 | 116 |
* @param s |
| 118 | 117 |
* the s |
| ... | ... | |
| 159 | 158 |
*/ |
| 160 | 159 |
public static String buildId(String s) {
|
| 161 | 160 |
// TODO: replace this with a lib managing the ID attribute format |
| 162 |
if (s.length() == 0) |
|
| 161 |
if (s.length() == 0) {
|
|
| 163 | 162 |
return s; |
| 163 |
} |
|
| 164 | 164 |
|
| 165 | 165 |
String rez = convertNonAscii(s).toLowerCase(); |
| 166 | 166 |
rez = rez.replaceAll("\\p{Space}++", "_");
|
| ... | ... | |
| 170 | 170 |
char c = rez.charAt(0); |
| 171 | 171 |
while (c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' || c == '6' || c == '7' || c == '8' || c == '9') {
|
| 172 | 172 |
rez = rez.substring(1); |
| 173 |
if (rez.length() == 0) |
|
| 173 |
if (rez.length() == 0) {
|
|
| 174 | 174 |
return ""; |
| 175 |
} |
|
| 175 | 176 |
c = rez.charAt(0); |
| 176 | 177 |
} |
| 177 | 178 |
|
Formats disponibles : Unified diff