Révision 3527
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/txt/DecodeTXTClipboardMacro.groovy (revision 3527) | ||
---|---|---|
1 |
// Copyright © 2022 ENS de Lyon - https://www.textometrie.org |
|
2 |
// @author sheiden |
|
3 |
|
|
4 |
package org.txm.macro.txt |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
|
|
7 |
import java.lang.Character |
|
8 |
import java.awt.Font |
|
9 |
import java.awt.event.KeyEvent |
|
10 |
|
|
11 |
/**
 * Maps a Unicode general category code, as returned by Character.getType(),
 * to a human readable category name.
 *
 * @param tc the general category code (one of the java.lang.Character category constants)
 * @return the category name, or "Unknown" when the code matches none of the listed categories
 */
def TypeCode2TypeName(tc) {

	// Declared with 'def' so the table stays local to the method instead of
	// being stored in the script binding.
	// Each entry pairs a category constant with its display name; the list covers
	// every general category constant of java.lang.Character, including the
	// quote punctuation and modifier letter categories.
	def categories = [
		[ Character.COMBINING_SPACING_MARK, "Combining spacing mark" ],
		[ Character.CONNECTOR_PUNCTUATION, "Connector punctuation" ],
		[ Character.CONTROL, "Control" ],
		[ Character.CURRENCY_SYMBOL, "Currency symbol" ],
		[ Character.DASH_PUNCTUATION, "Dash punctuation" ],
		[ Character.DECIMAL_DIGIT_NUMBER, "Decimal digit number" ],
		[ Character.ENCLOSING_MARK, "Enclosing mark" ],
		[ Character.END_PUNCTUATION, "End punctuation" ],
		[ Character.FINAL_QUOTE_PUNCTUATION, "Final quote punctuation" ],
		[ Character.FORMAT, "Format" ],
		[ Character.INITIAL_QUOTE_PUNCTUATION, "Initial quote punctuation" ],
		[ Character.LETTER_NUMBER, "Letter number" ],
		[ Character.LINE_SEPARATOR, "Line separator" ],
		[ Character.LOWERCASE_LETTER, "Lowercase letter" ],
		[ Character.MATH_SYMBOL, "Math symbol" ],
		[ Character.MODIFIER_LETTER, "Modifier letter" ],
		[ Character.MODIFIER_SYMBOL, "Modifier symbol" ],
		[ Character.NON_SPACING_MARK, "Non spacing mark" ],
		[ Character.OTHER_LETTER, "Other letter" ],
		[ Character.OTHER_NUMBER, "Other number" ],
		[ Character.OTHER_PUNCTUATION, "Other punctuation" ],
		[ Character.OTHER_SYMBOL, "Other symbol" ],
		[ Character.PARAGRAPH_SEPARATOR, "Paragraph separator" ],
		[ Character.PRIVATE_USE, "Private use" ],
		[ Character.SPACE_SEPARATOR, "Space separator" ],
		[ Character.START_PUNCTUATION, "Start punctuation" ],
		[ Character.SURROGATE, "Surrogate" ],
		[ Character.TITLECASE_LETTER, "Titlecase letter" ],
		[ Character.UNASSIGNED, "Unassigned" ],
		[ Character.UPPERCASE_LETTER, "Uppercase letter" ]
	]

	// Groovy '==' compares numerically, so the int returned by
	// Character.getType() matches the byte constants.
	for (category in categories) {
		if (tc == category[0]) {
			return category[1]
		}
	}
	return "Unknown"
}
|
37 |
|
|
38 |
/**
 * Maps a Unicode bidirectional category code, as returned by
 * Character.getDirectionality(), to a human readable name.
 *
 * @param dc the directionality code (one of the Character.DIRECTIONALITY_* values)
 * @return the directionality name, or "Unknown" when the code matches none of the listed values
 */
def DirectionalityCode2DirectionalityName(dc) {

	// Declared with 'def' so the table stays local to the method instead of
	// being stored in the script binding.
	def directionalities = [
		[ Character.DIRECTIONALITY_ARABIC_NUMBER, "Arabic Number" ],
		[ Character.DIRECTIONALITY_BOUNDARY_NEUTRAL, "Boundary Neutral" ],
		[ Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, "Common Number Separator" ],
		[ Character.DIRECTIONALITY_EUROPEAN_NUMBER, "European Number" ],
		[ Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, "European Number Separator" ],
		[ Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, "European Number Terminator" ],
		[ Character.DIRECTIONALITY_LEFT_TO_RIGHT, "Left To Right" ],
		[ Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, "Left To Right Embedding" ],
		[ Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, "Left To Right Override" ],
		[ Character.DIRECTIONALITY_NONSPACING_MARK, "Nonspacing Mark" ],
		[ Character.DIRECTIONALITY_OTHER_NEUTRALS, "Other Neutrals" ],
		[ Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR, "Paragraph Separator" ],
		[ Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, "Pop Directional Format" ],
		[ Character.DIRECTIONALITY_RIGHT_TO_LEFT, "Right To Left" ],
		[ Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, "Right To Left Arabic" ],
		[ Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, "Right To Left Embedding" ],
		[ Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, "Right To Left Override" ],
		[ Character.DIRECTIONALITY_SEGMENT_SEPARATOR, "Segment Separator" ],
		[ Character.DIRECTIONALITY_UNDEFINED, "Undefined" ],
		[ Character.DIRECTIONALITY_WHITESPACE, "Whitespace" ],
		// Isolate directionalities: the named constants only exist since Java 9
		// (DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19, DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20,
		// DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21, DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22).
		// Their numeric values are used here so the script still runs on older JDKs,
		// where these codes are simply never returned.
		[ 19, "Left To Right Isolate" ],
		[ 20, "Right To Left Isolate" ],
		[ 21, "First Strong Isolate" ],
		[ 22, "Pop Directional Isolate" ]
	]

	// Groovy '==' compares numerically, so byte and int codes both match.
	for (directionality in directionalities) {
		if (dc == directionality[0]) {
			return directionality[1]
		}
	}
	return "Unknown"
}
|
93 |
|
|
94 |
/**
 * Tells whether a character can be displayed as such in the output table.
 *
 * A character is considered printable when it is not an ISO control character,
 * is not KeyEvent.CHAR_UNDEFINED, belongs to a Unicode block and that block is
 * not the SPECIALS block.
 *
 * @param c the character to test
 * @return true when the character is printable, false otherwise
 */
def isPrintableChar(c) {
	def unicodeBlock = Character.UnicodeBlock.of(c)

	if (Character.isISOControl(c)) {
		return false
	}
	if (c == KeyEvent.CHAR_UNDEFINED) {
		return false
	}
	if (unicodeBlock == null) {
		return false
	}
	return unicodeBlock != Character.UnicodeBlock.SPECIALS
}
|
101 |
|
|
102 |
// Reads the text held in the clipboard and prints a table with one row per
// Unicode code point: the character, its decimal code point, its Unicode name,
// general category, script, block and directionality.
// NOTE(review): 'monitor' is not defined in this script; presumably it is
// provided by the macro runner through the script binding - confirm.
monitor.syncExec(new Runnable() {
	public void run() {
		try {
			def s = org.txm.rcp.utils.IOClipboard.read()
			if (s == null || s.length() == 0) {
				println "No text in clipboard. Aborting."
				return
			}

			// Column widths start at the width of their header
			// ("Code", "Name", "Type", "Script", "Block", "Directionality").
			int maxcodeL = 4
			int maxnameL = 4
			int maxtypeL = 4
			int maxscriptL = 6
			int maxblockL = 5
			int maxdirectL = 14

			// Single pass: collect the properties of every code point and
			// track the widest value of each column.
			def rows = []
			int l = s.length()
			int i = 0
			while (i < l) {
				int cp = s.codePointAt(i)
				def name = Character.getName(cp)
				def type = TypeCode2TypeName(Character.getType(cp))
				def script = Character.UnicodeScript.of(cp).name()
				def block = Character.UnicodeBlock.of(cp)
				def direct = DirectionalityCode2DirectionalityName(Character.getDirectionality(cp))

				// name and block may be null (unassigned code point); Groovy's
				// null.toString() yields "null", which is also what sprintf prints.
				maxcodeL = Math.max(maxcodeL, Integer.toString(cp).length())
				maxnameL = Math.max(maxnameL, name.toString().length())
				maxtypeL = Math.max(maxtypeL, type.toString().length())
				maxscriptL = Math.max(maxscriptL, script.toString().length())
				maxblockL = Math.max(maxblockL, block.toString().length())
				maxdirectL = Math.max(maxdirectL, direct.toString().length())

				rows << [
					c: new String(Character.toChars(cp)),
					cp: cp,
					name: name,
					type: type,
					script: script,
					block: block,
					direct: direct
				]

				// Advance by code point, not by UTF-16 unit: a supplementary
				// character occupies two chars (a surrogate pair) and must be
				// reported once, not as a character followed by a lone surrogate.
				i += Character.charCount(cp)
			}

			// Format shared by the header and every row, after the first column.
			String columns = " %${maxcodeL}s %-${maxnameL}s %-${maxtypeL}s %-${maxscriptL}s %-${maxblockL}s %-${maxdirectL}s"

			println sprintf("%-15s" + columns, "Char", "Code", "Name", "Type", "Script", "Block", "Directionality")
			for (row in rows) {
				if (isPrintableChar(row.cp)) {
					println sprintf("%-15s" + columns, row.c, row.cp, row.name, row.type, row.script, row.block, row.direct)
				} else {
					// "<not printable>" is 15 characters wide, like the first column.
					println sprintf("<not printable>" + columns, row.cp, row.name, row.type, row.script, row.block, row.direct)
				}
			}

		} catch (Exception e) {
			e.printStackTrace()
		}
	}
})
|
179 |
|
Formats disponibles : Unified diff