Révision 3527

TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/txt/DecodeTXTClipboardMacro.groovy (revision 3527)
1
// Copyright © 2022 ENS de Lyon - https://www.textometrie.org
2
// @author sheiden
3

  
4
package org.txm.macro.txt
5
// STANDARD DECLARATIONS
6

  
7
import java.lang.Character
8
import java.awt.Font
9
import java.awt.event.KeyEvent
10

  
11
/**
 * Translates a Unicode general category code into a human-readable label.
 *
 * @param tc a general category code, e.g. the value returned by Character.getType()
 * @return the label of the matching category, or "Unknown" when no category matches
 */
def TypeCode2TypeName(tc) {

	// Declared with 'def' so the table stays local instead of leaking into the script binding.
	// Each entry pairs a java.lang.Character category constant with its label.
	def categories = [
		[Character.COMBINING_SPACING_MARK, "Combining spacing mark"],
		[Character.CONNECTOR_PUNCTUATION, "Connector punctuation"],
		[Character.CONTROL, "Control"],
		[Character.CURRENCY_SYMBOL, "Currency symbol"],
		[Character.DASH_PUNCTUATION, "Dash punctuation"],
		[Character.DECIMAL_DIGIT_NUMBER, "Decimal digit number"],
		[Character.ENCLOSING_MARK, "Enclosing mark"],
		[Character.END_PUNCTUATION, "End punctuation"],
		// Closing quotation marks such as » and ” belong to this category.
		[Character.FINAL_QUOTE_PUNCTUATION, "Final quote punctuation"],
		[Character.FORMAT, "Format"],
		// Opening quotation marks such as « and “ belong to this category.
		[Character.INITIAL_QUOTE_PUNCTUATION, "Initial quote punctuation"],
		[Character.LETTER_NUMBER, "Letter number"],
		[Character.LINE_SEPARATOR, "Line separator"],
		[Character.LOWERCASE_LETTER, "Lowercase letter"],
		[Character.MATH_SYMBOL, "Math symbol"],
		[Character.MODIFIER_SYMBOL, "Modifier symbol"],
		[Character.NON_SPACING_MARK, "Non spacing mark"],
		[Character.OTHER_LETTER, "Other letter"],
		[Character.OTHER_NUMBER, "Other number"],
		[Character.OTHER_PUNCTUATION, "Other punctuation"],
		[Character.OTHER_SYMBOL, "Other symbol"],
		[Character.PARAGRAPH_SEPARATOR, "Paragraph separator"],
		[Character.PRIVATE_USE, "Private use"],
		[Character.SPACE_SEPARATOR, "Space separator"],
		[Character.START_PUNCTUATION, "Start punctuation"],
		[Character.SURROGATE, "Surrogate"],
		[Character.TITLECASE_LETTER, "Titlecase letter"],
		[Character.UNASSIGNED, "Unassigned"],
		[Character.UPPERCASE_LETTER, "Uppercase letter"]
	]

	// Groovy's == compares numbers by value, so an int code matches the byte constants.
	for (category in categories) {
		if (tc == category[0]) {
			return category[1]
		}
	}
	return "Unknown"
}
37

  
38
/**
 * Translates a Unicode bidirectional category code into a human-readable label.
 *
 * @param dc a directionality code, e.g. the value returned by Character.getDirectionality()
 * @return the label of the matching directionality, or "Unknown" when none matches
 */
def DirectionalityCode2DirectionalityName(dc) {

	// Declared with 'def' so the table stays local instead of leaking into the script binding.
	// Each entry pairs a java.lang.Character directionality constant with its label.
	// NOTE(review): the isolate directionalities added to java.lang.Character in Java 9
	// are not listed here, so they are reported as "Unknown" - confirm the minimum
	// Java version before adding them.
	def directionalities = [
		[Character.DIRECTIONALITY_ARABIC_NUMBER, "Arabic Number"],
		[Character.DIRECTIONALITY_BOUNDARY_NEUTRAL, "Boundary Neutral"],
		[Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, "Common Number Separator"],
		[Character.DIRECTIONALITY_EUROPEAN_NUMBER, "European Number"],
		[Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, "European Number Separator"],
		[Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, "European Number Terminator"],
		[Character.DIRECTIONALITY_LEFT_TO_RIGHT, "Left To Right"],
		[Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, "Left To Right Embedding"],
		[Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, "Left To Right Override"],
		[Character.DIRECTIONALITY_NONSPACING_MARK, "Nonspacing Mark"],
		[Character.DIRECTIONALITY_OTHER_NEUTRALS, "Other Neutrals"],
		[Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR, "Paragraph Separator"],
		[Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, "Pop Directional Format"],
		[Character.DIRECTIONALITY_RIGHT_TO_LEFT, "Right To Left"],
		[Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, "Right To Left Arabic"],
		[Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, "Right To Left Embedding"],
		[Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, "Right To Left Override"],
		[Character.DIRECTIONALITY_SEGMENT_SEPARATOR, "Segment Separator"],
		[Character.DIRECTIONALITY_UNDEFINED, "Undefined"],
		[Character.DIRECTIONALITY_WHITESPACE, "Whitespace"]
	]

	// Groovy's == compares numbers by value, so int and byte codes both match.
	for (directionality in directionalities) {
		if (dc == directionality[0]) {
			return directionality[1]
		}
	}
	return "Unknown"
}
93

  
94
/**
 * Tells whether a character is considered printable.
 *
 * A character is printable when it is not an ISO control character, is not
 * KeyEvent.CHAR_UNDEFINED, and belongs to a Unicode block other than "Specials".
 *
 * @param c the character to test
 * @return true when the character is printable, false otherwise
 */
def isPrintableChar(c) {
	def unicodeBlock = Character.UnicodeBlock.of(c)

	// Control characters and the "undefined" sentinel are never printable.
	if (Character.isISOControl(c) || c == KeyEvent.CHAR_UNDEFINED) {
		return false
	}

	// Characters outside any block, or inside the Specials block, are not printable either.
	return unicodeBlock != null && unicodeBlock != Character.UnicodeBlock.SPECIALS
}
101

  
102
// Reads the text held in the clipboard and prints a table with one row of
// Unicode properties (code, name, type, script, block, directionality) per code point.
monitor.syncExec(new Runnable() {
	public void run() {
		try {
			def s = org.txm.rcp.utils.IOClipboard.read()
			if (s == null || s.length() == 0) {
				println "No text in clipboard. Aborting."
				return
			}

			// Pass 1: build one row per Unicode code point, not per UTF-16 char.
			// A supplementary character (surrogate pair) is therefore described once,
			// instead of as a truncated half followed by a lone low surrogate.
			def rows = []
			int i = 0
			while (i < s.length()) {
				int cp = s.codePointAt(i)

				// BMP code points are tested as a char, exactly as before;
				// only supplementary code points are tested as an int.
				def printable = Character.isBmpCodePoint(cp) ? isPrintableChar((char) cp) : isPrintableChar(cp)

				rows.add([
					printable ? new String(Character.toChars(cp)) : "<not printable>",
					cp,
					Character.getName(cp),
					TypeCode2TypeName(Character.getType(cp)),
					Character.UnicodeScript.of(cp).name(),
					Character.UnicodeBlock.of(cp),
					DirectionalityCode2DirectionalityName(Character.getDirectionality(cp))
				])

				// Skip both halves of a surrogate pair at once.
				i += Character.charCount(cp)
			}

			// Pass 2: size every column after its longest value, and never narrower than its header.
			def headers = ["Code", "Name", "Type", "Script", "Block", "Directionality"]
			def widths = []
			for (header in headers) {
				widths.add(header.length())
			}
			for (row in rows) {
				for (int col = 0; col < widths.size(); col++) {
					// Groovy's null-safe toString() yields "null" for a missing name or block,
					// which is also the text %s prints for a null value.
					int valueLength = row[col + 1].toString().length()
					if (valueLength > widths[col]) {
						widths[col] = valueLength
					}
				}
			}

			// The first column is 15 characters wide, the length of "<not printable>";
			// the code column is right-aligned, all other columns are left-aligned.
			String format = "%-15s %${widths[0]}s %-${widths[1]}s %-${widths[2]}s %-${widths[3]}s %-${widths[4]}s %-${widths[5]}s"

			println sprintf(format, (["Char"] + headers) as Object[])
			for (row in rows) {
				println sprintf(format, row as Object[])
			}

		} catch (Exception e) {
			e.printStackTrace()
		}
	}
})
179

  

Formats disponibles : Unified diff