Révision 3190
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/antract/ImporterUnCorpusOkapiMacro.groovy (revision 3190) | ||
---|---|---|
1 |
// Copyright © 2021 MY_INSTITUTION |
|
2 |
// Licensed under the terms of the GNU General Public License version 3 (http://www.gnu.org/licenses/gpl-3.0.html) |
|
3 |
// @author mdecorde |
|
4 |
|
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.searchengine.cqp.corpus.CQPCorpus |
|
12 |
import org.txm.searchengine.cqp.corpus.Subcorpus |
|
13 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
|
14 |
import org.txm.rcp.views.corpora.CorporaView |
|
15 |
|
|
16 |
// BEGINNING OF PARAMETERS |
|
17 |
|
|
18 |
// Macro parameters; their values are entered in the parameters dialog opened further down.
@Field @Option(name="nom_sous_corpus", usage="Nom du sous-corpus à créer", widget="String", required=true, def="")
def nom_sous_corpus
@Field @Option(name="liste_identifiants_sujets", usage="Liste des identifiants de notice séparés par des '|'", widget="Text", required=true, def="")
def liste_identifiants_sujets

// The macro needs a CQP corpus to be selected in the Corpus view.
if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "Erreur : la sélection de la vue Corpus n'est pas un corpus ($corpusViewSelection)."
	return false
}

// Work on the main corpus of the selection, whatever (sub)corpus was selected.
CQPCorpus selectedCorpus = corpusViewSelection
selectedCorpus.compute()
CQPCorpus parentCorpus = selectedCorpus.getMainCorpus()
String parentCorpusName = parentCorpus.getCqpId()

// Only the AF-VOIX-OFF-* and AFNOTICES / AF-NOTICES corpora are supported.
boolean voixOffCorpus = parentCorpusName.startsWith("AF-VOIX-OFF-")
boolean noticesCorpus = parentCorpusName.startsWith("AFNOTICES") || parentCorpusName.startsWith("AF-NOTICES")
if (!voixOffCorpus && !noticesCorpus) {
	println "Erreur : le corpus n'est pas un corpus d'AF-VOIX-OFF ni AFNOTICE."
	return false
}

// Open the parameters input dialog box; stop if the user cancels it.
if (!ParametersDialog.open(this)) return;

println "Création du sous-corpus avec la liste d'identifiants: $liste_identifiants_sujets"

// Structure property holding the identifier: it differs between the two corpus families.
String prop = noticesCorpus ? "notice_identifiantdelanotice" : "div_id"

// Select every token whose enclosing structure has one of the identifiers,
// then expand each match to the whole structure.
String query = "[_.$prop=\"$liste_identifiants_sujets\"] expand to $prop"
println "Requête CQL de création du corpus : $query"
Subcorpus sub = parentCorpus.createSubcorpus(new CQLQuery(query), nom_sous_corpus)

if (sub.getSize() <= 0) {
	println "Erreur : Le corpus créé et vide ! >_<'"
} else {
	println "Le sous-corpus $nom_sous_corpus a été créé."
	// Refresh the Corpus view through the monitor so that the new subcorpus shows up.
	monitor.syncExec(new Runnable() {
		public void run() {
			CorporaView.refresh();
			CorporaView.expand(parentCorpus);
		}
	});
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/antract/ImporterCorpusOkapiMacro.groovy (revision 3190) | ||
---|---|---|
1 |
// Copyright © 2021 MY_INSTITUTION |
|
2 |
// Licensed under the terms of the GNU General Public License version 3 (http://www.gnu.org/licenses/gpl-3.0.html) |
|
3 |
// @author mdecorde |
|
4 |
|
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.projects.antract |
|
7 |
|
|
8 |
import org.txm.macro.projects.antract.OkapiSaphirAPI |
|
9 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
10 |
|
|
11 |
import org.kohsuke.args4j.* |
|
12 |
import groovy.transform.Field |
|
13 |
import org.txm.rcp.swt.widget.parameters.* |
|
14 |
import org.txm.searchengine.cqp.corpus.CQPCorpus |
|
15 |
import org.txm.searchengine.cqp.corpus.Subcorpus |
|
16 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
|
17 |
import org.txm.rcp.views.corpora.CorporaView |
|
18 |
import org.txm.backtomedia.preferences.BackToMediaPreferences |
|
19 |
|
|
20 |
// BEGINNING OF PARAMETERS |
|
21 |
|
|
22 |
// Macro parameters; their values are entered in the parameters dialog opened at the end of this section.
@Field @Option(name="liste_identifiants_sujets", usage="Liste des identifiants de notice séparés par des '|'", widget="Text", required=true, def="")
def liste_identifiants_sujets
@Field @Option(name="nom_sous_corpus", usage="Nom du sous-corpus à créer", widget="String", required=true, def="")
def nom_sous_corpus
// Disabled parameter (identifier of the Okapi corpus to import), kept for reference:
//@Field @Option(name="identifiant_corpus_okapi", usage="Identifiant du corpus Okapi à importer", widget="String", required=true, def="")
//def identifiant_corpus_okapi

// The macro needs a CQP corpus to be selected in the Corpus view.
if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "Erreur: la sélection n'est pas un corpus."
	return false
}

// Work on the main corpus of the selection, whatever (sub)corpus was selected.
CQPCorpus selectedCorpus = corpusViewSelection
selectedCorpus.compute()
CQPCorpus parentCorpus = selectedCorpus.getMainCorpus()
String parentCorpusName = parentCorpus.getCqpId()

// Only the AF-VOIX-OFF-* and AFNOTICES / AF-NOTICES corpora are supported.
boolean voixOffCorpus = parentCorpusName.startsWith("AF-VOIX-OFF-")
boolean noticesCorpus = parentCorpusName.startsWith("AFNOTICES") || parentCorpusName.startsWith("AF-NOTICES")
if (!voixOffCorpus && !noticesCorpus) {
	println "Erreur: le corpus n'est pas un corpus d'AF-VOIX-OFF ni AFNOTICE"
	return false
}

// Open the parameters input dialog box; stop if the user cancels it.
if (!ParametersDialog.open(this)) return;
|
51 |
|
|
52 |
|
|
53 |
//monitor.syncExec(new Runnable() { |
|
54 |
// public void run() { |
|
55 |
// if (!OkapiSaphirAPI.initializeCredentials(org.eclipse.swt.widgets.Display.getCurrent().getActiveShell(), BackToMediaPreferences.MEDIA_AUTH_LOGIN, BackToMediaPreferences.MEDIA_AUTH_PASSWORD, "OKAPI - Antract", "S'identifier sur la plateforme Okapi.", "OKAPI - Antract", monitor)) { |
|
56 |
// return null; |
|
57 |
// } |
|
58 |
// } |
|
59 |
//}); |
|
60 |
// |
|
61 |
//String user = System.getProperty(BackToMediaPreferences.MEDIA_AUTH_LOGIN) |
|
62 |
//String password = System.getProperty(BackToMediaPreferences.MEDIA_AUTH_PASSWORD) |
|
63 |
//if (user == null || user.length() == 0 || password == null || password.length() == 0) { |
|
64 |
// println "Annulation." |
|
65 |
//} |
|
66 |
// |
|
67 |
//String sessionID = OkapiSaphirAPI.login(user, password) |
|
68 |
//if (sessionID.length() == 0) { |
|
69 |
// println "Erreur : La session Okapi n'a pu être démarrée." |
|
70 |
// System.setProperty(BackToMediaPreferences.MEDIA_AUTH_LOGIN, "") |
|
71 |
// System.setProperty(BackToMediaPreferences.MEDIA_AUTH_PASSWORD, "") |
|
72 |
//} |
|
73 |
// |
|
74 |
//String corpora = OkapiSaphirAPI.get_user_corpus(sessionID, user, null, null, null) |
|
75 |
//if (corpora.length() > 0 ) { |
|
76 |
// println "CORPORA" |
|
77 |
// println corpora |
|
78 |
//} else { |
|
79 |
// println "Erreur : la liste des corpus n'a pas pu être récupérée." |
|
80 |
//} |
|
81 |
|
|
82 |
// Split the pasted text into distinct, non-empty identifiers. Any run of whitespace
// or '|' is a separator, so a Windows line ending ("\r\n") does not leave a stray
// "\r" glued to an identifier.
def identifiants = liste_identifiants_sujets.toString().split("[\\s|]+").findAll { !it.isEmpty() }.unique()
if (identifiants.isEmpty()) {
	println "Erreur : la liste d'identifiants est vide."
	return false
}
println "Création du sous-corpus avec la liste: ${identifiants.join('|')}"

// Structure property holding the identifier: it differs between the two corpus families.
String prop = "div_id"
if (parentCorpusName.startsWith("AFNOTICES") || parentCorpusName.startsWith("AF-NOTICES")) {
	prop = "notice_identifiantdelanotice"
}

// Replace the identifier list by the shortest regular expression matching it.
liste_identifiants_sujets = getShortCQL(identifiants)

// Select every token whose enclosing structure has one of the identifiers,
// then expand each match to the whole structure.
String query = "[_.$prop=\"$liste_identifiants_sujets\"] expand to $prop"
println "CQL=$query"
// NOTE(review): 15500 is presumably the longest query the search engine accepts - confirm.
if (query.length() > 15500) {
	println "Erreur : la requête CQL est trop longue (${query.length()})."
	return
}
Subcorpus sub = parentCorpus.createSubcorpus(new CQLQuery(query), nom_sous_corpus)

int s = sub.getSize()
if (s > 0) {
	println "$nom_sous_corpus subcorpus is created."
	// Refresh the Corpus view through the monitor so that the new subcorpus shows up.
	monitor.syncExec(new Runnable() {
		public void run() {
			CorporaView.refresh();
			CorporaView.expand(parentCorpus);
		}
	});
} else {
	println "Error: the created corpus is empty!"
}
|
114 |
|
|
115 |
// UTILS UTILS UTILS UTILS UTILS UTILS UTILS |
|
116 |
|
|
117 |
/**
 * Builds the shortest regular expression matching exactly the given identifiers.
 *
 * Two candidates are compared: the plain alternation ("id1|id2|...") and a
 * factored form where common prefixes are shared, produced by breakNode() and
 * writeNode(). The shorter one is returned (the factored form on a tie).
 *
 * @param list the identifiers (strings); it is not modified
 * @return the regular expression, as a String
 */
def getShortCQL(def list) {
	def plain = list.join("|")

	// The prefix tree has no end-of-identifier marker: when an identifier is a strict
	// prefix of another one ("12" and "123"), the factored pattern would be "123" and
	// "12" would silently be lost. In that case only the plain alternation is correct.
	// After sorting, a prefix is always immediately followed by a string it prefixes.
	def sorted = list.collect { it.toString() }.sort()
	for (int i = 1; i < sorted.size(); i++) {
		def previous = sorted[i - 1]
		if (sorted[i] != previous && sorted[i].startsWith(previous)) {
			return plain
		}
	}

	// Build the prefix tree under the "" root key, then serialise it.
	def origtree = ["":list]
	breakNode(origtree, "", 0)
	def factored = writeNode(origtree)

	return (plain.length() < factored.length()) ? plain : factored
}
|
128 |
|
|
129 |
/**
 * Turns the list of identifiers stored at tree[key] into a prefix tree, in place.
 *
 * tree[key] is replaced by a map whose keys are the characters found at index p
 * of the identifiers (in order of first appearance); each branch is then broken
 * down recursively at index p + 1. A branch ends with an empty map.
 *
 * Identifiers that have no character at index p are skipped: the tree does not
 * record where an identifier ends.
 *
 * @param tree the map holding the node to break
 * @param key the key of the node in tree; its value must be a list of strings
 * @param p the character index used to split the identifiers
 */
def breakNode(def tree, def key, def p) {
	def ids = tree[key]
	def branches = new LinkedHashMap()
	tree[key] = branches

	// Group the identifiers by their character at index p.
	for (def id : ids) {
		if (id.length() > p) {
			def bucket = branches.get(id[p])
			if (bucket == null) {
				bucket = []
				branches.put(id[p], bucket)
			}
			bucket.add(id)
		}
	}

	// Each bucket is in turn replaced by its own sub-tree.
	for (def letter : new ArrayList(branches.keySet())) {
		breakNode(branches, letter, p + 1)
	}
}
|
153 |
|
|
154 |
|
|
155 |
/**
 * Serialises a prefix tree (as built by breakNode()) into a regular expression.
 *
 * Each key is written followed by the pattern of its sub-tree; sibling branches
 * are separated by "|" and wrapped in parentheses when there is more than one.
 * When all the branches are single digits without sub-tree (three or more), they
 * are written as a character class, e.g. "[147]"; a run of four or more
 * consecutive ascending digits is written as a range, e.g. "[2-5]" or "[0-9]".
 *
 * @param tree a map of character -> sub-tree; an empty map is a leaf
 * @return the regular expression, "" for a leaf
 */
def writeNode(def tree) {
	// One alternative per branch: the branch character, then its own sub-pattern.
	String ss = tree.collect { letter, subtree -> letter + writeNode(subtree) }.join("|")

	// Only single-digit leaves can be merged into a character class.
	if (ss.matches("([0-9]\\|){2,10}[0-9]")) {
		String digits = ss.replace("|", "")
		boolean consecutive = (1..<digits.length()).every {
			((int) digits.charAt(it)) == ((int) digits.charAt(it - 1)) + 1
		}
		// A range is only shorter than the enumeration from four digits on.
		if (consecutive && digits.length() >= 4) {
			ss = "[" + digits[0] + "-" + digits[-1] + "]"
		} else {
			ss = "[" + digits + "]"
		}
	}

	return (tree.size() > 1) ? "(" + ss + ")" : ss
}
|
185 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/antract/ListerCorpusOkapiMacro.groovy (revision 3190) | ||
---|---|---|
1 |
// Copyright © 2021 MY_INSTITUTION |
|
2 |
// Licensed under the terms of the GNU General Public License version 3 (http://www.gnu.org/licenses/gpl-3.0.html) |
|
3 |
// @author mdecorde |
|
4 |
|
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.projects.antract |
|
7 |
|
|
8 |
import org.txm.macro.projects.antract.OkapiSaphirAPI |
|
9 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
10 |
|
|
11 |
import org.kohsuke.args4j.* |
|
12 |
import groovy.transform.Field |
|
13 |
import org.txm.rcp.swt.widget.parameters.* |
|
14 |
import org.txm.searchengine.cqp.corpus.CQPCorpus |
|
15 |
import org.txm.searchengine.cqp.corpus.Subcorpus |
|
16 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
|
17 |
import org.txm.rcp.views.corpora.CorporaView |
|
18 |
import org.txm.backtomedia.preferences.BackToMediaPreferences |
|
19 |
|
|
20 |
// BEGINNING OF PARAMETERS |
|
21 |
|
|
22 |
// Ask for the Okapi credentials through the monitor (the call needs the active SWT shell).
// NOTE(review): the dialog presumably stores the credentials in the system properties
// read just below - confirm against OkapiSaphirAPI.initializeCredentials.
monitor.syncExec(new Runnable() {
	public void run() {
		OkapiSaphirAPI.initializeCredentials(org.eclipse.swt.widgets.Display.getCurrent().getActiveShell(), BackToMediaPreferences.MEDIA_AUTH_LOGIN, BackToMediaPreferences.MEDIA_AUTH_PASSWORD, "OKAPI - Antract", "S'identifier sur la plateforme Okapi.", "OKAPI - Antract", monitor)
	}
});

String user = System.getProperty(BackToMediaPreferences.MEDIA_AUTH_LOGIN)
String password = System.getProperty(BackToMediaPreferences.MEDIA_AUTH_PASSWORD)
// Without credentials (null or empty) there is nothing to do: stop here instead of
// trying to log in with missing values.
if (!user || !password) {
	println "Annulation."
	return
}

String sessionID = OkapiSaphirAPI.login(user, password)
if (!sessionID) {
	println "Erreur : La session Okapi n'a pu être démarrée."
	// Forget the rejected credentials so that they are asked again on the next run.
	System.setProperty(BackToMediaPreferences.MEDIA_AUTH_LOGIN, "")
	System.setProperty(BackToMediaPreferences.MEDIA_AUTH_PASSWORD, "")
	// No session: the corpus list cannot be requested.
	return false
}

String corpora = OkapiSaphirAPI.get_user_corpus(sessionID, user, null, null, null)
if (corpora) {
	println "CORPORA"
	println corpora
} else {
	// Null or empty answer
	println "Erreur : la liste des corpus n'a pas pu être récupérée."
}
|
50 |
|
Formats disponibles : Unified diff