Feature #1756
Reduce the number of JNI calls while creating a Partition
Status: | New | Start date: | 03/29/2016 | |
---|---|---|---|---|
Priority: | Normal | Due date: | ||
Assignee: | - | % Done: | 0% |
|
Category: | Diagnostic and optimization | Spent time: | - | |
Target version: | TXM X.X |
Description
Reduce the time needed to create a partition by reducing the number of calls to CQI.cqpQuery()
[WIP]
The partition creation can be long when there are a lot of parts.
Profiling the process shows that the bottleneck is the native method org.txm.searchengine.cqp.MemCqiServer.cqpQuery(String, String, String)
Here are some tests, recorded for reference. They did not yield a significant gain on Windows (perhaps 5 to 10% less time on a partition with 2400 parts compared with the current method), but the measurements are tricky. The tests consist in reducing the number of native calls made from Java, which can be expensive through JNI, by defining a single native method that executes all the queries and by centralizing the multi-part creation on the Java side (rather than calling a native method for each part).
- create org.txm.searchengine.cqp.MemCqiServer.cqpQueries(String, String[], String[])
/**
 * Native counterpart of a batch of cqpQuery() calls: runs several CQP queries
 * through a single JNI call.
 *
 * NOTE(review): the parameter roles below are taken from the ICqiClient
 * declaration and the C implementation quoted in this ticket - confirm.
 *
 * @param arg0 qualified id of the mother corpus
 * @param arg1 names of the sub-corpora to create, one per query
 * @param arg2 CQP queries, in the same order as arg1
 * @return a result that the MemCqiClient wrapper treats as a failure when it
 *         is null or false
 */
public native Boolean cqpQueries(String arg0, String[] arg1, String[] arg2) throws IOException, UnexpectedAnswerException, CqiServerError ;
- create org.txm.searchengine.cqp.ICqiClient.cqpQueries(String, String[], String[])
/**
 * Runs a batch of CQP queries on a corpus, storing each result under the
 * sub-corpus name found at the same index.
 *
 * @param motherCorpus    corpus the queries are run on
 * @param subcorpusNamers names of the sub-corpora to create, one per query
 * @param queries         CQP queries, in the same order as the names
 * @throws IOException               declared by the interface; conditions depend on the implementation
 * @throws UnexpectedAnswerException declared by the interface; conditions depend on the implementation
 * @throws CqiServerError            declared by the interface; conditions depend on the implementation
 */
public void cqpQueries(String motherCorpus, String[] subcorpusNamers, String[] queries) throws IOException, UnexpectedAnswerException, CqiServerError;
- create org.txm.searchengine.cqp.MemCqiClient.cqpQueries(String, String[], String[])
/**
 * Forwards the whole batch of queries to the in-memory CQi server.
 * <p>
 * When the server answers {@code null} or {@code false}, its current error
 * code is read and handed to {@code throwExceptionFromCqi}.
 *
 * @param arg0 forwarded unchanged as first argument of {@code server.cqpQueries}
 * @param arg1 forwarded unchanged as second argument of {@code server.cqpQueries}
 * @param arg2 forwarded unchanged as third argument of {@code server.cqpQueries}
 */
@Override
public void cqpQueries(String arg0, String[] arg1, String[] arg2) throws IOException, UnexpectedAnswerException, CqiServerError {
	final Boolean succeeded = server.cqpQueries(arg0, arg1, arg2);
	if (Boolean.TRUE.equals(succeeded)) {
		// Nothing to report: the server accepted every query.
		return;
	}
	final int errorCode = server.getErrorCode();
	throwExceptionFromCqi(errorCode);
}
- create org.txm.searchengine.cqp.corpus.Partition.createParts(String, List<String>, List<String>)
private ArrayList<Part> createParts(String partitionName, List<String> partNames, List<String> queries) throws CqiClientException { ArrayList<Part> parts = new ArrayList<Part>(partNames.size()); //Log.finest(NLS.bind(Messages.CREATING_PART,partName, query)); // long start = System.currentTimeMillis(); ArrayList<String> cqpPartIds = new ArrayList<String>(partNames.size()); for(int i = 0; i < partNames.size(); i++) { try { String partCqpId = CqpObject.partNamePrefix + Corpus.getNextSubcorpusCounter(); cqpPartIds.add(partCqpId); parts.add(new Part(partCqpId, partitionName, partNames.get(i), this, new Query(queries.get(i)))); } catch(InvalidCqpIdException e) { // TODO Auto-generated catch block e.printStackTrace(); } } try { CorpusManager.getCorpusManager().getCqiClient().cqpQueries(this.corpus.getQualifiedCqpId(), cqpPartIds.toArray(new String[cqpPartIds.size()]), queries.toArray(new String[queries.size()])); } catch (Exception e) { try { throw new CqiClientException(Messages.Partition_9 + partitionName + " last error: "+Toolbox.getCqiClient().getLastCQPError()); //$NON-NLS-1$ //$NON-NLS-2$ } catch (Exception e1) { System.out.println(Messages.Partition_18+e1); org.txm.utils.logger.Log.printStackTrace(e1); return null; } } // long end = System.currentTimeMillis(); //Log.finest(NLS.bind(Messages.PART_CREATED, partitionName + "_" + partName, (end - start))); //$NON-NLS-1$//$NON-NLS-2$ return parts; }
- create org.txm.searchengine.cqp.corpus.Partition.Partition(Corpus, String, List<String>, List<String>)
public Partition(Corpus corpus, String name, List<String> queries, List<String> partnames) throws CqiClientException { this.corpus = corpus; this.structure = null; this.property = null; if (name == null || name.trim().length() == 0) name = "noname"; //$NON-NLS-1$ this.name = name; Log.info(NLS.bind(Messages.NEW_PARTION, this.corpus, this.name)); long start = System.currentTimeMillis(); this.parts = new ArrayList<Part>(); // FIXME: tests optimisations CQP LIB parts.addAll(createParts(name, partnames, queries)); // for (int i = 0; i < queries.size(); i++) { // String queryS = queries.get(i); // String partitionName = this.getName(); // String partName = partnames.get(i); // if (partName.trim().length() == 0) partName = "-"; //$NON-NLS-1$ // Part part = createPart(partitionName, partName, queryS); // parts.add(part); // } // // long end = System.currentTimeMillis(); Log.info(NLS.bind(Messages.PARTITION_CREATED, this.name, (end - start))); }
- create JNIEXPORT jobject JNICALL Java_org_txm_searchengine_cqp_MemCqiServer_cqpQueries
(JNIEnv *, jobject, jstring, jobjectArray, jobjectArray); in MemCqiServer.h/MemCqiServer.c
JNIEXPORT jobject JNICALL Java_org_txm_searchengine_cqp_MemCqiServer_cqpQueries (JNIEnv * env, jobject obj, jstring motherCorpus, jobjectArray subcorpusNames, jobjectArray jqueries) { char *child, *mother, *query, *c, *sc; jboolean iscopy; mother = (*env)->GetStringUTFChars(env, motherCorpus, &iscopy); int *children = (*env)->GetObjectArrayElement(env, subcorpusNames, NULL); int childrenCount = (*env)->GetArrayLength(env, subcorpusNames); int *queries = (*env)->GetObjectArrayElement(env, jqueries, NULL); if (!split_subcorpus_spec(mother, &c, &sc)) { (*env)->ReleaseStringChars(env, motherCorpus, mother); //(*env)->ReleaseStringChars(env, subcorpus, child); //(*env)->ReleaseStringChars(env, jquery, query); return throwException(env, obj); } else { int test2 = cqi_activate_corpus(mother); int i; for(i = 0; i < childrenCount; i++) { child = (*env)->GetStringUTFChars(env, (*env)->GetObjectArrayElement(env, subcorpusNames, i), &iscopy); query = (*env)->GetStringUTFChars(env, (*env)->GetObjectArrayElement(env, jqueries, i), &iscopy); /* printf("\n ******* i = "); printf("%d", i); printf(" ******* child "); printf(child); printf(" ******* query "); printf(query);*/ // fflush(stdout); char *cqp_query; int len = strlen(child) + strlen(query) + 10; cqp_query = (char *) cl_realloc(cqp_query, len); int test1 = check_subcorpus_name(child); //printf("\ntests results: subcorpus_name=%d activation=%d\n", test1, test2); if (!test1 || !test2) { (*env)->ReleaseStringChars(env, motherCorpus, mother); //(*env)->ReleaseStringChars(env, subcorpus, child); //(*env)->ReleaseStringChars(env, jquery, query); return throwException(env, obj); } else { query_lock = floor(1e9 * cl_runif()) + 1; // activate query lock mode with random key //printf("CQPSERVER: query_lock = %d\n", query_lock); if (query_has_semicolon(query)) sprintf(cqp_query, "%s = %s", child, query); else sprintf(cqp_query, "%s = %s;", child, query); //printf("CQi: parsing %s\n", cqp_query); if (!cqp_parse_string(cqp_query)) { // 
parser and execute fprintf(stderr, "start of throw exeption"); return throwCLException(env, obj); //fprintf(stderr, "End of throw exeption"); } else { char *full_child = combine_subcorpus_spec(c, child); // c is the 'physical' part of the mother corpus CorpusList *childcl = cqi_find_corpus(full_child); if ((childcl) == NULL) { (*env)->ReleaseStringChars(env, motherCorpus, mother); //(*env)->ReleaseStringChars(env, subcorpus, child); //(*env)->ReleaseStringChars(env, jquery, query); return throwCLException(env, obj); } else { if (server_log) { printf("'%s' ran the following query on %s\n", "cqplib", mother); printf("\t%s\n", cqp_query); printf("and got %d matches.\n", childcl->size); } } if (full_child) cl_free(full_child); } query_lock = 0; // deactivate query lock mode } if (cqp_query) cl_free(cqp_query); //(*env)->ReleaseStringUTFChars(env, string1, child); //(*env)->ReleaseStringUTFChars(env, string2, query); } } if (c) cl_free(c); if (sc) cl_free(sc); (*env)->ReleaseStringChars(env, motherCorpus, mother); //(*env)->ReleaseStringChars(env, subcorpus, child); //(*env)->ReleaseStringChars(env, jquery, query); return toBoolean(env, obj, JNI_TRUE); }Other tips:
- we may assume that the Java layer always gives a unique corpus id to the native layer, and remove the check_subcorpus_name() tests
- passing arrays through JNI and getting the array elements seems to be expensive; we may use a buffer instead
- manage the test "if (query_has_semicolon(query))" in another way, e.g. by adding a function parameter
Related issues
History
#1 Updated by Sebastien Jacquot over 7 years ago
- Description updated (diff)
#2 Updated by Sebastien Jacquot over 7 years ago
- Description updated (diff)
#3 Updated by Sebastien Jacquot over 7 years ago
- Description updated (diff)
#4 Updated by Sebastien Jacquot about 4 years ago
- Target version changed from TXM X.X to TXM 0.8.2
#5 Updated by Sebastien Jacquot about 4 years ago
- Category set to Diagnostic and optimization
#6 Updated by Sebastien Jacquot about 4 years ago
- Subject changed from Improve partition creation duration process to Improve partition creation duration process (reduce expensive calls of cqpQuery() of cqplib through JNI)
#7 Updated by Matthieu Decorde about 4 years ago
- Tracker changed from Task to Feature
- Subject changed from Improve partition creation duration process (reduce expensive calls of cqpQuery() of cqplib through JNI) to RCP: X.X, Reduce the number of JNI call while creating a Partition
- Description updated (diff)
#8 Updated by Matthieu Decorde about 4 years ago
- Target version changed from TXM 0.8.2 to TXM X.X
#9 Updated by Matthieu Decorde about 4 years ago
- Subject changed from RCP: X.X, Reduce the number of JNI call while creating a Partition to RCP: X.X, Reduce the number of JNI calls while creating a Partition
#10 Updated by Sebastien Jacquot about 4 years ago
- Tracker changed from Feature to Task
- Subject changed from RCP: X.X, Reduce the number of JNI calls while creating a Partition to Reduce the number of JNI call while creating a Partition
- Target version changed from TXM X.X to TXM 0.8.2
#11 Updated by Sebastien Jacquot about 4 years ago
- Tracker changed from Task to Feature
- Target version changed from TXM 0.8.2 to TXM X.X
#12 Updated by Sebastien Jacquot about 4 years ago
- Target version changed from TXM X.X to TXM 0.8.2
#13 Updated by Sebastien Jacquot about 4 years ago
- Subject changed from Reduce the number of JNI call while creating a Partition to Reduce the number of JNI calls while creating a Partition
#14 Updated by Sebastien Jacquot almost 4 years ago
- Target version changed from TXM 0.8.2 to TXM X.X