72 |
72 |
if (headers == null) return false;
|
73 |
73 |
int colMax = headers.getLastCellNum();
|
74 |
74 |
|
75 |
|
ArrayList<Integer> columnIdxToTest = new ArrayList<Integer>();
|
76 |
|
ArrayList<String> columnsTest = new ArrayList<String>();
|
|
75 |
ArrayList<Integer> columnIdxToTest = new ArrayList<>();
|
|
76 |
ArrayList<String> columnsTest = new ArrayList<>();
|
77 |
77 |
for (int colIndex = 0; colIndex < colMax; colIndex++) {
|
78 |
78 |
Cell cell = headers.getCell(colIndex);
|
79 |
79 |
if (cell != null) {
|
... | ... | |
90 |
90 |
return false;
|
91 |
91 |
}
|
92 |
92 |
|
93 |
|
ArrayList<Integer> removed = new ArrayList<Integer>();
|
|
93 |
ArrayList<Integer> removed = new ArrayList<>();
|
94 |
94 |
for (int rowIndex = 0; rowIndex < nRows; rowIndex++) {
|
95 |
95 |
Row row = ws.getRow(rowIndex);
|
96 |
96 |
if (row == null) continue;
|
... | ... | |
155 |
155 |
|
156 |
156 |
int colMax = headers.getLastCellNum();
|
157 |
157 |
|
158 |
|
ArrayList<Integer> columnIdxToTest = new ArrayList<Integer>();
|
159 |
|
ArrayList<Integer> columnIdxToWrite = new ArrayList<Integer>();
|
160 |
|
ArrayList<String> columns = new ArrayList<String>();
|
161 |
|
ArrayList<String> columnsTest = new ArrayList<String>();
|
|
158 |
ArrayList<Integer> columnIdxToTest = new ArrayList<>();
|
|
159 |
ArrayList<Integer> columnIdxToWrite = new ArrayList<>();
|
|
160 |
ArrayList<String> columns = new ArrayList<>();
|
|
161 |
ArrayList<String> columnsTest = new ArrayList<>();
|
162 |
162 |
for (int colIndex = 0; colIndex < colMax; colIndex++) {
|
163 |
163 |
Cell cell = headers.getCell(colIndex);
|
164 |
164 |
if (cell != null) {
|
... | ... | |
182 |
182 |
|
183 |
183 |
if (columnIdxToWrite.size() != columnsSelection.size()) {
|
184 |
184 |
System.out.println("Error: not all columns found=" + columnIdxToWrite + " of " + columnsSelection);
|
185 |
|
ArrayList<String> notFound = new ArrayList<String>(columnsSelection);
|
|
185 |
ArrayList<String> notFound = new ArrayList<>(columnsSelection);
|
186 |
186 |
notFound.removeAll(columns);
|
187 |
187 |
System.out.println("NOT FOUND: " + notFound);
|
188 |
188 |
return false;
|
... | ... | |
307 |
307 |
}
|
308 |
308 |
|
309 |
309 |
protected void _getRecord() {
|
310 |
|
record = new HashMap<String, String>();
|
|
310 |
record = new HashMap<>();
|
311 |
311 |
Row row = ws.getRow(iRow);
|
312 |
312 |
|
313 |
313 |
for (int colIndex = 0; colIndex < header.length; colIndex++) {
|
... | ... | |
316 |
316 |
if (cell != null) {
|
317 |
317 |
String value = cellToString(cell).trim();
|
318 |
318 |
if (value == null) {
|
319 |
|
record.put(header[colIndex], "");
|
|
319 |
record.put(col, "");
|
320 |
320 |
}
|
321 |
321 |
else {
|
322 |
|
record.put(header[colIndex], value);
|
|
322 |
record.put(col, value);
|
323 |
323 |
}
|
324 |
324 |
}
|
325 |
325 |
else {
|
326 |
|
record.put(header[colIndex], "");
|
|
326 |
record.put(col, "");
|
327 |
327 |
}
|
328 |
328 |
}
|
329 |
329 |
|
... | ... | |
343 |
343 |
*/
|
344 |
344 |
public static ArrayList<ArrayList<String>> toTable(File inputFile, String sheetName) {
|
345 |
345 |
|
346 |
|
ArrayList<ArrayList<String>> data = new ArrayList<ArrayList<String>>();
|
|
346 |
ArrayList<ArrayList<String>> data = new ArrayList<>();
|
347 |
347 |
|
348 |
348 |
if (!inputFile.canRead()) {
|
349 |
349 |
System.out.println("** Excel2XML: '" + inputFile.getName() + "' file not readable. Aborting.");
|
... | ... | |
374 |
374 |
Row firstRow = ws.getRow(0);
|
375 |
375 |
int colMax = firstRow.getLastCellNum();
|
376 |
376 |
|
377 |
|
ArrayList<String> headers = new ArrayList<String>();
|
|
377 |
ArrayList<String> headers = new ArrayList<>();
|
378 |
378 |
for (int it = 0; it < colMax; it++) {
|
379 |
379 |
headers.add(firstRow.getCell(it).getStringCellValue());
|
380 |
380 |
}
|
381 |
381 |
|
382 |
382 |
for (int rowIndex = 0; rowIndex < nRows; rowIndex++) {
|
383 |
383 |
Row row = ws.getRow(rowIndex);
|
384 |
|
ArrayList<String> dataLine = new ArrayList<String>();
|
|
384 |
ArrayList<String> dataLine = new ArrayList<>();
|
385 |
385 |
data.add(dataLine);
|
386 |
386 |
for (int colIndex = 0; colIndex < colMax; colIndex++) {
|
387 |
387 |
Cell cell = row.getCell(colIndex);
|
... | ... | |
438 |
438 |
}
|
439 |
439 |
}
|
440 |
440 |
|
|
441 |
|
441 |
442 |
/**
 * Manual smoke test: reads one workbook through {@code ReadExcel.toTable} and
 * prints each returned row to standard output.
 *
 * @param args command-line arguments; not read by this method
 * @throws Exception declared so that any failure raised by the calls below propagates to the caller
 */
public static void main(String[] args) throws Exception {
|
|
443 |
// Hard-coded, machine-specific input path; nothing is taken from args.
File tableFile = new File("/home/mdecorde/Téléchargements/Requêtes_SVO-SOV.xlsx");
|
|
444 |
// ReadExcel excel = new ReadExcel(tableFile, null);
|
|
445 |
// The second argument is toTable's sheetName parameter.
// NOTE(review): null presumably selects a default sheet - confirm in toTable.
ArrayList<ArrayList<String>> table = ReadExcel.toTable(tableFile, null);
|
|
446 |
// Dump every row, one list per output line.
for (ArrayList<String> line : table) {
|
|
447 |
System.out.println(line);
|
|
448 |
}
|
|
449 |
}
|
|
450 |
|
|
451 |
public static void mainAF(String[] args) throws Exception {
|
442 |
452 |
// ArrayList<ArrayList<String>> data = toTable(new File("/home/mdecorde/xml/ruscorpora1m-test/metadata.xlsx"), null);
|
443 |
453 |
// if (data.size() == 0) {
|
444 |
454 |
// System.out.println("no data.");
|
... | ... | |
452 |
462 |
System.out.println("open...");
|
453 |
463 |
ReadExcel excel = new ReadExcel(tableFile, null);
|
454 |
464 |
|
455 |
|
HashMap<String, String> lineRules = new HashMap<String, String>(); // line tests to select line to keep
|
|
465 |
HashMap<String, String> lineRules = new HashMap<>(); // line tests to select line to keep
|
456 |
466 |
List<String> columnsSelection; // list of columns to keep
|
457 |
|
HashMap<String, String> columnsToCopy = new HashMap<String, String>();
|
458 |
|
HashMap<String, String> columnsToRenameRules = new HashMap<String, String>();
|
459 |
|
HashMap<String, String[]> searchAndReplaceRules = new HashMap<String, String[]>();
|
|
467 |
HashMap<String, String> columnsToCopy = new HashMap<>();
|
|
468 |
HashMap<String, String> columnsToRenameRules = new HashMap<>();
|
|
469 |
HashMap<String, String[]> searchAndReplaceRules = new HashMap<>();
|
460 |
470 |
// //emissions
|
461 |
|
// table2File = new File("/home/mdecorde/TEMP/ANTRACT/AF/emissions.xlsx");
|
462 |
|
// columnsSelection = Arrays.asList(
|
463 |
|
// "Identifiant de la notice", "Titre propre", "Notes du titre", "Date de diffusion", "Durée", "Nom fichier segmenté (info)", "antract_video",
|
464 |
|
// "antract_debut","antract_fin","antract_duree","antract_tc_type","antract_tc_date");
|
465 |
|
// lineRules.put("Type de notice", "Notice sommaire");
|
466 |
|
// columnsToRenameRules.put("Identifiant de la notice", "id");
|
467 |
|
//
|
468 |
|
// columnsToCopy.put("Notes du titre", "subtitle"); // not working yet
|
469 |
|
// columnsToCopy.put("Titre propre", "title"); // not working yet
|
470 |
|
// columnsToCopy.put("Date de diffusion", "textorder"); // not working yet
|
471 |
|
// searchAndReplaceRules.put("textorder", new String[] {"../../....", "$3$2$1"}); // not working yet
|
|
471 |
// table2File = new File("/home/mdecorde/TEMP/ANTRACT/AF/emissions.xlsx");
|
|
472 |
// columnsSelection = Arrays.asList(
|
|
473 |
// "Identifiant de la notice", "Titre propre", "Notes du titre", "Date de diffusion", "Durée", "Nom fichier segmenté (info)", "antract_video",
|
|
474 |
// "antract_debut","antract_fin","antract_duree","antract_tc_type","antract_tc_date");
|
|
475 |
// lineRules.put("Type de notice", "Notice sommaire");
|
|
476 |
// columnsToRenameRules.put("Identifiant de la notice", "id");
|
|
477 |
//
|
|
478 |
// columnsToCopy.put("Notes du titre", "subtitle"); // not working yet
|
|
479 |
// columnsToCopy.put("Titre propre", "title"); // not working yet
|
|
480 |
// columnsToCopy.put("Date de diffusion", "textorder"); // not working yet
|
|
481 |
// searchAndReplaceRules.put("textorder", new String[] {"../../....", "$3$2$1"}); // not working yet
|
472 |
482 |
|
473 |
483 |
// sujets
|
474 |
484 |
table2File = new File("/home/mdecorde/TEMP/ANTRACT/AF/sujets.xlsx");
|
... | ... | |
487 |
497 |
}
|
488 |
498 |
|
489 |
499 |
System.out.println("copying column: " + columnsToCopy.size());
|
490 |
|
//excel2.copyColumns(columnsToCopy);
|
|
500 |
// excel2.copyColumns(columnsToCopy);
|
491 |
501 |
|
492 |
502 |
System.out.println("search&replace column: " + searchAndReplaceRules.size());
|
493 |
|
//excel2.searchAndReplaceInLines(searchAndReplaceRules);
|
|
503 |
// excel2.searchAndReplaceInLines(searchAndReplaceRules);
|
494 |
504 |
|
495 |
505 |
System.out.println("renaming column: " + columnsToRenameRules.size());
|
496 |
506 |
excel2.renameColumns(columnsToRenameRules);
|