Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / macro / office / MergeExcelDirMacro.groovy @ 2046

History | View | Annotate | Download (4.7 kB)

1
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté
2
// @author mdecorde
3
// @author sheiden
4

    
5
package org.txm.macro.office
6

    
7
import org.kohsuke.args4j.*
8
import groovy.transform.Field
9
import java.nio.charset.Charset
10
import org.txm.rcpapplication.swt.widget.parameters.*
11
import org.txm.utils.*
12
import javax.xml.stream.*
13
import java.net.URL
14

    
15
import org.apache.poi.ss.usermodel.*
16
import org.apache.poi.hssf.usermodel.*
17
import org.apache.poi.xssf.usermodel.*
18
import org.apache.poi.ss.util.*
19

    
20
@Field @Option(name="inputDirectory", usage="répertoire des fichiers Excel à fusionner", widget="Folder", required=true, def="")
21
def inputDirectory
22

    
23
@Field @Option(name="outputFile", usage="output file", widget="FileSave", required=true, def="")
24
def outputFile
25

    
26
@Field @Option(name="sheetName", usage="sheet name (if no name is given the first sheet will be used)", widget="String", required=false, def="")
27
def sheetName
28

    
29
@Field @Option(name="columnList", usage="list of columns to extract, separated by comma", widget="String", required=false, def="")
30
def columnList
31

    
32
@Field @Option(name="multipleValues", usage="list of columns with multiple values, separated by comma", widget="String", required=false, def="")
33
def multipleValues
34

    
35
@Field @Option(name="normalizeIdentifiers", usage="normalize column names to simple lower case letters", widget="Boolean", required=true, def="true")
36
def normalizeIdentifiers
37

    
38
def stringToIdent = { str -> org.txm.utils.AsciiUtils.buildAttributeId(org.txm.utils.AsciiUtils.convertNonAscii(str)).toLowerCase() }
39

    
40
if (!ParametersDialog.open(this)) return false
41

    
42
if (!inputDirectory.exists()) {
43
        println "** $scriptName: no '"+inputDirectory.name+"' directory found. Aborting."
44
        return false
45
}
46

    
47
if (!inputDirectory.canRead()) {
48
        println "** $scriptName: '"+inputDirectory.name+"' directory not readable. Aborting."
49
        return false
50
}
51

    
52
def aborted = false
53

    
54
def scriptName = this.class.getSimpleName()
55

    
56
columnList = columnList.split(",") as List
57

    
58
println "columnList = "+columnList
59

    
60
multipleValues = multipleValues.split(",") as List
61

    
62
// create output file
63
out = new FileOutputStream(outputFile)
64
HSSFWorkbook wb = new HSSFWorkbook()
65
finalSheet = wb.createSheet()
66

    
67
// create header line
68
nFinalRows = 1
69
r = finalSheet.createRow(0)
70
columnList.eachWithIndex { columnName, i ->
71
        c = r.createCell(i)
72
        c.setCellValue(normalizeIdentifiers?stringToIdent(columnName):columnName)
73
}
74

    
75
def f = []
76

    
77
inputDirectory.eachFileMatch(~/.*\.xlsx/) { f << it }
78

    
79
if (f.size() == 0) {
80
        println "** $scriptName: no .xlsx file found. Aborting."
81
        return false
82
}
83

    
84
debug = false
85
//ConsoleProgressBar cpb = new ConsoleProgressBar(f.size())
86

    
87
try {
88

    
89
        // for each .xlsx input file (lexicographic order)
90
        f.sort { it.name }.each { inputFile ->
91

    
92
                if (aborted) return false
93

    
94
                if (debug) println "Processing "+inputFile+"."
95
                //cpb.tick()
96
                rownum = 0
97

    
98
                // open input sheet
99
                wb2 = WorkbookFactory.create(inputFile)
100
                ws = null
101
                if (sheetName.length() == 0) {
102
                        ws = wb2.getSheetAt(0)
103
                } else {
104
                        ws = wb2.getSheet(sheetName)
105
                        if (ws == null) {
106
                                println "** $scriptName: no '"+sheetName+" found. Aborting."
107
                                aborted = true
108
                                return false
109
                        }
110
                }
111

    
112
                if (ws == null) {
113
                        println "** $scriptName: no sheet found. Aborting."
114
                        aborted = true
115
                        return false
116
                }
117

    
118
                nRows = ws.getPhysicalNumberOfRows()
119
                if (debug) println nRows+" rows."
120

    
121
                // get columns positions
122
                firstRow = ws.getRow(0)
123
                nCols = firstRow.getPhysicalNumberOfCells()
124
                def colNames = []
125
                def cellIndexes = []
126
                def multipleValuesCols = []
127
                0.upto(nCols-1, { iCol ->
128
                        value = firstRow.getCell(iCol).getStringCellValue()
129
                        if (columnList.contains(value)) {
130
                                colNames << value
131
                                cellIndexes << iCol
132
                        }
133
                        if (multipleValues.contains(value)) {
134
                                multipleValuesCols << iCol
135
                        }
136
                })
137

    
138
                if (cellIndexes.size() != columnList.size()) {
139
                        columnList.removeAll(colNames)
140
                        println "** $scriptName: some columns missing in file $inputFile: "+columnList.join(", ")+". Aborting."
141

    
142
                        aborted = true
143
                        return false
144
                }
145

    
146
                // sort columns indexes by columnList parameter order
147
                cellIndexes.sort { columnList.indexOf(colNames[cellIndexes.indexOf(it)]) }
148

    
149
                // copy column values
150
                1.upto(nRows-1, { iRow ->
151
                        row = ws.getRow(iRow)
152
                        r = finalSheet.createRow(nFinalRows)
153
                        
154
                        nCellCreated = 0
155
                        cellIndexes.each { iCol ->
156
                                value = row.getCell(iCol).getStringCellValue()
157
                                c = r.createCell(nCellCreated)
158
                                if (iCol in multipleValuesCols && value.size() > 0) {
159
                                        (lstr = value.split(";")*.trim() as List).removeAll { it.isEmpty() }
160
                                        c.setCellValue("|"+lstr.join("|")+"|")
161
                                } else {
162
                                        c.setCellValue(value)
163
                                }
164
                                nCellCreated++
165
                        }
166
                        
167
                        nFinalRows++
168
                })
169
        }
170
} catch(Exception e) {
171
        aborted = true
172
        return false
173
}
174
//cpb.done()
175

    
176
/* write sheet */
177

    
178
wb.write(out)
179
out.close()
180

    
181
if (aborted) return false
182

    
183
println nFinalRows+" rows written."
184

    
185
return true
186