Statistics
| Revision:

root / tmp / org.txm.core / src / java / org / txm / scripts / importer / ReunitShortLines.groovy @ 1688

History | View | Annotate | Download (2.3 kB)

1 881 mdecorde
// Copyright © 2010-2013 ENS de Lyon.
2 881 mdecorde
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3 881 mdecorde
// Lyon 2, University of Franche-Comté, University of Nice
4 881 mdecorde
// Sophia Antipolis, University of Paris 3.
5 881 mdecorde
//
6 881 mdecorde
// The TXM platform is free software: you can redistribute it
7 881 mdecorde
// and/or modify it under the terms of the GNU General Public
8 881 mdecorde
// License as published by the Free Software Foundation,
9 881 mdecorde
// either version 2 of the License, or (at your option) any
10 881 mdecorde
// later version.
11 881 mdecorde
//
12 881 mdecorde
// The TXM platform is distributed in the hope that it will be
13 881 mdecorde
// useful, but WITHOUT ANY WARRANTY; without even the implied
14 881 mdecorde
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 881 mdecorde
// PURPOSE. See the GNU General Public License for more
16 881 mdecorde
// details.
17 881 mdecorde
//
18 881 mdecorde
// You should have received a copy of the GNU General
19 881 mdecorde
// Public License along with the TXM platform. If not, see
20 881 mdecorde
// http://www.gnu.org/licenses.
21 881 mdecorde
//
22 881 mdecorde
//
23 881 mdecorde
//
24 881 mdecorde
// $LastChangedDate:$
25 881 mdecorde
// $LastChangedRevision:$
26 881 mdecorde
// $LastChangedBy:$
27 881 mdecorde
//
28 1000 mdecorde
package org.txm.scripts.importer
29 881 mdecorde
30 881 mdecorde
import java.io.File;
31 881 mdecorde
32 881 mdecorde
// TODO: Auto-generated Javadoc
33 881 mdecorde
/**
 * Rewrites a text file into a sibling file named "reu.txt", re-deciding
 * where line breaks go based on the length of each input line.
 *
 * The input file itself is never modified. The original line terminators
 * are dropped; a "\n" is only written in the cases described on the
 * constructor.
 */
class ReunitShortLines {

        /**
         * Reads {@code file} line by line and writes the result to "reu.txt"
         * in the same directory as {@code file}.
         *
         * For each input line:
         * <ul>
         * <li>an empty line is written as a single "\n";</li>
         * <li>a line starting with "**** " is written preceded by "\n";</li>
         * <li>any other line is written as is, preceded by "\n" when both
         * it and the previous state are "long", and followed by "\n" when
         * the previous state is "long" and this line is not.</li>
         * </ul>
         * A line is "long" when its length is strictly greater than
         * {@code maxsize}. Empty lines and "**** " lines reset the state to
         * "long", as does the start of the file.
         *
         * @param file the text file to read
         * @param maxsize the length above which a line counts as long
         * @param encoding the charset name used to read the input and write the output
         */
        ReunitShortLines(File file, int maxsize, String encoding)
        {
                File temp = new File(file.getParentFile(), "reu.txt");

                // The raw streams are closed in finally blocks so that no file
                // handle leaks, even if the charset name is rejected or an I/O
                // error occurs midway.
                InputStream rawInput = new FileInputStream(file);
                try {
                        // BufferedReader.readLine() treats "\n", "\r" and "\r\n" as one
                        // line terminator each.
                        BufferedReader input = new BufferedReader(new InputStreamReader(rawInput, encoding));

                        OutputStream rawOutput = new FileOutputStream(temp);
                        try {
                                Writer writer = new OutputStreamWriter(rawOutput, encoding);

                                // oklength: the current line is longer than maxsize (or is a
                                // blank/"**** " line, or we are at the start of the file).
                                boolean oklength = true;
                                // previousok: value of oklength before the current line.
                                boolean previousok = false;

                                String line = input.readLine();
                                while (line != null) {
                                        if (line.length() == 0) {
                                                writer.write("\n");
                                                oklength = true;
                                        } else if (line.startsWith("**** ")) {
                                                writer.write("\n" + line);
                                                oklength = true;
                                        } else {
                                                previousok = oklength;
                                                oklength = line.length() > maxsize;

                                                // long line following a long state: start it on its own line
                                                if (previousok && oklength)
                                                        writer.write("\n");

                                                writer.write(line);

                                                // short line following a long state: terminate it
                                                if (previousok && !oklength)
                                                        writer.write("\n");
                                        }
                                        line = input.readLine();
                                }

                                // OutputStreamWriter buffers encoded bytes; without this flush
                                // the output file can end up empty or truncated.
                                writer.flush();
                        } finally {
                                rawOutput.close();
                        }
                } finally {
                        rawInput.close();
                }
        }

        /**
         * Runs the conversion on a hard-coded sample file with a maximum
         * size of 30 and the "cp1252" encoding.
         *
         * @param args the command line arguments (unused)
         */
        public static void main(String[] args)
        {
                new ReunitShortLines(new File("/home/mdecorde/xml/voeux/Voeux.txt"), 30, "cp1252")
        }
}