root / tmp / org.txm.core / src / java / org / txm / scripts / importer / ReunitShortLines.groovy @ 1688
History | View | Annotate | Download (2.3 kB)
1 | 881 | mdecorde | // Copyright © 2010-2013 ENS de Lyon.
|
---|---|---|---|
2 | 881 | mdecorde | // Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
|
3 | 881 | mdecorde | // Lyon 2, University of Franche-Comté, University of Nice
|
4 | 881 | mdecorde | // Sophia Antipolis, University of Paris 3.
|
5 | 881 | mdecorde | //
|
6 | 881 | mdecorde | // The TXM platform is free software: you can redistribute it
|
7 | 881 | mdecorde | // and/or modify it under the terms of the GNU General Public
|
8 | 881 | mdecorde | // License as published by the Free Software Foundation,
|
9 | 881 | mdecorde | // either version 2 of the License, or (at your option) any
|
10 | 881 | mdecorde | // later version.
|
11 | 881 | mdecorde | //
|
12 | 881 | mdecorde | // The TXM platform is distributed in the hope that it will be
|
13 | 881 | mdecorde | // useful, but WITHOUT ANY WARRANTY; without even the implied
|
14 | 881 | mdecorde | // warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
15 | 881 | mdecorde | // PURPOSE. See the GNU General Public License for more
|
16 | 881 | mdecorde | // details.
|
17 | 881 | mdecorde | //
|
18 | 881 | mdecorde | // You should have received a copy of the GNU General
|
19 | 881 | mdecorde | // Public License along with the TXM platform. If not, see
|
20 | 881 | mdecorde | // http://www.gnu.org/licenses.
|
21 | 881 | mdecorde | //
|
22 | 881 | mdecorde | //
|
23 | 881 | mdecorde | //
|
24 | 881 | mdecorde | // $LastChangedDate:$
|
25 | 881 | mdecorde | // $LastChangedRevision:$
|
26 | 881 | mdecorde | // $LastChangedBy:$
|
27 | 881 | mdecorde | //
|
28 | 1000 | mdecorde | package org.txm.scripts.importer
|
29 | 881 | mdecorde | |
30 | 881 | mdecorde | import java.io.File; |
31 | 881 | mdecorde | |
32 | 881 | mdecorde | // TODO: Auto-generated Javadoc
|
/**
 * Rewrites a text file so that "short" lines are glued to their neighbours.
 *
 * The result is written to a file named "reu.txt" in the same directory as
 * the input file; the input file itself is only read.
 *
 * A line is considered "long" when its length is strictly greater than
 * maxsize. Lines are processed in order with the following rules:
 * <ul>
 * <li>an empty line writes a line break and resets the state to "long";</li>
 * <li>a line starting with "**** " is written after a line break and resets
 * the state to "long";</li>
 * <li>any other line is written preceded by a line break when both it and the
 * previous state are "long", followed by a line break when the previous
 * state is "long" but the line itself is short, and written with no line
 * break at all when the previous state is "short".</li>
 * </ul>
 */
class ReunitShortLines {

	/**
	 * Processes the given file and writes the result to "reu.txt" in the
	 * parent directory of that file.
	 *
	 * Both streams are released before the constructor returns, whether it
	 * completes normally or throws, and the output is flushed once all lines
	 * have been processed.
	 *
	 * @param file the text file to read
	 * @param maxsize a line is "long" when its length is strictly greater than this value
	 * @param encoding the charset name used to decode the input and encode the output
	 */
	ReunitShortLines(File file, int maxsize, String encoding)
	{
		File temp = new File(file.getParentFile(), "reu.txt")

		FileInputStream rawInput = new FileInputStream(file)
		try {
			// BufferedReader.readLine() accepts "\n", "\r" and "\r\n" as a single line terminator
			BufferedReader input = new BufferedReader(new InputStreamReader(rawInput, encoding))

			FileOutputStream rawOutput = new FileOutputStream(temp)
			try {
				Writer writer = new BufferedWriter(new OutputStreamWriter(rawOutput, encoding))

				// the state starts as "long" so that the first line is handled like a line following a long one
				boolean oklength = true
				boolean previousok = false

				String line = input.readLine()
				while (line != null) {
					if (line.length() == 0) {
						writer.write("\n")
						oklength = true
					} else if (line.startsWith("**** ")) {
						writer.write("\n" + line)
						oklength = true
					} else {
						previousok = oklength
						oklength = line.length() > maxsize

						if (previousok && oklength) {
							writer.write("\n")
						}

						writer.write(line)

						if (previousok && !oklength) {
							writer.write("\n")
						}
					}
					line = input.readLine()
				}

				// the writers buffer their content: without this flush the text may never reach the file
				writer.flush()
			} finally {
				rawOutput.close()
			}
		} finally {
			rawInput.close()
		}
	}

	/**
	 * Command line entry point.
	 *
	 * All arguments are optional; a missing argument falls back to the value
	 * that was previously hard-coded.
	 *
	 * @param args [0] path of the file to process, [1] maxsize, [2] encoding
	 */
	public static void main(String[] args)
	{
		int argc = (args == null) ? 0 : args.length

		String path = (argc > 0) ? args[0] : "/home/mdecorde/xml/voeux/Voeux.txt"
		int maxsize = (argc > 1) ? Integer.parseInt(args[1]) : 30
		String encoding = (argc > 2) ? args[2] : "cp1252"

		new ReunitShortLines(new File(path), maxsize, encoding)
	}
}