001: /*
002: * Adjusts tabs and spaces.
003: * Copyright (C) 2002-2007 Stephen Ostermiller
004: * http://ostermiller.org/contact.pl?regarding=Java+Utilities
005: *
006: * This program is free software; you can redistribute it and/or modify
007: * it under the terms of the GNU General Public License as published by
008: * the Free Software Foundation; either version 2 of the License, or
009: * (at your option) any later version.
010: *
011: * This program is distributed in the hope that it will be useful,
012: * but WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014: * GNU General Public License for more details.
015: *
016: * See COPYING.TXT for details.
017: */
018:
019: package com.Ostermiller.util;
020:
021: import java.io.*;
022: import java.text.MessageFormat;
023: import java.util.ResourceBundle;
024: import java.util.Locale;
025:
026: /**
 * Stream editor to convert the indentation at the beginning of each line of
 * text between tabs and spaces, or from one tab width to another.
029: * More information about this class is available from <a target="_top" href=
030: * "http://ostermiller.org/utils/LineEnds.html">ostermiller.org</a>.
031: *
032: * @author Stephen Ostermiller http://ostermiller.org/contact.pl?regarding=Java+Utilities
033: * @since ostermillerutils 1.00.00
034: */
035: public class Tabs {
036:
037: /**
038: * Version number of this program
039: *
040: * @since ostermillerutils 1.00.00
041: */
042: public static final String version = "1.1";
043:
044: /**
045: * Locale specific strings displayed to the user.
046: *
047: * @since ostermillerutils 1.00.00
048: */
049: protected static ResourceBundle labels = ResourceBundle.getBundle(
050: "com.Ostermiller.util.Tabs", Locale.getDefault());
051:
052: /**
053: * Can be passed instead of a spaces argument to use tabs instead.
054: *
055: * @since ostermillerutils 1.00.00
056: */
057: public final static int TABS = -1;
058:
059: private enum TabsCmdLnOption {
060: /** --help */
061: HELP(new CmdLnOption(labels.getString("help.option"))
062: .setDescription(labels.getString("help.message"))),
063: /** --version */
064: VERSION(new CmdLnOption(labels.getString("version.option"))
065: .setDescription(labels.getString("version.message"))),
066: /** --about */
067: ABOUT(new CmdLnOption(labels.getString("about.option"))
068: .setDescription(labels.getString("about.message"))),
069: /** --width */
070: WIDTH(new CmdLnOption(labels.getString("width.option"), 'w')
071: .setDescription(labels.getString("w.message"))
072: .setRequiredArgument()),
073: /** --guess */
074: GUESS(new CmdLnOption(labels.getString("guess.option"), 'g')
075: .setDescription(labels.getString("g.message") + " ("
076: + labels.getString("default") + ")")),
077: /** --tabs */
078: TABS(new CmdLnOption(labels.getString("tabs.option"), 't')
079: .setDescription(labels.getString("t.message"))),
080: /** --spaces */
081: SPACES(new CmdLnOption(labels.getString("spaces.option"), 's')
082: .setDescription(
083: labels.getString("s.message") + " ("
084: + labels.getString("default") + "=4)")
085: .setRequiredArgument()),
086: /** --force */
087: FORCE(new CmdLnOption(labels.getString("force.option"), 'f')
088: .setDescription(labels.getString("f.message"))),
089: /** --noforce */
090: NOFORCE(new CmdLnOption(labels.getString("noforce.option"))
091: .setDescription(labels.getString("noforce.message")
092: + " (" + labels.getString("default") + ")")),
093: /** --reallyverbose */
094: REALLYVERBOSE(new CmdLnOption(labels
095: .getString("reallyverbose.option"), 'V')
096: .setDescription(labels.getString("V.message"))),
097: /** --verbose */
098: VERBOSE(
099: new CmdLnOption(labels.getString("verbose.option"), 'v')
100: .setDescription(labels.getString("v.message"))),
101: /** --quiet */
102: QUIET(new CmdLnOption(labels.getString("quiet.option"), 'q')
103: .setDescription(labels.getString("q.message") + " ("
104: + labels.getString("default") + ")")),
105: /** --reallyquiet */
106: REALLYQUIET(new CmdLnOption(labels
107: .getString("reallyquiet.option"), 'Q')
108: .setDescription(labels.getString("Q.message")));
109:
110: private CmdLnOption option;
111:
112: private TabsCmdLnOption(CmdLnOption option) {
113: option.setUserObject(this );
114: this .option = option;
115: }
116:
117: private CmdLnOption getCmdLineOption() {
118: return option;
119: }
120: }
121:
122: /**
123: * Converts the tabs in files, or standard input.
124: * Run with --help argument for more information.
125: *
126: * @param args Command line arguments.
127: *
128: * @since ostermillerutils 1.00.00
129: */
130: public static void main(String[] args) {
131: CmdLn commandLine = new CmdLn(args).setDescription(labels
132: .getString("tabs")
133: + labels.getString("purpose.message"));
134: for (TabsCmdLnOption option : TabsCmdLnOption.values()) {
135: commandLine.addOption(option.getCmdLineOption());
136: }
137: int inputTabWidth = TABS;
138: int outputTabWidth = 4;
139: boolean force = false;
140: boolean printMessages = false;
141: boolean printExtraMessages = false;
142: boolean printErrors = true;
143: for (CmdLnResult result : commandLine.getResults()) {
144: switch ((TabsCmdLnOption) result.getOption()
145: .getUserObject()) {
146: case HELP: {
147: commandLine.printHelp();
148: System.exit(0);
149: }
150: break;
151: case VERSION: {
152: // print out the version message
153: System.out.println(MessageFormat.format(labels
154: .getString("version"),
155: (Object[]) new String[] { version }));
156: System.exit(0);
157: }
158: break;
159: case ABOUT: {
160: System.out
161: .println(labels.getString("tabs")
162: + " -- "
163: + labels.getString("purpose.message")
164: + "\n"
165: + MessageFormat
166: .format(
167: labels
168: .getString("copyright"),
169: (Object[]) new String[] {
170: "2002-2007",
171: "Stephen Ostermiller (http://ostermiller.org/contact.pl?regarding=Java+Utilities)" })
172: + "\n\n" + labels.getString("license"));
173: System.exit(0);
174: }
175: break;
176: case WIDTH: {
177: try {
178: inputTabWidth = Integer.parseInt(commandLine
179: .getResult('w').getArgument());
180: } catch (NumberFormatException x) {
181: inputTabWidth = -1;
182: }
183: if (inputTabWidth < 1 || inputTabWidth > 20) {
184: System.err.println(labels.getString("widtherror"));
185: System.exit(1);
186: }
187: }
188: break;
189: case GUESS: {
190: inputTabWidth = TABS;
191: }
192: break;
193: case SPACES: {
194: try {
195: outputTabWidth = Integer.parseInt(commandLine
196: .getResult('s').getArgument());
197: } catch (NumberFormatException x) {
198: outputTabWidth = -1;
199: }
200: if (outputTabWidth < 1 || outputTabWidth > 20) {
201: System.err.println("widtherror");
202: System.exit(1);
203: }
204: }
205: break;
206: case TABS: {
207: outputTabWidth = TABS;
208: }
209: break;
210: case FORCE: {
211: force = true;
212: }
213: break;
214: case NOFORCE: {
215: force = false;
216: }
217: break;
218: case REALLYVERBOSE: {
219: printExtraMessages = true;
220: printMessages = true;
221: printErrors = true;
222: }
223: break;
224: case VERBOSE: {
225: printExtraMessages = false;
226: printMessages = true;
227: printErrors = true;
228: }
229: break;
230: case QUIET: {
231: printExtraMessages = false;
232: printMessages = false;
233: printErrors = true;
234: }
235: break;
236: case REALLYQUIET: {
237: printExtraMessages = false;
238: printMessages = false;
239: printErrors = false;
240: }
241: break;
242: }
243: }
244:
245: int exitCond = 0;
246: boolean done = false;
247: for (String argument : commandLine.getNonOptionArguments()) {
248: done = true;
249: File source = new File(argument);
250: if (!source.exists()) {
251: if (printErrors) {
252: System.err.println(MessageFormat.format(labels
253: .getString("doesnotexist"),
254: (Object[]) new String[] { argument }));
255: }
256: exitCond = 1;
257: } else if (!source.canRead()) {
258: if (printErrors) {
259: System.err.println(MessageFormat.format(labels
260: .getString("cantread"),
261: (Object[]) new String[] { argument }));
262: }
263: exitCond = 1;
264: } else if (!source.canWrite()) {
265: if (printErrors) {
266: System.err.println(MessageFormat.format(labels
267: .getString("cantwrite"),
268: (Object[]) new String[] { argument }));
269: }
270: exitCond = 1;
271: } else {
272: try {
273: if (convert(source, inputTabWidth, outputTabWidth,
274: !force)) {
275: if (printMessages) {
276: System.out
277: .println(MessageFormat
278: .format(
279: labels
280: .getString("modified"),
281: (Object[]) new String[] { argument }));
282: }
283: } else {
284: if (printExtraMessages) {
285: System.out
286: .println(MessageFormat
287: .format(
288: labels
289: .getString("alreadycorrect"),
290: (Object[]) new String[] { argument }));
291: }
292: }
293: } catch (IOException x) {
294: if (printErrors) {
295: System.err.println(argument + ": "
296: + x.getMessage());
297: }
298: exitCond = 1;
299: }
300: }
301: }
302: if (!done) {
303: if (inputTabWidth == TABS) {
304: System.err.println(labels.getString("stdinguess"));
305: exitCond = 1;
306: } else {
307: try {
308: convert(System.in, System.out, inputTabWidth,
309: outputTabWidth, !force);
310: } catch (IOException x) {
311: System.err.println(x.getMessage());
312: exitCond = 1;
313: }
314: }
315: }
316: System.exit(exitCond);
317: }
318:
319: private final static int DEFAULT_INPUT_TAB_WIDTH = 4;
320: private final static int DEFAULT_INPUT_FILE_TAB_WIDTH = TABS;
321: private final static int DEFAULT_OUTPUT_TAB_WIDTH = 4;
322:
323: private final static boolean DEFAULT_MODIFY_BINARY = false;
324:
325: /**
326: * Read form the input stream, changing the tabs at the beginning of each line
327: * to four spaces, write the result to the output stream.
328: *
329: * @param in stream that contains the text which needs line number conversion.
330: * @param out stream where converted text is written.
331: * @return true if the output was modified from the input, false if it is exactly the same
332: * @throws BinaryDataException if non-text data is encountered.
333: * @throws IOException if an input or output error occurs.
334: *
335: * @since ostermillerutils 1.00.00
336: */
337: public static boolean convert(InputStream in, OutputStream out)
338: throws IOException {
339: return convert(in, out, DEFAULT_INPUT_TAB_WIDTH,
340: DEFAULT_OUTPUT_TAB_WIDTH, DEFAULT_MODIFY_BINARY);
341: }
342:
343: /**
344: * Read form the input stream, changing the tabs at the beginning of each line
345: * to the specified number of spaces, write the result to the output stream.
346: *
347: * @param in stream that contains the text which needs line number conversion.
348: * @param out stream where converted text is written.
349: * @param inputTabWidth number of spaces used instead of a tab in the input.
350: * @return true if the output was modified from the input, false if it is exactly the same
351: * @throws BinaryDataException if non-text data is encountered.
352: * @throws IOException if an input or output error occurs.
353: * @throws IllegalArgumentException if tab widths are not between 1 and 20 or TABS.
354: *
355: * @since ostermillerutils 1.00.00
356: */
357: public static boolean convert(InputStream in, OutputStream out,
358: int inputTabWidth) throws IOException {
359: return convert(in, out, inputTabWidth,
360: DEFAULT_OUTPUT_TAB_WIDTH, DEFAULT_MODIFY_BINARY);
361: }
362:
363: /**
364: * Read form the input stream, changing the tabs at the beginning of each line
365: * to the specified number of spaces or the other way around, write the result
366: * to the output stream.
367: *
368: * The current system's line separator is used.
369: *
370: * @param in stream that contains the text which needs line number conversion.
371: * @param out stream where converted text is written.
372: * @param inputTabWidth number of spaces used instead of a tab in the input.
373: * @param outputTabWidth TABS if tabs should be used, otherwise, number of spaces to use.
374: * @return true if the output was modified from the input, false if it is exactly the same
375: * @throws BinaryDataException if non-text data is encountered.
376: * @throws IOException if an input or output error occurs.
377: *
378: * @since ostermillerutils 1.00.00
379: */
380: public static boolean convert(InputStream in, OutputStream out,
381: int inputTabWidth, int outputTabWidth) throws IOException {
382: return convert(in, out, inputTabWidth, outputTabWidth,
383: DEFAULT_MODIFY_BINARY);
384: }
385:
386: /**
387: * Read form the input stream, changing the tabs at the beginning of each line
388: * to the specified number of spaces or the other way around, write the result
389: * to the output stream.
390: *
391: * The current system's line separator is used.
392: *
393: * @param in stream that contains the text which needs line number conversion.
394: * @param out stream where converted text is written.
395: * @param inputTabWidth number of spaces used instead of a tab in the input.
396: * @param outputTabWidth TABS if tabs should be used, otherwise, number of spaces to use.
397: * @param binaryException throw an exception and abort the operation if binary data is encountered and binaryExcepion is false.
398: * @return true if the output was modified from the input, false if it is exactly the same.
399: * @throws BinaryDataException if non-text data is encountered.
400: * @throws IOException if an input or output error occurs.
401: *
402: * @since ostermillerutils 1.00.00
403: */
404: public static boolean convert(InputStream in, OutputStream out,
405: int inputTabWidth, int outputTabWidth,
406: boolean binaryException) throws IOException {
407: if ((inputTabWidth < 1 || inputTabWidth > 20)
408: && inputTabWidth != TABS) {
409: throw new IllegalArgumentException(labels
410: .getString("widtherror"));
411: }
412: if ((outputTabWidth < 1 || outputTabWidth > 20)
413: && outputTabWidth != TABS) {
414: throw new IllegalArgumentException(labels
415: .getString("widtherror"));
416: }
417: int state = STATE_INIT;
418: int spaces = 0;
419: int tabs = 0;
420: int tabStops = 0;
421: int extraSpaces = 0;
422: boolean modified = false;
423:
424: byte[] buffer = new byte[BUFFER_SIZE];
425: int read;
426: while ((read = in.read(buffer)) != -1) {
427: for (int i = 0; i < read; i++) {
428: byte b = buffer[i];
429: if (binaryException && b != '\r' && b != '\n'
430: && b != '\t' && b != '\f' && (b & 0xff) < 32) {
431: throw new BinaryDataException(labels
432: .getString("binaryexcepion"));
433: }
434: switch (b) {
435: case ' ': {
436: if (state == STATE_INIT) {
437: spaces++;
438: extraSpaces++;
439: if (extraSpaces == inputTabWidth) {
440: tabStops++;
441: extraSpaces = 0;
442: }
443: } else {
444: out.write(b);
445: }
446: }
447: break;
448: case '\t': {
449: if (state == STATE_INIT) {
450: if (spaces > 0) {
451: // put tabs before spaces
452: modified = true;
453: }
454: tabs++;
455: tabStops++;
456: extraSpaces = 0;
457: } else {
458: out.write(b);
459: }
460: }
461: break;
462: case '\r':
463: case '\n': {
464: out.write(b);
465: spaces = 0;
466: tabs = 0;
467: tabStops = 0;
468: extraSpaces = 0;
469: state = STATE_INIT;
470: }
471: break;
472: default: {
473: if (state == STATE_INIT) {
474: if (outputTabWidth == TABS) {
475: for (int j = 0; j < tabStops; j++) {
476: out.write((byte) '\t');
477: }
478: } else {
479: extraSpaces += tabStops * outputTabWidth;
480: tabStops = 0;
481: }
482: for (int j = 0; j < extraSpaces; j++) {
483: out.write((byte) ' ');
484: }
485: if (extraSpaces != spaces || tabStops != tabs)
486: modified = true;
487: }
488: out.write(b);
489: state = STATE_SOMETHING;
490: }
491: break;
492: }
493: }
494: }
495: return modified;
496: }
497:
498: /**
499: * Change the tabs at the beginning of each line of the file to four spaces.
500: * Guess the tab width of the input file.
501: *
502: * @param f File to be converted.
503: * @return true if the file was modified, false if it was already in the correct format
504: * @throws BinaryDataException if non-text data is encountered.
505: * @throws IOException if an input or output error occurs.
506: *
507: * @since ostermillerutils 1.00.00
508: */
509: public static boolean convert(File f) throws IOException {
510: return convert(f, DEFAULT_INPUT_FILE_TAB_WIDTH,
511: DEFAULT_OUTPUT_TAB_WIDTH, DEFAULT_MODIFY_BINARY);
512: }
513:
514: /**
515: * Change the tabs at the beginning of each line of the file
516: * to the specified number of spaces.
517: *
518: * @param f File to be converted.
519: * @param inputTabWidth number of spaces used instead of a tab in the input, or TAB to guess.
520: * @return true if the output was modified from the input, false if it is exactly the same
521: * @throws BinaryDataException if non-text data is encountered.
522: * @throws IOException if an input or output error occurs.
523: * @throws IllegalArgumentException if tab widths are not between 1 and 20 or TABS.
524: *
525: * @since ostermillerutils 1.00.00
526: */
527: public static boolean convert(File f, int inputTabWidth)
528: throws IOException {
529: return convert(f, inputTabWidth, DEFAULT_OUTPUT_TAB_WIDTH,
530: DEFAULT_MODIFY_BINARY);
531: }
532:
533: /**
534: * Change the tabs at the beginning of each line of the file
535: * to the specified number of spaces or the other way around.
536: *
537: * @param f File to be converted.
538: * @param inputTabWidth number of spaces used instead of a tab in the input, or TAB to guess.
539: * @param outputTabWidth true if tabs should be used, false if spaces should be used.
540: * @return true if the output was modified from the input, false if it is exactly the same
541: * @throws BinaryDataException if non-text data is encountered.
542: * @throws IOException if an input or output error occurs.
543: *
544: * @since ostermillerutils 1.00.00
545: */
546: public static boolean convert(File f, int inputTabWidth,
547: int outputTabWidth) throws IOException {
548: return convert(f, inputTabWidth, outputTabWidth,
549: DEFAULT_MODIFY_BINARY);
550: }
551:
552: /**
553: * Change the tabs at the beginning of each line of the file
554: * to the specified number of spaces or the other way around.
555: *
556: * @param f File to be converted.
557: * @param inputTabWidth number of spaces used instead of a tab in the input, or TABS to guess.
558: * @param outputTabWidth true if tabs should be used, false if spaces should be used.
559: * @param binaryException throw an exception and abort the operation if binary data is encountered and binaryExcepion is false.
560: * @return true if the file was modified, false if it was already in the correct format
561: * @throws BinaryDataException if non-text data is encountered.
562: * @throws IOException if an input or output error occurs.
563: * @throws IllegalArgumentException if tab widths are not between 1 and 20 or TABS.
564: *
565: * @since ostermillerutils 1.00.00
566: */
567: public static boolean convert(File f, int inputTabWidth,
568: int outputTabWidth, boolean binaryException)
569: throws IOException {
570: File temp = null;
571: InputStream in = null;
572: OutputStream out = null;
573: boolean modified = false;
574: try {
575: if (inputTabWidth == TABS) {
576: inputTabWidth = guessTabWidth(new FileInputStream(f));
577: }
578: in = new FileInputStream(f);
579: temp = File.createTempFile("LineEnds", null, null);
580: out = new FileOutputStream(temp);
581: modified = convert(in, out, inputTabWidth, outputTabWidth,
582: binaryException);
583: in.close();
584: in = null;
585: out.flush();
586: out.close();
587: out = null;
588: if (modified) {
589: FileHelper.move(temp, f, true);
590: } else {
591: if (!temp.delete()) {
592: throw new IOException(
593: MessageFormat.format(labels
594: .getString("tempdeleteerror"),
595: (Object[]) new String[] { temp
596: .toString() }));
597: }
598: }
599: } finally {
600: if (in != null) {
601: in.close();
602: in = null;
603: }
604: if (out != null) {
605: out.flush();
606: out.close();
607: out = null;
608: }
609: }
610: return modified;
611: }
612:
613: /**
614: * Buffer size when reading from input stream.
615: *
616: * @since ostermillerutils 1.00.00
617: */
618: private final static int BUFFER_SIZE = 1024;
619: private final static int STATE_INIT = 0;
620: private final static int STATE_SOMETHING = 1;
621:
622: final private static int MAX_SPACES = 128;
623: final private static int MAX_TABS = 16;
624: final private static int MAX_COMBINED = 256;
625:
626: /**
627: * Guess the number of spaces per tab at the beginning of each line.
628: *
629: * @param in Input stream for which to guess tab width
630: * @return the least value (two or greater) which has some line that starts with n times spaces for n zero to max spaces starting a line.
631: * @throws IOException if an input or output error occurs.
632: *
633: * @since ostermillerutils 1.00.00
634: */
635: public static int guessTabWidth(InputStream in) throws IOException {
636: byte[] buffer = new byte[BUFFER_SIZE];
637: int[][] data = new int[MAX_SPACES][MAX_TABS];
638: int[] spaceData = new int[MAX_SPACES * MAX_TABS];
639: int read;
640: int state = STATE_INIT;
641: int tabs = 0;
642: int spaces = 0;
643: int mostTabs = 0;
644: int mostSpaces = 0;
645: while ((read = in.read(buffer)) != -1) {
646: for (int i = 0; i < read; i++) {
647: byte b = buffer[i];
648: switch (b) {
649: case ' ': {
650: if (state == STATE_INIT)
651: spaces++;
652: }
653: break;
654: case '\t': {
655: if (state == STATE_INIT)
656: tabs++;
657: }
658: break;
659: case '\r':
660: case '\n': {
661: state = STATE_INIT;
662: if (spaces < MAX_SPACES && tabs < MAX_TABS) {
663: data[spaces][tabs]++;
664: if (tabs > mostTabs)
665: mostTabs = tabs;
666: if (spaces > mostSpaces)
667: mostSpaces = spaces;
668: spaces = 0;
669: tabs = 0;
670: }
671: }
672: break;
673: default: {
674: state = STATE_SOMETHING;
675: }
676: break;
677: }
678: }
679: }
680: for (int tabWidth = 2; tabWidth <= 20; tabWidth++) {
681: int mostCombined = 0;
682: for (int tabInd = 0; tabInd <= mostTabs; tabInd++) {
683: for (int spaceInd = 0; spaceInd <= mostSpaces; spaceInd++) {
684: int totInd = spaceInd + (tabInd * tabWidth);
685: if (totInd < MAX_COMBINED) {
686: int numLines = data[spaceInd][tabInd];
687: if (numLines > 0) {
688: if (mostCombined < totInd)
689: mostCombined = totInd;
690: spaceData[totInd] += numLines;
691: }
692: }
693: }
694: }
695: boolean found = true;
696: for (int combInd = 0; found && combInd < mostCombined; combInd += tabWidth) {
697: found = spaceData[combInd] > 0;
698: }
699: if (found)
700: return tabWidth;
701: }
702: return 2;
703: }
704: }
|