001: /*
002: * Copyright 2004-2008 H2 Group. Licensed under the H2 License, Version 1.0
003: * (license2)
004: * Initial Developer: H2 Group
005: */
006: package org.h2.tools.i18n;
007:
008: import java.io.File;
009: import java.io.FileInputStream;
010: import java.io.FileOutputStream;
011: import java.io.FileReader;
012: import java.io.FileWriter;
013: import java.io.IOException;
014: import java.io.InputStreamReader;
015: import java.io.OutputStream;
016: import java.io.OutputStreamWriter;
017: import java.net.HttpURLConnection;
018: import java.net.URL;
019: import java.net.URLEncoder;
020: import java.util.ArrayList;
021: import java.util.HashMap;
022: import java.util.HashSet;
023: import java.util.Iterator;
024: import java.util.Map;
025: import java.util.Properties;
026: import java.util.Set;
027: import java.util.Stack;
028: import java.util.Map.Entry;
029:
030: import org.h2.server.web.PageParser;
031: import org.h2.tools.doc.XMLParser;
032: import org.h2.util.FileUtils;
033: import org.h2.util.IOUtils;
034: import org.h2.util.SortedProperties;
035: import org.h2.util.StringUtils;
036:
037: /**
038: * This class updates the translation source code files by parsing
039: * the HTML documentation. It also generates the translated HTML
040: * documentation.
041: */
042: public class PrepareTranslation {
043: private static final String MAIN_LANGUAGE = "en";
044: private static final String DELETED_PREFIX = "~";
045: private static final boolean AUTO_TRANSLATE = false;
046: private static final String[] EXCLUDE = { "datatypes.html",
047: "functions.html", "grammar.html" };
048:
049: public static void main(String[] args) throws Exception {
050: new PrepareTranslation().run(args);
051: }
052:
053: private void run(String[] args) throws Exception {
054: String baseDir = "src/docsrc/textbase";
055: prepare(baseDir, "src/main/org/h2/res");
056: prepare(baseDir, "src/main/org/h2/server/web/res");
057:
058: // convert the txt files to properties files
059: PropertiesToUTF8.textUTF8ToProperties(
060: "src/docsrc/text/_docs_de.utf8.txt",
061: "src/docsrc/text/_docs_de.properties");
062: PropertiesToUTF8.textUTF8ToProperties(
063: "src/docsrc/text/_docs_ja.utf8.txt",
064: "src/docsrc/text/_docs_ja.properties");
065:
066: // create the .jsp files and extract the text in the main language
067: extractFromHtml("src/docsrc/html", "src/docsrc/text",
068: MAIN_LANGUAGE);
069:
070: // add missing translations and create a new baseline
071: prepare(baseDir, "src/docsrc/text");
072:
073: // create the translated documentation
074: buildHtml("src/docsrc/text", "docs/html", "en");
075: // buildHtml("src/docsrc/text", "docs/html", "de");
076: buildHtml("src/docsrc/text", "docs/html", "ja");
077:
078: // convert the properties files back to utf8 text files, including the
079: // main language (to be used as a template)
080: PropertiesToUTF8.propertiesToTextUTF8(
081: "src/docsrc/text/_docs_en.properties",
082: "src/docsrc/text/_docs_en.utf8.txt");
083: PropertiesToUTF8.propertiesToTextUTF8(
084: "src/docsrc/text/_docs_de.properties",
085: "src/docsrc/text/_docs_de.utf8.txt");
086: PropertiesToUTF8.propertiesToTextUTF8(
087: "src/docsrc/text/_docs_ja.properties",
088: "src/docsrc/text/_docs_ja.utf8.txt");
089:
090: // delete temporary files
091: File[] list = new File("src/docsrc/text").listFiles();
092: for (int i = 0; i < list.length; i++) {
093: if (!list[i].getName().endsWith(".utf8.txt")) {
094: list[i].delete();
095: }
096: }
097: }
098:
099: private static void buildHtml(String templateDir, String targetDir,
100: String language) throws IOException {
101: File[] list = new File(templateDir).listFiles();
102: new File(targetDir).mkdirs();
103: // load the main 'translation'
104: String propName = templateDir + "/_docs_" + MAIN_LANGUAGE
105: + ".properties";
106: Properties prop = FileUtils.loadProperties(propName);
107: propName = templateDir + "/_docs_" + language + ".properties";
108: if (!(new File(propName)).exists()) {
109: throw new IOException("Translation not found: " + propName);
110: }
111: Properties transProp = FileUtils.loadProperties(propName);
112: for (Iterator it = transProp.keySet().iterator(); it.hasNext();) {
113: String key = (String) it.next();
114: String t = transProp.getProperty(key);
115: // overload with translations, but not the ones starting with #
116: if (t.startsWith("##")) {
117: prop.put(key, t.substring(2));
118: } else if (!t.startsWith("#")) {
119: prop.put(key, t);
120: }
121: }
122: // add spaces to each token
123: for (Iterator it = prop.keySet().iterator(); it.hasNext();) {
124: String key = (String) it.next();
125: String t = prop.getProperty(key);
126: prop.put(key, " " + t + " ");
127: }
128:
129: ArrayList fileNames = new ArrayList();
130: for (int i = 0; i < list.length; i++) {
131: String name = list[i].getName();
132: if (!name.endsWith(".jsp")) {
133: continue;
134: }
135: // remove '.jsp'
136: name = name.substring(0, name.length() - 4);
137: fileNames.add(name);
138: }
139: for (int i = 0; i < list.length; i++) {
140: String name = list[i].getName();
141: if (!name.endsWith(".jsp")) {
142: continue;
143: }
144: // remove '.jsp'
145: name = name.substring(0, name.length() - 4);
146: String template = IOUtils.readStringAndClose(
147: new FileReader(templateDir + "/" + name + ".jsp"),
148: -1);
149: String html = PageParser.parse(null, template, prop);
150: html = StringUtils
151: .replaceAll(html, "lang=\"" + MAIN_LANGUAGE + "\"",
152: "lang=\"" + language + "\"");
153: for (int j = 0; j < fileNames.size(); j++) {
154: String n = (String) fileNames.get(j);
155: if ("frame".equals(n)) {
156: // don't translate 'frame.html' to 'frame_ja.html',
157: // otherwise we can't switch back to English
158: continue;
159: }
160: html = StringUtils.replaceAll(html, n + ".html\"", n
161: + "_" + language + ".html\"");
162: }
163: html = StringUtils.replaceAll(html, "_" + MAIN_LANGUAGE
164: + ".html\"", ".html\"");
165: String target;
166: if (language.equals(MAIN_LANGUAGE)) {
167: target = targetDir + "/" + name + ".html";
168: } else {
169: target = targetDir + "/" + name + "_" + language
170: + ".html";
171: }
172: OutputStream out = new FileOutputStream(target);
173: OutputStreamWriter writer = new OutputStreamWriter(out,
174: "UTF-8");
175: writer.write(html);
176: writer.close();
177: }
178: }
179:
180: private static boolean exclude(String fileName) {
181: for (int i = 0; i < EXCLUDE.length; i++) {
182: if (fileName.endsWith(EXCLUDE[i])) {
183: return true;
184: }
185: }
186: return false;
187: }
188:
189: private static void extractFromHtml(String dir, String target,
190: String language) throws Exception {
191: File[] list = new File(dir).listFiles();
192: for (int i = 0; i < list.length; i++) {
193: File f = list[i];
194: String name = f.getName();
195: if (!name.endsWith(".html")) {
196: continue;
197: }
198: if (exclude(name)) {
199: continue;
200: }
201: // remove '.html'
202: name = name.substring(0, name.length() - 5);
203: if (name.indexOf('_') >= 0) {
204: // ignore translated files
205: continue;
206: }
207: String template = extract(name, f, target);
208: FileWriter writer = new FileWriter(target + "/" + name
209: + ".jsp");
210: writer.write(template);
211: writer.close();
212: }
213: }
214:
215: // private static boolean isText(String s) {
216: // if (s.length() < 2) {
217: // return false;
218: // }
219: // for (int i = 0; i < s.length(); i++) {
220: // char c = s.charAt(i);
221: // if (!Character.isDigit(c) && c != '.' && c != '-' && c != '+') {
222: // return true;
223: // }
224: // }
225: // return false;
226: // }
227:
228: private static String getSpace(String s, boolean start) {
229: if (start) {
230: for (int i = 0; i < s.length(); i++) {
231: if (!Character.isWhitespace(s.charAt(i))) {
232: if (i == 0) {
233: return "";
234: } else {
235: return s.substring(0, i);
236: }
237: }
238: }
239: return s;
240: } else {
241: for (int i = s.length() - 1; i >= 0; i--) {
242: if (!Character.isWhitespace(s.charAt(i))) {
243: if (i == s.length() - 1) {
244: return "";
245: } else {
246: return s.substring(i + 1, s.length());
247: }
248: }
249: }
250: // if all spaces, return an empty string to avoid duplicate spaces
251: return "";
252: }
253: }
254:
255: private static String extract(String documentName, File f,
256: String target) throws Exception {
257: String xml = IOUtils.readStringAndClose(new InputStreamReader(
258: new FileInputStream(f), "UTF-8"), -1);
259: StringBuffer template = new StringBuffer(xml.length());
260: int id = 0;
261: Properties prop = new SortedProperties();
262: XMLParser parser = new XMLParser(xml);
263: StringBuffer buff = new StringBuffer();
264: Stack stack = new Stack();
265: String tag = "";
266: boolean ignoreEnd = false;
267: String nextKey = "";
268: boolean templateIsCopy = false;
269: while (true) {
270: int event = parser.next();
271: if (event == XMLParser.END_DOCUMENT) {
272: break;
273: } else if (event == XMLParser.CHARACTERS) {
274: String s = parser.getText();
275: String trim = s.trim();
276: if (trim.length() == 0) {
277: if (buff.length() > 0) {
278: buff.append(s);
279: } else {
280: template.append(s);
281: }
282: } else if ("p".equals(tag) || "li".equals(tag)
283: || "a".equals(tag) || "td".equals(tag)
284: || "th".equals(tag) || "h1".equals(tag)
285: || "h2".equals(tag) || "h3".equals(tag)
286: || "h4".equals(tag) || "body".equals(tag)
287: || "b".equals(tag) || "code".equals(tag)
288: || "form".equals(tag) || "span".equals(tag)
289: || "em".equals(tag)) {
290: if (buff.length() == 0) {
291: nextKey = documentName + "_" + (1000 + id++)
292: + "_" + tag;
293: template.append(getSpace(s, true));
294: } else if (templateIsCopy) {
295: buff.append(getSpace(s, true));
296: }
297: if (templateIsCopy) {
298: buff.append(trim);
299: buff.append(getSpace(s, false));
300: } else {
301: buff.append(clean(trim));
302: }
303: } else if ("pre".equals(tag) || "title".equals(tag)
304: || "script".equals(tag) || "style".equals(tag)) {
305: // ignore, don't translate
306: template.append(s);
307: } else {
308: System.out.println(f.getName()
309: + " invalid wrapper tag for text: " + tag
310: + " text: " + s);
311: System.out.println(parser.getRemaining());
312: throw new Exception();
313: }
314: } else if (event == XMLParser.START_ELEMENT) {
315: stack.add(tag);
316: String name = parser.getName();
317: if ("code".equals(name) || "a".equals(name)
318: || "b".equals(name) || "span".equals(name)) {
319: // keep tags if wrapped, but not if this is the wrapper
320: if (buff.length() > 0) {
321: buff.append(' ');
322: buff.append(parser.getToken().trim());
323: ignoreEnd = false;
324: } else {
325: ignoreEnd = true;
326: template.append(parser.getToken());
327: }
328: } else if ("p".equals(tag) || "li".equals(tag)
329: || "td".equals(tag) || "th".equals(tag)
330: || "h1".equals(tag) || "h2".equals(tag)
331: || "h3".equals(tag) || "h4".equals(tag)
332: || "body".equals(tag) || "form".equals(tag)) {
333: if (buff.length() > 0) {
334: if (templateIsCopy) {
335: template.append(buff.toString());
336: } else {
337: template.append("${" + nextKey + "}");
338: }
339: add(prop, nextKey, buff);
340: }
341: template.append(parser.getToken());
342: } else {
343: template.append(parser.getToken());
344: }
345: tag = name;
346: } else if (event == XMLParser.END_ELEMENT) {
347: String name = parser.getName();
348: if ("code".equals(name) || "a".equals(name)
349: || "b".equals(name) || "span".equals(name)
350: || "em".equals(name)) {
351: if (ignoreEnd) {
352: if (buff.length() > 0) {
353: if (templateIsCopy) {
354: template.append(buff.toString());
355: } else {
356: template.append("${" + nextKey + "}");
357: }
358: add(prop, nextKey, buff);
359: }
360: template.append(parser.getToken());
361: } else {
362: if (buff.length() > 0) {
363: buff.append(parser.getToken());
364: buff.append(' ');
365: }
366: }
367: } else {
368: if (buff.length() > 0) {
369: if (templateIsCopy) {
370: template.append(buff.toString());
371: } else {
372: template.append("${" + nextKey + "}");
373: }
374: add(prop, nextKey, buff);
375: }
376: template.append(parser.getToken());
377: }
378: tag = (String) stack.pop();
379: } else if (event == XMLParser.DTD) {
380: template.append(parser.getToken());
381: } else if (event == XMLParser.COMMENT) {
382: template.append(parser.getToken());
383: } else {
384: int eventType = parser.getEventType();
385: throw new Exception("Unexpected event " + eventType
386: + " at " + parser.getRemaining());
387: }
388: // if(!xml.startsWith(template.toString())) {
389: // System.out.println(nextKey);
390: // System.out.println(template.substring(template.length()-60)
391: // +";");
392: // System.out.println(xml.substring(template.length()-60,
393: // template.length()));
394: // System.out.println(template.substring(template.length()-55)
395: // +";");
396: // System.out.println(xml.substring(template.length()-55,
397: // template.length()));
398: // break;
399: // }
400: }
401: new File(target).mkdirs();
402: String propFileName = target + "/_docs_" + MAIN_LANGUAGE
403: + ".properties";
404: Properties old = FileUtils.loadProperties(propFileName);
405: prop.putAll(old);
406: PropertiesToUTF8.storeProperties(prop, propFileName);
407: String t = template.toString();
408: if (templateIsCopy && !t.equals(xml)) {
409: for (int i = 0; i < Math.min(t.length(), xml.length()); i++) {
410: if (t.charAt(i) != xml.charAt(i)) {
411: int start = Math.max(0, i - 30), end = Math.min(
412: i + 30, xml.length());
413: t = t.substring(start, end);
414: xml = xml.substring(start, end);
415: }
416: }
417: System.out
418: .println("xml--------------------------------------------------: ");
419: System.out.println(xml);
420: System.out
421: .println("t---------------------------------------------------: ");
422: System.out.println(t);
423: System.exit(1);
424: }
425: return t;
426: }
427:
428: private static String clean(String text) {
429: if (text.indexOf('\r') < 0 && text.indexOf('\n') < 0) {
430: return text;
431: }
432: text = text.replace('\r', ' ');
433: text = text.replace('\n', ' ');
434: text = StringUtils.replaceAll(text, " ", " ");
435: text = StringUtils.replaceAll(text, " ", " ");
436: return text;
437: }
438:
439: private static void add(Properties prop, String document,
440: StringBuffer text) {
441: String s = text.toString().trim();
442: text.setLength(0);
443: prop.setProperty(document, s);
444: }
445:
446: private void prepare(String baseDir, String path)
447: throws IOException {
448: File dir = new File(path);
449: File[] list = dir.listFiles();
450: File main = null;
451: ArrayList translations = new ArrayList();
452: for (int i = 0; list != null && i < list.length; i++) {
453: File f = list[i];
454: if (f.getName().endsWith(".properties")) {
455: if (f.getName().endsWith(
456: "_" + MAIN_LANGUAGE + ".properties")) {
457: main = f;
458: } else {
459: translations.add(f);
460: }
461: }
462: }
463: Properties p = FileUtils.loadProperties(main.getAbsolutePath());
464: Properties base = FileUtils.loadProperties(baseDir + "/"
465: + main.getName());
466: PropertiesToUTF8.storeProperties(p, main.getAbsolutePath());
467: for (int i = 0; i < translations.size(); i++) {
468: File trans = (File) translations.get(i);
469: String language = trans.getName();
470: language = language.substring(
471: language.lastIndexOf('_') + 1, language
472: .lastIndexOf('.'));
473: prepare(p, base, trans, language);
474: }
475: PropertiesToUTF8.storeProperties(p, baseDir + "/"
476: + main.getName());
477: }
478:
479: private void prepare(Properties main, Properties base, File trans,
480: String language) throws IOException {
481: Properties p = FileUtils
482: .loadProperties(trans.getAbsolutePath());
483: Properties oldTranslations = new Properties();
484: for (Iterator it = base.keySet().iterator(); it.hasNext();) {
485: String key = (String) it.next();
486: String m = base.getProperty(key);
487: String t = p.getProperty(key);
488: if (t != null && !t.startsWith("#")) {
489: oldTranslations.setProperty(m, t);
490: }
491: }
492: HashSet toTranslate = new HashSet();
493: // add missing keys, using # and the value from the main file
494: Iterator it = main.keySet().iterator();
495: while (it.hasNext()) {
496: String key = (String) it.next();
497: String now = main.getProperty(key);
498: if (!p.containsKey(key)) {
499: String t = oldTranslations.getProperty(now);
500: if (t == null) {
501: if (AUTO_TRANSLATE) {
502: toTranslate.add(key);
503: } else {
504: System.out
505: .println(trans.getName()
506: + ": key "
507: + key
508: + " not found in translation file; added dummy # 'translation'");
509: t = "#" + now;
510: p.put(key, t);
511: }
512: } else {
513: p.put(key, t);
514: }
515: } else {
516: String t = p.getProperty(key);
517: String last = base.getProperty(key);
518: if (t.startsWith("#") && !t.startsWith("##")) {
519: // not translated before
520: t = oldTranslations.getProperty(now);
521: if (t == null) {
522: t = "#" + now;
523: }
524: p.put(key, t);
525: } else if (last != null && !last.equals(now)) {
526: t = oldTranslations.getProperty(now);
527: if (t == null) {
528: // main data changed since the last run: review translation
529: System.out.println(trans.getName() + ": key "
530: + key
531: + " changed, please review; last="
532: + last + " now=" + now);
533: if (AUTO_TRANSLATE) {
534: toTranslate.add(key);
535: } else {
536: String old = p.getProperty(key);
537: t = "#" + now + " #" + old;
538: p.put(key, t);
539: }
540: } else {
541: p.put(key, t);
542: }
543: }
544: }
545: }
546: Map autoTranslated = new HashMap();
547: if (AUTO_TRANSLATE) {
548: HashSet set = new HashSet();
549: for (it = toTranslate.iterator(); it.hasNext();) {
550: String key = (String) it.next();
551: String now = main.getProperty(key);
552: set.add(now);
553: }
554: if ("de".equals(language)) {
555: autoTranslated = autoTranslate(set, "en", language);
556: }
557: }
558: for (it = toTranslate.iterator(); it.hasNext();) {
559: String key = (String) it.next();
560: String now = main.getProperty(key);
561: String t;
562: if (AUTO_TRANSLATE) {
563: t = "##" + autoTranslated.get(now);
564: } else {
565: System.out
566: .println(trans.getName()
567: + ": key "
568: + key
569: + " not found in translation file; added dummy # 'translation'");
570: t = "#" + now;
571: }
572: p.put(key, t);
573: }
574: // remove keys that don't exist in the main file (deleted or typo in the key)
575: it = new ArrayList(p.keySet()).iterator();
576: while (it.hasNext()) {
577: String key = (String) it.next();
578: if (!main.containsKey(key)
579: && !key.startsWith(DELETED_PREFIX)) {
580: String newKey = key;
581: while (true) {
582: newKey = DELETED_PREFIX + newKey;
583: if (!p.containsKey(newKey)) {
584: break;
585: }
586: }
587: System.out.println(trans.getName() + ": key " + key
588: + " not found in main file; renamed to "
589: + newKey);
590: p.put(newKey, p.getProperty(key));
591: p.remove(key);
592: }
593: }
594: PropertiesToUTF8.storeProperties(p, trans.getAbsolutePath());
595: }
596:
597: private Map autoTranslate(Set toTranslate, String sourceLanguage,
598: String targetLanguage) {
599: HashMap results = new HashMap();
600: if (toTranslate.size() == 0) {
601: return results;
602: }
603: int maxLength = 1500;
604: int minSeparator = 100000;
605: HashMap keyMap = new HashMap(toTranslate.size());
606: StringBuffer buff = new StringBuffer(maxLength);
607: // TODO make sure these numbers don't occur in the original text
608: int separator = minSeparator;
609: for (Iterator it = toTranslate.iterator(); it.hasNext();) {
610: String original = (String) it.next();
611: if (original != null) {
612: original = original.trim();
613: if (buff.length() + original.length() > maxLength) {
614: System.out
615: .println("remaining: "
616: + (toTranslate.size() - separator + minSeparator));
617: translateChunk(buff, separator, sourceLanguage,
618: targetLanguage, keyMap, results);
619: }
620: keyMap.put(new Integer(separator), original);
621: buff.append(separator);
622: buff.append(' ');
623: buff.append(original);
624: buff.append(' ');
625: separator++;
626: }
627: }
628: translateChunk(buff, separator, sourceLanguage, targetLanguage,
629: keyMap, results);
630: return results;
631: }
632:
633: private void translateChunk(StringBuffer buff, int separator,
634: String source, String target, HashMap keyMap,
635: HashMap results) {
636: buff.append(separator);
637: String original = buff.toString();
638: String translation = "";
639: try {
640: translation = translate(original, source, target);
641: System.out.println("original: " + original);
642: System.out.println("translation: " + translation);
643: } catch (Throwable e) {
644: System.out.println("Exception translating [" + original
645: + "]: " + e);
646: e.printStackTrace();
647: }
648: for (Iterator it = keyMap.entrySet().iterator(); it.hasNext();) {
649: Entry entry = (Entry) it.next();
650: separator = ((Integer) entry.getKey()).intValue();
651: String o = (String) entry.getValue();
652: String startSeparator = String.valueOf(separator);
653: int start = translation.indexOf(startSeparator);
654: int end = translation
655: .indexOf(String.valueOf(separator + 1));
656: if (start < 0 || end < 0) {
657: System.out.println("No translation for " + o);
658: results.put(o, "#" + o);
659: } else {
660: String t = translation.substring(start
661: + startSeparator.length(), end);
662: t = t.trim();
663: results.put(o, t);
664: }
665: }
666: keyMap.clear();
667: buff.setLength(0);
668: }
669:
670: /**
671: * Translate the text using Google Translate
672: */
673: String translate(String text, String sourceLanguage,
674: String targetLanguage) throws Exception {
675: Thread.sleep(4000);
676: String url = "http://translate.google.com/translate_t?langpair="
677: + sourceLanguage
678: + "|"
679: + targetLanguage
680: + "&text="
681: + URLEncoder.encode(text, "UTF-8");
682: HttpURLConnection conn = (HttpURLConnection) (new URL(url))
683: .openConnection();
684: conn.setRequestProperty("User-Agent",
685: "Mozilla/5.0 (compatible; Java)");
686: String result = IOUtils.readStringAndClose(IOUtils
687: .getReader(conn.getInputStream()), -1);
688: int start = result.indexOf("<div id=result_box");
689: start = result.indexOf('>', start) + 1;
690: int end = result.indexOf("</div>", start);
691: return result.substring(start, end);
692: }
693:
694: }
|