001: /*
002: Copyright (C) 2005 MySQL AB
003:
004: This program is free software; you can redistribute it and/or modify
005: it under the terms of version 2 of the GNU General Public License as
006: published by the Free Software Foundation.
007:
008: There are special exceptions to the terms and conditions of the GPL
009: as it is applied to this software. View the full text of the
010: exception in file EXCEPTIONS-CONNECTOR-J in the directory of this
011: software distribution.
012:
013: This program is distributed in the hope that it will be useful,
014: but WITHOUT ANY WARRANTY; without even the implied warranty of
015: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016: GNU General Public License for more details.
017:
018: You should have received a copy of the GNU General Public License
019: along with this program; if not, write to the Free Software
020: Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
021: */
022:
023: package testsuite.simple;
024:
025: import java.sql.Connection;
026: import java.sql.SQLException;
027: import java.sql.Statement;
028: import java.sql.Types;
029: import java.util.ArrayList;
030: import java.util.HashMap;
031: import java.util.Iterator;
032: import java.util.List;
033: import java.util.Locale;
034: import java.util.Map;
035: import java.util.Properties;
036:
037: import testsuite.BaseTestCase;
038:
039: public class CharsetTests extends BaseTestCase {
040:
041: public CharsetTests(String name) {
042: super (name);
043: // TODO Auto-generated constructor stub
044: }
045:
046: public static void main(String[] args) {
047: junit.textui.TestRunner.run(CharsetTests.class);
048: }
049:
050: public void testCP932Backport() throws Exception {
051: if (versionMeetsMinimum(4, 1, 12)) {
052: if (versionMeetsMinimum(5, 0)) {
053: if (!versionMeetsMinimum(5, 0, 3)) {
054: return;
055: }
056: }
057:
058: Properties props = new Properties();
059: props.put("useUnicode", "true");
060: props.put("characterEncoding", "WINDOWS-31J");
061: getConnectionWithProps(props).close();
062: }
063: }
064:
065: public void testNECExtendedCharsByEUCJPSolaris() throws Exception {
066: if (!isRunningOnJdk131()) {
067: if (versionMeetsMinimum(5, 0, 5)) {
068: char necExtendedChar = 0x3231; // 0x878A of WINDOWS-31J, NEC
069: // special(row13).
070: String necExtendedCharString = String
071: .valueOf(necExtendedChar);
072:
073: Properties props = new Properties();
074:
075: props.put("useUnicode", "true");
076: props.put("characterEncoding", "EUC_JP_Solaris");
077:
078: Connection conn2 = getConnectionWithProps(props);
079: Statement stmt2 = conn2.createStatement();
080:
081: stmt2.executeUpdate("DROP TABLE IF EXISTS t_eucjpms");
082: createTable("t_eucjpms", "(c1 char(1))"
083: + " default character set = eucjpms");
084: stmt2.executeUpdate("INSERT INTO t_eucjpms VALUES ('"
085: + necExtendedCharString + "')");
086: this .rs = stmt2
087: .executeQuery("SELECT c1 FROM t_eucjpms");
088: this .rs.next();
089: assertEquals(necExtendedCharString, this .rs
090: .getString("c1"));
091:
092: this .rs.close();
093: stmt2.close();
094: conn2.close();
095:
096: props.put("characterSetResults", "EUC_JP_Solaris");
097: conn2 = getConnectionWithProps(props);
098: stmt2 = conn.createStatement();
099:
100: this .rs = stmt2
101: .executeQuery("SELECT c1 FROM t_eucjpms");
102: this .rs.next();
103: assertEquals(necExtendedCharString, rs.getString("c1"));
104:
105: stmt2.executeUpdate("DROP TABLE t_eucjpms");
106: this .rs.close();
107: stmt2.close();
108: conn2.close();
109: }
110: }
111: }
112:
113: /**
114: * Test data of sjis. sjis consists of ASCII, JIS-Roman, JISX0201 and
115: * JISX0208.
116: */
117: public static final char[] SJIS_CHARS = new char[] { 0xFF71, // halfwidth
118: // katakana
119: // letter A,
120: // 0xB100 of
121: // SJIS, one
122: // of
123: // JISX0201.
124: 0x65E5, // CJK unified ideograph, 0x93FA of SJIS, one of JISX0208.
125: 0x8868, // CJK unified ideograph, 0x955C of SJIS, one of '5c'
126: // character.
127: 0x2016 // 0x8161 of SJIS/WINDOWS-31J, converted to differently
128: // to/from ucs2
129: };
130:
131: /**
132: * Test data of cp932. WINDOWS-31J consists of ASCII, JIS-Roman, JISX0201,
133: * JISX0208, NEC special characters(row13), NEC selected IBM special
134: * characters, and IBM special characters.
135: */
136: private static final char[] CP932_CHARS = new char[] { 0xFF71, // halfwidth
137: // katakana
138: // letter A,
139: // 0xB100 of
140: // WINDOWS-31J,
141: // one of
142: // JISX0201.
143: 0x65E5, // CJK unified ideograph, 0x93FA of WINDOWS-31J, one of
144: // JISX0208.
145: 0x3231, // parenthesized ideograph stok, 0x878B of WINDOWS-31J, one
146: // of NEC special characters(row13).
147: 0x67BB, // CJK unified ideograph, 0xEDC6 of WINDOWS-31J, one of NEC
148: // selected IBM special characters.
149: 0x6D6F, // CJK unified ideograph, 0xFAFC of WINDOWS-31J, one of IBM
150: // special characters.
151: 0x8868, // one of CJK unified ideograph, 0x955C of WINDOWS-31J, one
152: // of '5c' characters.
153: 0x2225 // 0x8161 of SJIS/WINDOWS-31J, converted to differently
154: // to/from ucs2
155: };
156:
157: /**
158: * Test data of ujis. ujis consists of ASCII, JIS-Roman, JISX0201, JISX0208,
159: * JISX0212.
160: */
161: public static final char[] UJIS_CHARS = new char[] { 0xFF71, // halfwidth
162: // katakana
163: // letter A,
164: // 0x8EB1 of
165: // ujis, one
166: // of
167: // JISX0201.
168: 0x65E5, // CJK unified ideograph, 0xC6FC of ujis, one of JISX0208.
169: 0x7B5D, // CJK unified ideograph, 0xE4B882 of ujis, one of JISX0212
170: 0x301C // wave dash, 0xA1C1 of ujis, convertion rule is different
171: // from ujis
172: };
173:
174: /**
175: * Test data of eucjpms. ujis consists of ASCII, JIS-Roman, JISX0201,
176: * JISX0208, JISX0212, NEC special characters(row13)
177: */
178: public static final char[] EUCJPMS_CHARS = new char[] { 0xFF71, // halfwidth
179: // katakana
180: // letter A,
181: // 0x8EB1 of
182: // ujis, one
183: // of
184: // JISX0201.
185: 0x65E5, // CJK unified ideograph, 0xC6FC of ujis, one of JISX0208.
186: 0x7B5D, // CJK unified ideograph, 0xE4B882 of ujis, one of JISX0212
187: 0x3231, // parenthesized ideograph stok, 0x878A of WINDOWS-31J, one
188: // of NEC special characters(row13).
189: 0xFF5E // wave dash, 0xA1C1 of eucjpms, convertion rule is
190: // different from ujis
191: };
192:
193: public void testInsertCharStatement() throws Exception {
194: if (!isRunningOnJdk131()) {
195: if (versionMeetsMinimum(4, 1, 12)) {
196: Map testDataMap = new HashMap();
197:
198: List charsetList = new ArrayList();
199:
200: Map connectionMap = new HashMap();
201:
202: Map connectionWithResultMap = new HashMap();
203:
204: Map statementMap = new HashMap();
205:
206: Map statementWithResultMap = new HashMap();
207:
208: Map javaToMysqlCharsetMap = new HashMap();
209:
210: charsetList.add("SJIS");
211: testDataMap.put("SJIS", SJIS_CHARS);
212: javaToMysqlCharsetMap.put("SJIS", "sjis");
213:
214: charsetList.add("Shift_JIS");
215: testDataMap.put("Shift_JIS", SJIS_CHARS);
216: javaToMysqlCharsetMap.put("Shift_JIS", "sjis");
217:
218: charsetList.add("CP943");
219: testDataMap.put("CP943", SJIS_CHARS);
220: javaToMysqlCharsetMap.put("CP943", "sjis");
221:
222: if (versionMeetsMinimum(5, 0, 3)) {
223: charsetList.add("WINDOWS-31J");
224: testDataMap.put("WINDOWS-31J", CP932_CHARS);
225: javaToMysqlCharsetMap.put("WINDOWS-31J", "cp932");
226:
227: charsetList.add("MS932");
228: testDataMap.put("MS932", CP932_CHARS);
229: javaToMysqlCharsetMap.put("MS932", "cp932");
230:
231: charsetList.add("EUC_JP");
232: testDataMap.put("EUC_JP", UJIS_CHARS);
233: // testDataHexMap.put("EUC_JP", UJIS_CHARS_HEX);
234: javaToMysqlCharsetMap.put("EUC_JP", "ujis");
235:
236: charsetList.add("EUC_JP_Solaris");
237: testDataMap.put("EUC_JP_Solaris", EUCJPMS_CHARS);
238: // testDataHexMap.put("EUC_JP_Solaris", EUCJPMS_CHARS_HEX);
239: javaToMysqlCharsetMap.put("EUC_JP_Solaris",
240: "eucjpms");
241:
242: } else {
243: charsetList.add("EUC_JP");
244: testDataMap.put("EUC_JP", UJIS_CHARS);
245: javaToMysqlCharsetMap.put("EUC_JP", "ujis");
246: }
247:
248: Iterator charsetIterator = charsetList.iterator();
249:
250: while (charsetIterator.hasNext()) {
251: String charset = (String) charsetIterator.next();
252: Properties props = new Properties();
253:
254: props.put("useUnicode", "true");
255: props.put("characterEncoding", charset);
256: Connection conn2 = getConnectionWithProps(props);
257: connectionMap.put(charset
258: .toLowerCase(Locale.ENGLISH), conn2);
259: statementMap.put(charset
260: .toLowerCase(Locale.ENGLISH), conn2
261: .createStatement());
262:
263: props.put("characterSetResult", charset);
264: Connection connWithResult = getConnectionWithProps(props);
265: connectionWithResultMap
266: .put(charset, connWithResult);
267: statementWithResultMap.put(charset, connWithResult
268: .createStatement());
269: }
270:
271: charsetIterator = charsetList.iterator();
272: while (charsetIterator.hasNext()) {
273: String charset = (String) charsetIterator.next();
274:
275: String mysqlCharset = (String) javaToMysqlCharsetMap
276: .get(charset);
277: Statement stmt2 = (Statement) statementMap
278: .get(charset.toLowerCase(Locale.ENGLISH));
279: String query1 = "DROP TABLE IF EXISTS t1";
280: String query2 = "CREATE TABLE t1 (c1 int, c2 char(1)) "
281: + "DEFAULT CHARACTER SET = " + mysqlCharset;
282: stmt2.executeUpdate(query1);
283: stmt2.executeUpdate(query2);
284: char[] testData = (char[]) testDataMap.get(charset);
285: for (int i = 0; i < testData.length; i++) {
286: String query3 = "INSERT INTO t1 values(" + i
287: + ", '" + testData[i] + "')";
288: stmt2.executeUpdate(query3);
289: String query4 = "SELECT c2 FROM t1 WHERE c1 = "
290: + i;
291: this .rs = stmt2.executeQuery(query4);
292: this .rs.next();
293: String value = rs.getString(1);
294:
295: assertEquals("For character set " + charset
296: + "/ " + mysqlCharset, String
297: .valueOf(testData[i]), value);
298: }
299: String query5 = "DROP TABLE t1";
300: stmt2.executeUpdate(query5);
301: }
302: }
303: }
304: }
305:
306: public void testUtf8OutsideBMPInBlob() throws Exception {
307: createTable(
308: "utf8Test",
309: "(include_blob BLOB, include_tinyblob TINYBLOB, include_longblob LONGBLOB, exclude_tinyblob TINYBLOB, exclude_blob BLOB, exclude_longblob LONGBLOB)");
310:
311: // We know this gets truncated in MySQL currently, even though it's valid UTF-8, it's just 4 bytes encoded
312: String outsideBmp = new String(new byte[] { (byte) 0xF0,
313: (byte) 0x90, (byte) 0x80, (byte) 0x80 }, "UTF-8");
314: byte[] outsideBmpBytes = outsideBmp.getBytes("UTF-8");
315: System.out.println(outsideBmpBytes.length);
316:
317: Connection utf8Conn = getConnectionWithProps("useBlobToStoreUTF8OutsideBMP=true, characterEncoding=UTF-8");
318:
319: String insertStatement = "INSERT INTO utf8Test VALUES (?, ?, ?, ?, ?, ?)";
320:
321: this .pstmt = utf8Conn.prepareStatement(insertStatement);
322:
323: this .pstmt.setString(1, outsideBmp);
324: this .pstmt.setString(2, outsideBmp);
325: this .pstmt.setString(3, outsideBmp);
326: this .pstmt.setString(4, outsideBmp);
327: this .pstmt.setString(5, outsideBmp);
328: this .pstmt.setString(6, outsideBmp);
329: this .pstmt.executeUpdate();
330:
331: String query = "SELECT include_blob, include_tinyblob, include_longblob, exclude_tinyblob, exclude_blob, exclude_longblob FROM utf8Test";
332: this .rs = utf8Conn.createStatement().executeQuery(query);
333: this .rs.next();
334:
335: assertEquals(this .rs.getObject(1).toString(), outsideBmp);
336: assertEquals(this .rs.getObject(2).toString(), outsideBmp);
337: assertEquals(this .rs.getObject(3).toString(), outsideBmp);
338: assertEquals(this .rs.getObject(4).toString(), outsideBmp);
339: assertEquals(this .rs.getObject(5).toString(), outsideBmp);
340: assertEquals(this .rs.getObject(6).toString(), outsideBmp);
341:
342: assertEquals("java.lang.String", this .rs.getObject(1)
343: .getClass().getName());
344: assertEquals("java.lang.String", this .rs.getMetaData()
345: .getColumnClassName(1));
346: assertEquals(Types.VARCHAR, this .rs.getMetaData()
347: .getColumnType(1));
348:
349: assertEquals("java.lang.String", this .rs.getObject(2)
350: .getClass().getName());
351: assertEquals("java.lang.String", this .rs.getMetaData()
352: .getColumnClassName(2));
353: assertEquals(Types.VARCHAR, this .rs.getMetaData()
354: .getColumnType(2));
355:
356: assertEquals("java.lang.String", this .rs.getObject(3)
357: .getClass().getName());
358: assertEquals("java.lang.String", this .rs.getMetaData()
359: .getColumnClassName(3));
360: assertEquals(Types.LONGVARCHAR, this .rs.getMetaData()
361: .getColumnType(3));
362:
363: assertEquals("java.lang.String", this .rs.getObject(4)
364: .getClass().getName());
365: assertEquals("java.lang.String", this .rs.getMetaData()
366: .getColumnClassName(4));
367: assertEquals(Types.VARCHAR, this .rs.getMetaData()
368: .getColumnType(4));
369:
370: assertEquals("java.lang.String", this .rs.getObject(5)
371: .getClass().getName());
372: assertEquals("java.lang.String", this .rs.getMetaData()
373: .getColumnClassName(5));
374: assertEquals(Types.VARCHAR, this .rs.getMetaData()
375: .getColumnType(5));
376:
377: assertEquals("java.lang.String", this .rs.getObject(6)
378: .getClass().getName());
379: assertEquals("java.lang.String", this .rs.getMetaData()
380: .getColumnClassName(6));
381: assertEquals(Types.LONGVARCHAR, this .rs.getMetaData()
382: .getColumnType(6));
383:
384: utf8Conn = getConnectionWithProps("useBlobToStoreUTF8OutsideBMP=true, characterEncoding=UTF-8,utf8OutsideBmpIncludedColumnNamePattern=.*include.*,utf8OutsideBmpExcludedColumnNamePattern=.*blob");
385:
386: this .rs = utf8Conn.createStatement().executeQuery(query);
387: this .rs.next();
388:
389: // Should walk/talk like a string, encoded in utf-8 on the server (4-byte)
390: assertEquals(this .rs.getObject(1).toString(), outsideBmp);
391: assertEquals(this .rs.getObject(2).toString(), outsideBmp);
392: assertEquals(this .rs.getObject(3).toString(), outsideBmp);
393:
394: assertEquals("java.lang.String", this .rs.getObject(1)
395: .getClass().getName());
396: assertEquals("java.lang.String", this .rs.getMetaData()
397: .getColumnClassName(1));
398: assertEquals(Types.VARCHAR, this .rs.getMetaData()
399: .getColumnType(1));
400:
401: assertEquals("java.lang.String", this .rs.getObject(2)
402: .getClass().getName());
403: assertEquals("java.lang.String", this .rs.getMetaData()
404: .getColumnClassName(2));
405: assertEquals(Types.VARCHAR, this .rs.getMetaData()
406: .getColumnType(2));
407:
408: assertEquals("java.lang.String", this .rs.getObject(3)
409: .getClass().getName());
410: assertEquals("java.lang.String", this .rs.getMetaData()
411: .getColumnClassName(3));
412: assertEquals(Types.LONGVARCHAR, this .rs.getMetaData()
413: .getColumnType(3));
414:
415: // These should be left as a blob, since it matches the exclusion regex
416: assertTrue(bytesAreSame(this .rs.getBytes(4), outsideBmpBytes));
417: assertEquals("[B", this .rs.getObject(4).getClass().getName());
418: assertEquals("[B", this .rs.getMetaData().getColumnClassName(4));
419: assertEquals(Types.VARBINARY, this .rs.getMetaData()
420: .getColumnType(4));
421:
422: // Should behave types-wise just like BLOB, including LONGVARBINARY type mapping
423: assertTrue(bytesAreSame(this .rs.getBytes(5), outsideBmpBytes));
424: assertEquals("[B", this .rs.getObject(5).getClass().getName());
425: assertEquals("[B", this .rs.getMetaData().getColumnClassName(5));
426: assertEquals(Types.LONGVARBINARY, this .rs.getMetaData()
427: .getColumnType(5));
428:
429: assertTrue(bytesAreSame(this .rs.getBytes(6), outsideBmpBytes));
430: assertEquals("[B", this .rs.getObject(6).getClass().getName());
431: assertEquals("[B", this .rs.getMetaData().getColumnClassName(6));
432: assertEquals(Types.LONGVARBINARY, this .rs.getMetaData()
433: .getColumnType(6));
434:
435: //
436: // Check error handling
437: //
438:
439: utf8Conn = getConnectionWithProps("useBlobToStoreUTF8OutsideBMP=true, characterEncoding=UTF-8,utf8OutsideBmpIncludedColumnNamePattern={{,utf8OutsideBmpExcludedColumnNamePattern={{");
440:
441: try {
442: utf8Conn.createStatement().executeQuery(query);
443: fail("Expected an exception");
444: } catch (SQLException sqlEx) {
445: assertNotNull(sqlEx.getCause());
446: assertEquals("java.util.regex.PatternSyntaxException",
447: sqlEx.getCause().getClass().getName());
448: }
449:
450: utf8Conn = getConnectionWithProps("useBlobToStoreUTF8OutsideBMP=true, characterEncoding=UTF-8,utf8OutsideBmpIncludedColumnNamePattern={{,utf8OutsideBmpExcludedColumnNamePattern=.*");
451:
452: try {
453: utf8Conn.createStatement().executeQuery(query);
454: fail("Expected an exception");
455: } catch (SQLException sqlEx) {
456: assertNotNull(sqlEx.getCause());
457: assertEquals("java.util.regex.PatternSyntaxException",
458: sqlEx.getCause().getClass().getName());
459: }
460:
461: utf8Conn = getConnectionWithProps("useBlobToStoreUTF8OutsideBMP=true, characterEncoding=UTF-8,utf8OutsideBmpIncludedColumnNamePattern={{,utf8OutsideBmpExcludedColumnNamePattern={{,paranoid=true");
462:
463: try {
464: utf8Conn.createStatement().executeQuery(query);
465: fail("Expected an exception");
466: } catch (SQLException sqlEx) {
467: assertNull(sqlEx.getCause());
468: }
469:
470: utf8Conn = getConnectionWithProps("useBlobToStoreUTF8OutsideBMP=true, characterEncoding=UTF-8,utf8OutsideBmpIncludedColumnNamePattern={{,utf8OutsideBmpExcludedColumnNamePattern=.*,paranoid=true");
471:
472: try {
473: utf8Conn.createStatement().executeQuery(query);
474: fail("Expected an exception");
475: } catch (SQLException sqlEx) {
476: assertNull(sqlEx.getCause());
477: }
478:
479: }
480:
481: private boolean bytesAreSame(byte[] byte1, byte[] byte2) {
482: if (byte1.length != byte2.length) {
483: return false;
484: }
485:
486: for (int i = 0; i < byte1.length; i++) {
487: if (byte1[i] != byte2[i]) {
488: return false;
489: }
490: }
491:
492: return true;
493: }
494: }
|