1 /*-
2 * Copyright (c) 2011 Michihiro NAKAJIMA
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25 #include "test.h"
26 __FBSDID("$FreeBSD$");
27
28 #include <locale.h>
29
DEFINE_TEST(test_zip_filename_encoding_UTF8)30 DEFINE_TEST(test_zip_filename_encoding_UTF8)
31 {
32 struct archive *a;
33 struct archive_entry *entry;
34 char buff[4096];
35 size_t used;
36
37 if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
38 skipping("en_US.UTF-8 locale not available on this system.");
39 return;
40 }
41
42 /*
43 * Verify that UTF-8 filenames are correctly stored with
44 * hdrcharset=UTF-8 option.
45 */
46 a = archive_write_new();
47 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
48 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
49 skipping("This system cannot convert character-set"
50 " for UTF-8.");
51 archive_write_free(a);
52 return;
53 }
54 assertEqualInt(ARCHIVE_OK,
55 archive_write_open_memory(a, buff, sizeof(buff), &used));
56
57 entry = archive_entry_new2(a);
58 /* Set a UTF-8 filename. */
59 archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
60 archive_entry_set_filetype(entry, AE_IFREG);
61 archive_entry_set_size(entry, 0);
62 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
63 archive_entry_free(entry);
64 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
65
66 /* A bit 11 of general purpose flag should be 0x08,
67 * which indicates the filename charset is UTF-8. */
68 assertEqualInt(0x08, buff[7]);
69 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
70
71 /*
72 * Verify that UTF-8 filenames are correctly stored without
73 * hdrcharset=UTF-8 option.
74 */
75 a = archive_write_new();
76 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
77 assertEqualInt(ARCHIVE_OK,
78 archive_write_open_memory(a, buff, sizeof(buff), &used));
79
80 entry = archive_entry_new2(a);
81 /* Set a UTF-8 filename. */
82 archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
83 archive_entry_set_filetype(entry, AE_IFREG);
84 archive_entry_set_size(entry, 0);
85 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
86 archive_entry_free(entry);
87 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
88
89 /* A bit 11 of general purpose flag should be 0x08,
90 * which indicates the filename charset is UTF-8. */
91 assertEqualInt(0x08, buff[7]);
92 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
93
94 /*
95 * Verify that A bit 11 of general purpose flag is not set
96 * when ASCII filenames are stored.
97 */
98 a = archive_write_new();
99 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
100 assertEqualInt(ARCHIVE_OK,
101 archive_write_open_memory(a, buff, sizeof(buff), &used));
102
103 entry = archive_entry_new2(a);
104 /* Set an ASCII filename. */
105 archive_entry_set_pathname(entry, "abcABC");
106 archive_entry_set_filetype(entry, AE_IFREG);
107 archive_entry_set_size(entry, 0);
108 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
109 archive_entry_free(entry);
110 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
111
112 /* A bit 11 of general purpose flag should be 0,
113 * which indicates the filename charset is unknown. */
114 assertEqualInt(0, buff[7]);
115 assertEqualMem(buff + 30, "abcABC", 6);
116 }
117
DEFINE_TEST(test_zip_filename_encoding_KOI8R)118 DEFINE_TEST(test_zip_filename_encoding_KOI8R)
119 {
120 struct archive *a;
121 struct archive_entry *entry;
122 char buff[4096];
123 size_t used;
124
125 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
126 skipping("KOI8-R locale not available on this system.");
127 return;
128 }
129
130 /*
131 * Verify that KOI8-R filenames are correctly translated to UTF-8.
132 */
133 a = archive_write_new();
134 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
135 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
136 skipping("This system cannot convert character-set"
137 " from KOI8-R to UTF-8.");
138 archive_write_free(a);
139 return;
140 }
141 assertEqualInt(ARCHIVE_OK,
142 archive_write_open_memory(a, buff, sizeof(buff), &used));
143
144 entry = archive_entry_new2(a);
145 /* Set a KOI8-R filename. */
146 archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
147 archive_entry_set_filetype(entry, AE_IFREG);
148 archive_entry_set_size(entry, 0);
149 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
150 archive_entry_free(entry);
151 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
152
153 /* A bit 11 of general purpose flag should be 0x08,
154 * which indicates the filename charset is UTF-8. */
155 assertEqualInt(0x08, buff[7]);
156 /* Above three characters in KOI8-R should translate to the following
157 * three characters (two bytes each) in UTF-8. */
158 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
159
160 /*
161 * Verify that KOI8-R filenames are not translated to UTF-8.
162 */
163 a = archive_write_new();
164 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
165 assertEqualInt(ARCHIVE_OK,
166 archive_write_open_memory(a, buff, sizeof(buff), &used));
167
168 entry = archive_entry_new2(a);
169 /* Set a KOI8-R filename. */
170 archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
171 archive_entry_set_filetype(entry, AE_IFREG);
172 archive_entry_set_size(entry, 0);
173 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
174 archive_entry_free(entry);
175 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
176
177 /* A bit 11 of general purpose flag should be 0,
178 * which indicates the filename charset is unknown. */
179 assertEqualInt(0, buff[7]);
180 /* Above three characters in KOI8-R should not translate to
181 * any character-set. */
182 assertEqualMem(buff + 30, "\xD0\xD2\xC9", 3);
183
184 /*
185 * Verify that A bit 11 of general purpose flag is not set
186 * when ASCII filenames are stored even if hdrcharset=UTF-8
187 * is specified.
188 */
189 a = archive_write_new();
190 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
191 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
192 skipping("This system cannot convert character-set"
193 " from KOI8-R to UTF-8.");
194 archive_write_free(a);
195 return;
196 }
197 assertEqualInt(ARCHIVE_OK,
198 archive_write_open_memory(a, buff, sizeof(buff), &used));
199
200 entry = archive_entry_new2(a);
201 /* Set an ASCII filename. */
202 archive_entry_set_pathname(entry, "abcABC");
203 archive_entry_set_filetype(entry, AE_IFREG);
204 archive_entry_set_size(entry, 0);
205 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
206 archive_entry_free(entry);
207 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
208
209 /* A bit 11 of general purpose flag should be 0,
210 * which indicates the filename charset is unknown. */
211 assertEqualInt(0, buff[7]);
212 assertEqualMem(buff + 30, "abcABC", 6);
213 }
214
/*
 * Do not translate CP1251 into CP866 on non-Windows platforms.
 */
DEFINE_TEST(test_zip_filename_encoding_ru_RU_CP1251)218 DEFINE_TEST(test_zip_filename_encoding_ru_RU_CP1251)
219 {
220 struct archive *a;
221 struct archive_entry *entry;
222 char buff[4096];
223 size_t used;
224
225 if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
226 skipping("Russian_Russia locale not available on this system.");
227 return;
228 }
229
230 /*
231 * Verify that CP1251 filenames are not translated into any
232 * other character-set, in particular, CP866.
233 */
234 a = archive_write_new();
235 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
236 assertEqualInt(ARCHIVE_OK,
237 archive_write_open_memory(a, buff, sizeof(buff), &used));
238
239 entry = archive_entry_new2(a);
240 /* Set a CP1251 filename. */
241 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
242 archive_entry_set_filetype(entry, AE_IFREG);
243 archive_entry_set_size(entry, 0);
244 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
245 archive_entry_free(entry);
246 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
247
248 /* A bit 11 of general purpose flag should be 0,
249 * which indicates the filename charset is unknown. */
250 assertEqualInt(0, buff[7]);
251 /* Above three characters in CP1251 should not translate into
252 * any other character-set. */
253 assertEqualMem(buff + 30, "\xEF\xF0\xE8", 3);
254 }
255
/*
 * Other archiver applications on Windows translate CP1251 filenames
 * into CP866 filenames and store them in the zip file.
 * Test that this behavior works correctly.
 */
DEFINE_TEST(test_zip_filename_encoding_Russian_Russia)261 DEFINE_TEST(test_zip_filename_encoding_Russian_Russia)
262 {
263 struct archive *a;
264 struct archive_entry *entry;
265 char buff[4096];
266 size_t used;
267
268 if (NULL == setlocale(LC_ALL, "Russian_Russia")) {
269 skipping("Russian_Russia locale not available on this system.");
270 return;
271 }
272
273 /*
274 * Verify that Russian_Russia(CP1251) filenames are correctly translated
275 * to UTF-8.
276 */
277 a = archive_write_new();
278 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
279 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
280 skipping("This system cannot convert character-set"
281 " from Russian_Russia.CP1251 to UTF-8.");
282 archive_write_free(a);
283 return;
284 }
285 assertEqualInt(ARCHIVE_OK,
286 archive_write_open_memory(a, buff, sizeof(buff), &used));
287
288 entry = archive_entry_new2(a);
289 /* Set a CP1251 filename. */
290 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
291 archive_entry_set_filetype(entry, AE_IFREG);
292 archive_entry_set_size(entry, 0);
293 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
294 archive_entry_free(entry);
295 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
296
297 /* A bit 11 of general purpose flag should be 0x08,
298 * which indicates the filename charset is UTF-8. */
299 assertEqualInt(0x08, buff[7]);
300 /* Above three characters in CP1251 should translate to the following
301 * three characters (two bytes each) in UTF-8. */
302 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
303
304 /*
305 * Verify that Russian_Russia(CP1251) filenames are correctly translated
306 * to CP866.
307 */
308 a = archive_write_new();
309 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
310 assertEqualInt(ARCHIVE_OK,
311 archive_write_open_memory(a, buff, sizeof(buff), &used));
312
313 entry = archive_entry_new2(a);
314 /* Set a CP1251 filename. */
315 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
316 archive_entry_set_filetype(entry, AE_IFREG);
317 archive_entry_set_size(entry, 0);
318 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
319 archive_entry_free(entry);
320 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
321
322 /* A bit 11 of general purpose flag should be 0,
323 * which indicates the filename charset is unknown. */
324 assertEqualInt(0, buff[7]);
325 /* Above three characters in CP1251 should translate to the following
326 * three characters in CP866. */
327 assertEqualMem(buff + 30, "\xAF\xE0\xA8", 3);
328 }
329
DEFINE_TEST(test_zip_filename_encoding_EUCJP)330 DEFINE_TEST(test_zip_filename_encoding_EUCJP)
331 {
332 struct archive *a;
333 struct archive_entry *entry;
334 char buff[4096];
335 size_t used;
336
337 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
338 skipping("eucJP locale not available on this system.");
339 return;
340 }
341
342 /*
343 * Verify that EUC-JP filenames are correctly translated to UTF-8.
344 */
345 a = archive_write_new();
346 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
347 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
348 skipping("This system cannot convert character-set"
349 " from eucJP to UTF-8.");
350 archive_write_free(a);
351 return;
352 }
353 assertEqualInt(ARCHIVE_OK,
354 archive_write_open_memory(a, buff, sizeof(buff), &used));
355
356 entry = archive_entry_new2(a);
357 /* Set an EUC-JP filename. */
358 archive_entry_set_pathname(entry, "\xC9\xBD.txt");
359 /* Check the Unicode version. */
360 archive_entry_set_filetype(entry, AE_IFREG);
361 archive_entry_set_size(entry, 0);
362 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
363 archive_entry_free(entry);
364 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
365
366 /* A bit 11 of general purpose flag should be 0x08,
367 * which indicates the filename charset is UTF-8. */
368 assertEqualInt(0x08, buff[7]);
369 /* Check UTF-8 version. */
370 assertEqualMem(buff + 30, "\xE8\xA1\xA8.txt", 7);
371
372 /*
373 * Verify that EUC-JP filenames are not translated to UTF-8.
374 */
375 a = archive_write_new();
376 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
377 assertEqualInt(ARCHIVE_OK,
378 archive_write_open_memory(a, buff, sizeof(buff), &used));
379
380 entry = archive_entry_new2(a);
381 /* Set an EUC-JP filename. */
382 archive_entry_set_pathname(entry, "\xC9\xBD.txt");
383 /* Check the Unicode version. */
384 archive_entry_set_filetype(entry, AE_IFREG);
385 archive_entry_set_size(entry, 0);
386 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
387 archive_entry_free(entry);
388 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
389
390 /* A bit 11 of general purpose flag should be 0,
391 * which indicates the filename charset is unknown. */
392 assertEqualInt(0, buff[7]);
393 /* Above three characters in EUC-JP should not translate to
394 * any character-set. */
395 assertEqualMem(buff + 30, "\xC9\xBD.txt", 6);
396
397 /*
398 * Verify that A bit 11 of general purpose flag is not set
399 * when ASCII filenames are stored even if hdrcharset=UTF-8
400 * is specified.
401 */
402 a = archive_write_new();
403 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
404 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
405 skipping("This system cannot convert character-set"
406 " from eucJP to UTF-8.");
407 archive_write_free(a);
408 return;
409 }
410 assertEqualInt(ARCHIVE_OK,
411 archive_write_open_memory(a, buff, sizeof(buff), &used));
412
413 entry = archive_entry_new2(a);
414 /* Set an ASCII filename. */
415 archive_entry_set_pathname(entry, "abcABC");
416 /* Check the Unicode version. */
417 archive_entry_set_filetype(entry, AE_IFREG);
418 archive_entry_set_size(entry, 0);
419 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
420 archive_entry_free(entry);
421 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
422
423 /* A bit 11 of general purpose flag should be 0,
424 * which indicates the filename charset is unknown. */
425 assertEqualInt(0, buff[7]);
426 assertEqualMem(buff + 30, "abcABC", 6);
427 }
428
DEFINE_TEST(test_zip_filename_encoding_CP932)429 DEFINE_TEST(test_zip_filename_encoding_CP932)
430 {
431 struct archive *a;
432 struct archive_entry *entry;
433 char buff[4096];
434 size_t used;
435
436 if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
437 NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
438 skipping("CP932/SJIS locale not available on this system.");
439 return;
440 }
441
442 /*
443 * Verify that EUC-JP filenames are correctly translated to UTF-8.
444 */
445 a = archive_write_new();
446 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
447 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
448 skipping("This system cannot convert character-set"
449 " from CP932/SJIS to UTF-8.");
450 archive_write_free(a);
451 return;
452 }
453 assertEqualInt(ARCHIVE_OK,
454 archive_write_open_memory(a, buff, sizeof(buff), &used));
455
456 entry = archive_entry_new2(a);
457 /* Set a CP932/SJIS filename. */
458 archive_entry_set_pathname(entry, "\x95\x5C.txt");
459 /* Check the Unicode version. */
460 archive_entry_set_filetype(entry, AE_IFREG);
461 archive_entry_set_size(entry, 0);
462 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
463 archive_entry_free(entry);
464 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
465
466 /* A bit 11 of general purpose flag should be 0x08,
467 * which indicates the filename charset is UTF-8. */
468 assertEqualInt(0x08, buff[7]);
469 /* Check UTF-8 version. */
470 assertEqualMem(buff + 30, "\xE8\xA1\xA8.txt", 7);
471
472 /*
473 * Verify that CP932/SJIS filenames are not translated to UTF-8.
474 */
475 a = archive_write_new();
476 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
477 assertEqualInt(ARCHIVE_OK,
478 archive_write_open_memory(a, buff, sizeof(buff), &used));
479
480 entry = archive_entry_new2(a);
481 /* Set a CP932/SJIS filename. */
482 archive_entry_set_pathname(entry, "\x95\x5C.txt");
483 /* Check the Unicode version. */
484 archive_entry_set_filetype(entry, AE_IFREG);
485 archive_entry_set_size(entry, 0);
486 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
487 archive_entry_free(entry);
488 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
489
490 /* A bit 11 of general purpose flag should be 0,
491 * which indicates the filename charset is unknown. */
492 assertEqualInt(0, buff[7]);
493 /* Above three characters in CP932/SJIS should not translate to
494 * any character-set. */
495 assertEqualMem(buff + 30, "\x95\x5C.txt", 6);
496
497 /*
498 * Verify that A bit 11 of general purpose flag is not set
499 * when ASCII filenames are stored even if hdrcharset=UTF-8
500 * is specified.
501 */
502 a = archive_write_new();
503 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
504 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
505 skipping("This system cannot convert character-set"
506 " from CP932/SJIS to UTF-8.");
507 archive_write_free(a);
508 return;
509 }
510 assertEqualInt(ARCHIVE_OK,
511 archive_write_open_memory(a, buff, sizeof(buff), &used));
512
513 entry = archive_entry_new2(a);
514 /* Set an ASCII filename. */
515 archive_entry_set_pathname(entry, "abcABC");
516 /* Check the Unicode version. */
517 archive_entry_set_filetype(entry, AE_IFREG);
518 archive_entry_set_size(entry, 0);
519 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
520 archive_entry_free(entry);
521 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
522
523 /* A bit 11 of general purpose flag should be 0,
524 * which indicates the filename charset is unknown. */
525 assertEqualInt(0, buff[7]);
526 assertEqualMem(buff + 30, "abcABC", 6);
527 }
528