1 /*
2 * LZ4 - Fast LZ compression algorithm
3 * Header File
4 * Copyright (C) 2011-2013, Yann Collet.
5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following disclaimer
15 * in the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * You can contact the author at :
31 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
32 * - LZ4 source repository : http://code.google.com/p/lz4/
33 */
34
35 #include <sys/zfs_context.h>
36 #include <sys/zio_compress.h>
37
38 static int real_LZ4_compress(const char *source, char *dest, int isize,
39 int osize);
40 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
41 int isize, int maxOutputSize);
42 static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
43 int isize, int osize);
44 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
45 int isize, int osize);
46
47 static void *lz4_alloc(int flags);
48 static void lz4_free(void *ctx);
49
50 /*ARGSUSED*/
51 size_t
52 lz4_compress_zfs(void *s_start, void *d_start, size_t s_len,
53 size_t d_len, int n)
54 {
55 uint32_t bufsiz;
56 char *dest = d_start;
57
58 ASSERT(d_len >= sizeof (bufsiz));
59
60 bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
61 d_len - sizeof (bufsiz));
62
63 /* Signal an error if the compression routine returned zero. */
64 if (bufsiz == 0)
65 return (s_len);
66
67 /*
68 * The exact compressed size is needed by the decompression routine,
69 * so it is stored at the start of the buffer. Note that this may be
70 * less than the compressed block size, which is rounded up to a
71 * multiple of 1<<ashift.
72 */
73 *(uint32_t *)dest = BE_32(bufsiz);
74
75 return (bufsiz + sizeof (bufsiz));
76 }
77
78 /*ARGSUSED*/
79 int
80 lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len,
81 size_t d_len, int n)
82 {
83 const char *src = s_start;
84 uint32_t bufsiz = BE_IN32(src);
85
86 /* invalid compressed buffer size encoded at start */
87 if (bufsiz + sizeof (bufsiz) > s_len)
88 return (1);
89
90 /*
91 * Returns 0 on success (decompression function returned non-negative)
92 * and non-zero on failure (decompression function returned negative).
93 */
94 return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
95 d_start, bufsiz, d_len) < 0);
96 }
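
/*
 * Illustrative round trip (hypothetical caller, not part of this file):
 * lz4_compress_zfs() prepends a 4-byte big-endian length header to the
 * compressed payload and returns s_len when the result does not fit in
 * d_len, which the caller treats as "store the block uncompressed". Here
 * src is assumed to point to a 4096-byte source block.
 *
 *	char cbuf[4096], dbuf[4096];
 *	size_t clen;
 *
 *	clen = lz4_compress_zfs(src, cbuf, 4096, sizeof (cbuf), 0);
 *	if (clen >= 4096)
 *		...	store the original block uncompressed
 *	else if (lz4_decompress_zfs(cbuf, dbuf, clen, 4096, 0) != 0)
 *		...	the header or stream is corrupt
 */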
97
98 /*
99 * LZ4 API Description:
100 *
101 * Simple Functions:
102 * real_LZ4_compress() :
103 * isize : is the input size. Max supported value is ~1.9GB
104 * return : the number of bytes written in buffer dest
105 * or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
106 * note : destination buffer must be already allocated.
107 * destination buffer must be sized to handle worst-case
108 * situations (input data not compressible); worst-case size
109 * evaluation is provided by function LZ4_compressBound().
110 *
111 * real_LZ4_uncompress() :
112 * osize : is the output size, therefore the original size
113 * return : the number of bytes read in the source buffer.
114 * If the source stream is malformed, the function will stop
115 * decoding and return a negative result, indicating the byte
116 * position of the faulty instruction. This function never
117 * writes beyond dest + osize, and is therefore protected
118 * against malicious data packets.
119 * note : destination buffer must be already allocated
120 * note : real_LZ4_uncompress() is not used in ZFS so its code
121 * is not present here.
122 *
123 * Advanced Functions
124 *
125 * LZ4_compressBound() :
126 * Provides the maximum size that LZ4 may output in a "worst case"
127 * scenario (input data not compressible); primarily useful for memory
128 * allocation of the output buffer.
129 *
130 * isize : is the input size. Max supported value is ~1.9GB
131 * return : maximum output size in a "worst case" scenario
132 * note : this function is limited by "int" range (2^31-1)
133 *
134 * LZ4_uncompress_unknownOutputSize() :
135 * isize : is the input size, therefore the compressed size
136 * maxOutputSize : is the size of the destination buffer (which must be
137 * already allocated)
138 * return : the number of bytes decoded in the destination buffer
139 * (necessarily <= maxOutputSize). If the source stream is
140 * malformed, the function will stop decoding and return a
141 * negative result, indicating the byte position of the faulty
142 * instruction. This function never writes beyond dest +
143 * maxOutputSize, and is therefore protected against malicious
144 * data packets.
145 * note : Destination buffer must be already allocated.
146 * This version is slightly slower than real_LZ4_uncompress()
147 *
148 * LZ4_compressCtx() :
149 * This function explicitly handles the CTX memory structure.
150 *
151 * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
152 * by the caller (either on the stack or using kmem_cache_alloc). Passing
153 * NULL isn't valid.
154 *
155 * LZ4_compress64kCtx() :
156 * Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
157 * isize *Must* be <64KB, otherwise the output will be corrupted.
158 *
159 * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
160 * by the caller (either on the stack or using kmem_cache_alloc). Passing
161 * NULL isn't valid.
162 */
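
/*
 * Minimal sketch of the explicit-ctx convention described above (assumed
 * caller code, not part of this file); real_LZ4_compress() further down is
 * the in-tree user of this pattern:
 *
 *	struct refTables tbl;
 *
 *	memset(&tbl, 0, sizeof (tbl));
 *	if (LZ4_compressCtx(&tbl, src, dst, isize, osize) == 0)
 *		...	dst was too small (or the data is incompressible)
 */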
163
164 /*
165 * Tuning parameters
166 */
167
168 /*
169 * COMPRESSIONLEVEL: Increasing this value improves the compression ratio.
170 * Lowering this value reduces memory usage. Reduced memory usage
171 * typically improves speed, due to cache effects (ex: L1 32KB for Intel,
172 * L1 64KB for AMD). Memory usage formula: N -> 2^(N+2) bytes
173 * (examples: 12 -> 16KB; 17 -> 512KB)
174 */
175 #define COMPRESSIONLEVEL 12
176
177 /*
178 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value makes the
179 * algorithm skip data segments it considers "incompressible" sooner.
180 * This may decrease the compression ratio dramatically, but will be
181 * faster on incompressible data. Increasing this value makes the
182 * algorithm search longer before declaring a segment "incompressible".
183 * This could improve compression a bit, but will be slower on
184 * incompressible data. The default value (6) is recommended.
185 */
186 #define NOTCOMPRESSIBLE_CONFIRMATION 6
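
/*
 * Worked example: NOTCOMPRESSIBLE_CONFIRMATION feeds SKIPSTRENGTH below,
 * and the match-search loops advance by (findMatchAttempts++ >> skipStrength)
 * bytes per failed attempt. With the default value 6, roughly the first 64
 * failed attempts advance 1 byte at a time, the next 64 advance 2 bytes at
 * a time, and so on, so incompressible regions are skipped at an
 * accelerating rate.
 */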
187
188 /*
189 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a performance
190 * boost on big-endian CPUs, but the resulting compressed stream will be
191 * incompatible with little-endian CPUs. You can set this option to 1 in
192 * situations where the data will stay within a closed environment.
193 * This option is useless on little-endian CPUs (such as x86).
194 */
195 /* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
196
197 /*
198 * CPU Feature Detection
199 */
200
201 /* 32 or 64 bits ? */
202 #if defined(_LP64)
203 #define LZ4_ARCH64 1
204 #else
205 #define LZ4_ARCH64 0
206 #endif
207
208 /*
209 * Little Endian or Big Endian?
210 * Note: overwrite the below #define if you know your architecture endianness.
211 */
212 #if defined(_ZFS_BIG_ENDIAN)
213 #define LZ4_BIG_ENDIAN 1
214 #else
215 /*
216 * Little endian assumed. PDP endian and other very rare endian formats
217 * are unsupported.
218 */
219 #undef LZ4_BIG_ENDIAN
220 #endif
221
222 /*
223 * Unaligned memory access is automatically enabled for "common" CPUs,
224 * such as x86. For other CPUs, the compiler will be more cautious and
225 * insert extra code to ensure aligned access is respected. If you know
226 * your target CPU supports unaligned memory access, you may want to
227 * force this option manually to improve performance.
228 */
229 #if defined(__ARM_FEATURE_UNALIGNED)
230 #define LZ4_FORCE_UNALIGNED_ACCESS 1
231 #endif
232
233 /*
234 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
235 * kernel
236 * Linux : we can use GCC's __builtin_ctz family of builtins in the
237 * kernel
238 */
239 #undef LZ4_FORCE_SW_BITCOUNT
240 #if defined(__sparc)
241 #define LZ4_FORCE_SW_BITCOUNT
242 #endif
243
244 /*
245 * Compiler Options
246 */
247 /* Disable restrict */
248 #define restrict
249
250 /*
251 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
252 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
253 */
254 #ifdef GCC_VERSION
255 #undef GCC_VERSION
256 #endif
257
258 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
259
260 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
261 #define expect(expr, value) (__builtin_expect((expr), (value)))
262 #else
263 #define expect(expr, value) (expr)
264 #endif
265
266 #ifndef likely
267 #define likely(expr) expect((expr) != 0, 1)
268 #endif
269
270 #ifndef unlikely
271 #define unlikely(expr) expect((expr) != 0, 0)
272 #endif
273
274 #define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
275 (((x) & 0xffu) << 8)))
276
277 /* Basic types */
278 #define BYTE uint8_t
279 #define U16 uint16_t
280 #define U32 uint32_t
281 #define S32 int32_t
282 #define U64 uint64_t
283
284 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
285 #pragma pack(1)
286 #endif
287
288 typedef struct _U16_S {
289 U16 v;
290 } U16_S;
291 typedef struct _U32_S {
292 U32 v;
293 } U32_S;
294 typedef struct _U64_S {
295 U64 v;
296 } U64_S;
297
298 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
299 #pragma pack()
300 #endif
301
302 #define A64(x) (((U64_S *)(x))->v)
303 #define A32(x) (((U32_S *)(x))->v)
304 #define A16(x) (((U16_S *)(x))->v)
305
306 /*
307 * Constants
308 */
309 #define MINMATCH 4
310
311 #define HASH_LOG COMPRESSIONLEVEL
312 #define HASHTABLESIZE (1 << HASH_LOG)
313 #define HASH_MASK (HASHTABLESIZE - 1)
314
315 #define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
316 NOTCOMPRESSIBLE_CONFIRMATION : 2)
317
318 #define COPYLENGTH 8
319 #define LASTLITERALS 5
320 #define MFLIMIT (COPYLENGTH + MINMATCH)
321 #define MINLENGTH (MFLIMIT + 1)
322
323 #define MAXD_LOG 16
324 #define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
325
326 #define ML_BITS 4
327 #define ML_MASK ((1U<<ML_BITS)-1)
328 #define RUN_BITS (8-ML_BITS)
329 #define RUN_MASK ((1U<<RUN_BITS)-1)
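
/*
 * Worked example of the sequence token implied by the constants above: the
 * token's high RUN_BITS (4) bits hold the literal length and its low
 * ML_BITS (4) bits hold the match length. A field equal to RUN_MASK or
 * ML_MASK (15) means the length continues in extra bytes, e.g. a literal
 * run of 300 bytes is encoded as 15 in the token, then 255, then 30
 * (15 + 255 + 30 = 300).
 */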
330
331
332 /*
333 * Architecture-specific macros
334 */
335 #if LZ4_ARCH64
336 #define STEPSIZE 8
337 #define UARCH U64
338 #define AARCH A64
339 #define LZ4_COPYSTEP(s, d) A64(d) = A64(s); d += 8; s += 8;
340 #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d)
341 #define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e)
342 #define HTYPE U32
343 #define INITBASE(base) const BYTE* const base = ip
344 #else /* !LZ4_ARCH64 */
345 #define STEPSIZE 4
346 #define UARCH U32
347 #define AARCH A32
348 #define LZ4_COPYSTEP(s, d) A32(d) = A32(s); d += 4; s += 4;
349 #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
350 #define LZ4_SECURECOPY LZ4_WILDCOPY
351 #define HTYPE const BYTE *
352 #define INITBASE(base) const int base = 0
353 #endif /* !LZ4_ARCH64 */
354
355 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
356 #define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
357 { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
358 #define LZ4_WRITE_LITTLEENDIAN_16(p, i) \
359 { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
360 #else
361 #define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
362 #define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; }
363 #endif
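
/*
 * Example: the 16-bit match offset is stored little-endian in the stream
 * regardless of host byte order (unless BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE
 * is enabled), so an offset of 0x0102 is written as the bytes 0x02 0x01;
 * on big-endian hosts the lz4_bswap16() in the macros above performs the
 * conversion.
 */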
364
365
366 /* Local structures */
367 struct refTables {
368 HTYPE hashTable[HASHTABLESIZE];
369 };
370
371
372 /* Macros */
373 #define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
374 HASH_LOG))
375 #define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
376 #define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
377 #define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
378 d = e; }
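
/*
 * Example: with COMPRESSIONLEVEL 12, LZ4_HASH_FUNCTION() multiplies the
 * four input bytes at p by the multiplicative-hash constant 2654435761 and
 * keeps the top HASH_LOG bits of the 32-bit product (a right shift by
 * (MINMATCH * 8) - HASH_LOG = 20), giving an index into the 4096-entry
 * hash table.
 */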
379
380
381 /* Private functions */
382 #if LZ4_ARCH64
383
384 static inline int
385 LZ4_NbCommonBytes(register U64 val)
386 {
387 #if defined(LZ4_BIG_ENDIAN)
388 #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
389 !defined(LZ4_FORCE_SW_BITCOUNT)
390 return (__builtin_clzll(val) >> 3);
391 #else
392 int r;
393 if (!(val >> 32)) {
394 r = 4;
395 } else {
396 r = 0;
397 val >>= 32;
398 }
399 if (!(val >> 16)) {
400 r += 2;
401 val >>= 8;
402 } else {
403 val >>= 24;
404 }
405 r += (!val);
406 return (r);
407 #endif
408 #else
409 #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
410 !defined(LZ4_FORCE_SW_BITCOUNT)
411 return (__builtin_ctzll(val) >> 3);
412 #else
413 static const int DeBruijnBytePos[64] =
414 { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
415 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
416 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
417 4, 5, 7, 2, 6, 5, 7, 6, 7, 7
418 };
419 return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
420 58];
421 #endif
422 #endif
423 }
424
425 #else
426
427 static inline int
428 LZ4_NbCommonBytes(register U32 val)
429 {
430 #if defined(LZ4_BIG_ENDIAN)
431 #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
432 !defined(LZ4_FORCE_SW_BITCOUNT)
433 return (__builtin_clz(val) >> 3);
434 #else
435 int r;
436 if (!(val >> 16)) {
437 r = 2;
438 val >>= 8;
439 } else {
440 r = 0;
441 val >>= 24;
442 }
443 r += (!val);
444 return (r);
445 #endif
446 #else
447 #if defined(__GNUC__) && (GCC_VERSION >= 304) && \
448 !defined(LZ4_FORCE_SW_BITCOUNT)
449 return (__builtin_ctz(val) >> 3);
450 #else
451 static const int DeBruijnBytePos[32] = {
452 0, 0, 3, 0, 3, 1, 3, 0,
453 3, 2, 2, 1, 3, 2, 0, 1,
454 3, 3, 1, 2, 2, 2, 2, 0,
455 3, 1, 2, 0, 1, 0, 1, 1
456 };
457 return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
458 27];
459 #endif
460 #endif
461 }
462
463 #endif
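
/*
 * Example (little-endian case): if ref and ip agree in their first three
 * bytes and differ in the fourth, the XOR'd word passed in has its lowest
 * set bit somewhere in bits 24..31, so the ctz-based (or De Bruijn) count
 * above returns 3, the number of leading bytes the two positions have in
 * common.
 */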
464
465 /* Compression functions */
466
467 /*ARGSUSED*/
468 static int
469 LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
470 int osize)
471 {
472 struct refTables *srt = (struct refTables *)ctx;
473 HTYPE *HashTable = (HTYPE *) (srt->hashTable);
474
475 const BYTE *ip = (BYTE *) source;
476 INITBASE(base);
477 const BYTE *anchor = ip;
478 const BYTE *const iend = ip + isize;
479 const BYTE *const oend = (BYTE *) dest + osize;
480 const BYTE *const mflimit = iend - MFLIMIT;
481 #define matchlimit (iend - LASTLITERALS)
482
483 BYTE *op = (BYTE *) dest;
484
485 int len, length;
486 const int skipStrength = SKIPSTRENGTH;
487 U32 forwardH;
488
489
490 /* Init */
491 if (isize < MINLENGTH)
492 goto _last_literals;
493
494 /* First Byte */
495 HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
496 ip++;
497 forwardH = LZ4_HASH_VALUE(ip);
498
499 /* Main Loop */
500 for (;;) {
501 int findMatchAttempts = (1U << skipStrength) + 3;
502 const BYTE *forwardIp = ip;
503 const BYTE *ref;
504 BYTE *token;
505
506 /* Find a match */
507 do {
508 U32 h = forwardH;
509 int step = findMatchAttempts++ >> skipStrength;
510 ip = forwardIp;
511 forwardIp = ip + step;
512
513 if (unlikely(forwardIp > mflimit)) {
514 goto _last_literals;
515 }
516
517 forwardH = LZ4_HASH_VALUE(forwardIp);
518 ref = base + HashTable[h];
519 HashTable[h] = ip - base;
520
521 } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
522
523 /* Catch up */
524 while ((ip > anchor) && (ref > (BYTE *) source) &&
525 unlikely(ip[-1] == ref[-1])) {
526 ip--;
527 ref--;
528 }
529
530 /* Encode Literal length */
531 length = ip - anchor;
532 token = op++;
533
534 /* Check output limit */
535 if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
536 (length >> 8) > oend))
537 return (0);
538
539 if (length >= (int)RUN_MASK) {
540 *token = (RUN_MASK << ML_BITS);
541 len = length - RUN_MASK;
542 for (; len > 254; len -= 255)
543 *op++ = 255;
544 *op++ = (BYTE)len;
545 } else
546 *token = (length << ML_BITS);
547
548 /* Copy Literals */
549 LZ4_BLINDCOPY(anchor, op, length);
550
551 _next_match:
552 /* Encode Offset */
553 LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
554
555 /* Start Counting */
556 ip += MINMATCH;
557 ref += MINMATCH; /* MinMatch verified */
558 anchor = ip;
559 while (likely(ip < matchlimit - (STEPSIZE - 1))) {
560 UARCH diff = AARCH(ref) ^ AARCH(ip);
561 if (!diff) {
562 ip += STEPSIZE;
563 ref += STEPSIZE;
564 continue;
565 }
566 ip += LZ4_NbCommonBytes(diff);
567 goto _endCount;
568 }
569 #if LZ4_ARCH64
570 if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
571 ip += 4;
572 ref += 4;
573 }
574 #endif
575 if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
576 ip += 2;
577 ref += 2;
578 }
579 if ((ip < matchlimit) && (*ref == *ip))
580 ip++;
581 _endCount:
582
583 /* Encode MatchLength */
584 len = (ip - anchor);
585 /* Check output limit */
586 if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
587 return (0);
588 if (len >= (int)ML_MASK) {
589 *token += ML_MASK;
590 len -= ML_MASK;
591 for (; len > 509; len -= 510) {
592 *op++ = 255;
593 *op++ = 255;
594 }
595 if (len > 254) {
596 len -= 255;
597 *op++ = 255;
598 }
599 *op++ = (BYTE)len;
600 } else
601 *token += len;
602
603 /* Test end of chunk */
604 if (ip > mflimit) {
605 anchor = ip;
606 break;
607 }
608 /* Fill table */
609 HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
610
611 /* Test next position */
612 ref = base + HashTable[LZ4_HASH_VALUE(ip)];
613 HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
614 if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
615 token = op++;
616 *token = 0;
617 goto _next_match;
618 }
619 /* Prepare next loop */
620 anchor = ip++;
621 forwardH = LZ4_HASH_VALUE(ip);
622 }
623
624 _last_literals:
625 /* Encode Last Literals */
626 {
627 int lastRun = iend - anchor;
628 if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
629 oend)
630 return (0);
631 if (lastRun >= (int)RUN_MASK) {
632 *op++ = (RUN_MASK << ML_BITS);
633 lastRun -= RUN_MASK;
634 for (; lastRun > 254; lastRun -= 255) {
635 *op++ = 255;
636 }
637 *op++ = (BYTE)lastRun;
638 } else
639 *op++ = (lastRun << ML_BITS);
640 (void) memcpy(op, anchor, iend - anchor);
641 op += iend - anchor;
642 }
643
644 /* End */
645 return (int)(((char *)op) - dest);
646 }
647
648
649
650 /* Note : this function is valid only if isize < LZ4_64KLIMIT */
651 #define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
652 #define HASHLOG64K (HASH_LOG + 1)
653 #define HASH64KTABLESIZE (1U << HASHLOG64K)
654 #define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \
655 HASHLOG64K))
656 #define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p))
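
/*
 * Note (assumed rationale): because isize < LZ4_64KLIMIT, every offset
 * (ip - base) fits in a U16, so LZ4_compress64kCtx() below reuses the same
 * refTables memory as an array of 16-bit entries and can therefore afford
 * one extra hash bit (HASHLOG64K == HASH_LOG + 1) for the same footprint.
 */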
657
658 /*ARGSUSED*/
659 static int
660 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
661 int osize)
662 {
663 struct refTables *srt = (struct refTables *)ctx;
664 U16 *HashTable = (U16 *) (srt->hashTable);
665
666 const BYTE *ip = (BYTE *) source;
667 const BYTE *anchor = ip;
668 const BYTE *const base = ip;
669 const BYTE *const iend = ip + isize;
670 const BYTE *const oend = (BYTE *) dest + osize;
671 const BYTE *const mflimit = iend - MFLIMIT;
672 #define matchlimit (iend - LASTLITERALS)
673
674 BYTE *op = (BYTE *) dest;
675
676 int len, length;
677 const int skipStrength = SKIPSTRENGTH;
678 U32 forwardH;
679
680 /* Init */
681 if (isize < MINLENGTH)
682 goto _last_literals;
683
684 /* First Byte */
685 ip++;
686 forwardH = LZ4_HASH64K_VALUE(ip);
687
688 /* Main Loop */
689 for (;;) {
690 int findMatchAttempts = (1U << skipStrength) + 3;
691 const BYTE *forwardIp = ip;
692 const BYTE *ref;
693 BYTE *token;
694
695 /* Find a match */
696 do {
697 U32 h = forwardH;
698 int step = findMatchAttempts++ >> skipStrength;
699 ip = forwardIp;
700 forwardIp = ip + step;
701
702 if (forwardIp > mflimit) {
703 goto _last_literals;
704 }
705
706 forwardH = LZ4_HASH64K_VALUE(forwardIp);
707 ref = base + HashTable[h];
708 HashTable[h] = ip - base;
709
710 } while (A32(ref) != A32(ip));
711
712 /* Catch up */
713 while ((ip > anchor) && (ref > (BYTE *) source) &&
714 (ip[-1] == ref[-1])) {
715 ip--;
716 ref--;
717 }
718
719 /* Encode Literal length */
720 length = ip - anchor;
721 token = op++;
722
723 /* Check output limit */
724 if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
725 (length >> 8) > oend))
726 return (0);
727
728 if (length >= (int)RUN_MASK) {
729 *token = (RUN_MASK << ML_BITS);
730 len = length - RUN_MASK;
731 for (; len > 254; len -= 255)
732 *op++ = 255;
733 *op++ = (BYTE)len;
734 } else
735 *token = (length << ML_BITS);
736
737 /* Copy Literals */
738 LZ4_BLINDCOPY(anchor, op, length);
739
740 _next_match:
741 /* Encode Offset */
742 LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
743
744 /* Start Counting */
745 ip += MINMATCH;
746 ref += MINMATCH; /* MinMatch verified */
747 anchor = ip;
748 while (ip < matchlimit - (STEPSIZE - 1)) {
749 UARCH diff = AARCH(ref) ^ AARCH(ip);
750 if (!diff) {
751 ip += STEPSIZE;
752 ref += STEPSIZE;
753 continue;
754 }
755 ip += LZ4_NbCommonBytes(diff);
756 goto _endCount;
757 }
758 #if LZ4_ARCH64
759 if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
760 ip += 4;
761 ref += 4;
762 }
763 #endif
764 if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
765 ip += 2;
766 ref += 2;
767 }
768 if ((ip < matchlimit) && (*ref == *ip))
769 ip++;
770 _endCount:
771
772 /* Encode MatchLength */
773 len = (ip - anchor);
774 /* Check output limit */
775 if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
776 return (0);
777 if (len >= (int)ML_MASK) {
778 *token += ML_MASK;
779 len -= ML_MASK;
780 for (; len > 509; len -= 510) {
781 *op++ = 255;
782 *op++ = 255;
783 }
784 if (len > 254) {
785 len -= 255;
786 *op++ = 255;
787 }
788 *op++ = (BYTE)len;
789 } else
790 *token += len;
791
792 /* Test end of chunk */
793 if (ip > mflimit) {
794 anchor = ip;
795 break;
796 }
797 /* Fill table */
798 HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
799
800 /* Test next position */
801 ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
802 HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
803 if (A32(ref) == A32(ip)) {
804 token = op++;
805 *token = 0;
806 goto _next_match;
807 }
808 /* Prepare next loop */
809 anchor = ip++;
810 forwardH = LZ4_HASH64K_VALUE(ip);
811 }
812
813 _last_literals:
814 /* Encode Last Literals */
815 {
816 int lastRun = iend - anchor;
817 if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
818 oend)
819 return (0);
820 if (lastRun >= (int)RUN_MASK) {
821 *op++ = (RUN_MASK << ML_BITS);
822 lastRun -= RUN_MASK;
823 for (; lastRun > 254; lastRun -= 255)
824 *op++ = 255;
825 *op++ = (BYTE)lastRun;
826 } else
827 *op++ = (lastRun << ML_BITS);
828 (void) memcpy(op, anchor, iend - anchor);
829 op += iend - anchor;
830 }
831
832 /* End */
833 return (int)(((char *)op) - dest);
834 }
835
836 static int
837 real_LZ4_compress(const char *source, char *dest, int isize, int osize)
838 {
839 void *ctx;
840 int result;
841
842 ctx = lz4_alloc(KM_SLEEP);
843
844 /*
845 * out of kernel memory, gently fall through - this will disable
846 * compression in zio_compress_data
847 */
848 if (ctx == NULL)
849 return (0);
850
851 memset(ctx, 0, sizeof (struct refTables));
852
853 if (isize < LZ4_64KLIMIT)
854 result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
855 else
856 result = LZ4_compressCtx(ctx, source, dest, isize, osize);
857
858 lz4_free(ctx);
859 return (result);
860 }
861
862 /* Decompression functions */
863
864 /*
865 * Note: The decoding functions real_LZ4_uncompress() and
866 * LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
867 * attacks. They will never write nor read outside of the provided
868 * output buffers. LZ4_uncompress_unknownOutputSize() also ensures that
869 * it will never read outside of the input buffer. A corrupted input
870 * will produce an error result, a negative int, indicating the position
871 * of the error within the input stream.
872 *
873 * Note[2]: real_LZ4_uncompress(), referred to above, is not used in ZFS so
874 * its code is not present here.
875 */
876
877 static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
878 #if LZ4_ARCH64
879 static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
880 #endif
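
/*
 * Assumed reading of the tables above: for match offsets smaller than
 * STEPSIZE the repeated sequence overlaps itself, so the decoder below
 * first copies four bytes one at a time and then uses dec32table (and
 * dec64table on 64-bit) to pull ref back far enough that the following 4-
 * and 8-byte copies never read bytes that have not been written yet (e.g.
 * for an offset of 1, ref is moved back 3 bytes so the next copy runs at a
 * safe distance of 4).
 */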
881
882 static int
883 LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
884 int maxOutputSize)
885 {
886 /* Local Variables */
887 const BYTE *restrict ip = (const BYTE *) source;
888 const BYTE *const iend = ip + isize;
889 const BYTE *ref;
890
891 BYTE *op = (BYTE *) dest;
892 BYTE *const oend = op + maxOutputSize;
893 BYTE *cpy;
894
895 /* Main Loop */
896 while (ip < iend) {
897 unsigned token;
898 size_t length;
899
900 /* get runlength */
901 token = *ip++;
902 if ((length = (token >> ML_BITS)) == RUN_MASK) {
903 int s = 255;
904 while ((ip < iend) && (s == 255)) {
905 s = *ip++;
906 if (unlikely(length > (size_t)(length + s)))
907 goto _output_error;
908 length += s;
909 }
910 }
911 /* copy literals */
912 cpy = op + length;
913 /* CORNER-CASE: cpy might overflow. */
914 if (cpy < op)
915 goto _output_error; /* cpy overflowed, bail! */
916 if ((cpy > oend - COPYLENGTH) ||
917 (ip + length > iend - COPYLENGTH)) {
918 if (cpy > oend)
919 /* Error: writes beyond output buffer */
920 goto _output_error;
921 if (ip + length != iend)
922 /*
923 * Error: the LZ4 format requires all input to
924 * be consumed at this stage
925 */
926 goto _output_error;
927 (void) memcpy(op, ip, length);
928 op += length;
929 /* Necessarily EOF, due to parsing restrictions */
930 break;
931 }
932 LZ4_WILDCOPY(ip, op, cpy);
933 ip -= (op - cpy);
934 op = cpy;
935
936 /* get offset */
937 LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
938 ip += 2;
939 if (ref < (BYTE * const) dest)
940 /*
941 * Error: offset creates reference outside of
942 * destination buffer
943 */
944 goto _output_error;
945
946 /* get matchlength */
947 if ((length = (token & ML_MASK)) == ML_MASK) {
948 while (ip < iend) {
949 int s = *ip++;
950 if (unlikely(length > (size_t)(length + s)))
951 goto _output_error;
952 length += s;
953 if (s == 255)
954 continue;
955 break;
956 }
957 }
958 /* copy repeated sequence */
959 if (unlikely(op - ref < STEPSIZE)) {
960 #if LZ4_ARCH64
961 int dec64 = dec64table[op - ref];
962 #else
963 const int dec64 = 0;
964 #endif
965 op[0] = ref[0];
966 op[1] = ref[1];
967 op[2] = ref[2];
968 op[3] = ref[3];
969 op += 4;
970 ref += 4;
971 ref -= dec32table[op - ref];
972 A32(op) = A32(ref);
973 op += STEPSIZE - 4;
974 ref -= dec64;
975 } else {
976 LZ4_COPYSTEP(ref, op);
977 }
978 cpy = op + length - (STEPSIZE - 4);
979 if (cpy > oend - COPYLENGTH) {
980 if (cpy > oend)
981 /*
982 * Error: request to write outside of
983 * destination buffer
984 */
985 goto _output_error;
986 #if LZ4_ARCH64
987 if ((ref + COPYLENGTH) > oend)
988 #else
989 if ((ref + COPYLENGTH) > oend ||
990 (op + COPYLENGTH) > oend)
991 #endif
992 goto _output_error;
993 LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
994 while (op < cpy)
995 *op++ = *ref++;
996 op = cpy;
997 if (op == oend)
998 /*
999 * Check EOF (should never happen, since
1000 * last 5 bytes are supposed to be literals)
1001 */
1002 goto _output_error;
1003 continue;
1004 }
1005 LZ4_SECURECOPY(ref, op, cpy);
1006 op = cpy; /* correction */
1007 }
1008
1009 /* end of decoding */
1010 return (int)(((char *)op) - dest);
1011
1012 /* write overflow error detected */
1013 _output_error:
1014 return (-1);
1015 }
1016
1017 #ifdef __FreeBSD__
1018 /*
1019 * FreeBSD has 4, 8 and 16 KB malloc zones which can be used here.
1020 * Should struct refTables get resized this may need to be revisited, hence
1021 * the compile-time asserts.
1022 */
1023 _Static_assert(sizeof(struct refTables) <= 16384,
1024 "refTables too big for malloc");
1025 _Static_assert((sizeof(struct refTables) % 4096) == 0,
1026 "refTables not a multiple of page size");
1027 #else
1028 #define ZFS_LZ4_USE_CACHE
1029 #endif
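
/*
 * With the current tuning (HASH_LOG == 12 and a 4-byte HTYPE), struct
 * refTables works out to 4096 * 4 == 16384 bytes, which satisfies the
 * FreeBSD compile-time asserts above and is also the size allocated by the
 * kmem paths below.
 */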
1030
1031 #ifdef ZFS_LZ4_USE_CACHE
1032 static kmem_cache_t *lz4_cache;
1033
1034 void
1035 lz4_init(void)
1036 {
1037 lz4_cache = kmem_cache_create("lz4_cache",
1038 sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0);
1039 }
1040
1041 void
1042 lz4_fini(void)
1043 {
1044 if (lz4_cache) {
1045 kmem_cache_destroy(lz4_cache);
1046 lz4_cache = NULL;
1047 }
1048 }
1049
1050 static void *
1051 lz4_alloc(int flags)
1052 {
1053 ASSERT(lz4_cache != NULL);
1054 return (kmem_cache_alloc(lz4_cache, flags));
1055 }
1056
1057 static void
1058 lz4_free(void *ctx)
1059 {
1060 kmem_cache_free(lz4_cache, ctx);
1061 }
1062 #else
1063 void
1064 lz4_init(void)
1065 {
1066 }
1067
1068 void
1069 lz4_fini(void)
1070 {
1071 }
1072
1073 static void *
1074 lz4_alloc(int flags)
1075 {
1076 return (kmem_alloc(sizeof (struct refTables), flags));
1077 }
1078
1079 static void
1080 lz4_free(void *ctx)
1081 {
1082 kmem_free(ctx, sizeof (struct refTables));
1083 }
1084 #endif
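
/*
 * Illustrative lifecycle (hypothetical module glue, not part of this file):
 * lz4_init() is expected to run once before any compression and lz4_fini()
 * once after the last use, bracketing the lz4_alloc()/lz4_free() pairs
 * issued from real_LZ4_compress() above.
 *
 *	lz4_init();
 *	...
 *	len = real_LZ4_compress(src, dst, isize, osize);
 *	...
 *	lz4_fini();
 */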
1085