1 /*
2 * Copyright (c) 2010-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* This module implements a hybrid/adaptive compression scheme, using WKdm where
29 * profitable and, currently, an LZ4 variant elsewhere.
30 * (Created 2016, Derek Kumar)
31 */
32 #include "lz4.h"
33 #include "WKdm_new.h"
34 #include <vm/vm_compressor_algorithms_internal.h>
35 #include <vm/vm_compressor_internal.h>
36
/* Magic header word tagging a WKdm "single value" page; validated by
 * WKdm_hv() on DEVELOPMENT/DEBUG kernels before decompression. */
#define MZV_MAGIC (17185)
#if defined(__arm64__)
#include <arm64/proc_reg.h>
#endif

/* Alignment (bytes) required for the per-codec scratch areas below.
 * (These were previously defined twice; the duplicates are removed.) */
#define LZ4_SCRATCH_ALIGN (64)
#define WKC_SCRATCH_ALIGN (64)
47
/*
 * Encode-side scratch area: the LZ4 encoder state and the WKdm compression
 * scratch overlay the same storage, since only one codec runs per page.
 * The actual allocation size comes from vm_compressor_get_encode_scratch_size().
 */
typedef union {
	uint8_t lz4state[lz4_encode_scratch_size]__attribute((aligned(LZ4_SCRATCH_ALIGN)));
	/* Zero-length overlay; WKdm scratch is sized externally. */
	uint8_t wkscratch[0] __attribute((aligned(WKC_SCRATCH_ALIGN))); // TODO: give the WKdm view an explicit size
} compressor_encode_scratch_t;
52
/*
 * Decode-side scratch area, mirroring compressor_encode_scratch_t.
 * NOTE(review): the LZ4 decode state is sized with lz4_encode_scratch_size —
 * confirm the decode scratch requirement fits within the encode size.
 */
typedef union {
	uint8_t lz4decodestate[lz4_encode_scratch_size]__attribute((aligned(64)));
	uint8_t wkdecompscratch[0] __attribute((aligned(64)));
} compressor_decode_scratch_t;
57
/*
 * Adaptive-selector state for the hybrid codec.  Updated by
 * compressor_preselect() and compressor_selector_update().  The lz4_total_*
 * fields are lifetime counters; the remaining fields track the current
 * run/window.
 */
typedef struct {
	uint16_t lz4_selection_run;       /* NOTE(review): never updated in this file — confirm still needed */
	uint16_t lz4_run_length;          /* consecutive pages where LZ4 looked profitable */
	uint16_t lz4_preselects;          /* LZ4 preselects since the last forced WKdm re-evaluation */
	uint32_t lz4_total_preselects;
	uint16_t lz4_failure_skips;       /* LZ4 attempts skipped in the current failure window */
	uint32_t lz4_total_failure_skips;
	uint16_t lz4_failure_run_length;  /* consecutive LZ4 failures/unprofitable outcomes */
	uint16_t lz4_total_unprofitables;
	uint32_t lz4_total_negatives;     /* pages where LZ4 output was larger than WKdm's */
	uint32_t lz4_total_failures;
} compressor_state_t;
70
/*
 * Tunables for the hybrid codec heuristic.
 *  - lz4_threshold: WKdm output size (bytes) at or above which LZ4 is tried.
 *  - wkdm_reeval_threshold: LZ4 output size at or below which the LZ4 run is
 *    reset, forcing renewed WKdm evaluation.
 *  - lz4_max_failure_skips = 0 effectively disables the failure-skip window
 *    (it is reset on every preselect call).
 *  - lz4_max_preselects = 0 means WKdm is preselected every time.
 */
compressor_tuneables_t vmctune = {
	.lz4_threshold = 2048,
	.wkdm_reeval_threshold = 1536,
	.lz4_max_failure_skips = 0,
	.lz4_max_failure_run_length = ~0U,
	.lz4_max_preselects = 0,
	.lz4_run_preselection_threshold = ~0U,
	.lz4_run_continue_bytes = 0,
	.lz4_profitable_bytes = 0,
};
81
/*
 * Hybrid-selector state, explicitly zeroed for clarity.
 * (lz4_total_failures is omitted from the initializer but static storage
 * guarantees it starts at zero as well.)
 */
compressor_state_t vmcstate = {
	.lz4_selection_run = 0,
	.lz4_run_length = 0,
	.lz4_preselects = 0,
	.lz4_total_preselects = 0,
	.lz4_failure_skips = 0,
	.lz4_total_failure_skips = 0,
	.lz4_failure_run_length = 0,
	.lz4_total_unprofitables = 0,
	.lz4_total_negatives = 0,
};

/* Global codec statistics, updated via VM_COMPRESSOR_STAT()/VM_DECOMPRESSOR_STAT(). */
compressor_stats_t compressor_stats;
95
/* Outcome of compressor_preselect(): which codec metacompressor tries first. */
enum compressor_preselect_t {
	CPRESELLZ4 = 0, /* go straight to LZ4 */
	CSKIPLZ4 = 1,   /* use WKdm and do NOT fall back to LZ4 (recent LZ4 failures) */
	CPRESELWK = 2,  /* default: try WKdm first */
};
101
/* Active codec selection; changeable via sysctl. */
vm_compressor_mode_t vm_compressor_current_codec = VM_COMPRESSOR_DEFAULT_CODEC;

/* Override flag (default FALSE); not consulted within this file —
 * presumably read by platform WKdm glue. TODO confirm. */
boolean_t vm_compressor_force_sw_wkdm = FALSE;

/* Debug verbosity flag; not consulted within this file. */
boolean_t verbose = FALSE;
108
/*
 * Fine-grained timing statistics (mach_absolute_time deltas) are compiled
 * in on DEBUG kernels only.
 * Fix: the guard previously tested the misspelled, never-defined symbol
 * VMDBGSTATS, which made VM_COMPRESSOR_STAT_DBG a no-op even on DEBUG
 * kernels; it now tests VMDBGSTAT as defined above.
 */
#define VMDBGSTAT (DEBUG)
#if VMDBGSTAT
#define VM_COMPRESSOR_STAT_DBG(x...)					\
	do {								\
		(x);							\
	} while(0)
#else
#define VM_COMPRESSOR_STAT_DBG(x...)					\
	do {								\
	} while(0)
#endif
120
/* Compressor statistics are maintained on DEVELOPMENT and DEBUG kernels only. */
#define VMCSTATS (DEVELOPMENT || DEBUG)
#if VMCSTATS
/* Evaluate the statistics-update statement(s) x. */
#define VM_COMPRESSOR_STAT(x...)					\
	do {								\
		(x);							\
	} while(0)
// TODO: make these updates atomic where needed — decompression paths can run concurrently
#define VM_DECOMPRESSOR_STAT(x...)					\
	do {								\
		(x);							\
	} while(0)
#else
/* Statistics compiled out on RELEASE kernels. */
#define VM_COMPRESSOR_STAT(x...)					\
	do {								\
	}while (0)
#define VM_DECOMPRESSOR_STAT(x...)					\
	do {								\
	}while (0)
#endif
140
141 static inline enum compressor_preselect_t
compressor_preselect(void)142 compressor_preselect(void)
143 {
144 if (vmcstate.lz4_failure_skips >= vmctune.lz4_max_failure_skips) {
145 vmcstate.lz4_failure_skips = 0;
146 vmcstate.lz4_failure_run_length = 0;
147 }
148
149 if (vmcstate.lz4_failure_run_length >= vmctune.lz4_max_failure_run_length) {
150 vmcstate.lz4_failure_skips++;
151 vmcstate.lz4_total_failure_skips++;
152 return CSKIPLZ4;
153 }
154
155 if (vmcstate.lz4_preselects >= vmctune.lz4_max_preselects) {
156 vmcstate.lz4_preselects = 0;
157 return CPRESELWK;
158 }
159
160 if (vmcstate.lz4_run_length >= vmctune.lz4_run_preselection_threshold) {
161 vmcstate.lz4_preselects++;
162 vmcstate.lz4_total_preselects++;
163 return CPRESELLZ4;
164 }
165 return CPRESELWK;
166 }
167
168 static inline void
compressor_selector_update(int lz4sz,int didwk,int wksz)169 compressor_selector_update(int lz4sz, int didwk, int wksz)
170 {
171 VM_COMPRESSOR_STAT(compressor_stats.lz4_compressions++);
172
173 if (lz4sz == 0) {
174 VM_COMPRESSOR_STAT(compressor_stats.lz4_compressed_bytes += PAGE_SIZE);
175 VM_COMPRESSOR_STAT(compressor_stats.lz4_compression_failures++);
176 vmcstate.lz4_failure_run_length++;
177 VM_COMPRESSOR_STAT(vmcstate.lz4_total_failures++);
178 vmcstate.lz4_run_length = 0;
179 } else {
180 vmcstate.lz4_failure_run_length = 0;
181
182 VM_COMPRESSOR_STAT(compressor_stats.lz4_compressed_bytes += lz4sz);
183
184 if (lz4sz <= vmctune.wkdm_reeval_threshold) {
185 vmcstate.lz4_run_length = 0;
186 } else {
187 if (!didwk) {
188 vmcstate.lz4_run_length++;
189 }
190 }
191
192 if (didwk) {
193 if (__probable(wksz > lz4sz)) {
194 uint32_t lz4delta = wksz - lz4sz;
195 VM_COMPRESSOR_STAT(compressor_stats.lz4_wk_compression_delta += lz4delta);
196 if (lz4delta >= vmctune.lz4_run_continue_bytes) {
197 vmcstate.lz4_run_length++;
198 } else if (lz4delta <= vmctune.lz4_profitable_bytes) {
199 vmcstate.lz4_failure_run_length++;
200 VM_COMPRESSOR_STAT(vmcstate.lz4_total_unprofitables++);
201 vmcstate.lz4_run_length = 0;
202 } else {
203 vmcstate.lz4_run_length = 0;
204 }
205 } else {
206 VM_COMPRESSOR_STAT(compressor_stats.lz4_wk_compression_negative_delta += (lz4sz - wksz));
207 vmcstate.lz4_failure_run_length++;
208 VM_COMPRESSOR_STAT(vmcstate.lz4_total_negatives++);
209 vmcstate.lz4_run_length = 0;
210 }
211 }
212 }
213 }
214
215
/*
 * Header validation for a WKdm-compressed buffer (DEVELOPMENT/DEBUG only).
 * A single-value page starts with MZV_MAGIC and is exempt; otherwise the
 * first three header words must have clear upper halves, or we panic.
 * No-op on RELEASE kernels.
 */
static inline void
WKdm_hv(uint32_t *wkbuf)
{
#if DEVELOPMENT || DEBUG
	uint32_t *hdr = wkbuf;
	if (hdr[0] != MZV_MAGIC &&
	    ((hdr[0] | hdr[1] | hdr[2]) & 0xFFFF0000)) {
		panic("WKdm(%p): invalid header 0x%x 0x%x 0x%x", wkbuf, hdr[0], hdr[1], hdr[2]);
	}
#else /* DEVELOPMENT || DEBUG */
	(void) wkbuf;
#endif
}
230
// TODO: fix the WK_word*/uint8_t* pointer-type mismatches at the WKdmC/WKdmD
// call sites so this -Wincompatible-pointer-types suppression can be removed.
232 #pragma clang diagnostic push
233 #pragma clang diagnostic ignored "-Wincompatible-pointer-types"
234
235 #if defined(__arm64__)
236 #endif
237
/*
 * Decompress a WKdm-compressed page of `bytes` bytes from src_buf into
 * dest_buf, using `scratch` as working storage.  The compressed header is
 * sanity-checked first (WKdm_hv; effective on DEVELOPMENT/DEBUG only).
 * On arm64 the page-size-specific decompressor is chosen; other
 * architectures use WKdm_decompress_new.  pop_count is unused here.
 * Always returns true — WKdm decompression reports no failures.
 */
static inline bool
WKdmD(WK_word* src_buf, WK_word* dest_buf, WK_word* scratch, unsigned int bytes,
    __unused uint32_t *pop_count)
{
#if defined(__arm64__)
#endif
	WKdm_hv(src_buf);
#if defined(__arm64__)
#ifndef __ARM_16K_PG__
	if (PAGE_SIZE == 4096) {
		WKdm_decompress_4k(src_buf, dest_buf, scratch, bytes);
	} else
#endif /* !____ARM_16K_PG__ */
	{
		__unused uint64_t wdsstart;

		/* Timing recorded on DEBUG kernels only (VM_COMPRESSOR_STAT_DBG). */
		VM_COMPRESSOR_STAT_DBG(wdsstart = mach_absolute_time());
		WKdm_decompress_16k(src_buf, dest_buf, scratch, bytes);

		VM_COMPRESSOR_STAT_DBG(compressor_stats.wks_dabstime += mach_absolute_time() - wdsstart);
		VM_COMPRESSOR_STAT(compressor_stats.wks_decompressions++);
	}
#else /* !defined arm64 */
	WKdm_decompress_new(src_buf, dest_buf, scratch, bytes);
#endif
	return true;
}
#if DEVELOPMENT || DEBUG
/* Debug-only counters; not referenced in this file — TODO confirm external users or remove. */
int precompy, wkswhw;
#endif
268
/*
 * Compress one page with WKdm into dest_buf, bounded by `limit` bytes of
 * output.  On arm64 the page-size-specific compressor is chosen; other
 * architectures use WKdm_compress_new.  incomp_copy and pop_count are
 * accepted for interface symmetry but unused here.
 * Returns the encoder's result: per the caller (metacompressor), a positive
 * compressed size, 0 for a single-value page, or -1 when the page does not
 * fit within `limit` — TODO confirm against WKdm_new.h.
 */
static inline int
WKdmC(WK_word* src_buf, WK_word* dest_buf, WK_word* scratch,
    boolean_t *incomp_copy, unsigned int limit, __unused uint32_t *pop_count)
{
	(void)incomp_copy;
	int wkcval;
#if defined(__arm64__)
#ifndef __ARM_16K_PG__
	if (PAGE_SIZE == 4096) {
		wkcval = WKdm_compress_4k(src_buf, dest_buf, scratch, limit);
	} else
#endif /* !____ARM_16K_PG__ */
	{
		__unused uint64_t wcswstart;

		/* Timing recorded on DEBUG kernels only (VM_COMPRESSOR_STAT_DBG). */
		VM_COMPRESSOR_STAT_DBG(wcswstart = mach_absolute_time());

		int wkswsz = WKdm_compress_16k(src_buf, dest_buf, scratch, limit);

		VM_COMPRESSOR_STAT_DBG(compressor_stats.wks_cabstime += mach_absolute_time() - wcswstart);
		VM_COMPRESSOR_STAT(compressor_stats.wks_compressions++);
		wkcval = wkswsz;
	}
#else
	wkcval = WKdm_compress_new(src_buf, dest_buf, scratch, limit);
#endif
	return wkcval;
}
297
298
/*
 * Compress one page (PAGE_SIZE bytes at `in`) into `cdst`, which holds at
 * most `outbufsz` bytes.  The codec used is reported via *codec (CCWK or
 * CCLZ4) and any popcount computed by the codec via *pop_count_p
 * (C_SLOT_NO_POPCOUNT when none was taken).
 *
 * Codec selection follows vm_compressor_current_codec:
 *   CMODE_WK  - WKdm only.
 *   CMODE_LZ4 - LZ4 only.
 *   CMODE_HYB - WKdm first, falling back to LZ4 when WKdm fails or its
 *               output reaches vmctune.lz4_threshold, subject to the
 *               adaptive compressor_preselect() decision.
 *
 * Returns the compressed size in bytes, 0 for a WKdm single-value page,
 * or -1 when the page is incompressible within `outbufsz`.
 */
int
metacompressor(const uint8_t *in, uint8_t *cdst, int32_t outbufsz, uint16_t *codec,
    void *cscratchin, boolean_t *incomp_copy, uint32_t *pop_count_p)
{
	int sz = -1;
	int dowk = FALSE, dolz4 = FALSE, skiplz4 = FALSE;
	int insize = PAGE_SIZE;
	compressor_encode_scratch_t *cscratch = cscratchin;
	/* Not all paths lead to an inline population count. */
	uint32_t pop_count = C_SLOT_NO_POPCOUNT;

	if (vm_compressor_current_codec == CMODE_WK) {
		dowk = TRUE;
	} else if (vm_compressor_current_codec == CMODE_LZ4) {
		dolz4 = TRUE;
	} else if (vm_compressor_current_codec == CMODE_HYB) {
		enum compressor_preselect_t presel = compressor_preselect();
		if (presel == CPRESELLZ4) {
			/* LZ4 has been winning: skip the WKdm attempt entirely. */
			dolz4 = TRUE;
			goto lz4compress;
		} else if (presel == CSKIPLZ4) {
			/* Recent LZ4 failures: WKdm only, no LZ4 fallback. */
			dowk = TRUE;
			skiplz4 = TRUE;
		} else {
			assert(presel == CPRESELWK);
			dowk = TRUE;
		}
	}

	if (dowk) {
		*codec = CCWK;
		VM_COMPRESSOR_STAT(compressor_stats.wk_compressions++);
		sz = WKdmC(in, cdst, &cscratch->wkscratch[0], incomp_copy, outbufsz, &pop_count);

		if (sz == -1) {
			/* WKdm could not fit the page into outbufsz. */
			VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total += PAGE_SIZE);
			VM_COMPRESSOR_STAT(compressor_stats.wk_compression_failures++);

			if (vm_compressor_current_codec == CMODE_HYB) {
				goto lz4eval;
			}
			goto cexit;
		} else if (sz == 0) {
			/* Single-value page: stored as just a 4-byte WKdm header. */
			VM_COMPRESSOR_STAT(compressor_stats.wk_sv_compressions++);
			VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total += 4);
		} else {
			VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total += sz);
		}
	}
lz4eval:
	/* Hybrid mode: decide whether the WKdm result warrants an LZ4 attempt. */
	if (vm_compressor_current_codec == CMODE_HYB) {
		if (((sz == -1) || (sz >= vmctune.lz4_threshold)) && (skiplz4 == FALSE)) {
			dolz4 = TRUE;
		} else {
#if DEVELOPMENT || DEBUG
			int wkc = (sz == -1) ? PAGE_SIZE : sz;
#endif
			/* WKdm result accepted without trying LZ4. */
			VM_COMPRESSOR_STAT(compressor_stats.wk_compressions_exclusive++);
			VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_exclusive += wkc);
			goto cexit;
		}
	}

lz4compress:

	if (dolz4) {
		if (sz == -1) {
			sz = PAGE_SIZE;
		}
		/* Remember the WKdm size so the selector can compare codecs. */
		int wksz = sz;
		*codec = CCLZ4;

		sz = (int) lz4raw_encode_buffer(cdst, outbufsz, in, insize, &cscratch->lz4state[0]);

		/* Feed the outcome back into the hybrid selection heuristic. */
		compressor_selector_update(sz, dowk, wksz);
		if (sz == 0) {
			/* lz4raw_encode_buffer returns 0 on failure; report -1. */
			sz = -1;
			goto cexit;
		}
	}
cexit:
	assert(pop_count_p != NULL);
	*pop_count_p = pop_count;
	return sz;
}
384
/*
 * Decompress one compressed page back into `dest` (PAGE_SIZE bytes).
 * `ccodec` names the codec that produced `source` (CCLZ4 or CCWK) and
 * `csize` is the compressed size in bytes.  *pop_count_p receives any
 * popcount computed during decode (C_SLOT_NO_POPCOUNT otherwise).
 * Returns true on success; an LZ4 decode that does not yield exactly one
 * page asserts (DEBUG) and returns false.
 */
bool
metadecompressor(const uint8_t *source, uint8_t *dest, uint32_t csize,
    uint16_t ccodec, void *compressor_dscratchin, uint32_t *pop_count_p)
{
	int dolz4 = (ccodec == CCLZ4);
	int rval;
	compressor_decode_scratch_t *compressor_dscratch = compressor_dscratchin;
	/* Not all paths lead to an inline population count. */
	uint32_t pop_count = C_SLOT_NO_POPCOUNT;
	bool success;

	if (dolz4) {
		rval = (int)lz4raw_decode_buffer(dest, PAGE_SIZE, source, csize, &compressor_dscratch->lz4decodestate[0]);
		VM_DECOMPRESSOR_STAT(compressor_stats.lz4_decompressions += 1);
		VM_DECOMPRESSOR_STAT(compressor_stats.lz4_decompressed_bytes += csize);

		/* A valid LZ4 page must decode to exactly one page. */
		__assert_only uint32_t *d32 = dest;
		assertf(rval == PAGE_SIZE, "LZ4 decode: size != pgsize %d, header: 0x%x, 0x%x, 0x%x",
		    rval, *d32, *(d32 + 1), *(d32 + 2));
		success = (rval == PAGE_SIZE);
	} else {
		assert(ccodec == CCWK);

		success = WKdmD(source, dest, &compressor_dscratch->wkdecompscratch[0], csize, &pop_count);

		VM_DECOMPRESSOR_STAT(compressor_stats.wk_decompressions += 1);
		VM_DECOMPRESSOR_STAT(compressor_stats.wk_decompressed_bytes += csize);
	}

	assert(pop_count_p != NULL);
	*pop_count_p = pop_count;
	return success;
}
418 #pragma clang diagnostic pop
419
420 uint32_t
vm_compressor_get_encode_scratch_size(void)421 vm_compressor_get_encode_scratch_size(void)
422 {
423 if (vm_compressor_current_codec != VM_COMPRESSOR_DEFAULT_CODEC) {
424 return MAX(sizeof(compressor_encode_scratch_t), WKdm_SCRATCH_BUF_SIZE_INTERNAL);
425 } else {
426 return WKdm_SCRATCH_BUF_SIZE_INTERNAL;
427 }
428 }
429
430 uint32_t
vm_compressor_get_decode_scratch_size(void)431 vm_compressor_get_decode_scratch_size(void)
432 {
433 if (vm_compressor_current_codec != VM_COMPRESSOR_DEFAULT_CODEC) {
434 return MAX(sizeof(compressor_decode_scratch_t), WKdm_SCRATCH_BUF_SIZE_INTERNAL);
435 } else {
436 return WKdm_SCRATCH_BUF_SIZE_INTERNAL;
437 }
438 }
439
440
441 int
vm_compressor_algorithm(void)442 vm_compressor_algorithm(void)
443 {
444 return vm_compressor_current_codec;
445 }
446
/*
 * Boot-time codec selection.  The default is VM_COMPRESSOR_DEFAULT_CODEC;
 * arm64 upgrades to the hybrid codec and, on 16K pages, widens the LZ4
 * fallback threshold.  Boot-args:
 *   vm_compressor_codec=<mode>  explicit codec (validated below)
 *   -vm_compressor_wk           force the default (WKdm) codec
 *   -vm_compressor_hybrid       force the hybrid codec
 *
 * NOTE(review): the final store to vm_compressor_current_codec is inside
 * the arm64-only block, so on other architectures a parsed
 * "vm_compressor_codec" boot-arg is validated but never applied — confirm
 * whether that is intentional.
 */
void
vm_compressor_algorithm_init(void)
{
	vm_compressor_mode_t new_codec = VM_COMPRESSOR_DEFAULT_CODEC;

#if defined(__arm64__)
	new_codec = CMODE_HYB;

	if (PAGE_SIZE == 16384) {
		/* 16K pages: raise the WKdm-size threshold for trying LZ4. */
		vmctune.lz4_threshold = 12288;
	}
#endif

	PE_parse_boot_argn("vm_compressor_codec", &new_codec, sizeof(new_codec));
	assertf(((new_codec == VM_COMPRESSOR_DEFAULT_CODEC) || (new_codec == CMODE_WK) ||
	    (new_codec == CMODE_LZ4) || (new_codec == CMODE_HYB)),
	    "Invalid VM compression codec: %u", new_codec);

#if defined(__arm64__)
	uint32_t tmpc;
	if (PE_parse_boot_argn("-vm_compressor_wk", &tmpc, sizeof(tmpc))) {
		new_codec = VM_COMPRESSOR_DEFAULT_CODEC;
	} else if (PE_parse_boot_argn("-vm_compressor_hybrid", &tmpc, sizeof(tmpc))) {
		new_codec = CMODE_HYB;
	}

	vm_compressor_current_codec = new_codec;
#endif /* arm/arm64 */
}
476