1 #![cfg(not(miri))]
2 
3 use super::REALLOC_AND_FREE;
4 use wasmtime::Result;
5 use wasmtime::component::{Component, Linker};
6 use wasmtime::{Config, Engine, PoolingAllocationConfig, Store, StoreContextMut, Trap};
7 
/// High bit OR'd into the length of a `latin1+utf16` string by the tests in
/// this file to tag the string's contents as utf16.
const UTF16_TAG: u32 = 1 << 31;
9 
10 // Special cases that this tries to test:
11 //
12 // * utf8 -> utf8
13 //    * various code point sizes
14 //
15 // * utf8 -> utf16 - the adapter here will make a pessimistic allocation that's
16 //   twice the size of the utf8 encoding for the utf16 destination
17 //    * utf16 byte size is twice the utf8 size
18 //    * utf16 byte size is less than twice the utf8 size
19 //
20 // * utf8 -> latin1+utf16 - attempts to convert to latin1 then falls back to a
21 //   pessimistic utf16 allocation that's downsized if necessary
22 //    * utf8 fits exactly in latin1
23 //    * utf8 fits latin1 but is bigger byte-wise
24 //    * utf8 is not latin1 and fits utf16 allocation precisely (NOT POSSIBLE)
25 //    * utf8 is not latin1 and utf16 is smaller than allocation
26 //
27 // * utf16 -> utf8 - this starts with an optimistic size and then reallocates to
28 //   a pessimistic size, interesting cases are:
29 //    * utf8 size is 0.5x the utf16 byte size (perfect fit in initial alloc)
30 //    * utf8 size is 1.5x the utf16 byte size (perfect fit in larger alloc)
31 //    * utf8 size is 0.5x-1.5x the utf16 size (larger alloc is downsized)
32 //
33 // * utf16 -> utf16
34 //    * various code point sizes
35 //
36 // * utf16 -> latin1+utf16 - attempts to convert to latin1 then falls back to a
37 //   pessimistic utf16 allocation that's downsized if necessary
38 //    * utf16 fits exactly in latin1
39 //    * utf16 fits latin1 but is bigger byte-wise (NOT POSSIBLE)
40 //    * utf16 is not latin1 and fits utf16 allocation precisely
41 //    * utf16 is not latin1 and utf16 is smaller than allocation (NOT POSSIBLE)
42 //
43 // * compact-utf16 -> utf8 dynamically determines between one of
44 //    * latin1 -> utf8
45 //      * latin1 size matches utf8 size
46 //      * latin1 is smaller than utf8 size
47 //    * utf16 -> utf8
48 //      * covered above
49 //
50 // * compact-utf16 -> utf16 dynamically determines between one of
51 //    * latin1 -> utf16 - latin1 size always matches utf16
52 //      * test various code points
53 //    * utf16 -> utf16
54 //      * covered above
55 //
56 // * compact-utf16 -> compact-utf16 dynamically determines between one of
57 //    * latin1 -> latin1
58 //      * not much interesting here
59 //    * utf16 -> compact-utf16-to-compact-probably-utf16
60 //      * utf16 actually fits within latin1
61 //      * otherwise not more interesting than utf16 -> utf16
62 //
/// Strings echoed through every source/destination encoding pair by
/// `test_roundtrip`.
///
/// Each entry is chosen for its size relationship between utf8, utf16 and
/// latin1; the per-entry comments record the sizes the transcoding paths
/// described above rely on.
const STRINGS: &[&str] = &[
    "",
    // 1 byte in utf8, 2 bytes in utf16
    "x",
    "hello this is a particularly long string yes it is it keeps going",
    // 35 bytes in utf8, 23 units in utf16, 23 bytes in latin1
    "à á â ã ä å æ ç è é ê ë",
    // 47 bytes in utf8, 31 units in utf16
    "Ξ Ο Π Ρ Σ Τ Υ Φ Χ Ψ Ω Ϊ Ϋ ά έ ή",
    // 24 bytes in utf8, 8 units in utf16
    //
    // These are the fullwidth letters S..Z (U+FF33..=U+FF3A), each 3 bytes in
    // utf8 and 1 unit in utf16. They are spelled with escapes because
    // compatibility normalization folds them to ASCII, which would silently
    // turn this into an 8-byte string and lose the 1.5x size case.
    "\u{ff33}\u{ff34}\u{ff35}\u{ff36}\u{ff37}\u{ff38}\u{ff39}\u{ff3a}",
    // 16 bytes in utf8, 8 units in utf16
    "ËÌÍÎÏÐÑÒ",
    // 4 bytes in utf8, 2 units (one surrogate pair) in utf16
    "\u{10000}",
    // latin1-compatible prefix followed by utf8/16-requiring suffix
    //
    // 24 bytes in utf8, 13 units in utf16, first 8 usvs are latin1-compatible
    //
    // The suffix is fullwidth V..Z (U+FF36..=U+FF3A), escaped for the same
    // reason as above.
    "à ascii \u{ff36}\u{ff37}\u{ff38}\u{ff39}\u{ff3a}",
];
83 
/// Encoding names substituted into the `string-encoding=...` canonical option
/// of the generated components. Several tests below run once for every
/// ordered (source, destination) pair drawn from this list.
static ENCODINGS: [&str; 3] = ["utf8", "utf16", "latin1+utf16"];
85 
/// Runs `test_roundtrip` for each ordered pair of `ENCODINGS`, once with
/// `debug_adapter_modules` enabled and once with it disabled.
#[test]
fn roundtrip() -> Result<()> {
    for debug in [true, false] {
        // A fresh engine per setting so the adapter-module option takes effect.
        let engine = {
            let mut cfg = wasmtime_test_util::component::config();
            cfg.debug_adapter_modules(debug);
            Engine::new(&cfg)?
        };

        let pairs = ENCODINGS
            .iter()
            .flat_map(|src| ENCODINGS.iter().map(move |dst| (*src, *dst)));
        for (src, dst) in pairs {
            test_roundtrip(&engine, src, dst)?;
        }
    }
    Ok(())
}
100 
test_roundtrip(engine: &Engine, src: &str, dst: &str) -> Result<()>101 fn test_roundtrip(engine: &Engine, src: &str, dst: &str) -> Result<()> {
102     println!("src={src} dst={dst}");
103 
104     let mk_echo = |name: &str, encoding: &str| {
105         format!(
106             r#"
107 (component {name}
108     (import "echo" (func $echo (param "a" string) (result string)))
109     (core instance $libc (instantiate $libc))
110     (core func $echo (canon lower (func $echo)
111         (memory $libc "memory")
112         (realloc (func $libc "realloc"))
113         string-encoding={encoding}
114     ))
115     (core instance $echo (instantiate $echo
116         (with "libc" (instance $libc))
117         (with "" (instance (export "echo" (func $echo))))
118     ))
119     (func (export "echo2") (param "a" string) (result string)
120         (canon lift
121             (core func $echo "echo")
122             (memory $libc "memory")
123             (realloc (func $libc "realloc"))
124             string-encoding={encoding}
125         )
126     )
127 )
128             "#
129         )
130     };
131 
132     let src = mk_echo("$src", src);
133     let dst = mk_echo("$dst", dst);
134     let component = format!(
135         r#"
136 (component
137     (import "host" (func $host (param "a" string) (result string)))
138 
139     (core module $libc
140         (memory (export "memory") 1)
141         {REALLOC_AND_FREE}
142     )
143     (core module $echo
144         (import "" "echo" (func $echo (param i32 i32 i32)))
145         (import "libc" "memory" (memory 0))
146         (import "libc" "realloc" (func $realloc (param i32 i32 i32 i32) (result i32)))
147 
148         (func (export "echo") (param i32 i32) (result i32)
149             (local $retptr i32)
150             (local.set $retptr
151                 (call $realloc
152                     (i32.const 0)
153                     (i32.const 0)
154                     (i32.const 4)
155                     (i32.const 8)))
156             (call $echo
157                 (local.get 0)
158                 (local.get 1)
159                 (local.get $retptr))
160             local.get $retptr
161         )
162     )
163 
164     {src}
165     {dst}
166 
167     (instance $dst (instantiate $dst (with "echo" (func $host))))
168     (instance $src (instantiate $src (with "echo" (func $dst "echo2"))))
169     (export "echo" (func $src "echo2"))
170 )
171 "#
172     );
173     let component = Component::new(engine, &component)?;
174     let mut store = Store::new(engine, String::new());
175     let mut linker = Linker::new(engine);
176     linker.root().func_wrap(
177         "host",
178         |store: StoreContextMut<String>, (arg,): (String,)| {
179             assert_eq!(*store.data(), arg);
180             Ok((arg,))
181         },
182     )?;
183     let instance = linker.instantiate(&mut store, &component)?;
184     let func = instance.get_typed_func::<(String,), (String,)>(&mut store, "echo")?;
185 
186     for string in STRINGS {
187         println!("testing string {string:?}");
188         *store.data_mut() = string.to_string();
189         let (ret,) = func.call(&mut store, (string.to_string(),))?;
190         assert_eq!(ret, *string);
191     }
192     Ok(())
193 }
194 
/// Runs `test_ptr_out_of_bounds` for each ordered pair of `ENCODINGS`.
#[test]
fn ptr_out_of_bounds() -> Result<()> {
    let engine = wasmtime_test_util::component::engine();
    for &src in ENCODINGS.iter() {
        for &dst in ENCODINGS.iter() {
            test_ptr_out_of_bounds(&engine, src, dst)?;
        }
    }
    Ok(())
}
205 
test_ptr_out_of_bounds(engine: &Engine, src: &str, dst: &str) -> Result<()>206 fn test_ptr_out_of_bounds(engine: &Engine, src: &str, dst: &str) -> Result<()> {
207     let test = |len: u32| -> Result<()> {
208         let component = format!(
209             r#"
210 (component
211   (component $c
212     (core module $m
213       (func (export "") (param i32 i32))
214       (func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 0)
215       (memory (export "memory") 1)
216     )
217     (core instance $m (instantiate $m))
218     (func (export "a") (param "a" string)
219       (canon lift (core func $m "") (realloc (func $m "realloc")) (memory $m "memory")
220         string-encoding={dst})
221     )
222   )
223 
224   (component $c2
225     (import "a" (func $f (param "a" string)))
226     (core module $libc
227       (memory (export "memory") 1)
228     )
229     (core instance $libc (instantiate $libc))
230     (core func $f (canon lower (func $f) string-encoding={src} (memory $libc "memory")))
231     (core module $m
232       (import "" "" (func $f (param i32 i32)))
233 
234       (func $start (call $f (i32.const 0x8000_0000) (i32.const {len})))
235       (start $start)
236     )
237     (core instance (instantiate $m (with "" (instance (export "" (func $f))))))
238   )
239 
240   (instance $c (instantiate $c))
241   (instance $c2 (instantiate $c2 (with "a" (func $c "a"))))
242 )
243 "#
244         );
245         let component = Component::new(engine, &component)?;
246         let mut store = Store::new(engine, ());
247         let trap = Linker::new(engine)
248             .instantiate(&mut store, &component)
249             .err()
250             .unwrap()
251             .downcast::<Trap>()?;
252         assert_eq!(trap, Trap::StringOutOfBounds);
253         Ok(())
254     };
255 
256     test(0)?;
257     test(1)?;
258 
259     Ok(())
260 }
261 
// A trap must still be raised when computing ptr+len overflows; this is
// checked for each ordered pair of encodings.
#[test]
fn ptr_overflow() -> Result<()> {
    let engine = wasmtime_test_util::component::engine();
    for &src in ENCODINGS.iter() {
        for &dst in ENCODINGS.iter() {
            test_ptr_overflow(&engine, src, dst)?;
        }
    }
    Ok(())
}
274 
test_ptr_overflow(engine: &Engine, src: &str, dst: &str) -> Result<()>275 fn test_ptr_overflow(engine: &Engine, src: &str, dst: &str) -> Result<()> {
276     let component = format!(
277         r#"
278 (component
279   (component $c
280     (core module $m
281       (func (export "") (param i32 i32))
282       (func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 0)
283       (memory (export "memory") 1)
284     )
285     (core instance $m (instantiate $m))
286     (func (export "a") (param "a" string)
287       (canon lift (core func $m "") (realloc (func $m "realloc")) (memory $m "memory")
288         string-encoding={dst})
289     )
290   )
291 
292   (component $c2
293     (import "a" (func $f (param "a" string)))
294     (core module $libc
295       (memory (export "memory") 1)
296     )
297     (core instance $libc (instantiate $libc))
298     (core func $f (canon lower (func $f) string-encoding={src} (memory $libc "memory")))
299     (core module $m
300       (import "" "" (func $f (param i32 i32)))
301 
302       (func (export "f") (param i32) (call $f (i32.const 1000) (local.get 0)))
303     )
304     (core instance $m (instantiate $m (with "" (instance (export "" (func $f))))))
305     (func (export "f") (param "a" u32) (canon lift (core func $m "f")))
306   )
307 
308   (instance $c (instantiate $c))
309   (instance $c2 (instantiate $c2 (with "a" (func $c "a"))))
310   (export "f" (func $c2 "f"))
311 )
312 "#
313     );
314 
315     let component = Component::new(engine, &component)?;
316 
317     let test_overflow = |size: u32| -> Result<()> {
318         println!("src={src} dst={dst} size={size:#x}");
319         let mut store = Store::new(engine, ());
320         let instance = Linker::new(engine).instantiate(&mut store, &component)?;
321         let func = instance.get_typed_func::<(u32,), ()>(&mut store, "f")?;
322         let trap = func
323             .call(&mut store, (size,))
324             .unwrap_err()
325             .downcast::<Trap>()?;
326         assert_eq!(trap, Trap::StringOutOfBounds);
327         Ok(())
328     };
329 
330     let max = 1 << 31;
331 
332     match src {
333         "utf8" => {
334             // This exceeds MAX_STRING_BYTE_LENGTH
335             test_overflow(max)?;
336 
337             if dst == "utf16" {
338                 // exceeds MAX_STRING_BYTE_LENGTH when multiplied
339                 test_overflow(max / 2)?;
340 
341                 // Technically this fails on the first string, not the second.
342                 // Ideally this would test the overflow check on the second
343                 // string though.
344                 test_overflow(max / 2 - 100)?;
345             } else {
346                 // This will point into unmapped memory
347                 test_overflow(max - 100)?;
348             }
349         }
350 
351         "utf16" => {
352             test_overflow(max / 2)?;
353             test_overflow(max / 2 - 100)?;
354         }
355 
356         "latin1+utf16" => {
357             test_overflow((max / 2) | UTF16_TAG)?;
358             // tag a utf16 string with the max length and it should overflow.
359             test_overflow((max / 2 - 100) | UTF16_TAG)?;
360         }
361 
362         _ => unreachable!(),
363     }
364 
365     Ok(())
366 }
367 
// The pointer handed back by `realloc` must be bounds-checked; this is
// verified for each ordered pair of encodings.
#[test]
fn realloc_oob() -> Result<()> {
    let engine = wasmtime_test_util::component::engine();
    for &src in ENCODINGS.iter() {
        for &dst in ENCODINGS.iter() {
            test_realloc_oob(&engine, src, dst)?;
        }
    }
    Ok(())
}
379 
test_realloc_oob(engine: &Engine, src: &str, dst: &str) -> Result<()>380 fn test_realloc_oob(engine: &Engine, src: &str, dst: &str) -> Result<()> {
381     let component = format!(
382         r#"
383 (component
384   (component $c
385     (core module $m
386       (func (export "") (param i32 i32))
387       (func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 100_000)
388       (memory (export "memory") 1)
389     )
390     (core instance $m (instantiate $m))
391     (func (export "a") (param "a" string)
392       (canon lift (core func $m "") (realloc (func $m "realloc")) (memory $m "memory")
393         string-encoding={dst})
394     )
395   )
396 
397   (component $c2
398     (import "a" (func $f (param "a" string)))
399     (core module $libc
400       (memory (export "memory") 1)
401     )
402     (core instance $libc (instantiate $libc))
403     (core func $f (canon lower (func $f) string-encoding={src} (memory $libc "memory")))
404     (core module $m
405       (import "" "" (func $f (param i32 i32)))
406 
407       (func (export "f") (call $f (i32.const 1000) (i32.const 10)))
408     )
409     (core instance $m (instantiate $m (with "" (instance (export "" (func $f))))))
410     (func (export "f") (canon lift (core func $m "f")))
411   )
412 
413   (instance $c (instantiate $c))
414   (instance $c2 (instantiate $c2 (with "a" (func $c "a"))))
415   (export "f" (func $c2 "f"))
416 )
417 "#
418     );
419 
420     let component = Component::new(engine, &component)?;
421     let mut store = Store::new(engine, ());
422 
423     let instance = Linker::new(engine).instantiate(&mut store, &component)?;
424     let func = instance.get_typed_func::<(), ()>(&mut store, "f")?;
425     let trap = func.call(&mut store, ()).unwrap_err().downcast::<Trap>()?;
426     assert_eq!(trap, Trap::StringOutOfBounds);
427     Ok(())
428 }
429 
// Test that raw bytes which are not valid in the source encoding are
// rejected when passed as a string, and that a tagged utf16 string whose
// contents fit in latin1 is accepted.
#[test]
fn raw_string_encodings() -> Result<()> {
    let engine = wasmtime_test_util::component::engine();

    // Byte patterns fed to the source side of the call.
    let bad_byte: &[u8] = &[0xff];
    let ascii_then_bad: &[u8] = b"valid string until \xffthen valid again";
    let symbol_then_bad: &[u8] = b"symbol \xce\xa3 until \xffthen valid";
    let lone_surrogate: &[u8] = &[0x01, 0xd8];
    let unit_then_surrogate: &[u8] = &[0xff, 0xff, 0x01, 0xd8];
    let latin1_unit_then_surrogate: &[u8] = &[0xab, 0x00, 0xff, 0xff, 0x01, 0xd8];

    // (src encoding, dst encoding, raw bytes, raw length) tuples that must
    // all be reported as invalid, in the order they are checked.
    let invalid = [
        ("utf8", "utf8", bad_byte, 1),
        ("utf8", "utf8", ascii_then_bad, ascii_then_bad.len() as u32),
        ("utf8", "utf16", ascii_then_bad, ascii_then_bad.len() as u32),
        ("utf8", "utf8", symbol_then_bad, symbol_then_bad.len() as u32),
        ("utf8", "utf16", symbol_then_bad, symbol_then_bad.len() as u32),
        (
            "utf8",
            "latin1+utf16",
            symbol_then_bad,
            symbol_then_bad.len() as u32,
        ),
        ("utf16", "utf8", lone_surrogate, 1),
        ("utf16", "utf16", lone_surrogate, 1),
        ("utf16", "latin1+utf16", unit_then_surrogate, 2),
        ("latin1+utf16", "utf8", lone_surrogate, 1 | UTF16_TAG),
        ("latin1+utf16", "utf16", lone_surrogate, 1 | UTF16_TAG),
        ("latin1+utf16", "utf16", unit_then_surrogate, 2 | UTF16_TAG),
        (
            "latin1+utf16",
            "latin1+utf16",
            latin1_unit_then_surrogate,
            3 | UTF16_TAG,
        ),
    ];
    for (src, dst, bytes, len) in invalid {
        test_invalid_string_encoding(&engine, src, dst, bytes, len)?;
    }

    // This latin1+utf16 string should get compressed to latin1 across the
    // boundary.
    let latin1_as_utf16: &[u8] = &[0xab, 0x00, 0xff, 0x00];
    test_valid_string_encoding(
        &engine,
        "latin1+utf16",
        "latin1+utf16",
        latin1_as_utf16,
        2 | UTF16_TAG,
    )?;
    Ok(())
}
491 
test_invalid_string_encoding( engine: &Engine, src: &str, dst: &str, bytes: &[u8], len: u32, ) -> Result<()>492 fn test_invalid_string_encoding(
493     engine: &Engine,
494     src: &str,
495     dst: &str,
496     bytes: &[u8],
497     len: u32,
498 ) -> Result<()> {
499     let trap = test_raw_when_encoded(engine, src, dst, bytes, len)?.unwrap();
500     let src = src.replace("latin1+", "");
501     assert!(
502         format!("{trap:?}").contains(&format!("invalid {src} encoding")),
503         "bad error: {trap:?}",
504     );
505     Ok(())
506 }
507 
test_valid_string_encoding( engine: &Engine, src: &str, dst: &str, bytes: &[u8], len: u32, ) -> Result<()>508 fn test_valid_string_encoding(
509     engine: &Engine,
510     src: &str,
511     dst: &str,
512     bytes: &[u8],
513     len: u32,
514 ) -> Result<()> {
515     let err = test_raw_when_encoded(engine, src, dst, bytes, len)?;
516     assert!(err.is_none());
517     Ok(())
518 }
519 
test_raw_when_encoded( engine: &Engine, src: &str, dst: &str, bytes: &[u8], len: u32, ) -> Result<Option<wasmtime::Error>>520 fn test_raw_when_encoded(
521     engine: &Engine,
522     src: &str,
523     dst: &str,
524     bytes: &[u8],
525     len: u32,
526 ) -> Result<Option<wasmtime::Error>> {
527     let component = format!(
528         r#"
529 (component
530   (component $c
531     (core module $m
532       (func (export "") (param i32 i32))
533       (func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 0)
534       (memory (export "memory") 1)
535     )
536     (core instance $m (instantiate $m))
537     (func (export "a") (param "a" string)
538       (canon lift (core func $m "") (realloc (func $m "realloc")) (memory $m "memory")
539         string-encoding={dst})
540     )
541   )
542 
543   (component $c2
544     (import "a" (func $f (param "a" string)))
545     (core module $libc
546       (memory (export "memory") 1)
547       (func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 0)
548     )
549     (core instance $libc (instantiate $libc))
550     (core func $f (canon lower (func $f) string-encoding={src} (memory $libc "memory")))
551     (core module $m
552       (import "" "" (func $f (param i32 i32)))
553 
554       (func (export "f") (param i32 i32 i32) (call $f (local.get 0) (local.get 2)))
555     )
556     (core instance $m (instantiate $m (with "" (instance (export "" (func $f))))))
557     (func (export "f") (param "a" (list u8)) (param "b" u32) (canon lift (core func $m "f")
558         (memory $libc "memory")
559         (realloc (func $libc "realloc"))))
560   )
561 
562   (instance $c (instantiate $c))
563   (instance $c2 (instantiate $c2 (with "a" (func $c "a"))))
564   (export "f" (func $c2 "f"))
565 )
566 "#
567     );
568 
569     let component = Component::new(engine, &component)?;
570     let mut store = Store::new(engine, ());
571 
572     let instance = Linker::new(engine).instantiate(&mut store, &component)?;
573     let func = instance.get_typed_func::<(&[u8], u32), ()>(&mut store, "f")?;
574     match func.call(&mut store, (bytes, len)) {
575         Ok(_) => Ok(None),
576         Err(e) => Ok(Some(e)),
577     }
578 }
579 
/// Passes a 16-byte string located at offset 0 of one component's memory to
/// a component whose `realloc` returns 65520, the last 16 bytes of its own
/// one-page memory, and expects the call to succeed.
#[test]
fn pass_string_on_component_boundary() -> Result<()> {
    // Configure an engine such that linear memories are allocated right next
    // to each other and are 1 wasm page large. This'll ensure that the string
    // at the beginning of the second memory is at the end of the first memory.
    let mut pooling = PoolingAllocationConfig::new();
    pooling
        .total_component_instances(3)
        .total_memories(2)
        .total_tables(0)
        .total_stacks(0)
        .max_memory_size(65536);

    let mut config = Config::new();
    config
        .memory_guard_size(0)
        .memory_reservation(65536)
        .allocation_strategy(pooling);
    let engine = Engine::new(&config)?;

    let wat = r#"
(component
  ;; This component is instantiated first so its allocation function returns a
  ;; pointer at the end of memory which will be right up against the next
  ;; linear memory.
  (component $c
    (core module $m
      (func (export "") (param i32 i32))
      (func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 65520)
      (memory (export "memory") 1)
    )
    (core instance $m (instantiate $m))
    (func (export "a") (param "a" string)
      (canon lift (core func $m "") (realloc (func $m "realloc")) (memory $m "memory"))
    )
  )

  ;; This component is instantiated second meaning its memory is after the
  ;; one above, so the string is placed first thing in linear memory.
  (component $c2
    (import "a" (func $f (param "a" string)))
    (core module $libc
      (memory (export "memory") 1)
      (data (memory 0) (i32.const 0) "0123456789abcdef")
    )
    (core instance $libc (instantiate $libc))
    (core func $f (canon lower (func $f) (memory $libc "memory")))
    (core module $m
      (import "" "" (func $f (param i32 i32)))
      (func (export "f")
        (call $f
          (i32.const 0)   ;; ptr
          (i32.const 16)) ;; len
      )
    )
    (core instance $m (instantiate $m (with "" (instance (export "" (func $f))))))
    (func (export "f") (canon lift (core func $m "f")))
  )

  (instance $c (instantiate $c))
  (instance $c2 (instantiate $c2 (with "a" (func $c "a"))))
  (export "f" (func $c2 "f"))
)
"#;

    let component = Component::new(&engine, wat)?;
    let mut store = Store::new(&engine, ());
    let func = Linker::new(&engine)
        .instantiate(&mut store, &component)?
        .get_typed_func::<(), ()>(&mut store, "f")?;
    func.call(&mut store, ())?;
    Ok(())
}
649