1 /*
2  * Copyright © 2008-2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <[email protected]>
25  *    Zou Nan hai <[email protected]>
26  *    Xiang Hai hao<[email protected]>
27  *
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <dev/drm2/drmP.h>
34 #include <dev/drm2/i915/i915_drv.h>
35 #include <dev/drm2/i915/i915_drm.h>
36 #include <dev/drm2/i915/intel_drv.h>
37 #include <sys/sched.h>
38 #include <sys/sf_buf.h>
39 
40 /*
41  * 965+ support PIPE_CONTROL commands, which provide finer grained control
42  * over cache flushing.
43  */
44 struct pipe_control {
45 	struct drm_i915_gem_object *obj;
46 	volatile u32 *cpu_page;
47 	u32 gtt_offset;
48 };
49 
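/*
 * Bytes free between the hardware read pointer (head) and our software
 * write pointer (tail).  I915_RING_FREE_SPACE bytes are kept in reserve so
 * that the tail never fully catches up with the head, which would make the
 * ring look empty again.
 */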
static inline int ring_space(struct intel_ring_buffer *ring)
51 {
52 	int space = (ring->head & HEAD_ADDR) - (ring->tail + I915_RING_FREE_SPACE);
53 	if (space < 0)
54 		space += ring->size;
55 	return space;
56 }
57 
58 static int
gen2_render_ring_flush(struct intel_ring_buffer *ring,
60 		       u32	invalidate_domains,
61 		       u32	flush_domains)
62 {
63 	u32 cmd;
64 	int ret;
65 
66 	cmd = MI_FLUSH;
67 	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
68 		cmd |= MI_NO_WRITE_FLUSH;
69 
70 	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
71 		cmd |= MI_READ_FLUSH;
72 
73 	ret = intel_ring_begin(ring, 2);
74 	if (ret)
75 		return ret;
76 
77 	intel_ring_emit(ring, cmd);
78 	intel_ring_emit(ring, MI_NOOP);
79 	intel_ring_advance(ring);
80 
81 	return 0;
82 }
83 
84 static int
gen4_render_ring_flush(struct intel_ring_buffer *ring,
86 		       u32	invalidate_domains,
87 		       u32	flush_domains)
88 {
89 	struct drm_device *dev = ring->dev;
90 	u32 cmd;
91 	int ret;
92 
93 	/*
94 	 * read/write caches:
95 	 *
96 	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
97 	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
98 	 * also flushed at 2d versus 3d pipeline switches.
99 	 *
100 	 * read-only caches:
101 	 *
102 	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
103 	 * MI_READ_FLUSH is set, and is always flushed on 965.
104 	 *
105 	 * I915_GEM_DOMAIN_COMMAND may not exist?
106 	 *
107 	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
108 	 * invalidated when MI_EXE_FLUSH is set.
109 	 *
110 	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
111 	 * invalidated with every MI_FLUSH.
112 	 *
113 	 * TLBs:
114 	 *
115 	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
117 	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
118 	 * are flushed at any MI_FLUSH.
119 	 */
120 
121 	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
122 	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
123 		cmd &= ~MI_NO_WRITE_FLUSH;
124 	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
125 		cmd |= MI_EXE_FLUSH;
126 
127 	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
128 	    (IS_G4X(dev) || IS_GEN5(dev)))
129 		cmd |= MI_INVALIDATE_ISP;
130 
131 	ret = intel_ring_begin(ring, 2);
132 	if (ret)
133 		return ret;
134 
135 	intel_ring_emit(ring, cmd);
136 	intel_ring_emit(ring, MI_NOOP);
137 	intel_ring_advance(ring);
138 
139 	return 0;
140 }
141 
142 /**
143  * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
144  * implementing two workarounds on gen6.  From section 1.4.7.1
145  * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
146  *
147  * [DevSNB-C+{W/A}] Before any depth stall flush (including those
148  * produced by non-pipelined state commands), software needs to first
149  * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
150  * 0.
151  *
152  * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
153  * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
154  *
155  * And the workaround for these two requires this workaround first:
156  *
157  * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
158  * BEFORE the pipe-control with a post-sync op and no write-cache
159  * flushes.
160  *
161  * And this last workaround is tricky because of the requirements on
162  * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
163  * volume 2 part 1:
164  *
165  *     "1 of the following must also be set:
166  *      - Render Target Cache Flush Enable ([12] of DW1)
167  *      - Depth Cache Flush Enable ([0] of DW1)
168  *      - Stall at Pixel Scoreboard ([1] of DW1)
169  *      - Depth Stall ([13] of DW1)
170  *      - Post-Sync Operation ([13] of DW1)
171  *      - Notify Enable ([8] of DW1)"
172  *
173  * The cache flushes require the workaround flush that triggered this
174  * one, so we can't use it.  Depth stall would trigger the same.
175  * Post-sync nonzero is what triggered this second workaround, so we
176  * can't use that one either.  Notify enable is IRQs, which aren't
177  * really our business.  That leaves only stall at scoreboard.
178  */
179 static int
intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
181 {
182 	struct pipe_control *pc = ring->private;
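	/* The post-sync qword writes below are dummies; they only need
	 * somewhere harmless to land, hence the offset into the
	 * pipe-control scratch page. */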
183 	u32 scratch_addr = pc->gtt_offset + 128;
184 	int ret;
185 
186 
187 	ret = intel_ring_begin(ring, 6);
188 	if (ret)
189 		return ret;
190 
191 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
192 	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
193 			PIPE_CONTROL_STALL_AT_SCOREBOARD);
194 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
195 	intel_ring_emit(ring, 0); /* low dword */
196 	intel_ring_emit(ring, 0); /* high dword */
197 	intel_ring_emit(ring, MI_NOOP);
198 	intel_ring_advance(ring);
199 
200 	ret = intel_ring_begin(ring, 6);
201 	if (ret)
202 		return ret;
203 
204 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
205 	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
206 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
207 	intel_ring_emit(ring, 0);
208 	intel_ring_emit(ring, 0);
209 	intel_ring_emit(ring, MI_NOOP);
210 	intel_ring_advance(ring);
211 
212 	return 0;
213 }
214 
215 static int
gen6_render_ring_flush(struct intel_ring_buffer *ring,
217                          u32 invalidate_domains, u32 flush_domains)
218 {
219 	u32 flags = 0;
220 	struct pipe_control *pc = ring->private;
221 	u32 scratch_addr = pc->gtt_offset + 128;
222 	int ret;
223 
224 	/* Force SNB workarounds for PIPE_CONTROL flushes */
225 	ret = intel_emit_post_sync_nonzero_flush(ring);
226 	if (ret)
227 		return ret;
228 
229 	/* Just flush everything.  Experiments have shown that reducing the
230 	 * number of bits based on the write domains has little performance
231 	 * impact.
232 	 */
233 	if (flush_domains) {
234 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
235 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
236 		/*
237 		 * Ensure that any following seqno writes only happen
238 		 * when the render cache is indeed flushed.
239 		 */
240 		flags |= PIPE_CONTROL_CS_STALL;
241 	}
242 	if (invalidate_domains) {
243 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
244 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
245 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
246 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
247 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
248 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
249 		/*
250 		 * TLB invalidate requires a post-sync write.
251 		 */
252 		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
253 	}
254 
255 	ret = intel_ring_begin(ring, 4);
256 	if (ret)
257 		return ret;
258 
259 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
260 	intel_ring_emit(ring, flags);
261 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
262 	intel_ring_emit(ring, 0);
263 	intel_ring_advance(ring);
264 
265 	return 0;
266 }
267 
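/*
 * Stand-alone PIPE_CONTROL that only stalls the command streamer at the
 * pixel scoreboard; gen7_render_ring_flush() emits it ahead of flushes that
 * set the state-cache-invalidate bit (see the workaround note there).
 */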
268 static int
gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring)
270 {
271 	int ret;
272 
273 	ret = intel_ring_begin(ring, 4);
274 	if (ret)
275 		return ret;
276 
277 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
278 	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
279 			      PIPE_CONTROL_STALL_AT_SCOREBOARD);
280 	intel_ring_emit(ring, 0);
281 	intel_ring_emit(ring, 0);
282 	intel_ring_advance(ring);
283 
284 	return 0;
285 }
286 
287 static int
gen7_render_ring_flush(struct intel_ring_buffer *ring,
289 		       u32 invalidate_domains, u32 flush_domains)
290 {
291 	u32 flags = 0;
292 	struct pipe_control *pc = ring->private;
293 	u32 scratch_addr = pc->gtt_offset + 128;
294 	int ret;
295 
296 	/*
297 	 * Ensure that any following seqno writes only happen when the render
298 	 * cache is indeed flushed.
299 	 *
300 	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
301 	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
302 	 * don't try to be clever and just set it unconditionally.
303 	 */
304 	flags |= PIPE_CONTROL_CS_STALL;
305 
306 	/* Just flush everything.  Experiments have shown that reducing the
307 	 * number of bits based on the write domains has little performance
308 	 * impact.
309 	 */
310 	if (flush_domains) {
311 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
312 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
313 	}
314 	if (invalidate_domains) {
315 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
316 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
317 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
318 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
319 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
320 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
321 		/*
322 		 * TLB invalidate requires a post-sync write.
323 		 */
324 		flags |= PIPE_CONTROL_QW_WRITE;
325 
326 		/* Workaround: we must issue a pipe_control with CS-stall bit
327 		 * set before a pipe_control command that has the state cache
328 		 * invalidate bit set. */
329 		gen7_render_ring_cs_stall_wa(ring);
330 	}
331 
332 	ret = intel_ring_begin(ring, 4);
333 	if (ret)
334 		return ret;
335 
336 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
337 	intel_ring_emit(ring, flags);
338 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
339 	intel_ring_emit(ring, 0);
340 	intel_ring_advance(ring);
341 
342 	return 0;
343 }
344 
static void ring_write_tail(struct intel_ring_buffer *ring,
346 			    u32 value)
347 {
348 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
349 	I915_WRITE_TAIL(ring, value);
350 }
351 
u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
353 {
354 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
355 	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
356 			RING_ACTHD(ring->mmio_base) : ACTHD;
357 
358 	return I915_READ(acthd_reg);
359 }
360 
static int init_ring_common(struct intel_ring_buffer *ring)
362 {
363 	struct drm_device *dev = ring->dev;
364 	drm_i915_private_t *dev_priv = dev->dev_private;
365 	struct drm_i915_gem_object *obj = ring->obj;
366 	int ret = 0;
367 	u32 head;
368 
369 	if (HAS_FORCE_WAKE(dev))
370 		gen6_gt_force_wake_get(dev_priv);
371 
372 	/* Stop the ring if it's running. */
373 	I915_WRITE_CTL(ring, 0);
374 	I915_WRITE_HEAD(ring, 0);
375 	ring->write_tail(ring, 0);
376 
377 	head = I915_READ_HEAD(ring) & HEAD_ADDR;
378 
379 	/* G45 ring initialization fails to reset head to zero */
380 	if (head != 0) {
381 		DRM_DEBUG_KMS("%s head not reset to zero "
382 			      "ctl %08x head %08x tail %08x start %08x\n",
383 			      ring->name,
384 			      I915_READ_CTL(ring),
385 			      I915_READ_HEAD(ring),
386 			      I915_READ_TAIL(ring),
387 			      I915_READ_START(ring));
388 
389 		I915_WRITE_HEAD(ring, 0);
390 
391 		if (I915_READ_HEAD(ring) & HEAD_ADDR) {
392 			DRM_ERROR("failed to set %s head to zero "
393 				  "ctl %08x head %08x tail %08x start %08x\n",
394 				  ring->name,
395 				  I915_READ_CTL(ring),
396 				  I915_READ_HEAD(ring),
397 				  I915_READ_TAIL(ring),
398 				  I915_READ_START(ring));
399 		}
400 	}
401 
402 	/* Initialize the ring. This must happen _after_ we've cleared the ring
403 	 * registers with the above sequence (the readback of the HEAD registers
404 	 * also enforces ordering), otherwise the hw might lose the new ring
405 	 * register values. */
406 	I915_WRITE_START(ring, obj->gtt_offset);
407 	I915_WRITE_CTL(ring,
408 			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
409 			| RING_VALID);
410 
411 	/* If the head is still not zero, the ring is dead */
412 	if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
413 		     I915_READ_START(ring) == obj->gtt_offset &&
414 		     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
415 		DRM_ERROR("%s initialization failed "
416 				"ctl %08x head %08x tail %08x start %08x\n",
417 				ring->name,
418 				I915_READ_CTL(ring),
419 				I915_READ_HEAD(ring),
420 				I915_READ_TAIL(ring),
421 				I915_READ_START(ring));
422 		ret = -EIO;
423 		goto out;
424 	}
425 
426 	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
427 		i915_kernel_lost_context(ring->dev);
428 	else {
429 		ring->head = I915_READ_HEAD(ring);
430 		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
431 		ring->space = ring_space(ring);
432 		ring->last_retired_head = -1;
433 	}
434 
435 out:
436 	if (HAS_FORCE_WAKE(dev))
437 		gen6_gt_force_wake_put(dev_priv);
438 
439 	return ret;
440 }
441 
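/*
 * Set up the per-ring pipe-control scratch page: a 4KiB GEM object pinned
 * into the GTT (gtt_offset, for GPU writes) and mapped into kernel VA
 * (cpu_page, so the CPU can read back values such as the pc_render seqno).
 */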
442 static int
init_pipe_control(struct intel_ring_buffer *ring)
444 {
445 	struct pipe_control *pc;
446 	struct drm_i915_gem_object *obj;
447 	int ret;
448 
449 	if (ring->private)
450 		return 0;
451 
452 	pc = malloc(sizeof(*pc), DRM_I915_GEM, M_WAITOK);
453 	if (!pc)
454 		return -ENOMEM;
455 
456 	obj = i915_gem_alloc_object(ring->dev, 4096);
457 	if (obj == NULL) {
458 		DRM_ERROR("Failed to allocate seqno page\n");
459 		ret = -ENOMEM;
460 		goto err;
461 	}
462 
463 	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
464 
465 	ret = i915_gem_object_pin(obj, 4096, true, false);
466 	if (ret)
467 		goto err_unref;
468 
469 	pc->gtt_offset = obj->gtt_offset;
470 	pc->cpu_page = (uint32_t *)kva_alloc(PAGE_SIZE);
471 	if (pc->cpu_page == NULL)
472 		goto err_unpin;
473 	pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
474 	pmap_force_invalidate_cache_range((vm_offset_t)pc->cpu_page,
475 	    (vm_offset_t)pc->cpu_page + PAGE_SIZE);
476 
477 	pc->obj = obj;
478 	ring->private = pc;
479 	return 0;
480 
481 err_unpin:
482 	i915_gem_object_unpin(obj);
483 err_unref:
484 	drm_gem_object_unreference(&obj->base);
485 err:
486 	free(pc, DRM_I915_GEM);
487 	return ret;
488 }
489 
490 static void
cleanup_pipe_control(struct intel_ring_buffer *ring)
492 {
493 	struct pipe_control *pc = ring->private;
494 	struct drm_i915_gem_object *obj;
495 
496 	if (!ring->private)
497 		return;
498 
499 	obj = pc->obj;
500 
501 	pmap_qremove((vm_offset_t)pc->cpu_page, 1);
502 	kva_free((uintptr_t)pc->cpu_page, PAGE_SIZE);
503 	i915_gem_object_unpin(obj);
504 	drm_gem_object_unreference(&obj->base);
505 
506 	free(pc, DRM_I915_GEM);
507 	ring->private = NULL;
508 }
509 
static int init_render_ring(struct intel_ring_buffer *ring)
511 {
512 	struct drm_device *dev = ring->dev;
513 	struct drm_i915_private *dev_priv = dev->dev_private;
514 	int ret = init_ring_common(ring);
515 
516 	if (INTEL_INFO(dev)->gen > 3)
517 		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
518 
519 	/* We need to disable the AsyncFlip performance optimisations in order
520 	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
521 	 * programmed to '1' on all products.
522 	 */
523 	if (INTEL_INFO(dev)->gen >= 6)
524 		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
525 
526 	/* Required for the hardware to program scanline values for waiting */
527 	if (INTEL_INFO(dev)->gen == 6)
528 		I915_WRITE(GFX_MODE,
529 			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_ALWAYS));
530 
531 	if (IS_GEN7(dev))
532 		I915_WRITE(GFX_MODE_GEN7,
533 			   _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
534 			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
535 
536 	if (INTEL_INFO(dev)->gen >= 5) {
537 		ret = init_pipe_control(ring);
538 		if (ret)
539 			return ret;
540 	}
541 
542 	if (IS_GEN6(dev)) {
543 		/* From the Sandybridge PRM, volume 1 part 3, page 24:
544 		 * "If this bit is set, STCunit will have LRA as replacement
545 		 *  policy. [...] This bit must be reset.  LRA replacement
546 		 *  policy is not supported."
547 		 */
548 		I915_WRITE(CACHE_MODE_0,
549 			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
550 
551 		/* This is not explicitly set for GEN6, so read the register.
552 		 * see intel_ring_mi_set_context() for why we care.
553 		 * TODO: consider explicitly setting the bit for GEN5
554 		 */
555 		ring->itlb_before_ctx_switch =
556 			!!(I915_READ(GFX_MODE) & GFX_TLB_INVALIDATE_ALWAYS);
557 	}
558 
559 	if (INTEL_INFO(dev)->gen >= 6)
560 		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
561 
562 	if (HAS_L3_GPU_CACHE(dev))
563 		I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
564 
565 	return ret;
566 }
567 
static void render_ring_cleanup(struct intel_ring_buffer *ring)
569 {
570 	struct drm_device *dev = ring->dev;
571 
572 	if (!ring->private)
573 		return;
574 
575 	if (HAS_BROKEN_CS_TLB(dev))
576 		drm_gem_object_unreference(to_gem_object(ring->private));
577 
578 	cleanup_pipe_control(ring);
579 }
580 
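/*
 * Emit three dwords that load the current seqno into one of the other
 * rings' semaphore mailbox registers (MI_LOAD_REGISTER_IMM, register
 * offset, value).
 */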
581 static void
update_mboxes(struct intel_ring_buffer *ring,
583 	      u32 mmio_offset)
584 {
585 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
586 	intel_ring_emit(ring, mmio_offset);
587 	intel_ring_emit(ring, ring->outstanding_lazy_request);
588 }
589 
/**
 * gen6_add_request - Update the semaphore mailbox registers
 *
 * @ring - ring that is adding a request
 *
 * Update the mailbox registers in the *other* rings with the current seqno.
 * This acts like a signal in the canonical semaphore.
 */
599 static int
gen6_add_request(struct intel_ring_buffer *ring)
601 {
602 	u32 mbox1_reg;
603 	u32 mbox2_reg;
604 	int ret;
605 
606 	ret = intel_ring_begin(ring, 10);
607 	if (ret)
608 		return ret;
609 
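	/* 10 dwords reserved above: 3 for each mailbox update plus 4 for the
	 * MI_STORE_DWORD_INDEX/MI_USER_INTERRUPT sequence. */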
610 	mbox1_reg = ring->signal_mbox[0];
611 	mbox2_reg = ring->signal_mbox[1];
612 
613 	update_mboxes(ring, mbox1_reg);
614 	update_mboxes(ring, mbox2_reg);
615 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
616 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
617 	intel_ring_emit(ring, ring->outstanding_lazy_request);
618 	intel_ring_emit(ring, MI_USER_INTERRUPT);
619 	intel_ring_advance(ring);
620 
621 	return 0;
622 }
623 
624 /**
 * gen6_ring_sync - sync the waiter to the signaller on seqno
 *
 * @waiter - ring that is waiting
 * @signaller - ring which has signalled, or will signal, the seqno
 * @seqno - seqno which the waiter will block on
630  */
631 static int
gen6_ring_sync(struct intel_ring_buffer *waiter,
633 	       struct intel_ring_buffer *signaller,
634 	       u32 seqno)
635 {
636 	int ret;
637 	u32 dw1 = MI_SEMAPHORE_MBOX |
638 		  MI_SEMAPHORE_COMPARE |
639 		  MI_SEMAPHORE_REGISTER;
640 
641 	/* Throughout all of the GEM code, seqno passed implies our current
642 	 * seqno is >= the last seqno executed. However for hardware the
643 	 * comparison is strictly greater than.
644 	 */
645 	seqno -= 1;
646 
647 	WARN_ON(signaller->semaphore_register[waiter->id] ==
648 		MI_SEMAPHORE_SYNC_INVALID);
649 
650 	ret = intel_ring_begin(waiter, 4);
651 	if (ret)
652 		return ret;
653 
654 	intel_ring_emit(waiter,
655 			dw1 | signaller->semaphore_register[waiter->id]);
656 	intel_ring_emit(waiter, seqno);
657 	intel_ring_emit(waiter, 0);
658 	intel_ring_emit(waiter, MI_NOOP);
659 	intel_ring_advance(waiter);
660 
661 	return 0;
662 }
663 
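/*
 * Four-dword PIPE_CONTROL performing a depth-stalled qword write to addr__;
 * used below only to flush outstanding writes before the final
 * PIPE_CONTROL_NOTIFY.
 */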
664 #define PIPE_CONTROL_FLUSH(ring__, addr__)					\
665 do {									\
666 	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |		\
667 		 PIPE_CONTROL_DEPTH_STALL);				\
668 	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);			\
669 	intel_ring_emit(ring__, 0);							\
670 	intel_ring_emit(ring__, 0);							\
671 } while (0)
672 
673 static int
pc_render_add_request(struct intel_ring_buffer *ring)
675 {
676 	struct pipe_control *pc = ring->private;
677 	u32 scratch_addr = pc->gtt_offset + 128;
678 	int ret;
679 
680 	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
681 	 * incoherent with writes to memory, i.e. completely fubar,
682 	 * so we need to use PIPE_NOTIFY instead.
683 	 *
684 	 * However, we also need to workaround the qword write
685 	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
686 	 * memory before requesting an interrupt.
687 	 */
688 	ret = intel_ring_begin(ring, 32);
689 	if (ret)
690 		return ret;
691 
692 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
693 			PIPE_CONTROL_WRITE_FLUSH |
694 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
695 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
696 	intel_ring_emit(ring, ring->outstanding_lazy_request);
697 	intel_ring_emit(ring, 0);
698 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
699 	scratch_addr += 128; /* write to separate cachelines */
700 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
701 	scratch_addr += 128;
702 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
703 	scratch_addr += 128;
704 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
705 	scratch_addr += 128;
706 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
707 	scratch_addr += 128;
708 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
709 
710 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
711 			PIPE_CONTROL_WRITE_FLUSH |
712 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
713 			PIPE_CONTROL_NOTIFY);
714 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
715 	intel_ring_emit(ring, ring->outstanding_lazy_request);
716 	intel_ring_emit(ring, 0);
717 	intel_ring_advance(ring);
718 
719 	return 0;
720 }
721 
722 static u32
gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
724 {
725 	/* Workaround to force correct ordering between irq and seqno writes on
726 	 * ivb (and maybe also on snb) by reading from a CS register (like
727 	 * ACTHD) before reading the status page. */
728 	if (!lazy_coherency)
729 		intel_ring_get_active_head(ring);
730 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
731 }
732 
733 static u32
ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
735 {
736 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
737 }
738 
739 static u32
pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
741 {
742 	struct pipe_control *pc = ring->private;
743 	return pc->cpu_page[0];
744 }
745 
746 static bool
gen5_ring_get_irq(struct intel_ring_buffer *ring)
748 {
749 	struct drm_device *dev = ring->dev;
750 	drm_i915_private_t *dev_priv = dev->dev_private;
751 
752 	if (!dev->irq_enabled)
753 		return false;
754 
755 	mtx_lock(&dev_priv->irq_lock);
756 	if (ring->irq_refcount++ == 0) {
757 		dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
758 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
759 		POSTING_READ(GTIMR);
760 	}
761 	mtx_unlock(&dev_priv->irq_lock);
762 
763 	return true;
764 }
765 
766 static void
gen5_ring_put_irq(struct intel_ring_buffer *ring)
768 {
769 	struct drm_device *dev = ring->dev;
770 	drm_i915_private_t *dev_priv = dev->dev_private;
771 
772 	mtx_lock(&dev_priv->irq_lock);
773 	if (--ring->irq_refcount == 0) {
774 		dev_priv->gt_irq_mask |= ring->irq_enable_mask;
775 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
776 		POSTING_READ(GTIMR);
777 	}
778 	mtx_unlock(&dev_priv->irq_lock);
779 }
780 
781 static bool
i9xx_ring_get_irq(struct intel_ring_buffer *ring)
783 {
784 	struct drm_device *dev = ring->dev;
785 	drm_i915_private_t *dev_priv = dev->dev_private;
786 
787 	if (!dev->irq_enabled)
788 		return false;
789 
790 	mtx_lock(&dev_priv->irq_lock);
791 	if (ring->irq_refcount++ == 0) {
792 		dev_priv->irq_mask &= ~ring->irq_enable_mask;
793 		I915_WRITE(IMR, dev_priv->irq_mask);
794 		POSTING_READ(IMR);
795 	}
796 	mtx_unlock(&dev_priv->irq_lock);
797 
798 	return true;
799 }
800 
801 static void
i9xx_ring_put_irq(struct intel_ring_buffer *ring)
803 {
804 	struct drm_device *dev = ring->dev;
805 	drm_i915_private_t *dev_priv = dev->dev_private;
806 
807 	mtx_lock(&dev_priv->irq_lock);
808 	if (--ring->irq_refcount == 0) {
809 		dev_priv->irq_mask |= ring->irq_enable_mask;
810 		I915_WRITE(IMR, dev_priv->irq_mask);
811 		POSTING_READ(IMR);
812 	}
813 	mtx_unlock(&dev_priv->irq_lock);
814 }
815 
816 static bool
i8xx_ring_get_irq(struct intel_ring_buffer *ring)
818 {
819 	struct drm_device *dev = ring->dev;
820 	drm_i915_private_t *dev_priv = dev->dev_private;
821 
822 	if (!dev->irq_enabled)
823 		return false;
824 
825 	mtx_lock(&dev_priv->irq_lock);
826 	if (ring->irq_refcount++ == 0) {
827 		dev_priv->irq_mask &= ~ring->irq_enable_mask;
828 		I915_WRITE16(IMR, dev_priv->irq_mask);
829 		POSTING_READ16(IMR);
830 	}
831 	mtx_unlock(&dev_priv->irq_lock);
832 
833 	return true;
834 }
835 
836 static void
i8xx_ring_put_irq(struct intel_ring_buffer *ring)
838 {
839 	struct drm_device *dev = ring->dev;
840 	drm_i915_private_t *dev_priv = dev->dev_private;
841 
842 	mtx_lock(&dev_priv->irq_lock);
843 	if (--ring->irq_refcount == 0) {
844 		dev_priv->irq_mask |= ring->irq_enable_mask;
845 		I915_WRITE16(IMR, dev_priv->irq_mask);
846 		POSTING_READ16(IMR);
847 	}
848 	mtx_unlock(&dev_priv->irq_lock);
849 }
850 
void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
852 {
853 	struct drm_device *dev = ring->dev;
854 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
855 	u32 mmio = 0;
856 
857 	/* The ring status page addresses are no longer next to the rest of
858 	 * the ring registers as of gen7.
859 	 */
860 	if (IS_GEN7(dev)) {
861 		switch (ring->id) {
862 		case RCS:
863 			mmio = RENDER_HWS_PGA_GEN7;
864 			break;
865 		case BCS:
866 			mmio = BLT_HWS_PGA_GEN7;
867 			break;
868 		case VCS:
869 			mmio = BSD_HWS_PGA_GEN7;
870 			break;
871 		}
872 	} else if (IS_GEN6(ring->dev)) {
873 		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
874 	} else {
875 		mmio = RING_HWS_PGA(ring->mmio_base);
876 	}
877 
878 	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
879 	POSTING_READ(mmio);
880 }
881 
882 static int
bsd_ring_flush(struct intel_ring_buffer *ring,
884 	       u32     invalidate_domains,
885 	       u32     flush_domains)
886 {
887 	int ret;
888 
889 	ret = intel_ring_begin(ring, 2);
890 	if (ret)
891 		return ret;
892 
893 	intel_ring_emit(ring, MI_FLUSH);
894 	intel_ring_emit(ring, MI_NOOP);
895 	intel_ring_advance(ring);
896 	return 0;
897 }
898 
899 static int
i9xx_add_request(struct intel_ring_buffer *ring)
901 {
902 	int ret;
903 
904 	ret = intel_ring_begin(ring, 4);
905 	if (ret)
906 		return ret;
907 
908 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
909 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
910 	intel_ring_emit(ring, ring->outstanding_lazy_request);
911 	intel_ring_emit(ring, MI_USER_INTERRUPT);
912 	intel_ring_advance(ring);
913 
914 	return 0;
915 }
916 
917 static bool
gen6_ring_get_irq(struct intel_ring_buffer *ring)
919 {
920 	struct drm_device *dev = ring->dev;
921 	drm_i915_private_t *dev_priv = dev->dev_private;
922 
923 	if (!dev->irq_enabled)
924 	       return false;
925 
926 	/* It looks like we need to prevent the gt from suspending while waiting
	 * for a notify irq, otherwise irqs seem to get lost on at least the
928 	 * blt/bsd rings on ivb. */
929 	gen6_gt_force_wake_get(dev_priv);
930 
931 	mtx_lock(&dev_priv->irq_lock);
932 	if (ring->irq_refcount++ == 0) {
933 		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
934 			I915_WRITE_IMR(ring, ~(ring->irq_enable_mask |
935 						GEN6_RENDER_L3_PARITY_ERROR));
936 		else
937 			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
938 		dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
939 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
940 		POSTING_READ(GTIMR);
941 	}
942 	mtx_unlock(&dev_priv->irq_lock);
943 
944 	return true;
945 }
946 
947 static void
gen6_ring_put_irq(struct intel_ring_buffer *ring)
949 {
950 	struct drm_device *dev = ring->dev;
951 	drm_i915_private_t *dev_priv = dev->dev_private;
952 
953 	mtx_lock(&dev_priv->irq_lock);
954 	if (--ring->irq_refcount == 0) {
955 		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
956 			I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
957 		else
958 			I915_WRITE_IMR(ring, ~0);
959 		dev_priv->gt_irq_mask |= ring->irq_enable_mask;
960 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
961 		POSTING_READ(GTIMR);
962 	}
963 	mtx_unlock(&dev_priv->irq_lock);
964 
965 	gen6_gt_force_wake_put(dev_priv);
966 }
967 
968 static int
i965_dispatch_execbuffer(struct intel_ring_buffer *ring,
970 			 u32 offset, u32 length,
971 			 unsigned flags)
972 {
973 	int ret;
974 
975 	ret = intel_ring_begin(ring, 2);
976 	if (ret)
977 		return ret;
978 
979 	intel_ring_emit(ring,
980 			MI_BATCH_BUFFER_START |
981 			MI_BATCH_GTT |
982 			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
983 	intel_ring_emit(ring, offset);
984 	intel_ring_advance(ring);
985 
986 	return 0;
987 }
988 
/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
990 #define I830_BATCH_LIMIT (256*1024)
991 static int
i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
993 				u32 offset, u32 len,
994 				unsigned flags)
995 {
996 	int ret;
997 
998 	if (flags & I915_DISPATCH_PINNED) {
999 		ret = intel_ring_begin(ring, 4);
1000 		if (ret)
1001 			return ret;
1002 
1003 		intel_ring_emit(ring, MI_BATCH_BUFFER);
1004 		intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
1005 		intel_ring_emit(ring, offset + len - 8);
1006 		intel_ring_emit(ring, MI_NOOP);
1007 		intel_ring_advance(ring);
1008 	} else {
1009 		struct drm_i915_gem_object *obj = ring->private;
1010 		u32 cs_offset = obj->gtt_offset;
1011 
1012 		if (len > I830_BATCH_LIMIT)
1013 			return -ENOSPC;
1014 
1015 		ret = intel_ring_begin(ring, 9+3);
1016 		if (ret)
1017 			return ret;
		/* Blit the batch (which now has all relocs applied) to the stable batch
1019 		 * scratch bo area (so that the CS never stumbles over its tlb
1020 		 * invalidation bug) ... */
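		/* 32bpp copy with a 4096-byte pitch: each row is 1024 pixels
		 * (4096 bytes), DIV_ROUND_UP(len, 4096) rows in total. */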
1021 		intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD |
1022 				XY_SRC_COPY_BLT_WRITE_ALPHA |
1023 				XY_SRC_COPY_BLT_WRITE_RGB);
1024 		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096);
1025 		intel_ring_emit(ring, 0);
1026 		intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024);
1027 		intel_ring_emit(ring, cs_offset);
1028 		intel_ring_emit(ring, 0);
1029 		intel_ring_emit(ring, 4096);
1030 		intel_ring_emit(ring, offset);
1031 		intel_ring_emit(ring, MI_FLUSH);
1032 
1033 		/* ... and execute it. */
1034 		intel_ring_emit(ring, MI_BATCH_BUFFER);
1035 		intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
1036 		intel_ring_emit(ring, cs_offset + len - 8);
1037 		intel_ring_advance(ring);
1038 	}
1039 
1040 	return 0;
1041 }
1042 
1043 static int
i915_dispatch_execbuffer(struct intel_ring_buffer *ring,
1045 			 u32 offset, u32 len,
1046 			 unsigned flags)
1047 {
1048 	int ret;
1049 
1050 	ret = intel_ring_begin(ring, 2);
1051 	if (ret)
1052 		return ret;
1053 
1054 	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
1055 	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
1056 	intel_ring_advance(ring);
1057 
1058 	return 0;
1059 }
1060 
static void cleanup_status_page(struct intel_ring_buffer *ring)
1062 {
1063 	struct drm_i915_gem_object *obj;
1064 
1065 	obj = ring->status_page.obj;
1066 	if (obj == NULL)
1067 		return;
1068 
1069 	pmap_qremove((vm_offset_t)ring->status_page.page_addr, 1);
1070 	kva_free((vm_offset_t)ring->status_page.page_addr,
1071 	    PAGE_SIZE);
1072 	i915_gem_object_unpin(obj);
1073 	drm_gem_object_unreference(&obj->base);
1074 	ring->status_page.obj = NULL;
1075 }
1076 
static int init_status_page(struct intel_ring_buffer *ring)
1078 {
1079 	struct drm_device *dev = ring->dev;
1080 	struct drm_i915_gem_object *obj;
1081 	int ret;
1082 
1083 	obj = i915_gem_alloc_object(dev, 4096);
1084 	if (obj == NULL) {
1085 		DRM_ERROR("Failed to allocate status page\n");
1086 		ret = -ENOMEM;
1087 		goto err;
1088 	}
1089 
1090 	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
1091 
1092 	ret = i915_gem_object_pin(obj, 4096, true, false);
1093 	if (ret != 0) {
1094 		goto err_unref;
1095 	}
1096 
1097 	ring->status_page.gfx_addr = obj->gtt_offset;
1098 	ring->status_page.page_addr = (void *)kva_alloc(PAGE_SIZE);
1099 	if (ring->status_page.page_addr == NULL) {
1100 		ret = -ENOMEM;
1101 		goto err_unpin;
1102 	}
1103 	pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0],
1104 	    1);
1105 	pmap_force_invalidate_cache_range(
1106 	    (vm_offset_t)ring->status_page.page_addr,
1107 	    (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
1108 	ring->status_page.obj = obj;
1109 	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1110 
1111 	intel_ring_setup_status_page(ring);
1112 	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
1113 			ring->name, ring->status_page.gfx_addr);
1114 
1115 	return 0;
1116 
1117 err_unpin:
1118 	i915_gem_object_unpin(obj);
1119 err_unref:
1120 	drm_gem_object_unreference(&obj->base);
1121 err:
1122 	return ret;
1123 }
1124 
static int init_phys_hws_pga(struct intel_ring_buffer *ring)
1126 {
1127 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1128 	u32 addr;
1129 
1130 	if (!dev_priv->status_page_dmah) {
1131 		dev_priv->status_page_dmah =
1132 			drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE, BUS_SPACE_MAXADDR);
1133 		if (!dev_priv->status_page_dmah)
1134 			return -ENOMEM;
1135 	}
1136 
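	/* HWS_PGA takes the page-aligned bus address; on gen4+ bits 35:32 of
	 * a wide address are folded into bits 7:4 (hence the >> 28 below). */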
1137 	addr = dev_priv->status_page_dmah->busaddr;
1138 	if (INTEL_INFO(ring->dev)->gen >= 4)
1139 		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
1140 	I915_WRITE(HWS_PGA, addr);
1141 
1142 	ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1143 	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1144 
1145 	return 0;
1146 }
1147 
static int intel_init_ring_buffer(struct drm_device *dev,
1149 				  struct intel_ring_buffer *ring)
1150 {
1151 	struct drm_i915_gem_object *obj;
1152 	struct drm_i915_private *dev_priv = dev->dev_private;
1153 	int ret;
1154 
1155 	ring->dev = dev;
1156 	INIT_LIST_HEAD(&ring->active_list);
1157 	INIT_LIST_HEAD(&ring->request_list);
1158 	ring->size = 32 * PAGE_SIZE;
1159 	memset(ring->sync_seqno, 0, sizeof(ring->sync_seqno));
1160 
1161 #ifdef __linux__
1162 	init_waitqueue_head(&ring->irq_queue);
1163 #endif
1164 
1165 	if (I915_NEED_GFX_HWS(dev)) {
1166 		ret = init_status_page(ring);
1167 		if (ret)
1168 			return ret;
1169 	} else {
1170 		BUG_ON(ring->id != RCS);
1171 		ret = init_phys_hws_pga(ring);
1172 		if (ret)
1173 			return ret;
1174 	}
1175 
1176 	obj = i915_gem_alloc_object(dev, ring->size);
1177 	if (obj == NULL) {
1178 		DRM_ERROR("Failed to allocate ringbuffer\n");
1179 		ret = -ENOMEM;
1180 		goto err_hws;
1181 	}
1182 
1183 	ring->obj = obj;
1184 
1185 	ret = i915_gem_object_pin(obj, PAGE_SIZE, true, false);
1186 	if (ret)
1187 		goto err_unref;
1188 
1189 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
1190 	if (ret)
1191 		goto err_unpin;
1192 
1193 	ring->virtual_start =
1194 		pmap_mapdev_attr(
1195 		    dev_priv->mm.gtt->gma_bus_addr + obj->gtt_offset, ring->size,
1196 		    VM_MEMATTR_WRITE_COMBINING);
1197 	if (ring->virtual_start == NULL) {
1198 		DRM_ERROR("Failed to map ringbuffer.\n");
1199 		ret = -EINVAL;
1200 		goto err_unpin;
1201 	}
1202 
1203 	ret = ring->init(ring);
1204 	if (ret)
1205 		goto err_unmap;
1206 
1207 	/* Workaround an erratum on the i830 which causes a hang if
1208 	 * the TAIL pointer points to within the last 2 cachelines
1209 	 * of the buffer.
1210 	 */
1211 	ring->effective_size = ring->size;
1212 	if (IS_I830(ring->dev) || IS_845G(ring->dev))
1213 		ring->effective_size -= 128;
1214 
1215 	return 0;
1216 
1217 err_unmap:
1218 	pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size);
1219 err_unpin:
1220 	i915_gem_object_unpin(obj);
1221 err_unref:
1222 	drm_gem_object_unreference(&obj->base);
1223 	ring->obj = NULL;
1224 err_hws:
1225 	cleanup_status_page(ring);
1226 	return ret;
1227 }
1228 
void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
1230 {
1231 	struct drm_i915_private *dev_priv;
1232 	int ret;
1233 
1234 	if (ring->obj == NULL)
1235 		return;
1236 
1237 	/* Disable the ring buffer. The ring must be idle at this point */
1238 	dev_priv = ring->dev->dev_private;
1239 	ret = intel_ring_idle(ring);
1240 	if (ret)
1241 		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
1242 			  ring->name, ret);
1243 
1244 	I915_WRITE_CTL(ring, 0);
1245 
1246 	pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size);
1247 
1248 	i915_gem_object_unpin(ring->obj);
1249 	drm_gem_object_unreference(&ring->obj->base);
1250 	ring->obj = NULL;
1251 
1252 	if (ring->cleanup)
1253 		ring->cleanup(ring);
1254 
1255 	cleanup_status_page(ring);
1256 }
1257 
static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
1259 {
1260 	int ret;
1261 
1262 	ret = i915_wait_seqno(ring, seqno);
1263 	if (!ret)
1264 		i915_gem_retire_requests_ring(ring);
1265 
1266 	return ret;
1267 }
1268 
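/*
 * Free up at least n bytes of ring space by retiring completed requests:
 * reuse the head recorded by the last retirement if that already suffices,
 * otherwise wait for the oldest request whose completion frees enough room.
 */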
static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
1270 {
1271 	struct drm_i915_gem_request *request;
1272 	u32 seqno = 0;
1273 	int ret;
1274 
1275 	i915_gem_retire_requests_ring(ring);
1276 
1277 	if (ring->last_retired_head != -1) {
1278 		ring->head = ring->last_retired_head;
1279 		ring->last_retired_head = -1;
1280 		ring->space = ring_space(ring);
1281 		if (ring->space >= n)
1282 			return 0;
1283 	}
1284 
1285 	list_for_each_entry(request, &ring->request_list, list) {
1286 		int space;
1287 
1288 		if (request->tail == -1)
1289 			continue;
1290 
1291 		space = request->tail - (ring->tail + I915_RING_FREE_SPACE);
1292 		if (space < 0)
1293 			space += ring->size;
1294 		if (space >= n) {
1295 			seqno = request->seqno;
1296 			break;
1297 		}
1298 
1299 		/* Consume this request in case we need more space than
1300 		 * is available and so need to prevent a race between
1301 		 * updating last_retired_head and direct reads of
1302 		 * I915_RING_HEAD. It also provides a nice sanity check.
1303 		 */
1304 		request->tail = -1;
1305 	}
1306 
1307 	if (seqno == 0)
1308 		return -ENOSPC;
1309 
1310 	ret = intel_ring_wait_seqno(ring, seqno);
1311 	if (ret)
1312 		return ret;
1313 
1314 	if (WARN_ON(ring->last_retired_head == -1))
1315 		return -ENOSPC;
1316 
1317 	ring->head = ring->last_retired_head;
1318 	ring->last_retired_head = -1;
1319 	ring->space = ring_space(ring);
1320 	if (WARN_ON(ring->space < n))
1321 		return -ENOSPC;
1322 
1323 	return 0;
1324 }
1325 
static int ring_wait_for_space(struct intel_ring_buffer *ring, int n)
1327 {
1328 	struct drm_device *dev = ring->dev;
1329 	struct drm_i915_private *dev_priv = dev->dev_private;
1330 	unsigned long end;
1331 	int ret;
1332 
1333 	ret = intel_ring_wait_request(ring, n);
1334 	if (ret != -ENOSPC)
1335 		return ret;
1336 
1337 	CTR1(KTR_DRM, "ring_wait_begin %s", ring->name);
1338 	/* With GEM the hangcheck timer should kick us out of the loop,
1339 	 * leaving it early runs the risk of corrupting GEM state (due
1340 	 * to running on almost untested codepaths). But on resume
1341 	 * timers don't work yet, so prevent a complete hang in that
1342 	 * case by choosing an insanely large timeout. */
1343 	end = jiffies + 60 * HZ;
1344 
1345 	do {
1346 		ring->head = I915_READ_HEAD(ring);
1347 		ring->space = ring_space(ring);
1348 		if (ring->space >= n) {
1349 			CTR1(KTR_DRM, "ring_wait_end %s", ring->name);
1350 			return 0;
1351 		}
1352 
1353 		if (dev->primary->master) {
1354 			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
1355 			if (master_priv->sarea_priv)
1356 				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1357 		}
1358 
1359 		DRM_MSLEEP(1);
1360 
1361 		ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
1362 		if (ret) {
1363 			CTR1(KTR_DRM, "ring_wait_end %s wedged", ring->name);
1364 			return ret;
1365 		}
1366 	} while (!time_after(jiffies, end));
1367 	CTR1(KTR_DRM, "ring_wait_end %s busy", ring->name);
1368 	return -EBUSY;
1369 }
1370 
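/*
 * Pad the rest of the ring with MI_NOOPs and restart at offset 0 so that no
 * command ever straddles the end of the buffer.
 */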
static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
1372 {
1373 	uint32_t __iomem *virt;
1374 	int rem = ring->size - ring->tail;
1375 
1376 	if (ring->space < rem) {
1377 		int ret = ring_wait_for_space(ring, rem);
1378 		if (ret)
1379 			return ret;
1380 	}
1381 
1382 	virt = (uint32_t *)((char *)ring->virtual_start + ring->tail);
1383 	rem /= 4;
1384 	while (rem--)
1385 		iowrite32(MI_NOOP, virt++);
1386 
1387 	ring->tail = 0;
1388 	ring->space = ring_space(ring);
1389 
1390 	return 0;
1391 }
1392 
int intel_ring_idle(struct intel_ring_buffer *ring)
1394 {
1395 	u32 seqno;
1396 	int ret;
1397 
1398 	/* We need to add any requests required to flush the objects and ring */
1399 	if (ring->outstanding_lazy_request) {
1400 		ret = i915_add_request(ring, NULL, NULL);
1401 		if (ret)
1402 			return ret;
1403 	}
1404 
1405 	/* Wait upon the last request to be completed */
1406 	if (list_empty(&ring->request_list))
1407 		return 0;
1408 
1409 	seqno = list_entry(ring->request_list.prev,
1410 			   struct drm_i915_gem_request,
1411 			   list)->seqno;
1412 
1413 	return i915_wait_seqno(ring, seqno);
1414 }
1415 
1416 static int
intel_ring_alloc_seqno(struct intel_ring_buffer *ring)
1418 {
1419 	if (ring->outstanding_lazy_request)
1420 		return 0;
1421 
1422 	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_request);
1423 }
1424 
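/*
 * Reserve num_dwords dwords of ring space: bail out if the GPU is wedged,
 * make sure a seqno exists for the request being built, wrap the ring if
 * the emission would run past the effective end, and wait for space if the
 * ring is too full.
 */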
int intel_ring_begin(struct intel_ring_buffer *ring,
1426 		     int num_dwords)
1427 {
1428 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
1429 	int n = 4*num_dwords;
1430 	int ret;
1431 
1432 	ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
1433 	if (ret)
1434 		return ret;
1435 
1436 	/* Preallocate the olr before touching the ring */
1437 	ret = intel_ring_alloc_seqno(ring);
1438 	if (ret)
1439 		return ret;
1440 
1441 	if (unlikely(ring->tail + n > ring->effective_size)) {
1442 		ret = intel_wrap_ring_buffer(ring);
1443 		if (unlikely(ret))
1444 			return ret;
1445 	}
1446 
1447 	if (unlikely(ring->space < n)) {
1448 		ret = ring_wait_for_space(ring, n);
1449 		if (unlikely(ret))
1450 			return ret;
1451 	}
1452 
1453 	ring->space -= n;
1454 	return 0;
1455 }
1456 
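/*
 * Publish the commands emitted so far: wrap the software tail and, unless
 * the ring has been artificially stopped (dev_priv->stop_rings, e.g. for
 * simulated hangs), hand the new tail to the hardware.
 */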
void intel_ring_advance(struct intel_ring_buffer *ring)
1458 {
1459 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1460 
1461 	ring->tail &= ring->size - 1;
1462 	if (dev_priv->stop_rings & intel_ring_flag(ring))
1463 		return;
1464 	ring->write_tail(ring, ring->tail);
1465 }
1466 
1467 
static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
1469 				     u32 value)
1470 {
1471 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
1472 
1473        /* Every tail move must follow the sequence below */
1474 
1475 	/* Disable notification that the ring is IDLE. The GT
1476 	 * will then assume that it is busy and bring it out of rc6.
1477 	 */
1478 	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1479 		   _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
1480 
1481 	/* Clear the context id. Here be magic! */
1482 	I915_WRITE64(GEN6_BSD_RNCID, 0x0);
1483 
1484 	/* Wait for the ring not to be idle, i.e. for it to wake up. */
1485 	if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
1486 		      GEN6_BSD_SLEEP_INDICATOR) == 0,
1487 		     50))
1488 		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
1489 
1490 	/* Now that the ring is fully powered up, update the tail */
1491 	I915_WRITE_TAIL(ring, value);
1492 	POSTING_READ(RING_TAIL(ring->mmio_base));
1493 
1494 	/* Let the ring send IDLE messages to the GT again,
1495 	 * and so let it sleep to conserve power when idle.
1496 	 */
1497 	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1498 		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
1499 }
1500 
static int gen6_ring_flush(struct intel_ring_buffer *ring,
1502 			   u32 invalidate, u32 flush)
1503 {
1504 	uint32_t cmd;
1505 	int ret;
1506 
1507 	ret = intel_ring_begin(ring, 4);
1508 	if (ret)
1509 		return ret;
1510 
1511 	cmd = MI_FLUSH_DW;
1512 	/*
1513 	 * Bspec vol 1c.5 - video engine command streamer:
1514 	 * "If ENABLED, all TLBs will be invalidated once the flush
1515 	 * operation is complete. This bit is only valid when the
1516 	 * Post-Sync Operation field is a value of 1h or 3h."
1517 	 */
1518 	if (invalidate & I915_GEM_GPU_DOMAINS)
1519 		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
1520 			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
1521 	intel_ring_emit(ring, cmd);
1522 	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
1523 	intel_ring_emit(ring, 0);
1524 	intel_ring_emit(ring, MI_NOOP);
1525 	intel_ring_advance(ring);
1526 	return 0;
1527 }
1528 
1529 static int
hsw_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1531 			      u32 offset, u32 len,
1532 			      unsigned flags)
1533 {
1534 	int ret;
1535 
1536 	ret = intel_ring_begin(ring, 2);
1537 	if (ret)
1538 		return ret;
1539 
1540 	intel_ring_emit(ring,
1541 			MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW |
1542 			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW));
1543 	/* bit0-7 is the length on GEN6+ */
1544 	intel_ring_emit(ring, offset);
1545 	intel_ring_advance(ring);
1546 
1547 	return 0;
1548 }
1549 
1550 static int
gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1552 			      u32 offset, u32 len,
1553 			      unsigned flags)
1554 {
1555 	int ret;
1556 
1557 	ret = intel_ring_begin(ring, 2);
1558 	if (ret)
1559 		return ret;
1560 
1561 	intel_ring_emit(ring,
1562 			MI_BATCH_BUFFER_START |
1563 			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
1564 	/* bit0-7 is the length on GEN6+ */
1565 	intel_ring_emit(ring, offset);
1566 	intel_ring_advance(ring);
1567 
1568 	return 0;
1569 }
1570 
1571 /* Blitter support (SandyBridge+) */
1572 
static int blt_ring_flush(struct intel_ring_buffer *ring,
1574 			  u32 invalidate, u32 flush)
1575 {
1576 	uint32_t cmd;
1577 	int ret;
1578 
1579 	ret = intel_ring_begin(ring, 4);
1580 	if (ret)
1581 		return ret;
1582 
1583 	cmd = MI_FLUSH_DW;
1584 	/*
1585 	 * Bspec vol 1c.3 - blitter engine command streamer:
1586 	 * "If ENABLED, all TLBs will be invalidated once the flush
1587 	 * operation is complete. This bit is only valid when the
1588 	 * Post-Sync Operation field is a value of 1h or 3h."
1589 	 */
1590 	if (invalidate & I915_GEM_DOMAIN_RENDER)
1591 		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
1592 			MI_FLUSH_DW_OP_STOREDW;
1593 	intel_ring_emit(ring, cmd);
1594 	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
1595 	intel_ring_emit(ring, 0);
1596 	intel_ring_emit(ring, MI_NOOP);
1597 	intel_ring_advance(ring);
1598 	return 0;
1599 }
1600 
int intel_init_render_ring_buffer(struct drm_device *dev)
1602 {
1603 	drm_i915_private_t *dev_priv = dev->dev_private;
1604 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
1605 
1606 	ring->name = "render ring";
1607 	ring->id = RCS;
1608 	ring->mmio_base = RENDER_RING_BASE;
1609 
1610 	if (INTEL_INFO(dev)->gen >= 6) {
1611 		ring->add_request = gen6_add_request;
1612 		ring->flush = gen7_render_ring_flush;
1613 		if (INTEL_INFO(dev)->gen == 6)
1614 			ring->flush = gen6_render_ring_flush;
1615 		ring->irq_get = gen6_ring_get_irq;
1616 		ring->irq_put = gen6_ring_put_irq;
1617 		ring->irq_enable_mask = GT_USER_INTERRUPT;
1618 		ring->get_seqno = gen6_ring_get_seqno;
1619 		ring->sync_to = gen6_ring_sync;
1620 		ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_INVALID;
1621 		ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_RV;
1622 		ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_RB;
1623 		ring->signal_mbox[0] = GEN6_VRSYNC;
1624 		ring->signal_mbox[1] = GEN6_BRSYNC;
1625 	} else if (IS_GEN5(dev)) {
1626 		ring->add_request = pc_render_add_request;
1627 		ring->flush = gen4_render_ring_flush;
1628 		ring->get_seqno = pc_render_get_seqno;
1629 		ring->irq_get = gen5_ring_get_irq;
1630 		ring->irq_put = gen5_ring_put_irq;
1631 		ring->irq_enable_mask = GT_USER_INTERRUPT | GT_PIPE_NOTIFY;
1632 	} else {
1633 		ring->add_request = i9xx_add_request;
1634 		if (INTEL_INFO(dev)->gen < 4)
1635 			ring->flush = gen2_render_ring_flush;
1636 		else
1637 			ring->flush = gen4_render_ring_flush;
1638 		ring->get_seqno = ring_get_seqno;
1639 		if (IS_GEN2(dev)) {
1640 			ring->irq_get = i8xx_ring_get_irq;
1641 			ring->irq_put = i8xx_ring_put_irq;
1642 		} else {
1643 			ring->irq_get = i9xx_ring_get_irq;
1644 			ring->irq_put = i9xx_ring_put_irq;
1645 		}
1646 		ring->irq_enable_mask = I915_USER_INTERRUPT;
1647 	}
1648 	ring->write_tail = ring_write_tail;
1649 	if (IS_HASWELL(dev))
1650 		ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
1651 	else if (INTEL_INFO(dev)->gen >= 6)
1652 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1653 	else if (INTEL_INFO(dev)->gen >= 4)
1654 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1655 	else if (IS_I830(dev) || IS_845G(dev))
1656 		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
1657 	else
1658 		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
1659 	ring->init = init_render_ring;
1660 	ring->cleanup = render_ring_cleanup;
1661 
1662 	/* Workaround batchbuffer to combat CS tlb bug. */
1663 	if (HAS_BROKEN_CS_TLB(dev)) {
1664 		struct drm_i915_gem_object *obj;
1665 		int ret;
1666 
1667 		obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
1668 		if (obj == NULL) {
1669 			DRM_ERROR("Failed to allocate batch bo\n");
1670 			return -ENOMEM;
1671 		}
1672 
1673 		ret = i915_gem_object_pin(obj, 0, true, false);
1674 		if (ret != 0) {
1675 			drm_gem_object_unreference(&obj->base);
			DRM_ERROR("Failed to pin batch bo\n");
1677 			return ret;
1678 		}
1679 
1680 		ring->private = obj;
1681 	}
1682 
1683 	return intel_init_ring_buffer(dev, ring);
1684 }
1685 
int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
1687 {
1688 	drm_i915_private_t *dev_priv = dev->dev_private;
1689 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
1690 	int ret;
1691 
1692 	ring->name = "render ring";
1693 	ring->id = RCS;
1694 	ring->mmio_base = RENDER_RING_BASE;
1695 
1696 	if (INTEL_INFO(dev)->gen >= 6) {
1697 		/* non-kms not supported on gen6+ */
1698 		return -ENODEV;
1699 	}
1700 
1701 	/* Note: gem is not supported on gen5/ilk without kms (the corresponding
1702 	 * gem_init ioctl returns with -ENODEV). Hence we do not need to set up
1703 	 * the special gen5 functions. */
1704 	ring->add_request = i9xx_add_request;
1705 	if (INTEL_INFO(dev)->gen < 4)
1706 		ring->flush = gen2_render_ring_flush;
1707 	else
1708 		ring->flush = gen4_render_ring_flush;
1709 	ring->get_seqno = ring_get_seqno;
1710 	if (IS_GEN2(dev)) {
1711 		ring->irq_get = i8xx_ring_get_irq;
1712 		ring->irq_put = i8xx_ring_put_irq;
1713 	} else {
1714 		ring->irq_get = i9xx_ring_get_irq;
1715 		ring->irq_put = i9xx_ring_put_irq;
1716 	}
1717 	ring->irq_enable_mask = I915_USER_INTERRUPT;
1718 	ring->write_tail = ring_write_tail;
1719 	if (INTEL_INFO(dev)->gen >= 4)
1720 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1721 	else if (IS_I830(dev) || IS_845G(dev))
1722 		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
1723 	else
1724 		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
1725 	ring->init = init_render_ring;
1726 	ring->cleanup = render_ring_cleanup;
1727 
1728 	ring->dev = dev;
1729 	INIT_LIST_HEAD(&ring->active_list);
1730 	INIT_LIST_HEAD(&ring->request_list);
1731 
1732 	ring->size = size;
1733 	ring->effective_size = ring->size;
1734 	if (IS_I830(ring->dev) || IS_845G(ring->dev))
1735 		ring->effective_size -= 128;
1736 
1737 	ring->virtual_start = pmap_mapdev_attr(start, size,
1738 	    VM_MEMATTR_WRITE_COMBINING);
1739 	if (ring->virtual_start == NULL) {
1740 		DRM_ERROR("can not ioremap virtual address for"
1741 			  " ring buffer\n");
1742 		return -ENOMEM;
1743 	}
1744 
1745 	if (!I915_NEED_GFX_HWS(dev)) {
1746 		ret = init_phys_hws_pga(ring);
1747 		if (ret)
1748 			return ret;
1749 	}
1750 
1751 	return 0;
1752 }
1753 
int intel_init_bsd_ring_buffer(struct drm_device *dev)
1755 {
1756 	drm_i915_private_t *dev_priv = dev->dev_private;
1757 	struct intel_ring_buffer *ring = &dev_priv->ring[VCS];
1758 
1759 	ring->name = "bsd ring";
1760 	ring->id = VCS;
1761 
1762 	ring->write_tail = ring_write_tail;
1763 	if (IS_GEN6(dev) || IS_GEN7(dev)) {
1764 		ring->mmio_base = GEN6_BSD_RING_BASE;
1765 		/* gen6 bsd needs a special wa for tail updates */
1766 		if (IS_GEN6(dev))
1767 			ring->write_tail = gen6_bsd_ring_write_tail;
1768 		ring->flush = gen6_ring_flush;
1769 		ring->add_request = gen6_add_request;
1770 		ring->get_seqno = gen6_ring_get_seqno;
1771 		ring->irq_enable_mask = GEN6_BSD_USER_INTERRUPT;
1772 		ring->irq_get = gen6_ring_get_irq;
1773 		ring->irq_put = gen6_ring_put_irq;
1774 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1775 		ring->sync_to = gen6_ring_sync;
1776 		ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_VR;
1777 		ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_INVALID;
1778 		ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_VB;
1779 		ring->signal_mbox[0] = GEN6_RVSYNC;
1780 		ring->signal_mbox[1] = GEN6_BVSYNC;
1781 	} else {
1782 		ring->mmio_base = BSD_RING_BASE;
1783 		ring->flush = bsd_ring_flush;
1784 		ring->add_request = i9xx_add_request;
1785 		ring->get_seqno = ring_get_seqno;
1786 		if (IS_GEN5(dev)) {
1787 			ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
1788 			ring->irq_get = gen5_ring_get_irq;
1789 			ring->irq_put = gen5_ring_put_irq;
1790 		} else {
1791 			ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
1792 			ring->irq_get = i9xx_ring_get_irq;
1793 			ring->irq_put = i9xx_ring_put_irq;
1794 		}
1795 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1796 	}
1797 	ring->init = init_ring_common;
1798 
1799 	return intel_init_ring_buffer(dev, ring);
1800 }
1801 
int intel_init_blt_ring_buffer(struct drm_device *dev)
1803 {
1804 	drm_i915_private_t *dev_priv = dev->dev_private;
1805 	struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
1806 
1807 	ring->name = "blitter ring";
1808 	ring->id = BCS;
1809 
1810 	ring->mmio_base = BLT_RING_BASE;
1811 	ring->write_tail = ring_write_tail;
1812 	ring->flush = blt_ring_flush;
1813 	ring->add_request = gen6_add_request;
1814 	ring->get_seqno = gen6_ring_get_seqno;
1815 	ring->irq_enable_mask = GEN6_BLITTER_USER_INTERRUPT;
1816 	ring->irq_get = gen6_ring_get_irq;
1817 	ring->irq_put = gen6_ring_put_irq;
1818 	ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1819 	ring->sync_to = gen6_ring_sync;
1820 	ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_BR;
1821 	ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_BV;
1822 	ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_INVALID;
1823 	ring->signal_mbox[0] = GEN6_RBSYNC;
1824 	ring->signal_mbox[1] = GEN6_VBSYNC;
1825 	ring->init = init_ring_common;
1826 
1827 	return intel_init_ring_buffer(dev, ring);
1828 }
1829 
1830 int
intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
1832 {
1833 	int ret;
1834 
1835 	if (!ring->gpu_caches_dirty)
1836 		return 0;
1837 
1838 	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
1839 	if (ret)
1840 		return ret;
1841 
1842 	ring->gpu_caches_dirty = false;
1843 	return 0;
1844 }
1845 
1846 int
intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
1848 {
1849 	uint32_t flush_domains;
1850 	int ret;
1851 
1852 	flush_domains = 0;
1853 	if (ring->gpu_caches_dirty)
1854 		flush_domains = I915_GEM_GPU_DOMAINS;
1855 
1856 	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
1857 	if (ret)
1858 		return ret;
1859 
1860 	ring->gpu_caches_dirty = false;
1861 	return 0;
1862 }
1863