xref: /linux-6.15/kernel/trace/ring_buffer.c (revision dc4e2801)
17a8e76a3SSteven Rostedt /*
27a8e76a3SSteven Rostedt  * Generic ring buffer
37a8e76a3SSteven Rostedt  *
47a8e76a3SSteven Rostedt  * Copyright (C) 2008 Steven Rostedt <[email protected]>
57a8e76a3SSteven Rostedt  */
6af658dcaSSteven Rostedt (Red Hat) #include <linux/trace_events.h>
77a8e76a3SSteven Rostedt #include <linux/ring_buffer.h>
814131f2fSIngo Molnar #include <linux/trace_clock.h>
9e6017571SIngo Molnar #include <linux/sched/clock.h>
100b07436dSSteven Rostedt #include <linux/trace_seq.h>
117a8e76a3SSteven Rostedt #include <linux/spinlock.h>
1215693458SSteven Rostedt (Red Hat) #include <linux/irq_work.h>
137a8e76a3SSteven Rostedt #include <linux/uaccess.h>
14a81bd80aSSteven Rostedt #include <linux/hardirq.h>
156c43e554SSteven Rostedt (Red Hat) #include <linux/kthread.h>	/* for self test */
167a8e76a3SSteven Rostedt #include <linux/module.h>
177a8e76a3SSteven Rostedt #include <linux/percpu.h>
187a8e76a3SSteven Rostedt #include <linux/mutex.h>
196c43e554SSteven Rostedt (Red Hat) #include <linux/delay.h>
205a0e3ad6STejun Heo #include <linux/slab.h>
217a8e76a3SSteven Rostedt #include <linux/init.h>
227a8e76a3SSteven Rostedt #include <linux/hash.h>
237a8e76a3SSteven Rostedt #include <linux/list.h>
24554f786eSSteven Rostedt #include <linux/cpu.h>
257a8e76a3SSteven Rostedt 
2679615760SChristoph Lameter #include <asm/local.h>
27182e9f5fSSteven Rostedt 
2883f40318SVaibhav Nagarnaik static void update_pages_handler(struct work_struct *work);
2983f40318SVaibhav Nagarnaik 
30033601a3SSteven Rostedt /*
31d1b182a8SSteven Rostedt  * The ring buffer header is special. We must manually up keep it.
32d1b182a8SSteven Rostedt  */
33d1b182a8SSteven Rostedt int ring_buffer_print_entry_header(struct trace_seq *s)
34d1b182a8SSteven Rostedt {
35c0cd93aaSSteven Rostedt (Red Hat) 	trace_seq_puts(s, "# compressed entry header\n");
36c0cd93aaSSteven Rostedt (Red Hat) 	trace_seq_puts(s, "\ttype_len    :    5 bits\n");
37c0cd93aaSSteven Rostedt (Red Hat) 	trace_seq_puts(s, "\ttime_delta  :   27 bits\n");
38c0cd93aaSSteven Rostedt (Red Hat) 	trace_seq_puts(s, "\tarray       :   32 bits\n");
39c0cd93aaSSteven Rostedt (Red Hat) 	trace_seq_putc(s, '\n');
40c0cd93aaSSteven Rostedt (Red Hat) 	trace_seq_printf(s, "\tpadding     : type == %d\n",
41d1b182a8SSteven Rostedt 			 RINGBUF_TYPE_PADDING);
42c0cd93aaSSteven Rostedt (Red Hat) 	trace_seq_printf(s, "\ttime_extend : type == %d\n",
43d1b182a8SSteven Rostedt 			 RINGBUF_TYPE_TIME_EXTEND);
44*dc4e2801STom Zanussi 	trace_seq_printf(s, "\ttime_stamp : type == %d\n",
45*dc4e2801STom Zanussi 			 RINGBUF_TYPE_TIME_STAMP);
46c0cd93aaSSteven Rostedt (Red Hat) 	trace_seq_printf(s, "\tdata max type_len  == %d\n",
47334d4169SLai Jiangshan 			 RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
48d1b182a8SSteven Rostedt 
49c0cd93aaSSteven Rostedt (Red Hat) 	return !trace_seq_has_overflowed(s);
50d1b182a8SSteven Rostedt }
51d1b182a8SSteven Rostedt 
52d1b182a8SSteven Rostedt /*
535cc98548SSteven Rostedt  * The ring buffer is made up of a list of pages. A separate list of pages is
545cc98548SSteven Rostedt  * allocated for each CPU. A writer may only write to a buffer that is
555cc98548SSteven Rostedt  * associated with the CPU it is currently executing on.  A reader may read
565cc98548SSteven Rostedt  * from any per cpu buffer.
575cc98548SSteven Rostedt  *
585cc98548SSteven Rostedt  * The reader is special. For each per cpu buffer, the reader has its own
595cc98548SSteven Rostedt  * reader page. When a reader has read the entire reader page, this reader
605cc98548SSteven Rostedt  * page is swapped with another page in the ring buffer.
615cc98548SSteven Rostedt  *
625cc98548SSteven Rostedt  * Now, as long as the writer is off the reader page, the reader can do what
635cc98548SSteven Rostedt  * ever it wants with that page. The writer will never write to that page
645cc98548SSteven Rostedt  * again (as long as it is out of the ring buffer).
655cc98548SSteven Rostedt  *
665cc98548SSteven Rostedt  * Here's some silly ASCII art.
675cc98548SSteven Rostedt  *
685cc98548SSteven Rostedt  *   +------+
695cc98548SSteven Rostedt  *   |reader|          RING BUFFER
705cc98548SSteven Rostedt  *   |page  |
715cc98548SSteven Rostedt  *   +------+        +---+   +---+   +---+
725cc98548SSteven Rostedt  *                   |   |-->|   |-->|   |
735cc98548SSteven Rostedt  *                   +---+   +---+   +---+
745cc98548SSteven Rostedt  *                     ^               |
755cc98548SSteven Rostedt  *                     |               |
765cc98548SSteven Rostedt  *                     +---------------+
775cc98548SSteven Rostedt  *
785cc98548SSteven Rostedt  *
795cc98548SSteven Rostedt  *   +------+
805cc98548SSteven Rostedt  *   |reader|          RING BUFFER
815cc98548SSteven Rostedt  *   |page  |------------------v
825cc98548SSteven Rostedt  *   +------+        +---+   +---+   +---+
835cc98548SSteven Rostedt  *                   |   |-->|   |-->|   |
845cc98548SSteven Rostedt  *                   +---+   +---+   +---+
855cc98548SSteven Rostedt  *                     ^               |
865cc98548SSteven Rostedt  *                     |               |
875cc98548SSteven Rostedt  *                     +---------------+
885cc98548SSteven Rostedt  *
895cc98548SSteven Rostedt  *
905cc98548SSteven Rostedt  *   +------+
915cc98548SSteven Rostedt  *   |reader|          RING BUFFER
925cc98548SSteven Rostedt  *   |page  |------------------v
935cc98548SSteven Rostedt  *   +------+        +---+   +---+   +---+
945cc98548SSteven Rostedt  *      ^            |   |-->|   |-->|   |
955cc98548SSteven Rostedt  *      |            +---+   +---+   +---+
965cc98548SSteven Rostedt  *      |                              |
975cc98548SSteven Rostedt  *      |                              |
985cc98548SSteven Rostedt  *      +------------------------------+
995cc98548SSteven Rostedt  *
1005cc98548SSteven Rostedt  *
1015cc98548SSteven Rostedt  *   +------+
1025cc98548SSteven Rostedt  *   |buffer|          RING BUFFER
1035cc98548SSteven Rostedt  *   |page  |------------------v
1045cc98548SSteven Rostedt  *   +------+        +---+   +---+   +---+
1055cc98548SSteven Rostedt  *      ^            |   |   |   |-->|   |
1065cc98548SSteven Rostedt  *      |   New      +---+   +---+   +---+
1075cc98548SSteven Rostedt  *      |  Reader------^               |
1085cc98548SSteven Rostedt  *      |   page                       |
1095cc98548SSteven Rostedt  *      +------------------------------+
1105cc98548SSteven Rostedt  *
1115cc98548SSteven Rostedt  *
1125cc98548SSteven Rostedt  * After we make this swap, the reader can hand this page off to the splice
1135cc98548SSteven Rostedt  * code and be done with it. It can even allocate a new page if it needs to
1145cc98548SSteven Rostedt  * and swap that into the ring buffer.
1155cc98548SSteven Rostedt  *
1165cc98548SSteven Rostedt  * We will be using cmpxchg soon to make all this lockless.
1175cc98548SSteven Rostedt  *
1185cc98548SSteven Rostedt  */
1195cc98548SSteven Rostedt 
120499e5470SSteven Rostedt /* Used for individual buffers (after the counter) */
121499e5470SSteven Rostedt #define RB_BUFFER_OFF		(1 << 20)
122499e5470SSteven Rostedt 
123474d32b6SSteven Rostedt #define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)
124474d32b6SSteven Rostedt 
125e3d6bf0aSSteven Rostedt #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
12667d34724SAndrew Morton #define RB_ALIGNMENT		4U
127334d4169SLai Jiangshan #define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
128c7b09308SSteven Rostedt #define RB_EVNT_MIN_SIZE	8U	/* two 32bit words */
129334d4169SLai Jiangshan 
130649508f6SJames Hogan #ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
1312271048dSSteven Rostedt # define RB_FORCE_8BYTE_ALIGNMENT	0
1322271048dSSteven Rostedt # define RB_ARCH_ALIGNMENT		RB_ALIGNMENT
1332271048dSSteven Rostedt #else
1342271048dSSteven Rostedt # define RB_FORCE_8BYTE_ALIGNMENT	1
1352271048dSSteven Rostedt # define RB_ARCH_ALIGNMENT		8U
1362271048dSSteven Rostedt #endif
1372271048dSSteven Rostedt 
138649508f6SJames Hogan #define RB_ALIGN_DATA		__aligned(RB_ARCH_ALIGNMENT)
139649508f6SJames Hogan 
140334d4169SLai Jiangshan /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
141334d4169SLai Jiangshan #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
1427a8e76a3SSteven Rostedt 
1437a8e76a3SSteven Rostedt enum {
1447a8e76a3SSteven Rostedt 	RB_LEN_TIME_EXTEND = 8,
145*dc4e2801STom Zanussi 	RB_LEN_TIME_STAMP =  8,
1467a8e76a3SSteven Rostedt };
1477a8e76a3SSteven Rostedt 
14869d1b839SSteven Rostedt #define skip_time_extend(event) \
14969d1b839SSteven Rostedt 	((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
15069d1b839SSteven Rostedt 
151*dc4e2801STom Zanussi #define extended_time(event) \
152*dc4e2801STom Zanussi 	(event->type_len >= RINGBUF_TYPE_TIME_EXTEND)
153*dc4e2801STom Zanussi 
1542d622719STom Zanussi static inline int rb_null_event(struct ring_buffer_event *event)
1552d622719STom Zanussi {
156a1863c21SSteven Rostedt 	return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
1572d622719STom Zanussi }
1582d622719STom Zanussi 
1592d622719STom Zanussi static void rb_event_set_padding(struct ring_buffer_event *event)
1602d622719STom Zanussi {
161a1863c21SSteven Rostedt 	/* padding has a NULL time_delta */
162334d4169SLai Jiangshan 	event->type_len = RINGBUF_TYPE_PADDING;
1632d622719STom Zanussi 	event->time_delta = 0;
1642d622719STom Zanussi }
1652d622719STom Zanussi 
1662d622719STom Zanussi static unsigned
1672d622719STom Zanussi rb_event_data_length(struct ring_buffer_event *event)
1682d622719STom Zanussi {
1692d622719STom Zanussi 	unsigned length;
1702d622719STom Zanussi 
171334d4169SLai Jiangshan 	if (event->type_len)
172334d4169SLai Jiangshan 		length = event->type_len * RB_ALIGNMENT;
1732d622719STom Zanussi 	else
1742d622719STom Zanussi 		length = event->array[0];
1752d622719STom Zanussi 	return length + RB_EVNT_HDR_SIZE;
1762d622719STom Zanussi }
1772d622719STom Zanussi 
17869d1b839SSteven Rostedt /*
17969d1b839SSteven Rostedt  * Return the length of the given event. Will return
18069d1b839SSteven Rostedt  * the length of the time extend if the event is a
18169d1b839SSteven Rostedt  * time extend.
18269d1b839SSteven Rostedt  */
18369d1b839SSteven Rostedt static inline unsigned
1847a8e76a3SSteven Rostedt rb_event_length(struct ring_buffer_event *event)
1857a8e76a3SSteven Rostedt {
186334d4169SLai Jiangshan 	switch (event->type_len) {
1877a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_PADDING:
1882d622719STom Zanussi 		if (rb_null_event(event))
1897a8e76a3SSteven Rostedt 			/* undefined */
1907a8e76a3SSteven Rostedt 			return -1;
191334d4169SLai Jiangshan 		return  event->array[0] + RB_EVNT_HDR_SIZE;
1927a8e76a3SSteven Rostedt 
1937a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_TIME_EXTEND:
1947a8e76a3SSteven Rostedt 		return RB_LEN_TIME_EXTEND;
1957a8e76a3SSteven Rostedt 
1967a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_TIME_STAMP:
1977a8e76a3SSteven Rostedt 		return RB_LEN_TIME_STAMP;
1987a8e76a3SSteven Rostedt 
1997a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_DATA:
2002d622719STom Zanussi 		return rb_event_data_length(event);
2017a8e76a3SSteven Rostedt 	default:
2027a8e76a3SSteven Rostedt 		BUG();
2037a8e76a3SSteven Rostedt 	}
2047a8e76a3SSteven Rostedt 	/* not hit */
2057a8e76a3SSteven Rostedt 	return 0;
2067a8e76a3SSteven Rostedt }
2077a8e76a3SSteven Rostedt 
20869d1b839SSteven Rostedt /*
20969d1b839SSteven Rostedt  * Return total length of time extend and data,
21069d1b839SSteven Rostedt  *   or just the event length for all other events.
21169d1b839SSteven Rostedt  */
21269d1b839SSteven Rostedt static inline unsigned
21369d1b839SSteven Rostedt rb_event_ts_length(struct ring_buffer_event *event)
21469d1b839SSteven Rostedt {
21569d1b839SSteven Rostedt 	unsigned len = 0;
21669d1b839SSteven Rostedt 
217*dc4e2801STom Zanussi 	if (extended_time(event)) {
21869d1b839SSteven Rostedt 		/* time extends include the data event after it */
21969d1b839SSteven Rostedt 		len = RB_LEN_TIME_EXTEND;
22069d1b839SSteven Rostedt 		event = skip_time_extend(event);
22169d1b839SSteven Rostedt 	}
22269d1b839SSteven Rostedt 	return len + rb_event_length(event);
22369d1b839SSteven Rostedt }
22469d1b839SSteven Rostedt 
2257a8e76a3SSteven Rostedt /**
2267a8e76a3SSteven Rostedt  * ring_buffer_event_length - return the length of the event
2277a8e76a3SSteven Rostedt  * @event: the event to get the length of
22869d1b839SSteven Rostedt  *
22969d1b839SSteven Rostedt  * Returns the size of the data load of a data event.
23069d1b839SSteven Rostedt  * If the event is something other than a data event, it
23169d1b839SSteven Rostedt  * returns the size of the event itself. With the exception
23269d1b839SSteven Rostedt  * of a TIME EXTEND, where it still returns the size of the
23369d1b839SSteven Rostedt  * data load of the data event after it.
2347a8e76a3SSteven Rostedt  */
2357a8e76a3SSteven Rostedt unsigned ring_buffer_event_length(struct ring_buffer_event *event)
2367a8e76a3SSteven Rostedt {
23769d1b839SSteven Rostedt 	unsigned length;
23869d1b839SSteven Rostedt 
239*dc4e2801STom Zanussi 	if (extended_time(event))
24069d1b839SSteven Rostedt 		event = skip_time_extend(event);
24169d1b839SSteven Rostedt 
24269d1b839SSteven Rostedt 	length = rb_event_length(event);
243334d4169SLai Jiangshan 	if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
244465634adSRobert Richter 		return length;
245465634adSRobert Richter 	length -= RB_EVNT_HDR_SIZE;
246465634adSRobert Richter 	if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
247465634adSRobert Richter                 length -= sizeof(event->array[0]);
248465634adSRobert Richter 	return length;
2497a8e76a3SSteven Rostedt }
250c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_event_length);
2517a8e76a3SSteven Rostedt 
2527a8e76a3SSteven Rostedt /* inline for ring buffer fast paths */
253929ddbf3SSteven Rostedt (Red Hat) static __always_inline void *
2547a8e76a3SSteven Rostedt rb_event_data(struct ring_buffer_event *event)
2557a8e76a3SSteven Rostedt {
256*dc4e2801STom Zanussi 	if (extended_time(event))
25769d1b839SSteven Rostedt 		event = skip_time_extend(event);
258334d4169SLai Jiangshan 	BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
2597a8e76a3SSteven Rostedt 	/* If length is in len field, then array[0] has the data */
260334d4169SLai Jiangshan 	if (event->type_len)
2617a8e76a3SSteven Rostedt 		return (void *)&event->array[0];
2627a8e76a3SSteven Rostedt 	/* Otherwise length is in array[0] and array[1] has the data */
2637a8e76a3SSteven Rostedt 	return (void *)&event->array[1];
2647a8e76a3SSteven Rostedt }
2657a8e76a3SSteven Rostedt 
2667a8e76a3SSteven Rostedt /**
2677a8e76a3SSteven Rostedt  * ring_buffer_event_data - return the data of the event
2687a8e76a3SSteven Rostedt  * @event: the event to get the data from
2697a8e76a3SSteven Rostedt  */
2707a8e76a3SSteven Rostedt void *ring_buffer_event_data(struct ring_buffer_event *event)
2717a8e76a3SSteven Rostedt {
2727a8e76a3SSteven Rostedt 	return rb_event_data(event);
2737a8e76a3SSteven Rostedt }
274c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_event_data);
2757a8e76a3SSteven Rostedt 
2767a8e76a3SSteven Rostedt #define for_each_buffer_cpu(buffer, cpu)		\
2779e01c1b7SRusty Russell 	for_each_cpu(cpu, buffer->cpumask)
2787a8e76a3SSteven Rostedt 
2797a8e76a3SSteven Rostedt #define TS_SHIFT	27
2807a8e76a3SSteven Rostedt #define TS_MASK		((1ULL << TS_SHIFT) - 1)
2817a8e76a3SSteven Rostedt #define TS_DELTA_TEST	(~TS_MASK)
2827a8e76a3SSteven Rostedt 
283*dc4e2801STom Zanussi /**
284*dc4e2801STom Zanussi  * ring_buffer_event_time_stamp - return the event's extended timestamp
285*dc4e2801STom Zanussi  * @event: the event to get the timestamp of
286*dc4e2801STom Zanussi  *
287*dc4e2801STom Zanussi  * Returns the extended timestamp associated with a data event.
288*dc4e2801STom Zanussi  * An extended time_stamp is a 64-bit timestamp represented
289*dc4e2801STom Zanussi  * internally in a special way that makes the best use of space
290*dc4e2801STom Zanussi  * contained within a ring buffer event.  This function decodes
291*dc4e2801STom Zanussi  * it and maps it to a straight u64 value.
292*dc4e2801STom Zanussi  */
293*dc4e2801STom Zanussi u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event)
294*dc4e2801STom Zanussi {
295*dc4e2801STom Zanussi 	u64 ts;
296*dc4e2801STom Zanussi 
297*dc4e2801STom Zanussi 	ts = event->array[0];
298*dc4e2801STom Zanussi 	ts <<= TS_SHIFT;
299*dc4e2801STom Zanussi 	ts += event->time_delta;
300*dc4e2801STom Zanussi 
301*dc4e2801STom Zanussi 	return ts;
302*dc4e2801STom Zanussi }
303*dc4e2801STom Zanussi 
30466a8cb95SSteven Rostedt /* Flag when events were overwritten */
30566a8cb95SSteven Rostedt #define RB_MISSED_EVENTS	(1 << 31)
306ff0ff84aSSteven Rostedt /* Missed count stored at end */
307ff0ff84aSSteven Rostedt #define RB_MISSED_STORED	(1 << 30)
30866a8cb95SSteven Rostedt 
30945d8b80cSSteven Rostedt (VMware) #define RB_MISSED_FLAGS		(RB_MISSED_EVENTS|RB_MISSED_STORED)
31045d8b80cSSteven Rostedt (VMware) 
311abc9b56dSSteven Rostedt struct buffer_data_page {
3127a8e76a3SSteven Rostedt 	u64		 time_stamp;	/* page time stamp */
313c3706f00SWenji Huang 	local_t		 commit;	/* write committed index */
314649508f6SJames Hogan 	unsigned char	 data[] RB_ALIGN_DATA;	/* data of buffer page */
315abc9b56dSSteven Rostedt };
316abc9b56dSSteven Rostedt 
31777ae365eSSteven Rostedt /*
31877ae365eSSteven Rostedt  * Note, the buffer_page list must be first. The buffer pages
31977ae365eSSteven Rostedt  * are allocated in cache lines, which means that each buffer
32077ae365eSSteven Rostedt  * page will be at the beginning of a cache line, and thus
32177ae365eSSteven Rostedt  * the least significant bits will be zero. We use this to
32277ae365eSSteven Rostedt  * add flags in the list struct pointers, to make the ring buffer
32377ae365eSSteven Rostedt  * lockless.
32477ae365eSSteven Rostedt  */
325abc9b56dSSteven Rostedt struct buffer_page {
326778c55d4SSteven Rostedt 	struct list_head list;		/* list of buffer pages */
327abc9b56dSSteven Rostedt 	local_t		 write;		/* index for next write */
3286f807acdSSteven Rostedt 	unsigned	 read;		/* index for next read */
329778c55d4SSteven Rostedt 	local_t		 entries;	/* entries on this page */
330ff0ff84aSSteven Rostedt 	unsigned long	 real_end;	/* real end of data */
331abc9b56dSSteven Rostedt 	struct buffer_data_page *page;	/* Actual data page */
3327a8e76a3SSteven Rostedt };
3337a8e76a3SSteven Rostedt 
33477ae365eSSteven Rostedt /*
33577ae365eSSteven Rostedt  * The buffer page counters, write and entries, must be reset
33677ae365eSSteven Rostedt  * atomically when crossing page boundaries. To synchronize this
33777ae365eSSteven Rostedt  * update, two counters are inserted into the number. One is
33877ae365eSSteven Rostedt  * the actual counter for the write position or count on the page.
33977ae365eSSteven Rostedt  *
34077ae365eSSteven Rostedt  * The other is a counter of updaters. Before an update happens
34177ae365eSSteven Rostedt  * the update partition of the counter is incremented. This will
34277ae365eSSteven Rostedt  * allow the updater to update the counter atomically.
34377ae365eSSteven Rostedt  *
34477ae365eSSteven Rostedt  * The counter is 20 bits, and the state data is 12.
34577ae365eSSteven Rostedt  */
34677ae365eSSteven Rostedt #define RB_WRITE_MASK		0xfffff
34777ae365eSSteven Rostedt #define RB_WRITE_INTCNT		(1 << 20)
34877ae365eSSteven Rostedt 
349044fa782SSteven Rostedt static void rb_init_page(struct buffer_data_page *bpage)
350abc9b56dSSteven Rostedt {
351044fa782SSteven Rostedt 	local_set(&bpage->commit, 0);
352abc9b56dSSteven Rostedt }
353abc9b56dSSteven Rostedt 
354474d32b6SSteven Rostedt /**
355474d32b6SSteven Rostedt  * ring_buffer_page_len - the size of data on the page.
356474d32b6SSteven Rostedt  * @page: The page to read
357474d32b6SSteven Rostedt  *
358474d32b6SSteven Rostedt  * Returns the amount of data on the page, including buffer page header.
359474d32b6SSteven Rostedt  */
360ef7a4a16SSteven Rostedt size_t ring_buffer_page_len(void *page)
361ef7a4a16SSteven Rostedt {
36245d8b80cSSteven Rostedt (VMware) 	struct buffer_data_page *bpage = page;
36345d8b80cSSteven Rostedt (VMware) 
36445d8b80cSSteven Rostedt (VMware) 	return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS)
365474d32b6SSteven Rostedt 		+ BUF_PAGE_HDR_SIZE;
366ef7a4a16SSteven Rostedt }
367ef7a4a16SSteven Rostedt 
3687a8e76a3SSteven Rostedt /*
369ed56829cSSteven Rostedt  * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
370ed56829cSSteven Rostedt  * this issue out.
371ed56829cSSteven Rostedt  */
37234a148bfSAndrew Morton static void free_buffer_page(struct buffer_page *bpage)
373ed56829cSSteven Rostedt {
3746ae2a076SSteven Rostedt 	free_page((unsigned long)bpage->page);
375e4c2ce82SSteven Rostedt 	kfree(bpage);
376ed56829cSSteven Rostedt }
377ed56829cSSteven Rostedt 
378ed56829cSSteven Rostedt /*
3797a8e76a3SSteven Rostedt  * We need to fit the time_stamp delta into 27 bits.
3807a8e76a3SSteven Rostedt  */
3817a8e76a3SSteven Rostedt static inline int test_time_stamp(u64 delta)
3827a8e76a3SSteven Rostedt {
3837a8e76a3SSteven Rostedt 	if (delta & TS_DELTA_TEST)
3847a8e76a3SSteven Rostedt 		return 1;
3857a8e76a3SSteven Rostedt 	return 0;
3867a8e76a3SSteven Rostedt }
3877a8e76a3SSteven Rostedt 
388474d32b6SSteven Rostedt #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)
3897a8e76a3SSteven Rostedt 
390be957c44SSteven Rostedt /* Max payload is BUF_PAGE_SIZE - header (8bytes) */
391be957c44SSteven Rostedt #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
392be957c44SSteven Rostedt 
393d1b182a8SSteven Rostedt int ring_buffer_print_page_header(struct trace_seq *s)
394d1b182a8SSteven Rostedt {
395d1b182a8SSteven Rostedt 	struct buffer_data_page field;
396d1b182a8SSteven Rostedt 
397c0cd93aaSSteven Rostedt (Red Hat) 	trace_seq_printf(s, "\tfield: u64 timestamp;\t"
39826a50744STom Zanussi 			 "offset:0;\tsize:%u;\tsigned:%u;\n",
39926a50744STom Zanussi 			 (unsigned int)sizeof(field.time_stamp),
40026a50744STom Zanussi 			 (unsigned int)is_signed_type(u64));
401d1b182a8SSteven Rostedt 
402c0cd93aaSSteven Rostedt (Red Hat) 	trace_seq_printf(s, "\tfield: local_t commit;\t"
40326a50744STom Zanussi 			 "offset:%u;\tsize:%u;\tsigned:%u;\n",
404d1b182a8SSteven Rostedt 			 (unsigned int)offsetof(typeof(field), commit),
40526a50744STom Zanussi 			 (unsigned int)sizeof(field.commit),
40626a50744STom Zanussi 			 (unsigned int)is_signed_type(long));
407d1b182a8SSteven Rostedt 
408c0cd93aaSSteven Rostedt (Red Hat) 	trace_seq_printf(s, "\tfield: int overwrite;\t"
40966a8cb95SSteven Rostedt 			 "offset:%u;\tsize:%u;\tsigned:%u;\n",
41066a8cb95SSteven Rostedt 			 (unsigned int)offsetof(typeof(field), commit),
41166a8cb95SSteven Rostedt 			 1,
41266a8cb95SSteven Rostedt 			 (unsigned int)is_signed_type(long));
41366a8cb95SSteven Rostedt 
414c0cd93aaSSteven Rostedt (Red Hat) 	trace_seq_printf(s, "\tfield: char data;\t"
41526a50744STom Zanussi 			 "offset:%u;\tsize:%u;\tsigned:%u;\n",
416d1b182a8SSteven Rostedt 			 (unsigned int)offsetof(typeof(field), data),
41726a50744STom Zanussi 			 (unsigned int)BUF_PAGE_SIZE,
41826a50744STom Zanussi 			 (unsigned int)is_signed_type(char));
419d1b182a8SSteven Rostedt 
420c0cd93aaSSteven Rostedt (Red Hat) 	return !trace_seq_has_overflowed(s);
421d1b182a8SSteven Rostedt }
422d1b182a8SSteven Rostedt 
42315693458SSteven Rostedt (Red Hat) struct rb_irq_work {
42415693458SSteven Rostedt (Red Hat) 	struct irq_work			work;
42515693458SSteven Rostedt (Red Hat) 	wait_queue_head_t		waiters;
4261e0d6714SSteven Rostedt (Red Hat) 	wait_queue_head_t		full_waiters;
42715693458SSteven Rostedt (Red Hat) 	bool				waiters_pending;
4281e0d6714SSteven Rostedt (Red Hat) 	bool				full_waiters_pending;
4291e0d6714SSteven Rostedt (Red Hat) 	bool				wakeup_full;
43015693458SSteven Rostedt (Red Hat) };
43115693458SSteven Rostedt (Red Hat) 
4327a8e76a3SSteven Rostedt /*
433fcc742eaSSteven Rostedt (Red Hat)  * Structure to hold event state and handle nested events.
434fcc742eaSSteven Rostedt (Red Hat)  */
435fcc742eaSSteven Rostedt (Red Hat) struct rb_event_info {
436fcc742eaSSteven Rostedt (Red Hat) 	u64			ts;
437fcc742eaSSteven Rostedt (Red Hat) 	u64			delta;
438fcc742eaSSteven Rostedt (Red Hat) 	unsigned long		length;
439fcc742eaSSteven Rostedt (Red Hat) 	struct buffer_page	*tail_page;
440fcc742eaSSteven Rostedt (Red Hat) 	int			add_timestamp;
441fcc742eaSSteven Rostedt (Red Hat) };
442fcc742eaSSteven Rostedt (Red Hat) 
443fcc742eaSSteven Rostedt (Red Hat) /*
444a497adb4SSteven Rostedt (Red Hat)  * Used for which event context the event is in.
445a497adb4SSteven Rostedt (Red Hat)  *  NMI     = 0
446a497adb4SSteven Rostedt (Red Hat)  *  IRQ     = 1
447a497adb4SSteven Rostedt (Red Hat)  *  SOFTIRQ = 2
448a497adb4SSteven Rostedt (Red Hat)  *  NORMAL  = 3
449a497adb4SSteven Rostedt (Red Hat)  *
450a497adb4SSteven Rostedt (Red Hat)  * See trace_recursive_lock() comment below for more details.
451a497adb4SSteven Rostedt (Red Hat)  */
452a497adb4SSteven Rostedt (Red Hat) enum {
453a497adb4SSteven Rostedt (Red Hat) 	RB_CTX_NMI,
454a497adb4SSteven Rostedt (Red Hat) 	RB_CTX_IRQ,
455a497adb4SSteven Rostedt (Red Hat) 	RB_CTX_SOFTIRQ,
456a497adb4SSteven Rostedt (Red Hat) 	RB_CTX_NORMAL,
457a497adb4SSteven Rostedt (Red Hat) 	RB_CTX_MAX
458a497adb4SSteven Rostedt (Red Hat) };
459a497adb4SSteven Rostedt (Red Hat) 
460a497adb4SSteven Rostedt (Red Hat) /*
4617a8e76a3SSteven Rostedt  * head_page == tail_page && head == tail then buffer is empty.
4627a8e76a3SSteven Rostedt  */
4637a8e76a3SSteven Rostedt struct ring_buffer_per_cpu {
4647a8e76a3SSteven Rostedt 	int				cpu;
465985023deSRichard Kennedy 	atomic_t			record_disabled;
4667a8e76a3SSteven Rostedt 	struct ring_buffer		*buffer;
4675389f6faSThomas Gleixner 	raw_spinlock_t			reader_lock;	/* serialize readers */
468445c8951SThomas Gleixner 	arch_spinlock_t			lock;
4697a8e76a3SSteven Rostedt 	struct lock_class_key		lock_key;
47073a757e6SSteven Rostedt (VMware) 	struct buffer_data_page		*free_page;
4719b94a8fbSSteven Rostedt (Red Hat) 	unsigned long			nr_pages;
47258a09ec6SSteven Rostedt (Red Hat) 	unsigned int			current_context;
4733adc54faSSteven Rostedt 	struct list_head		*pages;
4746f807acdSSteven Rostedt 	struct buffer_page		*head_page;	/* read from head */
4756f807acdSSteven Rostedt 	struct buffer_page		*tail_page;	/* write to tail */
476c3706f00SWenji Huang 	struct buffer_page		*commit_page;	/* committed pages */
477d769041fSSteven Rostedt 	struct buffer_page		*reader_page;
47866a8cb95SSteven Rostedt 	unsigned long			lost_events;
47966a8cb95SSteven Rostedt 	unsigned long			last_overrun;
480c64e148aSVaibhav Nagarnaik 	local_t				entries_bytes;
481e4906effSSteven Rostedt 	local_t				entries;
482884bfe89SSlava Pestov 	local_t				overrun;
483884bfe89SSlava Pestov 	local_t				commit_overrun;
484884bfe89SSlava Pestov 	local_t				dropped_events;
485fa743953SSteven Rostedt 	local_t				committing;
486fa743953SSteven Rostedt 	local_t				commits;
48777ae365eSSteven Rostedt 	unsigned long			read;
488c64e148aSVaibhav Nagarnaik 	unsigned long			read_bytes;
4897a8e76a3SSteven Rostedt 	u64				write_stamp;
4907a8e76a3SSteven Rostedt 	u64				read_stamp;
491438ced17SVaibhav Nagarnaik 	/* ring buffer pages to update, > 0 to add, < 0 to remove */
4929b94a8fbSSteven Rostedt (Red Hat) 	long				nr_pages_to_update;
493438ced17SVaibhav Nagarnaik 	struct list_head		new_pages; /* new pages to add */
49483f40318SVaibhav Nagarnaik 	struct work_struct		update_pages_work;
49505fdd70dSVaibhav Nagarnaik 	struct completion		update_done;
49615693458SSteven Rostedt (Red Hat) 
49715693458SSteven Rostedt (Red Hat) 	struct rb_irq_work		irq_work;
4987a8e76a3SSteven Rostedt };
4997a8e76a3SSteven Rostedt 
5007a8e76a3SSteven Rostedt struct ring_buffer {
5017a8e76a3SSteven Rostedt 	unsigned			flags;
5027a8e76a3SSteven Rostedt 	int				cpus;
5037a8e76a3SSteven Rostedt 	atomic_t			record_disabled;
50483f40318SVaibhav Nagarnaik 	atomic_t			resize_disabled;
50500f62f61SArnaldo Carvalho de Melo 	cpumask_var_t			cpumask;
5067a8e76a3SSteven Rostedt 
5071f8a6a10SPeter Zijlstra 	struct lock_class_key		*reader_lock_key;
5081f8a6a10SPeter Zijlstra 
5097a8e76a3SSteven Rostedt 	struct mutex			mutex;
5107a8e76a3SSteven Rostedt 
5117a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu	**buffers;
512554f786eSSteven Rostedt 
513b32614c0SSebastian Andrzej Siewior 	struct hlist_node		node;
51437886f6aSSteven Rostedt 	u64				(*clock)(void);
51515693458SSteven Rostedt (Red Hat) 
51615693458SSteven Rostedt (Red Hat) 	struct rb_irq_work		irq_work;
51700b41452STom Zanussi 	bool				time_stamp_abs;
5187a8e76a3SSteven Rostedt };
5197a8e76a3SSteven Rostedt 
5207a8e76a3SSteven Rostedt struct ring_buffer_iter {
5217a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu	*cpu_buffer;
5227a8e76a3SSteven Rostedt 	unsigned long			head;
5237a8e76a3SSteven Rostedt 	struct buffer_page		*head_page;
524492a74f4SSteven Rostedt 	struct buffer_page		*cache_reader_page;
525492a74f4SSteven Rostedt 	unsigned long			cache_read;
5267a8e76a3SSteven Rostedt 	u64				read_stamp;
5277a8e76a3SSteven Rostedt };
5287a8e76a3SSteven Rostedt 
52915693458SSteven Rostedt (Red Hat) /*
53015693458SSteven Rostedt (Red Hat)  * rb_wake_up_waiters - wake up tasks waiting for ring buffer input
53115693458SSteven Rostedt (Red Hat)  *
53215693458SSteven Rostedt (Red Hat)  * Schedules a delayed work to wake up any task that is blocked on the
53315693458SSteven Rostedt (Red Hat)  * ring buffer waiters queue.
53415693458SSteven Rostedt (Red Hat)  */
53515693458SSteven Rostedt (Red Hat) static void rb_wake_up_waiters(struct irq_work *work)
53615693458SSteven Rostedt (Red Hat) {
53715693458SSteven Rostedt (Red Hat) 	struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
53815693458SSteven Rostedt (Red Hat) 
53915693458SSteven Rostedt (Red Hat) 	wake_up_all(&rbwork->waiters);
5401e0d6714SSteven Rostedt (Red Hat) 	if (rbwork->wakeup_full) {
5411e0d6714SSteven Rostedt (Red Hat) 		rbwork->wakeup_full = false;
5421e0d6714SSteven Rostedt (Red Hat) 		wake_up_all(&rbwork->full_waiters);
5431e0d6714SSteven Rostedt (Red Hat) 	}
54415693458SSteven Rostedt (Red Hat) }
54515693458SSteven Rostedt (Red Hat) 
54615693458SSteven Rostedt (Red Hat) /**
54715693458SSteven Rostedt (Red Hat)  * ring_buffer_wait - wait for input to the ring buffer
54815693458SSteven Rostedt (Red Hat)  * @buffer: buffer to wait on
54915693458SSteven Rostedt (Red Hat)  * @cpu: the cpu buffer to wait on
550e30f53aaSRabin Vincent  * @full: wait until a full page is available, if @cpu != RING_BUFFER_ALL_CPUS
55115693458SSteven Rostedt (Red Hat)  *
55215693458SSteven Rostedt (Red Hat)  * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
55315693458SSteven Rostedt (Red Hat)  * as data is added to any of the @buffer's cpu buffers. Otherwise
55415693458SSteven Rostedt (Red Hat)  * it will wait for data to be added to a specific cpu buffer.
55515693458SSteven Rostedt (Red Hat)  */
556e30f53aaSRabin Vincent int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
55715693458SSteven Rostedt (Red Hat) {
558e30f53aaSRabin Vincent 	struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer);
55915693458SSteven Rostedt (Red Hat) 	DEFINE_WAIT(wait);
56015693458SSteven Rostedt (Red Hat) 	struct rb_irq_work *work;
561e30f53aaSRabin Vincent 	int ret = 0;
56215693458SSteven Rostedt (Red Hat) 
56315693458SSteven Rostedt (Red Hat) 	/*
56415693458SSteven Rostedt (Red Hat) 	 * Depending on what the caller is waiting for, either any
56515693458SSteven Rostedt (Red Hat) 	 * data in any cpu buffer, or a specific buffer, put the
56615693458SSteven Rostedt (Red Hat) 	 * caller on the appropriate wait queue.
56715693458SSteven Rostedt (Red Hat) 	 */
5681e0d6714SSteven Rostedt (Red Hat) 	if (cpu == RING_BUFFER_ALL_CPUS) {
56915693458SSteven Rostedt (Red Hat) 		work = &buffer->irq_work;
5701e0d6714SSteven Rostedt (Red Hat) 		/* Full only makes sense on per cpu reads */
5711e0d6714SSteven Rostedt (Red Hat) 		full = false;
5721e0d6714SSteven Rostedt (Red Hat) 	} else {
5738b8b3683SSteven Rostedt (Red Hat) 		if (!cpumask_test_cpu(cpu, buffer->cpumask))
5748b8b3683SSteven Rostedt (Red Hat) 			return -ENODEV;
57515693458SSteven Rostedt (Red Hat) 		cpu_buffer = buffer->buffers[cpu];
57615693458SSteven Rostedt (Red Hat) 		work = &cpu_buffer->irq_work;
57715693458SSteven Rostedt (Red Hat) 	}
57815693458SSteven Rostedt (Red Hat) 
57915693458SSteven Rostedt (Red Hat) 
580e30f53aaSRabin Vincent 	while (true) {
5811e0d6714SSteven Rostedt (Red Hat) 		if (full)
5821e0d6714SSteven Rostedt (Red Hat) 			prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
5831e0d6714SSteven Rostedt (Red Hat) 		else
58415693458SSteven Rostedt (Red Hat) 			prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
58515693458SSteven Rostedt (Red Hat) 
58615693458SSteven Rostedt (Red Hat) 		/*
58715693458SSteven Rostedt (Red Hat) 		 * The events can happen in critical sections where
58815693458SSteven Rostedt (Red Hat) 		 * checking a work queue can cause deadlocks.
58915693458SSteven Rostedt (Red Hat) 		 * After adding a task to the queue, this flag is set
59015693458SSteven Rostedt (Red Hat) 		 * only to notify events to try to wake up the queue
59115693458SSteven Rostedt (Red Hat) 		 * using irq_work.
59215693458SSteven Rostedt (Red Hat) 		 *
59315693458SSteven Rostedt (Red Hat) 		 * We don't clear it even if the buffer is no longer
59415693458SSteven Rostedt (Red Hat) 		 * empty. The flag only causes the next event to run
59515693458SSteven Rostedt (Red Hat) 		 * irq_work to do the work queue wake up. The worse
59615693458SSteven Rostedt (Red Hat) 		 * that can happen if we race with !trace_empty() is that
59715693458SSteven Rostedt (Red Hat) 		 * an event will cause an irq_work to try to wake up
59815693458SSteven Rostedt (Red Hat) 		 * an empty queue.
59915693458SSteven Rostedt (Red Hat) 		 *
60015693458SSteven Rostedt (Red Hat) 		 * There's no reason to protect this flag either, as
60115693458SSteven Rostedt (Red Hat) 		 * the work queue and irq_work logic will do the necessary
60215693458SSteven Rostedt (Red Hat) 		 * synchronization for the wake ups. The only thing
60315693458SSteven Rostedt (Red Hat) 		 * that is necessary is that the wake up happens after
60415693458SSteven Rostedt (Red Hat) 		 * a task has been queued. It's OK for spurious wake ups.
60515693458SSteven Rostedt (Red Hat) 		 */
6061e0d6714SSteven Rostedt (Red Hat) 		if (full)
6071e0d6714SSteven Rostedt (Red Hat) 			work->full_waiters_pending = true;
6081e0d6714SSteven Rostedt (Red Hat) 		else
60915693458SSteven Rostedt (Red Hat) 			work->waiters_pending = true;
61015693458SSteven Rostedt (Red Hat) 
611e30f53aaSRabin Vincent 		if (signal_pending(current)) {
612e30f53aaSRabin Vincent 			ret = -EINTR;
613e30f53aaSRabin Vincent 			break;
614e30f53aaSRabin Vincent 		}
615e30f53aaSRabin Vincent 
616e30f53aaSRabin Vincent 		if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer))
617e30f53aaSRabin Vincent 			break;
618e30f53aaSRabin Vincent 
619e30f53aaSRabin Vincent 		if (cpu != RING_BUFFER_ALL_CPUS &&
620e30f53aaSRabin Vincent 		    !ring_buffer_empty_cpu(buffer, cpu)) {
621e30f53aaSRabin Vincent 			unsigned long flags;
622e30f53aaSRabin Vincent 			bool pagebusy;
623e30f53aaSRabin Vincent 
624e30f53aaSRabin Vincent 			if (!full)
625e30f53aaSRabin Vincent 				break;
626e30f53aaSRabin Vincent 
627e30f53aaSRabin Vincent 			raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
628e30f53aaSRabin Vincent 			pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
629e30f53aaSRabin Vincent 			raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
630e30f53aaSRabin Vincent 
631e30f53aaSRabin Vincent 			if (!pagebusy)
632e30f53aaSRabin Vincent 				break;
633e30f53aaSRabin Vincent 		}
634e30f53aaSRabin Vincent 
63515693458SSteven Rostedt (Red Hat) 		schedule();
636e30f53aaSRabin Vincent 	}
63715693458SSteven Rostedt (Red Hat) 
6381e0d6714SSteven Rostedt (Red Hat) 	if (full)
6391e0d6714SSteven Rostedt (Red Hat) 		finish_wait(&work->full_waiters, &wait);
6401e0d6714SSteven Rostedt (Red Hat) 	else
64115693458SSteven Rostedt (Red Hat) 		finish_wait(&work->waiters, &wait);
642e30f53aaSRabin Vincent 
643e30f53aaSRabin Vincent 	return ret;
64415693458SSteven Rostedt (Red Hat) }
64515693458SSteven Rostedt (Red Hat) 
64615693458SSteven Rostedt (Red Hat) /**
64715693458SSteven Rostedt (Red Hat)  * ring_buffer_poll_wait - poll on buffer input
64815693458SSteven Rostedt (Red Hat)  * @buffer: buffer to wait on
64915693458SSteven Rostedt (Red Hat)  * @cpu: the cpu buffer to wait on
65015693458SSteven Rostedt (Red Hat)  * @filp: the file descriptor
65115693458SSteven Rostedt (Red Hat)  * @poll_table: The poll descriptor
65215693458SSteven Rostedt (Red Hat)  *
65315693458SSteven Rostedt (Red Hat)  * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
65415693458SSteven Rostedt (Red Hat)  * as data is added to any of the @buffer's cpu buffers. Otherwise
65515693458SSteven Rostedt (Red Hat)  * it will wait for data to be added to a specific cpu buffer.
65615693458SSteven Rostedt (Red Hat)  *
657a9a08845SLinus Torvalds  * Returns EPOLLIN | EPOLLRDNORM if data exists in the buffers,
65815693458SSteven Rostedt (Red Hat)  * zero otherwise.
65915693458SSteven Rostedt (Red Hat)  */
660ecf92700SAl Viro __poll_t ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
66115693458SSteven Rostedt (Red Hat) 			  struct file *filp, poll_table *poll_table)
66215693458SSteven Rostedt (Red Hat) {
66315693458SSteven Rostedt (Red Hat) 	struct ring_buffer_per_cpu *cpu_buffer;
66415693458SSteven Rostedt (Red Hat) 	struct rb_irq_work *work;
66515693458SSteven Rostedt (Red Hat) 
66615693458SSteven Rostedt (Red Hat) 	if (cpu == RING_BUFFER_ALL_CPUS)
66715693458SSteven Rostedt (Red Hat) 		work = &buffer->irq_work;
66815693458SSteven Rostedt (Red Hat) 	else {
6696721cb60SSteven Rostedt (Red Hat) 		if (!cpumask_test_cpu(cpu, buffer->cpumask))
6706721cb60SSteven Rostedt (Red Hat) 			return -EINVAL;
6716721cb60SSteven Rostedt (Red Hat) 
67215693458SSteven Rostedt (Red Hat) 		cpu_buffer = buffer->buffers[cpu];
67315693458SSteven Rostedt (Red Hat) 		work = &cpu_buffer->irq_work;
67415693458SSteven Rostedt (Red Hat) 	}
67515693458SSteven Rostedt (Red Hat) 
67615693458SSteven Rostedt (Red Hat) 	poll_wait(filp, &work->waiters, poll_table);
6774ce97dbfSJosef Bacik 	work->waiters_pending = true;
6784ce97dbfSJosef Bacik 	/*
6794ce97dbfSJosef Bacik 	 * There's a tight race between setting the waiters_pending and
6804ce97dbfSJosef Bacik 	 * checking if the ring buffer is empty.  Once the waiters_pending bit
6814ce97dbfSJosef Bacik 	 * is set, the next event will wake the task up, but we can get stuck
6824ce97dbfSJosef Bacik 	 * if there's only a single event in.
6834ce97dbfSJosef Bacik 	 *
6844ce97dbfSJosef Bacik 	 * FIXME: Ideally, we need a memory barrier on the writer side as well,
6854ce97dbfSJosef Bacik 	 * but adding a memory barrier to all events will cause too much of a
6864ce97dbfSJosef Bacik 	 * performance hit in the fast path.  We only need a memory barrier when
6874ce97dbfSJosef Bacik 	 * the buffer goes from empty to having content.  But as this race is
6884ce97dbfSJosef Bacik 	 * extremely small, and it's not a problem if another event comes in, we
6894ce97dbfSJosef Bacik 	 * will fix it later.
6904ce97dbfSJosef Bacik 	 */
6914ce97dbfSJosef Bacik 	smp_mb();
69215693458SSteven Rostedt (Red Hat) 
69315693458SSteven Rostedt (Red Hat) 	if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
69415693458SSteven Rostedt (Red Hat) 	    (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
695a9a08845SLinus Torvalds 		return EPOLLIN | EPOLLRDNORM;
69615693458SSteven Rostedt (Red Hat) 	return 0;
69715693458SSteven Rostedt (Red Hat) }
69815693458SSteven Rostedt (Red Hat) 
699f536aafcSSteven Rostedt /* buffer may be either ring_buffer or ring_buffer_per_cpu */
700077c5407SSteven Rostedt #define RB_WARN_ON(b, cond)						\
7013e89c7bbSSteven Rostedt 	({								\
7023e89c7bbSSteven Rostedt 		int _____ret = unlikely(cond);				\
7033e89c7bbSSteven Rostedt 		if (_____ret) {						\
704077c5407SSteven Rostedt 			if (__same_type(*(b), struct ring_buffer_per_cpu)) { \
705077c5407SSteven Rostedt 				struct ring_buffer_per_cpu *__b =	\
706077c5407SSteven Rostedt 					(void *)b;			\
707077c5407SSteven Rostedt 				atomic_inc(&__b->buffer->record_disabled); \
708077c5407SSteven Rostedt 			} else						\
709077c5407SSteven Rostedt 				atomic_inc(&b->record_disabled);	\
710bf41a158SSteven Rostedt 			WARN_ON(1);					\
711bf41a158SSteven Rostedt 		}							\
7123e89c7bbSSteven Rostedt 		_____ret;						\
7133e89c7bbSSteven Rostedt 	})
714f536aafcSSteven Rostedt 
71537886f6aSSteven Rostedt /* Up this if you want to test the TIME_EXTENTS and normalization */
71637886f6aSSteven Rostedt #define DEBUG_SHIFT 0
71737886f6aSSteven Rostedt 
7186d3f1e12SJiri Olsa static inline u64 rb_time_stamp(struct ring_buffer *buffer)
71988eb0125SSteven Rostedt {
72088eb0125SSteven Rostedt 	/* shift to debug/test normalization and TIME_EXTENTS */
72188eb0125SSteven Rostedt 	return buffer->clock() << DEBUG_SHIFT;
72288eb0125SSteven Rostedt }
72388eb0125SSteven Rostedt 
72437886f6aSSteven Rostedt u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
72537886f6aSSteven Rostedt {
72637886f6aSSteven Rostedt 	u64 time;
72737886f6aSSteven Rostedt 
72837886f6aSSteven Rostedt 	preempt_disable_notrace();
7296d3f1e12SJiri Olsa 	time = rb_time_stamp(buffer);
73037886f6aSSteven Rostedt 	preempt_enable_no_resched_notrace();
73137886f6aSSteven Rostedt 
73237886f6aSSteven Rostedt 	return time;
73337886f6aSSteven Rostedt }
73437886f6aSSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);
73537886f6aSSteven Rostedt 
73637886f6aSSteven Rostedt void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
73737886f6aSSteven Rostedt 				      int cpu, u64 *ts)
73837886f6aSSteven Rostedt {
73937886f6aSSteven Rostedt 	/* Just stupid testing the normalize function and deltas */
74037886f6aSSteven Rostedt 	*ts >>= DEBUG_SHIFT;
74137886f6aSSteven Rostedt }
74237886f6aSSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
74337886f6aSSteven Rostedt 
74477ae365eSSteven Rostedt /*
74577ae365eSSteven Rostedt  * Making the ring buffer lockless makes things tricky.
74677ae365eSSteven Rostedt  * Although writes only happen on the CPU that they are on,
74777ae365eSSteven Rostedt  * and they only need to worry about interrupts. Reads can
74877ae365eSSteven Rostedt  * happen on any CPU.
74977ae365eSSteven Rostedt  *
75077ae365eSSteven Rostedt  * The reader page is always off the ring buffer, but when the
75177ae365eSSteven Rostedt  * reader finishes with a page, it needs to swap its page with
75277ae365eSSteven Rostedt  * a new one from the buffer. The reader needs to take from
75377ae365eSSteven Rostedt  * the head (writes go to the tail). But if a writer is in overwrite
75477ae365eSSteven Rostedt  * mode and wraps, it must push the head page forward.
75577ae365eSSteven Rostedt  *
75677ae365eSSteven Rostedt  * Here lies the problem.
75777ae365eSSteven Rostedt  *
75877ae365eSSteven Rostedt  * The reader must be careful to replace only the head page, and
75977ae365eSSteven Rostedt  * not another one. As described at the top of the file in the
76077ae365eSSteven Rostedt  * ASCII art, the reader sets its old page to point to the next
76177ae365eSSteven Rostedt  * page after head. It then sets the page after head to point to
76277ae365eSSteven Rostedt  * the old reader page. But if the writer moves the head page
76377ae365eSSteven Rostedt  * during this operation, the reader could end up with the tail.
76477ae365eSSteven Rostedt  *
76577ae365eSSteven Rostedt  * We use cmpxchg to help prevent this race. We also do something
76677ae365eSSteven Rostedt  * special with the page before head. We set the LSB to 1.
76777ae365eSSteven Rostedt  *
76877ae365eSSteven Rostedt  * When the writer must push the page forward, it will clear the
76977ae365eSSteven Rostedt  * bit that points to the head page, move the head, and then set
77077ae365eSSteven Rostedt  * the bit that points to the new head page.
77177ae365eSSteven Rostedt  *
77277ae365eSSteven Rostedt  * We also don't want an interrupt coming in and moving the head
77377ae365eSSteven Rostedt  * page on another writer. Thus we use the second LSB to catch
77477ae365eSSteven Rostedt  * that too. Thus:
77577ae365eSSteven Rostedt  *
77677ae365eSSteven Rostedt  * head->list->prev->next        bit 1          bit 0
77777ae365eSSteven Rostedt  *                              -------        -------
77877ae365eSSteven Rostedt  * Normal page                     0              0
77977ae365eSSteven Rostedt  * Points to head page             0              1
78077ae365eSSteven Rostedt  * New head page                   1              0
78177ae365eSSteven Rostedt  *
78277ae365eSSteven Rostedt  * Note we can not trust the prev pointer of the head page, because:
78377ae365eSSteven Rostedt  *
78477ae365eSSteven Rostedt  * +----+       +-----+        +-----+
78577ae365eSSteven Rostedt  * |    |------>|  T  |---X--->|  N  |
78677ae365eSSteven Rostedt  * |    |<------|     |        |     |
78777ae365eSSteven Rostedt  * +----+       +-----+        +-----+
78877ae365eSSteven Rostedt  *   ^                           ^ |
78977ae365eSSteven Rostedt  *   |          +-----+          | |
79077ae365eSSteven Rostedt  *   +----------|  R  |----------+ |
79177ae365eSSteven Rostedt  *              |     |<-----------+
79277ae365eSSteven Rostedt  *              +-----+
79377ae365eSSteven Rostedt  *
79477ae365eSSteven Rostedt  * Key:  ---X-->  HEAD flag set in pointer
79577ae365eSSteven Rostedt  *         T      Tail page
79677ae365eSSteven Rostedt  *         R      Reader page
79777ae365eSSteven Rostedt  *         N      Next page
79877ae365eSSteven Rostedt  *
79977ae365eSSteven Rostedt  * (see __rb_reserve_next() to see where this happens)
80077ae365eSSteven Rostedt  *
80177ae365eSSteven Rostedt  *  What the above shows is that the reader just swapped out
80277ae365eSSteven Rostedt  *  the reader page with a page in the buffer, but before it
80377ae365eSSteven Rostedt  *  could make the new header point back to the new page added
80477ae365eSSteven Rostedt  *  it was preempted by a writer. The writer moved forward onto
80577ae365eSSteven Rostedt  *  the new page added by the reader and is about to move forward
80677ae365eSSteven Rostedt  *  again.
80777ae365eSSteven Rostedt  *
80877ae365eSSteven Rostedt  *  You can see, it is legitimate for the previous pointer of
80977ae365eSSteven Rostedt  *  the head (or any page) not to point back to itself. But only
81077ae365eSSteven Rostedt  *  temporarially.
81177ae365eSSteven Rostedt  */
81277ae365eSSteven Rostedt 
81377ae365eSSteven Rostedt #define RB_PAGE_NORMAL		0UL
81477ae365eSSteven Rostedt #define RB_PAGE_HEAD		1UL
81577ae365eSSteven Rostedt #define RB_PAGE_UPDATE		2UL
81677ae365eSSteven Rostedt 
81777ae365eSSteven Rostedt 
81877ae365eSSteven Rostedt #define RB_FLAG_MASK		3UL
81977ae365eSSteven Rostedt 
82077ae365eSSteven Rostedt /* PAGE_MOVED is not part of the mask */
82177ae365eSSteven Rostedt #define RB_PAGE_MOVED		4UL
82277ae365eSSteven Rostedt 
82377ae365eSSteven Rostedt /*
82477ae365eSSteven Rostedt  * rb_list_head - remove any bit
82577ae365eSSteven Rostedt  */
82677ae365eSSteven Rostedt static struct list_head *rb_list_head(struct list_head *list)
82777ae365eSSteven Rostedt {
82877ae365eSSteven Rostedt 	unsigned long val = (unsigned long)list;
82977ae365eSSteven Rostedt 
83077ae365eSSteven Rostedt 	return (struct list_head *)(val & ~RB_FLAG_MASK);
83177ae365eSSteven Rostedt }
83277ae365eSSteven Rostedt 
83377ae365eSSteven Rostedt /*
8346d3f1e12SJiri Olsa  * rb_is_head_page - test if the given page is the head page
83577ae365eSSteven Rostedt  *
83677ae365eSSteven Rostedt  * Because the reader may move the head_page pointer, we can
83777ae365eSSteven Rostedt  * not trust what the head page is (it may be pointing to
83877ae365eSSteven Rostedt  * the reader page). But if the next page is a header page,
83977ae365eSSteven Rostedt  * its flags will be non zero.
84077ae365eSSteven Rostedt  */
84142b16b3fSJesper Juhl static inline int
84277ae365eSSteven Rostedt rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer,
84377ae365eSSteven Rostedt 		struct buffer_page *page, struct list_head *list)
84477ae365eSSteven Rostedt {
84577ae365eSSteven Rostedt 	unsigned long val;
84677ae365eSSteven Rostedt 
84777ae365eSSteven Rostedt 	val = (unsigned long)list->next;
84877ae365eSSteven Rostedt 
84977ae365eSSteven Rostedt 	if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list)
85077ae365eSSteven Rostedt 		return RB_PAGE_MOVED;
85177ae365eSSteven Rostedt 
85277ae365eSSteven Rostedt 	return val & RB_FLAG_MASK;
85377ae365eSSteven Rostedt }
85477ae365eSSteven Rostedt 
85577ae365eSSteven Rostedt /*
85677ae365eSSteven Rostedt  * rb_is_reader_page
85777ae365eSSteven Rostedt  *
85877ae365eSSteven Rostedt  * The unique thing about the reader page, is that, if the
85977ae365eSSteven Rostedt  * writer is ever on it, the previous pointer never points
86077ae365eSSteven Rostedt  * back to the reader page.
86177ae365eSSteven Rostedt  */
86206ca3209SYaowei Bai static bool rb_is_reader_page(struct buffer_page *page)
86377ae365eSSteven Rostedt {
86477ae365eSSteven Rostedt 	struct list_head *list = page->list.prev;
86577ae365eSSteven Rostedt 
86677ae365eSSteven Rostedt 	return rb_list_head(list->next) != &page->list;
86777ae365eSSteven Rostedt }
86877ae365eSSteven Rostedt 
86977ae365eSSteven Rostedt /*
87077ae365eSSteven Rostedt  * rb_set_list_to_head - set a list_head to be pointing to head.
87177ae365eSSteven Rostedt  */
87277ae365eSSteven Rostedt static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer,
87377ae365eSSteven Rostedt 				struct list_head *list)
87477ae365eSSteven Rostedt {
87577ae365eSSteven Rostedt 	unsigned long *ptr;
87677ae365eSSteven Rostedt 
87777ae365eSSteven Rostedt 	ptr = (unsigned long *)&list->next;
87877ae365eSSteven Rostedt 	*ptr |= RB_PAGE_HEAD;
87977ae365eSSteven Rostedt 	*ptr &= ~RB_PAGE_UPDATE;
88077ae365eSSteven Rostedt }
88177ae365eSSteven Rostedt 
88277ae365eSSteven Rostedt /*
88377ae365eSSteven Rostedt  * rb_head_page_activate - sets up head page
88477ae365eSSteven Rostedt  */
88577ae365eSSteven Rostedt static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
88677ae365eSSteven Rostedt {
88777ae365eSSteven Rostedt 	struct buffer_page *head;
88877ae365eSSteven Rostedt 
88977ae365eSSteven Rostedt 	head = cpu_buffer->head_page;
89077ae365eSSteven Rostedt 	if (!head)
89177ae365eSSteven Rostedt 		return;
89277ae365eSSteven Rostedt 
89377ae365eSSteven Rostedt 	/*
89477ae365eSSteven Rostedt 	 * Set the previous list pointer to have the HEAD flag.
89577ae365eSSteven Rostedt 	 */
89677ae365eSSteven Rostedt 	rb_set_list_to_head(cpu_buffer, head->list.prev);
89777ae365eSSteven Rostedt }
89877ae365eSSteven Rostedt 
89977ae365eSSteven Rostedt static void rb_list_head_clear(struct list_head *list)
90077ae365eSSteven Rostedt {
90177ae365eSSteven Rostedt 	unsigned long *ptr = (unsigned long *)&list->next;
90277ae365eSSteven Rostedt 
90377ae365eSSteven Rostedt 	*ptr &= ~RB_FLAG_MASK;
90477ae365eSSteven Rostedt }
90577ae365eSSteven Rostedt 
90677ae365eSSteven Rostedt /*
90777ae365eSSteven Rostedt  * rb_head_page_dactivate - clears head page ptr (for free list)
90877ae365eSSteven Rostedt  */
90977ae365eSSteven Rostedt static void
91077ae365eSSteven Rostedt rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer)
91177ae365eSSteven Rostedt {
91277ae365eSSteven Rostedt 	struct list_head *hd;
91377ae365eSSteven Rostedt 
91477ae365eSSteven Rostedt 	/* Go through the whole list and clear any pointers found. */
91577ae365eSSteven Rostedt 	rb_list_head_clear(cpu_buffer->pages);
91677ae365eSSteven Rostedt 
91777ae365eSSteven Rostedt 	list_for_each(hd, cpu_buffer->pages)
91877ae365eSSteven Rostedt 		rb_list_head_clear(hd);
91977ae365eSSteven Rostedt }
92077ae365eSSteven Rostedt 
92177ae365eSSteven Rostedt static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer,
92277ae365eSSteven Rostedt 			    struct buffer_page *head,
92377ae365eSSteven Rostedt 			    struct buffer_page *prev,
92477ae365eSSteven Rostedt 			    int old_flag, int new_flag)
92577ae365eSSteven Rostedt {
92677ae365eSSteven Rostedt 	struct list_head *list;
92777ae365eSSteven Rostedt 	unsigned long val = (unsigned long)&head->list;
92877ae365eSSteven Rostedt 	unsigned long ret;
92977ae365eSSteven Rostedt 
93077ae365eSSteven Rostedt 	list = &prev->list;
93177ae365eSSteven Rostedt 
93277ae365eSSteven Rostedt 	val &= ~RB_FLAG_MASK;
93377ae365eSSteven Rostedt 
93408a40816SSteven Rostedt 	ret = cmpxchg((unsigned long *)&list->next,
93577ae365eSSteven Rostedt 		      val | old_flag, val | new_flag);
93677ae365eSSteven Rostedt 
93777ae365eSSteven Rostedt 	/* check if the reader took the page */
93877ae365eSSteven Rostedt 	if ((ret & ~RB_FLAG_MASK) != val)
93977ae365eSSteven Rostedt 		return RB_PAGE_MOVED;
94077ae365eSSteven Rostedt 
94177ae365eSSteven Rostedt 	return ret & RB_FLAG_MASK;
94277ae365eSSteven Rostedt }
94377ae365eSSteven Rostedt 
94477ae365eSSteven Rostedt static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer,
94577ae365eSSteven Rostedt 				   struct buffer_page *head,
94677ae365eSSteven Rostedt 				   struct buffer_page *prev,
94777ae365eSSteven Rostedt 				   int old_flag)
94877ae365eSSteven Rostedt {
94977ae365eSSteven Rostedt 	return rb_head_page_set(cpu_buffer, head, prev,
95077ae365eSSteven Rostedt 				old_flag, RB_PAGE_UPDATE);
95177ae365eSSteven Rostedt }
95277ae365eSSteven Rostedt 
95377ae365eSSteven Rostedt static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer,
95477ae365eSSteven Rostedt 				 struct buffer_page *head,
95577ae365eSSteven Rostedt 				 struct buffer_page *prev,
95677ae365eSSteven Rostedt 				 int old_flag)
95777ae365eSSteven Rostedt {
95877ae365eSSteven Rostedt 	return rb_head_page_set(cpu_buffer, head, prev,
95977ae365eSSteven Rostedt 				old_flag, RB_PAGE_HEAD);
96077ae365eSSteven Rostedt }
96177ae365eSSteven Rostedt 
96277ae365eSSteven Rostedt static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer,
96377ae365eSSteven Rostedt 				   struct buffer_page *head,
96477ae365eSSteven Rostedt 				   struct buffer_page *prev,
96577ae365eSSteven Rostedt 				   int old_flag)
96677ae365eSSteven Rostedt {
96777ae365eSSteven Rostedt 	return rb_head_page_set(cpu_buffer, head, prev,
96877ae365eSSteven Rostedt 				old_flag, RB_PAGE_NORMAL);
96977ae365eSSteven Rostedt }
97077ae365eSSteven Rostedt 
97177ae365eSSteven Rostedt static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
97277ae365eSSteven Rostedt 			       struct buffer_page **bpage)
97377ae365eSSteven Rostedt {
97477ae365eSSteven Rostedt 	struct list_head *p = rb_list_head((*bpage)->list.next);
97577ae365eSSteven Rostedt 
97677ae365eSSteven Rostedt 	*bpage = list_entry(p, struct buffer_page, list);
97777ae365eSSteven Rostedt }
97877ae365eSSteven Rostedt 
97977ae365eSSteven Rostedt static struct buffer_page *
98077ae365eSSteven Rostedt rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer)
98177ae365eSSteven Rostedt {
98277ae365eSSteven Rostedt 	struct buffer_page *head;
98377ae365eSSteven Rostedt 	struct buffer_page *page;
98477ae365eSSteven Rostedt 	struct list_head *list;
98577ae365eSSteven Rostedt 	int i;
98677ae365eSSteven Rostedt 
98777ae365eSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page))
98877ae365eSSteven Rostedt 		return NULL;
98977ae365eSSteven Rostedt 
99077ae365eSSteven Rostedt 	/* sanity check */
99177ae365eSSteven Rostedt 	list = cpu_buffer->pages;
99277ae365eSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list))
99377ae365eSSteven Rostedt 		return NULL;
99477ae365eSSteven Rostedt 
99577ae365eSSteven Rostedt 	page = head = cpu_buffer->head_page;
99677ae365eSSteven Rostedt 	/*
99777ae365eSSteven Rostedt 	 * It is possible that the writer moves the header behind
99877ae365eSSteven Rostedt 	 * where we started, and we miss in one loop.
99977ae365eSSteven Rostedt 	 * A second loop should grab the header, but we'll do
100077ae365eSSteven Rostedt 	 * three loops just because I'm paranoid.
100177ae365eSSteven Rostedt 	 */
100277ae365eSSteven Rostedt 	for (i = 0; i < 3; i++) {
100377ae365eSSteven Rostedt 		do {
100477ae365eSSteven Rostedt 			if (rb_is_head_page(cpu_buffer, page, page->list.prev)) {
100577ae365eSSteven Rostedt 				cpu_buffer->head_page = page;
100677ae365eSSteven Rostedt 				return page;
100777ae365eSSteven Rostedt 			}
100877ae365eSSteven Rostedt 			rb_inc_page(cpu_buffer, &page);
100977ae365eSSteven Rostedt 		} while (page != head);
101077ae365eSSteven Rostedt 	}
101177ae365eSSteven Rostedt 
101277ae365eSSteven Rostedt 	RB_WARN_ON(cpu_buffer, 1);
101377ae365eSSteven Rostedt 
101477ae365eSSteven Rostedt 	return NULL;
101577ae365eSSteven Rostedt }
101677ae365eSSteven Rostedt 
101777ae365eSSteven Rostedt static int rb_head_page_replace(struct buffer_page *old,
101877ae365eSSteven Rostedt 				struct buffer_page *new)
101977ae365eSSteven Rostedt {
102077ae365eSSteven Rostedt 	unsigned long *ptr = (unsigned long *)&old->list.prev->next;
102177ae365eSSteven Rostedt 	unsigned long val;
102277ae365eSSteven Rostedt 	unsigned long ret;
102377ae365eSSteven Rostedt 
102477ae365eSSteven Rostedt 	val = *ptr & ~RB_FLAG_MASK;
102577ae365eSSteven Rostedt 	val |= RB_PAGE_HEAD;
102677ae365eSSteven Rostedt 
102708a40816SSteven Rostedt 	ret = cmpxchg(ptr, val, (unsigned long)&new->list);
102877ae365eSSteven Rostedt 
102977ae365eSSteven Rostedt 	return ret == val;
103077ae365eSSteven Rostedt }
103177ae365eSSteven Rostedt 
103277ae365eSSteven Rostedt /*
103377ae365eSSteven Rostedt  * rb_tail_page_update - move the tail page forward
103477ae365eSSteven Rostedt  */
103570004986SSteven Rostedt (Red Hat) static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
103677ae365eSSteven Rostedt 			       struct buffer_page *tail_page,
103777ae365eSSteven Rostedt 			       struct buffer_page *next_page)
103877ae365eSSteven Rostedt {
103977ae365eSSteven Rostedt 	unsigned long old_entries;
104077ae365eSSteven Rostedt 	unsigned long old_write;
104177ae365eSSteven Rostedt 
104277ae365eSSteven Rostedt 	/*
104377ae365eSSteven Rostedt 	 * The tail page now needs to be moved forward.
104477ae365eSSteven Rostedt 	 *
104577ae365eSSteven Rostedt 	 * We need to reset the tail page, but without messing
104677ae365eSSteven Rostedt 	 * with possible erasing of data brought in by interrupts
104777ae365eSSteven Rostedt 	 * that have moved the tail page and are currently on it.
104877ae365eSSteven Rostedt 	 *
104977ae365eSSteven Rostedt 	 * We add a counter to the write field to denote this.
105077ae365eSSteven Rostedt 	 */
105177ae365eSSteven Rostedt 	old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
105277ae365eSSteven Rostedt 	old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
105377ae365eSSteven Rostedt 
105477ae365eSSteven Rostedt 	/*
105577ae365eSSteven Rostedt 	 * Just make sure we have seen our old_write and synchronize
105677ae365eSSteven Rostedt 	 * with any interrupts that come in.
105777ae365eSSteven Rostedt 	 */
105877ae365eSSteven Rostedt 	barrier();
105977ae365eSSteven Rostedt 
106077ae365eSSteven Rostedt 	/*
106177ae365eSSteven Rostedt 	 * If the tail page is still the same as what we think
106277ae365eSSteven Rostedt 	 * it is, then it is up to us to update the tail
106377ae365eSSteven Rostedt 	 * pointer.
106477ae365eSSteven Rostedt 	 */
10658573636eSSteven Rostedt (Red Hat) 	if (tail_page == READ_ONCE(cpu_buffer->tail_page)) {
106677ae365eSSteven Rostedt 		/* Zero the write counter */
106777ae365eSSteven Rostedt 		unsigned long val = old_write & ~RB_WRITE_MASK;
106877ae365eSSteven Rostedt 		unsigned long eval = old_entries & ~RB_WRITE_MASK;
106977ae365eSSteven Rostedt 
107077ae365eSSteven Rostedt 		/*
107177ae365eSSteven Rostedt 		 * This will only succeed if an interrupt did
107277ae365eSSteven Rostedt 		 * not come in and change it. In which case, we
107377ae365eSSteven Rostedt 		 * do not want to modify it.
1074da706d8bSLai Jiangshan 		 *
1075da706d8bSLai Jiangshan 		 * We add (void) to let the compiler know that we do not care
1076da706d8bSLai Jiangshan 		 * about the return value of these functions. We use the
1077da706d8bSLai Jiangshan 		 * cmpxchg to only update if an interrupt did not already
1078da706d8bSLai Jiangshan 		 * do it for us. If the cmpxchg fails, we don't care.
107977ae365eSSteven Rostedt 		 */
1080da706d8bSLai Jiangshan 		(void)local_cmpxchg(&next_page->write, old_write, val);
1081da706d8bSLai Jiangshan 		(void)local_cmpxchg(&next_page->entries, old_entries, eval);
108277ae365eSSteven Rostedt 
108377ae365eSSteven Rostedt 		/*
108477ae365eSSteven Rostedt 		 * No need to worry about races with clearing out the commit.
108577ae365eSSteven Rostedt 		 * it only can increment when a commit takes place. But that
108677ae365eSSteven Rostedt 		 * only happens in the outer most nested commit.
108777ae365eSSteven Rostedt 		 */
108877ae365eSSteven Rostedt 		local_set(&next_page->page->commit, 0);
108977ae365eSSteven Rostedt 
109070004986SSteven Rostedt (Red Hat) 		/* Again, either we update tail_page or an interrupt does */
109170004986SSteven Rostedt (Red Hat) 		(void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page);
109277ae365eSSteven Rostedt 	}
109377ae365eSSteven Rostedt }
109477ae365eSSteven Rostedt 
109577ae365eSSteven Rostedt static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
109677ae365eSSteven Rostedt 			  struct buffer_page *bpage)
109777ae365eSSteven Rostedt {
109877ae365eSSteven Rostedt 	unsigned long val = (unsigned long)bpage;
109977ae365eSSteven Rostedt 
110077ae365eSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK))
110177ae365eSSteven Rostedt 		return 1;
110277ae365eSSteven Rostedt 
110377ae365eSSteven Rostedt 	return 0;
110477ae365eSSteven Rostedt }
110577ae365eSSteven Rostedt 
110677ae365eSSteven Rostedt /**
110777ae365eSSteven Rostedt  * rb_check_list - make sure a pointer to a list has the last bits zero
110877ae365eSSteven Rostedt  */
110977ae365eSSteven Rostedt static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
111077ae365eSSteven Rostedt 			 struct list_head *list)
111177ae365eSSteven Rostedt {
111277ae365eSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev))
111377ae365eSSteven Rostedt 		return 1;
111477ae365eSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next))
111577ae365eSSteven Rostedt 		return 1;
111677ae365eSSteven Rostedt 	return 0;
111777ae365eSSteven Rostedt }
111877ae365eSSteven Rostedt 
11197a8e76a3SSteven Rostedt /**
1120d611851bSzhangwei(Jovi)  * rb_check_pages - integrity check of buffer pages
11217a8e76a3SSteven Rostedt  * @cpu_buffer: CPU buffer with pages to test
11227a8e76a3SSteven Rostedt  *
1123c3706f00SWenji Huang  * As a safety measure we check to make sure the data pages have not
11247a8e76a3SSteven Rostedt  * been corrupted.
11257a8e76a3SSteven Rostedt  */
11267a8e76a3SSteven Rostedt static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
11277a8e76a3SSteven Rostedt {
11283adc54faSSteven Rostedt 	struct list_head *head = cpu_buffer->pages;
1129044fa782SSteven Rostedt 	struct buffer_page *bpage, *tmp;
11307a8e76a3SSteven Rostedt 
1131308f7eebSSteven Rostedt 	/* Reset the head page if it exists */
1132308f7eebSSteven Rostedt 	if (cpu_buffer->head_page)
1133308f7eebSSteven Rostedt 		rb_set_head_page(cpu_buffer);
1134308f7eebSSteven Rostedt 
113577ae365eSSteven Rostedt 	rb_head_page_deactivate(cpu_buffer);
113677ae365eSSteven Rostedt 
11373e89c7bbSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
11383e89c7bbSSteven Rostedt 		return -1;
11393e89c7bbSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
11403e89c7bbSSteven Rostedt 		return -1;
11417a8e76a3SSteven Rostedt 
114277ae365eSSteven Rostedt 	if (rb_check_list(cpu_buffer, head))
114377ae365eSSteven Rostedt 		return -1;
114477ae365eSSteven Rostedt 
1145044fa782SSteven Rostedt 	list_for_each_entry_safe(bpage, tmp, head, list) {
11463e89c7bbSSteven Rostedt 		if (RB_WARN_ON(cpu_buffer,
1147044fa782SSteven Rostedt 			       bpage->list.next->prev != &bpage->list))
11483e89c7bbSSteven Rostedt 			return -1;
11493e89c7bbSSteven Rostedt 		if (RB_WARN_ON(cpu_buffer,
1150044fa782SSteven Rostedt 			       bpage->list.prev->next != &bpage->list))
11513e89c7bbSSteven Rostedt 			return -1;
115277ae365eSSteven Rostedt 		if (rb_check_list(cpu_buffer, &bpage->list))
115377ae365eSSteven Rostedt 			return -1;
11547a8e76a3SSteven Rostedt 	}
11557a8e76a3SSteven Rostedt 
115677ae365eSSteven Rostedt 	rb_head_page_activate(cpu_buffer);
115777ae365eSSteven Rostedt 
11587a8e76a3SSteven Rostedt 	return 0;
11597a8e76a3SSteven Rostedt }
11607a8e76a3SSteven Rostedt 
11619b94a8fbSSteven Rostedt (Red Hat) static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu)
11627a8e76a3SSteven Rostedt {
1163044fa782SSteven Rostedt 	struct buffer_page *bpage, *tmp;
11649b94a8fbSSteven Rostedt (Red Hat) 	long i;
11653adc54faSSteven Rostedt 
11667a8e76a3SSteven Rostedt 	for (i = 0; i < nr_pages; i++) {
11677ea59064SVaibhav Nagarnaik 		struct page *page;
1168d7ec4bfeSVaibhav Nagarnaik 		/*
116984861885SJoel Fernandes 		 * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails
117084861885SJoel Fernandes 		 * gracefully without invoking oom-killer and the system is not
117184861885SJoel Fernandes 		 * destabilized.
1172d7ec4bfeSVaibhav Nagarnaik 		 */
1173044fa782SSteven Rostedt 		bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
117484861885SJoel Fernandes 				    GFP_KERNEL | __GFP_RETRY_MAYFAIL,
1175438ced17SVaibhav Nagarnaik 				    cpu_to_node(cpu));
1176044fa782SSteven Rostedt 		if (!bpage)
1177e4c2ce82SSteven Rostedt 			goto free_pages;
117877ae365eSSteven Rostedt 
1179438ced17SVaibhav Nagarnaik 		list_add(&bpage->list, pages);
118077ae365eSSteven Rostedt 
1181438ced17SVaibhav Nagarnaik 		page = alloc_pages_node(cpu_to_node(cpu),
118284861885SJoel Fernandes 					GFP_KERNEL | __GFP_RETRY_MAYFAIL, 0);
11837ea59064SVaibhav Nagarnaik 		if (!page)
11847a8e76a3SSteven Rostedt 			goto free_pages;
11857ea59064SVaibhav Nagarnaik 		bpage->page = page_address(page);
1186044fa782SSteven Rostedt 		rb_init_page(bpage->page);
11877a8e76a3SSteven Rostedt 	}
11887a8e76a3SSteven Rostedt 
1189438ced17SVaibhav Nagarnaik 	return 0;
1190438ced17SVaibhav Nagarnaik 
1191438ced17SVaibhav Nagarnaik free_pages:
1192438ced17SVaibhav Nagarnaik 	list_for_each_entry_safe(bpage, tmp, pages, list) {
1193438ced17SVaibhav Nagarnaik 		list_del_init(&bpage->list);
1194438ced17SVaibhav Nagarnaik 		free_buffer_page(bpage);
1195438ced17SVaibhav Nagarnaik 	}
1196438ced17SVaibhav Nagarnaik 
1197438ced17SVaibhav Nagarnaik 	return -ENOMEM;
1198438ced17SVaibhav Nagarnaik }
1199438ced17SVaibhav Nagarnaik 
1200438ced17SVaibhav Nagarnaik static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
12019b94a8fbSSteven Rostedt (Red Hat) 			     unsigned long nr_pages)
1202438ced17SVaibhav Nagarnaik {
1203438ced17SVaibhav Nagarnaik 	LIST_HEAD(pages);
1204438ced17SVaibhav Nagarnaik 
1205438ced17SVaibhav Nagarnaik 	WARN_ON(!nr_pages);
1206438ced17SVaibhav Nagarnaik 
1207438ced17SVaibhav Nagarnaik 	if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu))
1208438ced17SVaibhav Nagarnaik 		return -ENOMEM;
1209438ced17SVaibhav Nagarnaik 
12103adc54faSSteven Rostedt 	/*
12113adc54faSSteven Rostedt 	 * The ring buffer page list is a circular list that does not
12123adc54faSSteven Rostedt 	 * start and end with a list head. All page list items point to
12133adc54faSSteven Rostedt 	 * other pages.
12143adc54faSSteven Rostedt 	 */
12153adc54faSSteven Rostedt 	cpu_buffer->pages = pages.next;
12163adc54faSSteven Rostedt 	list_del(&pages);
12177a8e76a3SSteven Rostedt 
1218438ced17SVaibhav Nagarnaik 	cpu_buffer->nr_pages = nr_pages;
1219438ced17SVaibhav Nagarnaik 
12207a8e76a3SSteven Rostedt 	rb_check_pages(cpu_buffer);
12217a8e76a3SSteven Rostedt 
12227a8e76a3SSteven Rostedt 	return 0;
12237a8e76a3SSteven Rostedt }
12247a8e76a3SSteven Rostedt 
12257a8e76a3SSteven Rostedt static struct ring_buffer_per_cpu *
12269b94a8fbSSteven Rostedt (Red Hat) rb_allocate_cpu_buffer(struct ring_buffer *buffer, long nr_pages, int cpu)
12277a8e76a3SSteven Rostedt {
12287a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
1229044fa782SSteven Rostedt 	struct buffer_page *bpage;
12307ea59064SVaibhav Nagarnaik 	struct page *page;
12317a8e76a3SSteven Rostedt 	int ret;
12327a8e76a3SSteven Rostedt 
12337a8e76a3SSteven Rostedt 	cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
12347a8e76a3SSteven Rostedt 				  GFP_KERNEL, cpu_to_node(cpu));
12357a8e76a3SSteven Rostedt 	if (!cpu_buffer)
12367a8e76a3SSteven Rostedt 		return NULL;
12377a8e76a3SSteven Rostedt 
12387a8e76a3SSteven Rostedt 	cpu_buffer->cpu = cpu;
12397a8e76a3SSteven Rostedt 	cpu_buffer->buffer = buffer;
12405389f6faSThomas Gleixner 	raw_spin_lock_init(&cpu_buffer->reader_lock);
12411f8a6a10SPeter Zijlstra 	lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
1242edc35bd7SThomas Gleixner 	cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
124383f40318SVaibhav Nagarnaik 	INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
124405fdd70dSVaibhav Nagarnaik 	init_completion(&cpu_buffer->update_done);
124515693458SSteven Rostedt (Red Hat) 	init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters);
1246f1dc6725SSteven Rostedt (Red Hat) 	init_waitqueue_head(&cpu_buffer->irq_work.waiters);
12471e0d6714SSteven Rostedt (Red Hat) 	init_waitqueue_head(&cpu_buffer->irq_work.full_waiters);
12487a8e76a3SSteven Rostedt 
1249044fa782SSteven Rostedt 	bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1250e4c2ce82SSteven Rostedt 			    GFP_KERNEL, cpu_to_node(cpu));
1251044fa782SSteven Rostedt 	if (!bpage)
1252e4c2ce82SSteven Rostedt 		goto fail_free_buffer;
1253e4c2ce82SSteven Rostedt 
125477ae365eSSteven Rostedt 	rb_check_bpage(cpu_buffer, bpage);
125577ae365eSSteven Rostedt 
1256044fa782SSteven Rostedt 	cpu_buffer->reader_page = bpage;
12577ea59064SVaibhav Nagarnaik 	page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0);
12587ea59064SVaibhav Nagarnaik 	if (!page)
1259e4c2ce82SSteven Rostedt 		goto fail_free_reader;
12607ea59064SVaibhav Nagarnaik 	bpage->page = page_address(page);
1261044fa782SSteven Rostedt 	rb_init_page(bpage->page);
1262e4c2ce82SSteven Rostedt 
1263d769041fSSteven Rostedt 	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
126444b99462SVaibhav Nagarnaik 	INIT_LIST_HEAD(&cpu_buffer->new_pages);
1265d769041fSSteven Rostedt 
1266438ced17SVaibhav Nagarnaik 	ret = rb_allocate_pages(cpu_buffer, nr_pages);
12677a8e76a3SSteven Rostedt 	if (ret < 0)
1268d769041fSSteven Rostedt 		goto fail_free_reader;
12697a8e76a3SSteven Rostedt 
12707a8e76a3SSteven Rostedt 	cpu_buffer->head_page
12713adc54faSSteven Rostedt 		= list_entry(cpu_buffer->pages, struct buffer_page, list);
1272bf41a158SSteven Rostedt 	cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
12737a8e76a3SSteven Rostedt 
127477ae365eSSteven Rostedt 	rb_head_page_activate(cpu_buffer);
127577ae365eSSteven Rostedt 
12767a8e76a3SSteven Rostedt 	return cpu_buffer;
12777a8e76a3SSteven Rostedt 
1278d769041fSSteven Rostedt  fail_free_reader:
1279d769041fSSteven Rostedt 	free_buffer_page(cpu_buffer->reader_page);
1280d769041fSSteven Rostedt 
12817a8e76a3SSteven Rostedt  fail_free_buffer:
12827a8e76a3SSteven Rostedt 	kfree(cpu_buffer);
12837a8e76a3SSteven Rostedt 	return NULL;
12847a8e76a3SSteven Rostedt }
12857a8e76a3SSteven Rostedt 
12867a8e76a3SSteven Rostedt static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
12877a8e76a3SSteven Rostedt {
12883adc54faSSteven Rostedt 	struct list_head *head = cpu_buffer->pages;
1289044fa782SSteven Rostedt 	struct buffer_page *bpage, *tmp;
12907a8e76a3SSteven Rostedt 
1291d769041fSSteven Rostedt 	free_buffer_page(cpu_buffer->reader_page);
1292d769041fSSteven Rostedt 
129377ae365eSSteven Rostedt 	rb_head_page_deactivate(cpu_buffer);
129477ae365eSSteven Rostedt 
12953adc54faSSteven Rostedt 	if (head) {
1296044fa782SSteven Rostedt 		list_for_each_entry_safe(bpage, tmp, head, list) {
1297044fa782SSteven Rostedt 			list_del_init(&bpage->list);
1298044fa782SSteven Rostedt 			free_buffer_page(bpage);
12997a8e76a3SSteven Rostedt 		}
13003adc54faSSteven Rostedt 		bpage = list_entry(head, struct buffer_page, list);
13013adc54faSSteven Rostedt 		free_buffer_page(bpage);
13023adc54faSSteven Rostedt 	}
13033adc54faSSteven Rostedt 
13047a8e76a3SSteven Rostedt 	kfree(cpu_buffer);
13057a8e76a3SSteven Rostedt }
13067a8e76a3SSteven Rostedt 
13077a8e76a3SSteven Rostedt /**
1308d611851bSzhangwei(Jovi)  * __ring_buffer_alloc - allocate a new ring_buffer
130968814b58SRobert Richter  * @size: the size in bytes per cpu that is needed.
13107a8e76a3SSteven Rostedt  * @flags: attributes to set for the ring buffer.
13117a8e76a3SSteven Rostedt  *
13127a8e76a3SSteven Rostedt  * Currently the only flag that is available is the RB_FL_OVERWRITE
13137a8e76a3SSteven Rostedt  * flag. This flag means that the buffer will overwrite old data
13147a8e76a3SSteven Rostedt  * when the buffer wraps. If this flag is not set, the buffer will
13157a8e76a3SSteven Rostedt  * drop data when the tail hits the head.
13167a8e76a3SSteven Rostedt  */
13171f8a6a10SPeter Zijlstra struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
13181f8a6a10SPeter Zijlstra 					struct lock_class_key *key)
13197a8e76a3SSteven Rostedt {
13207a8e76a3SSteven Rostedt 	struct ring_buffer *buffer;
13219b94a8fbSSteven Rostedt (Red Hat) 	long nr_pages;
13227a8e76a3SSteven Rostedt 	int bsize;
13239b94a8fbSSteven Rostedt (Red Hat) 	int cpu;
1324b32614c0SSebastian Andrzej Siewior 	int ret;
13257a8e76a3SSteven Rostedt 
13267a8e76a3SSteven Rostedt 	/* keep it in its own cache line */
13277a8e76a3SSteven Rostedt 	buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
13287a8e76a3SSteven Rostedt 			 GFP_KERNEL);
13297a8e76a3SSteven Rostedt 	if (!buffer)
13307a8e76a3SSteven Rostedt 		return NULL;
13317a8e76a3SSteven Rostedt 
1332b18cc3deSSebastian Andrzej Siewior 	if (!zalloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
13339e01c1b7SRusty Russell 		goto fail_free_buffer;
13349e01c1b7SRusty Russell 
1335438ced17SVaibhav Nagarnaik 	nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
13367a8e76a3SSteven Rostedt 	buffer->flags = flags;
133737886f6aSSteven Rostedt 	buffer->clock = trace_clock_local;
13381f8a6a10SPeter Zijlstra 	buffer->reader_lock_key = key;
13397a8e76a3SSteven Rostedt 
134015693458SSteven Rostedt (Red Hat) 	init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters);
1341f1dc6725SSteven Rostedt (Red Hat) 	init_waitqueue_head(&buffer->irq_work.waiters);
134215693458SSteven Rostedt (Red Hat) 
13437a8e76a3SSteven Rostedt 	/* need at least two pages */
1344438ced17SVaibhav Nagarnaik 	if (nr_pages < 2)
1345438ced17SVaibhav Nagarnaik 		nr_pages = 2;
13467a8e76a3SSteven Rostedt 
13477a8e76a3SSteven Rostedt 	buffer->cpus = nr_cpu_ids;
13487a8e76a3SSteven Rostedt 
13497a8e76a3SSteven Rostedt 	bsize = sizeof(void *) * nr_cpu_ids;
13507a8e76a3SSteven Rostedt 	buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
13517a8e76a3SSteven Rostedt 				  GFP_KERNEL);
13527a8e76a3SSteven Rostedt 	if (!buffer->buffers)
13539e01c1b7SRusty Russell 		goto fail_free_cpumask;
13547a8e76a3SSteven Rostedt 
1355b32614c0SSebastian Andrzej Siewior 	cpu = raw_smp_processor_id();
1356b32614c0SSebastian Andrzej Siewior 	cpumask_set_cpu(cpu, buffer->cpumask);
1357b32614c0SSebastian Andrzej Siewior 	buffer->buffers[cpu] = rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
13587a8e76a3SSteven Rostedt 	if (!buffer->buffers[cpu])
13597a8e76a3SSteven Rostedt 		goto fail_free_buffers;
13607a8e76a3SSteven Rostedt 
1361b32614c0SSebastian Andrzej Siewior 	ret = cpuhp_state_add_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
1362b32614c0SSebastian Andrzej Siewior 	if (ret < 0)
1363b32614c0SSebastian Andrzej Siewior 		goto fail_free_buffers;
1364554f786eSSteven Rostedt 
13657a8e76a3SSteven Rostedt 	mutex_init(&buffer->mutex);
13667a8e76a3SSteven Rostedt 
13677a8e76a3SSteven Rostedt 	return buffer;
13687a8e76a3SSteven Rostedt 
13697a8e76a3SSteven Rostedt  fail_free_buffers:
13707a8e76a3SSteven Rostedt 	for_each_buffer_cpu(buffer, cpu) {
13717a8e76a3SSteven Rostedt 		if (buffer->buffers[cpu])
13727a8e76a3SSteven Rostedt 			rb_free_cpu_buffer(buffer->buffers[cpu]);
13737a8e76a3SSteven Rostedt 	}
13747a8e76a3SSteven Rostedt 	kfree(buffer->buffers);
13757a8e76a3SSteven Rostedt 
13769e01c1b7SRusty Russell  fail_free_cpumask:
13779e01c1b7SRusty Russell 	free_cpumask_var(buffer->cpumask);
13789e01c1b7SRusty Russell 
13797a8e76a3SSteven Rostedt  fail_free_buffer:
13807a8e76a3SSteven Rostedt 	kfree(buffer);
13817a8e76a3SSteven Rostedt 	return NULL;
13827a8e76a3SSteven Rostedt }
13831f8a6a10SPeter Zijlstra EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
13847a8e76a3SSteven Rostedt 
13857a8e76a3SSteven Rostedt /**
13867a8e76a3SSteven Rostedt  * ring_buffer_free - free a ring buffer.
13877a8e76a3SSteven Rostedt  * @buffer: the buffer to free.
13887a8e76a3SSteven Rostedt  */
13897a8e76a3SSteven Rostedt void
13907a8e76a3SSteven Rostedt ring_buffer_free(struct ring_buffer *buffer)
13917a8e76a3SSteven Rostedt {
13927a8e76a3SSteven Rostedt 	int cpu;
13937a8e76a3SSteven Rostedt 
1394b32614c0SSebastian Andrzej Siewior 	cpuhp_state_remove_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
1395554f786eSSteven Rostedt 
13967a8e76a3SSteven Rostedt 	for_each_buffer_cpu(buffer, cpu)
13977a8e76a3SSteven Rostedt 		rb_free_cpu_buffer(buffer->buffers[cpu]);
13987a8e76a3SSteven Rostedt 
1399bd3f0221SEric Dumazet 	kfree(buffer->buffers);
14009e01c1b7SRusty Russell 	free_cpumask_var(buffer->cpumask);
14019e01c1b7SRusty Russell 
14027a8e76a3SSteven Rostedt 	kfree(buffer);
14037a8e76a3SSteven Rostedt }
1404c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_free);
14057a8e76a3SSteven Rostedt 
140637886f6aSSteven Rostedt void ring_buffer_set_clock(struct ring_buffer *buffer,
140737886f6aSSteven Rostedt 			   u64 (*clock)(void))
140837886f6aSSteven Rostedt {
140937886f6aSSteven Rostedt 	buffer->clock = clock;
141037886f6aSSteven Rostedt }
141137886f6aSSteven Rostedt 
141200b41452STom Zanussi void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs)
141300b41452STom Zanussi {
141400b41452STom Zanussi 	buffer->time_stamp_abs = abs;
141500b41452STom Zanussi }
141600b41452STom Zanussi 
141700b41452STom Zanussi bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer)
141800b41452STom Zanussi {
141900b41452STom Zanussi 	return buffer->time_stamp_abs;
142000b41452STom Zanussi }
142100b41452STom Zanussi 
14227a8e76a3SSteven Rostedt static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
14237a8e76a3SSteven Rostedt 
142483f40318SVaibhav Nagarnaik static inline unsigned long rb_page_entries(struct buffer_page *bpage)
14257a8e76a3SSteven Rostedt {
142683f40318SVaibhav Nagarnaik 	return local_read(&bpage->entries) & RB_WRITE_MASK;
142783f40318SVaibhav Nagarnaik }
142883f40318SVaibhav Nagarnaik 
142983f40318SVaibhav Nagarnaik static inline unsigned long rb_page_write(struct buffer_page *bpage)
143083f40318SVaibhav Nagarnaik {
143183f40318SVaibhav Nagarnaik 	return local_read(&bpage->write) & RB_WRITE_MASK;
143283f40318SVaibhav Nagarnaik }
143383f40318SVaibhav Nagarnaik 
14345040b4b7SVaibhav Nagarnaik static int
14359b94a8fbSSteven Rostedt (Red Hat) rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
143683f40318SVaibhav Nagarnaik {
143783f40318SVaibhav Nagarnaik 	struct list_head *tail_page, *to_remove, *next_page;
143883f40318SVaibhav Nagarnaik 	struct buffer_page *to_remove_page, *tmp_iter_page;
143983f40318SVaibhav Nagarnaik 	struct buffer_page *last_page, *first_page;
14409b94a8fbSSteven Rostedt (Red Hat) 	unsigned long nr_removed;
144183f40318SVaibhav Nagarnaik 	unsigned long head_bit;
144283f40318SVaibhav Nagarnaik 	int page_entries;
144383f40318SVaibhav Nagarnaik 
144483f40318SVaibhav Nagarnaik 	head_bit = 0;
14457a8e76a3SSteven Rostedt 
14465389f6faSThomas Gleixner 	raw_spin_lock_irq(&cpu_buffer->reader_lock);
144783f40318SVaibhav Nagarnaik 	atomic_inc(&cpu_buffer->record_disabled);
144883f40318SVaibhav Nagarnaik 	/*
144983f40318SVaibhav Nagarnaik 	 * We don't race with the readers since we have acquired the reader
145083f40318SVaibhav Nagarnaik 	 * lock. We also don't race with writers after disabling recording.
145183f40318SVaibhav Nagarnaik 	 * This makes it easy to figure out the first and the last page to be
145283f40318SVaibhav Nagarnaik 	 * removed from the list. We unlink all the pages in between including
145383f40318SVaibhav Nagarnaik 	 * the first and last pages. This is done in a busy loop so that we
145483f40318SVaibhav Nagarnaik 	 * lose the least number of traces.
145583f40318SVaibhav Nagarnaik 	 * The pages are freed after we restart recording and unlock readers.
145683f40318SVaibhav Nagarnaik 	 */
145783f40318SVaibhav Nagarnaik 	tail_page = &cpu_buffer->tail_page->list;
145877ae365eSSteven Rostedt 
145983f40318SVaibhav Nagarnaik 	/*
146083f40318SVaibhav Nagarnaik 	 * tail page might be on reader page, we remove the next page
146183f40318SVaibhav Nagarnaik 	 * from the ring buffer
146283f40318SVaibhav Nagarnaik 	 */
146383f40318SVaibhav Nagarnaik 	if (cpu_buffer->tail_page == cpu_buffer->reader_page)
146483f40318SVaibhav Nagarnaik 		tail_page = rb_list_head(tail_page->next);
146583f40318SVaibhav Nagarnaik 	to_remove = tail_page;
146683f40318SVaibhav Nagarnaik 
146783f40318SVaibhav Nagarnaik 	/* start of pages to remove */
146883f40318SVaibhav Nagarnaik 	first_page = list_entry(rb_list_head(to_remove->next),
146983f40318SVaibhav Nagarnaik 				struct buffer_page, list);
147083f40318SVaibhav Nagarnaik 
147183f40318SVaibhav Nagarnaik 	for (nr_removed = 0; nr_removed < nr_pages; nr_removed++) {
147283f40318SVaibhav Nagarnaik 		to_remove = rb_list_head(to_remove)->next;
147383f40318SVaibhav Nagarnaik 		head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD;
14747a8e76a3SSteven Rostedt 	}
14757a8e76a3SSteven Rostedt 
147683f40318SVaibhav Nagarnaik 	next_page = rb_list_head(to_remove)->next;
14777a8e76a3SSteven Rostedt 
147883f40318SVaibhav Nagarnaik 	/*
147983f40318SVaibhav Nagarnaik 	 * Now we remove all pages between tail_page and next_page.
148083f40318SVaibhav Nagarnaik 	 * Make sure that we have head_bit value preserved for the
148183f40318SVaibhav Nagarnaik 	 * next page
148283f40318SVaibhav Nagarnaik 	 */
148383f40318SVaibhav Nagarnaik 	tail_page->next = (struct list_head *)((unsigned long)next_page |
148483f40318SVaibhav Nagarnaik 						head_bit);
148583f40318SVaibhav Nagarnaik 	next_page = rb_list_head(next_page);
148683f40318SVaibhav Nagarnaik 	next_page->prev = tail_page;
148783f40318SVaibhav Nagarnaik 
148883f40318SVaibhav Nagarnaik 	/* make sure pages points to a valid page in the ring buffer */
148983f40318SVaibhav Nagarnaik 	cpu_buffer->pages = next_page;
149083f40318SVaibhav Nagarnaik 
149183f40318SVaibhav Nagarnaik 	/* update head page */
149283f40318SVaibhav Nagarnaik 	if (head_bit)
149383f40318SVaibhav Nagarnaik 		cpu_buffer->head_page = list_entry(next_page,
149483f40318SVaibhav Nagarnaik 						struct buffer_page, list);
149583f40318SVaibhav Nagarnaik 
149683f40318SVaibhav Nagarnaik 	/*
149783f40318SVaibhav Nagarnaik 	 * change read pointer to make sure any read iterators reset
149883f40318SVaibhav Nagarnaik 	 * themselves
149983f40318SVaibhav Nagarnaik 	 */
150083f40318SVaibhav Nagarnaik 	cpu_buffer->read = 0;
150183f40318SVaibhav Nagarnaik 
150283f40318SVaibhav Nagarnaik 	/* pages are removed, resume tracing and then free the pages */
150383f40318SVaibhav Nagarnaik 	atomic_dec(&cpu_buffer->record_disabled);
15045389f6faSThomas Gleixner 	raw_spin_unlock_irq(&cpu_buffer->reader_lock);
150583f40318SVaibhav Nagarnaik 
150683f40318SVaibhav Nagarnaik 	RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages));
150783f40318SVaibhav Nagarnaik 
150883f40318SVaibhav Nagarnaik 	/* last buffer page to remove */
150983f40318SVaibhav Nagarnaik 	last_page = list_entry(rb_list_head(to_remove), struct buffer_page,
151083f40318SVaibhav Nagarnaik 				list);
151183f40318SVaibhav Nagarnaik 	tmp_iter_page = first_page;
151283f40318SVaibhav Nagarnaik 
151383f40318SVaibhav Nagarnaik 	do {
151483f40318SVaibhav Nagarnaik 		to_remove_page = tmp_iter_page;
151583f40318SVaibhav Nagarnaik 		rb_inc_page(cpu_buffer, &tmp_iter_page);
151683f40318SVaibhav Nagarnaik 
151783f40318SVaibhav Nagarnaik 		/* update the counters */
151883f40318SVaibhav Nagarnaik 		page_entries = rb_page_entries(to_remove_page);
151983f40318SVaibhav Nagarnaik 		if (page_entries) {
152083f40318SVaibhav Nagarnaik 			/*
152183f40318SVaibhav Nagarnaik 			 * If something was added to this page, it was full
152283f40318SVaibhav Nagarnaik 			 * since it is not the tail page. So we deduct the
152383f40318SVaibhav Nagarnaik 			 * bytes consumed in ring buffer from here.
152448fdc72fSVaibhav Nagarnaik 			 * Increment overrun to account for the lost events.
152583f40318SVaibhav Nagarnaik 			 */
152648fdc72fSVaibhav Nagarnaik 			local_add(page_entries, &cpu_buffer->overrun);
152783f40318SVaibhav Nagarnaik 			local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
152883f40318SVaibhav Nagarnaik 		}
152983f40318SVaibhav Nagarnaik 
153083f40318SVaibhav Nagarnaik 		/*
153183f40318SVaibhav Nagarnaik 		 * We have already removed references to this list item, just
153283f40318SVaibhav Nagarnaik 		 * free up the buffer_page and its page
153383f40318SVaibhav Nagarnaik 		 */
153483f40318SVaibhav Nagarnaik 		free_buffer_page(to_remove_page);
153583f40318SVaibhav Nagarnaik 		nr_removed--;
153683f40318SVaibhav Nagarnaik 
153783f40318SVaibhav Nagarnaik 	} while (to_remove_page != last_page);
153883f40318SVaibhav Nagarnaik 
153983f40318SVaibhav Nagarnaik 	RB_WARN_ON(cpu_buffer, nr_removed);
15405040b4b7SVaibhav Nagarnaik 
15415040b4b7SVaibhav Nagarnaik 	return nr_removed == 0;
15427a8e76a3SSteven Rostedt }
15437a8e76a3SSteven Rostedt 
15445040b4b7SVaibhav Nagarnaik static int
15455040b4b7SVaibhav Nagarnaik rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
15467a8e76a3SSteven Rostedt {
15475040b4b7SVaibhav Nagarnaik 	struct list_head *pages = &cpu_buffer->new_pages;
15485040b4b7SVaibhav Nagarnaik 	int retries, success;
15497a8e76a3SSteven Rostedt 
15505389f6faSThomas Gleixner 	raw_spin_lock_irq(&cpu_buffer->reader_lock);
15515040b4b7SVaibhav Nagarnaik 	/*
15525040b4b7SVaibhav Nagarnaik 	 * We are holding the reader lock, so the reader page won't be swapped
15535040b4b7SVaibhav Nagarnaik 	 * in the ring buffer. Now we are racing with the writer trying to
15545040b4b7SVaibhav Nagarnaik 	 * move head page and the tail page.
15555040b4b7SVaibhav Nagarnaik 	 * We are going to adapt the reader page update process where:
15565040b4b7SVaibhav Nagarnaik 	 * 1. We first splice the start and end of list of new pages between
15575040b4b7SVaibhav Nagarnaik 	 *    the head page and its previous page.
15585040b4b7SVaibhav Nagarnaik 	 * 2. We cmpxchg the prev_page->next to point from head page to the
15595040b4b7SVaibhav Nagarnaik 	 *    start of new pages list.
15605040b4b7SVaibhav Nagarnaik 	 * 3. Finally, we update the head->prev to the end of new list.
15615040b4b7SVaibhav Nagarnaik 	 *
15625040b4b7SVaibhav Nagarnaik 	 * We will try this process 10 times, to make sure that we don't keep
15635040b4b7SVaibhav Nagarnaik 	 * spinning.
15645040b4b7SVaibhav Nagarnaik 	 */
15655040b4b7SVaibhav Nagarnaik 	retries = 10;
15665040b4b7SVaibhav Nagarnaik 	success = 0;
15675040b4b7SVaibhav Nagarnaik 	while (retries--) {
15685040b4b7SVaibhav Nagarnaik 		struct list_head *head_page, *prev_page, *r;
15695040b4b7SVaibhav Nagarnaik 		struct list_head *last_page, *first_page;
15705040b4b7SVaibhav Nagarnaik 		struct list_head *head_page_with_bit;
157177ae365eSSteven Rostedt 
15725040b4b7SVaibhav Nagarnaik 		head_page = &rb_set_head_page(cpu_buffer)->list;
157354f7be5bSSteven Rostedt 		if (!head_page)
157454f7be5bSSteven Rostedt 			break;
15755040b4b7SVaibhav Nagarnaik 		prev_page = head_page->prev;
15765040b4b7SVaibhav Nagarnaik 
15775040b4b7SVaibhav Nagarnaik 		first_page = pages->next;
15785040b4b7SVaibhav Nagarnaik 		last_page  = pages->prev;
15795040b4b7SVaibhav Nagarnaik 
15805040b4b7SVaibhav Nagarnaik 		head_page_with_bit = (struct list_head *)
15815040b4b7SVaibhav Nagarnaik 				     ((unsigned long)head_page | RB_PAGE_HEAD);
15825040b4b7SVaibhav Nagarnaik 
15835040b4b7SVaibhav Nagarnaik 		last_page->next = head_page_with_bit;
15845040b4b7SVaibhav Nagarnaik 		first_page->prev = prev_page;
15855040b4b7SVaibhav Nagarnaik 
15865040b4b7SVaibhav Nagarnaik 		r = cmpxchg(&prev_page->next, head_page_with_bit, first_page);
15875040b4b7SVaibhav Nagarnaik 
15885040b4b7SVaibhav Nagarnaik 		if (r == head_page_with_bit) {
15895040b4b7SVaibhav Nagarnaik 			/*
15905040b4b7SVaibhav Nagarnaik 			 * yay, we replaced the page pointer to our new list,
15915040b4b7SVaibhav Nagarnaik 			 * now, we just have to update to head page's prev
15925040b4b7SVaibhav Nagarnaik 			 * pointer to point to end of list
15935040b4b7SVaibhav Nagarnaik 			 */
15945040b4b7SVaibhav Nagarnaik 			head_page->prev = last_page;
15955040b4b7SVaibhav Nagarnaik 			success = 1;
15965040b4b7SVaibhav Nagarnaik 			break;
15977a8e76a3SSteven Rostedt 		}
15985040b4b7SVaibhav Nagarnaik 	}
15997a8e76a3SSteven Rostedt 
16005040b4b7SVaibhav Nagarnaik 	if (success)
16015040b4b7SVaibhav Nagarnaik 		INIT_LIST_HEAD(pages);
16025040b4b7SVaibhav Nagarnaik 	/*
16035040b4b7SVaibhav Nagarnaik 	 * If we weren't successful in adding in new pages, warn and stop
16045040b4b7SVaibhav Nagarnaik 	 * tracing
16055040b4b7SVaibhav Nagarnaik 	 */
16065040b4b7SVaibhav Nagarnaik 	RB_WARN_ON(cpu_buffer, !success);
16075389f6faSThomas Gleixner 	raw_spin_unlock_irq(&cpu_buffer->reader_lock);
16085040b4b7SVaibhav Nagarnaik 
16095040b4b7SVaibhav Nagarnaik 	/* free pages if they weren't inserted */
16105040b4b7SVaibhav Nagarnaik 	if (!success) {
16115040b4b7SVaibhav Nagarnaik 		struct buffer_page *bpage, *tmp;
16125040b4b7SVaibhav Nagarnaik 		list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
16135040b4b7SVaibhav Nagarnaik 					 list) {
16145040b4b7SVaibhav Nagarnaik 			list_del_init(&bpage->list);
16155040b4b7SVaibhav Nagarnaik 			free_buffer_page(bpage);
16165040b4b7SVaibhav Nagarnaik 		}
16175040b4b7SVaibhav Nagarnaik 	}
16185040b4b7SVaibhav Nagarnaik 	return success;
16197a8e76a3SSteven Rostedt }
16207a8e76a3SSteven Rostedt 
162183f40318SVaibhav Nagarnaik static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer)
1622438ced17SVaibhav Nagarnaik {
16235040b4b7SVaibhav Nagarnaik 	int success;
162483f40318SVaibhav Nagarnaik 
16255040b4b7SVaibhav Nagarnaik 	if (cpu_buffer->nr_pages_to_update > 0)
16265040b4b7SVaibhav Nagarnaik 		success = rb_insert_pages(cpu_buffer);
16275040b4b7SVaibhav Nagarnaik 	else
16285040b4b7SVaibhav Nagarnaik 		success = rb_remove_pages(cpu_buffer,
16295040b4b7SVaibhav Nagarnaik 					-cpu_buffer->nr_pages_to_update);
16305040b4b7SVaibhav Nagarnaik 
16315040b4b7SVaibhav Nagarnaik 	if (success)
1632438ced17SVaibhav Nagarnaik 		cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update;
163383f40318SVaibhav Nagarnaik }
163483f40318SVaibhav Nagarnaik 
163583f40318SVaibhav Nagarnaik static void update_pages_handler(struct work_struct *work)
163683f40318SVaibhav Nagarnaik {
163783f40318SVaibhav Nagarnaik 	struct ring_buffer_per_cpu *cpu_buffer = container_of(work,
163883f40318SVaibhav Nagarnaik 			struct ring_buffer_per_cpu, update_pages_work);
163983f40318SVaibhav Nagarnaik 	rb_update_pages(cpu_buffer);
164005fdd70dSVaibhav Nagarnaik 	complete(&cpu_buffer->update_done);
1641438ced17SVaibhav Nagarnaik }
1642438ced17SVaibhav Nagarnaik 
16437a8e76a3SSteven Rostedt /**
16447a8e76a3SSteven Rostedt  * ring_buffer_resize - resize the ring buffer
16457a8e76a3SSteven Rostedt  * @buffer: the buffer to resize.
16467a8e76a3SSteven Rostedt  * @size: the new size.
1647d611851bSzhangwei(Jovi)  * @cpu_id: the cpu buffer to resize
16487a8e76a3SSteven Rostedt  *
16497a8e76a3SSteven Rostedt  * Minimum size is 2 * BUF_PAGE_SIZE.
16507a8e76a3SSteven Rostedt  *
165183f40318SVaibhav Nagarnaik  * Returns 0 on success and < 0 on failure.
16527a8e76a3SSteven Rostedt  */
1653438ced17SVaibhav Nagarnaik int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
1654438ced17SVaibhav Nagarnaik 			int cpu_id)
16557a8e76a3SSteven Rostedt {
16567a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
16579b94a8fbSSteven Rostedt (Red Hat) 	unsigned long nr_pages;
165883f40318SVaibhav Nagarnaik 	int cpu, err = 0;
16597a8e76a3SSteven Rostedt 
1660ee51a1deSIngo Molnar 	/*
1661ee51a1deSIngo Molnar 	 * Always succeed at resizing a non-existent buffer:
1662ee51a1deSIngo Molnar 	 */
1663ee51a1deSIngo Molnar 	if (!buffer)
1664ee51a1deSIngo Molnar 		return size;
1665ee51a1deSIngo Molnar 
16666a31e1f1SSteven Rostedt 	/* Make sure the requested buffer exists */
16676a31e1f1SSteven Rostedt 	if (cpu_id != RING_BUFFER_ALL_CPUS &&
16686a31e1f1SSteven Rostedt 	    !cpumask_test_cpu(cpu_id, buffer->cpumask))
16696a31e1f1SSteven Rostedt 		return size;
16706a31e1f1SSteven Rostedt 
167159643d15SSteven Rostedt (Red Hat) 	nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
16727a8e76a3SSteven Rostedt 
16737a8e76a3SSteven Rostedt 	/* we need a minimum of two pages */
167459643d15SSteven Rostedt (Red Hat) 	if (nr_pages < 2)
167559643d15SSteven Rostedt (Red Hat) 		nr_pages = 2;
16767a8e76a3SSteven Rostedt 
167759643d15SSteven Rostedt (Red Hat) 	size = nr_pages * BUF_PAGE_SIZE;
16787a8e76a3SSteven Rostedt 
167983f40318SVaibhav Nagarnaik 	/*
168083f40318SVaibhav Nagarnaik 	 * Don't succeed if resizing is disabled, as a reader might be
168183f40318SVaibhav Nagarnaik 	 * manipulating the ring buffer and is expecting a sane state while
168283f40318SVaibhav Nagarnaik 	 * this is true.
168383f40318SVaibhav Nagarnaik 	 */
168483f40318SVaibhav Nagarnaik 	if (atomic_read(&buffer->resize_disabled))
168583f40318SVaibhav Nagarnaik 		return -EBUSY;
168683f40318SVaibhav Nagarnaik 
168783f40318SVaibhav Nagarnaik 	/* prevent another thread from changing buffer sizes */
168883f40318SVaibhav Nagarnaik 	mutex_lock(&buffer->mutex);
168983f40318SVaibhav Nagarnaik 
1690438ced17SVaibhav Nagarnaik 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
1691438ced17SVaibhav Nagarnaik 		/* calculate the pages to update */
16927a8e76a3SSteven Rostedt 		for_each_buffer_cpu(buffer, cpu) {
16937a8e76a3SSteven Rostedt 			cpu_buffer = buffer->buffers[cpu];
1694438ced17SVaibhav Nagarnaik 
1695438ced17SVaibhav Nagarnaik 			cpu_buffer->nr_pages_to_update = nr_pages -
1696438ced17SVaibhav Nagarnaik 							cpu_buffer->nr_pages;
1697438ced17SVaibhav Nagarnaik 			/*
1698438ced17SVaibhav Nagarnaik 			 * nothing more to do for removing pages or no update
1699438ced17SVaibhav Nagarnaik 			 */
1700438ced17SVaibhav Nagarnaik 			if (cpu_buffer->nr_pages_to_update <= 0)
1701438ced17SVaibhav Nagarnaik 				continue;
1702438ced17SVaibhav Nagarnaik 			/*
1703438ced17SVaibhav Nagarnaik 			 * to add pages, make sure all new pages can be
1704438ced17SVaibhav Nagarnaik 			 * allocated without receiving ENOMEM
1705438ced17SVaibhav Nagarnaik 			 */
1706438ced17SVaibhav Nagarnaik 			INIT_LIST_HEAD(&cpu_buffer->new_pages);
1707438ced17SVaibhav Nagarnaik 			if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
170883f40318SVaibhav Nagarnaik 						&cpu_buffer->new_pages, cpu)) {
1709438ced17SVaibhav Nagarnaik 				/* not enough memory for new pages */
171083f40318SVaibhav Nagarnaik 				err = -ENOMEM;
171183f40318SVaibhav Nagarnaik 				goto out_err;
171283f40318SVaibhav Nagarnaik 			}
171383f40318SVaibhav Nagarnaik 		}
171483f40318SVaibhav Nagarnaik 
171583f40318SVaibhav Nagarnaik 		get_online_cpus();
171683f40318SVaibhav Nagarnaik 		/*
171783f40318SVaibhav Nagarnaik 		 * Fire off all the required work handlers
171805fdd70dSVaibhav Nagarnaik 		 * We can't schedule on offline CPUs, but it's not necessary
171983f40318SVaibhav Nagarnaik 		 * since we can change their buffer sizes without any race.
172083f40318SVaibhav Nagarnaik 		 */
172183f40318SVaibhav Nagarnaik 		for_each_buffer_cpu(buffer, cpu) {
172283f40318SVaibhav Nagarnaik 			cpu_buffer = buffer->buffers[cpu];
172305fdd70dSVaibhav Nagarnaik 			if (!cpu_buffer->nr_pages_to_update)
172483f40318SVaibhav Nagarnaik 				continue;
172583f40318SVaibhav Nagarnaik 
1726021c5b34SCorey Minyard 			/* Can't run something on an offline CPU. */
1727021c5b34SCorey Minyard 			if (!cpu_online(cpu)) {
1728f5eb5588SSteven Rostedt (Red Hat) 				rb_update_pages(cpu_buffer);
1729f5eb5588SSteven Rostedt (Red Hat) 				cpu_buffer->nr_pages_to_update = 0;
1730f5eb5588SSteven Rostedt (Red Hat) 			} else {
173105fdd70dSVaibhav Nagarnaik 				schedule_work_on(cpu,
173205fdd70dSVaibhav Nagarnaik 						&cpu_buffer->update_pages_work);
1733f5eb5588SSteven Rostedt (Red Hat) 			}
17347a8e76a3SSteven Rostedt 		}
1735438ced17SVaibhav Nagarnaik 
1736438ced17SVaibhav Nagarnaik 		/* wait for all the updates to complete */
1737438ced17SVaibhav Nagarnaik 		for_each_buffer_cpu(buffer, cpu) {
1738438ced17SVaibhav Nagarnaik 			cpu_buffer = buffer->buffers[cpu];
173905fdd70dSVaibhav Nagarnaik 			if (!cpu_buffer->nr_pages_to_update)
174083f40318SVaibhav Nagarnaik 				continue;
174183f40318SVaibhav Nagarnaik 
174205fdd70dSVaibhav Nagarnaik 			if (cpu_online(cpu))
174305fdd70dSVaibhav Nagarnaik 				wait_for_completion(&cpu_buffer->update_done);
174483f40318SVaibhav Nagarnaik 			cpu_buffer->nr_pages_to_update = 0;
1745438ced17SVaibhav Nagarnaik 		}
174683f40318SVaibhav Nagarnaik 
174783f40318SVaibhav Nagarnaik 		put_online_cpus();
1748438ced17SVaibhav Nagarnaik 	} else {
17498e49f418SVaibhav Nagarnaik 		/* Make sure this CPU has been intitialized */
17508e49f418SVaibhav Nagarnaik 		if (!cpumask_test_cpu(cpu_id, buffer->cpumask))
17518e49f418SVaibhav Nagarnaik 			goto out;
17528e49f418SVaibhav Nagarnaik 
1753438ced17SVaibhav Nagarnaik 		cpu_buffer = buffer->buffers[cpu_id];
175483f40318SVaibhav Nagarnaik 
1755438ced17SVaibhav Nagarnaik 		if (nr_pages == cpu_buffer->nr_pages)
17567a8e76a3SSteven Rostedt 			goto out;
1757438ced17SVaibhav Nagarnaik 
1758438ced17SVaibhav Nagarnaik 		cpu_buffer->nr_pages_to_update = nr_pages -
1759438ced17SVaibhav Nagarnaik 						cpu_buffer->nr_pages;
1760438ced17SVaibhav Nagarnaik 
1761438ced17SVaibhav Nagarnaik 		INIT_LIST_HEAD(&cpu_buffer->new_pages);
1762438ced17SVaibhav Nagarnaik 		if (cpu_buffer->nr_pages_to_update > 0 &&
1763438ced17SVaibhav Nagarnaik 			__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
176483f40318SVaibhav Nagarnaik 					    &cpu_buffer->new_pages, cpu_id)) {
176583f40318SVaibhav Nagarnaik 			err = -ENOMEM;
176683f40318SVaibhav Nagarnaik 			goto out_err;
176783f40318SVaibhav Nagarnaik 		}
1768438ced17SVaibhav Nagarnaik 
176983f40318SVaibhav Nagarnaik 		get_online_cpus();
177083f40318SVaibhav Nagarnaik 
1771021c5b34SCorey Minyard 		/* Can't run something on an offline CPU. */
1772021c5b34SCorey Minyard 		if (!cpu_online(cpu_id))
1773f5eb5588SSteven Rostedt (Red Hat) 			rb_update_pages(cpu_buffer);
1774f5eb5588SSteven Rostedt (Red Hat) 		else {
177583f40318SVaibhav Nagarnaik 			schedule_work_on(cpu_id,
177683f40318SVaibhav Nagarnaik 					 &cpu_buffer->update_pages_work);
177705fdd70dSVaibhav Nagarnaik 			wait_for_completion(&cpu_buffer->update_done);
1778f5eb5588SSteven Rostedt (Red Hat) 		}
177983f40318SVaibhav Nagarnaik 
178083f40318SVaibhav Nagarnaik 		cpu_buffer->nr_pages_to_update = 0;
178105fdd70dSVaibhav Nagarnaik 		put_online_cpus();
17827a8e76a3SSteven Rostedt 	}
17837a8e76a3SSteven Rostedt 
17847a8e76a3SSteven Rostedt  out:
1785659f451fSSteven Rostedt 	/*
1786659f451fSSteven Rostedt 	 * The ring buffer resize can happen with the ring buffer
1787659f451fSSteven Rostedt 	 * enabled, so that the update disturbs the tracing as little
1788659f451fSSteven Rostedt 	 * as possible. But if the buffer is disabled, we do not need
1789659f451fSSteven Rostedt 	 * to worry about that, and we can take the time to verify
1790659f451fSSteven Rostedt 	 * that the buffer is not corrupt.
1791659f451fSSteven Rostedt 	 */
1792659f451fSSteven Rostedt 	if (atomic_read(&buffer->record_disabled)) {
1793659f451fSSteven Rostedt 		atomic_inc(&buffer->record_disabled);
1794659f451fSSteven Rostedt 		/*
1795659f451fSSteven Rostedt 		 * Even though the buffer was disabled, we must make sure
1796659f451fSSteven Rostedt 		 * that it is truly disabled before calling rb_check_pages.
1797659f451fSSteven Rostedt 		 * There could have been a race between checking
1798659f451fSSteven Rostedt 		 * record_disable and incrementing it.
1799659f451fSSteven Rostedt 		 */
1800659f451fSSteven Rostedt 		synchronize_sched();
1801659f451fSSteven Rostedt 		for_each_buffer_cpu(buffer, cpu) {
1802659f451fSSteven Rostedt 			cpu_buffer = buffer->buffers[cpu];
1803659f451fSSteven Rostedt 			rb_check_pages(cpu_buffer);
1804659f451fSSteven Rostedt 		}
1805659f451fSSteven Rostedt 		atomic_dec(&buffer->record_disabled);
1806659f451fSSteven Rostedt 	}
1807659f451fSSteven Rostedt 
18087a8e76a3SSteven Rostedt 	mutex_unlock(&buffer->mutex);
18097a8e76a3SSteven Rostedt 	return size;
18107a8e76a3SSteven Rostedt 
181183f40318SVaibhav Nagarnaik  out_err:
1812438ced17SVaibhav Nagarnaik 	for_each_buffer_cpu(buffer, cpu) {
1813438ced17SVaibhav Nagarnaik 		struct buffer_page *bpage, *tmp;
181483f40318SVaibhav Nagarnaik 
1815438ced17SVaibhav Nagarnaik 		cpu_buffer = buffer->buffers[cpu];
1816438ced17SVaibhav Nagarnaik 		cpu_buffer->nr_pages_to_update = 0;
181783f40318SVaibhav Nagarnaik 
1818438ced17SVaibhav Nagarnaik 		if (list_empty(&cpu_buffer->new_pages))
1819438ced17SVaibhav Nagarnaik 			continue;
182083f40318SVaibhav Nagarnaik 
1821438ced17SVaibhav Nagarnaik 		list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
1822438ced17SVaibhav Nagarnaik 					list) {
1823044fa782SSteven Rostedt 			list_del_init(&bpage->list);
1824044fa782SSteven Rostedt 			free_buffer_page(bpage);
18257a8e76a3SSteven Rostedt 		}
1826438ced17SVaibhav Nagarnaik 	}
1827641d2f63SVegard Nossum 	mutex_unlock(&buffer->mutex);
182883f40318SVaibhav Nagarnaik 	return err;
18297a8e76a3SSteven Rostedt }
1830c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_resize);
18317a8e76a3SSteven Rostedt 
1832750912faSDavid Sharp void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val)
1833750912faSDavid Sharp {
1834750912faSDavid Sharp 	mutex_lock(&buffer->mutex);
1835750912faSDavid Sharp 	if (val)
1836750912faSDavid Sharp 		buffer->flags |= RB_FL_OVERWRITE;
1837750912faSDavid Sharp 	else
1838750912faSDavid Sharp 		buffer->flags &= ~RB_FL_OVERWRITE;
1839750912faSDavid Sharp 	mutex_unlock(&buffer->mutex);
1840750912faSDavid Sharp }
1841750912faSDavid Sharp EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
1842750912faSDavid Sharp 
18432289d567SSteven Rostedt (Red Hat) static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
18447a8e76a3SSteven Rostedt {
1845044fa782SSteven Rostedt 	return bpage->page->data + index;
18467a8e76a3SSteven Rostedt }
18477a8e76a3SSteven Rostedt 
18482289d567SSteven Rostedt (Red Hat) static __always_inline struct ring_buffer_event *
1849d769041fSSteven Rostedt rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
18507a8e76a3SSteven Rostedt {
18516f807acdSSteven Rostedt 	return __rb_page_index(cpu_buffer->reader_page,
18526f807acdSSteven Rostedt 			       cpu_buffer->reader_page->read);
18536f807acdSSteven Rostedt }
18546f807acdSSteven Rostedt 
18552289d567SSteven Rostedt (Red Hat) static __always_inline struct ring_buffer_event *
18567a8e76a3SSteven Rostedt rb_iter_head_event(struct ring_buffer_iter *iter)
18577a8e76a3SSteven Rostedt {
18586f807acdSSteven Rostedt 	return __rb_page_index(iter->head_page, iter->head);
18597a8e76a3SSteven Rostedt }
18607a8e76a3SSteven Rostedt 
18612289d567SSteven Rostedt (Red Hat) static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
1862bf41a158SSteven Rostedt {
1863abc9b56dSSteven Rostedt 	return local_read(&bpage->page->commit);
1864bf41a158SSteven Rostedt }
1865bf41a158SSteven Rostedt 
186625985edcSLucas De Marchi /* Size is determined by what has been committed */
18672289d567SSteven Rostedt (Red Hat) static __always_inline unsigned rb_page_size(struct buffer_page *bpage)
1868bf41a158SSteven Rostedt {
1869bf41a158SSteven Rostedt 	return rb_page_commit(bpage);
1870bf41a158SSteven Rostedt }
1871bf41a158SSteven Rostedt 
18722289d567SSteven Rostedt (Red Hat) static __always_inline unsigned
1873bf41a158SSteven Rostedt rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
1874bf41a158SSteven Rostedt {
1875bf41a158SSteven Rostedt 	return rb_page_commit(cpu_buffer->commit_page);
1876bf41a158SSteven Rostedt }
1877bf41a158SSteven Rostedt 
18782289d567SSteven Rostedt (Red Hat) static __always_inline unsigned
1879bf41a158SSteven Rostedt rb_event_index(struct ring_buffer_event *event)
18807a8e76a3SSteven Rostedt {
1881bf41a158SSteven Rostedt 	unsigned long addr = (unsigned long)event;
1882bf41a158SSteven Rostedt 
188322f470f8SSteven Rostedt 	return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
18847a8e76a3SSteven Rostedt }
18857a8e76a3SSteven Rostedt 
188634a148bfSAndrew Morton static void rb_inc_iter(struct ring_buffer_iter *iter)
1887d769041fSSteven Rostedt {
1888d769041fSSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1889d769041fSSteven Rostedt 
1890d769041fSSteven Rostedt 	/*
1891d769041fSSteven Rostedt 	 * The iterator could be on the reader page (it starts there).
1892d769041fSSteven Rostedt 	 * But the head could have moved, since the reader was
1893d769041fSSteven Rostedt 	 * found. Check for this case and assign the iterator
1894d769041fSSteven Rostedt 	 * to the head page instead of next.
1895d769041fSSteven Rostedt 	 */
1896d769041fSSteven Rostedt 	if (iter->head_page == cpu_buffer->reader_page)
189777ae365eSSteven Rostedt 		iter->head_page = rb_set_head_page(cpu_buffer);
1898d769041fSSteven Rostedt 	else
1899d769041fSSteven Rostedt 		rb_inc_page(cpu_buffer, &iter->head_page);
1900d769041fSSteven Rostedt 
1901abc9b56dSSteven Rostedt 	iter->read_stamp = iter->head_page->page->time_stamp;
19027a8e76a3SSteven Rostedt 	iter->head = 0;
19037a8e76a3SSteven Rostedt }
19047a8e76a3SSteven Rostedt 
190577ae365eSSteven Rostedt /*
190677ae365eSSteven Rostedt  * rb_handle_head_page - writer hit the head page
190777ae365eSSteven Rostedt  *
190877ae365eSSteven Rostedt  * Returns: +1 to retry page
190977ae365eSSteven Rostedt  *           0 to continue
191077ae365eSSteven Rostedt  *          -1 on error
191177ae365eSSteven Rostedt  */
191277ae365eSSteven Rostedt static int
191377ae365eSSteven Rostedt rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
191477ae365eSSteven Rostedt 		    struct buffer_page *tail_page,
191577ae365eSSteven Rostedt 		    struct buffer_page *next_page)
191677ae365eSSteven Rostedt {
191777ae365eSSteven Rostedt 	struct buffer_page *new_head;
191877ae365eSSteven Rostedt 	int entries;
191977ae365eSSteven Rostedt 	int type;
192077ae365eSSteven Rostedt 	int ret;
192177ae365eSSteven Rostedt 
192277ae365eSSteven Rostedt 	entries = rb_page_entries(next_page);
192377ae365eSSteven Rostedt 
192477ae365eSSteven Rostedt 	/*
192577ae365eSSteven Rostedt 	 * The hard part is here. We need to move the head
192677ae365eSSteven Rostedt 	 * forward, and protect against both readers on
192777ae365eSSteven Rostedt 	 * other CPUs and writers coming in via interrupts.
192877ae365eSSteven Rostedt 	 */
192977ae365eSSteven Rostedt 	type = rb_head_page_set_update(cpu_buffer, next_page, tail_page,
193077ae365eSSteven Rostedt 				       RB_PAGE_HEAD);
193177ae365eSSteven Rostedt 
193277ae365eSSteven Rostedt 	/*
193377ae365eSSteven Rostedt 	 * type can be one of four:
193477ae365eSSteven Rostedt 	 *  NORMAL - an interrupt already moved it for us
193577ae365eSSteven Rostedt 	 *  HEAD   - we are the first to get here.
193677ae365eSSteven Rostedt 	 *  UPDATE - we are the interrupt interrupting
193777ae365eSSteven Rostedt 	 *           a current move.
193877ae365eSSteven Rostedt 	 *  MOVED  - a reader on another CPU moved the next
193977ae365eSSteven Rostedt 	 *           pointer to its reader page. Give up
194077ae365eSSteven Rostedt 	 *           and try again.
194177ae365eSSteven Rostedt 	 */
194277ae365eSSteven Rostedt 
194377ae365eSSteven Rostedt 	switch (type) {
194477ae365eSSteven Rostedt 	case RB_PAGE_HEAD:
194577ae365eSSteven Rostedt 		/*
194677ae365eSSteven Rostedt 		 * We changed the head to UPDATE, thus
194777ae365eSSteven Rostedt 		 * it is our responsibility to update
194877ae365eSSteven Rostedt 		 * the counters.
194977ae365eSSteven Rostedt 		 */
195077ae365eSSteven Rostedt 		local_add(entries, &cpu_buffer->overrun);
1951c64e148aSVaibhav Nagarnaik 		local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
195277ae365eSSteven Rostedt 
195377ae365eSSteven Rostedt 		/*
195477ae365eSSteven Rostedt 		 * The entries will be zeroed out when we move the
195577ae365eSSteven Rostedt 		 * tail page.
195677ae365eSSteven Rostedt 		 */
195777ae365eSSteven Rostedt 
195877ae365eSSteven Rostedt 		/* still more to do */
195977ae365eSSteven Rostedt 		break;
196077ae365eSSteven Rostedt 
196177ae365eSSteven Rostedt 	case RB_PAGE_UPDATE:
196277ae365eSSteven Rostedt 		/*
196377ae365eSSteven Rostedt 		 * This is an interrupt that interrupt the
196477ae365eSSteven Rostedt 		 * previous update. Still more to do.
196577ae365eSSteven Rostedt 		 */
196677ae365eSSteven Rostedt 		break;
196777ae365eSSteven Rostedt 	case RB_PAGE_NORMAL:
196877ae365eSSteven Rostedt 		/*
196977ae365eSSteven Rostedt 		 * An interrupt came in before the update
197077ae365eSSteven Rostedt 		 * and processed this for us.
197177ae365eSSteven Rostedt 		 * Nothing left to do.
197277ae365eSSteven Rostedt 		 */
197377ae365eSSteven Rostedt 		return 1;
197477ae365eSSteven Rostedt 	case RB_PAGE_MOVED:
197577ae365eSSteven Rostedt 		/*
197677ae365eSSteven Rostedt 		 * The reader is on another CPU and just did
197777ae365eSSteven Rostedt 		 * a swap with our next_page.
197877ae365eSSteven Rostedt 		 * Try again.
197977ae365eSSteven Rostedt 		 */
198077ae365eSSteven Rostedt 		return 1;
198177ae365eSSteven Rostedt 	default:
198277ae365eSSteven Rostedt 		RB_WARN_ON(cpu_buffer, 1); /* WTF??? */
198377ae365eSSteven Rostedt 		return -1;
198477ae365eSSteven Rostedt 	}
198577ae365eSSteven Rostedt 
198677ae365eSSteven Rostedt 	/*
198777ae365eSSteven Rostedt 	 * Now that we are here, the old head pointer is
198877ae365eSSteven Rostedt 	 * set to UPDATE. This will keep the reader from
198977ae365eSSteven Rostedt 	 * swapping the head page with the reader page.
199077ae365eSSteven Rostedt 	 * The reader (on another CPU) will spin till
199177ae365eSSteven Rostedt 	 * we are finished.
199277ae365eSSteven Rostedt 	 *
199377ae365eSSteven Rostedt 	 * We just need to protect against interrupts
199477ae365eSSteven Rostedt 	 * doing the job. We will set the next pointer
199577ae365eSSteven Rostedt 	 * to HEAD. After that, we set the old pointer
199677ae365eSSteven Rostedt 	 * to NORMAL, but only if it was HEAD before.
199777ae365eSSteven Rostedt 	 * otherwise we are an interrupt, and only
199877ae365eSSteven Rostedt 	 * want the outer most commit to reset it.
199977ae365eSSteven Rostedt 	 */
200077ae365eSSteven Rostedt 	new_head = next_page;
200177ae365eSSteven Rostedt 	rb_inc_page(cpu_buffer, &new_head);
200277ae365eSSteven Rostedt 
200377ae365eSSteven Rostedt 	ret = rb_head_page_set_head(cpu_buffer, new_head, next_page,
200477ae365eSSteven Rostedt 				    RB_PAGE_NORMAL);
200577ae365eSSteven Rostedt 
200677ae365eSSteven Rostedt 	/*
200777ae365eSSteven Rostedt 	 * Valid returns are:
200877ae365eSSteven Rostedt 	 *  HEAD   - an interrupt came in and already set it.
200977ae365eSSteven Rostedt 	 *  NORMAL - One of two things:
201077ae365eSSteven Rostedt 	 *            1) We really set it.
201177ae365eSSteven Rostedt 	 *            2) A bunch of interrupts came in and moved
201277ae365eSSteven Rostedt 	 *               the page forward again.
201377ae365eSSteven Rostedt 	 */
201477ae365eSSteven Rostedt 	switch (ret) {
201577ae365eSSteven Rostedt 	case RB_PAGE_HEAD:
201677ae365eSSteven Rostedt 	case RB_PAGE_NORMAL:
201777ae365eSSteven Rostedt 		/* OK */
201877ae365eSSteven Rostedt 		break;
201977ae365eSSteven Rostedt 	default:
202077ae365eSSteven Rostedt 		RB_WARN_ON(cpu_buffer, 1);
202177ae365eSSteven Rostedt 		return -1;
202277ae365eSSteven Rostedt 	}
202377ae365eSSteven Rostedt 
202477ae365eSSteven Rostedt 	/*
202577ae365eSSteven Rostedt 	 * It is possible that an interrupt came in,
202677ae365eSSteven Rostedt 	 * set the head up, then more interrupts came in
202777ae365eSSteven Rostedt 	 * and moved it again. When we get back here,
202877ae365eSSteven Rostedt 	 * the page would have been set to NORMAL but we
202977ae365eSSteven Rostedt 	 * just set it back to HEAD.
203077ae365eSSteven Rostedt 	 *
203177ae365eSSteven Rostedt 	 * How do you detect this? Well, if that happened
203277ae365eSSteven Rostedt 	 * the tail page would have moved.
203377ae365eSSteven Rostedt 	 */
203477ae365eSSteven Rostedt 	if (ret == RB_PAGE_NORMAL) {
20358573636eSSteven Rostedt (Red Hat) 		struct buffer_page *buffer_tail_page;
20368573636eSSteven Rostedt (Red Hat) 
20378573636eSSteven Rostedt (Red Hat) 		buffer_tail_page = READ_ONCE(cpu_buffer->tail_page);
203877ae365eSSteven Rostedt 		/*
203977ae365eSSteven Rostedt 		 * If the tail had moved passed next, then we need
204077ae365eSSteven Rostedt 		 * to reset the pointer.
204177ae365eSSteven Rostedt 		 */
20428573636eSSteven Rostedt (Red Hat) 		if (buffer_tail_page != tail_page &&
20438573636eSSteven Rostedt (Red Hat) 		    buffer_tail_page != next_page)
204477ae365eSSteven Rostedt 			rb_head_page_set_normal(cpu_buffer, new_head,
204577ae365eSSteven Rostedt 						next_page,
204677ae365eSSteven Rostedt 						RB_PAGE_HEAD);
204777ae365eSSteven Rostedt 	}
204877ae365eSSteven Rostedt 
204977ae365eSSteven Rostedt 	/*
205077ae365eSSteven Rostedt 	 * If this was the outer most commit (the one that
205177ae365eSSteven Rostedt 	 * changed the original pointer from HEAD to UPDATE),
205277ae365eSSteven Rostedt 	 * then it is up to us to reset it to NORMAL.
205377ae365eSSteven Rostedt 	 */
205477ae365eSSteven Rostedt 	if (type == RB_PAGE_HEAD) {
205577ae365eSSteven Rostedt 		ret = rb_head_page_set_normal(cpu_buffer, next_page,
205677ae365eSSteven Rostedt 					      tail_page,
205777ae365eSSteven Rostedt 					      RB_PAGE_UPDATE);
205877ae365eSSteven Rostedt 		if (RB_WARN_ON(cpu_buffer,
205977ae365eSSteven Rostedt 			       ret != RB_PAGE_UPDATE))
206077ae365eSSteven Rostedt 			return -1;
206177ae365eSSteven Rostedt 	}
206277ae365eSSteven Rostedt 
206377ae365eSSteven Rostedt 	return 0;
206477ae365eSSteven Rostedt }
206577ae365eSSteven Rostedt 
2066c7b09308SSteven Rostedt static inline void
2067c7b09308SSteven Rostedt rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
2068fcc742eaSSteven Rostedt (Red Hat) 	      unsigned long tail, struct rb_event_info *info)
2069c7b09308SSteven Rostedt {
2070fcc742eaSSteven Rostedt (Red Hat) 	struct buffer_page *tail_page = info->tail_page;
2071c7b09308SSteven Rostedt 	struct ring_buffer_event *event;
2072fcc742eaSSteven Rostedt (Red Hat) 	unsigned long length = info->length;
2073c7b09308SSteven Rostedt 
2074c7b09308SSteven Rostedt 	/*
2075c7b09308SSteven Rostedt 	 * Only the event that crossed the page boundary
2076c7b09308SSteven Rostedt 	 * must fill the old tail_page with padding.
2077c7b09308SSteven Rostedt 	 */
2078c7b09308SSteven Rostedt 	if (tail >= BUF_PAGE_SIZE) {
2079b3230c8bSSteven Rostedt 		/*
2080b3230c8bSSteven Rostedt 		 * If the page was filled, then we still need
2081b3230c8bSSteven Rostedt 		 * to update the real_end. Reset it to zero
2082b3230c8bSSteven Rostedt 		 * and the reader will ignore it.
2083b3230c8bSSteven Rostedt 		 */
2084b3230c8bSSteven Rostedt 		if (tail == BUF_PAGE_SIZE)
2085b3230c8bSSteven Rostedt 			tail_page->real_end = 0;
2086b3230c8bSSteven Rostedt 
2087c7b09308SSteven Rostedt 		local_sub(length, &tail_page->write);
2088c7b09308SSteven Rostedt 		return;
2089c7b09308SSteven Rostedt 	}
2090c7b09308SSteven Rostedt 
2091c7b09308SSteven Rostedt 	event = __rb_page_index(tail_page, tail);
2092c7b09308SSteven Rostedt 
2093c64e148aSVaibhav Nagarnaik 	/* account for padding bytes */
2094c64e148aSVaibhav Nagarnaik 	local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
2095c64e148aSVaibhav Nagarnaik 
2096c7b09308SSteven Rostedt 	/*
2097ff0ff84aSSteven Rostedt 	 * Save the original length to the meta data.
2098ff0ff84aSSteven Rostedt 	 * This will be used by the reader to add lost event
2099ff0ff84aSSteven Rostedt 	 * counter.
2100ff0ff84aSSteven Rostedt 	 */
2101ff0ff84aSSteven Rostedt 	tail_page->real_end = tail;
2102ff0ff84aSSteven Rostedt 
2103ff0ff84aSSteven Rostedt 	/*
2104c7b09308SSteven Rostedt 	 * If this event is bigger than the minimum size, then
2105c7b09308SSteven Rostedt 	 * we need to be careful that we don't subtract the
2106c7b09308SSteven Rostedt 	 * write counter enough to allow another writer to slip
2107c7b09308SSteven Rostedt 	 * in on this page.
2108c7b09308SSteven Rostedt 	 * We put in a discarded commit instead, to make sure
2109c7b09308SSteven Rostedt 	 * that this space is not used again.
2110c7b09308SSteven Rostedt 	 *
2111c7b09308SSteven Rostedt 	 * If we are less than the minimum size, we don't need to
2112c7b09308SSteven Rostedt 	 * worry about it.
2113c7b09308SSteven Rostedt 	 */
2114c7b09308SSteven Rostedt 	if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) {
2115c7b09308SSteven Rostedt 		/* No room for any events */
2116c7b09308SSteven Rostedt 
2117c7b09308SSteven Rostedt 		/* Mark the rest of the page with padding */
2118c7b09308SSteven Rostedt 		rb_event_set_padding(event);
2119c7b09308SSteven Rostedt 
2120c7b09308SSteven Rostedt 		/* Set the write back to the previous setting */
2121c7b09308SSteven Rostedt 		local_sub(length, &tail_page->write);
2122c7b09308SSteven Rostedt 		return;
2123c7b09308SSteven Rostedt 	}
2124c7b09308SSteven Rostedt 
2125c7b09308SSteven Rostedt 	/* Put in a discarded event */
2126c7b09308SSteven Rostedt 	event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE;
2127c7b09308SSteven Rostedt 	event->type_len = RINGBUF_TYPE_PADDING;
2128c7b09308SSteven Rostedt 	/* time delta must be non zero */
2129c7b09308SSteven Rostedt 	event->time_delta = 1;
2130c7b09308SSteven Rostedt 
2131c7b09308SSteven Rostedt 	/* Set write to end of buffer */
2132c7b09308SSteven Rostedt 	length = (tail + length) - BUF_PAGE_SIZE;
2133c7b09308SSteven Rostedt 	local_sub(length, &tail_page->write);
2134c7b09308SSteven Rostedt }
21356634ff26SSteven Rostedt 
21364239c38fSSteven Rostedt (Red Hat) static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer);
21374239c38fSSteven Rostedt (Red Hat) 
2138747e94aeSSteven Rostedt /*
2139747e94aeSSteven Rostedt  * This is the slow path, force gcc not to inline it.
2140747e94aeSSteven Rostedt  */
2141747e94aeSSteven Rostedt static noinline struct ring_buffer_event *
21426634ff26SSteven Rostedt rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
2143fcc742eaSSteven Rostedt (Red Hat) 	     unsigned long tail, struct rb_event_info *info)
21447a8e76a3SSteven Rostedt {
2145fcc742eaSSteven Rostedt (Red Hat) 	struct buffer_page *tail_page = info->tail_page;
21465a50e33cSSteven Rostedt 	struct buffer_page *commit_page = cpu_buffer->commit_page;
21477a8e76a3SSteven Rostedt 	struct ring_buffer *buffer = cpu_buffer->buffer;
214877ae365eSSteven Rostedt 	struct buffer_page *next_page;
214977ae365eSSteven Rostedt 	int ret;
2150aa20ae84SSteven Rostedt 
2151aa20ae84SSteven Rostedt 	next_page = tail_page;
21527a8e76a3SSteven Rostedt 
21537a8e76a3SSteven Rostedt 	rb_inc_page(cpu_buffer, &next_page);
21547a8e76a3SSteven Rostedt 
2155bf41a158SSteven Rostedt 	/*
2156bf41a158SSteven Rostedt 	 * If for some reason, we had an interrupt storm that made
2157bf41a158SSteven Rostedt 	 * it all the way around the buffer, bail, and warn
2158bf41a158SSteven Rostedt 	 * about it.
2159bf41a158SSteven Rostedt 	 */
216098db8df7SSteven Rostedt 	if (unlikely(next_page == commit_page)) {
216177ae365eSSteven Rostedt 		local_inc(&cpu_buffer->commit_overrun);
216245141d46SSteven Rostedt 		goto out_reset;
2163bf41a158SSteven Rostedt 	}
2164d769041fSSteven Rostedt 
2165bf41a158SSteven Rostedt 	/*
216677ae365eSSteven Rostedt 	 * This is where the fun begins!
216777ae365eSSteven Rostedt 	 *
216877ae365eSSteven Rostedt 	 * We are fighting against races between a reader that
216977ae365eSSteven Rostedt 	 * could be on another CPU trying to swap its reader
217077ae365eSSteven Rostedt 	 * page with the buffer head.
217177ae365eSSteven Rostedt 	 *
217277ae365eSSteven Rostedt 	 * We are also fighting against interrupts coming in and
217377ae365eSSteven Rostedt 	 * moving the head or tail on us as well.
217477ae365eSSteven Rostedt 	 *
217577ae365eSSteven Rostedt 	 * If the next page is the head page then we have filled
217677ae365eSSteven Rostedt 	 * the buffer, unless the commit page is still on the
217777ae365eSSteven Rostedt 	 * reader page.
2178bf41a158SSteven Rostedt 	 */
217977ae365eSSteven Rostedt 	if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) {
2180bf41a158SSteven Rostedt 
218177ae365eSSteven Rostedt 		/*
218277ae365eSSteven Rostedt 		 * If the commit is not on the reader page, then
218377ae365eSSteven Rostedt 		 * move the header page.
218477ae365eSSteven Rostedt 		 */
218577ae365eSSteven Rostedt 		if (!rb_is_reader_page(cpu_buffer->commit_page)) {
218677ae365eSSteven Rostedt 			/*
218777ae365eSSteven Rostedt 			 * If we are not in overwrite mode,
218877ae365eSSteven Rostedt 			 * this is easy, just stop here.
218977ae365eSSteven Rostedt 			 */
2190884bfe89SSlava Pestov 			if (!(buffer->flags & RB_FL_OVERWRITE)) {
2191884bfe89SSlava Pestov 				local_inc(&cpu_buffer->dropped_events);
219277ae365eSSteven Rostedt 				goto out_reset;
2193884bfe89SSlava Pestov 			}
219477ae365eSSteven Rostedt 
219577ae365eSSteven Rostedt 			ret = rb_handle_head_page(cpu_buffer,
219677ae365eSSteven Rostedt 						  tail_page,
219777ae365eSSteven Rostedt 						  next_page);
219877ae365eSSteven Rostedt 			if (ret < 0)
219977ae365eSSteven Rostedt 				goto out_reset;
220077ae365eSSteven Rostedt 			if (ret)
220177ae365eSSteven Rostedt 				goto out_again;
220277ae365eSSteven Rostedt 		} else {
220377ae365eSSteven Rostedt 			/*
220477ae365eSSteven Rostedt 			 * We need to be careful here too. The
220577ae365eSSteven Rostedt 			 * commit page could still be on the reader
220677ae365eSSteven Rostedt 			 * page. We could have a small buffer, and
220777ae365eSSteven Rostedt 			 * have filled up the buffer with events
220877ae365eSSteven Rostedt 			 * from interrupts and such, and wrapped.
220977ae365eSSteven Rostedt 			 *
221077ae365eSSteven Rostedt 			 * Note, if the tail page is also the on the
221177ae365eSSteven Rostedt 			 * reader_page, we let it move out.
221277ae365eSSteven Rostedt 			 */
221377ae365eSSteven Rostedt 			if (unlikely((cpu_buffer->commit_page !=
221477ae365eSSteven Rostedt 				      cpu_buffer->tail_page) &&
221577ae365eSSteven Rostedt 				     (cpu_buffer->commit_page ==
221677ae365eSSteven Rostedt 				      cpu_buffer->reader_page))) {
221777ae365eSSteven Rostedt 				local_inc(&cpu_buffer->commit_overrun);
221877ae365eSSteven Rostedt 				goto out_reset;
221977ae365eSSteven Rostedt 			}
222077ae365eSSteven Rostedt 		}
2221bf41a158SSteven Rostedt 	}
2222bf41a158SSteven Rostedt 
222370004986SSteven Rostedt (Red Hat) 	rb_tail_page_update(cpu_buffer, tail_page, next_page);
22247a8e76a3SSteven Rostedt 
222577ae365eSSteven Rostedt  out_again:
222677ae365eSSteven Rostedt 
2227fcc742eaSSteven Rostedt (Red Hat) 	rb_reset_tail(cpu_buffer, tail, info);
2228bf41a158SSteven Rostedt 
22294239c38fSSteven Rostedt (Red Hat) 	/* Commit what we have for now. */
22304239c38fSSteven Rostedt (Red Hat) 	rb_end_commit(cpu_buffer);
22314239c38fSSteven Rostedt (Red Hat) 	/* rb_end_commit() decs committing */
22324239c38fSSteven Rostedt (Red Hat) 	local_inc(&cpu_buffer->committing);
22334239c38fSSteven Rostedt (Red Hat) 
2234bf41a158SSteven Rostedt 	/* fail and let the caller try again */
2235bf41a158SSteven Rostedt 	return ERR_PTR(-EAGAIN);
2236bf41a158SSteven Rostedt 
223745141d46SSteven Rostedt  out_reset:
22386f3b3440SLai Jiangshan 	/* reset write */
2239fcc742eaSSteven Rostedt (Red Hat) 	rb_reset_tail(cpu_buffer, tail, info);
22406f3b3440SLai Jiangshan 
2241bf41a158SSteven Rostedt 	return NULL;
22427a8e76a3SSteven Rostedt }
22437a8e76a3SSteven Rostedt 
2244d90fd774SSteven Rostedt (Red Hat) /* Slow path, do not inline */
2245d90fd774SSteven Rostedt (Red Hat) static noinline struct ring_buffer_event *
2246*dc4e2801STom Zanussi rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs)
2247d90fd774SSteven Rostedt (Red Hat) {
2248*dc4e2801STom Zanussi 	if (abs)
2249*dc4e2801STom Zanussi 		event->type_len = RINGBUF_TYPE_TIME_STAMP;
2250*dc4e2801STom Zanussi 	else
2251d90fd774SSteven Rostedt (Red Hat) 		event->type_len = RINGBUF_TYPE_TIME_EXTEND;
2252d90fd774SSteven Rostedt (Red Hat) 
2253*dc4e2801STom Zanussi 	/* Not the first event on the page, or not delta? */
2254*dc4e2801STom Zanussi 	if (abs || rb_event_index(event)) {
2255d90fd774SSteven Rostedt (Red Hat) 		event->time_delta = delta & TS_MASK;
2256d90fd774SSteven Rostedt (Red Hat) 		event->array[0] = delta >> TS_SHIFT;
2257d90fd774SSteven Rostedt (Red Hat) 	} else {
2258d90fd774SSteven Rostedt (Red Hat) 		/* nope, just zero it */
2259d90fd774SSteven Rostedt (Red Hat) 		event->time_delta = 0;
2260d90fd774SSteven Rostedt (Red Hat) 		event->array[0] = 0;
2261d90fd774SSteven Rostedt (Red Hat) 	}
2262d90fd774SSteven Rostedt (Red Hat) 
2263d90fd774SSteven Rostedt (Red Hat) 	return skip_time_extend(event);
2264d90fd774SSteven Rostedt (Red Hat) }
2265d90fd774SSteven Rostedt (Red Hat) 
2266cdb2a0a9SYaowei Bai static inline bool rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
2267b7dc42fdSSteven Rostedt (Red Hat) 				     struct ring_buffer_event *event);
2268b7dc42fdSSteven Rostedt (Red Hat) 
2269d90fd774SSteven Rostedt (Red Hat) /**
2270d90fd774SSteven Rostedt (Red Hat)  * rb_update_event - update event type and data
2271d90fd774SSteven Rostedt (Red Hat)  * @event: the event to update
2272d90fd774SSteven Rostedt (Red Hat)  * @type: the type of event
2273d90fd774SSteven Rostedt (Red Hat)  * @length: the size of the event field in the ring buffer
2274d90fd774SSteven Rostedt (Red Hat)  *
2275d90fd774SSteven Rostedt (Red Hat)  * Update the type and data fields of the event. The length
2276d90fd774SSteven Rostedt (Red Hat)  * is the actual size that is written to the ring buffer,
2277d90fd774SSteven Rostedt (Red Hat)  * and with this, we can determine what to place into the
2278d90fd774SSteven Rostedt (Red Hat)  * data field.
2279d90fd774SSteven Rostedt (Red Hat)  */
2280b7dc42fdSSteven Rostedt (Red Hat) static void
2281d90fd774SSteven Rostedt (Red Hat) rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
2282d90fd774SSteven Rostedt (Red Hat) 		struct ring_buffer_event *event,
2283d90fd774SSteven Rostedt (Red Hat) 		struct rb_event_info *info)
2284d90fd774SSteven Rostedt (Red Hat) {
2285d90fd774SSteven Rostedt (Red Hat) 	unsigned length = info->length;
2286d90fd774SSteven Rostedt (Red Hat) 	u64 delta = info->delta;
2287d90fd774SSteven Rostedt (Red Hat) 
2288b7dc42fdSSteven Rostedt (Red Hat) 	/* Only a commit updates the timestamp */
2289b7dc42fdSSteven Rostedt (Red Hat) 	if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
2290b7dc42fdSSteven Rostedt (Red Hat) 		delta = 0;
2291b7dc42fdSSteven Rostedt (Red Hat) 
2292d90fd774SSteven Rostedt (Red Hat) 	/*
2293d90fd774SSteven Rostedt (Red Hat) 	 * If we need to add a timestamp, then we
2294d90fd774SSteven Rostedt (Red Hat) 	 * add it to the start of the resevered space.
2295d90fd774SSteven Rostedt (Red Hat) 	 */
2296d90fd774SSteven Rostedt (Red Hat) 	if (unlikely(info->add_timestamp)) {
2297*dc4e2801STom Zanussi 		bool abs = ring_buffer_time_stamp_abs(cpu_buffer->buffer);
2298*dc4e2801STom Zanussi 
2299*dc4e2801STom Zanussi 		event = rb_add_time_stamp(event, info->delta, abs);
2300d90fd774SSteven Rostedt (Red Hat) 		length -= RB_LEN_TIME_EXTEND;
2301d90fd774SSteven Rostedt (Red Hat) 		delta = 0;
2302d90fd774SSteven Rostedt (Red Hat) 	}
2303d90fd774SSteven Rostedt (Red Hat) 
2304d90fd774SSteven Rostedt (Red Hat) 	event->time_delta = delta;
2305d90fd774SSteven Rostedt (Red Hat) 	length -= RB_EVNT_HDR_SIZE;
2306d90fd774SSteven Rostedt (Red Hat) 	if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
2307d90fd774SSteven Rostedt (Red Hat) 		event->type_len = 0;
2308d90fd774SSteven Rostedt (Red Hat) 		event->array[0] = length;
2309d90fd774SSteven Rostedt (Red Hat) 	} else
2310d90fd774SSteven Rostedt (Red Hat) 		event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
2311d90fd774SSteven Rostedt (Red Hat) }
2312d90fd774SSteven Rostedt (Red Hat) 
2313d90fd774SSteven Rostedt (Red Hat) static unsigned rb_calculate_event_length(unsigned length)
2314d90fd774SSteven Rostedt (Red Hat) {
2315d90fd774SSteven Rostedt (Red Hat) 	struct ring_buffer_event event; /* Used only for sizeof array */
2316d90fd774SSteven Rostedt (Red Hat) 
2317d90fd774SSteven Rostedt (Red Hat) 	/* zero length can cause confusions */
2318d90fd774SSteven Rostedt (Red Hat) 	if (!length)
2319d90fd774SSteven Rostedt (Red Hat) 		length++;
2320d90fd774SSteven Rostedt (Red Hat) 
2321d90fd774SSteven Rostedt (Red Hat) 	if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
2322d90fd774SSteven Rostedt (Red Hat) 		length += sizeof(event.array[0]);
2323d90fd774SSteven Rostedt (Red Hat) 
2324d90fd774SSteven Rostedt (Red Hat) 	length += RB_EVNT_HDR_SIZE;
2325d90fd774SSteven Rostedt (Red Hat) 	length = ALIGN(length, RB_ARCH_ALIGNMENT);
2326d90fd774SSteven Rostedt (Red Hat) 
2327d90fd774SSteven Rostedt (Red Hat) 	/*
2328d90fd774SSteven Rostedt (Red Hat) 	 * In case the time delta is larger than the 27 bits for it
2329d90fd774SSteven Rostedt (Red Hat) 	 * in the header, we need to add a timestamp. If another
2330d90fd774SSteven Rostedt (Red Hat) 	 * event comes in when trying to discard this one to increase
2331d90fd774SSteven Rostedt (Red Hat) 	 * the length, then the timestamp will be added in the allocated
2332d90fd774SSteven Rostedt (Red Hat) 	 * space of this event. If length is bigger than the size needed
2333d90fd774SSteven Rostedt (Red Hat) 	 * for the TIME_EXTEND, then padding has to be used. The events
2334d90fd774SSteven Rostedt (Red Hat) 	 * length must be either RB_LEN_TIME_EXTEND, or greater than or equal
2335d90fd774SSteven Rostedt (Red Hat) 	 * to RB_LEN_TIME_EXTEND + 8, as 8 is the minimum size for padding.
2336d90fd774SSteven Rostedt (Red Hat) 	 * As length is a multiple of 4, we only need to worry if it
2337d90fd774SSteven Rostedt (Red Hat) 	 * is 12 (RB_LEN_TIME_EXTEND + 4).
2338d90fd774SSteven Rostedt (Red Hat) 	 */
2339d90fd774SSteven Rostedt (Red Hat) 	if (length == RB_LEN_TIME_EXTEND + RB_ALIGNMENT)
2340d90fd774SSteven Rostedt (Red Hat) 		length += RB_ALIGNMENT;
2341d90fd774SSteven Rostedt (Red Hat) 
2342d90fd774SSteven Rostedt (Red Hat) 	return length;
2343d90fd774SSteven Rostedt (Red Hat) }
2344d90fd774SSteven Rostedt (Red Hat) 
23459826b273SSteven Rostedt (Red Hat) #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
23469826b273SSteven Rostedt (Red Hat) static inline bool sched_clock_stable(void)
23479826b273SSteven Rostedt (Red Hat) {
23489826b273SSteven Rostedt (Red Hat) 	return true;
23499826b273SSteven Rostedt (Red Hat) }
23509826b273SSteven Rostedt (Red Hat) #endif
23519826b273SSteven Rostedt (Red Hat) 
2352a4543a2fSSteven Rostedt (Red Hat) static inline int
2353a4543a2fSSteven Rostedt (Red Hat) rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
2354d90fd774SSteven Rostedt (Red Hat) 		  struct ring_buffer_event *event)
2355d90fd774SSteven Rostedt (Red Hat) {
2356d90fd774SSteven Rostedt (Red Hat) 	unsigned long new_index, old_index;
2357d90fd774SSteven Rostedt (Red Hat) 	struct buffer_page *bpage;
2358d90fd774SSteven Rostedt (Red Hat) 	unsigned long index;
2359d90fd774SSteven Rostedt (Red Hat) 	unsigned long addr;
2360d90fd774SSteven Rostedt (Red Hat) 
2361d90fd774SSteven Rostedt (Red Hat) 	new_index = rb_event_index(event);
2362d90fd774SSteven Rostedt (Red Hat) 	old_index = new_index + rb_event_ts_length(event);
2363d90fd774SSteven Rostedt (Red Hat) 	addr = (unsigned long)event;
2364d90fd774SSteven Rostedt (Red Hat) 	addr &= PAGE_MASK;
2365d90fd774SSteven Rostedt (Red Hat) 
23668573636eSSteven Rostedt (Red Hat) 	bpage = READ_ONCE(cpu_buffer->tail_page);
2367d90fd774SSteven Rostedt (Red Hat) 
2368d90fd774SSteven Rostedt (Red Hat) 	if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
2369d90fd774SSteven Rostedt (Red Hat) 		unsigned long write_mask =
2370d90fd774SSteven Rostedt (Red Hat) 			local_read(&bpage->write) & ~RB_WRITE_MASK;
2371d90fd774SSteven Rostedt (Red Hat) 		unsigned long event_length = rb_event_length(event);
2372d90fd774SSteven Rostedt (Red Hat) 		/*
2373d90fd774SSteven Rostedt (Red Hat) 		 * This is on the tail page. It is possible that
2374d90fd774SSteven Rostedt (Red Hat) 		 * a write could come in and move the tail page
2375d90fd774SSteven Rostedt (Red Hat) 		 * and write to the next page. That is fine
2376d90fd774SSteven Rostedt (Red Hat) 		 * because we just shorten what is on this page.
2377d90fd774SSteven Rostedt (Red Hat) 		 */
2378d90fd774SSteven Rostedt (Red Hat) 		old_index += write_mask;
2379d90fd774SSteven Rostedt (Red Hat) 		new_index += write_mask;
2380d90fd774SSteven Rostedt (Red Hat) 		index = local_cmpxchg(&bpage->write, old_index, new_index);
2381d90fd774SSteven Rostedt (Red Hat) 		if (index == old_index) {
2382d90fd774SSteven Rostedt (Red Hat) 			/* update counters */
2383d90fd774SSteven Rostedt (Red Hat) 			local_sub(event_length, &cpu_buffer->entries_bytes);
2384d90fd774SSteven Rostedt (Red Hat) 			return 1;
2385d90fd774SSteven Rostedt (Red Hat) 		}
2386d90fd774SSteven Rostedt (Red Hat) 	}
2387d90fd774SSteven Rostedt (Red Hat) 
2388d90fd774SSteven Rostedt (Red Hat) 	/* could not discard */
2389d90fd774SSteven Rostedt (Red Hat) 	return 0;
2390d90fd774SSteven Rostedt (Red Hat) }
2391d90fd774SSteven Rostedt (Red Hat) 
2392d90fd774SSteven Rostedt (Red Hat) static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
2393d90fd774SSteven Rostedt (Red Hat) {
2394d90fd774SSteven Rostedt (Red Hat) 	local_inc(&cpu_buffer->committing);
2395d90fd774SSteven Rostedt (Red Hat) 	local_inc(&cpu_buffer->commits);
2396d90fd774SSteven Rostedt (Red Hat) }
2397d90fd774SSteven Rostedt (Red Hat) 
239838e11df1SSteven Rostedt (Red Hat) static __always_inline void
2399d90fd774SSteven Rostedt (Red Hat) rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
2400d90fd774SSteven Rostedt (Red Hat) {
2401d90fd774SSteven Rostedt (Red Hat) 	unsigned long max_count;
2402d90fd774SSteven Rostedt (Red Hat) 
2403d90fd774SSteven Rostedt (Red Hat) 	/*
2404d90fd774SSteven Rostedt (Red Hat) 	 * We only race with interrupts and NMIs on this CPU.
2405d90fd774SSteven Rostedt (Red Hat) 	 * If we own the commit event, then we can commit
2406d90fd774SSteven Rostedt (Red Hat) 	 * all others that interrupted us, since the interruptions
2407d90fd774SSteven Rostedt (Red Hat) 	 * are in stack format (they finish before they come
2408d90fd774SSteven Rostedt (Red Hat) 	 * back to us). This allows us to do a simple loop to
2409d90fd774SSteven Rostedt (Red Hat) 	 * assign the commit to the tail.
2410d90fd774SSteven Rostedt (Red Hat) 	 */
2411d90fd774SSteven Rostedt (Red Hat)  again:
2412d90fd774SSteven Rostedt (Red Hat) 	max_count = cpu_buffer->nr_pages * 100;
2413d90fd774SSteven Rostedt (Red Hat) 
24148573636eSSteven Rostedt (Red Hat) 	while (cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page)) {
2415d90fd774SSteven Rostedt (Red Hat) 		if (RB_WARN_ON(cpu_buffer, !(--max_count)))
2416d90fd774SSteven Rostedt (Red Hat) 			return;
2417d90fd774SSteven Rostedt (Red Hat) 		if (RB_WARN_ON(cpu_buffer,
2418d90fd774SSteven Rostedt (Red Hat) 			       rb_is_reader_page(cpu_buffer->tail_page)))
2419d90fd774SSteven Rostedt (Red Hat) 			return;
2420d90fd774SSteven Rostedt (Red Hat) 		local_set(&cpu_buffer->commit_page->page->commit,
2421d90fd774SSteven Rostedt (Red Hat) 			  rb_page_write(cpu_buffer->commit_page));
2422d90fd774SSteven Rostedt (Red Hat) 		rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
242370004986SSteven Rostedt (Red Hat) 		/* Only update the write stamp if the page has an event */
242470004986SSteven Rostedt (Red Hat) 		if (rb_page_write(cpu_buffer->commit_page))
2425d90fd774SSteven Rostedt (Red Hat) 			cpu_buffer->write_stamp =
2426d90fd774SSteven Rostedt (Red Hat) 				cpu_buffer->commit_page->page->time_stamp;
2427d90fd774SSteven Rostedt (Red Hat) 		/* add barrier to keep gcc from optimizing too much */
2428d90fd774SSteven Rostedt (Red Hat) 		barrier();
2429d90fd774SSteven Rostedt (Red Hat) 	}
2430d90fd774SSteven Rostedt (Red Hat) 	while (rb_commit_index(cpu_buffer) !=
2431d90fd774SSteven Rostedt (Red Hat) 	       rb_page_write(cpu_buffer->commit_page)) {
2432d90fd774SSteven Rostedt (Red Hat) 
2433d90fd774SSteven Rostedt (Red Hat) 		local_set(&cpu_buffer->commit_page->page->commit,
2434d90fd774SSteven Rostedt (Red Hat) 			  rb_page_write(cpu_buffer->commit_page));
2435d90fd774SSteven Rostedt (Red Hat) 		RB_WARN_ON(cpu_buffer,
2436d90fd774SSteven Rostedt (Red Hat) 			   local_read(&cpu_buffer->commit_page->page->commit) &
2437d90fd774SSteven Rostedt (Red Hat) 			   ~RB_WRITE_MASK);
2438d90fd774SSteven Rostedt (Red Hat) 		barrier();
2439d90fd774SSteven Rostedt (Red Hat) 	}
2440d90fd774SSteven Rostedt (Red Hat) 
2441d90fd774SSteven Rostedt (Red Hat) 	/* again, keep gcc from optimizing */
2442d90fd774SSteven Rostedt (Red Hat) 	barrier();
2443d90fd774SSteven Rostedt (Red Hat) 
2444d90fd774SSteven Rostedt (Red Hat) 	/*
2445d90fd774SSteven Rostedt (Red Hat) 	 * If an interrupt came in just after the first while loop
2446d90fd774SSteven Rostedt (Red Hat) 	 * and pushed the tail page forward, we will be left with
2447d90fd774SSteven Rostedt (Red Hat) 	 * a dangling commit that will never go forward.
2448d90fd774SSteven Rostedt (Red Hat) 	 */
24498573636eSSteven Rostedt (Red Hat) 	if (unlikely(cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page)))
2450d90fd774SSteven Rostedt (Red Hat) 		goto again;
2451d90fd774SSteven Rostedt (Red Hat) }
2452d90fd774SSteven Rostedt (Red Hat) 
245338e11df1SSteven Rostedt (Red Hat) static __always_inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
2454d90fd774SSteven Rostedt (Red Hat) {
2455d90fd774SSteven Rostedt (Red Hat) 	unsigned long commits;
2456d90fd774SSteven Rostedt (Red Hat) 
2457d90fd774SSteven Rostedt (Red Hat) 	if (RB_WARN_ON(cpu_buffer,
2458d90fd774SSteven Rostedt (Red Hat) 		       !local_read(&cpu_buffer->committing)))
2459d90fd774SSteven Rostedt (Red Hat) 		return;
2460d90fd774SSteven Rostedt (Red Hat) 
2461d90fd774SSteven Rostedt (Red Hat)  again:
2462d90fd774SSteven Rostedt (Red Hat) 	commits = local_read(&cpu_buffer->commits);
2463d90fd774SSteven Rostedt (Red Hat) 	/* synchronize with interrupts */
2464d90fd774SSteven Rostedt (Red Hat) 	barrier();
2465d90fd774SSteven Rostedt (Red Hat) 	if (local_read(&cpu_buffer->committing) == 1)
2466d90fd774SSteven Rostedt (Red Hat) 		rb_set_commit_to_write(cpu_buffer);
2467d90fd774SSteven Rostedt (Red Hat) 
2468d90fd774SSteven Rostedt (Red Hat) 	local_dec(&cpu_buffer->committing);
2469d90fd774SSteven Rostedt (Red Hat) 
2470d90fd774SSteven Rostedt (Red Hat) 	/* synchronize with interrupts */
2471d90fd774SSteven Rostedt (Red Hat) 	barrier();
2472d90fd774SSteven Rostedt (Red Hat) 
2473d90fd774SSteven Rostedt (Red Hat) 	/*
2474d90fd774SSteven Rostedt (Red Hat) 	 * Need to account for interrupts coming in between the
2475d90fd774SSteven Rostedt (Red Hat) 	 * updating of the commit page and the clearing of the
2476d90fd774SSteven Rostedt (Red Hat) 	 * committing counter.
2477d90fd774SSteven Rostedt (Red Hat) 	 */
2478d90fd774SSteven Rostedt (Red Hat) 	if (unlikely(local_read(&cpu_buffer->commits) != commits) &&
2479d90fd774SSteven Rostedt (Red Hat) 	    !local_read(&cpu_buffer->committing)) {
2480d90fd774SSteven Rostedt (Red Hat) 		local_inc(&cpu_buffer->committing);
2481d90fd774SSteven Rostedt (Red Hat) 		goto again;
2482d90fd774SSteven Rostedt (Red Hat) 	}
2483d90fd774SSteven Rostedt (Red Hat) }
2484d90fd774SSteven Rostedt (Red Hat) 
2485d90fd774SSteven Rostedt (Red Hat) static inline void rb_event_discard(struct ring_buffer_event *event)
2486d90fd774SSteven Rostedt (Red Hat) {
2487*dc4e2801STom Zanussi 	if (extended_time(event))
2488d90fd774SSteven Rostedt (Red Hat) 		event = skip_time_extend(event);
2489d90fd774SSteven Rostedt (Red Hat) 
2490d90fd774SSteven Rostedt (Red Hat) 	/* array[0] holds the actual length for the discarded event */
2491d90fd774SSteven Rostedt (Red Hat) 	event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
2492d90fd774SSteven Rostedt (Red Hat) 	event->type_len = RINGBUF_TYPE_PADDING;
2493d90fd774SSteven Rostedt (Red Hat) 	/* time delta must be non zero */
2494d90fd774SSteven Rostedt (Red Hat) 	if (!event->time_delta)
2495d90fd774SSteven Rostedt (Red Hat) 		event->time_delta = 1;
2496d90fd774SSteven Rostedt (Red Hat) }
2497d90fd774SSteven Rostedt (Red Hat) 
2498babe3fceSSteven Rostedt (Red Hat) static __always_inline bool
2499d90fd774SSteven Rostedt (Red Hat) rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
2500d90fd774SSteven Rostedt (Red Hat) 		   struct ring_buffer_event *event)
2501d90fd774SSteven Rostedt (Red Hat) {
2502d90fd774SSteven Rostedt (Red Hat) 	unsigned long addr = (unsigned long)event;
2503d90fd774SSteven Rostedt (Red Hat) 	unsigned long index;
2504d90fd774SSteven Rostedt (Red Hat) 
2505d90fd774SSteven Rostedt (Red Hat) 	index = rb_event_index(event);
2506d90fd774SSteven Rostedt (Red Hat) 	addr &= PAGE_MASK;
2507d90fd774SSteven Rostedt (Red Hat) 
2508d90fd774SSteven Rostedt (Red Hat) 	return cpu_buffer->commit_page->page == (void *)addr &&
2509d90fd774SSteven Rostedt (Red Hat) 		rb_commit_index(cpu_buffer) == index;
2510d90fd774SSteven Rostedt (Red Hat) }
2511d90fd774SSteven Rostedt (Red Hat) 
2512babe3fceSSteven Rostedt (Red Hat) static __always_inline void
2513a4543a2fSSteven Rostedt (Red Hat) rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2514d90fd774SSteven Rostedt (Red Hat) 		      struct ring_buffer_event *event)
2515d90fd774SSteven Rostedt (Red Hat) {
2516d90fd774SSteven Rostedt (Red Hat) 	u64 delta;
2517d90fd774SSteven Rostedt (Red Hat) 
2518d90fd774SSteven Rostedt (Red Hat) 	/*
2519d90fd774SSteven Rostedt (Red Hat) 	 * The event first in the commit queue updates the
2520d90fd774SSteven Rostedt (Red Hat) 	 * time stamp.
2521d90fd774SSteven Rostedt (Red Hat) 	 */
2522d90fd774SSteven Rostedt (Red Hat) 	if (rb_event_is_commit(cpu_buffer, event)) {
2523d90fd774SSteven Rostedt (Red Hat) 		/*
2524d90fd774SSteven Rostedt (Red Hat) 		 * A commit event that is first on a page
2525d90fd774SSteven Rostedt (Red Hat) 		 * updates the write timestamp with the page stamp
2526d90fd774SSteven Rostedt (Red Hat) 		 */
2527d90fd774SSteven Rostedt (Red Hat) 		if (!rb_event_index(event))
2528d90fd774SSteven Rostedt (Red Hat) 			cpu_buffer->write_stamp =
2529d90fd774SSteven Rostedt (Red Hat) 				cpu_buffer->commit_page->page->time_stamp;
2530d90fd774SSteven Rostedt (Red Hat) 		else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
2531*dc4e2801STom Zanussi 			delta = ring_buffer_event_time_stamp(event);
2532d90fd774SSteven Rostedt (Red Hat) 			cpu_buffer->write_stamp += delta;
2533*dc4e2801STom Zanussi 		} else if (event->type_len == RINGBUF_TYPE_TIME_STAMP) {
2534*dc4e2801STom Zanussi 			delta = ring_buffer_event_time_stamp(event);
2535*dc4e2801STom Zanussi 			cpu_buffer->write_stamp = delta;
2536d90fd774SSteven Rostedt (Red Hat) 		} else
2537d90fd774SSteven Rostedt (Red Hat) 			cpu_buffer->write_stamp += event->time_delta;
2538d90fd774SSteven Rostedt (Red Hat) 	}
2539d90fd774SSteven Rostedt (Red Hat) }
2540d90fd774SSteven Rostedt (Red Hat) 
2541d90fd774SSteven Rostedt (Red Hat) static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
2542d90fd774SSteven Rostedt (Red Hat) 		      struct ring_buffer_event *event)
2543d90fd774SSteven Rostedt (Red Hat) {
2544d90fd774SSteven Rostedt (Red Hat) 	local_inc(&cpu_buffer->entries);
2545d90fd774SSteven Rostedt (Red Hat) 	rb_update_write_stamp(cpu_buffer, event);
2546d90fd774SSteven Rostedt (Red Hat) 	rb_end_commit(cpu_buffer);
2547d90fd774SSteven Rostedt (Red Hat) }
2548d90fd774SSteven Rostedt (Red Hat) 
2549d90fd774SSteven Rostedt (Red Hat) static __always_inline void
2550d90fd774SSteven Rostedt (Red Hat) rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
2551d90fd774SSteven Rostedt (Red Hat) {
2552d90fd774SSteven Rostedt (Red Hat) 	bool pagebusy;
2553d90fd774SSteven Rostedt (Red Hat) 
2554d90fd774SSteven Rostedt (Red Hat) 	if (buffer->irq_work.waiters_pending) {
2555d90fd774SSteven Rostedt (Red Hat) 		buffer->irq_work.waiters_pending = false;
2556d90fd774SSteven Rostedt (Red Hat) 		/* irq_work_queue() supplies it's own memory barriers */
2557d90fd774SSteven Rostedt (Red Hat) 		irq_work_queue(&buffer->irq_work.work);
2558d90fd774SSteven Rostedt (Red Hat) 	}
2559d90fd774SSteven Rostedt (Red Hat) 
2560d90fd774SSteven Rostedt (Red Hat) 	if (cpu_buffer->irq_work.waiters_pending) {
2561d90fd774SSteven Rostedt (Red Hat) 		cpu_buffer->irq_work.waiters_pending = false;
2562d90fd774SSteven Rostedt (Red Hat) 		/* irq_work_queue() supplies it's own memory barriers */
2563d90fd774SSteven Rostedt (Red Hat) 		irq_work_queue(&cpu_buffer->irq_work.work);
2564d90fd774SSteven Rostedt (Red Hat) 	}
2565d90fd774SSteven Rostedt (Red Hat) 
2566d90fd774SSteven Rostedt (Red Hat) 	pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
2567d90fd774SSteven Rostedt (Red Hat) 
2568d90fd774SSteven Rostedt (Red Hat) 	if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) {
2569d90fd774SSteven Rostedt (Red Hat) 		cpu_buffer->irq_work.wakeup_full = true;
2570d90fd774SSteven Rostedt (Red Hat) 		cpu_buffer->irq_work.full_waiters_pending = false;
2571d90fd774SSteven Rostedt (Red Hat) 		/* irq_work_queue() supplies it's own memory barriers */
2572d90fd774SSteven Rostedt (Red Hat) 		irq_work_queue(&cpu_buffer->irq_work.work);
2573d90fd774SSteven Rostedt (Red Hat) 	}
2574d90fd774SSteven Rostedt (Red Hat) }
2575d90fd774SSteven Rostedt (Red Hat) 
2576d90fd774SSteven Rostedt (Red Hat) /*
2577d90fd774SSteven Rostedt (Red Hat)  * The lock and unlock are done within a preempt disable section.
2578d90fd774SSteven Rostedt (Red Hat)  * The current_context per_cpu variable can only be modified
2579d90fd774SSteven Rostedt (Red Hat)  * by the current task between lock and unlock. But it can
2580a0e3a18fSSteven Rostedt (VMware)  * be modified more than once via an interrupt. To pass this
2581a0e3a18fSSteven Rostedt (VMware)  * information from the lock to the unlock without having to
2582a0e3a18fSSteven Rostedt (VMware)  * access the 'in_interrupt()' functions again (which do show
2583a0e3a18fSSteven Rostedt (VMware)  * a bit of overhead in something as critical as function tracing,
2584a0e3a18fSSteven Rostedt (VMware)  * we use a bitmask trick.
2585d90fd774SSteven Rostedt (Red Hat)  *
2586a0e3a18fSSteven Rostedt (VMware)  *  bit 0 =  NMI context
2587a0e3a18fSSteven Rostedt (VMware)  *  bit 1 =  IRQ context
2588a0e3a18fSSteven Rostedt (VMware)  *  bit 2 =  SoftIRQ context
2589a0e3a18fSSteven Rostedt (VMware)  *  bit 3 =  normal context.
2590d90fd774SSteven Rostedt (Red Hat)  *
2591a0e3a18fSSteven Rostedt (VMware)  * This works because this is the order of contexts that can
2592a0e3a18fSSteven Rostedt (VMware)  * preempt other contexts. A SoftIRQ never preempts an IRQ
2593a0e3a18fSSteven Rostedt (VMware)  * context.
2594a0e3a18fSSteven Rostedt (VMware)  *
2595a0e3a18fSSteven Rostedt (VMware)  * When the context is determined, the corresponding bit is
2596a0e3a18fSSteven Rostedt (VMware)  * checked and set (if it was set, then a recursion of that context
2597a0e3a18fSSteven Rostedt (VMware)  * happened).
2598a0e3a18fSSteven Rostedt (VMware)  *
2599a0e3a18fSSteven Rostedt (VMware)  * On unlock, we need to clear this bit. To do so, just subtract
2600a0e3a18fSSteven Rostedt (VMware)  * 1 from the current_context and AND it to itself.
2601a0e3a18fSSteven Rostedt (VMware)  *
2602a0e3a18fSSteven Rostedt (VMware)  * (binary)
2603a0e3a18fSSteven Rostedt (VMware)  *  101 - 1 = 100
2604a0e3a18fSSteven Rostedt (VMware)  *  101 & 100 = 100 (clearing bit zero)
2605a0e3a18fSSteven Rostedt (VMware)  *
2606a0e3a18fSSteven Rostedt (VMware)  *  1010 - 1 = 1001
2607a0e3a18fSSteven Rostedt (VMware)  *  1010 & 1001 = 1000 (clearing bit 1)
2608a0e3a18fSSteven Rostedt (VMware)  *
2609a0e3a18fSSteven Rostedt (VMware)  * The least significant bit can be cleared this way, and it
2610a0e3a18fSSteven Rostedt (VMware)  * just so happens that it is the same bit corresponding to
2611a0e3a18fSSteven Rostedt (VMware)  * the current context.
2612d90fd774SSteven Rostedt (Red Hat)  */
2613d90fd774SSteven Rostedt (Red Hat) 
2614d90fd774SSteven Rostedt (Red Hat) static __always_inline int
2615d90fd774SSteven Rostedt (Red Hat) trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
2616d90fd774SSteven Rostedt (Red Hat) {
2617a0e3a18fSSteven Rostedt (VMware) 	unsigned int val = cpu_buffer->current_context;
2618a0e3a18fSSteven Rostedt (VMware) 	unsigned long pc = preempt_count();
2619a0e3a18fSSteven Rostedt (VMware) 	int bit;
2620a0e3a18fSSteven Rostedt (VMware) 
2621a0e3a18fSSteven Rostedt (VMware) 	if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
2622a0e3a18fSSteven Rostedt (VMware) 		bit = RB_CTX_NORMAL;
2623a0e3a18fSSteven Rostedt (VMware) 	else
2624a0e3a18fSSteven Rostedt (VMware) 		bit = pc & NMI_MASK ? RB_CTX_NMI :
26250164e0d7SSteven Rostedt (VMware) 			pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;
2626a0e3a18fSSteven Rostedt (VMware) 
2627a0e3a18fSSteven Rostedt (VMware) 	if (unlikely(val & (1 << bit)))
2628d90fd774SSteven Rostedt (Red Hat) 		return 1;
2629d90fd774SSteven Rostedt (Red Hat) 
2630a0e3a18fSSteven Rostedt (VMware) 	val |= (1 << bit);
2631a0e3a18fSSteven Rostedt (VMware) 	cpu_buffer->current_context = val;
2632d90fd774SSteven Rostedt (Red Hat) 
2633d90fd774SSteven Rostedt (Red Hat) 	return 0;
2634d90fd774SSteven Rostedt (Red Hat) }
2635d90fd774SSteven Rostedt (Red Hat) 
2636d90fd774SSteven Rostedt (Red Hat) static __always_inline void
2637d90fd774SSteven Rostedt (Red Hat) trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
2638d90fd774SSteven Rostedt (Red Hat) {
2639a0e3a18fSSteven Rostedt (VMware) 	cpu_buffer->current_context &= cpu_buffer->current_context - 1;
2640d90fd774SSteven Rostedt (Red Hat) }
2641d90fd774SSteven Rostedt (Red Hat) 
2642d90fd774SSteven Rostedt (Red Hat) /**
2643d90fd774SSteven Rostedt (Red Hat)  * ring_buffer_unlock_commit - commit a reserved
2644d90fd774SSteven Rostedt (Red Hat)  * @buffer: The buffer to commit to
2645d90fd774SSteven Rostedt (Red Hat)  * @event: The event pointer to commit.
2646d90fd774SSteven Rostedt (Red Hat)  *
2647d90fd774SSteven Rostedt (Red Hat)  * This commits the data to the ring buffer, and releases any locks held.
2648d90fd774SSteven Rostedt (Red Hat)  *
2649d90fd774SSteven Rostedt (Red Hat)  * Must be paired with ring_buffer_lock_reserve.
2650d90fd774SSteven Rostedt (Red Hat)  */
2651d90fd774SSteven Rostedt (Red Hat) int ring_buffer_unlock_commit(struct ring_buffer *buffer,
2652d90fd774SSteven Rostedt (Red Hat) 			      struct ring_buffer_event *event)
2653d90fd774SSteven Rostedt (Red Hat) {
2654d90fd774SSteven Rostedt (Red Hat) 	struct ring_buffer_per_cpu *cpu_buffer;
2655d90fd774SSteven Rostedt (Red Hat) 	int cpu = raw_smp_processor_id();
2656d90fd774SSteven Rostedt (Red Hat) 
2657d90fd774SSteven Rostedt (Red Hat) 	cpu_buffer = buffer->buffers[cpu];
2658d90fd774SSteven Rostedt (Red Hat) 
2659d90fd774SSteven Rostedt (Red Hat) 	rb_commit(cpu_buffer, event);
2660d90fd774SSteven Rostedt (Red Hat) 
2661d90fd774SSteven Rostedt (Red Hat) 	rb_wakeups(buffer, cpu_buffer);
2662d90fd774SSteven Rostedt (Red Hat) 
2663d90fd774SSteven Rostedt (Red Hat) 	trace_recursive_unlock(cpu_buffer);
2664d90fd774SSteven Rostedt (Red Hat) 
2665d90fd774SSteven Rostedt (Red Hat) 	preempt_enable_notrace();
2666d90fd774SSteven Rostedt (Red Hat) 
2667d90fd774SSteven Rostedt (Red Hat) 	return 0;
2668d90fd774SSteven Rostedt (Red Hat) }
2669d90fd774SSteven Rostedt (Red Hat) EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
2670a4543a2fSSteven Rostedt (Red Hat) 
26719826b273SSteven Rostedt (Red Hat) static noinline void
26729826b273SSteven Rostedt (Red Hat) rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
26739826b273SSteven Rostedt (Red Hat) 		    struct rb_event_info *info)
26749826b273SSteven Rostedt (Red Hat) {
26759826b273SSteven Rostedt (Red Hat) 	WARN_ONCE(info->delta > (1ULL << 59),
26769826b273SSteven Rostedt (Red Hat) 		  KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
26779826b273SSteven Rostedt (Red Hat) 		  (unsigned long long)info->delta,
26789826b273SSteven Rostedt (Red Hat) 		  (unsigned long long)info->ts,
26799826b273SSteven Rostedt (Red Hat) 		  (unsigned long long)cpu_buffer->write_stamp,
26809826b273SSteven Rostedt (Red Hat) 		  sched_clock_stable() ? "" :
26819826b273SSteven Rostedt (Red Hat) 		  "If you just came from a suspend/resume,\n"
26829826b273SSteven Rostedt (Red Hat) 		  "please switch to the trace global clock:\n"
26839826b273SSteven Rostedt (Red Hat) 		  "  echo global > /sys/kernel/debug/tracing/trace_clock\n");
26849826b273SSteven Rostedt (Red Hat) 	info->add_timestamp = 1;
26859826b273SSteven Rostedt (Red Hat) }
26869826b273SSteven Rostedt (Red Hat) 
26876634ff26SSteven Rostedt static struct ring_buffer_event *
26886634ff26SSteven Rostedt __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
2689fcc742eaSSteven Rostedt (Red Hat) 		  struct rb_event_info *info)
26906634ff26SSteven Rostedt {
26916634ff26SSteven Rostedt 	struct ring_buffer_event *event;
2692fcc742eaSSteven Rostedt (Red Hat) 	struct buffer_page *tail_page;
26936634ff26SSteven Rostedt 	unsigned long tail, write;
2694b7dc42fdSSteven Rostedt (Red Hat) 
2695b7dc42fdSSteven Rostedt (Red Hat) 	/*
2696b7dc42fdSSteven Rostedt (Red Hat) 	 * If the time delta since the last event is too big to
2697b7dc42fdSSteven Rostedt (Red Hat) 	 * hold in the time field of the event, then we append a
2698b7dc42fdSSteven Rostedt (Red Hat) 	 * TIME EXTEND event ahead of the data event.
2699b7dc42fdSSteven Rostedt (Red Hat) 	 */
2700b7dc42fdSSteven Rostedt (Red Hat) 	if (unlikely(info->add_timestamp))
2701b7dc42fdSSteven Rostedt (Red Hat) 		info->length += RB_LEN_TIME_EXTEND;
270269d1b839SSteven Rostedt 
27038573636eSSteven Rostedt (Red Hat) 	/* Don't let the compiler play games with cpu_buffer->tail_page */
27048573636eSSteven Rostedt (Red Hat) 	tail_page = info->tail_page = READ_ONCE(cpu_buffer->tail_page);
2705fcc742eaSSteven Rostedt (Red Hat) 	write = local_add_return(info->length, &tail_page->write);
270677ae365eSSteven Rostedt 
270777ae365eSSteven Rostedt 	/* set write to only the index of the write */
270877ae365eSSteven Rostedt 	write &= RB_WRITE_MASK;
2709fcc742eaSSteven Rostedt (Red Hat) 	tail = write - info->length;
27106634ff26SSteven Rostedt 
2711b7dc42fdSSteven Rostedt (Red Hat) 	/*
2712b7dc42fdSSteven Rostedt (Red Hat) 	 * If this is the first commit on the page, then it has the same
2713b7dc42fdSSteven Rostedt (Red Hat) 	 * timestamp as the page itself.
2714b7dc42fdSSteven Rostedt (Red Hat) 	 */
2715*dc4e2801STom Zanussi 	if (!tail && !ring_buffer_time_stamp_abs(cpu_buffer->buffer))
2716b7dc42fdSSteven Rostedt (Red Hat) 		info->delta = 0;
2717b7dc42fdSSteven Rostedt (Red Hat) 
27186634ff26SSteven Rostedt 	/* See if we shot pass the end of this buffer page */
2719747e94aeSSteven Rostedt 	if (unlikely(write > BUF_PAGE_SIZE))
2720fcc742eaSSteven Rostedt (Red Hat) 		return rb_move_tail(cpu_buffer, tail, info);
27216634ff26SSteven Rostedt 
27226634ff26SSteven Rostedt 	/* We reserved something on the buffer */
2723b7dc42fdSSteven Rostedt (Red Hat) 
27246634ff26SSteven Rostedt 	event = __rb_page_index(tail_page, tail);
2725fcc742eaSSteven Rostedt (Red Hat) 	rb_update_event(cpu_buffer, event, info);
27266634ff26SSteven Rostedt 
27276634ff26SSteven Rostedt 	local_inc(&tail_page->entries);
27286634ff26SSteven Rostedt 
2729b7dc42fdSSteven Rostedt (Red Hat) 	/*
2730b7dc42fdSSteven Rostedt (Red Hat) 	 * If this is the first commit on the page, then update
2731b7dc42fdSSteven Rostedt (Red Hat) 	 * its timestamp.
2732b7dc42fdSSteven Rostedt (Red Hat) 	 */
2733b7dc42fdSSteven Rostedt (Red Hat) 	if (!tail)
2734b7dc42fdSSteven Rostedt (Red Hat) 		tail_page->page->time_stamp = info->ts;
2735b7dc42fdSSteven Rostedt (Red Hat) 
2736c64e148aSVaibhav Nagarnaik 	/* account for these added bytes */
2737fcc742eaSSteven Rostedt (Red Hat) 	local_add(info->length, &cpu_buffer->entries_bytes);
2738c64e148aSVaibhav Nagarnaik 
27396634ff26SSteven Rostedt 	return event;
27406634ff26SSteven Rostedt }
27416634ff26SSteven Rostedt 
2742fa7ffb39SSteven Rostedt (Red Hat) static __always_inline struct ring_buffer_event *
274362f0b3ebSSteven Rostedt rb_reserve_next_event(struct ring_buffer *buffer,
274462f0b3ebSSteven Rostedt 		      struct ring_buffer_per_cpu *cpu_buffer,
27451cd8d735SSteven Rostedt 		      unsigned long length)
27467a8e76a3SSteven Rostedt {
27477a8e76a3SSteven Rostedt 	struct ring_buffer_event *event;
2748fcc742eaSSteven Rostedt (Red Hat) 	struct rb_event_info info;
2749818e3dd3SSteven Rostedt 	int nr_loops = 0;
2750b7dc42fdSSteven Rostedt (Red Hat) 	u64 diff;
27517a8e76a3SSteven Rostedt 
2752fa743953SSteven Rostedt 	rb_start_commit(cpu_buffer);
2753fa743953SSteven Rostedt 
275485bac32cSSteven Rostedt #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
275562f0b3ebSSteven Rostedt 	/*
275662f0b3ebSSteven Rostedt 	 * Due to the ability to swap a cpu buffer from a buffer
275762f0b3ebSSteven Rostedt 	 * it is possible it was swapped before we committed.
275862f0b3ebSSteven Rostedt 	 * (committing stops a swap). We check for it here and
275962f0b3ebSSteven Rostedt 	 * if it happened, we have to fail the write.
276062f0b3ebSSteven Rostedt 	 */
276162f0b3ebSSteven Rostedt 	barrier();
27626aa7de05SMark Rutland 	if (unlikely(READ_ONCE(cpu_buffer->buffer) != buffer)) {
276362f0b3ebSSteven Rostedt 		local_dec(&cpu_buffer->committing);
276462f0b3ebSSteven Rostedt 		local_dec(&cpu_buffer->commits);
276562f0b3ebSSteven Rostedt 		return NULL;
276662f0b3ebSSteven Rostedt 	}
276785bac32cSSteven Rostedt #endif
2768b7dc42fdSSteven Rostedt (Red Hat) 
2769fcc742eaSSteven Rostedt (Red Hat) 	info.length = rb_calculate_event_length(length);
2770a4543a2fSSteven Rostedt (Red Hat)  again:
2771b7dc42fdSSteven Rostedt (Red Hat) 	info.add_timestamp = 0;
2772b7dc42fdSSteven Rostedt (Red Hat) 	info.delta = 0;
2773b7dc42fdSSteven Rostedt (Red Hat) 
2774818e3dd3SSteven Rostedt 	/*
2775818e3dd3SSteven Rostedt 	 * We allow for interrupts to reenter here and do a trace.
2776818e3dd3SSteven Rostedt 	 * If one does, it will cause this original code to loop
2777818e3dd3SSteven Rostedt 	 * back here. Even with heavy interrupts happening, this
2778818e3dd3SSteven Rostedt 	 * should only happen a few times in a row. If this happens
2779818e3dd3SSteven Rostedt 	 * 1000 times in a row, there must be either an interrupt
2780818e3dd3SSteven Rostedt 	 * storm or we have something buggy.
2781818e3dd3SSteven Rostedt 	 * Bail!
2782818e3dd3SSteven Rostedt 	 */
27833e89c7bbSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
2784fa743953SSteven Rostedt 		goto out_fail;
2785818e3dd3SSteven Rostedt 
2786b7dc42fdSSteven Rostedt (Red Hat) 	info.ts = rb_time_stamp(cpu_buffer->buffer);
2787b7dc42fdSSteven Rostedt (Red Hat) 	diff = info.ts - cpu_buffer->write_stamp;
2788b7dc42fdSSteven Rostedt (Red Hat) 
2789b7dc42fdSSteven Rostedt (Red Hat) 	/* make sure this diff is calculated here */
2790b7dc42fdSSteven Rostedt (Red Hat) 	barrier();
2791b7dc42fdSSteven Rostedt (Red Hat) 
2792*dc4e2801STom Zanussi 	if (ring_buffer_time_stamp_abs(buffer)) {
2793*dc4e2801STom Zanussi 		info.delta = info.ts;
2794*dc4e2801STom Zanussi 		rb_handle_timestamp(cpu_buffer, &info);
2795*dc4e2801STom Zanussi 	} else /* Did the write stamp get updated already? */
2796b7dc42fdSSteven Rostedt (Red Hat) 		if (likely(info.ts >= cpu_buffer->write_stamp)) {
2797b7dc42fdSSteven Rostedt (Red Hat) 		info.delta = diff;
2798b7dc42fdSSteven Rostedt (Red Hat) 		if (unlikely(test_time_stamp(info.delta)))
2799b7dc42fdSSteven Rostedt (Red Hat) 			rb_handle_timestamp(cpu_buffer, &info);
2800b7dc42fdSSteven Rostedt (Red Hat) 	}
2801b7dc42fdSSteven Rostedt (Red Hat) 
2802fcc742eaSSteven Rostedt (Red Hat) 	event = __rb_reserve_next(cpu_buffer, &info);
2803fcc742eaSSteven Rostedt (Red Hat) 
2804bd1b7cd3SSteven Rostedt (Red Hat) 	if (unlikely(PTR_ERR(event) == -EAGAIN)) {
2805bd1b7cd3SSteven Rostedt (Red Hat) 		if (info.add_timestamp)
2806bd1b7cd3SSteven Rostedt (Red Hat) 			info.length -= RB_LEN_TIME_EXTEND;
2807bf41a158SSteven Rostedt 		goto again;
2808bd1b7cd3SSteven Rostedt (Red Hat) 	}
28097a8e76a3SSteven Rostedt 
2810fa743953SSteven Rostedt 	if (!event)
2811fa743953SSteven Rostedt 		goto out_fail;
2812bf41a158SSteven Rostedt 
28137a8e76a3SSteven Rostedt 	return event;
2814fa743953SSteven Rostedt 
2815fa743953SSteven Rostedt  out_fail:
2816fa743953SSteven Rostedt 	rb_end_commit(cpu_buffer);
2817fa743953SSteven Rostedt 	return NULL;
28187a8e76a3SSteven Rostedt }
28197a8e76a3SSteven Rostedt 
28207a8e76a3SSteven Rostedt /**
28217a8e76a3SSteven Rostedt  * ring_buffer_lock_reserve - reserve a part of the buffer
28227a8e76a3SSteven Rostedt  * @buffer: the ring buffer to reserve from
28237a8e76a3SSteven Rostedt  * @length: the length of the data to reserve (excluding event header)
28247a8e76a3SSteven Rostedt  *
28257a8e76a3SSteven Rostedt  * Returns a reseverd event on the ring buffer to copy directly to.
28267a8e76a3SSteven Rostedt  * The user of this interface will need to get the body to write into
28277a8e76a3SSteven Rostedt  * and can use the ring_buffer_event_data() interface.
28287a8e76a3SSteven Rostedt  *
28297a8e76a3SSteven Rostedt  * The length is the length of the data needed, not the event length
28307a8e76a3SSteven Rostedt  * which also includes the event header.
28317a8e76a3SSteven Rostedt  *
28327a8e76a3SSteven Rostedt  * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
28337a8e76a3SSteven Rostedt  * If NULL is returned, then nothing has been allocated or locked.
28347a8e76a3SSteven Rostedt  */
28357a8e76a3SSteven Rostedt struct ring_buffer_event *
28360a987751SArnaldo Carvalho de Melo ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
28377a8e76a3SSteven Rostedt {
28387a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
28397a8e76a3SSteven Rostedt 	struct ring_buffer_event *event;
28405168ae50SSteven Rostedt 	int cpu;
28417a8e76a3SSteven Rostedt 
2842bf41a158SSteven Rostedt 	/* If we are tracing schedule, we don't want to recurse */
28435168ae50SSteven Rostedt 	preempt_disable_notrace();
2844bf41a158SSteven Rostedt 
28453205f806SSteven Rostedt (Red Hat) 	if (unlikely(atomic_read(&buffer->record_disabled)))
284658a09ec6SSteven Rostedt (Red Hat) 		goto out;
2847261842b7SSteven Rostedt 
28487a8e76a3SSteven Rostedt 	cpu = raw_smp_processor_id();
28497a8e76a3SSteven Rostedt 
28503205f806SSteven Rostedt (Red Hat) 	if (unlikely(!cpumask_test_cpu(cpu, buffer->cpumask)))
2851d769041fSSteven Rostedt 		goto out;
28527a8e76a3SSteven Rostedt 
28537a8e76a3SSteven Rostedt 	cpu_buffer = buffer->buffers[cpu];
28547a8e76a3SSteven Rostedt 
28553205f806SSteven Rostedt (Red Hat) 	if (unlikely(atomic_read(&cpu_buffer->record_disabled)))
2856d769041fSSteven Rostedt 		goto out;
28577a8e76a3SSteven Rostedt 
28583205f806SSteven Rostedt (Red Hat) 	if (unlikely(length > BUF_MAX_DATA_SIZE))
2859bf41a158SSteven Rostedt 		goto out;
28607a8e76a3SSteven Rostedt 
286158a09ec6SSteven Rostedt (Red Hat) 	if (unlikely(trace_recursive_lock(cpu_buffer)))
286258a09ec6SSteven Rostedt (Red Hat) 		goto out;
286358a09ec6SSteven Rostedt (Red Hat) 
286462f0b3ebSSteven Rostedt 	event = rb_reserve_next_event(buffer, cpu_buffer, length);
28657a8e76a3SSteven Rostedt 	if (!event)
286658a09ec6SSteven Rostedt (Red Hat) 		goto out_unlock;
28677a8e76a3SSteven Rostedt 
28687a8e76a3SSteven Rostedt 	return event;
28697a8e76a3SSteven Rostedt 
287058a09ec6SSteven Rostedt (Red Hat)  out_unlock:
287158a09ec6SSteven Rostedt (Red Hat) 	trace_recursive_unlock(cpu_buffer);
2872d769041fSSteven Rostedt  out:
28735168ae50SSteven Rostedt 	preempt_enable_notrace();
28747a8e76a3SSteven Rostedt 	return NULL;
28757a8e76a3SSteven Rostedt }
2876c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
28777a8e76a3SSteven Rostedt 
2878a1863c21SSteven Rostedt /*
2879a1863c21SSteven Rostedt  * Decrement the entries to the page that an event is on.
2880a1863c21SSteven Rostedt  * The event does not even need to exist, only the pointer
2881a1863c21SSteven Rostedt  * to the page it is on. This may only be called before the commit
2882a1863c21SSteven Rostedt  * takes place.
2883a1863c21SSteven Rostedt  */
2884a1863c21SSteven Rostedt static inline void
2885a1863c21SSteven Rostedt rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer,
2886a1863c21SSteven Rostedt 		   struct ring_buffer_event *event)
2887a1863c21SSteven Rostedt {
2888a1863c21SSteven Rostedt 	unsigned long addr = (unsigned long)event;
2889a1863c21SSteven Rostedt 	struct buffer_page *bpage = cpu_buffer->commit_page;
2890a1863c21SSteven Rostedt 	struct buffer_page *start;
2891a1863c21SSteven Rostedt 
2892a1863c21SSteven Rostedt 	addr &= PAGE_MASK;
2893a1863c21SSteven Rostedt 
2894a1863c21SSteven Rostedt 	/* Do the likely case first */
2895a1863c21SSteven Rostedt 	if (likely(bpage->page == (void *)addr)) {
2896a1863c21SSteven Rostedt 		local_dec(&bpage->entries);
2897a1863c21SSteven Rostedt 		return;
2898a1863c21SSteven Rostedt 	}
2899a1863c21SSteven Rostedt 
2900a1863c21SSteven Rostedt 	/*
2901a1863c21SSteven Rostedt 	 * Because the commit page may be on the reader page we
2902a1863c21SSteven Rostedt 	 * start with the next page and check the end loop there.
2903a1863c21SSteven Rostedt 	 */
2904a1863c21SSteven Rostedt 	rb_inc_page(cpu_buffer, &bpage);
2905a1863c21SSteven Rostedt 	start = bpage;
2906a1863c21SSteven Rostedt 	do {
2907a1863c21SSteven Rostedt 		if (bpage->page == (void *)addr) {
2908a1863c21SSteven Rostedt 			local_dec(&bpage->entries);
2909a1863c21SSteven Rostedt 			return;
2910a1863c21SSteven Rostedt 		}
2911a1863c21SSteven Rostedt 		rb_inc_page(cpu_buffer, &bpage);
2912a1863c21SSteven Rostedt 	} while (bpage != start);
2913a1863c21SSteven Rostedt 
2914a1863c21SSteven Rostedt 	/* commit not part of this buffer?? */
2915a1863c21SSteven Rostedt 	RB_WARN_ON(cpu_buffer, 1);
2916a1863c21SSteven Rostedt }
2917a1863c21SSteven Rostedt 
29187a8e76a3SSteven Rostedt /**
2919fa1b47ddSSteven Rostedt  * ring_buffer_commit_discard - discard an event that has not been committed
2920fa1b47ddSSteven Rostedt  * @buffer: the ring buffer
2921fa1b47ddSSteven Rostedt  * @event: non committed event to discard
2922fa1b47ddSSteven Rostedt  *
2923dc892f73SSteven Rostedt  * Sometimes an event that is in the ring buffer needs to be ignored.
2924dc892f73SSteven Rostedt  * This function lets the user discard an event in the ring buffer
2925dc892f73SSteven Rostedt  * and then that event will not be read later.
2926dc892f73SSteven Rostedt  *
2927dc892f73SSteven Rostedt  * This function only works if it is called before the the item has been
2928dc892f73SSteven Rostedt  * committed. It will try to free the event from the ring buffer
2929fa1b47ddSSteven Rostedt  * if another event has not been added behind it.
2930fa1b47ddSSteven Rostedt  *
2931fa1b47ddSSteven Rostedt  * If another event has been added behind it, it will set the event
2932fa1b47ddSSteven Rostedt  * up as discarded, and perform the commit.
2933fa1b47ddSSteven Rostedt  *
2934fa1b47ddSSteven Rostedt  * If this function is called, do not call ring_buffer_unlock_commit on
2935fa1b47ddSSteven Rostedt  * the event.
2936fa1b47ddSSteven Rostedt  */
2937fa1b47ddSSteven Rostedt void ring_buffer_discard_commit(struct ring_buffer *buffer,
2938fa1b47ddSSteven Rostedt 				struct ring_buffer_event *event)
2939fa1b47ddSSteven Rostedt {
2940fa1b47ddSSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
2941fa1b47ddSSteven Rostedt 	int cpu;
2942fa1b47ddSSteven Rostedt 
2943fa1b47ddSSteven Rostedt 	/* The event is discarded regardless */
2944f3b9aae1SFrederic Weisbecker 	rb_event_discard(event);
2945fa1b47ddSSteven Rostedt 
2946fa743953SSteven Rostedt 	cpu = smp_processor_id();
2947fa743953SSteven Rostedt 	cpu_buffer = buffer->buffers[cpu];
2948fa743953SSteven Rostedt 
2949fa1b47ddSSteven Rostedt 	/*
2950fa1b47ddSSteven Rostedt 	 * This must only be called if the event has not been
2951fa1b47ddSSteven Rostedt 	 * committed yet. Thus we can assume that preemption
2952fa1b47ddSSteven Rostedt 	 * is still disabled.
2953fa1b47ddSSteven Rostedt 	 */
2954fa743953SSteven Rostedt 	RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
2955fa1b47ddSSteven Rostedt 
2956a1863c21SSteven Rostedt 	rb_decrement_entry(cpu_buffer, event);
29570f2541d2SSteven Rostedt 	if (rb_try_to_discard(cpu_buffer, event))
2958fa1b47ddSSteven Rostedt 		goto out;
2959fa1b47ddSSteven Rostedt 
2960fa1b47ddSSteven Rostedt 	/*
2961fa1b47ddSSteven Rostedt 	 * The commit is still visible by the reader, so we
2962a1863c21SSteven Rostedt 	 * must still update the timestamp.
2963fa1b47ddSSteven Rostedt 	 */
2964a1863c21SSteven Rostedt 	rb_update_write_stamp(cpu_buffer, event);
2965fa1b47ddSSteven Rostedt  out:
2966fa743953SSteven Rostedt 	rb_end_commit(cpu_buffer);
2967fa1b47ddSSteven Rostedt 
296858a09ec6SSteven Rostedt (Red Hat) 	trace_recursive_unlock(cpu_buffer);
2969f3b9aae1SFrederic Weisbecker 
29705168ae50SSteven Rostedt 	preempt_enable_notrace();
2971fa1b47ddSSteven Rostedt 
2972fa1b47ddSSteven Rostedt }
2973fa1b47ddSSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
2974fa1b47ddSSteven Rostedt 
2975fa1b47ddSSteven Rostedt /**
29767a8e76a3SSteven Rostedt  * ring_buffer_write - write data to the buffer without reserving
29777a8e76a3SSteven Rostedt  * @buffer: The ring buffer to write to.
29787a8e76a3SSteven Rostedt  * @length: The length of the data being written (excluding the event header)
29797a8e76a3SSteven Rostedt  * @data: The data to write to the buffer.
29807a8e76a3SSteven Rostedt  *
29817a8e76a3SSteven Rostedt  * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
29827a8e76a3SSteven Rostedt  * one function. If you already have the data to write to the buffer, it
29837a8e76a3SSteven Rostedt  * may be easier to simply call this function.
29847a8e76a3SSteven Rostedt  *
29857a8e76a3SSteven Rostedt  * Note, like ring_buffer_lock_reserve, the length is the length of the data
29867a8e76a3SSteven Rostedt  * and not the length of the event which would hold the header.
29877a8e76a3SSteven Rostedt  */
29887a8e76a3SSteven Rostedt int ring_buffer_write(struct ring_buffer *buffer,
29897a8e76a3SSteven Rostedt 		      unsigned long length,
29907a8e76a3SSteven Rostedt 		      void *data)
29917a8e76a3SSteven Rostedt {
29927a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
29937a8e76a3SSteven Rostedt 	struct ring_buffer_event *event;
29947a8e76a3SSteven Rostedt 	void *body;
29957a8e76a3SSteven Rostedt 	int ret = -EBUSY;
29965168ae50SSteven Rostedt 	int cpu;
29977a8e76a3SSteven Rostedt 
29985168ae50SSteven Rostedt 	preempt_disable_notrace();
2999bf41a158SSteven Rostedt 
300052fbe9cdSLai Jiangshan 	if (atomic_read(&buffer->record_disabled))
300152fbe9cdSLai Jiangshan 		goto out;
300252fbe9cdSLai Jiangshan 
30037a8e76a3SSteven Rostedt 	cpu = raw_smp_processor_id();
30047a8e76a3SSteven Rostedt 
30059e01c1b7SRusty Russell 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
3006d769041fSSteven Rostedt 		goto out;
30077a8e76a3SSteven Rostedt 
30087a8e76a3SSteven Rostedt 	cpu_buffer = buffer->buffers[cpu];
30097a8e76a3SSteven Rostedt 
30107a8e76a3SSteven Rostedt 	if (atomic_read(&cpu_buffer->record_disabled))
30117a8e76a3SSteven Rostedt 		goto out;
30127a8e76a3SSteven Rostedt 
3013be957c44SSteven Rostedt 	if (length > BUF_MAX_DATA_SIZE)
3014be957c44SSteven Rostedt 		goto out;
3015be957c44SSteven Rostedt 
3016985e871bSSteven Rostedt (Red Hat) 	if (unlikely(trace_recursive_lock(cpu_buffer)))
3017985e871bSSteven Rostedt (Red Hat) 		goto out;
3018985e871bSSteven Rostedt (Red Hat) 
301962f0b3ebSSteven Rostedt 	event = rb_reserve_next_event(buffer, cpu_buffer, length);
30207a8e76a3SSteven Rostedt 	if (!event)
3021985e871bSSteven Rostedt (Red Hat) 		goto out_unlock;
30227a8e76a3SSteven Rostedt 
30237a8e76a3SSteven Rostedt 	body = rb_event_data(event);
30247a8e76a3SSteven Rostedt 
30257a8e76a3SSteven Rostedt 	memcpy(body, data, length);
30267a8e76a3SSteven Rostedt 
30277a8e76a3SSteven Rostedt 	rb_commit(cpu_buffer, event);
30287a8e76a3SSteven Rostedt 
302915693458SSteven Rostedt (Red Hat) 	rb_wakeups(buffer, cpu_buffer);
303015693458SSteven Rostedt (Red Hat) 
30317a8e76a3SSteven Rostedt 	ret = 0;
3032985e871bSSteven Rostedt (Red Hat) 
3033985e871bSSteven Rostedt (Red Hat)  out_unlock:
3034985e871bSSteven Rostedt (Red Hat) 	trace_recursive_unlock(cpu_buffer);
3035985e871bSSteven Rostedt (Red Hat) 
30367a8e76a3SSteven Rostedt  out:
30375168ae50SSteven Rostedt 	preempt_enable_notrace();
30387a8e76a3SSteven Rostedt 
30397a8e76a3SSteven Rostedt 	return ret;
30407a8e76a3SSteven Rostedt }
3041c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_write);
30427a8e76a3SSteven Rostedt 
3043da58834cSYaowei Bai static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
3044bf41a158SSteven Rostedt {
3045bf41a158SSteven Rostedt 	struct buffer_page *reader = cpu_buffer->reader_page;
304677ae365eSSteven Rostedt 	struct buffer_page *head = rb_set_head_page(cpu_buffer);
3047bf41a158SSteven Rostedt 	struct buffer_page *commit = cpu_buffer->commit_page;
3048bf41a158SSteven Rostedt 
304977ae365eSSteven Rostedt 	/* In case of error, head will be NULL */
305077ae365eSSteven Rostedt 	if (unlikely(!head))
3051da58834cSYaowei Bai 		return true;
305277ae365eSSteven Rostedt 
3053bf41a158SSteven Rostedt 	return reader->read == rb_page_commit(reader) &&
3054bf41a158SSteven Rostedt 		(commit == reader ||
3055bf41a158SSteven Rostedt 		 (commit == head &&
3056bf41a158SSteven Rostedt 		  head->read == rb_page_commit(commit)));
3057bf41a158SSteven Rostedt }
3058bf41a158SSteven Rostedt 
30597a8e76a3SSteven Rostedt /**
30607a8e76a3SSteven Rostedt  * ring_buffer_record_disable - stop all writes into the buffer
30617a8e76a3SSteven Rostedt  * @buffer: The ring buffer to stop writes to.
30627a8e76a3SSteven Rostedt  *
30637a8e76a3SSteven Rostedt  * This prevents all writes to the buffer. Any attempt to write
30647a8e76a3SSteven Rostedt  * to the buffer after this will fail and return NULL.
30657a8e76a3SSteven Rostedt  *
30667a8e76a3SSteven Rostedt  * The caller should call synchronize_sched() after this.
30677a8e76a3SSteven Rostedt  */
30687a8e76a3SSteven Rostedt void ring_buffer_record_disable(struct ring_buffer *buffer)
30697a8e76a3SSteven Rostedt {
30707a8e76a3SSteven Rostedt 	atomic_inc(&buffer->record_disabled);
30717a8e76a3SSteven Rostedt }
3072c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
30737a8e76a3SSteven Rostedt 
30747a8e76a3SSteven Rostedt /**
30757a8e76a3SSteven Rostedt  * ring_buffer_record_enable - enable writes to the buffer
30767a8e76a3SSteven Rostedt  * @buffer: The ring buffer to enable writes
30777a8e76a3SSteven Rostedt  *
30787a8e76a3SSteven Rostedt  * Note, multiple disables will need the same number of enables
3079c41b20e7SAdam Buchbinder  * to truly enable the writing (much like preempt_disable).
30807a8e76a3SSteven Rostedt  */
30817a8e76a3SSteven Rostedt void ring_buffer_record_enable(struct ring_buffer *buffer)
30827a8e76a3SSteven Rostedt {
30837a8e76a3SSteven Rostedt 	atomic_dec(&buffer->record_disabled);
30847a8e76a3SSteven Rostedt }
3085c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
30867a8e76a3SSteven Rostedt 
30877a8e76a3SSteven Rostedt /**
3088499e5470SSteven Rostedt  * ring_buffer_record_off - stop all writes into the buffer
3089499e5470SSteven Rostedt  * @buffer: The ring buffer to stop writes to.
3090499e5470SSteven Rostedt  *
3091499e5470SSteven Rostedt  * This prevents all writes to the buffer. Any attempt to write
3092499e5470SSteven Rostedt  * to the buffer after this will fail and return NULL.
3093499e5470SSteven Rostedt  *
3094499e5470SSteven Rostedt  * This is different than ring_buffer_record_disable() as
309587abb3b1SWang Tianhong  * it works like an on/off switch, where as the disable() version
3096499e5470SSteven Rostedt  * must be paired with a enable().
3097499e5470SSteven Rostedt  */
3098499e5470SSteven Rostedt void ring_buffer_record_off(struct ring_buffer *buffer)
3099499e5470SSteven Rostedt {
3100499e5470SSteven Rostedt 	unsigned int rd;
3101499e5470SSteven Rostedt 	unsigned int new_rd;
3102499e5470SSteven Rostedt 
3103499e5470SSteven Rostedt 	do {
3104499e5470SSteven Rostedt 		rd = atomic_read(&buffer->record_disabled);
3105499e5470SSteven Rostedt 		new_rd = rd | RB_BUFFER_OFF;
3106499e5470SSteven Rostedt 	} while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
3107499e5470SSteven Rostedt }
3108499e5470SSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_record_off);
3109499e5470SSteven Rostedt 
3110499e5470SSteven Rostedt /**
3111499e5470SSteven Rostedt  * ring_buffer_record_on - restart writes into the buffer
3112499e5470SSteven Rostedt  * @buffer: The ring buffer to start writes to.
3113499e5470SSteven Rostedt  *
3114499e5470SSteven Rostedt  * This enables all writes to the buffer that was disabled by
3115499e5470SSteven Rostedt  * ring_buffer_record_off().
3116499e5470SSteven Rostedt  *
3117499e5470SSteven Rostedt  * This is different than ring_buffer_record_enable() as
311887abb3b1SWang Tianhong  * it works like an on/off switch, where as the enable() version
3119499e5470SSteven Rostedt  * must be paired with a disable().
3120499e5470SSteven Rostedt  */
3121499e5470SSteven Rostedt void ring_buffer_record_on(struct ring_buffer *buffer)
3122499e5470SSteven Rostedt {
3123499e5470SSteven Rostedt 	unsigned int rd;
3124499e5470SSteven Rostedt 	unsigned int new_rd;
3125499e5470SSteven Rostedt 
3126499e5470SSteven Rostedt 	do {
3127499e5470SSteven Rostedt 		rd = atomic_read(&buffer->record_disabled);
3128499e5470SSteven Rostedt 		new_rd = rd & ~RB_BUFFER_OFF;
3129499e5470SSteven Rostedt 	} while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
3130499e5470SSteven Rostedt }
3131499e5470SSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_record_on);
3132499e5470SSteven Rostedt 
3133499e5470SSteven Rostedt /**
3134499e5470SSteven Rostedt  * ring_buffer_record_is_on - return true if the ring buffer can write
3135499e5470SSteven Rostedt  * @buffer: The ring buffer to see if write is enabled
3136499e5470SSteven Rostedt  *
3137499e5470SSteven Rostedt  * Returns true if the ring buffer is in a state that it accepts writes.
3138499e5470SSteven Rostedt  */
3139499e5470SSteven Rostedt int ring_buffer_record_is_on(struct ring_buffer *buffer)
3140499e5470SSteven Rostedt {
3141499e5470SSteven Rostedt 	return !atomic_read(&buffer->record_disabled);
3142499e5470SSteven Rostedt }
3143499e5470SSteven Rostedt 
3144499e5470SSteven Rostedt /**
31457a8e76a3SSteven Rostedt  * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
31467a8e76a3SSteven Rostedt  * @buffer: The ring buffer to stop writes to.
31477a8e76a3SSteven Rostedt  * @cpu: The CPU buffer to stop
31487a8e76a3SSteven Rostedt  *
31497a8e76a3SSteven Rostedt  * This prevents all writes to the buffer. Any attempt to write
31507a8e76a3SSteven Rostedt  * to the buffer after this will fail and return NULL.
31517a8e76a3SSteven Rostedt  *
31527a8e76a3SSteven Rostedt  * The caller should call synchronize_sched() after this.
31537a8e76a3SSteven Rostedt  */
31547a8e76a3SSteven Rostedt void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
31557a8e76a3SSteven Rostedt {
31567a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
31577a8e76a3SSteven Rostedt 
31589e01c1b7SRusty Russell 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
31598aabee57SSteven Rostedt 		return;
31607a8e76a3SSteven Rostedt 
31617a8e76a3SSteven Rostedt 	cpu_buffer = buffer->buffers[cpu];
31627a8e76a3SSteven Rostedt 	atomic_inc(&cpu_buffer->record_disabled);
31637a8e76a3SSteven Rostedt }
3164c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
31657a8e76a3SSteven Rostedt 
31667a8e76a3SSteven Rostedt /**
31677a8e76a3SSteven Rostedt  * ring_buffer_record_enable_cpu - enable writes to the buffer
31687a8e76a3SSteven Rostedt  * @buffer: The ring buffer to enable writes
31697a8e76a3SSteven Rostedt  * @cpu: The CPU to enable.
31707a8e76a3SSteven Rostedt  *
31717a8e76a3SSteven Rostedt  * Note, multiple disables will need the same number of enables
3172c41b20e7SAdam Buchbinder  * to truly enable the writing (much like preempt_disable).
31737a8e76a3SSteven Rostedt  */
31747a8e76a3SSteven Rostedt void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
31757a8e76a3SSteven Rostedt {
31767a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
31777a8e76a3SSteven Rostedt 
31789e01c1b7SRusty Russell 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
31798aabee57SSteven Rostedt 		return;
31807a8e76a3SSteven Rostedt 
31817a8e76a3SSteven Rostedt 	cpu_buffer = buffer->buffers[cpu];
31827a8e76a3SSteven Rostedt 	atomic_dec(&cpu_buffer->record_disabled);
31837a8e76a3SSteven Rostedt }
3184c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
31857a8e76a3SSteven Rostedt 
3186f6195aa0SSteven Rostedt /*
3187f6195aa0SSteven Rostedt  * The total entries in the ring buffer is the running counter
3188f6195aa0SSteven Rostedt  * of entries entered into the ring buffer, minus the sum of
3189f6195aa0SSteven Rostedt  * the entries read from the ring buffer and the number of
3190f6195aa0SSteven Rostedt  * entries that were overwritten.
3191f6195aa0SSteven Rostedt  */
3192f6195aa0SSteven Rostedt static inline unsigned long
3193f6195aa0SSteven Rostedt rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
3194f6195aa0SSteven Rostedt {
3195f6195aa0SSteven Rostedt 	return local_read(&cpu_buffer->entries) -
3196f6195aa0SSteven Rostedt 		(local_read(&cpu_buffer->overrun) + cpu_buffer->read);
3197f6195aa0SSteven Rostedt }
3198f6195aa0SSteven Rostedt 
31997a8e76a3SSteven Rostedt /**
3200c64e148aSVaibhav Nagarnaik  * ring_buffer_oldest_event_ts - get the oldest event timestamp from the buffer
3201c64e148aSVaibhav Nagarnaik  * @buffer: The ring buffer
3202c64e148aSVaibhav Nagarnaik  * @cpu: The per CPU buffer to read from.
3203c64e148aSVaibhav Nagarnaik  */
320450ecf2c3SYoshihiro YUNOMAE u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
3205c64e148aSVaibhav Nagarnaik {
3206c64e148aSVaibhav Nagarnaik 	unsigned long flags;
3207c64e148aSVaibhav Nagarnaik 	struct ring_buffer_per_cpu *cpu_buffer;
3208c64e148aSVaibhav Nagarnaik 	struct buffer_page *bpage;
3209da830e58SLinus Torvalds 	u64 ret = 0;
3210c64e148aSVaibhav Nagarnaik 
3211c64e148aSVaibhav Nagarnaik 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
3212c64e148aSVaibhav Nagarnaik 		return 0;
3213c64e148aSVaibhav Nagarnaik 
3214c64e148aSVaibhav Nagarnaik 	cpu_buffer = buffer->buffers[cpu];
32157115e3fcSLinus Torvalds 	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3216c64e148aSVaibhav Nagarnaik 	/*
3217c64e148aSVaibhav Nagarnaik 	 * if the tail is on reader_page, oldest time stamp is on the reader
3218c64e148aSVaibhav Nagarnaik 	 * page
3219c64e148aSVaibhav Nagarnaik 	 */
3220c64e148aSVaibhav Nagarnaik 	if (cpu_buffer->tail_page == cpu_buffer->reader_page)
3221c64e148aSVaibhav Nagarnaik 		bpage = cpu_buffer->reader_page;
3222c64e148aSVaibhav Nagarnaik 	else
3223c64e148aSVaibhav Nagarnaik 		bpage = rb_set_head_page(cpu_buffer);
322454f7be5bSSteven Rostedt 	if (bpage)
3225c64e148aSVaibhav Nagarnaik 		ret = bpage->page->time_stamp;
32267115e3fcSLinus Torvalds 	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3227c64e148aSVaibhav Nagarnaik 
3228c64e148aSVaibhav Nagarnaik 	return ret;
3229c64e148aSVaibhav Nagarnaik }
3230c64e148aSVaibhav Nagarnaik EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);
3231c64e148aSVaibhav Nagarnaik 
3232c64e148aSVaibhav Nagarnaik /**
3233c64e148aSVaibhav Nagarnaik  * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer
3234c64e148aSVaibhav Nagarnaik  * @buffer: The ring buffer
3235c64e148aSVaibhav Nagarnaik  * @cpu: The per CPU buffer to read from.
3236c64e148aSVaibhav Nagarnaik  */
3237c64e148aSVaibhav Nagarnaik unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu)
3238c64e148aSVaibhav Nagarnaik {
3239c64e148aSVaibhav Nagarnaik 	struct ring_buffer_per_cpu *cpu_buffer;
3240c64e148aSVaibhav Nagarnaik 	unsigned long ret;
3241c64e148aSVaibhav Nagarnaik 
3242c64e148aSVaibhav Nagarnaik 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
3243c64e148aSVaibhav Nagarnaik 		return 0;
3244c64e148aSVaibhav Nagarnaik 
3245c64e148aSVaibhav Nagarnaik 	cpu_buffer = buffer->buffers[cpu];
3246c64e148aSVaibhav Nagarnaik 	ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes;
3247c64e148aSVaibhav Nagarnaik 
3248c64e148aSVaibhav Nagarnaik 	return ret;
3249c64e148aSVaibhav Nagarnaik }
3250c64e148aSVaibhav Nagarnaik EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu);
3251c64e148aSVaibhav Nagarnaik 
3252c64e148aSVaibhav Nagarnaik /**
32537a8e76a3SSteven Rostedt  * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
32547a8e76a3SSteven Rostedt  * @buffer: The ring buffer
32557a8e76a3SSteven Rostedt  * @cpu: The per CPU buffer to get the entries from.
32567a8e76a3SSteven Rostedt  */
32577a8e76a3SSteven Rostedt unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
32587a8e76a3SSteven Rostedt {
32597a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
32607a8e76a3SSteven Rostedt 
32619e01c1b7SRusty Russell 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
32628aabee57SSteven Rostedt 		return 0;
32637a8e76a3SSteven Rostedt 
32647a8e76a3SSteven Rostedt 	cpu_buffer = buffer->buffers[cpu];
3265554f786eSSteven Rostedt 
3266f6195aa0SSteven Rostedt 	return rb_num_of_entries(cpu_buffer);
32677a8e76a3SSteven Rostedt }
3268c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
32697a8e76a3SSteven Rostedt 
32707a8e76a3SSteven Rostedt /**
3271884bfe89SSlava Pestov  * ring_buffer_overrun_cpu - get the number of overruns caused by the ring
3272884bfe89SSlava Pestov  * buffer wrapping around (only if RB_FL_OVERWRITE is on).
32737a8e76a3SSteven Rostedt  * @buffer: The ring buffer
32747a8e76a3SSteven Rostedt  * @cpu: The per CPU buffer to get the number of overruns from
32757a8e76a3SSteven Rostedt  */
32767a8e76a3SSteven Rostedt unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
32777a8e76a3SSteven Rostedt {
32787a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
32798aabee57SSteven Rostedt 	unsigned long ret;
32807a8e76a3SSteven Rostedt 
32819e01c1b7SRusty Russell 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
32828aabee57SSteven Rostedt 		return 0;
32837a8e76a3SSteven Rostedt 
32847a8e76a3SSteven Rostedt 	cpu_buffer = buffer->buffers[cpu];
328577ae365eSSteven Rostedt 	ret = local_read(&cpu_buffer->overrun);
3286554f786eSSteven Rostedt 
3287554f786eSSteven Rostedt 	return ret;
32887a8e76a3SSteven Rostedt }
3289c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
32907a8e76a3SSteven Rostedt 
32917a8e76a3SSteven Rostedt /**
3292884bfe89SSlava Pestov  * ring_buffer_commit_overrun_cpu - get the number of overruns caused by
3293884bfe89SSlava Pestov  * commits failing due to the buffer wrapping around while there are uncommitted
3294884bfe89SSlava Pestov  * events, such as during an interrupt storm.
3295f0d2c681SSteven Rostedt  * @buffer: The ring buffer
3296f0d2c681SSteven Rostedt  * @cpu: The per CPU buffer to get the number of overruns from
3297f0d2c681SSteven Rostedt  */
3298f0d2c681SSteven Rostedt unsigned long
3299f0d2c681SSteven Rostedt ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
3300f0d2c681SSteven Rostedt {
3301f0d2c681SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
3302f0d2c681SSteven Rostedt 	unsigned long ret;
3303f0d2c681SSteven Rostedt 
3304f0d2c681SSteven Rostedt 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
3305f0d2c681SSteven Rostedt 		return 0;
3306f0d2c681SSteven Rostedt 
3307f0d2c681SSteven Rostedt 	cpu_buffer = buffer->buffers[cpu];
330877ae365eSSteven Rostedt 	ret = local_read(&cpu_buffer->commit_overrun);
3309f0d2c681SSteven Rostedt 
3310f0d2c681SSteven Rostedt 	return ret;
3311f0d2c681SSteven Rostedt }
3312f0d2c681SSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
3313f0d2c681SSteven Rostedt 
3314f0d2c681SSteven Rostedt /**
3315884bfe89SSlava Pestov  * ring_buffer_dropped_events_cpu - get the number of dropped events caused by
3316884bfe89SSlava Pestov  * the ring buffer filling up (only if RB_FL_OVERWRITE is off).
3317884bfe89SSlava Pestov  * @buffer: The ring buffer
3318884bfe89SSlava Pestov  * @cpu: The per CPU buffer to get the number of overruns from
3319884bfe89SSlava Pestov  */
3320884bfe89SSlava Pestov unsigned long
3321884bfe89SSlava Pestov ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu)
3322884bfe89SSlava Pestov {
3323884bfe89SSlava Pestov 	struct ring_buffer_per_cpu *cpu_buffer;
3324884bfe89SSlava Pestov 	unsigned long ret;
3325884bfe89SSlava Pestov 
3326884bfe89SSlava Pestov 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
3327884bfe89SSlava Pestov 		return 0;
3328884bfe89SSlava Pestov 
3329884bfe89SSlava Pestov 	cpu_buffer = buffer->buffers[cpu];
3330884bfe89SSlava Pestov 	ret = local_read(&cpu_buffer->dropped_events);
3331884bfe89SSlava Pestov 
3332884bfe89SSlava Pestov 	return ret;
3333884bfe89SSlava Pestov }
3334884bfe89SSlava Pestov EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu);
3335884bfe89SSlava Pestov 
3336884bfe89SSlava Pestov /**
3337ad964704SSteven Rostedt (Red Hat)  * ring_buffer_read_events_cpu - get the number of events successfully read
3338ad964704SSteven Rostedt (Red Hat)  * @buffer: The ring buffer
3339ad964704SSteven Rostedt (Red Hat)  * @cpu: The per CPU buffer to get the number of events read
3340ad964704SSteven Rostedt (Red Hat)  */
3341ad964704SSteven Rostedt (Red Hat) unsigned long
3342ad964704SSteven Rostedt (Red Hat) ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu)
3343ad964704SSteven Rostedt (Red Hat) {
3344ad964704SSteven Rostedt (Red Hat) 	struct ring_buffer_per_cpu *cpu_buffer;
3345ad964704SSteven Rostedt (Red Hat) 
3346ad964704SSteven Rostedt (Red Hat) 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
3347ad964704SSteven Rostedt (Red Hat) 		return 0;
3348ad964704SSteven Rostedt (Red Hat) 
3349ad964704SSteven Rostedt (Red Hat) 	cpu_buffer = buffer->buffers[cpu];
3350ad964704SSteven Rostedt (Red Hat) 	return cpu_buffer->read;
3351ad964704SSteven Rostedt (Red Hat) }
3352ad964704SSteven Rostedt (Red Hat) EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu);
3353ad964704SSteven Rostedt (Red Hat) 
3354ad964704SSteven Rostedt (Red Hat) /**
33557a8e76a3SSteven Rostedt  * ring_buffer_entries - get the number of entries in a buffer
33567a8e76a3SSteven Rostedt  * @buffer: The ring buffer
33577a8e76a3SSteven Rostedt  *
33587a8e76a3SSteven Rostedt  * Returns the total number of entries in the ring buffer
33597a8e76a3SSteven Rostedt  * (all CPU entries)
33607a8e76a3SSteven Rostedt  */
33617a8e76a3SSteven Rostedt unsigned long ring_buffer_entries(struct ring_buffer *buffer)
33627a8e76a3SSteven Rostedt {
33637a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
33647a8e76a3SSteven Rostedt 	unsigned long entries = 0;
33657a8e76a3SSteven Rostedt 	int cpu;
33667a8e76a3SSteven Rostedt 
33677a8e76a3SSteven Rostedt 	/* if you care about this being correct, lock the buffer */
33687a8e76a3SSteven Rostedt 	for_each_buffer_cpu(buffer, cpu) {
33697a8e76a3SSteven Rostedt 		cpu_buffer = buffer->buffers[cpu];
3370f6195aa0SSteven Rostedt 		entries += rb_num_of_entries(cpu_buffer);
33717a8e76a3SSteven Rostedt 	}
33727a8e76a3SSteven Rostedt 
33737a8e76a3SSteven Rostedt 	return entries;
33747a8e76a3SSteven Rostedt }
3375c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_entries);
33767a8e76a3SSteven Rostedt 
33777a8e76a3SSteven Rostedt /**
337867b394f7SJiri Olsa  * ring_buffer_overruns - get the number of overruns in buffer
33797a8e76a3SSteven Rostedt  * @buffer: The ring buffer
33807a8e76a3SSteven Rostedt  *
33817a8e76a3SSteven Rostedt  * Returns the total number of overruns in the ring buffer
33827a8e76a3SSteven Rostedt  * (all CPU entries)
33837a8e76a3SSteven Rostedt  */
33847a8e76a3SSteven Rostedt unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
33857a8e76a3SSteven Rostedt {
33867a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
33877a8e76a3SSteven Rostedt 	unsigned long overruns = 0;
33887a8e76a3SSteven Rostedt 	int cpu;
33897a8e76a3SSteven Rostedt 
33907a8e76a3SSteven Rostedt 	/* if you care about this being correct, lock the buffer */
33917a8e76a3SSteven Rostedt 	for_each_buffer_cpu(buffer, cpu) {
33927a8e76a3SSteven Rostedt 		cpu_buffer = buffer->buffers[cpu];
339377ae365eSSteven Rostedt 		overruns += local_read(&cpu_buffer->overrun);
33947a8e76a3SSteven Rostedt 	}
33957a8e76a3SSteven Rostedt 
33967a8e76a3SSteven Rostedt 	return overruns;
33977a8e76a3SSteven Rostedt }
3398c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_overruns);
33997a8e76a3SSteven Rostedt 
3400642edba5SSteven Rostedt static void rb_iter_reset(struct ring_buffer_iter *iter)
34017a8e76a3SSteven Rostedt {
34027a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
34037a8e76a3SSteven Rostedt 
3404d769041fSSteven Rostedt 	/* Iterator usage is expected to have record disabled */
3405d769041fSSteven Rostedt 	iter->head_page = cpu_buffer->reader_page;
34066f807acdSSteven Rostedt 	iter->head = cpu_buffer->reader_page->read;
3407651e22f2SSteven Rostedt (Red Hat) 
3408651e22f2SSteven Rostedt (Red Hat) 	iter->cache_reader_page = iter->head_page;
340924607f11SSteven Rostedt (Red Hat) 	iter->cache_read = cpu_buffer->read;
3410651e22f2SSteven Rostedt (Red Hat) 
3411d769041fSSteven Rostedt 	if (iter->head)
3412d769041fSSteven Rostedt 		iter->read_stamp = cpu_buffer->read_stamp;
3413d769041fSSteven Rostedt 	else
3414abc9b56dSSteven Rostedt 		iter->read_stamp = iter->head_page->page->time_stamp;
3415642edba5SSteven Rostedt }
3416f83c9d0fSSteven Rostedt 
3417642edba5SSteven Rostedt /**
3418642edba5SSteven Rostedt  * ring_buffer_iter_reset - reset an iterator
3419642edba5SSteven Rostedt  * @iter: The iterator to reset
3420642edba5SSteven Rostedt  *
3421642edba5SSteven Rostedt  * Resets the iterator, so that it will start from the beginning
3422642edba5SSteven Rostedt  * again.
3423642edba5SSteven Rostedt  */
3424642edba5SSteven Rostedt void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
3425642edba5SSteven Rostedt {
3426554f786eSSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
3427642edba5SSteven Rostedt 	unsigned long flags;
3428642edba5SSteven Rostedt 
3429554f786eSSteven Rostedt 	if (!iter)
3430554f786eSSteven Rostedt 		return;
3431554f786eSSteven Rostedt 
3432554f786eSSteven Rostedt 	cpu_buffer = iter->cpu_buffer;
3433554f786eSSteven Rostedt 
34345389f6faSThomas Gleixner 	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3435642edba5SSteven Rostedt 	rb_iter_reset(iter);
34365389f6faSThomas Gleixner 	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
34377a8e76a3SSteven Rostedt }
3438c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
34397a8e76a3SSteven Rostedt 
34407a8e76a3SSteven Rostedt /**
34417a8e76a3SSteven Rostedt  * ring_buffer_iter_empty - check if an iterator has no more to read
34427a8e76a3SSteven Rostedt  * @iter: The iterator to check
34437a8e76a3SSteven Rostedt  */
34447a8e76a3SSteven Rostedt int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
34457a8e76a3SSteven Rostedt {
34467a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
344778f7a45dSSteven Rostedt (VMware) 	struct buffer_page *reader;
344878f7a45dSSteven Rostedt (VMware) 	struct buffer_page *head_page;
344978f7a45dSSteven Rostedt (VMware) 	struct buffer_page *commit_page;
345078f7a45dSSteven Rostedt (VMware) 	unsigned commit;
34517a8e76a3SSteven Rostedt 
34527a8e76a3SSteven Rostedt 	cpu_buffer = iter->cpu_buffer;
34537a8e76a3SSteven Rostedt 
345478f7a45dSSteven Rostedt (VMware) 	/* Remember, trace recording is off when iterator is in use */
345578f7a45dSSteven Rostedt (VMware) 	reader = cpu_buffer->reader_page;
345678f7a45dSSteven Rostedt (VMware) 	head_page = cpu_buffer->head_page;
345778f7a45dSSteven Rostedt (VMware) 	commit_page = cpu_buffer->commit_page;
345878f7a45dSSteven Rostedt (VMware) 	commit = rb_page_commit(commit_page);
345978f7a45dSSteven Rostedt (VMware) 
346078f7a45dSSteven Rostedt (VMware) 	return ((iter->head_page == commit_page && iter->head == commit) ||
346178f7a45dSSteven Rostedt (VMware) 		(iter->head_page == reader && commit_page == head_page &&
346278f7a45dSSteven Rostedt (VMware) 		 head_page->read == commit &&
346378f7a45dSSteven Rostedt (VMware) 		 iter->head == rb_page_commit(cpu_buffer->reader_page)));
34647a8e76a3SSteven Rostedt }
3465c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
34667a8e76a3SSteven Rostedt 
34677a8e76a3SSteven Rostedt static void
34687a8e76a3SSteven Rostedt rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
34697a8e76a3SSteven Rostedt 		     struct ring_buffer_event *event)
34707a8e76a3SSteven Rostedt {
34717a8e76a3SSteven Rostedt 	u64 delta;
34727a8e76a3SSteven Rostedt 
3473334d4169SLai Jiangshan 	switch (event->type_len) {
34747a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_PADDING:
34757a8e76a3SSteven Rostedt 		return;
34767a8e76a3SSteven Rostedt 
34777a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_TIME_EXTEND:
3478*dc4e2801STom Zanussi 		delta = ring_buffer_event_time_stamp(event);
34797a8e76a3SSteven Rostedt 		cpu_buffer->read_stamp += delta;
34807a8e76a3SSteven Rostedt 		return;
34817a8e76a3SSteven Rostedt 
34827a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_TIME_STAMP:
3483*dc4e2801STom Zanussi 		delta = ring_buffer_event_time_stamp(event);
3484*dc4e2801STom Zanussi 		cpu_buffer->read_stamp = delta;
34857a8e76a3SSteven Rostedt 		return;
34867a8e76a3SSteven Rostedt 
34877a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_DATA:
34887a8e76a3SSteven Rostedt 		cpu_buffer->read_stamp += event->time_delta;
34897a8e76a3SSteven Rostedt 		return;
34907a8e76a3SSteven Rostedt 
34917a8e76a3SSteven Rostedt 	default:
34927a8e76a3SSteven Rostedt 		BUG();
34937a8e76a3SSteven Rostedt 	}
34947a8e76a3SSteven Rostedt 	return;
34957a8e76a3SSteven Rostedt }
34967a8e76a3SSteven Rostedt 
34977a8e76a3SSteven Rostedt static void
34987a8e76a3SSteven Rostedt rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
34997a8e76a3SSteven Rostedt 			  struct ring_buffer_event *event)
35007a8e76a3SSteven Rostedt {
35017a8e76a3SSteven Rostedt 	u64 delta;
35027a8e76a3SSteven Rostedt 
3503334d4169SLai Jiangshan 	switch (event->type_len) {
35047a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_PADDING:
35057a8e76a3SSteven Rostedt 		return;
35067a8e76a3SSteven Rostedt 
35077a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_TIME_EXTEND:
3508*dc4e2801STom Zanussi 		delta = ring_buffer_event_time_stamp(event);
35097a8e76a3SSteven Rostedt 		iter->read_stamp += delta;
35107a8e76a3SSteven Rostedt 		return;
35117a8e76a3SSteven Rostedt 
35127a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_TIME_STAMP:
3513*dc4e2801STom Zanussi 		delta = ring_buffer_event_time_stamp(event);
3514*dc4e2801STom Zanussi 		iter->read_stamp = delta;
35157a8e76a3SSteven Rostedt 		return;
35167a8e76a3SSteven Rostedt 
35177a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_DATA:
35187a8e76a3SSteven Rostedt 		iter->read_stamp += event->time_delta;
35197a8e76a3SSteven Rostedt 		return;
35207a8e76a3SSteven Rostedt 
35217a8e76a3SSteven Rostedt 	default:
35227a8e76a3SSteven Rostedt 		BUG();
35237a8e76a3SSteven Rostedt 	}
35247a8e76a3SSteven Rostedt 	return;
35257a8e76a3SSteven Rostedt }
35267a8e76a3SSteven Rostedt 
3527d769041fSSteven Rostedt static struct buffer_page *
3528d769041fSSteven Rostedt rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
35297a8e76a3SSteven Rostedt {
3530d769041fSSteven Rostedt 	struct buffer_page *reader = NULL;
353166a8cb95SSteven Rostedt 	unsigned long overwrite;
3532d769041fSSteven Rostedt 	unsigned long flags;
3533818e3dd3SSteven Rostedt 	int nr_loops = 0;
353477ae365eSSteven Rostedt 	int ret;
3535d769041fSSteven Rostedt 
35363e03fb7fSSteven Rostedt 	local_irq_save(flags);
35370199c4e6SThomas Gleixner 	arch_spin_lock(&cpu_buffer->lock);
3538d769041fSSteven Rostedt 
3539d769041fSSteven Rostedt  again:
3540818e3dd3SSteven Rostedt 	/*
3541818e3dd3SSteven Rostedt 	 * This should normally only loop twice. But because the
3542818e3dd3SSteven Rostedt 	 * start of the reader inserts an empty page, it causes
3543818e3dd3SSteven Rostedt 	 * a case where we will loop three times. There should be no
3544818e3dd3SSteven Rostedt 	 * reason to loop four times (that I know of).
3545818e3dd3SSteven Rostedt 	 */
35463e89c7bbSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
3547818e3dd3SSteven Rostedt 		reader = NULL;
3548818e3dd3SSteven Rostedt 		goto out;
3549818e3dd3SSteven Rostedt 	}
3550818e3dd3SSteven Rostedt 
3551d769041fSSteven Rostedt 	reader = cpu_buffer->reader_page;
3552d769041fSSteven Rostedt 
3553d769041fSSteven Rostedt 	/* If there's more to read, return this page */
3554bf41a158SSteven Rostedt 	if (cpu_buffer->reader_page->read < rb_page_size(reader))
3555d769041fSSteven Rostedt 		goto out;
3556d769041fSSteven Rostedt 
3557d769041fSSteven Rostedt 	/* Never should we have an index greater than the size */
35583e89c7bbSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer,
35593e89c7bbSSteven Rostedt 		       cpu_buffer->reader_page->read > rb_page_size(reader)))
35603e89c7bbSSteven Rostedt 		goto out;
3561d769041fSSteven Rostedt 
3562d769041fSSteven Rostedt 	/* check if we caught up to the tail */
3563d769041fSSteven Rostedt 	reader = NULL;
3564bf41a158SSteven Rostedt 	if (cpu_buffer->commit_page == cpu_buffer->reader_page)
3565d769041fSSteven Rostedt 		goto out;
35667a8e76a3SSteven Rostedt 
3567a5fb8331SSteven Rostedt 	/* Don't bother swapping if the ring buffer is empty */
3568a5fb8331SSteven Rostedt 	if (rb_num_of_entries(cpu_buffer) == 0)
3569a5fb8331SSteven Rostedt 		goto out;
3570a5fb8331SSteven Rostedt 
35717a8e76a3SSteven Rostedt 	/*
3572d769041fSSteven Rostedt 	 * Reset the reader page to size zero.
35737a8e76a3SSteven Rostedt 	 */
357477ae365eSSteven Rostedt 	local_set(&cpu_buffer->reader_page->write, 0);
357577ae365eSSteven Rostedt 	local_set(&cpu_buffer->reader_page->entries, 0);
357677ae365eSSteven Rostedt 	local_set(&cpu_buffer->reader_page->page->commit, 0);
3577ff0ff84aSSteven Rostedt 	cpu_buffer->reader_page->real_end = 0;
3578d769041fSSteven Rostedt 
357977ae365eSSteven Rostedt  spin:
358077ae365eSSteven Rostedt 	/*
358177ae365eSSteven Rostedt 	 * Splice the empty reader page into the list around the head.
358277ae365eSSteven Rostedt 	 */
358377ae365eSSteven Rostedt 	reader = rb_set_head_page(cpu_buffer);
358454f7be5bSSteven Rostedt 	if (!reader)
358554f7be5bSSteven Rostedt 		goto out;
35860e1ff5d7SSteven Rostedt 	cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
3587d769041fSSteven Rostedt 	cpu_buffer->reader_page->list.prev = reader->list.prev;
3588bf41a158SSteven Rostedt 
35893adc54faSSteven Rostedt 	/*
35903adc54faSSteven Rostedt 	 * cpu_buffer->pages just needs to point to the buffer, it
35913adc54faSSteven Rostedt 	 *  has no specific buffer page to point to. Lets move it out
359225985edcSLucas De Marchi 	 *  of our way so we don't accidentally swap it.
35933adc54faSSteven Rostedt 	 */
35943adc54faSSteven Rostedt 	cpu_buffer->pages = reader->list.prev;
35953adc54faSSteven Rostedt 
359677ae365eSSteven Rostedt 	/* The reader page will be pointing to the new head */
359777ae365eSSteven Rostedt 	rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
3598d769041fSSteven Rostedt 
3599d769041fSSteven Rostedt 	/*
360066a8cb95SSteven Rostedt 	 * We want to make sure we read the overruns after we set up our
360166a8cb95SSteven Rostedt 	 * pointers to the next object. The writer side does a
360266a8cb95SSteven Rostedt 	 * cmpxchg to cross pages which acts as the mb on the writer
360366a8cb95SSteven Rostedt 	 * side. Note, the reader will constantly fail the swap
360466a8cb95SSteven Rostedt 	 * while the writer is updating the pointers, so this
360566a8cb95SSteven Rostedt 	 * guarantees that the overwrite recorded here is the one we
360666a8cb95SSteven Rostedt 	 * want to compare with the last_overrun.
360766a8cb95SSteven Rostedt 	 */
360866a8cb95SSteven Rostedt 	smp_mb();
360966a8cb95SSteven Rostedt 	overwrite = local_read(&(cpu_buffer->overrun));
361066a8cb95SSteven Rostedt 
361166a8cb95SSteven Rostedt 	/*
361277ae365eSSteven Rostedt 	 * Here's the tricky part.
361377ae365eSSteven Rostedt 	 *
361477ae365eSSteven Rostedt 	 * We need to move the pointer past the header page.
361577ae365eSSteven Rostedt 	 * But we can only do that if a writer is not currently
361677ae365eSSteven Rostedt 	 * moving it. The page before the header page has the
361777ae365eSSteven Rostedt 	 * flag bit '1' set if it is pointing to the page we want.
361877ae365eSSteven Rostedt 	 * but if the writer is in the process of moving it
361977ae365eSSteven Rostedt 	 * than it will be '2' or already moved '0'.
3620d769041fSSteven Rostedt 	 */
3621d769041fSSteven Rostedt 
362277ae365eSSteven Rostedt 	ret = rb_head_page_replace(reader, cpu_buffer->reader_page);
362377ae365eSSteven Rostedt 
362477ae365eSSteven Rostedt 	/*
362577ae365eSSteven Rostedt 	 * If we did not convert it, then we must try again.
362677ae365eSSteven Rostedt 	 */
362777ae365eSSteven Rostedt 	if (!ret)
362877ae365eSSteven Rostedt 		goto spin;
362977ae365eSSteven Rostedt 
363077ae365eSSteven Rostedt 	/*
363177ae365eSSteven Rostedt 	 * Yeah! We succeeded in replacing the page.
363277ae365eSSteven Rostedt 	 *
363377ae365eSSteven Rostedt 	 * Now make the new head point back to the reader page.
363477ae365eSSteven Rostedt 	 */
36355ded3dc6SDavid Sharp 	rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
36367a8e76a3SSteven Rostedt 	rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
3637d769041fSSteven Rostedt 
3638d769041fSSteven Rostedt 	/* Finally update the reader page to the new head */
3639d769041fSSteven Rostedt 	cpu_buffer->reader_page = reader;
3640b81f472aSSteven Rostedt (Red Hat) 	cpu_buffer->reader_page->read = 0;
3641d769041fSSteven Rostedt 
364266a8cb95SSteven Rostedt 	if (overwrite != cpu_buffer->last_overrun) {
364366a8cb95SSteven Rostedt 		cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
364466a8cb95SSteven Rostedt 		cpu_buffer->last_overrun = overwrite;
364566a8cb95SSteven Rostedt 	}
364666a8cb95SSteven Rostedt 
3647d769041fSSteven Rostedt 	goto again;
3648d769041fSSteven Rostedt 
3649d769041fSSteven Rostedt  out:
3650b81f472aSSteven Rostedt (Red Hat) 	/* Update the read_stamp on the first event */
3651b81f472aSSteven Rostedt (Red Hat) 	if (reader && reader->read == 0)
3652b81f472aSSteven Rostedt (Red Hat) 		cpu_buffer->read_stamp = reader->page->time_stamp;
3653b81f472aSSteven Rostedt (Red Hat) 
36540199c4e6SThomas Gleixner 	arch_spin_unlock(&cpu_buffer->lock);
36553e03fb7fSSteven Rostedt 	local_irq_restore(flags);
3656d769041fSSteven Rostedt 
3657d769041fSSteven Rostedt 	return reader;
36587a8e76a3SSteven Rostedt }
36597a8e76a3SSteven Rostedt 
3660d769041fSSteven Rostedt static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
3661d769041fSSteven Rostedt {
3662d769041fSSteven Rostedt 	struct ring_buffer_event *event;
3663d769041fSSteven Rostedt 	struct buffer_page *reader;
3664d769041fSSteven Rostedt 	unsigned length;
3665d769041fSSteven Rostedt 
3666d769041fSSteven Rostedt 	reader = rb_get_reader_page(cpu_buffer);
3667d769041fSSteven Rostedt 
3668d769041fSSteven Rostedt 	/* This function should not be called when buffer is empty */
36693e89c7bbSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, !reader))
36703e89c7bbSSteven Rostedt 		return;
3671d769041fSSteven Rostedt 
3672d769041fSSteven Rostedt 	event = rb_reader_event(cpu_buffer);
36737a8e76a3SSteven Rostedt 
3674a1863c21SSteven Rostedt 	if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
3675e4906effSSteven Rostedt 		cpu_buffer->read++;
36767a8e76a3SSteven Rostedt 
36777a8e76a3SSteven Rostedt 	rb_update_read_stamp(cpu_buffer, event);
36787a8e76a3SSteven Rostedt 
3679d769041fSSteven Rostedt 	length = rb_event_length(event);
36806f807acdSSteven Rostedt 	cpu_buffer->reader_page->read += length;
36817a8e76a3SSteven Rostedt }
36827a8e76a3SSteven Rostedt 
36837a8e76a3SSteven Rostedt static void rb_advance_iter(struct ring_buffer_iter *iter)
36847a8e76a3SSteven Rostedt {
36857a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
36867a8e76a3SSteven Rostedt 	struct ring_buffer_event *event;
36877a8e76a3SSteven Rostedt 	unsigned length;
36887a8e76a3SSteven Rostedt 
36897a8e76a3SSteven Rostedt 	cpu_buffer = iter->cpu_buffer;
36907a8e76a3SSteven Rostedt 
36917a8e76a3SSteven Rostedt 	/*
36927a8e76a3SSteven Rostedt 	 * Check if we are at the end of the buffer.
36937a8e76a3SSteven Rostedt 	 */
3694bf41a158SSteven Rostedt 	if (iter->head >= rb_page_size(iter->head_page)) {
3695ea05b57cSSteven Rostedt 		/* discarded commits can make the page empty */
3696ea05b57cSSteven Rostedt 		if (iter->head_page == cpu_buffer->commit_page)
36973e89c7bbSSteven Rostedt 			return;
3698d769041fSSteven Rostedt 		rb_inc_iter(iter);
36997a8e76a3SSteven Rostedt 		return;
37007a8e76a3SSteven Rostedt 	}
37017a8e76a3SSteven Rostedt 
37027a8e76a3SSteven Rostedt 	event = rb_iter_head_event(iter);
37037a8e76a3SSteven Rostedt 
37047a8e76a3SSteven Rostedt 	length = rb_event_length(event);
37057a8e76a3SSteven Rostedt 
37067a8e76a3SSteven Rostedt 	/*
37077a8e76a3SSteven Rostedt 	 * This should not be called to advance the header if we are
37087a8e76a3SSteven Rostedt 	 * at the tail of the buffer.
37097a8e76a3SSteven Rostedt 	 */
37103e89c7bbSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer,
3711f536aafcSSteven Rostedt 		       (iter->head_page == cpu_buffer->commit_page) &&
37123e89c7bbSSteven Rostedt 		       (iter->head + length > rb_commit_index(cpu_buffer))))
37133e89c7bbSSteven Rostedt 		return;
37147a8e76a3SSteven Rostedt 
37157a8e76a3SSteven Rostedt 	rb_update_iter_read_stamp(iter, event);
37167a8e76a3SSteven Rostedt 
37177a8e76a3SSteven Rostedt 	iter->head += length;
37187a8e76a3SSteven Rostedt 
37197a8e76a3SSteven Rostedt 	/* check for end of page padding */
3720bf41a158SSteven Rostedt 	if ((iter->head >= rb_page_size(iter->head_page)) &&
3721bf41a158SSteven Rostedt 	    (iter->head_page != cpu_buffer->commit_page))
3722771e0384SSteven Rostedt 		rb_inc_iter(iter);
37237a8e76a3SSteven Rostedt }
37247a8e76a3SSteven Rostedt 
372566a8cb95SSteven Rostedt static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
372666a8cb95SSteven Rostedt {
372766a8cb95SSteven Rostedt 	return cpu_buffer->lost_events;
372866a8cb95SSteven Rostedt }
372966a8cb95SSteven Rostedt 
3730f83c9d0fSSteven Rostedt static struct ring_buffer_event *
373166a8cb95SSteven Rostedt rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
373266a8cb95SSteven Rostedt 	       unsigned long *lost_events)
37337a8e76a3SSteven Rostedt {
37347a8e76a3SSteven Rostedt 	struct ring_buffer_event *event;
3735d769041fSSteven Rostedt 	struct buffer_page *reader;
3736818e3dd3SSteven Rostedt 	int nr_loops = 0;
37377a8e76a3SSteven Rostedt 
3738*dc4e2801STom Zanussi 	if (ts)
3739*dc4e2801STom Zanussi 		*ts = 0;
37407a8e76a3SSteven Rostedt  again:
3741818e3dd3SSteven Rostedt 	/*
374269d1b839SSteven Rostedt 	 * We repeat when a time extend is encountered.
374369d1b839SSteven Rostedt 	 * Since the time extend is always attached to a data event,
374469d1b839SSteven Rostedt 	 * we should never loop more than once.
374569d1b839SSteven Rostedt 	 * (We never hit the following condition more than twice).
3746818e3dd3SSteven Rostedt 	 */
374769d1b839SSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3748818e3dd3SSteven Rostedt 		return NULL;
3749818e3dd3SSteven Rostedt 
3750d769041fSSteven Rostedt 	reader = rb_get_reader_page(cpu_buffer);
3751d769041fSSteven Rostedt 	if (!reader)
37527a8e76a3SSteven Rostedt 		return NULL;
37537a8e76a3SSteven Rostedt 
3754d769041fSSteven Rostedt 	event = rb_reader_event(cpu_buffer);
37557a8e76a3SSteven Rostedt 
3756334d4169SLai Jiangshan 	switch (event->type_len) {
37577a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_PADDING:
37582d622719STom Zanussi 		if (rb_null_event(event))
3759bf41a158SSteven Rostedt 			RB_WARN_ON(cpu_buffer, 1);
37602d622719STom Zanussi 		/*
37612d622719STom Zanussi 		 * Because the writer could be discarding every
37622d622719STom Zanussi 		 * event it creates (which would probably be bad)
37632d622719STom Zanussi 		 * if we were to go back to "again" then we may never
37642d622719STom Zanussi 		 * catch up, and will trigger the warn on, or lock
37652d622719STom Zanussi 		 * the box. Return the padding, and we will release
37662d622719STom Zanussi 		 * the current locks, and try again.
37672d622719STom Zanussi 		 */
37682d622719STom Zanussi 		return event;
37697a8e76a3SSteven Rostedt 
37707a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_TIME_EXTEND:
37717a8e76a3SSteven Rostedt 		/* Internal data, OK to advance */
3772d769041fSSteven Rostedt 		rb_advance_reader(cpu_buffer);
37737a8e76a3SSteven Rostedt 		goto again;
37747a8e76a3SSteven Rostedt 
37757a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_TIME_STAMP:
3776*dc4e2801STom Zanussi 		if (ts) {
3777*dc4e2801STom Zanussi 			*ts = ring_buffer_event_time_stamp(event);
3778*dc4e2801STom Zanussi 			ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
3779*dc4e2801STom Zanussi 							 cpu_buffer->cpu, ts);
3780*dc4e2801STom Zanussi 		}
3781*dc4e2801STom Zanussi 		/* Internal data, OK to advance */
3782d769041fSSteven Rostedt 		rb_advance_reader(cpu_buffer);
37837a8e76a3SSteven Rostedt 		goto again;
37847a8e76a3SSteven Rostedt 
37857a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_DATA:
3786*dc4e2801STom Zanussi 		if (ts && !(*ts)) {
37877a8e76a3SSteven Rostedt 			*ts = cpu_buffer->read_stamp + event->time_delta;
3788d8eeb2d3SRobert Richter 			ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
378937886f6aSSteven Rostedt 							 cpu_buffer->cpu, ts);
37907a8e76a3SSteven Rostedt 		}
379166a8cb95SSteven Rostedt 		if (lost_events)
379266a8cb95SSteven Rostedt 			*lost_events = rb_lost_events(cpu_buffer);
37937a8e76a3SSteven Rostedt 		return event;
37947a8e76a3SSteven Rostedt 
37957a8e76a3SSteven Rostedt 	default:
37967a8e76a3SSteven Rostedt 		BUG();
37977a8e76a3SSteven Rostedt 	}
37987a8e76a3SSteven Rostedt 
37997a8e76a3SSteven Rostedt 	return NULL;
38007a8e76a3SSteven Rostedt }
3801c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_peek);
38027a8e76a3SSteven Rostedt 
3803f83c9d0fSSteven Rostedt static struct ring_buffer_event *
3804f83c9d0fSSteven Rostedt rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
38057a8e76a3SSteven Rostedt {
38067a8e76a3SSteven Rostedt 	struct ring_buffer *buffer;
38077a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
38087a8e76a3SSteven Rostedt 	struct ring_buffer_event *event;
3809818e3dd3SSteven Rostedt 	int nr_loops = 0;
38107a8e76a3SSteven Rostedt 
3811*dc4e2801STom Zanussi 	if (ts)
3812*dc4e2801STom Zanussi 		*ts = 0;
3813*dc4e2801STom Zanussi 
38147a8e76a3SSteven Rostedt 	cpu_buffer = iter->cpu_buffer;
38157a8e76a3SSteven Rostedt 	buffer = cpu_buffer->buffer;
38167a8e76a3SSteven Rostedt 
3817492a74f4SSteven Rostedt 	/*
3818492a74f4SSteven Rostedt 	 * Check if someone performed a consuming read to
3819492a74f4SSteven Rostedt 	 * the buffer. A consuming read invalidates the iterator
3820492a74f4SSteven Rostedt 	 * and we need to reset the iterator in this case.
3821492a74f4SSteven Rostedt 	 */
3822492a74f4SSteven Rostedt 	if (unlikely(iter->cache_read != cpu_buffer->read ||
3823492a74f4SSteven Rostedt 		     iter->cache_reader_page != cpu_buffer->reader_page))
3824492a74f4SSteven Rostedt 		rb_iter_reset(iter);
3825492a74f4SSteven Rostedt 
38267a8e76a3SSteven Rostedt  again:
38273c05d748SSteven Rostedt 	if (ring_buffer_iter_empty(iter))
38283c05d748SSteven Rostedt 		return NULL;
38293c05d748SSteven Rostedt 
3830818e3dd3SSteven Rostedt 	/*
3831021de3d9SSteven Rostedt (Red Hat) 	 * We repeat when a time extend is encountered or we hit
3832021de3d9SSteven Rostedt (Red Hat) 	 * the end of the page. Since the time extend is always attached
3833021de3d9SSteven Rostedt (Red Hat) 	 * to a data event, we should never loop more than three times.
3834021de3d9SSteven Rostedt (Red Hat) 	 * Once for going to next page, once on time extend, and
3835021de3d9SSteven Rostedt (Red Hat) 	 * finally once to get the event.
3836021de3d9SSteven Rostedt (Red Hat) 	 * (We never hit the following condition more than thrice).
3837818e3dd3SSteven Rostedt 	 */
3838021de3d9SSteven Rostedt (Red Hat) 	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3))
3839818e3dd3SSteven Rostedt 		return NULL;
3840818e3dd3SSteven Rostedt 
38417a8e76a3SSteven Rostedt 	if (rb_per_cpu_empty(cpu_buffer))
38427a8e76a3SSteven Rostedt 		return NULL;
38437a8e76a3SSteven Rostedt 
384410e83fd0SSteven Rostedt (Red Hat) 	if (iter->head >= rb_page_size(iter->head_page)) {
38453c05d748SSteven Rostedt 		rb_inc_iter(iter);
38463c05d748SSteven Rostedt 		goto again;
38473c05d748SSteven Rostedt 	}
38483c05d748SSteven Rostedt 
38497a8e76a3SSteven Rostedt 	event = rb_iter_head_event(iter);
38507a8e76a3SSteven Rostedt 
3851334d4169SLai Jiangshan 	switch (event->type_len) {
38527a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_PADDING:
38532d622719STom Zanussi 		if (rb_null_event(event)) {
3854d769041fSSteven Rostedt 			rb_inc_iter(iter);
38557a8e76a3SSteven Rostedt 			goto again;
38562d622719STom Zanussi 		}
38572d622719STom Zanussi 		rb_advance_iter(iter);
38582d622719STom Zanussi 		return event;
38597a8e76a3SSteven Rostedt 
38607a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_TIME_EXTEND:
38617a8e76a3SSteven Rostedt 		/* Internal data, OK to advance */
38627a8e76a3SSteven Rostedt 		rb_advance_iter(iter);
38637a8e76a3SSteven Rostedt 		goto again;
38647a8e76a3SSteven Rostedt 
38657a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_TIME_STAMP:
3866*dc4e2801STom Zanussi 		if (ts) {
3867*dc4e2801STom Zanussi 			*ts = ring_buffer_event_time_stamp(event);
3868*dc4e2801STom Zanussi 			ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
3869*dc4e2801STom Zanussi 							 cpu_buffer->cpu, ts);
3870*dc4e2801STom Zanussi 		}
3871*dc4e2801STom Zanussi 		/* Internal data, OK to advance */
38727a8e76a3SSteven Rostedt 		rb_advance_iter(iter);
38737a8e76a3SSteven Rostedt 		goto again;
38747a8e76a3SSteven Rostedt 
38757a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_DATA:
3876*dc4e2801STom Zanussi 		if (ts && !(*ts)) {
38777a8e76a3SSteven Rostedt 			*ts = iter->read_stamp + event->time_delta;
387837886f6aSSteven Rostedt 			ring_buffer_normalize_time_stamp(buffer,
387937886f6aSSteven Rostedt 							 cpu_buffer->cpu, ts);
38807a8e76a3SSteven Rostedt 		}
38817a8e76a3SSteven Rostedt 		return event;
38827a8e76a3SSteven Rostedt 
38837a8e76a3SSteven Rostedt 	default:
38847a8e76a3SSteven Rostedt 		BUG();
38857a8e76a3SSteven Rostedt 	}
38867a8e76a3SSteven Rostedt 
38877a8e76a3SSteven Rostedt 	return NULL;
38887a8e76a3SSteven Rostedt }
3889c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
38907a8e76a3SSteven Rostedt 
3891289a5a25SSteven Rostedt (Red Hat) static inline bool rb_reader_lock(struct ring_buffer_per_cpu *cpu_buffer)
38928d707e8eSSteven Rostedt {
3893289a5a25SSteven Rostedt (Red Hat) 	if (likely(!in_nmi())) {
3894289a5a25SSteven Rostedt (Red Hat) 		raw_spin_lock(&cpu_buffer->reader_lock);
3895289a5a25SSteven Rostedt (Red Hat) 		return true;
3896289a5a25SSteven Rostedt (Red Hat) 	}
3897289a5a25SSteven Rostedt (Red Hat) 
38988d707e8eSSteven Rostedt 	/*
38998d707e8eSSteven Rostedt 	 * If an NMI die dumps out the content of the ring buffer
3900289a5a25SSteven Rostedt (Red Hat) 	 * trylock must be used to prevent a deadlock if the NMI
3901289a5a25SSteven Rostedt (Red Hat) 	 * preempted a task that holds the ring buffer locks. If
3902289a5a25SSteven Rostedt (Red Hat) 	 * we get the lock then all is fine, if not, then continue
3903289a5a25SSteven Rostedt (Red Hat) 	 * to do the read, but this can corrupt the ring buffer,
3904289a5a25SSteven Rostedt (Red Hat) 	 * so it must be permanently disabled from future writes.
3905289a5a25SSteven Rostedt (Red Hat) 	 * Reading from NMI is a oneshot deal.
39068d707e8eSSteven Rostedt 	 */
3907289a5a25SSteven Rostedt (Red Hat) 	if (raw_spin_trylock(&cpu_buffer->reader_lock))
3908289a5a25SSteven Rostedt (Red Hat) 		return true;
39098d707e8eSSteven Rostedt 
3910289a5a25SSteven Rostedt (Red Hat) 	/* Continue without locking, but disable the ring buffer */
3911289a5a25SSteven Rostedt (Red Hat) 	atomic_inc(&cpu_buffer->record_disabled);
3912289a5a25SSteven Rostedt (Red Hat) 	return false;
3913289a5a25SSteven Rostedt (Red Hat) }
3914289a5a25SSteven Rostedt (Red Hat) 
3915289a5a25SSteven Rostedt (Red Hat) static inline void
3916289a5a25SSteven Rostedt (Red Hat) rb_reader_unlock(struct ring_buffer_per_cpu *cpu_buffer, bool locked)
3917289a5a25SSteven Rostedt (Red Hat) {
3918289a5a25SSteven Rostedt (Red Hat) 	if (likely(locked))
3919289a5a25SSteven Rostedt (Red Hat) 		raw_spin_unlock(&cpu_buffer->reader_lock);
3920289a5a25SSteven Rostedt (Red Hat) 	return;
39218d707e8eSSteven Rostedt }
39228d707e8eSSteven Rostedt 
39237a8e76a3SSteven Rostedt /**
3924f83c9d0fSSteven Rostedt  * ring_buffer_peek - peek at the next event to be read
3925f83c9d0fSSteven Rostedt  * @buffer: The ring buffer to read
3926f83c9d0fSSteven Rostedt  * @cpu: The cpu to peak at
3927f83c9d0fSSteven Rostedt  * @ts: The timestamp counter of this event.
392866a8cb95SSteven Rostedt  * @lost_events: a variable to store if events were lost (may be NULL)
3929f83c9d0fSSteven Rostedt  *
3930f83c9d0fSSteven Rostedt  * This will return the event that will be read next, but does
3931f83c9d0fSSteven Rostedt  * not consume the data.
3932f83c9d0fSSteven Rostedt  */
3933f83c9d0fSSteven Rostedt struct ring_buffer_event *
393466a8cb95SSteven Rostedt ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
393566a8cb95SSteven Rostedt 		 unsigned long *lost_events)
3936f83c9d0fSSteven Rostedt {
3937f83c9d0fSSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
39388aabee57SSteven Rostedt 	struct ring_buffer_event *event;
3939f83c9d0fSSteven Rostedt 	unsigned long flags;
3940289a5a25SSteven Rostedt (Red Hat) 	bool dolock;
3941f83c9d0fSSteven Rostedt 
3942554f786eSSteven Rostedt 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
39438aabee57SSteven Rostedt 		return NULL;
3944554f786eSSteven Rostedt 
39452d622719STom Zanussi  again:
39468d707e8eSSteven Rostedt 	local_irq_save(flags);
3947289a5a25SSteven Rostedt (Red Hat) 	dolock = rb_reader_lock(cpu_buffer);
394866a8cb95SSteven Rostedt 	event = rb_buffer_peek(cpu_buffer, ts, lost_events);
3949469535a5SRobert Richter 	if (event && event->type_len == RINGBUF_TYPE_PADDING)
3950469535a5SRobert Richter 		rb_advance_reader(cpu_buffer);
3951289a5a25SSteven Rostedt (Red Hat) 	rb_reader_unlock(cpu_buffer, dolock);
39528d707e8eSSteven Rostedt 	local_irq_restore(flags);
3953f83c9d0fSSteven Rostedt 
39541b959e18SSteven Rostedt 	if (event && event->type_len == RINGBUF_TYPE_PADDING)
39552d622719STom Zanussi 		goto again;
39562d622719STom Zanussi 
3957f83c9d0fSSteven Rostedt 	return event;
3958f83c9d0fSSteven Rostedt }
3959f83c9d0fSSteven Rostedt 
3960f83c9d0fSSteven Rostedt /**
3961f83c9d0fSSteven Rostedt  * ring_buffer_iter_peek - peek at the next event to be read
3962f83c9d0fSSteven Rostedt  * @iter: The ring buffer iterator
3963f83c9d0fSSteven Rostedt  * @ts: The timestamp counter of this event.
3964f83c9d0fSSteven Rostedt  *
3965f83c9d0fSSteven Rostedt  * This will return the event that will be read next, but does
3966f83c9d0fSSteven Rostedt  * not increment the iterator.
3967f83c9d0fSSteven Rostedt  */
3968f83c9d0fSSteven Rostedt struct ring_buffer_event *
3969f83c9d0fSSteven Rostedt ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3970f83c9d0fSSteven Rostedt {
3971f83c9d0fSSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3972f83c9d0fSSteven Rostedt 	struct ring_buffer_event *event;
3973f83c9d0fSSteven Rostedt 	unsigned long flags;
3974f83c9d0fSSteven Rostedt 
39752d622719STom Zanussi  again:
39765389f6faSThomas Gleixner 	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3977f83c9d0fSSteven Rostedt 	event = rb_iter_peek(iter, ts);
39785389f6faSThomas Gleixner 	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3979f83c9d0fSSteven Rostedt 
39801b959e18SSteven Rostedt 	if (event && event->type_len == RINGBUF_TYPE_PADDING)
39812d622719STom Zanussi 		goto again;
39822d622719STom Zanussi 
3983f83c9d0fSSteven Rostedt 	return event;
3984f83c9d0fSSteven Rostedt }
3985f83c9d0fSSteven Rostedt 
3986f83c9d0fSSteven Rostedt /**
39877a8e76a3SSteven Rostedt  * ring_buffer_consume - return an event and consume it
39887a8e76a3SSteven Rostedt  * @buffer: The ring buffer to get the next event from
398966a8cb95SSteven Rostedt  * @cpu: the cpu to read the buffer from
399066a8cb95SSteven Rostedt  * @ts: a variable to store the timestamp (may be NULL)
399166a8cb95SSteven Rostedt  * @lost_events: a variable to store if events were lost (may be NULL)
39927a8e76a3SSteven Rostedt  *
39937a8e76a3SSteven Rostedt  * Returns the next event in the ring buffer, and that event is consumed.
39947a8e76a3SSteven Rostedt  * Meaning, that sequential reads will keep returning a different event,
39957a8e76a3SSteven Rostedt  * and eventually empty the ring buffer if the producer is slower.
39967a8e76a3SSteven Rostedt  */
39977a8e76a3SSteven Rostedt struct ring_buffer_event *
399866a8cb95SSteven Rostedt ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
399966a8cb95SSteven Rostedt 		    unsigned long *lost_events)
40007a8e76a3SSteven Rostedt {
4001554f786eSSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
4002554f786eSSteven Rostedt 	struct ring_buffer_event *event = NULL;
4003f83c9d0fSSteven Rostedt 	unsigned long flags;
4004289a5a25SSteven Rostedt (Red Hat) 	bool dolock;
40057a8e76a3SSteven Rostedt 
40062d622719STom Zanussi  again:
4007554f786eSSteven Rostedt 	/* might be called in atomic */
4008554f786eSSteven Rostedt 	preempt_disable();
40097a8e76a3SSteven Rostedt 
4010554f786eSSteven Rostedt 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
4011554f786eSSteven Rostedt 		goto out;
4012554f786eSSteven Rostedt 
4013554f786eSSteven Rostedt 	cpu_buffer = buffer->buffers[cpu];
40148d707e8eSSteven Rostedt 	local_irq_save(flags);
4015289a5a25SSteven Rostedt (Red Hat) 	dolock = rb_reader_lock(cpu_buffer);
40167a8e76a3SSteven Rostedt 
401766a8cb95SSteven Rostedt 	event = rb_buffer_peek(cpu_buffer, ts, lost_events);
401866a8cb95SSteven Rostedt 	if (event) {
401966a8cb95SSteven Rostedt 		cpu_buffer->lost_events = 0;
4020d769041fSSteven Rostedt 		rb_advance_reader(cpu_buffer);
402166a8cb95SSteven Rostedt 	}
40227a8e76a3SSteven Rostedt 
4023289a5a25SSteven Rostedt (Red Hat) 	rb_reader_unlock(cpu_buffer, dolock);
40248d707e8eSSteven Rostedt 	local_irq_restore(flags);
4025f83c9d0fSSteven Rostedt 
4026554f786eSSteven Rostedt  out:
4027554f786eSSteven Rostedt 	preempt_enable();
4028554f786eSSteven Rostedt 
40291b959e18SSteven Rostedt 	if (event && event->type_len == RINGBUF_TYPE_PADDING)
40302d622719STom Zanussi 		goto again;
40312d622719STom Zanussi 
40327a8e76a3SSteven Rostedt 	return event;
40337a8e76a3SSteven Rostedt }
4034c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_consume);
40357a8e76a3SSteven Rostedt 
40367a8e76a3SSteven Rostedt /**
403772c9ddfdSDavid Miller  * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
40387a8e76a3SSteven Rostedt  * @buffer: The ring buffer to read from
40397a8e76a3SSteven Rostedt  * @cpu: The cpu buffer to iterate over
40407a8e76a3SSteven Rostedt  *
404172c9ddfdSDavid Miller  * This performs the initial preparations necessary to iterate
404272c9ddfdSDavid Miller  * through the buffer.  Memory is allocated, buffer recording
404372c9ddfdSDavid Miller  * is disabled, and the iterator pointer is returned to the caller.
40447a8e76a3SSteven Rostedt  *
404572c9ddfdSDavid Miller  * Disabling buffer recordng prevents the reading from being
404672c9ddfdSDavid Miller  * corrupted. This is not a consuming read, so a producer is not
404772c9ddfdSDavid Miller  * expected.
404872c9ddfdSDavid Miller  *
404972c9ddfdSDavid Miller  * After a sequence of ring_buffer_read_prepare calls, the user is
4050d611851bSzhangwei(Jovi)  * expected to make at least one call to ring_buffer_read_prepare_sync.
405172c9ddfdSDavid Miller  * Afterwards, ring_buffer_read_start is invoked to get things going
405272c9ddfdSDavid Miller  * for real.
405372c9ddfdSDavid Miller  *
4054d611851bSzhangwei(Jovi)  * This overall must be paired with ring_buffer_read_finish.
40557a8e76a3SSteven Rostedt  */
40567a8e76a3SSteven Rostedt struct ring_buffer_iter *
405772c9ddfdSDavid Miller ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
40587a8e76a3SSteven Rostedt {
40597a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
40608aabee57SSteven Rostedt 	struct ring_buffer_iter *iter;
40617a8e76a3SSteven Rostedt 
40629e01c1b7SRusty Russell 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
40638aabee57SSteven Rostedt 		return NULL;
40647a8e76a3SSteven Rostedt 
40657a8e76a3SSteven Rostedt 	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
40667a8e76a3SSteven Rostedt 	if (!iter)
40678aabee57SSteven Rostedt 		return NULL;
40687a8e76a3SSteven Rostedt 
40697a8e76a3SSteven Rostedt 	cpu_buffer = buffer->buffers[cpu];
40707a8e76a3SSteven Rostedt 
40717a8e76a3SSteven Rostedt 	iter->cpu_buffer = cpu_buffer;
40727a8e76a3SSteven Rostedt 
407383f40318SVaibhav Nagarnaik 	atomic_inc(&buffer->resize_disabled);
40747a8e76a3SSteven Rostedt 	atomic_inc(&cpu_buffer->record_disabled);
407572c9ddfdSDavid Miller 
407672c9ddfdSDavid Miller 	return iter;
407772c9ddfdSDavid Miller }
407872c9ddfdSDavid Miller EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
407972c9ddfdSDavid Miller 
408072c9ddfdSDavid Miller /**
408172c9ddfdSDavid Miller  * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls
408272c9ddfdSDavid Miller  *
408372c9ddfdSDavid Miller  * All previously invoked ring_buffer_read_prepare calls to prepare
408472c9ddfdSDavid Miller  * iterators will be synchronized.  Afterwards, read_buffer_read_start
408572c9ddfdSDavid Miller  * calls on those iterators are allowed.
408672c9ddfdSDavid Miller  */
408772c9ddfdSDavid Miller void
408872c9ddfdSDavid Miller ring_buffer_read_prepare_sync(void)
408972c9ddfdSDavid Miller {
40907a8e76a3SSteven Rostedt 	synchronize_sched();
409172c9ddfdSDavid Miller }
409272c9ddfdSDavid Miller EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
409372c9ddfdSDavid Miller 
409472c9ddfdSDavid Miller /**
409572c9ddfdSDavid Miller  * ring_buffer_read_start - start a non consuming read of the buffer
409672c9ddfdSDavid Miller  * @iter: The iterator returned by ring_buffer_read_prepare
409772c9ddfdSDavid Miller  *
409872c9ddfdSDavid Miller  * This finalizes the startup of an iteration through the buffer.
409972c9ddfdSDavid Miller  * The iterator comes from a call to ring_buffer_read_prepare and
410072c9ddfdSDavid Miller  * an intervening ring_buffer_read_prepare_sync must have been
410172c9ddfdSDavid Miller  * performed.
410272c9ddfdSDavid Miller  *
4103d611851bSzhangwei(Jovi)  * Must be paired with ring_buffer_read_finish.
410472c9ddfdSDavid Miller  */
410572c9ddfdSDavid Miller void
410672c9ddfdSDavid Miller ring_buffer_read_start(struct ring_buffer_iter *iter)
410772c9ddfdSDavid Miller {
410872c9ddfdSDavid Miller 	struct ring_buffer_per_cpu *cpu_buffer;
410972c9ddfdSDavid Miller 	unsigned long flags;
411072c9ddfdSDavid Miller 
411172c9ddfdSDavid Miller 	if (!iter)
411272c9ddfdSDavid Miller 		return;
411372c9ddfdSDavid Miller 
411472c9ddfdSDavid Miller 	cpu_buffer = iter->cpu_buffer;
41157a8e76a3SSteven Rostedt 
41165389f6faSThomas Gleixner 	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
41170199c4e6SThomas Gleixner 	arch_spin_lock(&cpu_buffer->lock);
4118642edba5SSteven Rostedt 	rb_iter_reset(iter);
41190199c4e6SThomas Gleixner 	arch_spin_unlock(&cpu_buffer->lock);
41205389f6faSThomas Gleixner 	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
41217a8e76a3SSteven Rostedt }
4122c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_read_start);
41237a8e76a3SSteven Rostedt 
41247a8e76a3SSteven Rostedt /**
4125d611851bSzhangwei(Jovi)  * ring_buffer_read_finish - finish reading the iterator of the buffer
41267a8e76a3SSteven Rostedt  * @iter: The iterator retrieved by ring_buffer_start
41277a8e76a3SSteven Rostedt  *
41287a8e76a3SSteven Rostedt  * This re-enables the recording to the buffer, and frees the
41297a8e76a3SSteven Rostedt  * iterator.
41307a8e76a3SSteven Rostedt  */
41317a8e76a3SSteven Rostedt void
41327a8e76a3SSteven Rostedt ring_buffer_read_finish(struct ring_buffer_iter *iter)
41337a8e76a3SSteven Rostedt {
41347a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
41359366c1baSSteven Rostedt 	unsigned long flags;
41367a8e76a3SSteven Rostedt 
4137659f451fSSteven Rostedt 	/*
4138659f451fSSteven Rostedt 	 * Ring buffer is disabled from recording, here's a good place
4139659f451fSSteven Rostedt 	 * to check the integrity of the ring buffer.
41409366c1baSSteven Rostedt 	 * Must prevent readers from trying to read, as the check
41419366c1baSSteven Rostedt 	 * clears the HEAD page and readers require it.
4142659f451fSSteven Rostedt 	 */
41439366c1baSSteven Rostedt 	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4144659f451fSSteven Rostedt 	rb_check_pages(cpu_buffer);
41459366c1baSSteven Rostedt 	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4146659f451fSSteven Rostedt 
41477a8e76a3SSteven Rostedt 	atomic_dec(&cpu_buffer->record_disabled);
414883f40318SVaibhav Nagarnaik 	atomic_dec(&cpu_buffer->buffer->resize_disabled);
41497a8e76a3SSteven Rostedt 	kfree(iter);
41507a8e76a3SSteven Rostedt }
4151c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
41527a8e76a3SSteven Rostedt 
41537a8e76a3SSteven Rostedt /**
41547a8e76a3SSteven Rostedt  * ring_buffer_read - read the next item in the ring buffer by the iterator
41557a8e76a3SSteven Rostedt  * @iter: The ring buffer iterator
41567a8e76a3SSteven Rostedt  * @ts: The time stamp of the event read.
41577a8e76a3SSteven Rostedt  *
41587a8e76a3SSteven Rostedt  * This reads the next event in the ring buffer and increments the iterator.
41597a8e76a3SSteven Rostedt  */
41607a8e76a3SSteven Rostedt struct ring_buffer_event *
41617a8e76a3SSteven Rostedt ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
41627a8e76a3SSteven Rostedt {
41637a8e76a3SSteven Rostedt 	struct ring_buffer_event *event;
4164f83c9d0fSSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
4165f83c9d0fSSteven Rostedt 	unsigned long flags;
41667a8e76a3SSteven Rostedt 
41675389f6faSThomas Gleixner 	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
41687e9391cfSSteven Rostedt  again:
4169f83c9d0fSSteven Rostedt 	event = rb_iter_peek(iter, ts);
41707a8e76a3SSteven Rostedt 	if (!event)
4171f83c9d0fSSteven Rostedt 		goto out;
41727a8e76a3SSteven Rostedt 
41737e9391cfSSteven Rostedt 	if (event->type_len == RINGBUF_TYPE_PADDING)
41747e9391cfSSteven Rostedt 		goto again;
41757e9391cfSSteven Rostedt 
41767a8e76a3SSteven Rostedt 	rb_advance_iter(iter);
4177f83c9d0fSSteven Rostedt  out:
41785389f6faSThomas Gleixner 	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
41797a8e76a3SSteven Rostedt 
41807a8e76a3SSteven Rostedt 	return event;
41817a8e76a3SSteven Rostedt }
4182c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_read);
41837a8e76a3SSteven Rostedt 
41847a8e76a3SSteven Rostedt /**
41857a8e76a3SSteven Rostedt  * ring_buffer_size - return the size of the ring buffer (in bytes)
41867a8e76a3SSteven Rostedt  * @buffer: The ring buffer.
41877a8e76a3SSteven Rostedt  */
4188438ced17SVaibhav Nagarnaik unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu)
41897a8e76a3SSteven Rostedt {
4190438ced17SVaibhav Nagarnaik 	/*
4191438ced17SVaibhav Nagarnaik 	 * Earlier, this method returned
4192438ced17SVaibhav Nagarnaik 	 *	BUF_PAGE_SIZE * buffer->nr_pages
4193438ced17SVaibhav Nagarnaik 	 * Since the nr_pages field is now removed, we have converted this to
4194438ced17SVaibhav Nagarnaik 	 * return the per cpu buffer value.
4195438ced17SVaibhav Nagarnaik 	 */
4196438ced17SVaibhav Nagarnaik 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
4197438ced17SVaibhav Nagarnaik 		return 0;
4198438ced17SVaibhav Nagarnaik 
4199438ced17SVaibhav Nagarnaik 	return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages;
42007a8e76a3SSteven Rostedt }
4201c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_size);
42027a8e76a3SSteven Rostedt 
42037a8e76a3SSteven Rostedt static void
42047a8e76a3SSteven Rostedt rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
42057a8e76a3SSteven Rostedt {
420677ae365eSSteven Rostedt 	rb_head_page_deactivate(cpu_buffer);
420777ae365eSSteven Rostedt 
42087a8e76a3SSteven Rostedt 	cpu_buffer->head_page
42093adc54faSSteven Rostedt 		= list_entry(cpu_buffer->pages, struct buffer_page, list);
4210bf41a158SSteven Rostedt 	local_set(&cpu_buffer->head_page->write, 0);
4211778c55d4SSteven Rostedt 	local_set(&cpu_buffer->head_page->entries, 0);
4212abc9b56dSSteven Rostedt 	local_set(&cpu_buffer->head_page->page->commit, 0);
42137a8e76a3SSteven Rostedt 
42146f807acdSSteven Rostedt 	cpu_buffer->head_page->read = 0;
4215bf41a158SSteven Rostedt 
4216bf41a158SSteven Rostedt 	cpu_buffer->tail_page = cpu_buffer->head_page;
4217bf41a158SSteven Rostedt 	cpu_buffer->commit_page = cpu_buffer->head_page;
4218bf41a158SSteven Rostedt 
4219bf41a158SSteven Rostedt 	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
42205040b4b7SVaibhav Nagarnaik 	INIT_LIST_HEAD(&cpu_buffer->new_pages);
4221bf41a158SSteven Rostedt 	local_set(&cpu_buffer->reader_page->write, 0);
4222778c55d4SSteven Rostedt 	local_set(&cpu_buffer->reader_page->entries, 0);
4223abc9b56dSSteven Rostedt 	local_set(&cpu_buffer->reader_page->page->commit, 0);
42246f807acdSSteven Rostedt 	cpu_buffer->reader_page->read = 0;
4225d769041fSSteven Rostedt 
4226c64e148aSVaibhav Nagarnaik 	local_set(&cpu_buffer->entries_bytes, 0);
422777ae365eSSteven Rostedt 	local_set(&cpu_buffer->overrun, 0);
4228884bfe89SSlava Pestov 	local_set(&cpu_buffer->commit_overrun, 0);
4229884bfe89SSlava Pestov 	local_set(&cpu_buffer->dropped_events, 0);
4230e4906effSSteven Rostedt 	local_set(&cpu_buffer->entries, 0);
4231fa743953SSteven Rostedt 	local_set(&cpu_buffer->committing, 0);
4232fa743953SSteven Rostedt 	local_set(&cpu_buffer->commits, 0);
423377ae365eSSteven Rostedt 	cpu_buffer->read = 0;
4234c64e148aSVaibhav Nagarnaik 	cpu_buffer->read_bytes = 0;
423569507c06SSteven Rostedt 
423669507c06SSteven Rostedt 	cpu_buffer->write_stamp = 0;
423769507c06SSteven Rostedt 	cpu_buffer->read_stamp = 0;
423877ae365eSSteven Rostedt 
423966a8cb95SSteven Rostedt 	cpu_buffer->lost_events = 0;
424066a8cb95SSteven Rostedt 	cpu_buffer->last_overrun = 0;
424166a8cb95SSteven Rostedt 
424277ae365eSSteven Rostedt 	rb_head_page_activate(cpu_buffer);
42437a8e76a3SSteven Rostedt }
42447a8e76a3SSteven Rostedt 
42457a8e76a3SSteven Rostedt /**
42467a8e76a3SSteven Rostedt  * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
42477a8e76a3SSteven Rostedt  * @buffer: The ring buffer to reset a per cpu buffer of
42487a8e76a3SSteven Rostedt  * @cpu: The CPU buffer to be reset
42497a8e76a3SSteven Rostedt  */
42507a8e76a3SSteven Rostedt void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
42517a8e76a3SSteven Rostedt {
42527a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
42537a8e76a3SSteven Rostedt 	unsigned long flags;
42547a8e76a3SSteven Rostedt 
42559e01c1b7SRusty Russell 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
42568aabee57SSteven Rostedt 		return;
42577a8e76a3SSteven Rostedt 
425883f40318SVaibhav Nagarnaik 	atomic_inc(&buffer->resize_disabled);
425941ede23eSSteven Rostedt 	atomic_inc(&cpu_buffer->record_disabled);
426041ede23eSSteven Rostedt 
426183f40318SVaibhav Nagarnaik 	/* Make sure all commits have finished */
426283f40318SVaibhav Nagarnaik 	synchronize_sched();
426383f40318SVaibhav Nagarnaik 
42645389f6faSThomas Gleixner 	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4265f83c9d0fSSteven Rostedt 
426641b6a95dSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
426741b6a95dSSteven Rostedt 		goto out;
426841b6a95dSSteven Rostedt 
42690199c4e6SThomas Gleixner 	arch_spin_lock(&cpu_buffer->lock);
42707a8e76a3SSteven Rostedt 
42717a8e76a3SSteven Rostedt 	rb_reset_cpu(cpu_buffer);
42727a8e76a3SSteven Rostedt 
42730199c4e6SThomas Gleixner 	arch_spin_unlock(&cpu_buffer->lock);
4274f83c9d0fSSteven Rostedt 
427541b6a95dSSteven Rostedt  out:
42765389f6faSThomas Gleixner 	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
427741ede23eSSteven Rostedt 
427841ede23eSSteven Rostedt 	atomic_dec(&cpu_buffer->record_disabled);
427983f40318SVaibhav Nagarnaik 	atomic_dec(&buffer->resize_disabled);
42807a8e76a3SSteven Rostedt }
4281c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
42827a8e76a3SSteven Rostedt 
42837a8e76a3SSteven Rostedt /**
42847a8e76a3SSteven Rostedt  * ring_buffer_reset - reset a ring buffer
42857a8e76a3SSteven Rostedt  * @buffer: The ring buffer to reset all cpu buffers
42867a8e76a3SSteven Rostedt  */
42877a8e76a3SSteven Rostedt void ring_buffer_reset(struct ring_buffer *buffer)
42887a8e76a3SSteven Rostedt {
42897a8e76a3SSteven Rostedt 	int cpu;
42907a8e76a3SSteven Rostedt 
42917a8e76a3SSteven Rostedt 	for_each_buffer_cpu(buffer, cpu)
4292d769041fSSteven Rostedt 		ring_buffer_reset_cpu(buffer, cpu);
42937a8e76a3SSteven Rostedt }
4294c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_reset);
42957a8e76a3SSteven Rostedt 
42967a8e76a3SSteven Rostedt /**
42977a8e76a3SSteven Rostedt  * rind_buffer_empty - is the ring buffer empty?
42987a8e76a3SSteven Rostedt  * @buffer: The ring buffer to test
42997a8e76a3SSteven Rostedt  */
43003d4e204dSYaowei Bai bool ring_buffer_empty(struct ring_buffer *buffer)
43017a8e76a3SSteven Rostedt {
43027a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
4303d4788207SSteven Rostedt 	unsigned long flags;
4304289a5a25SSteven Rostedt (Red Hat) 	bool dolock;
43057a8e76a3SSteven Rostedt 	int cpu;
4306d4788207SSteven Rostedt 	int ret;
43077a8e76a3SSteven Rostedt 
43087a8e76a3SSteven Rostedt 	/* yes this is racy, but if you don't like the race, lock the buffer */
43097a8e76a3SSteven Rostedt 	for_each_buffer_cpu(buffer, cpu) {
43107a8e76a3SSteven Rostedt 		cpu_buffer = buffer->buffers[cpu];
43118d707e8eSSteven Rostedt 		local_irq_save(flags);
4312289a5a25SSteven Rostedt (Red Hat) 		dolock = rb_reader_lock(cpu_buffer);
4313d4788207SSteven Rostedt 		ret = rb_per_cpu_empty(cpu_buffer);
4314289a5a25SSteven Rostedt (Red Hat) 		rb_reader_unlock(cpu_buffer, dolock);
43158d707e8eSSteven Rostedt 		local_irq_restore(flags);
43168d707e8eSSteven Rostedt 
4317d4788207SSteven Rostedt 		if (!ret)
43183d4e204dSYaowei Bai 			return false;
43197a8e76a3SSteven Rostedt 	}
4320554f786eSSteven Rostedt 
43213d4e204dSYaowei Bai 	return true;
43227a8e76a3SSteven Rostedt }
4323c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_empty);
43247a8e76a3SSteven Rostedt 
43257a8e76a3SSteven Rostedt /**
43267a8e76a3SSteven Rostedt  * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
43277a8e76a3SSteven Rostedt  * @buffer: The ring buffer
43287a8e76a3SSteven Rostedt  * @cpu: The CPU buffer to test
43297a8e76a3SSteven Rostedt  */
43303d4e204dSYaowei Bai bool ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
43317a8e76a3SSteven Rostedt {
43327a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
4333d4788207SSteven Rostedt 	unsigned long flags;
4334289a5a25SSteven Rostedt (Red Hat) 	bool dolock;
43358aabee57SSteven Rostedt 	int ret;
43367a8e76a3SSteven Rostedt 
43379e01c1b7SRusty Russell 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
43383d4e204dSYaowei Bai 		return true;
43397a8e76a3SSteven Rostedt 
43407a8e76a3SSteven Rostedt 	cpu_buffer = buffer->buffers[cpu];
43418d707e8eSSteven Rostedt 	local_irq_save(flags);
4342289a5a25SSteven Rostedt (Red Hat) 	dolock = rb_reader_lock(cpu_buffer);
4343554f786eSSteven Rostedt 	ret = rb_per_cpu_empty(cpu_buffer);
4344289a5a25SSteven Rostedt (Red Hat) 	rb_reader_unlock(cpu_buffer, dolock);
43458d707e8eSSteven Rostedt 	local_irq_restore(flags);
4346554f786eSSteven Rostedt 
4347554f786eSSteven Rostedt 	return ret;
43487a8e76a3SSteven Rostedt }
4349c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
43507a8e76a3SSteven Rostedt 
435185bac32cSSteven Rostedt #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
43527a8e76a3SSteven Rostedt /**
43537a8e76a3SSteven Rostedt  * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
43547a8e76a3SSteven Rostedt  * @buffer_a: One buffer to swap with
43557a8e76a3SSteven Rostedt  * @buffer_b: The other buffer to swap with
43567a8e76a3SSteven Rostedt  *
43577a8e76a3SSteven Rostedt  * This function is useful for tracers that want to take a "snapshot"
43587a8e76a3SSteven Rostedt  * of a CPU buffer and has another back up buffer lying around.
43597a8e76a3SSteven Rostedt  * it is expected that the tracer handles the cpu buffer not being
43607a8e76a3SSteven Rostedt  * used at the moment.
43617a8e76a3SSteven Rostedt  */
43627a8e76a3SSteven Rostedt int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
43637a8e76a3SSteven Rostedt 			 struct ring_buffer *buffer_b, int cpu)
43647a8e76a3SSteven Rostedt {
43657a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer_a;
43667a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer_b;
4367554f786eSSteven Rostedt 	int ret = -EINVAL;
4368554f786eSSteven Rostedt 
43699e01c1b7SRusty Russell 	if (!cpumask_test_cpu(cpu, buffer_a->cpumask) ||
43709e01c1b7SRusty Russell 	    !cpumask_test_cpu(cpu, buffer_b->cpumask))
4371554f786eSSteven Rostedt 		goto out;
43727a8e76a3SSteven Rostedt 
4373438ced17SVaibhav Nagarnaik 	cpu_buffer_a = buffer_a->buffers[cpu];
4374438ced17SVaibhav Nagarnaik 	cpu_buffer_b = buffer_b->buffers[cpu];
4375438ced17SVaibhav Nagarnaik 
43767a8e76a3SSteven Rostedt 	/* At least make sure the two buffers are somewhat the same */
4377438ced17SVaibhav Nagarnaik 	if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages)
4378554f786eSSteven Rostedt 		goto out;
4379554f786eSSteven Rostedt 
4380554f786eSSteven Rostedt 	ret = -EAGAIN;
43817a8e76a3SSteven Rostedt 
438297b17efeSSteven Rostedt 	if (atomic_read(&buffer_a->record_disabled))
4383554f786eSSteven Rostedt 		goto out;
438497b17efeSSteven Rostedt 
438597b17efeSSteven Rostedt 	if (atomic_read(&buffer_b->record_disabled))
4386554f786eSSteven Rostedt 		goto out;
438797b17efeSSteven Rostedt 
438897b17efeSSteven Rostedt 	if (atomic_read(&cpu_buffer_a->record_disabled))
4389554f786eSSteven Rostedt 		goto out;
439097b17efeSSteven Rostedt 
439197b17efeSSteven Rostedt 	if (atomic_read(&cpu_buffer_b->record_disabled))
4392554f786eSSteven Rostedt 		goto out;
439397b17efeSSteven Rostedt 
43947a8e76a3SSteven Rostedt 	/*
43957a8e76a3SSteven Rostedt 	 * We can't do a synchronize_sched here because this
43967a8e76a3SSteven Rostedt 	 * function can be called in atomic context.
43977a8e76a3SSteven Rostedt 	 * Normally this will be called from the same CPU as cpu.
43987a8e76a3SSteven Rostedt 	 * If not it's up to the caller to protect this.
43997a8e76a3SSteven Rostedt 	 */
44007a8e76a3SSteven Rostedt 	atomic_inc(&cpu_buffer_a->record_disabled);
44017a8e76a3SSteven Rostedt 	atomic_inc(&cpu_buffer_b->record_disabled);
44027a8e76a3SSteven Rostedt 
440398277991SSteven Rostedt 	ret = -EBUSY;
440498277991SSteven Rostedt 	if (local_read(&cpu_buffer_a->committing))
440598277991SSteven Rostedt 		goto out_dec;
440698277991SSteven Rostedt 	if (local_read(&cpu_buffer_b->committing))
440798277991SSteven Rostedt 		goto out_dec;
440898277991SSteven Rostedt 
44097a8e76a3SSteven Rostedt 	buffer_a->buffers[cpu] = cpu_buffer_b;
44107a8e76a3SSteven Rostedt 	buffer_b->buffers[cpu] = cpu_buffer_a;
44117a8e76a3SSteven Rostedt 
44127a8e76a3SSteven Rostedt 	cpu_buffer_b->buffer = buffer_a;
44137a8e76a3SSteven Rostedt 	cpu_buffer_a->buffer = buffer_b;
44147a8e76a3SSteven Rostedt 
441598277991SSteven Rostedt 	ret = 0;
441698277991SSteven Rostedt 
441798277991SSteven Rostedt out_dec:
44187a8e76a3SSteven Rostedt 	atomic_dec(&cpu_buffer_a->record_disabled);
44197a8e76a3SSteven Rostedt 	atomic_dec(&cpu_buffer_b->record_disabled);
4420554f786eSSteven Rostedt out:
4421554f786eSSteven Rostedt 	return ret;
44227a8e76a3SSteven Rostedt }
4423c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
442485bac32cSSteven Rostedt #endif /* CONFIG_RING_BUFFER_ALLOW_SWAP */
44257a8e76a3SSteven Rostedt 
44268789a9e7SSteven Rostedt /**
44278789a9e7SSteven Rostedt  * ring_buffer_alloc_read_page - allocate a page to read from buffer
44288789a9e7SSteven Rostedt  * @buffer: the buffer to allocate for.
4429d611851bSzhangwei(Jovi)  * @cpu: the cpu buffer to allocate.
44308789a9e7SSteven Rostedt  *
44318789a9e7SSteven Rostedt  * This function is used in conjunction with ring_buffer_read_page.
44328789a9e7SSteven Rostedt  * When reading a full page from the ring buffer, these functions
44338789a9e7SSteven Rostedt  * can be used to speed up the process. The calling function should
44348789a9e7SSteven Rostedt  * allocate a few pages first with this function. Then when it
44358789a9e7SSteven Rostedt  * needs to get pages from the ring buffer, it passes the result
44368789a9e7SSteven Rostedt  * of this function into ring_buffer_read_page, which will swap
44378789a9e7SSteven Rostedt  * the page that was allocated, with the read page of the buffer.
44388789a9e7SSteven Rostedt  *
44398789a9e7SSteven Rostedt  * Returns:
4440a7e52ad7SSteven Rostedt (VMware)  *  The page allocated, or ERR_PTR
44418789a9e7SSteven Rostedt  */
44427ea59064SVaibhav Nagarnaik void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu)
44438789a9e7SSteven Rostedt {
4444a7e52ad7SSteven Rostedt (VMware) 	struct ring_buffer_per_cpu *cpu_buffer;
444573a757e6SSteven Rostedt (VMware) 	struct buffer_data_page *bpage = NULL;
444673a757e6SSteven Rostedt (VMware) 	unsigned long flags;
44477ea59064SVaibhav Nagarnaik 	struct page *page;
44488789a9e7SSteven Rostedt 
4449a7e52ad7SSteven Rostedt (VMware) 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
4450a7e52ad7SSteven Rostedt (VMware) 		return ERR_PTR(-ENODEV);
4451a7e52ad7SSteven Rostedt (VMware) 
4452a7e52ad7SSteven Rostedt (VMware) 	cpu_buffer = buffer->buffers[cpu];
445373a757e6SSteven Rostedt (VMware) 	local_irq_save(flags);
445473a757e6SSteven Rostedt (VMware) 	arch_spin_lock(&cpu_buffer->lock);
445573a757e6SSteven Rostedt (VMware) 
445673a757e6SSteven Rostedt (VMware) 	if (cpu_buffer->free_page) {
445773a757e6SSteven Rostedt (VMware) 		bpage = cpu_buffer->free_page;
445873a757e6SSteven Rostedt (VMware) 		cpu_buffer->free_page = NULL;
445973a757e6SSteven Rostedt (VMware) 	}
446073a757e6SSteven Rostedt (VMware) 
446173a757e6SSteven Rostedt (VMware) 	arch_spin_unlock(&cpu_buffer->lock);
446273a757e6SSteven Rostedt (VMware) 	local_irq_restore(flags);
446373a757e6SSteven Rostedt (VMware) 
446473a757e6SSteven Rostedt (VMware) 	if (bpage)
446573a757e6SSteven Rostedt (VMware) 		goto out;
446673a757e6SSteven Rostedt (VMware) 
4467d7ec4bfeSVaibhav Nagarnaik 	page = alloc_pages_node(cpu_to_node(cpu),
4468d7ec4bfeSVaibhav Nagarnaik 				GFP_KERNEL | __GFP_NORETRY, 0);
44697ea59064SVaibhav Nagarnaik 	if (!page)
4470a7e52ad7SSteven Rostedt (VMware) 		return ERR_PTR(-ENOMEM);
44718789a9e7SSteven Rostedt 
44727ea59064SVaibhav Nagarnaik 	bpage = page_address(page);
44738789a9e7SSteven Rostedt 
447473a757e6SSteven Rostedt (VMware)  out:
4475ef7a4a16SSteven Rostedt 	rb_init_page(bpage);
4476ef7a4a16SSteven Rostedt 
4477044fa782SSteven Rostedt 	return bpage;
44788789a9e7SSteven Rostedt }
4479d6ce96daSSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
44808789a9e7SSteven Rostedt 
44818789a9e7SSteven Rostedt /**
44828789a9e7SSteven Rostedt  * ring_buffer_free_read_page - free an allocated read page
44838789a9e7SSteven Rostedt  * @buffer: the buffer the page was allocate for
448473a757e6SSteven Rostedt (VMware)  * @cpu: the cpu buffer the page came from
44858789a9e7SSteven Rostedt  * @data: the page to free
44868789a9e7SSteven Rostedt  *
44878789a9e7SSteven Rostedt  * Free a page allocated from ring_buffer_alloc_read_page.
44888789a9e7SSteven Rostedt  */
448973a757e6SSteven Rostedt (VMware) void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data)
44908789a9e7SSteven Rostedt {
449173a757e6SSteven Rostedt (VMware) 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
449273a757e6SSteven Rostedt (VMware) 	struct buffer_data_page *bpage = data;
4493ae415fa4SSteven Rostedt (VMware) 	struct page *page = virt_to_page(bpage);
449473a757e6SSteven Rostedt (VMware) 	unsigned long flags;
449573a757e6SSteven Rostedt (VMware) 
4496ae415fa4SSteven Rostedt (VMware) 	/* If the page is still in use someplace else, we can't reuse it */
4497ae415fa4SSteven Rostedt (VMware) 	if (page_ref_count(page) > 1)
4498ae415fa4SSteven Rostedt (VMware) 		goto out;
4499ae415fa4SSteven Rostedt (VMware) 
450073a757e6SSteven Rostedt (VMware) 	local_irq_save(flags);
450173a757e6SSteven Rostedt (VMware) 	arch_spin_lock(&cpu_buffer->lock);
450273a757e6SSteven Rostedt (VMware) 
450373a757e6SSteven Rostedt (VMware) 	if (!cpu_buffer->free_page) {
450473a757e6SSteven Rostedt (VMware) 		cpu_buffer->free_page = bpage;
450573a757e6SSteven Rostedt (VMware) 		bpage = NULL;
450673a757e6SSteven Rostedt (VMware) 	}
450773a757e6SSteven Rostedt (VMware) 
450873a757e6SSteven Rostedt (VMware) 	arch_spin_unlock(&cpu_buffer->lock);
450973a757e6SSteven Rostedt (VMware) 	local_irq_restore(flags);
451073a757e6SSteven Rostedt (VMware) 
4511ae415fa4SSteven Rostedt (VMware)  out:
451273a757e6SSteven Rostedt (VMware) 	free_page((unsigned long)bpage);
45138789a9e7SSteven Rostedt }
4514d6ce96daSSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
45158789a9e7SSteven Rostedt 
45168789a9e7SSteven Rostedt /**
45178789a9e7SSteven Rostedt  * ring_buffer_read_page - extract a page from the ring buffer
45188789a9e7SSteven Rostedt  * @buffer: buffer to extract from
45198789a9e7SSteven Rostedt  * @data_page: the page to use allocated from ring_buffer_alloc_read_page
4520ef7a4a16SSteven Rostedt  * @len: amount to extract
45218789a9e7SSteven Rostedt  * @cpu: the cpu of the buffer to extract
45228789a9e7SSteven Rostedt  * @full: should the extraction only happen when the page is full.
45238789a9e7SSteven Rostedt  *
45248789a9e7SSteven Rostedt  * This function will pull out a page from the ring buffer and consume it.
45258789a9e7SSteven Rostedt  * @data_page must be the address of the variable that was returned
45268789a9e7SSteven Rostedt  * from ring_buffer_alloc_read_page. This is because the page might be used
45278789a9e7SSteven Rostedt  * to swap with a page in the ring buffer.
45288789a9e7SSteven Rostedt  *
45298789a9e7SSteven Rostedt  * for example:
4530d611851bSzhangwei(Jovi)  *	rpage = ring_buffer_alloc_read_page(buffer, cpu);
4531a7e52ad7SSteven Rostedt (VMware)  *	if (IS_ERR(rpage))
4532a7e52ad7SSteven Rostedt (VMware)  *		return PTR_ERR(rpage);
4533ef7a4a16SSteven Rostedt  *	ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0);
4534667d2412SLai Jiangshan  *	if (ret >= 0)
4535667d2412SLai Jiangshan  *		process_page(rpage, ret);
45368789a9e7SSteven Rostedt  *
45378789a9e7SSteven Rostedt  * When @full is set, the function will not return true unless
45388789a9e7SSteven Rostedt  * the writer is off the reader page.
45398789a9e7SSteven Rostedt  *
45408789a9e7SSteven Rostedt  * Note: it is up to the calling functions to handle sleeps and wakeups.
45418789a9e7SSteven Rostedt  *  The ring buffer can be used anywhere in the kernel and can not
45428789a9e7SSteven Rostedt  *  blindly call wake_up. The layer that uses the ring buffer must be
45438789a9e7SSteven Rostedt  *  responsible for that.
45448789a9e7SSteven Rostedt  *
45458789a9e7SSteven Rostedt  * Returns:
4546667d2412SLai Jiangshan  *  >=0 if data has been transferred, returns the offset of consumed data.
4547667d2412SLai Jiangshan  *  <0 if no data has been transferred.
45488789a9e7SSteven Rostedt  */
45498789a9e7SSteven Rostedt int ring_buffer_read_page(struct ring_buffer *buffer,
4550ef7a4a16SSteven Rostedt 			  void **data_page, size_t len, int cpu, int full)
45518789a9e7SSteven Rostedt {
45528789a9e7SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
45538789a9e7SSteven Rostedt 	struct ring_buffer_event *event;
4554044fa782SSteven Rostedt 	struct buffer_data_page *bpage;
4555ef7a4a16SSteven Rostedt 	struct buffer_page *reader;
4556ff0ff84aSSteven Rostedt 	unsigned long missed_events;
45578789a9e7SSteven Rostedt 	unsigned long flags;
4558ef7a4a16SSteven Rostedt 	unsigned int commit;
4559667d2412SLai Jiangshan 	unsigned int read;
45604f3640f8SSteven Rostedt 	u64 save_timestamp;
4561667d2412SLai Jiangshan 	int ret = -1;
45628789a9e7SSteven Rostedt 
4563554f786eSSteven Rostedt 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
4564554f786eSSteven Rostedt 		goto out;
4565554f786eSSteven Rostedt 
4566474d32b6SSteven Rostedt 	/*
4567474d32b6SSteven Rostedt 	 * If len is not big enough to hold the page header, then
4568474d32b6SSteven Rostedt 	 * we can not copy anything.
4569474d32b6SSteven Rostedt 	 */
4570474d32b6SSteven Rostedt 	if (len <= BUF_PAGE_HDR_SIZE)
4571554f786eSSteven Rostedt 		goto out;
4572474d32b6SSteven Rostedt 
4573474d32b6SSteven Rostedt 	len -= BUF_PAGE_HDR_SIZE;
4574474d32b6SSteven Rostedt 
45758789a9e7SSteven Rostedt 	if (!data_page)
4576554f786eSSteven Rostedt 		goto out;
45778789a9e7SSteven Rostedt 
4578044fa782SSteven Rostedt 	bpage = *data_page;
4579044fa782SSteven Rostedt 	if (!bpage)
4580554f786eSSteven Rostedt 		goto out;
45818789a9e7SSteven Rostedt 
45825389f6faSThomas Gleixner 	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
45838789a9e7SSteven Rostedt 
4584ef7a4a16SSteven Rostedt 	reader = rb_get_reader_page(cpu_buffer);
4585ef7a4a16SSteven Rostedt 	if (!reader)
4586554f786eSSteven Rostedt 		goto out_unlock;
45878789a9e7SSteven Rostedt 
4588ef7a4a16SSteven Rostedt 	event = rb_reader_event(cpu_buffer);
4589667d2412SLai Jiangshan 
4590ef7a4a16SSteven Rostedt 	read = reader->read;
4591ef7a4a16SSteven Rostedt 	commit = rb_page_commit(reader);
4592ef7a4a16SSteven Rostedt 
459366a8cb95SSteven Rostedt 	/* Check if any events were dropped */
4594ff0ff84aSSteven Rostedt 	missed_events = cpu_buffer->lost_events;
459566a8cb95SSteven Rostedt 
45968789a9e7SSteven Rostedt 	/*
4597474d32b6SSteven Rostedt 	 * If this page has been partially read or
4598474d32b6SSteven Rostedt 	 * if len is not big enough to read the rest of the page or
4599474d32b6SSteven Rostedt 	 * a writer is still on the page, then
4600474d32b6SSteven Rostedt 	 * we must copy the data from the page to the buffer.
4601474d32b6SSteven Rostedt 	 * Otherwise, we can simply swap the page with the one passed in.
46028789a9e7SSteven Rostedt 	 */
4603474d32b6SSteven Rostedt 	if (read || (len < (commit - read)) ||
4604ef7a4a16SSteven Rostedt 	    cpu_buffer->reader_page == cpu_buffer->commit_page) {
4605667d2412SLai Jiangshan 		struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
4606474d32b6SSteven Rostedt 		unsigned int rpos = read;
4607474d32b6SSteven Rostedt 		unsigned int pos = 0;
4608ef7a4a16SSteven Rostedt 		unsigned int size;
46098789a9e7SSteven Rostedt 
46108789a9e7SSteven Rostedt 		if (full)
4611554f786eSSteven Rostedt 			goto out_unlock;
46128789a9e7SSteven Rostedt 
4613ef7a4a16SSteven Rostedt 		if (len > (commit - read))
4614ef7a4a16SSteven Rostedt 			len = (commit - read);
4615ef7a4a16SSteven Rostedt 
461669d1b839SSteven Rostedt 		/* Always keep the time extend and data together */
461769d1b839SSteven Rostedt 		size = rb_event_ts_length(event);
4618ef7a4a16SSteven Rostedt 
4619ef7a4a16SSteven Rostedt 		if (len < size)
4620554f786eSSteven Rostedt 			goto out_unlock;
4621ef7a4a16SSteven Rostedt 
46224f3640f8SSteven Rostedt 		/* save the current timestamp, since the user will need it */
46234f3640f8SSteven Rostedt 		save_timestamp = cpu_buffer->read_stamp;
46244f3640f8SSteven Rostedt 
4625ef7a4a16SSteven Rostedt 		/* Need to copy one event at a time */
4626ef7a4a16SSteven Rostedt 		do {
4627e1e35927SDavid Sharp 			/* We need the size of one event, because
4628e1e35927SDavid Sharp 			 * rb_advance_reader only advances by one event,
4629e1e35927SDavid Sharp 			 * whereas rb_event_ts_length may include the size of
4630e1e35927SDavid Sharp 			 * one or two events.
4631e1e35927SDavid Sharp 			 * We have already ensured there's enough space if this
4632e1e35927SDavid Sharp 			 * is a time extend. */
4633e1e35927SDavid Sharp 			size = rb_event_length(event);
4634474d32b6SSteven Rostedt 			memcpy(bpage->data + pos, rpage->data + rpos, size);
4635ef7a4a16SSteven Rostedt 
4636ef7a4a16SSteven Rostedt 			len -= size;
4637ef7a4a16SSteven Rostedt 
4638ef7a4a16SSteven Rostedt 			rb_advance_reader(cpu_buffer);
4639474d32b6SSteven Rostedt 			rpos = reader->read;
4640474d32b6SSteven Rostedt 			pos += size;
4641ef7a4a16SSteven Rostedt 
464218fab912SHuang Ying 			if (rpos >= commit)
464318fab912SHuang Ying 				break;
464418fab912SHuang Ying 
4645ef7a4a16SSteven Rostedt 			event = rb_reader_event(cpu_buffer);
464669d1b839SSteven Rostedt 			/* Always keep the time extend and data together */
464769d1b839SSteven Rostedt 			size = rb_event_ts_length(event);
4648e1e35927SDavid Sharp 		} while (len >= size);
4649667d2412SLai Jiangshan 
4650667d2412SLai Jiangshan 		/* update bpage */
4651ef7a4a16SSteven Rostedt 		local_set(&bpage->commit, pos);
46524f3640f8SSteven Rostedt 		bpage->time_stamp = save_timestamp;
4653ef7a4a16SSteven Rostedt 
4654474d32b6SSteven Rostedt 		/* we copied everything to the beginning */
4655474d32b6SSteven Rostedt 		read = 0;
46568789a9e7SSteven Rostedt 	} else {
4657afbab76aSSteven Rostedt 		/* update the entry counter */
465877ae365eSSteven Rostedt 		cpu_buffer->read += rb_page_entries(reader);
4659c64e148aSVaibhav Nagarnaik 		cpu_buffer->read_bytes += BUF_PAGE_SIZE;
4660afbab76aSSteven Rostedt 
46618789a9e7SSteven Rostedt 		/* swap the pages */
4662044fa782SSteven Rostedt 		rb_init_page(bpage);
4663ef7a4a16SSteven Rostedt 		bpage = reader->page;
4664ef7a4a16SSteven Rostedt 		reader->page = *data_page;
4665ef7a4a16SSteven Rostedt 		local_set(&reader->write, 0);
4666778c55d4SSteven Rostedt 		local_set(&reader->entries, 0);
4667ef7a4a16SSteven Rostedt 		reader->read = 0;
4668044fa782SSteven Rostedt 		*data_page = bpage;
4669ff0ff84aSSteven Rostedt 
4670ff0ff84aSSteven Rostedt 		/*
4671ff0ff84aSSteven Rostedt 		 * Use the real_end for the data size,
4672ff0ff84aSSteven Rostedt 		 * This gives us a chance to store the lost events
4673ff0ff84aSSteven Rostedt 		 * on the page.
4674ff0ff84aSSteven Rostedt 		 */
4675ff0ff84aSSteven Rostedt 		if (reader->real_end)
4676ff0ff84aSSteven Rostedt 			local_set(&bpage->commit, reader->real_end);
4677ef7a4a16SSteven Rostedt 	}
4678ef7a4a16SSteven Rostedt 	ret = read;
4679ef7a4a16SSteven Rostedt 
468066a8cb95SSteven Rostedt 	cpu_buffer->lost_events = 0;
46812711ca23SSteven Rostedt 
46822711ca23SSteven Rostedt 	commit = local_read(&bpage->commit);
468366a8cb95SSteven Rostedt 	/*
468466a8cb95SSteven Rostedt 	 * Set a flag in the commit field if we lost events
468566a8cb95SSteven Rostedt 	 */
4686ff0ff84aSSteven Rostedt 	if (missed_events) {
4687ff0ff84aSSteven Rostedt 		/* If there is room at the end of the page to save the
4688ff0ff84aSSteven Rostedt 		 * missed events, then record it there.
4689ff0ff84aSSteven Rostedt 		 */
4690ff0ff84aSSteven Rostedt 		if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) {
4691ff0ff84aSSteven Rostedt 			memcpy(&bpage->data[commit], &missed_events,
4692ff0ff84aSSteven Rostedt 			       sizeof(missed_events));
4693ff0ff84aSSteven Rostedt 			local_add(RB_MISSED_STORED, &bpage->commit);
46942711ca23SSteven Rostedt 			commit += sizeof(missed_events);
4695ff0ff84aSSteven Rostedt 		}
469666a8cb95SSteven Rostedt 		local_add(RB_MISSED_EVENTS, &bpage->commit);
4697ff0ff84aSSteven Rostedt 	}
469866a8cb95SSteven Rostedt 
46992711ca23SSteven Rostedt 	/*
47002711ca23SSteven Rostedt 	 * This page may be off to user land. Zero it out here.
47012711ca23SSteven Rostedt 	 */
47022711ca23SSteven Rostedt 	if (commit < BUF_PAGE_SIZE)
47032711ca23SSteven Rostedt 		memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
47042711ca23SSteven Rostedt 
4705554f786eSSteven Rostedt  out_unlock:
47065389f6faSThomas Gleixner 	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
47078789a9e7SSteven Rostedt 
4708554f786eSSteven Rostedt  out:
47098789a9e7SSteven Rostedt 	return ret;
47108789a9e7SSteven Rostedt }
4711d6ce96daSSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_read_page);
47128789a9e7SSteven Rostedt 
4713b32614c0SSebastian Andrzej Siewior /*
4714b32614c0SSebastian Andrzej Siewior  * We only allocate new buffers, never free them if the CPU goes down.
4715b32614c0SSebastian Andrzej Siewior  * If we were to free the buffer, then the user would lose any trace that was in
4716b32614c0SSebastian Andrzej Siewior  * the buffer.
4717b32614c0SSebastian Andrzej Siewior  */
4718b32614c0SSebastian Andrzej Siewior int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node)
4719554f786eSSteven Rostedt {
4720b32614c0SSebastian Andrzej Siewior 	struct ring_buffer *buffer;
47219b94a8fbSSteven Rostedt (Red Hat) 	long nr_pages_same;
47229b94a8fbSSteven Rostedt (Red Hat) 	int cpu_i;
47239b94a8fbSSteven Rostedt (Red Hat) 	unsigned long nr_pages;
4724554f786eSSteven Rostedt 
4725b32614c0SSebastian Andrzej Siewior 	buffer = container_of(node, struct ring_buffer, node);
47263f237a79SRusty Russell 	if (cpumask_test_cpu(cpu, buffer->cpumask))
4727b32614c0SSebastian Andrzej Siewior 		return 0;
4728554f786eSSteven Rostedt 
4729438ced17SVaibhav Nagarnaik 	nr_pages = 0;
4730438ced17SVaibhav Nagarnaik 	nr_pages_same = 1;
4731438ced17SVaibhav Nagarnaik 	/* check if all cpu sizes are same */
4732438ced17SVaibhav Nagarnaik 	for_each_buffer_cpu(buffer, cpu_i) {
4733438ced17SVaibhav Nagarnaik 		/* fill in the size from first enabled cpu */
4734438ced17SVaibhav Nagarnaik 		if (nr_pages == 0)
4735438ced17SVaibhav Nagarnaik 			nr_pages = buffer->buffers[cpu_i]->nr_pages;
4736438ced17SVaibhav Nagarnaik 		if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
4737438ced17SVaibhav Nagarnaik 			nr_pages_same = 0;
4738438ced17SVaibhav Nagarnaik 			break;
4739438ced17SVaibhav Nagarnaik 		}
4740438ced17SVaibhav Nagarnaik 	}
4741438ced17SVaibhav Nagarnaik 	/* allocate minimum pages, user can later expand it */
4742438ced17SVaibhav Nagarnaik 	if (!nr_pages_same)
4743438ced17SVaibhav Nagarnaik 		nr_pages = 2;
4744554f786eSSteven Rostedt 	buffer->buffers[cpu] =
4745438ced17SVaibhav Nagarnaik 		rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
4746554f786eSSteven Rostedt 	if (!buffer->buffers[cpu]) {
4747b32614c0SSebastian Andrzej Siewior 		WARN(1, "failed to allocate ring buffer on CPU %u\n",
4748554f786eSSteven Rostedt 		     cpu);
4749b32614c0SSebastian Andrzej Siewior 		return -ENOMEM;
4750554f786eSSteven Rostedt 	}
4751554f786eSSteven Rostedt 	smp_wmb();
47523f237a79SRusty Russell 	cpumask_set_cpu(cpu, buffer->cpumask);
4753b32614c0SSebastian Andrzej Siewior 	return 0;
4754554f786eSSteven Rostedt }
47556c43e554SSteven Rostedt (Red Hat) 
47566c43e554SSteven Rostedt (Red Hat) #ifdef CONFIG_RING_BUFFER_STARTUP_TEST
47576c43e554SSteven Rostedt (Red Hat) /*
47586c43e554SSteven Rostedt (Red Hat)  * This is a basic integrity check of the ring buffer.
47596c43e554SSteven Rostedt (Red Hat)  * Late in the boot cycle this test will run when configured in.
47606c43e554SSteven Rostedt (Red Hat)  * It will kick off a thread per CPU that will go into a loop
47616c43e554SSteven Rostedt (Red Hat)  * writing to the per cpu ring buffer various sizes of data.
47626c43e554SSteven Rostedt (Red Hat)  * Some of the data will be large items, some small.
47636c43e554SSteven Rostedt (Red Hat)  *
47646c43e554SSteven Rostedt (Red Hat)  * Another thread is created that goes into a spin, sending out
47656c43e554SSteven Rostedt (Red Hat)  * IPIs to the other CPUs to also write into the ring buffer.
47666c43e554SSteven Rostedt (Red Hat)  * this is to test the nesting ability of the buffer.
47676c43e554SSteven Rostedt (Red Hat)  *
47686c43e554SSteven Rostedt (Red Hat)  * Basic stats are recorded and reported. If something in the
47696c43e554SSteven Rostedt (Red Hat)  * ring buffer should happen that's not expected, a big warning
47706c43e554SSteven Rostedt (Red Hat)  * is displayed and all ring buffers are disabled.
47716c43e554SSteven Rostedt (Red Hat)  */
47726c43e554SSteven Rostedt (Red Hat) static struct task_struct *rb_threads[NR_CPUS] __initdata;
47736c43e554SSteven Rostedt (Red Hat) 
47746c43e554SSteven Rostedt (Red Hat) struct rb_test_data {
47756c43e554SSteven Rostedt (Red Hat) 	struct ring_buffer	*buffer;
47766c43e554SSteven Rostedt (Red Hat) 	unsigned long		events;
47776c43e554SSteven Rostedt (Red Hat) 	unsigned long		bytes_written;
47786c43e554SSteven Rostedt (Red Hat) 	unsigned long		bytes_alloc;
47796c43e554SSteven Rostedt (Red Hat) 	unsigned long		bytes_dropped;
47806c43e554SSteven Rostedt (Red Hat) 	unsigned long		events_nested;
47816c43e554SSteven Rostedt (Red Hat) 	unsigned long		bytes_written_nested;
47826c43e554SSteven Rostedt (Red Hat) 	unsigned long		bytes_alloc_nested;
47836c43e554SSteven Rostedt (Red Hat) 	unsigned long		bytes_dropped_nested;
47846c43e554SSteven Rostedt (Red Hat) 	int			min_size_nested;
47856c43e554SSteven Rostedt (Red Hat) 	int			max_size_nested;
47866c43e554SSteven Rostedt (Red Hat) 	int			max_size;
47876c43e554SSteven Rostedt (Red Hat) 	int			min_size;
47886c43e554SSteven Rostedt (Red Hat) 	int			cpu;
47896c43e554SSteven Rostedt (Red Hat) 	int			cnt;
47906c43e554SSteven Rostedt (Red Hat) };
47916c43e554SSteven Rostedt (Red Hat) 
47926c43e554SSteven Rostedt (Red Hat) static struct rb_test_data rb_data[NR_CPUS] __initdata;
47936c43e554SSteven Rostedt (Red Hat) 
47946c43e554SSteven Rostedt (Red Hat) /* 1 meg per cpu */
47956c43e554SSteven Rostedt (Red Hat) #define RB_TEST_BUFFER_SIZE	1048576
47966c43e554SSteven Rostedt (Red Hat) 
47976c43e554SSteven Rostedt (Red Hat) static char rb_string[] __initdata =
47986c43e554SSteven Rostedt (Red Hat) 	"abcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()?+\\"
47996c43e554SSteven Rostedt (Red Hat) 	"?+|:';\",.<>/?abcdefghijklmnopqrstuvwxyz1234567890"
48006c43e554SSteven Rostedt (Red Hat) 	"!@#$%^&*()?+\\?+|:';\",.<>/?abcdefghijklmnopqrstuv";
48016c43e554SSteven Rostedt (Red Hat) 
48026c43e554SSteven Rostedt (Red Hat) static bool rb_test_started __initdata;
48036c43e554SSteven Rostedt (Red Hat) 
48046c43e554SSteven Rostedt (Red Hat) struct rb_item {
48056c43e554SSteven Rostedt (Red Hat) 	int size;
48066c43e554SSteven Rostedt (Red Hat) 	char str[];
48076c43e554SSteven Rostedt (Red Hat) };
48086c43e554SSteven Rostedt (Red Hat) 
48096c43e554SSteven Rostedt (Red Hat) static __init int rb_write_something(struct rb_test_data *data, bool nested)
48106c43e554SSteven Rostedt (Red Hat) {
48116c43e554SSteven Rostedt (Red Hat) 	struct ring_buffer_event *event;
48126c43e554SSteven Rostedt (Red Hat) 	struct rb_item *item;
48136c43e554SSteven Rostedt (Red Hat) 	bool started;
48146c43e554SSteven Rostedt (Red Hat) 	int event_len;
48156c43e554SSteven Rostedt (Red Hat) 	int size;
48166c43e554SSteven Rostedt (Red Hat) 	int len;
48176c43e554SSteven Rostedt (Red Hat) 	int cnt;
48186c43e554SSteven Rostedt (Red Hat) 
48196c43e554SSteven Rostedt (Red Hat) 	/* Have nested writes different that what is written */
48206c43e554SSteven Rostedt (Red Hat) 	cnt = data->cnt + (nested ? 27 : 0);
48216c43e554SSteven Rostedt (Red Hat) 
48226c43e554SSteven Rostedt (Red Hat) 	/* Multiply cnt by ~e, to make some unique increment */
48236c43e554SSteven Rostedt (Red Hat) 	size = (data->cnt * 68 / 25) % (sizeof(rb_string) - 1);
48246c43e554SSteven Rostedt (Red Hat) 
48256c43e554SSteven Rostedt (Red Hat) 	len = size + sizeof(struct rb_item);
48266c43e554SSteven Rostedt (Red Hat) 
48276c43e554SSteven Rostedt (Red Hat) 	started = rb_test_started;
48286c43e554SSteven Rostedt (Red Hat) 	/* read rb_test_started before checking buffer enabled */
48296c43e554SSteven Rostedt (Red Hat) 	smp_rmb();
48306c43e554SSteven Rostedt (Red Hat) 
48316c43e554SSteven Rostedt (Red Hat) 	event = ring_buffer_lock_reserve(data->buffer, len);
48326c43e554SSteven Rostedt (Red Hat) 	if (!event) {
48336c43e554SSteven Rostedt (Red Hat) 		/* Ignore dropped events before test starts. */
48346c43e554SSteven Rostedt (Red Hat) 		if (started) {
48356c43e554SSteven Rostedt (Red Hat) 			if (nested)
48366c43e554SSteven Rostedt (Red Hat) 				data->bytes_dropped += len;
48376c43e554SSteven Rostedt (Red Hat) 			else
48386c43e554SSteven Rostedt (Red Hat) 				data->bytes_dropped_nested += len;
48396c43e554SSteven Rostedt (Red Hat) 		}
48406c43e554SSteven Rostedt (Red Hat) 		return len;
48416c43e554SSteven Rostedt (Red Hat) 	}
48426c43e554SSteven Rostedt (Red Hat) 
48436c43e554SSteven Rostedt (Red Hat) 	event_len = ring_buffer_event_length(event);
48446c43e554SSteven Rostedt (Red Hat) 
48456c43e554SSteven Rostedt (Red Hat) 	if (RB_WARN_ON(data->buffer, event_len < len))
48466c43e554SSteven Rostedt (Red Hat) 		goto out;
48476c43e554SSteven Rostedt (Red Hat) 
48486c43e554SSteven Rostedt (Red Hat) 	item = ring_buffer_event_data(event);
48496c43e554SSteven Rostedt (Red Hat) 	item->size = size;
48506c43e554SSteven Rostedt (Red Hat) 	memcpy(item->str, rb_string, size);
48516c43e554SSteven Rostedt (Red Hat) 
48526c43e554SSteven Rostedt (Red Hat) 	if (nested) {
48536c43e554SSteven Rostedt (Red Hat) 		data->bytes_alloc_nested += event_len;
48546c43e554SSteven Rostedt (Red Hat) 		data->bytes_written_nested += len;
48556c43e554SSteven Rostedt (Red Hat) 		data->events_nested++;
48566c43e554SSteven Rostedt (Red Hat) 		if (!data->min_size_nested || len < data->min_size_nested)
48576c43e554SSteven Rostedt (Red Hat) 			data->min_size_nested = len;
48586c43e554SSteven Rostedt (Red Hat) 		if (len > data->max_size_nested)
48596c43e554SSteven Rostedt (Red Hat) 			data->max_size_nested = len;
48606c43e554SSteven Rostedt (Red Hat) 	} else {
48616c43e554SSteven Rostedt (Red Hat) 		data->bytes_alloc += event_len;
48626c43e554SSteven Rostedt (Red Hat) 		data->bytes_written += len;
48636c43e554SSteven Rostedt (Red Hat) 		data->events++;
48646c43e554SSteven Rostedt (Red Hat) 		if (!data->min_size || len < data->min_size)
48656c43e554SSteven Rostedt (Red Hat) 			data->max_size = len;
48666c43e554SSteven Rostedt (Red Hat) 		if (len > data->max_size)
48676c43e554SSteven Rostedt (Red Hat) 			data->max_size = len;
48686c43e554SSteven Rostedt (Red Hat) 	}
48696c43e554SSteven Rostedt (Red Hat) 
48706c43e554SSteven Rostedt (Red Hat)  out:
48716c43e554SSteven Rostedt (Red Hat) 	ring_buffer_unlock_commit(data->buffer, event);
48726c43e554SSteven Rostedt (Red Hat) 
48736c43e554SSteven Rostedt (Red Hat) 	return 0;
48746c43e554SSteven Rostedt (Red Hat) }
48756c43e554SSteven Rostedt (Red Hat) 
48766c43e554SSteven Rostedt (Red Hat) static __init int rb_test(void *arg)
48776c43e554SSteven Rostedt (Red Hat) {
48786c43e554SSteven Rostedt (Red Hat) 	struct rb_test_data *data = arg;
48796c43e554SSteven Rostedt (Red Hat) 
48806c43e554SSteven Rostedt (Red Hat) 	while (!kthread_should_stop()) {
48816c43e554SSteven Rostedt (Red Hat) 		rb_write_something(data, false);
48826c43e554SSteven Rostedt (Red Hat) 		data->cnt++;
48836c43e554SSteven Rostedt (Red Hat) 
48846c43e554SSteven Rostedt (Red Hat) 		set_current_state(TASK_INTERRUPTIBLE);
48856c43e554SSteven Rostedt (Red Hat) 		/* Now sleep between a min of 100-300us and a max of 1ms */
48866c43e554SSteven Rostedt (Red Hat) 		usleep_range(((data->cnt % 3) + 1) * 100, 1000);
48876c43e554SSteven Rostedt (Red Hat) 	}
48886c43e554SSteven Rostedt (Red Hat) 
48896c43e554SSteven Rostedt (Red Hat) 	return 0;
48906c43e554SSteven Rostedt (Red Hat) }
48916c43e554SSteven Rostedt (Red Hat) 
48926c43e554SSteven Rostedt (Red Hat) static __init void rb_ipi(void *ignore)
48936c43e554SSteven Rostedt (Red Hat) {
48946c43e554SSteven Rostedt (Red Hat) 	struct rb_test_data *data;
48956c43e554SSteven Rostedt (Red Hat) 	int cpu = smp_processor_id();
48966c43e554SSteven Rostedt (Red Hat) 
48976c43e554SSteven Rostedt (Red Hat) 	data = &rb_data[cpu];
48986c43e554SSteven Rostedt (Red Hat) 	rb_write_something(data, true);
48996c43e554SSteven Rostedt (Red Hat) }
49006c43e554SSteven Rostedt (Red Hat) 
49016c43e554SSteven Rostedt (Red Hat) static __init int rb_hammer_test(void *arg)
49026c43e554SSteven Rostedt (Red Hat) {
49036c43e554SSteven Rostedt (Red Hat) 	while (!kthread_should_stop()) {
49046c43e554SSteven Rostedt (Red Hat) 
49056c43e554SSteven Rostedt (Red Hat) 		/* Send an IPI to all cpus to write data! */
49066c43e554SSteven Rostedt (Red Hat) 		smp_call_function(rb_ipi, NULL, 1);
49076c43e554SSteven Rostedt (Red Hat) 		/* No sleep, but for non preempt, let others run */
49086c43e554SSteven Rostedt (Red Hat) 		schedule();
49096c43e554SSteven Rostedt (Red Hat) 	}
49106c43e554SSteven Rostedt (Red Hat) 
49116c43e554SSteven Rostedt (Red Hat) 	return 0;
49126c43e554SSteven Rostedt (Red Hat) }
49136c43e554SSteven Rostedt (Red Hat) 
49146c43e554SSteven Rostedt (Red Hat) static __init int test_ringbuffer(void)
49156c43e554SSteven Rostedt (Red Hat) {
49166c43e554SSteven Rostedt (Red Hat) 	struct task_struct *rb_hammer;
49176c43e554SSteven Rostedt (Red Hat) 	struct ring_buffer *buffer;
49186c43e554SSteven Rostedt (Red Hat) 	int cpu;
49196c43e554SSteven Rostedt (Red Hat) 	int ret = 0;
49206c43e554SSteven Rostedt (Red Hat) 
49216c43e554SSteven Rostedt (Red Hat) 	pr_info("Running ring buffer tests...\n");
49226c43e554SSteven Rostedt (Red Hat) 
49236c43e554SSteven Rostedt (Red Hat) 	buffer = ring_buffer_alloc(RB_TEST_BUFFER_SIZE, RB_FL_OVERWRITE);
49246c43e554SSteven Rostedt (Red Hat) 	if (WARN_ON(!buffer))
49256c43e554SSteven Rostedt (Red Hat) 		return 0;
49266c43e554SSteven Rostedt (Red Hat) 
49276c43e554SSteven Rostedt (Red Hat) 	/* Disable buffer so that threads can't write to it yet */
49286c43e554SSteven Rostedt (Red Hat) 	ring_buffer_record_off(buffer);
49296c43e554SSteven Rostedt (Red Hat) 
49306c43e554SSteven Rostedt (Red Hat) 	for_each_online_cpu(cpu) {
49316c43e554SSteven Rostedt (Red Hat) 		rb_data[cpu].buffer = buffer;
49326c43e554SSteven Rostedt (Red Hat) 		rb_data[cpu].cpu = cpu;
49336c43e554SSteven Rostedt (Red Hat) 		rb_data[cpu].cnt = cpu;
49346c43e554SSteven Rostedt (Red Hat) 		rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu],
49356c43e554SSteven Rostedt (Red Hat) 						 "rbtester/%d", cpu);
493662277de7SWei Yongjun 		if (WARN_ON(IS_ERR(rb_threads[cpu]))) {
49376c43e554SSteven Rostedt (Red Hat) 			pr_cont("FAILED\n");
493862277de7SWei Yongjun 			ret = PTR_ERR(rb_threads[cpu]);
49396c43e554SSteven Rostedt (Red Hat) 			goto out_free;
49406c43e554SSteven Rostedt (Red Hat) 		}
49416c43e554SSteven Rostedt (Red Hat) 
49426c43e554SSteven Rostedt (Red Hat) 		kthread_bind(rb_threads[cpu], cpu);
49436c43e554SSteven Rostedt (Red Hat)  		wake_up_process(rb_threads[cpu]);
49446c43e554SSteven Rostedt (Red Hat) 	}
49456c43e554SSteven Rostedt (Red Hat) 
49466c43e554SSteven Rostedt (Red Hat) 	/* Now create the rb hammer! */
49476c43e554SSteven Rostedt (Red Hat) 	rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer");
494862277de7SWei Yongjun 	if (WARN_ON(IS_ERR(rb_hammer))) {
49496c43e554SSteven Rostedt (Red Hat) 		pr_cont("FAILED\n");
495062277de7SWei Yongjun 		ret = PTR_ERR(rb_hammer);
49516c43e554SSteven Rostedt (Red Hat) 		goto out_free;
49526c43e554SSteven Rostedt (Red Hat) 	}
49536c43e554SSteven Rostedt (Red Hat) 
49546c43e554SSteven Rostedt (Red Hat) 	ring_buffer_record_on(buffer);
49556c43e554SSteven Rostedt (Red Hat) 	/*
49566c43e554SSteven Rostedt (Red Hat) 	 * Show buffer is enabled before setting rb_test_started.
49576c43e554SSteven Rostedt (Red Hat) 	 * Yes there's a small race window where events could be
49586c43e554SSteven Rostedt (Red Hat) 	 * dropped and the thread wont catch it. But when a ring
49596c43e554SSteven Rostedt (Red Hat) 	 * buffer gets enabled, there will always be some kind of
49606c43e554SSteven Rostedt (Red Hat) 	 * delay before other CPUs see it. Thus, we don't care about
49616c43e554SSteven Rostedt (Red Hat) 	 * those dropped events. We care about events dropped after
49626c43e554SSteven Rostedt (Red Hat) 	 * the threads see that the buffer is active.
49636c43e554SSteven Rostedt (Red Hat) 	 */
49646c43e554SSteven Rostedt (Red Hat) 	smp_wmb();
49656c43e554SSteven Rostedt (Red Hat) 	rb_test_started = true;
49666c43e554SSteven Rostedt (Red Hat) 
49676c43e554SSteven Rostedt (Red Hat) 	set_current_state(TASK_INTERRUPTIBLE);
49686c43e554SSteven Rostedt (Red Hat) 	/* Just run for 10 seconds */;
49696c43e554SSteven Rostedt (Red Hat) 	schedule_timeout(10 * HZ);
49706c43e554SSteven Rostedt (Red Hat) 
49716c43e554SSteven Rostedt (Red Hat) 	kthread_stop(rb_hammer);
49726c43e554SSteven Rostedt (Red Hat) 
49736c43e554SSteven Rostedt (Red Hat)  out_free:
49746c43e554SSteven Rostedt (Red Hat) 	for_each_online_cpu(cpu) {
49756c43e554SSteven Rostedt (Red Hat) 		if (!rb_threads[cpu])
49766c43e554SSteven Rostedt (Red Hat) 			break;
49776c43e554SSteven Rostedt (Red Hat) 		kthread_stop(rb_threads[cpu]);
49786c43e554SSteven Rostedt (Red Hat) 	}
49796c43e554SSteven Rostedt (Red Hat) 	if (ret) {
49806c43e554SSteven Rostedt (Red Hat) 		ring_buffer_free(buffer);
49816c43e554SSteven Rostedt (Red Hat) 		return ret;
49826c43e554SSteven Rostedt (Red Hat) 	}
49836c43e554SSteven Rostedt (Red Hat) 
49846c43e554SSteven Rostedt (Red Hat) 	/* Report! */
49856c43e554SSteven Rostedt (Red Hat) 	pr_info("finished\n");
49866c43e554SSteven Rostedt (Red Hat) 	for_each_online_cpu(cpu) {
49876c43e554SSteven Rostedt (Red Hat) 		struct ring_buffer_event *event;
49886c43e554SSteven Rostedt (Red Hat) 		struct rb_test_data *data = &rb_data[cpu];
49896c43e554SSteven Rostedt (Red Hat) 		struct rb_item *item;
49906c43e554SSteven Rostedt (Red Hat) 		unsigned long total_events;
49916c43e554SSteven Rostedt (Red Hat) 		unsigned long total_dropped;
49926c43e554SSteven Rostedt (Red Hat) 		unsigned long total_written;
49936c43e554SSteven Rostedt (Red Hat) 		unsigned long total_alloc;
49946c43e554SSteven Rostedt (Red Hat) 		unsigned long total_read = 0;
49956c43e554SSteven Rostedt (Red Hat) 		unsigned long total_size = 0;
49966c43e554SSteven Rostedt (Red Hat) 		unsigned long total_len = 0;
49976c43e554SSteven Rostedt (Red Hat) 		unsigned long total_lost = 0;
49986c43e554SSteven Rostedt (Red Hat) 		unsigned long lost;
49996c43e554SSteven Rostedt (Red Hat) 		int big_event_size;
50006c43e554SSteven Rostedt (Red Hat) 		int small_event_size;
50016c43e554SSteven Rostedt (Red Hat) 
50026c43e554SSteven Rostedt (Red Hat) 		ret = -1;
50036c43e554SSteven Rostedt (Red Hat) 
50046c43e554SSteven Rostedt (Red Hat) 		total_events = data->events + data->events_nested;
50056c43e554SSteven Rostedt (Red Hat) 		total_written = data->bytes_written + data->bytes_written_nested;
50066c43e554SSteven Rostedt (Red Hat) 		total_alloc = data->bytes_alloc + data->bytes_alloc_nested;
50076c43e554SSteven Rostedt (Red Hat) 		total_dropped = data->bytes_dropped + data->bytes_dropped_nested;
50086c43e554SSteven Rostedt (Red Hat) 
50096c43e554SSteven Rostedt (Red Hat) 		big_event_size = data->max_size + data->max_size_nested;
50106c43e554SSteven Rostedt (Red Hat) 		small_event_size = data->min_size + data->min_size_nested;
50116c43e554SSteven Rostedt (Red Hat) 
50126c43e554SSteven Rostedt (Red Hat) 		pr_info("CPU %d:\n", cpu);
50136c43e554SSteven Rostedt (Red Hat) 		pr_info("              events:    %ld\n", total_events);
50146c43e554SSteven Rostedt (Red Hat) 		pr_info("       dropped bytes:    %ld\n", total_dropped);
50156c43e554SSteven Rostedt (Red Hat) 		pr_info("       alloced bytes:    %ld\n", total_alloc);
50166c43e554SSteven Rostedt (Red Hat) 		pr_info("       written bytes:    %ld\n", total_written);
50176c43e554SSteven Rostedt (Red Hat) 		pr_info("       biggest event:    %d\n", big_event_size);
50186c43e554SSteven Rostedt (Red Hat) 		pr_info("      smallest event:    %d\n", small_event_size);
50196c43e554SSteven Rostedt (Red Hat) 
50206c43e554SSteven Rostedt (Red Hat) 		if (RB_WARN_ON(buffer, total_dropped))
50216c43e554SSteven Rostedt (Red Hat) 			break;
50226c43e554SSteven Rostedt (Red Hat) 
50236c43e554SSteven Rostedt (Red Hat) 		ret = 0;
50246c43e554SSteven Rostedt (Red Hat) 
50256c43e554SSteven Rostedt (Red Hat) 		while ((event = ring_buffer_consume(buffer, cpu, NULL, &lost))) {
50266c43e554SSteven Rostedt (Red Hat) 			total_lost += lost;
50276c43e554SSteven Rostedt (Red Hat) 			item = ring_buffer_event_data(event);
50286c43e554SSteven Rostedt (Red Hat) 			total_len += ring_buffer_event_length(event);
50296c43e554SSteven Rostedt (Red Hat) 			total_size += item->size + sizeof(struct rb_item);
50306c43e554SSteven Rostedt (Red Hat) 			if (memcmp(&item->str[0], rb_string, item->size) != 0) {
50316c43e554SSteven Rostedt (Red Hat) 				pr_info("FAILED!\n");
50326c43e554SSteven Rostedt (Red Hat) 				pr_info("buffer had: %.*s\n", item->size, item->str);
50336c43e554SSteven Rostedt (Red Hat) 				pr_info("expected:   %.*s\n", item->size, rb_string);
50346c43e554SSteven Rostedt (Red Hat) 				RB_WARN_ON(buffer, 1);
50356c43e554SSteven Rostedt (Red Hat) 				ret = -1;
50366c43e554SSteven Rostedt (Red Hat) 				break;
50376c43e554SSteven Rostedt (Red Hat) 			}
50386c43e554SSteven Rostedt (Red Hat) 			total_read++;
50396c43e554SSteven Rostedt (Red Hat) 		}
50406c43e554SSteven Rostedt (Red Hat) 		if (ret)
50416c43e554SSteven Rostedt (Red Hat) 			break;
50426c43e554SSteven Rostedt (Red Hat) 
50436c43e554SSteven Rostedt (Red Hat) 		ret = -1;
50446c43e554SSteven Rostedt (Red Hat) 
50456c43e554SSteven Rostedt (Red Hat) 		pr_info("         read events:   %ld\n", total_read);
50466c43e554SSteven Rostedt (Red Hat) 		pr_info("         lost events:   %ld\n", total_lost);
50476c43e554SSteven Rostedt (Red Hat) 		pr_info("        total events:   %ld\n", total_lost + total_read);
50486c43e554SSteven Rostedt (Red Hat) 		pr_info("  recorded len bytes:   %ld\n", total_len);
50496c43e554SSteven Rostedt (Red Hat) 		pr_info(" recorded size bytes:   %ld\n", total_size);
50506c43e554SSteven Rostedt (Red Hat) 		if (total_lost)
50516c43e554SSteven Rostedt (Red Hat) 			pr_info(" With dropped events, record len and size may not match\n"
50526c43e554SSteven Rostedt (Red Hat) 				" alloced and written from above\n");
50536c43e554SSteven Rostedt (Red Hat) 		if (!total_lost) {
50546c43e554SSteven Rostedt (Red Hat) 			if (RB_WARN_ON(buffer, total_len != total_alloc ||
50556c43e554SSteven Rostedt (Red Hat) 				       total_size != total_written))
50566c43e554SSteven Rostedt (Red Hat) 				break;
50576c43e554SSteven Rostedt (Red Hat) 		}
50586c43e554SSteven Rostedt (Red Hat) 		if (RB_WARN_ON(buffer, total_lost + total_read != total_events))
50596c43e554SSteven Rostedt (Red Hat) 			break;
50606c43e554SSteven Rostedt (Red Hat) 
50616c43e554SSteven Rostedt (Red Hat) 		ret = 0;
50626c43e554SSteven Rostedt (Red Hat) 	}
50636c43e554SSteven Rostedt (Red Hat) 	if (!ret)
50646c43e554SSteven Rostedt (Red Hat) 		pr_info("Ring buffer PASSED!\n");
50656c43e554SSteven Rostedt (Red Hat) 
50666c43e554SSteven Rostedt (Red Hat) 	ring_buffer_free(buffer);
50676c43e554SSteven Rostedt (Red Hat) 	return 0;
50686c43e554SSteven Rostedt (Red Hat) }
50696c43e554SSteven Rostedt (Red Hat) 
50706c43e554SSteven Rostedt (Red Hat) late_initcall(test_ringbuffer);
50716c43e554SSteven Rostedt (Red Hat) #endif /* CONFIG_RING_BUFFER_STARTUP_TEST */
5072