/*
 * Generic ring buffer
 *
 * Copyright (C) 2008 Steven Rostedt <[email protected]>
 */
#include <linux/trace_events.h>
#include <linux/ring_buffer.h>
#include <linux/trace_clock.h>
#include <linux/trace_seq.h>
#include <linux/spinlock.h>
#include <linux/irq_work.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kthread.h>	/* for self test */
#include <linux/kmemcheck.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/list.h>
#include <linux/cpu.h>

#include <asm/local.h>

static void update_pages_handler(struct work_struct *work);

/*
 * The ring buffer header is special. We must manually keep it up to date.
 */
int ring_buffer_print_entry_header(struct trace_seq *s)
{
	trace_seq_puts(s, "# compressed entry header\n");
	trace_seq_puts(s, "\ttype_len    :    5 bits\n");
	trace_seq_puts(s, "\ttime_delta  :   27 bits\n");
	trace_seq_puts(s, "\tarray       :   32 bits\n");
	trace_seq_putc(s, '\n');
	trace_seq_printf(s, "\tpadding     : type == %d\n",
			 RINGBUF_TYPE_PADDING);
	trace_seq_printf(s, "\ttime_extend : type == %d\n",
			 RINGBUF_TYPE_TIME_EXTEND);
	trace_seq_printf(s, "\tdata max type_len  == %d\n",
			 RINGBUF_TYPE_DATA_TYPE_LEN_MAX);

	return !trace_seq_has_overflowed(s);
}

/*
 * The ring buffer is made up of a list of pages. A separate list of pages is
 * allocated for each CPU. A writer may only write to a buffer that is
 * associated with the CPU it is currently executing on.  A reader may read
 * from any per cpu buffer.
 *
 * The reader is special. For each per cpu buffer, the reader has its own
 * reader page. When a reader has read the entire reader page, this reader
 * page is swapped with another page in the ring buffer.
 *
 * Now, as long as the writer is off the reader page, the reader can do
 * whatever it wants with that page. The writer will never write to that page
 * again (as long as it is out of the ring buffer).
 *
 * Here's some silly ASCII art.
 *
 *   +------+
 *   |reader|          RING BUFFER
 *   |page  |
 *   +------+        +---+   +---+   +---+
 *                   |   |-->|   |-->|   |
 *                   +---+   +---+   +---+
 *                     ^               |
 *                     |               |
 *                     +---------------+
 *
 *
 *   +------+
 *   |reader|          RING BUFFER
 *   |page  |------------------v
 *   +------+        +---+   +---+   +---+
 *                   |   |-->|   |-->|   |
 *                   +---+   +---+   +---+
 *                     ^               |
 *                     |               |
 *                     +---------------+
 *
 *
 *   +------+
 *   |reader|          RING BUFFER
 *   |page  |------------------v
 *   +------+        +---+   +---+   +---+
 *      ^            |   |-->|   |-->|   |
 *      |            +---+   +---+   +---+
 *      |                              |
 *      |                              |
 *      +------------------------------+
 *
 *
 *   +------+
 *   |buffer|          RING BUFFER
 *   |page  |------------------v
 *   +------+        +---+   +---+   +---+
 *      ^            |   |   |   |-->|   |
 *      |   New      +---+   +---+   +---+
 *      |  Reader------^               |
 *      |   page                       |
 *      +------------------------------+
 *
 *
 * After we make this swap, the reader can hand this page off to the splice
 * code and be done with it. It can even allocate a new page if it needs to
 * and swap that into the ring buffer.
 *
 * We will be using cmpxchg soon to make all this lockless.
 *
 */
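
/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * conceptually the reader performs the swap above with one cmpxchg on the
 * pointer that leads to the head page, roughly
 *
 *	ptr = &head->list.prev->next;
 *	old = (*ptr & ~RB_FLAG_MASK) | RB_PAGE_HEAD;
 *	swapped = cmpxchg(ptr, old, (unsigned long)&reader->list) == old;
 *
 * See rb_head_page_replace() later in this file for the real version,
 * which this sketch is modeled on.
 */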

/* Used for individual buffers (after the counter) */
#define RB_BUFFER_OFF		(1 << 20)

#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)

#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
#define RB_ALIGNMENT		4U
#define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
#define RB_EVNT_MIN_SIZE	8U	/* two 32bit words */

#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
# define RB_FORCE_8BYTE_ALIGNMENT	0
# define RB_ARCH_ALIGNMENT		RB_ALIGNMENT
#else
# define RB_FORCE_8BYTE_ALIGNMENT	1
# define RB_ARCH_ALIGNMENT		8U
#endif

#define RB_ALIGN_DATA		__aligned(RB_ARCH_ALIGNMENT)

/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX

enum {
	RB_LEN_TIME_EXTEND = 8,
	RB_LEN_TIME_STAMP = 16,
};

#define skip_time_extend(event) \
	((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))

static inline int rb_null_event(struct ring_buffer_event *event)
{
	return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
}

static void rb_event_set_padding(struct ring_buffer_event *event)
{
	/* padding has a NULL time_delta */
	event->type_len = RINGBUF_TYPE_PADDING;
	event->time_delta = 0;
}

static unsigned
rb_event_data_length(struct ring_buffer_event *event)
{
	unsigned length;

	if (event->type_len)
		length = event->type_len * RB_ALIGNMENT;
	else
		length = event->array[0];
	return length + RB_EVNT_HDR_SIZE;
}

/*
 * Return the length of the given event. Will return
 * the length of the time extend if the event is a
 * time extend.
 */
static inline unsigned
rb_event_length(struct ring_buffer_event *event)
{
	switch (event->type_len) {
	case RINGBUF_TYPE_PADDING:
		if (rb_null_event(event))
			/* undefined */
			return -1;
		return event->array[0] + RB_EVNT_HDR_SIZE;

	case RINGBUF_TYPE_TIME_EXTEND:
		return RB_LEN_TIME_EXTEND;

	case RINGBUF_TYPE_TIME_STAMP:
		return RB_LEN_TIME_STAMP;

	case RINGBUF_TYPE_DATA:
		return rb_event_data_length(event);
	default:
		BUG();
	}
	/* not hit */
	return 0;
}
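
/*
 * Worked example (editor's addition, not in the original file), assuming
 * the usual 4 byte event header (type_len:5, time_delta:27):
 *
 *	a data event with type_len == 3 carries 3 * RB_ALIGNMENT == 12
 *	bytes of payload, so rb_event_length() returns
 *	12 + RB_EVNT_HDR_SIZE == 16 bytes;
 *
 *	a data event with type_len == 0 stores its payload length in
 *	array[0], so the total is array[0] + RB_EVNT_HDR_SIZE.
 */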

/*
 * Return total length of time extend and data,
 *   or just the event length for all other events.
 */
static inline unsigned
rb_event_ts_length(struct ring_buffer_event *event)
{
	unsigned len = 0;

	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
		/* time extends include the data event after it */
		len = RB_LEN_TIME_EXTEND;
		event = skip_time_extend(event);
	}
	return len + rb_event_length(event);
}

/**
 * ring_buffer_event_length - return the length of the event
 * @event: the event to get the length of
 *
 * Returns the size of the data load of a data event.
 * If the event is something other than a data event, it
 * returns the size of the event itself. With the exception
 * of a TIME EXTEND, where it still returns the size of the
 * data load of the data event after it.
 */
unsigned ring_buffer_event_length(struct ring_buffer_event *event)
{
	unsigned length;

	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
		event = skip_time_extend(event);

	length = rb_event_length(event);
	if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
		return length;
	length -= RB_EVNT_HDR_SIZE;
	if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
		length -= sizeof(event->array[0]);
	return length;
}
EXPORT_SYMBOL_GPL(ring_buffer_event_length);

/* inline for ring buffer fast paths */
static __always_inline void *
rb_event_data(struct ring_buffer_event *event)
{
	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
		event = skip_time_extend(event);
	BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
	/* If length is in len field, then array[0] has the data */
	if (event->type_len)
		return (void *)&event->array[0];
	/* Otherwise length is in array[0] and array[1] has the data */
	return (void *)&event->array[1];
}
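
/*
 * Layout sketch (editor's addition, not part of the original file):
 *
 *	type_len != 0:	[ header ][ payload starts at array[0] ]
 *	type_len == 0:	[ header ][ array[0] = length ][ payload at array[1] ]
 *
 * which is why rb_event_data() picks &array[0] or &array[1] above.
 */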

/**
 * ring_buffer_event_data - return the data of the event
 * @event: the event to get the data from
 */
void *ring_buffer_event_data(struct ring_buffer_event *event)
{
	return rb_event_data(event);
}
EXPORT_SYMBOL_GPL(ring_buffer_event_data);

#define for_each_buffer_cpu(buffer, cpu)		\
	for_each_cpu(cpu, buffer->cpumask)

#define TS_SHIFT	27
#define TS_MASK		((1ULL << TS_SHIFT) - 1)
#define TS_DELTA_TEST	(~TS_MASK)

/* Flag when events were overwritten */
#define RB_MISSED_EVENTS	(1 << 31)
/* Missed count stored at end */
#define RB_MISSED_STORED	(1 << 30)

struct buffer_data_page {
	u64		 time_stamp;	/* page time stamp */
	local_t		 commit;	/* write committed index */
	unsigned char	 data[] RB_ALIGN_DATA;	/* data of buffer page */
};

/*
 * Note, the buffer_page list must be first. The buffer pages
 * are allocated in cache lines, which means that each buffer
 * page will be at the beginning of a cache line, and thus
 * the least significant bits will be zero. We use this to
 * add flags in the list struct pointers, to make the ring buffer
 * lockless.
 */
struct buffer_page {
	struct list_head list;		/* list of buffer pages */
	local_t		 write;		/* index for next write */
	unsigned	 read;		/* index for next read */
	local_t		 entries;	/* entries on this page */
	unsigned long	 real_end;	/* real end of data */
	struct buffer_data_page *page;	/* Actual data page */
};

/*
 * The buffer page counters, write and entries, must be reset
 * atomically when crossing page boundaries. To synchronize this
 * update, two counters are inserted into the number. One is
 * the actual counter for the write position or count on the page.
 *
 * The other is a counter of updaters. Before an update happens
 * the update partition of the counter is incremented. This will
 * allow the updater to update the counter atomically.
 *
 * The counter is 20 bits, and the state data is 12.
 */
#define RB_WRITE_MASK		0xfffff
#define RB_WRITE_INTCNT		(1 << 20)
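
/*
 * Worked example (editor's addition, not in the original file): with the
 * split above, a write field of 0x00300010 decodes as
 *
 *	write offset  = 0x00300010 & RB_WRITE_MASK = 0x10
 *	updater count = 0x00300010 >> 20            = 3
 *
 * local_add_return(RB_WRITE_INTCNT, &bpage->write) bumps only the updater
 * count, leaving the low 20 bit write offset untouched (see
 * rb_tail_page_update() below).
 */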
321044fa782SSteven Rostedt static void rb_init_page(struct buffer_data_page *bpage)
322abc9b56dSSteven Rostedt {
323044fa782SSteven Rostedt 	local_set(&bpage->commit, 0);
324abc9b56dSSteven Rostedt }
325abc9b56dSSteven Rostedt 
326474d32b6SSteven Rostedt /**
327474d32b6SSteven Rostedt  * ring_buffer_page_len - the size of data on the page.
328474d32b6SSteven Rostedt  * @page: The page to read
329474d32b6SSteven Rostedt  *
330474d32b6SSteven Rostedt  * Returns the amount of data on the page, including buffer page header.
331474d32b6SSteven Rostedt  */
332ef7a4a16SSteven Rostedt size_t ring_buffer_page_len(void *page)
333ef7a4a16SSteven Rostedt {
334474d32b6SSteven Rostedt 	return local_read(&((struct buffer_data_page *)page)->commit)
335474d32b6SSteven Rostedt 		+ BUF_PAGE_HDR_SIZE;
336ef7a4a16SSteven Rostedt }
337ef7a4a16SSteven Rostedt 
3387a8e76a3SSteven Rostedt /*
339ed56829cSSteven Rostedt  * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
340ed56829cSSteven Rostedt  * this issue out.
341ed56829cSSteven Rostedt  */
34234a148bfSAndrew Morton static void free_buffer_page(struct buffer_page *bpage)
343ed56829cSSteven Rostedt {
3446ae2a076SSteven Rostedt 	free_page((unsigned long)bpage->page);
345e4c2ce82SSteven Rostedt 	kfree(bpage);
346ed56829cSSteven Rostedt }
347ed56829cSSteven Rostedt 
348ed56829cSSteven Rostedt /*
3497a8e76a3SSteven Rostedt  * We need to fit the time_stamp delta into 27 bits.
3507a8e76a3SSteven Rostedt  */
3517a8e76a3SSteven Rostedt static inline int test_time_stamp(u64 delta)
3527a8e76a3SSteven Rostedt {
3537a8e76a3SSteven Rostedt 	if (delta & TS_DELTA_TEST)
3547a8e76a3SSteven Rostedt 		return 1;
3557a8e76a3SSteven Rostedt 	return 0;
3567a8e76a3SSteven Rostedt }
3577a8e76a3SSteven Rostedt 
358474d32b6SSteven Rostedt #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)
3597a8e76a3SSteven Rostedt 
360be957c44SSteven Rostedt /* Max payload is BUF_PAGE_SIZE - header (8bytes) */
361be957c44SSteven Rostedt #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
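
/*
 * Size sketch (editor's addition, not in the original file): assuming 4 KiB
 * pages and a 64 bit local_t, BUF_PAGE_HDR_SIZE is 16 bytes
 * (u64 time_stamp + local_t commit), so
 *
 *	BUF_PAGE_SIZE     = 4096 - 16 = 4080 bytes of event space per page
 *	BUF_MAX_DATA_SIZE = 4080 - 8  = 4072 bytes of payload per event
 */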

int ring_buffer_print_page_header(struct trace_seq *s)
{
	struct buffer_data_page field;

	trace_seq_printf(s, "\tfield: u64 timestamp;\t"
			 "offset:0;\tsize:%u;\tsigned:%u;\n",
			 (unsigned int)sizeof(field.time_stamp),
			 (unsigned int)is_signed_type(u64));

	trace_seq_printf(s, "\tfield: local_t commit;\t"
			 "offset:%u;\tsize:%u;\tsigned:%u;\n",
			 (unsigned int)offsetof(typeof(field), commit),
			 (unsigned int)sizeof(field.commit),
			 (unsigned int)is_signed_type(long));

	trace_seq_printf(s, "\tfield: int overwrite;\t"
			 "offset:%u;\tsize:%u;\tsigned:%u;\n",
			 (unsigned int)offsetof(typeof(field), commit),
			 1,
			 (unsigned int)is_signed_type(long));

	trace_seq_printf(s, "\tfield: char data;\t"
			 "offset:%u;\tsize:%u;\tsigned:%u;\n",
			 (unsigned int)offsetof(typeof(field), data),
			 (unsigned int)BUF_PAGE_SIZE,
			 (unsigned int)is_signed_type(char));

	return !trace_seq_has_overflowed(s);
}

struct rb_irq_work {
	struct irq_work			work;
	wait_queue_head_t		waiters;
	wait_queue_head_t		full_waiters;
	bool				waiters_pending;
	bool				full_waiters_pending;
	bool				wakeup_full;
};

/*
 * Structure to hold event state and handle nested events.
 */
struct rb_event_info {
	u64			ts;
	u64			delta;
	unsigned long		length;
	struct buffer_page	*tail_page;
	int			add_timestamp;
};

/*
 * Used for which event context the event is in.
 *  NMI     = 0
 *  IRQ     = 1
 *  SOFTIRQ = 2
 *  NORMAL  = 3
 *
 * See trace_recursive_lock() comment below for more details.
 */
enum {
	RB_CTX_NMI,
	RB_CTX_IRQ,
	RB_CTX_SOFTIRQ,
	RB_CTX_NORMAL,
	RB_CTX_MAX
};

/*
 * head_page == tail_page && head == tail then buffer is empty.
 */
struct ring_buffer_per_cpu {
	int				cpu;
	atomic_t			record_disabled;
	struct ring_buffer		*buffer;
	raw_spinlock_t			reader_lock;	/* serialize readers */
	arch_spinlock_t			lock;
	struct lock_class_key		lock_key;
	unsigned long			nr_pages;
	unsigned int			current_context;
	struct list_head		*pages;
	struct buffer_page		*head_page;	/* read from head */
	struct buffer_page		*tail_page;	/* write to tail */
	struct buffer_page		*commit_page;	/* committed pages */
	struct buffer_page		*reader_page;
	unsigned long			lost_events;
	unsigned long			last_overrun;
	local_t				entries_bytes;
	local_t				entries;
	local_t				overrun;
	local_t				commit_overrun;
	local_t				dropped_events;
	local_t				committing;
	local_t				commits;
	unsigned long			read;
	unsigned long			read_bytes;
	u64				write_stamp;
	u64				read_stamp;
	/* ring buffer pages to update, > 0 to add, < 0 to remove */
	long				nr_pages_to_update;
	struct list_head		new_pages; /* new pages to add */
	struct work_struct		update_pages_work;
	struct completion		update_done;

	struct rb_irq_work		irq_work;
};

struct ring_buffer {
	unsigned			flags;
	int				cpus;
	atomic_t			record_disabled;
	atomic_t			resize_disabled;
	cpumask_var_t			cpumask;

	struct lock_class_key		*reader_lock_key;

	struct mutex			mutex;

	struct ring_buffer_per_cpu	**buffers;

#ifdef CONFIG_HOTPLUG_CPU
	struct notifier_block		cpu_notify;
#endif
	u64				(*clock)(void);

	struct rb_irq_work		irq_work;
};

struct ring_buffer_iter {
	struct ring_buffer_per_cpu	*cpu_buffer;
	unsigned long			head;
	struct buffer_page		*head_page;
	struct buffer_page		*cache_reader_page;
	unsigned long			cache_read;
	u64				read_stamp;
};

/*
 * rb_wake_up_waiters - wake up tasks waiting for ring buffer input
 *
 * Schedules a delayed work to wake up any task that is blocked on the
 * ring buffer waiters queue.
 */
static void rb_wake_up_waiters(struct irq_work *work)
{
	struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);

	wake_up_all(&rbwork->waiters);
	if (rbwork->wakeup_full) {
		rbwork->wakeup_full = false;
		wake_up_all(&rbwork->full_waiters);
	}
}

/**
 * ring_buffer_wait - wait for input to the ring buffer
 * @buffer: buffer to wait on
 * @cpu: the cpu buffer to wait on
 * @full: wait until a full page is available, if @cpu != RING_BUFFER_ALL_CPUS
 *
 * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
 * as data is added to any of the @buffer's cpu buffers. Otherwise
 * it will wait for data to be added to a specific cpu buffer.
 */
int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
{
	struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer);
	DEFINE_WAIT(wait);
	struct rb_irq_work *work;
	int ret = 0;

	/*
	 * Depending on what the caller is waiting for, either any
	 * data in any cpu buffer, or a specific buffer, put the
	 * caller on the appropriate wait queue.
	 */
	if (cpu == RING_BUFFER_ALL_CPUS) {
		work = &buffer->irq_work;
		/* Full only makes sense on per cpu reads */
		full = false;
	} else {
		if (!cpumask_test_cpu(cpu, buffer->cpumask))
			return -ENODEV;
		cpu_buffer = buffer->buffers[cpu];
		work = &cpu_buffer->irq_work;
	}


	while (true) {
		if (full)
			prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
		else
			prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);

		/*
		 * The events can happen in critical sections where
		 * checking a work queue can cause deadlocks.
		 * After adding a task to the queue, this flag is set
		 * only to notify events to try to wake up the queue
		 * using irq_work.
		 *
		 * We don't clear it even if the buffer is no longer
		 * empty. The flag only causes the next event to run
		 * irq_work to do the work queue wake up. The worst
		 * that can happen if we race with !trace_empty() is that
		 * an event will cause an irq_work to try to wake up
		 * an empty queue.
		 *
		 * There's no reason to protect this flag either, as
		 * the work queue and irq_work logic will do the necessary
		 * synchronization for the wake ups. The only thing
		 * that is necessary is that the wake up happens after
		 * a task has been queued. It's OK for spurious wake ups.
		 */
		if (full)
			work->full_waiters_pending = true;
		else
			work->waiters_pending = true;

		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}

		if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer))
			break;

		if (cpu != RING_BUFFER_ALL_CPUS &&
		    !ring_buffer_empty_cpu(buffer, cpu)) {
			unsigned long flags;
			bool pagebusy;

			if (!full)
				break;

			raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
			pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
			raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);

			if (!pagebusy)
				break;
		}

		schedule();
	}

	if (full)
		finish_wait(&work->full_waiters, &wait);
	else
		finish_wait(&work->waiters, &wait);

	return ret;
}
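
/*
 * Usage sketch (editor's addition, not in the original file): a reader task
 * that blocks until its per-cpu buffer has data might loop as
 *
 *	while (ring_buffer_empty_cpu(buffer, cpu)) {
 *		if (ring_buffer_wait(buffer, cpu, false) == -EINTR)
 *			break;
 *	}
 *
 * Passing full == true instead only returns once the commit page has moved
 * off the reader page, i.e. a full page of data is available.
 */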

/**
 * ring_buffer_poll_wait - poll on buffer input
 * @buffer: buffer to wait on
 * @cpu: the cpu buffer to wait on
 * @filp: the file descriptor
 * @poll_table: The poll descriptor
 *
 * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
 * as data is added to any of the @buffer's cpu buffers. Otherwise
 * it will wait for data to be added to a specific cpu buffer.
 *
 * Returns POLLIN | POLLRDNORM if data exists in the buffers,
 * zero otherwise.
 */
int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
			  struct file *filp, poll_table *poll_table)
{
	struct ring_buffer_per_cpu *cpu_buffer;
	struct rb_irq_work *work;

	if (cpu == RING_BUFFER_ALL_CPUS)
		work = &buffer->irq_work;
	else {
		if (!cpumask_test_cpu(cpu, buffer->cpumask))
			return -EINVAL;

		cpu_buffer = buffer->buffers[cpu];
		work = &cpu_buffer->irq_work;
	}

	poll_wait(filp, &work->waiters, poll_table);
	work->waiters_pending = true;
	/*
	 * There's a tight race between setting the waiters_pending and
	 * checking if the ring buffer is empty.  Once the waiters_pending bit
	 * is set, the next event will wake the task up, but we can get stuck
	 * if there's only a single event in.
	 *
	 * FIXME: Ideally, we need a memory barrier on the writer side as well,
	 * but adding a memory barrier to all events will cause too much of a
	 * performance hit in the fast path.  We only need a memory barrier when
	 * the buffer goes from empty to having content.  But as this race is
	 * extremely small, and it's not a problem if another event comes in, we
	 * will fix it later.
	 */
	smp_mb();

	if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
	    (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
		return POLLIN | POLLRDNORM;
	return 0;
}

/* buffer may be either ring_buffer or ring_buffer_per_cpu */
#define RB_WARN_ON(b, cond)						\
	({								\
		int _____ret = unlikely(cond);				\
		if (_____ret) {						\
			if (__same_type(*(b), struct ring_buffer_per_cpu)) { \
				struct ring_buffer_per_cpu *__b =	\
					(void *)b;			\
				atomic_inc(&__b->buffer->record_disabled); \
			} else						\
				atomic_inc(&b->record_disabled);	\
			WARN_ON(1);					\
		}							\
		_____ret;						\
	})

/* Up this if you want to test the TIME_EXTENTS and normalization */
#define DEBUG_SHIFT 0

static inline u64 rb_time_stamp(struct ring_buffer *buffer)
{
	/* shift to debug/test normalization and TIME_EXTENTS */
	return buffer->clock() << DEBUG_SHIFT;
}

u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
{
	u64 time;

	preempt_disable_notrace();
	time = rb_time_stamp(buffer);
	preempt_enable_no_resched_notrace();

	return time;
}
EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);

void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
				      int cpu, u64 *ts)
{
	/* Just stupid testing the normalize function and deltas */
	*ts >>= DEBUG_SHIFT;
}
EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);

/*
 * Making the ring buffer lockless makes things tricky.
 * Although writes only happen on the CPU that they are on,
 * and they only need to worry about interrupts, reads can
 * happen on any CPU.
 *
 * The reader page is always off the ring buffer, but when the
 * reader finishes with a page, it needs to swap its page with
 * a new one from the buffer. The reader needs to take from
 * the head (writes go to the tail). But if a writer is in overwrite
 * mode and wraps, it must push the head page forward.
 *
 * Here lies the problem.
 *
 * The reader must be careful to replace only the head page, and
 * not another one. As described at the top of the file in the
 * ASCII art, the reader sets its old page to point to the next
 * page after head. It then sets the page after head to point to
 * the old reader page. But if the writer moves the head page
 * during this operation, the reader could end up with the tail.
 *
 * We use cmpxchg to help prevent this race. We also do something
 * special with the page before head. We set the LSB to 1.
 *
 * When the writer must push the page forward, it will clear the
 * bit that points to the head page, move the head, and then set
 * the bit that points to the new head page.
 *
 * We also don't want an interrupt coming in and moving the head
 * page on another writer. Thus we use the second LSB to catch
 * that too. Thus:
 *
 * head->list->prev->next        bit 1          bit 0
 *                              -------        -------
 * Normal page                     0              0
 * Points to head page             0              1
 * New head page                   1              0
 *
 * Note we can not trust the prev pointer of the head page, because:
 *
 * +----+       +-----+        +-----+
 * |    |------>|  T  |---X--->|  N  |
 * |    |<------|     |        |     |
 * +----+       +-----+        +-----+
 *   ^                           ^ |
 *   |          +-----+          | |
 *   +----------|  R  |----------+ |
 *              |     |<-----------+
 *              +-----+
 *
 * Key:  ---X-->  HEAD flag set in pointer
 *         T      Tail page
 *         R      Reader page
 *         N      Next page
 *
 * (see __rb_reserve_next() to see where this happens)
 *
 *  What the above shows is that the reader just swapped out
 *  the reader page with a page in the buffer, but before it
 *  could make the new header point back to the new page added
 *  it was preempted by a writer. The writer moved forward onto
 *  the new page added by the reader and is about to move forward
 *  again.
 *
 *  You can see, it is legitimate for the previous pointer of
 *  the head (or any page) not to point back to itself. But only
 *  temporarily.
 */

#define RB_PAGE_NORMAL		0UL
#define RB_PAGE_HEAD		1UL
#define RB_PAGE_UPDATE		2UL


#define RB_FLAG_MASK		3UL

/* PAGE_MOVED is not part of the mask */
#define RB_PAGE_MOVED		4UL

/*
 * rb_list_head - remove any bit
 */
static struct list_head *rb_list_head(struct list_head *list)
{
	unsigned long val = (unsigned long)list;

	return (struct list_head *)(val & ~RB_FLAG_MASK);
}
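
/*
 * Decoding sketch (editor's addition, not in the original file): a list
 * pointer that carries a flag holds (addr | RB_PAGE_HEAD) or
 * (addr | RB_PAGE_UPDATE).  rb_list_head() masks RB_FLAG_MASK off and
 * returns the real struct list_head address, while rb_is_head_page()
 * below returns just the flag bits, so the caller knows whether the
 * pointer leads to the head page.
 */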
80277ae365eSSteven Rostedt 
80377ae365eSSteven Rostedt /*
8046d3f1e12SJiri Olsa  * rb_is_head_page - test if the given page is the head page
80577ae365eSSteven Rostedt  *
80677ae365eSSteven Rostedt  * Because the reader may move the head_page pointer, we can
80777ae365eSSteven Rostedt  * not trust what the head page is (it may be pointing to
80877ae365eSSteven Rostedt  * the reader page). But if the next page is a header page,
80977ae365eSSteven Rostedt  * its flags will be non zero.
81077ae365eSSteven Rostedt  */
81142b16b3fSJesper Juhl static inline int
81277ae365eSSteven Rostedt rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer,
81377ae365eSSteven Rostedt 		struct buffer_page *page, struct list_head *list)
81477ae365eSSteven Rostedt {
81577ae365eSSteven Rostedt 	unsigned long val;
81677ae365eSSteven Rostedt 
81777ae365eSSteven Rostedt 	val = (unsigned long)list->next;
81877ae365eSSteven Rostedt 
81977ae365eSSteven Rostedt 	if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list)
82077ae365eSSteven Rostedt 		return RB_PAGE_MOVED;
82177ae365eSSteven Rostedt 
82277ae365eSSteven Rostedt 	return val & RB_FLAG_MASK;
82377ae365eSSteven Rostedt }
82477ae365eSSteven Rostedt 
82577ae365eSSteven Rostedt /*
82677ae365eSSteven Rostedt  * rb_is_reader_page
82777ae365eSSteven Rostedt  *
82877ae365eSSteven Rostedt  * The unique thing about the reader page, is that, if the
82977ae365eSSteven Rostedt  * writer is ever on it, the previous pointer never points
83077ae365eSSteven Rostedt  * back to the reader page.
83177ae365eSSteven Rostedt  */
83206ca3209SYaowei Bai static bool rb_is_reader_page(struct buffer_page *page)
83377ae365eSSteven Rostedt {
83477ae365eSSteven Rostedt 	struct list_head *list = page->list.prev;
83577ae365eSSteven Rostedt 
83677ae365eSSteven Rostedt 	return rb_list_head(list->next) != &page->list;
83777ae365eSSteven Rostedt }
83877ae365eSSteven Rostedt 
83977ae365eSSteven Rostedt /*
84077ae365eSSteven Rostedt  * rb_set_list_to_head - set a list_head to be pointing to head.
84177ae365eSSteven Rostedt  */
84277ae365eSSteven Rostedt static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer,
84377ae365eSSteven Rostedt 				struct list_head *list)
84477ae365eSSteven Rostedt {
84577ae365eSSteven Rostedt 	unsigned long *ptr;
84677ae365eSSteven Rostedt 
84777ae365eSSteven Rostedt 	ptr = (unsigned long *)&list->next;
84877ae365eSSteven Rostedt 	*ptr |= RB_PAGE_HEAD;
84977ae365eSSteven Rostedt 	*ptr &= ~RB_PAGE_UPDATE;
85077ae365eSSteven Rostedt }
85177ae365eSSteven Rostedt 
85277ae365eSSteven Rostedt /*
85377ae365eSSteven Rostedt  * rb_head_page_activate - sets up head page
85477ae365eSSteven Rostedt  */
85577ae365eSSteven Rostedt static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
85677ae365eSSteven Rostedt {
85777ae365eSSteven Rostedt 	struct buffer_page *head;
85877ae365eSSteven Rostedt 
85977ae365eSSteven Rostedt 	head = cpu_buffer->head_page;
86077ae365eSSteven Rostedt 	if (!head)
86177ae365eSSteven Rostedt 		return;
86277ae365eSSteven Rostedt 
86377ae365eSSteven Rostedt 	/*
86477ae365eSSteven Rostedt 	 * Set the previous list pointer to have the HEAD flag.
86577ae365eSSteven Rostedt 	 */
86677ae365eSSteven Rostedt 	rb_set_list_to_head(cpu_buffer, head->list.prev);
86777ae365eSSteven Rostedt }
86877ae365eSSteven Rostedt 
86977ae365eSSteven Rostedt static void rb_list_head_clear(struct list_head *list)
87077ae365eSSteven Rostedt {
87177ae365eSSteven Rostedt 	unsigned long *ptr = (unsigned long *)&list->next;
87277ae365eSSteven Rostedt 
87377ae365eSSteven Rostedt 	*ptr &= ~RB_FLAG_MASK;
87477ae365eSSteven Rostedt }
87577ae365eSSteven Rostedt 
87677ae365eSSteven Rostedt /*
87777ae365eSSteven Rostedt  * rb_head_page_dactivate - clears head page ptr (for free list)
87877ae365eSSteven Rostedt  */
87977ae365eSSteven Rostedt static void
88077ae365eSSteven Rostedt rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer)
88177ae365eSSteven Rostedt {
88277ae365eSSteven Rostedt 	struct list_head *hd;
88377ae365eSSteven Rostedt 
88477ae365eSSteven Rostedt 	/* Go through the whole list and clear any pointers found. */
88577ae365eSSteven Rostedt 	rb_list_head_clear(cpu_buffer->pages);
88677ae365eSSteven Rostedt 
88777ae365eSSteven Rostedt 	list_for_each(hd, cpu_buffer->pages)
88877ae365eSSteven Rostedt 		rb_list_head_clear(hd);
88977ae365eSSteven Rostedt }
89077ae365eSSteven Rostedt 
89177ae365eSSteven Rostedt static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer,
89277ae365eSSteven Rostedt 			    struct buffer_page *head,
89377ae365eSSteven Rostedt 			    struct buffer_page *prev,
89477ae365eSSteven Rostedt 			    int old_flag, int new_flag)
89577ae365eSSteven Rostedt {
89677ae365eSSteven Rostedt 	struct list_head *list;
89777ae365eSSteven Rostedt 	unsigned long val = (unsigned long)&head->list;
89877ae365eSSteven Rostedt 	unsigned long ret;
89977ae365eSSteven Rostedt 
90077ae365eSSteven Rostedt 	list = &prev->list;
90177ae365eSSteven Rostedt 
90277ae365eSSteven Rostedt 	val &= ~RB_FLAG_MASK;
90377ae365eSSteven Rostedt 
90408a40816SSteven Rostedt 	ret = cmpxchg((unsigned long *)&list->next,
90577ae365eSSteven Rostedt 		      val | old_flag, val | new_flag);
90677ae365eSSteven Rostedt 
90777ae365eSSteven Rostedt 	/* check if the reader took the page */
90877ae365eSSteven Rostedt 	if ((ret & ~RB_FLAG_MASK) != val)
90977ae365eSSteven Rostedt 		return RB_PAGE_MOVED;
91077ae365eSSteven Rostedt 
91177ae365eSSteven Rostedt 	return ret & RB_FLAG_MASK;
91277ae365eSSteven Rostedt }
91377ae365eSSteven Rostedt 
91477ae365eSSteven Rostedt static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer,
91577ae365eSSteven Rostedt 				   struct buffer_page *head,
91677ae365eSSteven Rostedt 				   struct buffer_page *prev,
91777ae365eSSteven Rostedt 				   int old_flag)
91877ae365eSSteven Rostedt {
91977ae365eSSteven Rostedt 	return rb_head_page_set(cpu_buffer, head, prev,
92077ae365eSSteven Rostedt 				old_flag, RB_PAGE_UPDATE);
92177ae365eSSteven Rostedt }
92277ae365eSSteven Rostedt 
92377ae365eSSteven Rostedt static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer,
92477ae365eSSteven Rostedt 				 struct buffer_page *head,
92577ae365eSSteven Rostedt 				 struct buffer_page *prev,
92677ae365eSSteven Rostedt 				 int old_flag)
92777ae365eSSteven Rostedt {
92877ae365eSSteven Rostedt 	return rb_head_page_set(cpu_buffer, head, prev,
92977ae365eSSteven Rostedt 				old_flag, RB_PAGE_HEAD);
93077ae365eSSteven Rostedt }
93177ae365eSSteven Rostedt 
93277ae365eSSteven Rostedt static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer,
93377ae365eSSteven Rostedt 				   struct buffer_page *head,
93477ae365eSSteven Rostedt 				   struct buffer_page *prev,
93577ae365eSSteven Rostedt 				   int old_flag)
93677ae365eSSteven Rostedt {
93777ae365eSSteven Rostedt 	return rb_head_page_set(cpu_buffer, head, prev,
93877ae365eSSteven Rostedt 				old_flag, RB_PAGE_NORMAL);
93977ae365eSSteven Rostedt }
94077ae365eSSteven Rostedt 
94177ae365eSSteven Rostedt static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
94277ae365eSSteven Rostedt 			       struct buffer_page **bpage)
94377ae365eSSteven Rostedt {
94477ae365eSSteven Rostedt 	struct list_head *p = rb_list_head((*bpage)->list.next);
94577ae365eSSteven Rostedt 
94677ae365eSSteven Rostedt 	*bpage = list_entry(p, struct buffer_page, list);
94777ae365eSSteven Rostedt }
94877ae365eSSteven Rostedt 
94977ae365eSSteven Rostedt static struct buffer_page *
95077ae365eSSteven Rostedt rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer)
95177ae365eSSteven Rostedt {
95277ae365eSSteven Rostedt 	struct buffer_page *head;
95377ae365eSSteven Rostedt 	struct buffer_page *page;
95477ae365eSSteven Rostedt 	struct list_head *list;
95577ae365eSSteven Rostedt 	int i;
95677ae365eSSteven Rostedt 
95777ae365eSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page))
95877ae365eSSteven Rostedt 		return NULL;
95977ae365eSSteven Rostedt 
96077ae365eSSteven Rostedt 	/* sanity check */
96177ae365eSSteven Rostedt 	list = cpu_buffer->pages;
96277ae365eSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list))
96377ae365eSSteven Rostedt 		return NULL;
96477ae365eSSteven Rostedt 
96577ae365eSSteven Rostedt 	page = head = cpu_buffer->head_page;
96677ae365eSSteven Rostedt 	/*
96777ae365eSSteven Rostedt 	 * It is possible that the writer moves the header behind
96877ae365eSSteven Rostedt 	 * where we started, and we miss in one loop.
96977ae365eSSteven Rostedt 	 * A second loop should grab the header, but we'll do
97077ae365eSSteven Rostedt 	 * three loops just because I'm paranoid.
97177ae365eSSteven Rostedt 	 */
97277ae365eSSteven Rostedt 	for (i = 0; i < 3; i++) {
97377ae365eSSteven Rostedt 		do {
97477ae365eSSteven Rostedt 			if (rb_is_head_page(cpu_buffer, page, page->list.prev)) {
97577ae365eSSteven Rostedt 				cpu_buffer->head_page = page;
97677ae365eSSteven Rostedt 				return page;
97777ae365eSSteven Rostedt 			}
97877ae365eSSteven Rostedt 			rb_inc_page(cpu_buffer, &page);
97977ae365eSSteven Rostedt 		} while (page != head);
98077ae365eSSteven Rostedt 	}
98177ae365eSSteven Rostedt 
98277ae365eSSteven Rostedt 	RB_WARN_ON(cpu_buffer, 1);
98377ae365eSSteven Rostedt 
98477ae365eSSteven Rostedt 	return NULL;
98577ae365eSSteven Rostedt }
98677ae365eSSteven Rostedt 
98777ae365eSSteven Rostedt static int rb_head_page_replace(struct buffer_page *old,
98877ae365eSSteven Rostedt 				struct buffer_page *new)
98977ae365eSSteven Rostedt {
99077ae365eSSteven Rostedt 	unsigned long *ptr = (unsigned long *)&old->list.prev->next;
99177ae365eSSteven Rostedt 	unsigned long val;
99277ae365eSSteven Rostedt 	unsigned long ret;
99377ae365eSSteven Rostedt 
99477ae365eSSteven Rostedt 	val = *ptr & ~RB_FLAG_MASK;
99577ae365eSSteven Rostedt 	val |= RB_PAGE_HEAD;
99677ae365eSSteven Rostedt 
99708a40816SSteven Rostedt 	ret = cmpxchg(ptr, val, (unsigned long)&new->list);
99877ae365eSSteven Rostedt 
99977ae365eSSteven Rostedt 	return ret == val;
100077ae365eSSteven Rostedt }
100177ae365eSSteven Rostedt 
100277ae365eSSteven Rostedt /*
100377ae365eSSteven Rostedt  * rb_tail_page_update - move the tail page forward
100477ae365eSSteven Rostedt  */
100570004986SSteven Rostedt (Red Hat) static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
100677ae365eSSteven Rostedt 			       struct buffer_page *tail_page,
100777ae365eSSteven Rostedt 			       struct buffer_page *next_page)
100877ae365eSSteven Rostedt {
100977ae365eSSteven Rostedt 	unsigned long old_entries;
101077ae365eSSteven Rostedt 	unsigned long old_write;
101177ae365eSSteven Rostedt 
101277ae365eSSteven Rostedt 	/*
101377ae365eSSteven Rostedt 	 * The tail page now needs to be moved forward.
101477ae365eSSteven Rostedt 	 *
101577ae365eSSteven Rostedt 	 * We need to reset the tail page, but without erasing
101677ae365eSSteven Rostedt 	 * data written by interrupts that have already moved the
101777ae365eSSteven Rostedt 	 * tail page and are currently writing to it.
101877ae365eSSteven Rostedt 	 *
101977ae365eSSteven Rostedt 	 * We add a counter to the write field to denote this.
102077ae365eSSteven Rostedt 	 */
102177ae365eSSteven Rostedt 	old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
102277ae365eSSteven Rostedt 	old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
102377ae365eSSteven Rostedt 
102477ae365eSSteven Rostedt 	/*
102577ae365eSSteven Rostedt 	 * Just make sure we have seen our old_write and synchronize
102677ae365eSSteven Rostedt 	 * with any interrupts that come in.
102777ae365eSSteven Rostedt 	 */
102877ae365eSSteven Rostedt 	barrier();
102977ae365eSSteven Rostedt 
103077ae365eSSteven Rostedt 	/*
103177ae365eSSteven Rostedt 	 * If the tail page is still the same as what we think
103277ae365eSSteven Rostedt 	 * it is, then it is up to us to update the tail
103377ae365eSSteven Rostedt 	 * pointer.
103477ae365eSSteven Rostedt 	 */
10358573636eSSteven Rostedt (Red Hat) 	if (tail_page == READ_ONCE(cpu_buffer->tail_page)) {
103677ae365eSSteven Rostedt 		/* Zero the write counter */
103777ae365eSSteven Rostedt 		unsigned long val = old_write & ~RB_WRITE_MASK;
103877ae365eSSteven Rostedt 		unsigned long eval = old_entries & ~RB_WRITE_MASK;
103977ae365eSSteven Rostedt 
104077ae365eSSteven Rostedt 		/*
104177ae365eSSteven Rostedt 		 * This will only succeed if an interrupt did
104277ae365eSSteven Rostedt 		 * not come in and change it. If one did, we
104377ae365eSSteven Rostedt 		 * do not want to modify it.
1044da706d8bSLai Jiangshan 		 *
1045da706d8bSLai Jiangshan 		 * We add (void) to let the compiler know that we do not care
1046da706d8bSLai Jiangshan 		 * about the return value of these functions. We use the
1047da706d8bSLai Jiangshan 		 * cmpxchg to only update if an interrupt did not already
1048da706d8bSLai Jiangshan 		 * do it for us. If the cmpxchg fails, we don't care.
104977ae365eSSteven Rostedt 		 */
1050da706d8bSLai Jiangshan 		(void)local_cmpxchg(&next_page->write, old_write, val);
1051da706d8bSLai Jiangshan 		(void)local_cmpxchg(&next_page->entries, old_entries, eval);
105277ae365eSSteven Rostedt 
105377ae365eSSteven Rostedt 		/*
105477ae365eSSteven Rostedt 		 * No need to worry about races with clearing out the commit:
105577ae365eSSteven Rostedt 		 * it can only increment when a commit takes place. But that
105677ae365eSSteven Rostedt 		 * only happens in the outermost nested commit.
105777ae365eSSteven Rostedt 		 */
105877ae365eSSteven Rostedt 		local_set(&next_page->page->commit, 0);
105977ae365eSSteven Rostedt 
106070004986SSteven Rostedt (Red Hat) 		/* Again, either we update tail_page or an interrupt does */
106170004986SSteven Rostedt (Red Hat) 		(void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page);
106277ae365eSSteven Rostedt 	}
106377ae365eSSteven Rostedt }
106477ae365eSSteven Rostedt 
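The counter trick in rb_tail_page_update() can be pictured with a single atomic word: the upper bits act as a "someone moved the tail" count, so the later compare-and-swap only zeroes the offset bits if no interrupt bumped the count in between. A rough single-threaded user-space sketch, with illustrative constants that merely mimic the split between offset bits and count bits:

#include <stdio.h>
#include <stdatomic.h>

#define WRITE_MASK   0xfffffUL        /* low bits: byte offset into the page */
#define WRITE_INTCNT 0x100000UL       /* high bits: tail-move nesting count */

static atomic_ulong page_write;

int main(void)
{
	unsigned long old_write, val;

	/* mark the page: bump the count and remember the value we produced */
	old_write = atomic_fetch_add(&page_write, WRITE_INTCNT) + WRITE_INTCNT;

	/* zero only the offset bits we observed, keep the count bits */
	val = old_write & ~WRITE_MASK;

	/* succeeds only if nobody added another WRITE_INTCNT meanwhile */
	if (atomic_compare_exchange_strong(&page_write, &old_write, val))
		printf("reset the write offset\n");
	else
		printf("an interrupt got there first, leave it alone\n");
	return 0;
}

In the real code the failure case is simply ignored, because a failed cmpxchg means an interrupt already performed the reset on our behalf.
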
106577ae365eSSteven Rostedt static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
106677ae365eSSteven Rostedt 			  struct buffer_page *bpage)
106777ae365eSSteven Rostedt {
106877ae365eSSteven Rostedt 	unsigned long val = (unsigned long)bpage;
106977ae365eSSteven Rostedt 
107077ae365eSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK))
107177ae365eSSteven Rostedt 		return 1;
107277ae365eSSteven Rostedt 
107377ae365eSSteven Rostedt 	return 0;
107477ae365eSSteven Rostedt }
107577ae365eSSteven Rostedt 
107677ae365eSSteven Rostedt /**
107777ae365eSSteven Rostedt  * rb_check_list - make sure a list's prev and next pointers have no flag bits set
107877ae365eSSteven Rostedt  */
107977ae365eSSteven Rostedt static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
108077ae365eSSteven Rostedt 			 struct list_head *list)
108177ae365eSSteven Rostedt {
108277ae365eSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev))
108377ae365eSSteven Rostedt 		return 1;
108477ae365eSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next))
108577ae365eSSteven Rostedt 		return 1;
108677ae365eSSteven Rostedt 	return 0;
108777ae365eSSteven Rostedt }
108877ae365eSSteven Rostedt 
10897a8e76a3SSteven Rostedt /**
1090d611851bSzhangwei(Jovi)  * rb_check_pages - integrity check of buffer pages
10917a8e76a3SSteven Rostedt  * @cpu_buffer: CPU buffer with pages to test
10927a8e76a3SSteven Rostedt  *
1093c3706f00SWenji Huang  * As a safety measure we check to make sure the data pages have not
10947a8e76a3SSteven Rostedt  * been corrupted.
10957a8e76a3SSteven Rostedt  */
10967a8e76a3SSteven Rostedt static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
10977a8e76a3SSteven Rostedt {
10983adc54faSSteven Rostedt 	struct list_head *head = cpu_buffer->pages;
1099044fa782SSteven Rostedt 	struct buffer_page *bpage, *tmp;
11007a8e76a3SSteven Rostedt 
1101308f7eebSSteven Rostedt 	/* Reset the head page if it exists */
1102308f7eebSSteven Rostedt 	if (cpu_buffer->head_page)
1103308f7eebSSteven Rostedt 		rb_set_head_page(cpu_buffer);
1104308f7eebSSteven Rostedt 
110577ae365eSSteven Rostedt 	rb_head_page_deactivate(cpu_buffer);
110677ae365eSSteven Rostedt 
11073e89c7bbSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
11083e89c7bbSSteven Rostedt 		return -1;
11093e89c7bbSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
11103e89c7bbSSteven Rostedt 		return -1;
11117a8e76a3SSteven Rostedt 
111277ae365eSSteven Rostedt 	if (rb_check_list(cpu_buffer, head))
111377ae365eSSteven Rostedt 		return -1;
111477ae365eSSteven Rostedt 
1115044fa782SSteven Rostedt 	list_for_each_entry_safe(bpage, tmp, head, list) {
11163e89c7bbSSteven Rostedt 		if (RB_WARN_ON(cpu_buffer,
1117044fa782SSteven Rostedt 			       bpage->list.next->prev != &bpage->list))
11183e89c7bbSSteven Rostedt 			return -1;
11193e89c7bbSSteven Rostedt 		if (RB_WARN_ON(cpu_buffer,
1120044fa782SSteven Rostedt 			       bpage->list.prev->next != &bpage->list))
11213e89c7bbSSteven Rostedt 			return -1;
112277ae365eSSteven Rostedt 		if (rb_check_list(cpu_buffer, &bpage->list))
112377ae365eSSteven Rostedt 			return -1;
11247a8e76a3SSteven Rostedt 	}
11257a8e76a3SSteven Rostedt 
112677ae365eSSteven Rostedt 	rb_head_page_activate(cpu_buffer);
112777ae365eSSteven Rostedt 
11287a8e76a3SSteven Rostedt 	return 0;
11297a8e76a3SSteven Rostedt }
11307a8e76a3SSteven Rostedt 
11319b94a8fbSSteven Rostedt (Red Hat) static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu)
11327a8e76a3SSteven Rostedt {
1133044fa782SSteven Rostedt 	struct buffer_page *bpage, *tmp;
11349b94a8fbSSteven Rostedt (Red Hat) 	long i;
11353adc54faSSteven Rostedt 
11367a8e76a3SSteven Rostedt 	for (i = 0; i < nr_pages; i++) {
11377ea59064SVaibhav Nagarnaik 		struct page *page;
1138d7ec4bfeSVaibhav Nagarnaik 		/*
1139d7ec4bfeSVaibhav Nagarnaik 		 * The __GFP_NORETRY flag makes sure that the allocation fails
1140d7ec4bfeSVaibhav Nagarnaik 		 * gracefully without invoking the OOM killer, so the system
1141d7ec4bfeSVaibhav Nagarnaik 		 * is not destabilized.
1142d7ec4bfeSVaibhav Nagarnaik 		 */
1143044fa782SSteven Rostedt 		bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1144d7ec4bfeSVaibhav Nagarnaik 				    GFP_KERNEL | __GFP_NORETRY,
1145438ced17SVaibhav Nagarnaik 				    cpu_to_node(cpu));
1146044fa782SSteven Rostedt 		if (!bpage)
1147e4c2ce82SSteven Rostedt 			goto free_pages;
114877ae365eSSteven Rostedt 
1149438ced17SVaibhav Nagarnaik 		list_add(&bpage->list, pages);
115077ae365eSSteven Rostedt 
1151438ced17SVaibhav Nagarnaik 		page = alloc_pages_node(cpu_to_node(cpu),
1152d7ec4bfeSVaibhav Nagarnaik 					GFP_KERNEL | __GFP_NORETRY, 0);
11537ea59064SVaibhav Nagarnaik 		if (!page)
11547a8e76a3SSteven Rostedt 			goto free_pages;
11557ea59064SVaibhav Nagarnaik 		bpage->page = page_address(page);
1156044fa782SSteven Rostedt 		rb_init_page(bpage->page);
11577a8e76a3SSteven Rostedt 	}
11587a8e76a3SSteven Rostedt 
1159438ced17SVaibhav Nagarnaik 	return 0;
1160438ced17SVaibhav Nagarnaik 
1161438ced17SVaibhav Nagarnaik free_pages:
1162438ced17SVaibhav Nagarnaik 	list_for_each_entry_safe(bpage, tmp, pages, list) {
1163438ced17SVaibhav Nagarnaik 		list_del_init(&bpage->list);
1164438ced17SVaibhav Nagarnaik 		free_buffer_page(bpage);
1165438ced17SVaibhav Nagarnaik 	}
1166438ced17SVaibhav Nagarnaik 
1167438ced17SVaibhav Nagarnaik 	return -ENOMEM;
1168438ced17SVaibhav Nagarnaik }
1169438ced17SVaibhav Nagarnaik 
1170438ced17SVaibhav Nagarnaik static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
11719b94a8fbSSteven Rostedt (Red Hat) 			     unsigned long nr_pages)
1172438ced17SVaibhav Nagarnaik {
1173438ced17SVaibhav Nagarnaik 	LIST_HEAD(pages);
1174438ced17SVaibhav Nagarnaik 
1175438ced17SVaibhav Nagarnaik 	WARN_ON(!nr_pages);
1176438ced17SVaibhav Nagarnaik 
1177438ced17SVaibhav Nagarnaik 	if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu))
1178438ced17SVaibhav Nagarnaik 		return -ENOMEM;
1179438ced17SVaibhav Nagarnaik 
11803adc54faSSteven Rostedt 	/*
11813adc54faSSteven Rostedt 	 * The ring buffer page list is a circular list that does not
11823adc54faSSteven Rostedt 	 * start and end with a list head. All page list items point to
11833adc54faSSteven Rostedt 	 * other pages.
11843adc54faSSteven Rostedt 	 */
11853adc54faSSteven Rostedt 	cpu_buffer->pages = pages.next;
11863adc54faSSteven Rostedt 	list_del(&pages);
11877a8e76a3SSteven Rostedt 
1188438ced17SVaibhav Nagarnaik 	cpu_buffer->nr_pages = nr_pages;
1189438ced17SVaibhav Nagarnaik 
11907a8e76a3SSteven Rostedt 	rb_check_pages(cpu_buffer);
11917a8e76a3SSteven Rostedt 
11927a8e76a3SSteven Rostedt 	return 0;
11937a8e76a3SSteven Rostedt }
11947a8e76a3SSteven Rostedt 
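rb_allocate_pages() builds the pages on a temporary on-stack anchor and then drops the anchor out of the list, leaving a headless circle in which every element points at another page. A small user-space sketch of that manoeuvre, using simplified re-implementations of the list helpers purely for illustration:

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void list_init(struct list_head *h) { h->next = h->prev = h; }

static void list_add_tail(struct list_head *n, struct list_head *h)
{
	n->prev = h->prev; n->next = h;
	h->prev->next = n; h->prev = n;
}

/* unlink an entry; its neighbours now point at each other */
static void list_del_entry(struct list_head *e)
{
	e->prev->next = e->next;
	e->next->prev = e->prev;
}

int main(void)
{
	struct list_head anchor, pg[3];
	struct list_head *pages, *p;
	int i;

	list_init(&anchor);
	for (i = 0; i < 3; i++)
		list_add_tail(&pg[i], &anchor);

	/* keep a pointer into the ring ... */
	pages = anchor.next;
	/* ... then drop the anchor so only pages remain on the circle */
	list_del_entry(&anchor);

	/* walking the circle now only ever visits pg[0..2] */
	p = pages;
	do {
		printf("page %ld\n", (long)(p - pg));
		p = p->next;
	} while (p != pages);
	return 0;
}
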
11957a8e76a3SSteven Rostedt static struct ring_buffer_per_cpu *
11969b94a8fbSSteven Rostedt (Red Hat) rb_allocate_cpu_buffer(struct ring_buffer *buffer, long nr_pages, int cpu)
11977a8e76a3SSteven Rostedt {
11987a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
1199044fa782SSteven Rostedt 	struct buffer_page *bpage;
12007ea59064SVaibhav Nagarnaik 	struct page *page;
12017a8e76a3SSteven Rostedt 	int ret;
12027a8e76a3SSteven Rostedt 
12037a8e76a3SSteven Rostedt 	cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
12047a8e76a3SSteven Rostedt 				  GFP_KERNEL, cpu_to_node(cpu));
12057a8e76a3SSteven Rostedt 	if (!cpu_buffer)
12067a8e76a3SSteven Rostedt 		return NULL;
12077a8e76a3SSteven Rostedt 
12087a8e76a3SSteven Rostedt 	cpu_buffer->cpu = cpu;
12097a8e76a3SSteven Rostedt 	cpu_buffer->buffer = buffer;
12105389f6faSThomas Gleixner 	raw_spin_lock_init(&cpu_buffer->reader_lock);
12111f8a6a10SPeter Zijlstra 	lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
1212edc35bd7SThomas Gleixner 	cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
121383f40318SVaibhav Nagarnaik 	INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
121405fdd70dSVaibhav Nagarnaik 	init_completion(&cpu_buffer->update_done);
121515693458SSteven Rostedt (Red Hat) 	init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters);
1216f1dc6725SSteven Rostedt (Red Hat) 	init_waitqueue_head(&cpu_buffer->irq_work.waiters);
12171e0d6714SSteven Rostedt (Red Hat) 	init_waitqueue_head(&cpu_buffer->irq_work.full_waiters);
12187a8e76a3SSteven Rostedt 
1219044fa782SSteven Rostedt 	bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1220e4c2ce82SSteven Rostedt 			    GFP_KERNEL, cpu_to_node(cpu));
1221044fa782SSteven Rostedt 	if (!bpage)
1222e4c2ce82SSteven Rostedt 		goto fail_free_buffer;
1223e4c2ce82SSteven Rostedt 
122477ae365eSSteven Rostedt 	rb_check_bpage(cpu_buffer, bpage);
122577ae365eSSteven Rostedt 
1226044fa782SSteven Rostedt 	cpu_buffer->reader_page = bpage;
12277ea59064SVaibhav Nagarnaik 	page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0);
12287ea59064SVaibhav Nagarnaik 	if (!page)
1229e4c2ce82SSteven Rostedt 		goto fail_free_reader;
12307ea59064SVaibhav Nagarnaik 	bpage->page = page_address(page);
1231044fa782SSteven Rostedt 	rb_init_page(bpage->page);
1232e4c2ce82SSteven Rostedt 
1233d769041fSSteven Rostedt 	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
123444b99462SVaibhav Nagarnaik 	INIT_LIST_HEAD(&cpu_buffer->new_pages);
1235d769041fSSteven Rostedt 
1236438ced17SVaibhav Nagarnaik 	ret = rb_allocate_pages(cpu_buffer, nr_pages);
12377a8e76a3SSteven Rostedt 	if (ret < 0)
1238d769041fSSteven Rostedt 		goto fail_free_reader;
12397a8e76a3SSteven Rostedt 
12407a8e76a3SSteven Rostedt 	cpu_buffer->head_page
12413adc54faSSteven Rostedt 		= list_entry(cpu_buffer->pages, struct buffer_page, list);
1242bf41a158SSteven Rostedt 	cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
12437a8e76a3SSteven Rostedt 
124477ae365eSSteven Rostedt 	rb_head_page_activate(cpu_buffer);
124577ae365eSSteven Rostedt 
12467a8e76a3SSteven Rostedt 	return cpu_buffer;
12477a8e76a3SSteven Rostedt 
1248d769041fSSteven Rostedt  fail_free_reader:
1249d769041fSSteven Rostedt 	free_buffer_page(cpu_buffer->reader_page);
1250d769041fSSteven Rostedt 
12517a8e76a3SSteven Rostedt  fail_free_buffer:
12527a8e76a3SSteven Rostedt 	kfree(cpu_buffer);
12537a8e76a3SSteven Rostedt 	return NULL;
12547a8e76a3SSteven Rostedt }
12557a8e76a3SSteven Rostedt 
12567a8e76a3SSteven Rostedt static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
12577a8e76a3SSteven Rostedt {
12583adc54faSSteven Rostedt 	struct list_head *head = cpu_buffer->pages;
1259044fa782SSteven Rostedt 	struct buffer_page *bpage, *tmp;
12607a8e76a3SSteven Rostedt 
1261d769041fSSteven Rostedt 	free_buffer_page(cpu_buffer->reader_page);
1262d769041fSSteven Rostedt 
126377ae365eSSteven Rostedt 	rb_head_page_deactivate(cpu_buffer);
126477ae365eSSteven Rostedt 
12653adc54faSSteven Rostedt 	if (head) {
1266044fa782SSteven Rostedt 		list_for_each_entry_safe(bpage, tmp, head, list) {
1267044fa782SSteven Rostedt 			list_del_init(&bpage->list);
1268044fa782SSteven Rostedt 			free_buffer_page(bpage);
12697a8e76a3SSteven Rostedt 		}
12703adc54faSSteven Rostedt 		bpage = list_entry(head, struct buffer_page, list);
12713adc54faSSteven Rostedt 		free_buffer_page(bpage);
12723adc54faSSteven Rostedt 	}
12733adc54faSSteven Rostedt 
12747a8e76a3SSteven Rostedt 	kfree(cpu_buffer);
12757a8e76a3SSteven Rostedt }
12767a8e76a3SSteven Rostedt 
127759222efeSSteven Rostedt #ifdef CONFIG_HOTPLUG_CPU
127809c9e84dSFrederic Weisbecker static int rb_cpu_notify(struct notifier_block *self,
1279554f786eSSteven Rostedt 			 unsigned long action, void *hcpu);
1280554f786eSSteven Rostedt #endif
1281554f786eSSteven Rostedt 
12827a8e76a3SSteven Rostedt /**
1283d611851bSzhangwei(Jovi)  * __ring_buffer_alloc - allocate a new ring_buffer
128468814b58SRobert Richter  * @size: the size in bytes per cpu that is needed.
12857a8e76a3SSteven Rostedt  * @flags: attributes to set for the ring buffer.
12867a8e76a3SSteven Rostedt  *
12877a8e76a3SSteven Rostedt  * Currently the only flag that is available is the RB_FL_OVERWRITE
12887a8e76a3SSteven Rostedt  * flag. This flag means that the buffer will overwrite old data
12897a8e76a3SSteven Rostedt  * when the buffer wraps. If this flag is not set, the buffer will
12907a8e76a3SSteven Rostedt  * drop data when the tail hits the head.
12917a8e76a3SSteven Rostedt  */
12921f8a6a10SPeter Zijlstra struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
12931f8a6a10SPeter Zijlstra 					struct lock_class_key *key)
12947a8e76a3SSteven Rostedt {
12957a8e76a3SSteven Rostedt 	struct ring_buffer *buffer;
12969b94a8fbSSteven Rostedt (Red Hat) 	long nr_pages;
12977a8e76a3SSteven Rostedt 	int bsize;
12989b94a8fbSSteven Rostedt (Red Hat) 	int cpu;
12997a8e76a3SSteven Rostedt 
13007a8e76a3SSteven Rostedt 	/* keep it in its own cache line */
13017a8e76a3SSteven Rostedt 	buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
13027a8e76a3SSteven Rostedt 			 GFP_KERNEL);
13037a8e76a3SSteven Rostedt 	if (!buffer)
13047a8e76a3SSteven Rostedt 		return NULL;
13057a8e76a3SSteven Rostedt 
13069e01c1b7SRusty Russell 	if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
13079e01c1b7SRusty Russell 		goto fail_free_buffer;
13089e01c1b7SRusty Russell 
1309438ced17SVaibhav Nagarnaik 	nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
13107a8e76a3SSteven Rostedt 	buffer->flags = flags;
131137886f6aSSteven Rostedt 	buffer->clock = trace_clock_local;
13121f8a6a10SPeter Zijlstra 	buffer->reader_lock_key = key;
13137a8e76a3SSteven Rostedt 
131415693458SSteven Rostedt (Red Hat) 	init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters);
1315f1dc6725SSteven Rostedt (Red Hat) 	init_waitqueue_head(&buffer->irq_work.waiters);
131615693458SSteven Rostedt (Red Hat) 
13177a8e76a3SSteven Rostedt 	/* need at least two pages */
1318438ced17SVaibhav Nagarnaik 	if (nr_pages < 2)
1319438ced17SVaibhav Nagarnaik 		nr_pages = 2;
13207a8e76a3SSteven Rostedt 
13213bf832ceSFrederic Weisbecker 	/*
13223bf832ceSFrederic Weisbecker 	 * Without CPU hotplug, if the ring buffer is allocated in an
13233bf832ceSFrederic Weisbecker 	 * early initcall, it will not be notified of secondary cpus.
13243bf832ceSFrederic Weisbecker 	 * In that case, we need to allocate for all possible cpus.
13253bf832ceSFrederic Weisbecker 	 */
13263bf832ceSFrederic Weisbecker #ifdef CONFIG_HOTPLUG_CPU
1327d39ad278SSrivatsa S. Bhat 	cpu_notifier_register_begin();
1328554f786eSSteven Rostedt 	cpumask_copy(buffer->cpumask, cpu_online_mask);
13293bf832ceSFrederic Weisbecker #else
13303bf832ceSFrederic Weisbecker 	cpumask_copy(buffer->cpumask, cpu_possible_mask);
13313bf832ceSFrederic Weisbecker #endif
13327a8e76a3SSteven Rostedt 	buffer->cpus = nr_cpu_ids;
13337a8e76a3SSteven Rostedt 
13347a8e76a3SSteven Rostedt 	bsize = sizeof(void *) * nr_cpu_ids;
13357a8e76a3SSteven Rostedt 	buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
13367a8e76a3SSteven Rostedt 				  GFP_KERNEL);
13377a8e76a3SSteven Rostedt 	if (!buffer->buffers)
13389e01c1b7SRusty Russell 		goto fail_free_cpumask;
13397a8e76a3SSteven Rostedt 
13407a8e76a3SSteven Rostedt 	for_each_buffer_cpu(buffer, cpu) {
13417a8e76a3SSteven Rostedt 		buffer->buffers[cpu] =
1342438ced17SVaibhav Nagarnaik 			rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
13437a8e76a3SSteven Rostedt 		if (!buffer->buffers[cpu])
13447a8e76a3SSteven Rostedt 			goto fail_free_buffers;
13457a8e76a3SSteven Rostedt 	}
13467a8e76a3SSteven Rostedt 
134759222efeSSteven Rostedt #ifdef CONFIG_HOTPLUG_CPU
1348554f786eSSteven Rostedt 	buffer->cpu_notify.notifier_call = rb_cpu_notify;
1349554f786eSSteven Rostedt 	buffer->cpu_notify.priority = 0;
1350d39ad278SSrivatsa S. Bhat 	__register_cpu_notifier(&buffer->cpu_notify);
1351d39ad278SSrivatsa S. Bhat 	cpu_notifier_register_done();
1352554f786eSSteven Rostedt #endif
1353554f786eSSteven Rostedt 
13547a8e76a3SSteven Rostedt 	mutex_init(&buffer->mutex);
13557a8e76a3SSteven Rostedt 
13567a8e76a3SSteven Rostedt 	return buffer;
13577a8e76a3SSteven Rostedt 
13587a8e76a3SSteven Rostedt  fail_free_buffers:
13597a8e76a3SSteven Rostedt 	for_each_buffer_cpu(buffer, cpu) {
13607a8e76a3SSteven Rostedt 		if (buffer->buffers[cpu])
13617a8e76a3SSteven Rostedt 			rb_free_cpu_buffer(buffer->buffers[cpu]);
13627a8e76a3SSteven Rostedt 	}
13637a8e76a3SSteven Rostedt 	kfree(buffer->buffers);
13647a8e76a3SSteven Rostedt 
13659e01c1b7SRusty Russell  fail_free_cpumask:
13669e01c1b7SRusty Russell 	free_cpumask_var(buffer->cpumask);
1367d39ad278SSrivatsa S. Bhat #ifdef CONFIG_HOTPLUG_CPU
1368d39ad278SSrivatsa S. Bhat 	cpu_notifier_register_done();
1369d39ad278SSrivatsa S. Bhat #endif
13709e01c1b7SRusty Russell 
13717a8e76a3SSteven Rostedt  fail_free_buffer:
13727a8e76a3SSteven Rostedt 	kfree(buffer);
13737a8e76a3SSteven Rostedt 	return NULL;
13747a8e76a3SSteven Rostedt }
13751f8a6a10SPeter Zijlstra EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
13767a8e76a3SSteven Rostedt 
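Callers normally go through the ring_buffer_alloc() wrapper macro from linux/ring_buffer.h, which supplies the lock_class_key for __ring_buffer_alloc(). A hedged sketch of typical use follows; it assumes the usual ring_buffer_write()/ring_buffer_free() prototypes from that header and is a fragment meant to run in kernel context, not a complete module.

/* Sketch only: typical allocate/write/free sequence for this API. */
#include <linux/kernel.h>
#include <linux/ring_buffer.h>

static int example_use_ring_buffer(void)
{
	struct ring_buffer *rb;
	u32 value = 42;

	/* at least 1 MB per CPU, overwriting old data when the buffer wraps */
	rb = ring_buffer_alloc(1024 * 1024, RB_FL_OVERWRITE);
	if (!rb)
		return -ENOMEM;

	/* write a raw blob; the ring buffer adds its own event header */
	if (ring_buffer_write(rb, sizeof(value), &value))
		pr_warn("ring buffer write failed\n");

	ring_buffer_free(rb);
	return 0;
}
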
13777a8e76a3SSteven Rostedt /**
13787a8e76a3SSteven Rostedt  * ring_buffer_free - free a ring buffer.
13797a8e76a3SSteven Rostedt  * @buffer: the buffer to free.
13807a8e76a3SSteven Rostedt  */
13817a8e76a3SSteven Rostedt void
13827a8e76a3SSteven Rostedt ring_buffer_free(struct ring_buffer *buffer)
13837a8e76a3SSteven Rostedt {
13847a8e76a3SSteven Rostedt 	int cpu;
13857a8e76a3SSteven Rostedt 
138659222efeSSteven Rostedt #ifdef CONFIG_HOTPLUG_CPU
1387d39ad278SSrivatsa S. Bhat 	cpu_notifier_register_begin();
1388d39ad278SSrivatsa S. Bhat 	__unregister_cpu_notifier(&buffer->cpu_notify);
1389554f786eSSteven Rostedt #endif
1390554f786eSSteven Rostedt 
13917a8e76a3SSteven Rostedt 	for_each_buffer_cpu(buffer, cpu)
13927a8e76a3SSteven Rostedt 		rb_free_cpu_buffer(buffer->buffers[cpu]);
13937a8e76a3SSteven Rostedt 
1394d39ad278SSrivatsa S. Bhat #ifdef CONFIG_HOTPLUG_CPU
1395d39ad278SSrivatsa S. Bhat 	cpu_notifier_register_done();
1396d39ad278SSrivatsa S. Bhat #endif
1397554f786eSSteven Rostedt 
1398bd3f0221SEric Dumazet 	kfree(buffer->buffers);
13999e01c1b7SRusty Russell 	free_cpumask_var(buffer->cpumask);
14009e01c1b7SRusty Russell 
14017a8e76a3SSteven Rostedt 	kfree(buffer);
14027a8e76a3SSteven Rostedt }
1403c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_free);
14047a8e76a3SSteven Rostedt 
140537886f6aSSteven Rostedt void ring_buffer_set_clock(struct ring_buffer *buffer,
140637886f6aSSteven Rostedt 			   u64 (*clock)(void))
140737886f6aSSteven Rostedt {
140837886f6aSSteven Rostedt 	buffer->clock = clock;
140937886f6aSSteven Rostedt }
141037886f6aSSteven Rostedt 
14117a8e76a3SSteven Rostedt static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
14127a8e76a3SSteven Rostedt 
141383f40318SVaibhav Nagarnaik static inline unsigned long rb_page_entries(struct buffer_page *bpage)
14147a8e76a3SSteven Rostedt {
141583f40318SVaibhav Nagarnaik 	return local_read(&bpage->entries) & RB_WRITE_MASK;
141683f40318SVaibhav Nagarnaik }
141783f40318SVaibhav Nagarnaik 
141883f40318SVaibhav Nagarnaik static inline unsigned long rb_page_write(struct buffer_page *bpage)
141983f40318SVaibhav Nagarnaik {
142083f40318SVaibhav Nagarnaik 	return local_read(&bpage->write) & RB_WRITE_MASK;
142183f40318SVaibhav Nagarnaik }
142283f40318SVaibhav Nagarnaik 
14235040b4b7SVaibhav Nagarnaik static int
14249b94a8fbSSteven Rostedt (Red Hat) rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
142583f40318SVaibhav Nagarnaik {
142683f40318SVaibhav Nagarnaik 	struct list_head *tail_page, *to_remove, *next_page;
142783f40318SVaibhav Nagarnaik 	struct buffer_page *to_remove_page, *tmp_iter_page;
142883f40318SVaibhav Nagarnaik 	struct buffer_page *last_page, *first_page;
14299b94a8fbSSteven Rostedt (Red Hat) 	unsigned long nr_removed;
143083f40318SVaibhav Nagarnaik 	unsigned long head_bit;
143183f40318SVaibhav Nagarnaik 	int page_entries;
143283f40318SVaibhav Nagarnaik 
143383f40318SVaibhav Nagarnaik 	head_bit = 0;
14347a8e76a3SSteven Rostedt 
14355389f6faSThomas Gleixner 	raw_spin_lock_irq(&cpu_buffer->reader_lock);
143683f40318SVaibhav Nagarnaik 	atomic_inc(&cpu_buffer->record_disabled);
143783f40318SVaibhav Nagarnaik 	/*
143883f40318SVaibhav Nagarnaik 	 * We don't race with the readers since we have acquired the reader
143983f40318SVaibhav Nagarnaik 	 * lock. We also don't race with writers after disabling recording.
144083f40318SVaibhav Nagarnaik 	 * This makes it easy to figure out the first and the last page to be
144183f40318SVaibhav Nagarnaik 	 * removed from the list. We unlink all the pages in between including
144283f40318SVaibhav Nagarnaik 	 * the first and last pages. This is done in a busy loop so that we
144383f40318SVaibhav Nagarnaik 	 * lose the least number of traces.
144483f40318SVaibhav Nagarnaik 	 * The pages are freed after we restart recording and unlock readers.
144583f40318SVaibhav Nagarnaik 	 */
144683f40318SVaibhav Nagarnaik 	tail_page = &cpu_buffer->tail_page->list;
144777ae365eSSteven Rostedt 
144883f40318SVaibhav Nagarnaik 	/*
144983f40318SVaibhav Nagarnaik 	 * The tail page might be on the reader page; in that case we
145083f40318SVaibhav Nagarnaik 	 * remove the next page from the ring buffer instead.
145183f40318SVaibhav Nagarnaik 	 */
145283f40318SVaibhav Nagarnaik 	if (cpu_buffer->tail_page == cpu_buffer->reader_page)
145383f40318SVaibhav Nagarnaik 		tail_page = rb_list_head(tail_page->next);
145483f40318SVaibhav Nagarnaik 	to_remove = tail_page;
145583f40318SVaibhav Nagarnaik 
145683f40318SVaibhav Nagarnaik 	/* start of pages to remove */
145783f40318SVaibhav Nagarnaik 	first_page = list_entry(rb_list_head(to_remove->next),
145883f40318SVaibhav Nagarnaik 				struct buffer_page, list);
145983f40318SVaibhav Nagarnaik 
146083f40318SVaibhav Nagarnaik 	for (nr_removed = 0; nr_removed < nr_pages; nr_removed++) {
146183f40318SVaibhav Nagarnaik 		to_remove = rb_list_head(to_remove)->next;
146283f40318SVaibhav Nagarnaik 		head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD;
14637a8e76a3SSteven Rostedt 	}
14647a8e76a3SSteven Rostedt 
146583f40318SVaibhav Nagarnaik 	next_page = rb_list_head(to_remove)->next;
14667a8e76a3SSteven Rostedt 
146783f40318SVaibhav Nagarnaik 	/*
146883f40318SVaibhav Nagarnaik 	 * Now we remove all pages between tail_page and next_page.
146983f40318SVaibhav Nagarnaik 	 * Make sure that the head_bit value is preserved for the
147083f40318SVaibhav Nagarnaik 	 * next page.
147183f40318SVaibhav Nagarnaik 	 */
147283f40318SVaibhav Nagarnaik 	tail_page->next = (struct list_head *)((unsigned long)next_page |
147383f40318SVaibhav Nagarnaik 						head_bit);
147483f40318SVaibhav Nagarnaik 	next_page = rb_list_head(next_page);
147583f40318SVaibhav Nagarnaik 	next_page->prev = tail_page;
147683f40318SVaibhav Nagarnaik 
147783f40318SVaibhav Nagarnaik 	/* make sure pages points to a valid page in the ring buffer */
147883f40318SVaibhav Nagarnaik 	cpu_buffer->pages = next_page;
147983f40318SVaibhav Nagarnaik 
148083f40318SVaibhav Nagarnaik 	/* update head page */
148183f40318SVaibhav Nagarnaik 	if (head_bit)
148283f40318SVaibhav Nagarnaik 		cpu_buffer->head_page = list_entry(next_page,
148383f40318SVaibhav Nagarnaik 						struct buffer_page, list);
148483f40318SVaibhav Nagarnaik 
148583f40318SVaibhav Nagarnaik 	/*
148683f40318SVaibhav Nagarnaik 	 * change read pointer to make sure any read iterators reset
148783f40318SVaibhav Nagarnaik 	 * themselves
148883f40318SVaibhav Nagarnaik 	 */
148983f40318SVaibhav Nagarnaik 	cpu_buffer->read = 0;
149083f40318SVaibhav Nagarnaik 
149183f40318SVaibhav Nagarnaik 	/* pages are removed, resume tracing and then free the pages */
149283f40318SVaibhav Nagarnaik 	atomic_dec(&cpu_buffer->record_disabled);
14935389f6faSThomas Gleixner 	raw_spin_unlock_irq(&cpu_buffer->reader_lock);
149483f40318SVaibhav Nagarnaik 
149583f40318SVaibhav Nagarnaik 	RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages));
149683f40318SVaibhav Nagarnaik 
149783f40318SVaibhav Nagarnaik 	/* last buffer page to remove */
149883f40318SVaibhav Nagarnaik 	last_page = list_entry(rb_list_head(to_remove), struct buffer_page,
149983f40318SVaibhav Nagarnaik 				list);
150083f40318SVaibhav Nagarnaik 	tmp_iter_page = first_page;
150183f40318SVaibhav Nagarnaik 
150283f40318SVaibhav Nagarnaik 	do {
150383f40318SVaibhav Nagarnaik 		to_remove_page = tmp_iter_page;
150483f40318SVaibhav Nagarnaik 		rb_inc_page(cpu_buffer, &tmp_iter_page);
150583f40318SVaibhav Nagarnaik 
150683f40318SVaibhav Nagarnaik 		/* update the counters */
150783f40318SVaibhav Nagarnaik 		page_entries = rb_page_entries(to_remove_page);
150883f40318SVaibhav Nagarnaik 		if (page_entries) {
150983f40318SVaibhav Nagarnaik 			/*
151083f40318SVaibhav Nagarnaik 			 * If something was added to this page, it was full
151183f40318SVaibhav Nagarnaik 			 * since it is not the tail page. So we deduct the
151283f40318SVaibhav Nagarnaik 			 * bytes consumed in the ring buffer from here.
151348fdc72fSVaibhav Nagarnaik 			 * Increment overrun to account for the lost events.
151483f40318SVaibhav Nagarnaik 			 */
151548fdc72fSVaibhav Nagarnaik 			local_add(page_entries, &cpu_buffer->overrun);
151683f40318SVaibhav Nagarnaik 			local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
151783f40318SVaibhav Nagarnaik 		}
151883f40318SVaibhav Nagarnaik 
151983f40318SVaibhav Nagarnaik 		/*
152083f40318SVaibhav Nagarnaik 		 * We have already removed references to this list item, just
152183f40318SVaibhav Nagarnaik 		 * free up the buffer_page and its page
152283f40318SVaibhav Nagarnaik 		 */
152383f40318SVaibhav Nagarnaik 		free_buffer_page(to_remove_page);
152483f40318SVaibhav Nagarnaik 		nr_removed--;
152583f40318SVaibhav Nagarnaik 
152683f40318SVaibhav Nagarnaik 	} while (to_remove_page != last_page);
152783f40318SVaibhav Nagarnaik 
152883f40318SVaibhav Nagarnaik 	RB_WARN_ON(cpu_buffer, nr_removed);
15295040b4b7SVaibhav Nagarnaik 
15305040b4b7SVaibhav Nagarnaik 	return nr_removed == 0;
15317a8e76a3SSteven Rostedt }
15327a8e76a3SSteven Rostedt 
15335040b4b7SVaibhav Nagarnaik static int
15345040b4b7SVaibhav Nagarnaik rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
15357a8e76a3SSteven Rostedt {
15365040b4b7SVaibhav Nagarnaik 	struct list_head *pages = &cpu_buffer->new_pages;
15375040b4b7SVaibhav Nagarnaik 	int retries, success;
15387a8e76a3SSteven Rostedt 
15395389f6faSThomas Gleixner 	raw_spin_lock_irq(&cpu_buffer->reader_lock);
15405040b4b7SVaibhav Nagarnaik 	/*
15415040b4b7SVaibhav Nagarnaik 	 * We are holding the reader lock, so the reader page won't be swapped
15425040b4b7SVaibhav Nagarnaik 	 * in the ring buffer. Now we are racing with the writer trying to
15435040b4b7SVaibhav Nagarnaik 	 * move head page and the tail page.
15445040b4b7SVaibhav Nagarnaik 	 * We are going to adapt the reader page update process where:
15455040b4b7SVaibhav Nagarnaik 	 * 1. We first splice the start and end of list of new pages between
15465040b4b7SVaibhav Nagarnaik 	 *    the head page and its previous page.
15475040b4b7SVaibhav Nagarnaik 	 * 2. We cmpxchg the prev_page->next to point from head page to the
15485040b4b7SVaibhav Nagarnaik 	 *    start of new pages list.
15495040b4b7SVaibhav Nagarnaik 	 * 3. Finally, we update the head->prev to the end of new list.
15505040b4b7SVaibhav Nagarnaik 	 *
15515040b4b7SVaibhav Nagarnaik 	 * We will try this process 10 times, to make sure that we don't keep
15525040b4b7SVaibhav Nagarnaik 	 * spinning.
15535040b4b7SVaibhav Nagarnaik 	 */
15545040b4b7SVaibhav Nagarnaik 	retries = 10;
15555040b4b7SVaibhav Nagarnaik 	success = 0;
15565040b4b7SVaibhav Nagarnaik 	while (retries--) {
15575040b4b7SVaibhav Nagarnaik 		struct list_head *head_page, *prev_page, *r;
15585040b4b7SVaibhav Nagarnaik 		struct list_head *last_page, *first_page;
15595040b4b7SVaibhav Nagarnaik 		struct list_head *head_page_with_bit;
156077ae365eSSteven Rostedt 
15615040b4b7SVaibhav Nagarnaik 		head_page = &rb_set_head_page(cpu_buffer)->list;
156254f7be5bSSteven Rostedt 		if (!head_page)
156354f7be5bSSteven Rostedt 			break;
15645040b4b7SVaibhav Nagarnaik 		prev_page = head_page->prev;
15655040b4b7SVaibhav Nagarnaik 
15665040b4b7SVaibhav Nagarnaik 		first_page = pages->next;
15675040b4b7SVaibhav Nagarnaik 		last_page  = pages->prev;
15685040b4b7SVaibhav Nagarnaik 
15695040b4b7SVaibhav Nagarnaik 		head_page_with_bit = (struct list_head *)
15705040b4b7SVaibhav Nagarnaik 				     ((unsigned long)head_page | RB_PAGE_HEAD);
15715040b4b7SVaibhav Nagarnaik 
15725040b4b7SVaibhav Nagarnaik 		last_page->next = head_page_with_bit;
15735040b4b7SVaibhav Nagarnaik 		first_page->prev = prev_page;
15745040b4b7SVaibhav Nagarnaik 
15755040b4b7SVaibhav Nagarnaik 		r = cmpxchg(&prev_page->next, head_page_with_bit, first_page);
15765040b4b7SVaibhav Nagarnaik 
15775040b4b7SVaibhav Nagarnaik 		if (r == head_page_with_bit) {
15785040b4b7SVaibhav Nagarnaik 			/*
15795040b4b7SVaibhav Nagarnaik 			 * yay, we replaced the page pointer with our new list,
15805040b4b7SVaibhav Nagarnaik 			 * now we just have to update the head page's prev
15815040b4b7SVaibhav Nagarnaik 			 * pointer to point to the end of the list
15825040b4b7SVaibhav Nagarnaik 			 */
15835040b4b7SVaibhav Nagarnaik 			head_page->prev = last_page;
15845040b4b7SVaibhav Nagarnaik 			success = 1;
15855040b4b7SVaibhav Nagarnaik 			break;
15867a8e76a3SSteven Rostedt 		}
15875040b4b7SVaibhav Nagarnaik 	}
15887a8e76a3SSteven Rostedt 
15895040b4b7SVaibhav Nagarnaik 	if (success)
15905040b4b7SVaibhav Nagarnaik 		INIT_LIST_HEAD(pages);
15915040b4b7SVaibhav Nagarnaik 	/*
15925040b4b7SVaibhav Nagarnaik 	 * If we weren't successful in adding the new pages, warn and
15935040b4b7SVaibhav Nagarnaik 	 * stop tracing.
15945040b4b7SVaibhav Nagarnaik 	 */
15955040b4b7SVaibhav Nagarnaik 	RB_WARN_ON(cpu_buffer, !success);
15965389f6faSThomas Gleixner 	raw_spin_unlock_irq(&cpu_buffer->reader_lock);
15975040b4b7SVaibhav Nagarnaik 
15985040b4b7SVaibhav Nagarnaik 	/* free pages if they weren't inserted */
15995040b4b7SVaibhav Nagarnaik 	if (!success) {
16005040b4b7SVaibhav Nagarnaik 		struct buffer_page *bpage, *tmp;
16015040b4b7SVaibhav Nagarnaik 		list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
16025040b4b7SVaibhav Nagarnaik 					 list) {
16035040b4b7SVaibhav Nagarnaik 			list_del_init(&bpage->list);
16045040b4b7SVaibhav Nagarnaik 			free_buffer_page(bpage);
16055040b4b7SVaibhav Nagarnaik 		}
16065040b4b7SVaibhav Nagarnaik 	}
16075040b4b7SVaibhav Nagarnaik 	return success;
16087a8e76a3SSteven Rostedt }
16097a8e76a3SSteven Rostedt 
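The three-step splice described in rb_insert_pages() can be pictured with ordinary pointers. Below is a compressed, single-threaded user-space sketch with illustrative names, using GCC's __sync builtin as a stand-in for cmpxchg, of splicing a new chain between the head page's previous page and the flagged head:

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

struct node { struct node *next, *prev; };

#define HEAD_FLAG 1UL

int main(void)
{
	struct node prev, head, n1, n2;
	bool ok;

	/* existing ring: prev -> head (the next pointer carries the HEAD flag) */
	prev.next = (struct node *)((uintptr_t)&head | HEAD_FLAG);
	head.prev = &prev;

	/* new chain n1 <-> n2 to be inserted before head */
	n1.prev = &prev;  n1.next = &n2;
	n2.prev = &n1;

	/* step 1: point the end of the new chain at the flagged head */
	n2.next = prev.next;

	/* step 2: cmpxchg prev->next from the flagged head to the new chain */
	ok = __sync_bool_compare_and_swap(&prev.next,
			(struct node *)((uintptr_t)&head | HEAD_FLAG), &n1);

	/* step 3: only on success, fix head->prev to the end of the new chain */
	if (ok)
		head.prev = &n2;

	printf("splice %s\n", ok ? "succeeded" : "lost the race, retry");
	return 0;
}

As in the real code, a failed compare-and-swap means the writer moved the head in the meantime, and the whole splice is simply retried a bounded number of times.
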
161083f40318SVaibhav Nagarnaik static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer)
1611438ced17SVaibhav Nagarnaik {
16125040b4b7SVaibhav Nagarnaik 	int success;
161383f40318SVaibhav Nagarnaik 
16145040b4b7SVaibhav Nagarnaik 	if (cpu_buffer->nr_pages_to_update > 0)
16155040b4b7SVaibhav Nagarnaik 		success = rb_insert_pages(cpu_buffer);
16165040b4b7SVaibhav Nagarnaik 	else
16175040b4b7SVaibhav Nagarnaik 		success = rb_remove_pages(cpu_buffer,
16185040b4b7SVaibhav Nagarnaik 					-cpu_buffer->nr_pages_to_update);
16195040b4b7SVaibhav Nagarnaik 
16205040b4b7SVaibhav Nagarnaik 	if (success)
1621438ced17SVaibhav Nagarnaik 		cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update;
162283f40318SVaibhav Nagarnaik }
162383f40318SVaibhav Nagarnaik 
162483f40318SVaibhav Nagarnaik static void update_pages_handler(struct work_struct *work)
162583f40318SVaibhav Nagarnaik {
162683f40318SVaibhav Nagarnaik 	struct ring_buffer_per_cpu *cpu_buffer = container_of(work,
162783f40318SVaibhav Nagarnaik 			struct ring_buffer_per_cpu, update_pages_work);
162883f40318SVaibhav Nagarnaik 	rb_update_pages(cpu_buffer);
162905fdd70dSVaibhav Nagarnaik 	complete(&cpu_buffer->update_done);
1630438ced17SVaibhav Nagarnaik }
1631438ced17SVaibhav Nagarnaik 
16327a8e76a3SSteven Rostedt /**
16337a8e76a3SSteven Rostedt  * ring_buffer_resize - resize the ring buffer
16347a8e76a3SSteven Rostedt  * @buffer: the buffer to resize.
16357a8e76a3SSteven Rostedt  * @size: the new size.
1636d611851bSzhangwei(Jovi)  * @cpu_id: the cpu buffer to resize
16377a8e76a3SSteven Rostedt  *
16387a8e76a3SSteven Rostedt  * Minimum size is 2 * BUF_PAGE_SIZE.
16397a8e76a3SSteven Rostedt  *
164083f40318SVaibhav Nagarnaik  * Returns the new size on success and < 0 on failure.
16417a8e76a3SSteven Rostedt  */
1642438ced17SVaibhav Nagarnaik int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
1643438ced17SVaibhav Nagarnaik 			int cpu_id)
16447a8e76a3SSteven Rostedt {
16457a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
16469b94a8fbSSteven Rostedt (Red Hat) 	unsigned long nr_pages;
164783f40318SVaibhav Nagarnaik 	int cpu, err = 0;
16487a8e76a3SSteven Rostedt 
1649ee51a1deSIngo Molnar 	/*
1650ee51a1deSIngo Molnar 	 * Always succeed at resizing a non-existent buffer:
1651ee51a1deSIngo Molnar 	 */
1652ee51a1deSIngo Molnar 	if (!buffer)
1653ee51a1deSIngo Molnar 		return size;
1654ee51a1deSIngo Molnar 
16556a31e1f1SSteven Rostedt 	/* Make sure the requested buffer exists */
16566a31e1f1SSteven Rostedt 	if (cpu_id != RING_BUFFER_ALL_CPUS &&
16576a31e1f1SSteven Rostedt 	    !cpumask_test_cpu(cpu_id, buffer->cpumask))
16586a31e1f1SSteven Rostedt 		return size;
16596a31e1f1SSteven Rostedt 
166059643d15SSteven Rostedt (Red Hat) 	nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
16617a8e76a3SSteven Rostedt 
16627a8e76a3SSteven Rostedt 	/* we need a minimum of two pages */
166359643d15SSteven Rostedt (Red Hat) 	if (nr_pages < 2)
166459643d15SSteven Rostedt (Red Hat) 		nr_pages = 2;
16657a8e76a3SSteven Rostedt 
166659643d15SSteven Rostedt (Red Hat) 	size = nr_pages * BUF_PAGE_SIZE;
16677a8e76a3SSteven Rostedt 
166883f40318SVaibhav Nagarnaik 	/*
166983f40318SVaibhav Nagarnaik 	 * Don't succeed if resizing is disabled, as a reader might be
167083f40318SVaibhav Nagarnaik 	 * manipulating the ring buffer and expecting a sane state while
167183f40318SVaibhav Nagarnaik 	 * resizing is disabled.
167283f40318SVaibhav Nagarnaik 	 */
167383f40318SVaibhav Nagarnaik 	if (atomic_read(&buffer->resize_disabled))
167483f40318SVaibhav Nagarnaik 		return -EBUSY;
167583f40318SVaibhav Nagarnaik 
167683f40318SVaibhav Nagarnaik 	/* prevent another thread from changing buffer sizes */
167783f40318SVaibhav Nagarnaik 	mutex_lock(&buffer->mutex);
167883f40318SVaibhav Nagarnaik 
1679438ced17SVaibhav Nagarnaik 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
1680438ced17SVaibhav Nagarnaik 		/* calculate the pages to update */
16817a8e76a3SSteven Rostedt 		for_each_buffer_cpu(buffer, cpu) {
16827a8e76a3SSteven Rostedt 			cpu_buffer = buffer->buffers[cpu];
1683438ced17SVaibhav Nagarnaik 
1684438ced17SVaibhav Nagarnaik 			cpu_buffer->nr_pages_to_update = nr_pages -
1685438ced17SVaibhav Nagarnaik 							cpu_buffer->nr_pages;
1686438ced17SVaibhav Nagarnaik 			/*
1687438ced17SVaibhav Nagarnaik 			 * nothing more to do when removing pages or when there is no update
1688438ced17SVaibhav Nagarnaik 			 */
1689438ced17SVaibhav Nagarnaik 			if (cpu_buffer->nr_pages_to_update <= 0)
1690438ced17SVaibhav Nagarnaik 				continue;
1691438ced17SVaibhav Nagarnaik 			/*
1692438ced17SVaibhav Nagarnaik 			 * to add pages, make sure all new pages can be
1693438ced17SVaibhav Nagarnaik 			 * allocated without receiving ENOMEM
1694438ced17SVaibhav Nagarnaik 			 */
1695438ced17SVaibhav Nagarnaik 			INIT_LIST_HEAD(&cpu_buffer->new_pages);
1696438ced17SVaibhav Nagarnaik 			if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
169783f40318SVaibhav Nagarnaik 						&cpu_buffer->new_pages, cpu)) {
1698438ced17SVaibhav Nagarnaik 				/* not enough memory for new pages */
169983f40318SVaibhav Nagarnaik 				err = -ENOMEM;
170083f40318SVaibhav Nagarnaik 				goto out_err;
170183f40318SVaibhav Nagarnaik 			}
170283f40318SVaibhav Nagarnaik 		}
170383f40318SVaibhav Nagarnaik 
170483f40318SVaibhav Nagarnaik 		get_online_cpus();
170583f40318SVaibhav Nagarnaik 		/*
170683f40318SVaibhav Nagarnaik 		 * Fire off all the required work handlers.
170705fdd70dSVaibhav Nagarnaik 		 * We can't schedule work on offline CPUs, but that's fine:
170883f40318SVaibhav Nagarnaik 		 * we can change their buffer sizes without any race.
170983f40318SVaibhav Nagarnaik 		 */
171083f40318SVaibhav Nagarnaik 		for_each_buffer_cpu(buffer, cpu) {
171183f40318SVaibhav Nagarnaik 			cpu_buffer = buffer->buffers[cpu];
171205fdd70dSVaibhav Nagarnaik 			if (!cpu_buffer->nr_pages_to_update)
171383f40318SVaibhav Nagarnaik 				continue;
171483f40318SVaibhav Nagarnaik 
1715021c5b34SCorey Minyard 			/* Can't run something on an offline CPU. */
1716021c5b34SCorey Minyard 			if (!cpu_online(cpu)) {
1717f5eb5588SSteven Rostedt (Red Hat) 				rb_update_pages(cpu_buffer);
1718f5eb5588SSteven Rostedt (Red Hat) 				cpu_buffer->nr_pages_to_update = 0;
1719f5eb5588SSteven Rostedt (Red Hat) 			} else {
172005fdd70dSVaibhav Nagarnaik 				schedule_work_on(cpu,
172105fdd70dSVaibhav Nagarnaik 						&cpu_buffer->update_pages_work);
1722f5eb5588SSteven Rostedt (Red Hat) 			}
17237a8e76a3SSteven Rostedt 		}
1724438ced17SVaibhav Nagarnaik 
1725438ced17SVaibhav Nagarnaik 		/* wait for all the updates to complete */
1726438ced17SVaibhav Nagarnaik 		for_each_buffer_cpu(buffer, cpu) {
1727438ced17SVaibhav Nagarnaik 			cpu_buffer = buffer->buffers[cpu];
172805fdd70dSVaibhav Nagarnaik 			if (!cpu_buffer->nr_pages_to_update)
172983f40318SVaibhav Nagarnaik 				continue;
173083f40318SVaibhav Nagarnaik 
173105fdd70dSVaibhav Nagarnaik 			if (cpu_online(cpu))
173205fdd70dSVaibhav Nagarnaik 				wait_for_completion(&cpu_buffer->update_done);
173383f40318SVaibhav Nagarnaik 			cpu_buffer->nr_pages_to_update = 0;
1734438ced17SVaibhav Nagarnaik 		}
173583f40318SVaibhav Nagarnaik 
173683f40318SVaibhav Nagarnaik 		put_online_cpus();
1737438ced17SVaibhav Nagarnaik 	} else {
17388e49f418SVaibhav Nagarnaik 		/* Make sure this CPU has been initialized */
17398e49f418SVaibhav Nagarnaik 		if (!cpumask_test_cpu(cpu_id, buffer->cpumask))
17408e49f418SVaibhav Nagarnaik 			goto out;
17418e49f418SVaibhav Nagarnaik 
1742438ced17SVaibhav Nagarnaik 		cpu_buffer = buffer->buffers[cpu_id];
174383f40318SVaibhav Nagarnaik 
1744438ced17SVaibhav Nagarnaik 		if (nr_pages == cpu_buffer->nr_pages)
17457a8e76a3SSteven Rostedt 			goto out;
1746438ced17SVaibhav Nagarnaik 
1747438ced17SVaibhav Nagarnaik 		cpu_buffer->nr_pages_to_update = nr_pages -
1748438ced17SVaibhav Nagarnaik 						cpu_buffer->nr_pages;
1749438ced17SVaibhav Nagarnaik 
1750438ced17SVaibhav Nagarnaik 		INIT_LIST_HEAD(&cpu_buffer->new_pages);
1751438ced17SVaibhav Nagarnaik 		if (cpu_buffer->nr_pages_to_update > 0 &&
1752438ced17SVaibhav Nagarnaik 			__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
175383f40318SVaibhav Nagarnaik 					    &cpu_buffer->new_pages, cpu_id)) {
175483f40318SVaibhav Nagarnaik 			err = -ENOMEM;
175583f40318SVaibhav Nagarnaik 			goto out_err;
175683f40318SVaibhav Nagarnaik 		}
1757438ced17SVaibhav Nagarnaik 
175883f40318SVaibhav Nagarnaik 		get_online_cpus();
175983f40318SVaibhav Nagarnaik 
1760021c5b34SCorey Minyard 		/* Can't run something on an offline CPU. */
1761021c5b34SCorey Minyard 		if (!cpu_online(cpu_id))
1762f5eb5588SSteven Rostedt (Red Hat) 			rb_update_pages(cpu_buffer);
1763f5eb5588SSteven Rostedt (Red Hat) 		else {
176483f40318SVaibhav Nagarnaik 			schedule_work_on(cpu_id,
176583f40318SVaibhav Nagarnaik 					 &cpu_buffer->update_pages_work);
176605fdd70dSVaibhav Nagarnaik 			wait_for_completion(&cpu_buffer->update_done);
1767f5eb5588SSteven Rostedt (Red Hat) 		}
176883f40318SVaibhav Nagarnaik 
176983f40318SVaibhav Nagarnaik 		cpu_buffer->nr_pages_to_update = 0;
177005fdd70dSVaibhav Nagarnaik 		put_online_cpus();
17717a8e76a3SSteven Rostedt 	}
17727a8e76a3SSteven Rostedt 
17737a8e76a3SSteven Rostedt  out:
1774659f451fSSteven Rostedt 	/*
1775659f451fSSteven Rostedt 	 * The ring buffer resize can happen with the ring buffer
1776659f451fSSteven Rostedt 	 * enabled, so that the update disturbs the tracing as little
1777659f451fSSteven Rostedt 	 * as possible. But if the buffer is disabled, we do not need
1778659f451fSSteven Rostedt 	 * to worry about that, and we can take the time to verify
1779659f451fSSteven Rostedt 	 * that the buffer is not corrupt.
1780659f451fSSteven Rostedt 	 */
1781659f451fSSteven Rostedt 	if (atomic_read(&buffer->record_disabled)) {
1782659f451fSSteven Rostedt 		atomic_inc(&buffer->record_disabled);
1783659f451fSSteven Rostedt 		/*
1784659f451fSSteven Rostedt 		 * Even though the buffer was disabled, we must make sure
1785659f451fSSteven Rostedt 		 * that it is truly disabled before calling rb_check_pages.
1786659f451fSSteven Rostedt 		 * There could have been a race between checking
1787659f451fSSteven Rostedt 		 * record_disabled and incrementing it.
1788659f451fSSteven Rostedt 		 */
1789659f451fSSteven Rostedt 		synchronize_sched();
1790659f451fSSteven Rostedt 		for_each_buffer_cpu(buffer, cpu) {
1791659f451fSSteven Rostedt 			cpu_buffer = buffer->buffers[cpu];
1792659f451fSSteven Rostedt 			rb_check_pages(cpu_buffer);
1793659f451fSSteven Rostedt 		}
1794659f451fSSteven Rostedt 		atomic_dec(&buffer->record_disabled);
1795659f451fSSteven Rostedt 	}
1796659f451fSSteven Rostedt 
17977a8e76a3SSteven Rostedt 	mutex_unlock(&buffer->mutex);
17987a8e76a3SSteven Rostedt 	return size;
17997a8e76a3SSteven Rostedt 
180083f40318SVaibhav Nagarnaik  out_err:
1801438ced17SVaibhav Nagarnaik 	for_each_buffer_cpu(buffer, cpu) {
1802438ced17SVaibhav Nagarnaik 		struct buffer_page *bpage, *tmp;
180383f40318SVaibhav Nagarnaik 
1804438ced17SVaibhav Nagarnaik 		cpu_buffer = buffer->buffers[cpu];
1805438ced17SVaibhav Nagarnaik 		cpu_buffer->nr_pages_to_update = 0;
180683f40318SVaibhav Nagarnaik 
1807438ced17SVaibhav Nagarnaik 		if (list_empty(&cpu_buffer->new_pages))
1808438ced17SVaibhav Nagarnaik 			continue;
180983f40318SVaibhav Nagarnaik 
1810438ced17SVaibhav Nagarnaik 		list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
1811438ced17SVaibhav Nagarnaik 					list) {
1812044fa782SSteven Rostedt 			list_del_init(&bpage->list);
1813044fa782SSteven Rostedt 			free_buffer_page(bpage);
18147a8e76a3SSteven Rostedt 		}
1815438ced17SVaibhav Nagarnaik 	}
1816641d2f63SVegard Nossum 	mutex_unlock(&buffer->mutex);
181783f40318SVaibhav Nagarnaik 	return err;
18187a8e76a3SSteven Rostedt }
1819c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_resize);
18207a8e76a3SSteven Rostedt 
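A hedged usage sketch of the resize API: the size is requested in bytes and rounded up to whole buffer pages internally, RING_BUFFER_ALL_CPUS applies the change to every per-cpu buffer, and a specific CPU number resizes just that one. This is a kernel-context fragment with illustrative sizes, not a definitive recipe.

/* Sketch only: grow all per-cpu buffers, then shrink CPU 0's buffer. */
static int example_resize(struct ring_buffer *rb)
{
	int ret;

	/* 8 MB per CPU, applied to every per-cpu buffer */
	ret = ring_buffer_resize(rb, 8 * 1024 * 1024, RING_BUFFER_ALL_CPUS);
	if (ret < 0)
		return ret;

	/* shrink only CPU 0 back to roughly two pages (the enforced minimum) */
	ret = ring_buffer_resize(rb, 2 * PAGE_SIZE, 0);
	return ret < 0 ? ret : 0;
}
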
1821750912faSDavid Sharp void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val)
1822750912faSDavid Sharp {
1823750912faSDavid Sharp 	mutex_lock(&buffer->mutex);
1824750912faSDavid Sharp 	if (val)
1825750912faSDavid Sharp 		buffer->flags |= RB_FL_OVERWRITE;
1826750912faSDavid Sharp 	else
1827750912faSDavid Sharp 		buffer->flags &= ~RB_FL_OVERWRITE;
1828750912faSDavid Sharp 	mutex_unlock(&buffer->mutex);
1829750912faSDavid Sharp }
1830750912faSDavid Sharp EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
1831750912faSDavid Sharp 
18322289d567SSteven Rostedt (Red Hat) static __always_inline void *
1833044fa782SSteven Rostedt __rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
18348789a9e7SSteven Rostedt {
1835044fa782SSteven Rostedt 	return bpage->data + index;
18368789a9e7SSteven Rostedt }
18378789a9e7SSteven Rostedt 
18382289d567SSteven Rostedt (Red Hat) static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
18397a8e76a3SSteven Rostedt {
1840044fa782SSteven Rostedt 	return bpage->page->data + index;
18417a8e76a3SSteven Rostedt }
18427a8e76a3SSteven Rostedt 
18432289d567SSteven Rostedt (Red Hat) static __always_inline struct ring_buffer_event *
1844d769041fSSteven Rostedt rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
18457a8e76a3SSteven Rostedt {
18466f807acdSSteven Rostedt 	return __rb_page_index(cpu_buffer->reader_page,
18476f807acdSSteven Rostedt 			       cpu_buffer->reader_page->read);
18486f807acdSSteven Rostedt }
18496f807acdSSteven Rostedt 
18502289d567SSteven Rostedt (Red Hat) static __always_inline struct ring_buffer_event *
18517a8e76a3SSteven Rostedt rb_iter_head_event(struct ring_buffer_iter *iter)
18527a8e76a3SSteven Rostedt {
18536f807acdSSteven Rostedt 	return __rb_page_index(iter->head_page, iter->head);
18547a8e76a3SSteven Rostedt }
18557a8e76a3SSteven Rostedt 
18562289d567SSteven Rostedt (Red Hat) static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
1857bf41a158SSteven Rostedt {
1858abc9b56dSSteven Rostedt 	return local_read(&bpage->page->commit);
1859bf41a158SSteven Rostedt }
1860bf41a158SSteven Rostedt 
186125985edcSLucas De Marchi /* Size is determined by what has been committed */
18622289d567SSteven Rostedt (Red Hat) static __always_inline unsigned rb_page_size(struct buffer_page *bpage)
1863bf41a158SSteven Rostedt {
1864bf41a158SSteven Rostedt 	return rb_page_commit(bpage);
1865bf41a158SSteven Rostedt }
1866bf41a158SSteven Rostedt 
18672289d567SSteven Rostedt (Red Hat) static __always_inline unsigned
1868bf41a158SSteven Rostedt rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
1869bf41a158SSteven Rostedt {
1870bf41a158SSteven Rostedt 	return rb_page_commit(cpu_buffer->commit_page);
1871bf41a158SSteven Rostedt }
1872bf41a158SSteven Rostedt 
18732289d567SSteven Rostedt (Red Hat) static __always_inline unsigned
1874bf41a158SSteven Rostedt rb_event_index(struct ring_buffer_event *event)
18757a8e76a3SSteven Rostedt {
1876bf41a158SSteven Rostedt 	unsigned long addr = (unsigned long)event;
1877bf41a158SSteven Rostedt 
187822f470f8SSteven Rostedt 	return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
18797a8e76a3SSteven Rostedt }
18807a8e76a3SSteven Rostedt 
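A small arithmetic sketch of rb_event_index(): masking the event address with ~PAGE_MASK keeps only its offset within the page, and subtracting the data-page header size turns that into an index into the page's data[] array. The 4096-byte page and 16-byte header below are illustrative values, not the kernel's actual BUF_PAGE_HDR_SIZE.

#include <stdio.h>

#define PAGE_SIZE_EX   4096UL
#define PAGE_MASK_EX   (~(PAGE_SIZE_EX - 1))
#define HDR_SIZE_EX    16UL	/* stand-in for BUF_PAGE_HDR_SIZE */

int main(void)
{
	unsigned long page  = 0x1234000UL;		/* page-aligned address */
	unsigned long event = page + HDR_SIZE_EX + 200;	/* event at data[200] */

	unsigned long index = (event & ~PAGE_MASK_EX) - HDR_SIZE_EX;

	printf("index = %lu\n", index);		/* prints 200 */
	return 0;
}
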
188134a148bfSAndrew Morton static void rb_inc_iter(struct ring_buffer_iter *iter)
1882d769041fSSteven Rostedt {
1883d769041fSSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1884d769041fSSteven Rostedt 
1885d769041fSSteven Rostedt 	/*
1886d769041fSSteven Rostedt 	 * The iterator could be on the reader page (it starts there).
1887d769041fSSteven Rostedt 	 * But the head could have moved, since the reader was
1888d769041fSSteven Rostedt 	 * found. Check for this case and assign the iterator
1889d769041fSSteven Rostedt 	 * to the head page instead of next.
1890d769041fSSteven Rostedt 	 */
1891d769041fSSteven Rostedt 	if (iter->head_page == cpu_buffer->reader_page)
189277ae365eSSteven Rostedt 		iter->head_page = rb_set_head_page(cpu_buffer);
1893d769041fSSteven Rostedt 	else
1894d769041fSSteven Rostedt 		rb_inc_page(cpu_buffer, &iter->head_page);
1895d769041fSSteven Rostedt 
1896abc9b56dSSteven Rostedt 	iter->read_stamp = iter->head_page->page->time_stamp;
18977a8e76a3SSteven Rostedt 	iter->head = 0;
18987a8e76a3SSteven Rostedt }
18997a8e76a3SSteven Rostedt 
190077ae365eSSteven Rostedt /*
190177ae365eSSteven Rostedt  * rb_handle_head_page - writer hit the head page
190277ae365eSSteven Rostedt  *
190377ae365eSSteven Rostedt  * Returns: +1 to retry page
190477ae365eSSteven Rostedt  *           0 to continue
190577ae365eSSteven Rostedt  *          -1 on error
190677ae365eSSteven Rostedt  */
190777ae365eSSteven Rostedt static int
190877ae365eSSteven Rostedt rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
190977ae365eSSteven Rostedt 		    struct buffer_page *tail_page,
191077ae365eSSteven Rostedt 		    struct buffer_page *next_page)
191177ae365eSSteven Rostedt {
191277ae365eSSteven Rostedt 	struct buffer_page *new_head;
191377ae365eSSteven Rostedt 	int entries;
191477ae365eSSteven Rostedt 	int type;
191577ae365eSSteven Rostedt 	int ret;
191677ae365eSSteven Rostedt 
191777ae365eSSteven Rostedt 	entries = rb_page_entries(next_page);
191877ae365eSSteven Rostedt 
191977ae365eSSteven Rostedt 	/*
192077ae365eSSteven Rostedt 	 * The hard part is here. We need to move the head
192177ae365eSSteven Rostedt 	 * forward, and protect against both readers on
192277ae365eSSteven Rostedt 	 * other CPUs and writers coming in via interrupts.
192377ae365eSSteven Rostedt 	 */
192477ae365eSSteven Rostedt 	type = rb_head_page_set_update(cpu_buffer, next_page, tail_page,
192577ae365eSSteven Rostedt 				       RB_PAGE_HEAD);
192677ae365eSSteven Rostedt 
192777ae365eSSteven Rostedt 	/*
192877ae365eSSteven Rostedt 	 * type can be one of four:
192977ae365eSSteven Rostedt 	 *  NORMAL - an interrupt already moved it for us
193077ae365eSSteven Rostedt 	 *  HEAD   - we are the first to get here.
193177ae365eSSteven Rostedt 	 *  UPDATE - we are the interrupt interrupting
193277ae365eSSteven Rostedt 	 *           a current move.
193377ae365eSSteven Rostedt 	 *  MOVED  - a reader on another CPU moved the next
193477ae365eSSteven Rostedt 	 *           pointer to its reader page. Give up
193577ae365eSSteven Rostedt 	 *           and try again.
193677ae365eSSteven Rostedt 	 */
193777ae365eSSteven Rostedt 
193877ae365eSSteven Rostedt 	switch (type) {
193977ae365eSSteven Rostedt 	case RB_PAGE_HEAD:
194077ae365eSSteven Rostedt 		/*
194177ae365eSSteven Rostedt 		 * We changed the head to UPDATE, thus
194277ae365eSSteven Rostedt 		 * it is our responsibility to update
194377ae365eSSteven Rostedt 		 * the counters.
194477ae365eSSteven Rostedt 		 */
194577ae365eSSteven Rostedt 		local_add(entries, &cpu_buffer->overrun);
1946c64e148aSVaibhav Nagarnaik 		local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
194777ae365eSSteven Rostedt 
194877ae365eSSteven Rostedt 		/*
194977ae365eSSteven Rostedt 		 * The entries will be zeroed out when we move the
195077ae365eSSteven Rostedt 		 * tail page.
195177ae365eSSteven Rostedt 		 */
195277ae365eSSteven Rostedt 
195377ae365eSSteven Rostedt 		/* still more to do */
195477ae365eSSteven Rostedt 		break;
195577ae365eSSteven Rostedt 
195677ae365eSSteven Rostedt 	case RB_PAGE_UPDATE:
195777ae365eSSteven Rostedt 		/*
195877ae365eSSteven Rostedt 		 * This is an interrupt that interrupted the
195977ae365eSSteven Rostedt 		 * previous update. Still more to do.
196077ae365eSSteven Rostedt 		 */
196177ae365eSSteven Rostedt 		break;
196277ae365eSSteven Rostedt 	case RB_PAGE_NORMAL:
196377ae365eSSteven Rostedt 		/*
196477ae365eSSteven Rostedt 		 * An interrupt came in before the update
196577ae365eSSteven Rostedt 		 * and processed this for us.
196677ae365eSSteven Rostedt 		 * Nothing left to do.
196777ae365eSSteven Rostedt 		 */
196877ae365eSSteven Rostedt 		return 1;
196977ae365eSSteven Rostedt 	case RB_PAGE_MOVED:
197077ae365eSSteven Rostedt 		/*
197177ae365eSSteven Rostedt 		 * The reader is on another CPU and just did
197277ae365eSSteven Rostedt 		 * a swap with our next_page.
197377ae365eSSteven Rostedt 		 * Try again.
197477ae365eSSteven Rostedt 		 */
197577ae365eSSteven Rostedt 		return 1;
197677ae365eSSteven Rostedt 	default:
197777ae365eSSteven Rostedt 		RB_WARN_ON(cpu_buffer, 1); /* WTF??? */
197877ae365eSSteven Rostedt 		return -1;
197977ae365eSSteven Rostedt 	}
198077ae365eSSteven Rostedt 
198177ae365eSSteven Rostedt 	/*
198277ae365eSSteven Rostedt 	 * Now that we are here, the old head pointer is
198377ae365eSSteven Rostedt 	 * set to UPDATE. This will keep the reader from
198477ae365eSSteven Rostedt 	 * swapping the head page with the reader page.
198577ae365eSSteven Rostedt 	 * The reader (on another CPU) will spin till
198677ae365eSSteven Rostedt 	 * we are finished.
198777ae365eSSteven Rostedt 	 *
198877ae365eSSteven Rostedt 	 * We just need to protect against interrupts
198977ae365eSSteven Rostedt 	 * doing the job. We will set the next pointer
199077ae365eSSteven Rostedt 	 * to HEAD. After that, we set the old pointer
199177ae365eSSteven Rostedt 	 * to NORMAL, but only if it was HEAD before.
199277ae365eSSteven Rostedt 	 * Otherwise we are an interrupt, and only
199377ae365eSSteven Rostedt 	 * want the outermost commit to reset it.
199477ae365eSSteven Rostedt 	 */
199577ae365eSSteven Rostedt 	new_head = next_page;
199677ae365eSSteven Rostedt 	rb_inc_page(cpu_buffer, &new_head);
199777ae365eSSteven Rostedt 
199877ae365eSSteven Rostedt 	ret = rb_head_page_set_head(cpu_buffer, new_head, next_page,
199977ae365eSSteven Rostedt 				    RB_PAGE_NORMAL);
200077ae365eSSteven Rostedt 
200177ae365eSSteven Rostedt 	/*
200277ae365eSSteven Rostedt 	 * Valid returns are:
200377ae365eSSteven Rostedt 	 *  HEAD   - an interrupt came in and already set it.
200477ae365eSSteven Rostedt 	 *  NORMAL - One of two things:
200577ae365eSSteven Rostedt 	 *            1) We really set it.
200677ae365eSSteven Rostedt 	 *            2) A bunch of interrupts came in and moved
200777ae365eSSteven Rostedt 	 *               the page forward again.
200877ae365eSSteven Rostedt 	 */
200977ae365eSSteven Rostedt 	switch (ret) {
201077ae365eSSteven Rostedt 	case RB_PAGE_HEAD:
201177ae365eSSteven Rostedt 	case RB_PAGE_NORMAL:
201277ae365eSSteven Rostedt 		/* OK */
201377ae365eSSteven Rostedt 		break;
201477ae365eSSteven Rostedt 	default:
201577ae365eSSteven Rostedt 		RB_WARN_ON(cpu_buffer, 1);
201677ae365eSSteven Rostedt 		return -1;
201777ae365eSSteven Rostedt 	}
201877ae365eSSteven Rostedt 
201977ae365eSSteven Rostedt 	/*
202077ae365eSSteven Rostedt 	 * It is possible that an interrupt came in,
202177ae365eSSteven Rostedt 	 * set the head up, then more interrupts came in
202277ae365eSSteven Rostedt 	 * and moved it again. When we get back here,
202377ae365eSSteven Rostedt 	 * the page would have been set to NORMAL but we
202477ae365eSSteven Rostedt 	 * just set it back to HEAD.
202577ae365eSSteven Rostedt 	 *
202677ae365eSSteven Rostedt 	 * How do you detect this? Well, if that happened
202777ae365eSSteven Rostedt 	 * the tail page would have moved.
202877ae365eSSteven Rostedt 	 */
202977ae365eSSteven Rostedt 	if (ret == RB_PAGE_NORMAL) {
20308573636eSSteven Rostedt (Red Hat) 		struct buffer_page *buffer_tail_page;
20318573636eSSteven Rostedt (Red Hat) 
20328573636eSSteven Rostedt (Red Hat) 		buffer_tail_page = READ_ONCE(cpu_buffer->tail_page);
203377ae365eSSteven Rostedt 		/*
203477ae365eSSteven Rostedt 		 * If the tail had moved past next, then we need
203577ae365eSSteven Rostedt 		 * to reset the pointer.
203677ae365eSSteven Rostedt 		 */
20378573636eSSteven Rostedt (Red Hat) 		if (buffer_tail_page != tail_page &&
20388573636eSSteven Rostedt (Red Hat) 		    buffer_tail_page != next_page)
203977ae365eSSteven Rostedt 			rb_head_page_set_normal(cpu_buffer, new_head,
204077ae365eSSteven Rostedt 						next_page,
204177ae365eSSteven Rostedt 						RB_PAGE_HEAD);
204277ae365eSSteven Rostedt 	}
204377ae365eSSteven Rostedt 
204477ae365eSSteven Rostedt 	/*
204577ae365eSSteven Rostedt 	 * If this was the outermost commit (the one that
204677ae365eSSteven Rostedt 	 * changed the original pointer from HEAD to UPDATE),
204777ae365eSSteven Rostedt 	 * then it is up to us to reset it to NORMAL.
204877ae365eSSteven Rostedt 	 */
204977ae365eSSteven Rostedt 	if (type == RB_PAGE_HEAD) {
205077ae365eSSteven Rostedt 		ret = rb_head_page_set_normal(cpu_buffer, next_page,
205177ae365eSSteven Rostedt 					      tail_page,
205277ae365eSSteven Rostedt 					      RB_PAGE_UPDATE);
205377ae365eSSteven Rostedt 		if (RB_WARN_ON(cpu_buffer,
205477ae365eSSteven Rostedt 			       ret != RB_PAGE_UPDATE))
205577ae365eSSteven Rostedt 			return -1;
205677ae365eSSteven Rostedt 	}
205777ae365eSSteven Rostedt 
205877ae365eSSteven Rostedt 	return 0;
205977ae365eSSteven Rostedt }
206077ae365eSSteven Rostedt 
2061c7b09308SSteven Rostedt static inline void
2062c7b09308SSteven Rostedt rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
2063fcc742eaSSteven Rostedt (Red Hat) 	      unsigned long tail, struct rb_event_info *info)
2064c7b09308SSteven Rostedt {
2065fcc742eaSSteven Rostedt (Red Hat) 	struct buffer_page *tail_page = info->tail_page;
2066c7b09308SSteven Rostedt 	struct ring_buffer_event *event;
2067fcc742eaSSteven Rostedt (Red Hat) 	unsigned long length = info->length;
2068c7b09308SSteven Rostedt 
2069c7b09308SSteven Rostedt 	/*
2070c7b09308SSteven Rostedt 	 * Only the event that crossed the page boundary
2071c7b09308SSteven Rostedt 	 * must fill the old tail_page with padding.
2072c7b09308SSteven Rostedt 	 */
2073c7b09308SSteven Rostedt 	if (tail >= BUF_PAGE_SIZE) {
2074b3230c8bSSteven Rostedt 		/*
2075b3230c8bSSteven Rostedt 		 * If the page was filled, then we still need
2076b3230c8bSSteven Rostedt 		 * to update the real_end. Reset it to zero
2077b3230c8bSSteven Rostedt 		 * and the reader will ignore it.
2078b3230c8bSSteven Rostedt 		 */
2079b3230c8bSSteven Rostedt 		if (tail == BUF_PAGE_SIZE)
2080b3230c8bSSteven Rostedt 			tail_page->real_end = 0;
2081b3230c8bSSteven Rostedt 
2082c7b09308SSteven Rostedt 		local_sub(length, &tail_page->write);
2083c7b09308SSteven Rostedt 		return;
2084c7b09308SSteven Rostedt 	}
2085c7b09308SSteven Rostedt 
2086c7b09308SSteven Rostedt 	event = __rb_page_index(tail_page, tail);
2087b0b7065bSLinus Torvalds 	kmemcheck_annotate_bitfield(event, bitfield);
2088c7b09308SSteven Rostedt 
2089c64e148aSVaibhav Nagarnaik 	/* account for padding bytes */
2090c64e148aSVaibhav Nagarnaik 	local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
2091c64e148aSVaibhav Nagarnaik 
2092c7b09308SSteven Rostedt 	/*
2093ff0ff84aSSteven Rostedt 	 * Save the original length in the metadata.
2094ff0ff84aSSteven Rostedt 	 * This will be used by the reader to update the
2095ff0ff84aSSteven Rostedt 	 * lost event counter.
2096ff0ff84aSSteven Rostedt 	 */
2097ff0ff84aSSteven Rostedt 	tail_page->real_end = tail;
2098ff0ff84aSSteven Rostedt 
2099ff0ff84aSSteven Rostedt 	/*
2100c7b09308SSteven Rostedt 	 * If this event is bigger than the minimum size, then
2101c7b09308SSteven Rostedt 	 * we need to be careful that we don't subtract the
2102c7b09308SSteven Rostedt 	 * write counter enough to allow another writer to slip
2103c7b09308SSteven Rostedt 	 * in on this page.
2104c7b09308SSteven Rostedt 	 * We put in a discarded commit instead, to make sure
2105c7b09308SSteven Rostedt 	 * that this space is not used again.
2106c7b09308SSteven Rostedt 	 *
2107c7b09308SSteven Rostedt 	 * If we are less than the minimum size, we don't need to
2108c7b09308SSteven Rostedt 	 * worry about it.
2109c7b09308SSteven Rostedt 	 */
2110c7b09308SSteven Rostedt 	if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) {
2111c7b09308SSteven Rostedt 		/* No room for any events */
2112c7b09308SSteven Rostedt 
2113c7b09308SSteven Rostedt 		/* Mark the rest of the page with padding */
2114c7b09308SSteven Rostedt 		rb_event_set_padding(event);
2115c7b09308SSteven Rostedt 
2116c7b09308SSteven Rostedt 		/* Set the write back to the previous setting */
2117c7b09308SSteven Rostedt 		local_sub(length, &tail_page->write);
2118c7b09308SSteven Rostedt 		return;
2119c7b09308SSteven Rostedt 	}
2120c7b09308SSteven Rostedt 
2121c7b09308SSteven Rostedt 	/* Put in a discarded event */
2122c7b09308SSteven Rostedt 	event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE;
2123c7b09308SSteven Rostedt 	event->type_len = RINGBUF_TYPE_PADDING;
2124c7b09308SSteven Rostedt 	/* time delta must be non zero */
2125c7b09308SSteven Rostedt 	event->time_delta = 1;
2126c7b09308SSteven Rostedt 
2127c7b09308SSteven Rostedt 	/* Set write to end of buffer */
2128c7b09308SSteven Rostedt 	length = (tail + length) - BUF_PAGE_SIZE;
2129c7b09308SSteven Rostedt 	local_sub(length, &tail_page->write);
2130c7b09308SSteven Rostedt }
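/*
 * Illustrative walk-through (editor's sketch, not part of the original
 * source) of the common path above.  The concrete sizes are assumptions:
 * a 64-bit box with 4K pages and a 16-byte buffer_data_page header (so
 * BUF_PAGE_SIZE of 4080), a 4-byte event header, RB_EVNT_MIN_SIZE of 8.
 *
 * A writer at tail == 4000 reserves a 96-byte event, pushing write to
 * 4096, past the end of the page:
 *
 *   - tail (4000) is below BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE (4072), so a
 *     discarded PADDING event is written at offset 4000 with
 *     array[0] = 80 - 4 = 76, covering the rest of the page;
 *   - real_end is set to 4000 so the reader knows where valid data stops;
 *   - write is pulled back by the 16 bytes that spilled past the page,
 *     leaving it at exactly BUF_PAGE_SIZE.
 */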
21316634ff26SSteven Rostedt 
21324239c38fSSteven Rostedt (Red Hat) static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer);
21334239c38fSSteven Rostedt (Red Hat) 
2134747e94aeSSteven Rostedt /*
2135747e94aeSSteven Rostedt  * This is the slow path, force gcc not to inline it.
2136747e94aeSSteven Rostedt  */
2137747e94aeSSteven Rostedt static noinline struct ring_buffer_event *
21386634ff26SSteven Rostedt rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
2139fcc742eaSSteven Rostedt (Red Hat) 	     unsigned long tail, struct rb_event_info *info)
21407a8e76a3SSteven Rostedt {
2141fcc742eaSSteven Rostedt (Red Hat) 	struct buffer_page *tail_page = info->tail_page;
21425a50e33cSSteven Rostedt 	struct buffer_page *commit_page = cpu_buffer->commit_page;
21437a8e76a3SSteven Rostedt 	struct ring_buffer *buffer = cpu_buffer->buffer;
214477ae365eSSteven Rostedt 	struct buffer_page *next_page;
214577ae365eSSteven Rostedt 	int ret;
2146aa20ae84SSteven Rostedt 
2147aa20ae84SSteven Rostedt 	next_page = tail_page;
21487a8e76a3SSteven Rostedt 
21497a8e76a3SSteven Rostedt 	rb_inc_page(cpu_buffer, &next_page);
21507a8e76a3SSteven Rostedt 
2151bf41a158SSteven Rostedt 	/*
2152bf41a158SSteven Rostedt 	 * If for some reason, we had an interrupt storm that made
2153bf41a158SSteven Rostedt 	 * it all the way around the buffer, bail, and warn
2154bf41a158SSteven Rostedt 	 * about it.
2155bf41a158SSteven Rostedt 	 */
215698db8df7SSteven Rostedt 	if (unlikely(next_page == commit_page)) {
215777ae365eSSteven Rostedt 		local_inc(&cpu_buffer->commit_overrun);
215845141d46SSteven Rostedt 		goto out_reset;
2159bf41a158SSteven Rostedt 	}
2160d769041fSSteven Rostedt 
2161bf41a158SSteven Rostedt 	/*
216277ae365eSSteven Rostedt 	 * This is where the fun begins!
216377ae365eSSteven Rostedt 	 *
216477ae365eSSteven Rostedt 	 * We are fighting against races between a reader that
216577ae365eSSteven Rostedt 	 * could be on another CPU trying to swap its reader
216677ae365eSSteven Rostedt 	 * page with the buffer head.
216777ae365eSSteven Rostedt 	 *
216877ae365eSSteven Rostedt 	 * We are also fighting against interrupts coming in and
216977ae365eSSteven Rostedt 	 * moving the head or tail on us as well.
217077ae365eSSteven Rostedt 	 *
217177ae365eSSteven Rostedt 	 * If the next page is the head page then we have filled
217277ae365eSSteven Rostedt 	 * the buffer, unless the commit page is still on the
217377ae365eSSteven Rostedt 	 * reader page.
2174bf41a158SSteven Rostedt 	 */
217577ae365eSSteven Rostedt 	if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) {
2176bf41a158SSteven Rostedt 
217777ae365eSSteven Rostedt 		/*
217877ae365eSSteven Rostedt 		 * If the commit is not on the reader page, then
217977ae365eSSteven Rostedt 		 * move the header page.
218077ae365eSSteven Rostedt 		 */
218177ae365eSSteven Rostedt 		if (!rb_is_reader_page(cpu_buffer->commit_page)) {
218277ae365eSSteven Rostedt 			/*
218377ae365eSSteven Rostedt 			 * If we are not in overwrite mode,
218477ae365eSSteven Rostedt 			 * this is easy, just stop here.
218577ae365eSSteven Rostedt 			 */
2186884bfe89SSlava Pestov 			if (!(buffer->flags & RB_FL_OVERWRITE)) {
2187884bfe89SSlava Pestov 				local_inc(&cpu_buffer->dropped_events);
218877ae365eSSteven Rostedt 				goto out_reset;
2189884bfe89SSlava Pestov 			}
219077ae365eSSteven Rostedt 
219177ae365eSSteven Rostedt 			ret = rb_handle_head_page(cpu_buffer,
219277ae365eSSteven Rostedt 						  tail_page,
219377ae365eSSteven Rostedt 						  next_page);
219477ae365eSSteven Rostedt 			if (ret < 0)
219577ae365eSSteven Rostedt 				goto out_reset;
219677ae365eSSteven Rostedt 			if (ret)
219777ae365eSSteven Rostedt 				goto out_again;
219877ae365eSSteven Rostedt 		} else {
219977ae365eSSteven Rostedt 			/*
220077ae365eSSteven Rostedt 			 * We need to be careful here too. The
220177ae365eSSteven Rostedt 			 * commit page could still be on the reader
220277ae365eSSteven Rostedt 			 * page. We could have a small buffer, and
220377ae365eSSteven Rostedt 			 * have filled up the buffer with events
220477ae365eSSteven Rostedt 			 * from interrupts and such, and wrapped.
220577ae365eSSteven Rostedt 			 *
220677ae365eSSteven Rostedt 			 * Note, if the tail page is also on the
220777ae365eSSteven Rostedt 			 * reader_page, we let it move out.
220877ae365eSSteven Rostedt 			 */
220977ae365eSSteven Rostedt 			if (unlikely((cpu_buffer->commit_page !=
221077ae365eSSteven Rostedt 				      cpu_buffer->tail_page) &&
221177ae365eSSteven Rostedt 				     (cpu_buffer->commit_page ==
221277ae365eSSteven Rostedt 				      cpu_buffer->reader_page))) {
221377ae365eSSteven Rostedt 				local_inc(&cpu_buffer->commit_overrun);
221477ae365eSSteven Rostedt 				goto out_reset;
221577ae365eSSteven Rostedt 			}
221677ae365eSSteven Rostedt 		}
2217bf41a158SSteven Rostedt 	}
2218bf41a158SSteven Rostedt 
221970004986SSteven Rostedt (Red Hat) 	rb_tail_page_update(cpu_buffer, tail_page, next_page);
22207a8e76a3SSteven Rostedt 
222177ae365eSSteven Rostedt  out_again:
222277ae365eSSteven Rostedt 
2223fcc742eaSSteven Rostedt (Red Hat) 	rb_reset_tail(cpu_buffer, tail, info);
2224bf41a158SSteven Rostedt 
22254239c38fSSteven Rostedt (Red Hat) 	/* Commit what we have for now. */
22264239c38fSSteven Rostedt (Red Hat) 	rb_end_commit(cpu_buffer);
22274239c38fSSteven Rostedt (Red Hat) 	/* rb_end_commit() decs committing */
22284239c38fSSteven Rostedt (Red Hat) 	local_inc(&cpu_buffer->committing);
22294239c38fSSteven Rostedt (Red Hat) 
2230bf41a158SSteven Rostedt 	/* fail and let the caller try again */
2231bf41a158SSteven Rostedt 	return ERR_PTR(-EAGAIN);
2232bf41a158SSteven Rostedt 
223345141d46SSteven Rostedt  out_reset:
22346f3b3440SLai Jiangshan 	/* reset write */
2235fcc742eaSSteven Rostedt (Red Hat) 	rb_reset_tail(cpu_buffer, tail, info);
22366f3b3440SLai Jiangshan 
2237bf41a158SSteven Rostedt 	return NULL;
22387a8e76a3SSteven Rostedt }
22397a8e76a3SSteven Rostedt 
2240d90fd774SSteven Rostedt (Red Hat) /* Slow path, do not inline */
2241d90fd774SSteven Rostedt (Red Hat) static noinline struct ring_buffer_event *
2242d90fd774SSteven Rostedt (Red Hat) rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
2243d90fd774SSteven Rostedt (Red Hat) {
2244d90fd774SSteven Rostedt (Red Hat) 	event->type_len = RINGBUF_TYPE_TIME_EXTEND;
2245d90fd774SSteven Rostedt (Red Hat) 
2246d90fd774SSteven Rostedt (Red Hat) 	/* Not the first event on the page? */
2247d90fd774SSteven Rostedt (Red Hat) 	if (rb_event_index(event)) {
2248d90fd774SSteven Rostedt (Red Hat) 		event->time_delta = delta & TS_MASK;
2249d90fd774SSteven Rostedt (Red Hat) 		event->array[0] = delta >> TS_SHIFT;
2250d90fd774SSteven Rostedt (Red Hat) 	} else {
2251d90fd774SSteven Rostedt (Red Hat) 		/* nope, just zero it */
2252d90fd774SSteven Rostedt (Red Hat) 		event->time_delta = 0;
2253d90fd774SSteven Rostedt (Red Hat) 		event->array[0] = 0;
2254d90fd774SSteven Rostedt (Red Hat) 	}
2255d90fd774SSteven Rostedt (Red Hat) 
2256d90fd774SSteven Rostedt (Red Hat) 	return skip_time_extend(event);
2257d90fd774SSteven Rostedt (Red Hat) }
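/*
 * Illustrative example (not part of the original file) of how a large
 * delta is split by rb_add_time_stamp() above, assuming the 27-bit
 * time_delta field (TS_SHIFT == 27, TS_MASK == (1ULL << 27) - 1):
 *
 *   delta = 0x12345678
 *   event->time_delta = delta & TS_MASK   = 0x02345678   (low 27 bits)
 *   event->array[0]   = delta >> TS_SHIFT = 0x2           (high bits)
 *
 * The reader side reconstructs it as (array[0] << TS_SHIFT) + time_delta,
 * as rb_update_write_stamp() below does for RINGBUF_TYPE_TIME_EXTEND.
 */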
2258d90fd774SSteven Rostedt (Red Hat) 
2259cdb2a0a9SYaowei Bai static inline bool rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
2260b7dc42fdSSteven Rostedt (Red Hat) 				     struct ring_buffer_event *event);
2261b7dc42fdSSteven Rostedt (Red Hat) 
2262d90fd774SSteven Rostedt (Red Hat) /**
2263d90fd774SSteven Rostedt (Red Hat)  * rb_update_event - update event type and data
2264d90fd774SSteven Rostedt (Red Hat)  * @event: the event to update
2265d90fd774SSteven Rostedt (Red Hat)  * @type: the type of event
2266d90fd774SSteven Rostedt (Red Hat)  * @length: the size of the event field in the ring buffer
2267d90fd774SSteven Rostedt (Red Hat)  *
2268d90fd774SSteven Rostedt (Red Hat)  * Update the type and data fields of the event. The length
2269d90fd774SSteven Rostedt (Red Hat)  * is the actual size that is written to the ring buffer,
2270d90fd774SSteven Rostedt (Red Hat)  * and with this, we can determine what to place into the
2271d90fd774SSteven Rostedt (Red Hat)  * data field.
2272d90fd774SSteven Rostedt (Red Hat)  */
2273b7dc42fdSSteven Rostedt (Red Hat) static void
2274d90fd774SSteven Rostedt (Red Hat) rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
2275d90fd774SSteven Rostedt (Red Hat) 		struct ring_buffer_event *event,
2276d90fd774SSteven Rostedt (Red Hat) 		struct rb_event_info *info)
2277d90fd774SSteven Rostedt (Red Hat) {
2278d90fd774SSteven Rostedt (Red Hat) 	unsigned length = info->length;
2279d90fd774SSteven Rostedt (Red Hat) 	u64 delta = info->delta;
2280d90fd774SSteven Rostedt (Red Hat) 
2281b7dc42fdSSteven Rostedt (Red Hat) 	/* Only a commit updates the timestamp */
2282b7dc42fdSSteven Rostedt (Red Hat) 	if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
2283b7dc42fdSSteven Rostedt (Red Hat) 		delta = 0;
2284b7dc42fdSSteven Rostedt (Red Hat) 
2285d90fd774SSteven Rostedt (Red Hat) 	/*
2286d90fd774SSteven Rostedt (Red Hat) 	 * If we need to add a timestamp, then we
2287d90fd774SSteven Rostedt (Red Hat) 	 * add it to the start of the reserved space.
2288d90fd774SSteven Rostedt (Red Hat) 	 */
2289d90fd774SSteven Rostedt (Red Hat) 	if (unlikely(info->add_timestamp)) {
2290d90fd774SSteven Rostedt (Red Hat) 		event = rb_add_time_stamp(event, delta);
2291d90fd774SSteven Rostedt (Red Hat) 		length -= RB_LEN_TIME_EXTEND;
2292d90fd774SSteven Rostedt (Red Hat) 		delta = 0;
2293d90fd774SSteven Rostedt (Red Hat) 	}
2294d90fd774SSteven Rostedt (Red Hat) 
2295d90fd774SSteven Rostedt (Red Hat) 	event->time_delta = delta;
2296d90fd774SSteven Rostedt (Red Hat) 	length -= RB_EVNT_HDR_SIZE;
2297d90fd774SSteven Rostedt (Red Hat) 	if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
2298d90fd774SSteven Rostedt (Red Hat) 		event->type_len = 0;
2299d90fd774SSteven Rostedt (Red Hat) 		event->array[0] = length;
2300d90fd774SSteven Rostedt (Red Hat) 	} else
2301d90fd774SSteven Rostedt (Red Hat) 		event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
2302d90fd774SSteven Rostedt (Red Hat) }
2303d90fd774SSteven Rostedt (Red Hat) 
2304d90fd774SSteven Rostedt (Red Hat) static unsigned rb_calculate_event_length(unsigned length)
2305d90fd774SSteven Rostedt (Red Hat) {
2306d90fd774SSteven Rostedt (Red Hat) 	struct ring_buffer_event event; /* Used only for sizeof array */
2307d90fd774SSteven Rostedt (Red Hat) 
2308d90fd774SSteven Rostedt (Red Hat) 	/* zero length can cause confusion */
2309d90fd774SSteven Rostedt (Red Hat) 	if (!length)
2310d90fd774SSteven Rostedt (Red Hat) 		length++;
2311d90fd774SSteven Rostedt (Red Hat) 
2312d90fd774SSteven Rostedt (Red Hat) 	if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
2313d90fd774SSteven Rostedt (Red Hat) 		length += sizeof(event.array[0]);
2314d90fd774SSteven Rostedt (Red Hat) 
2315d90fd774SSteven Rostedt (Red Hat) 	length += RB_EVNT_HDR_SIZE;
2316d90fd774SSteven Rostedt (Red Hat) 	length = ALIGN(length, RB_ARCH_ALIGNMENT);
2317d90fd774SSteven Rostedt (Red Hat) 
2318d90fd774SSteven Rostedt (Red Hat) 	/*
2319d90fd774SSteven Rostedt (Red Hat) 	 * In case the time delta is larger than the 27 bits for it
2320d90fd774SSteven Rostedt (Red Hat) 	 * in the header, we need to add a timestamp. If another
2321d90fd774SSteven Rostedt (Red Hat) 	 * event comes in when trying to discard this one to increase
2322d90fd774SSteven Rostedt (Red Hat) 	 * the length, then the timestamp will be added in the allocated
2323d90fd774SSteven Rostedt (Red Hat) 	 * space of this event. If length is bigger than the size needed
2324d90fd774SSteven Rostedt (Red Hat) 	 * for the TIME_EXTEND, then padding has to be used. The events
2325d90fd774SSteven Rostedt (Red Hat) 	 * length must be either RB_LEN_TIME_EXTEND, or greater than or equal
2326d90fd774SSteven Rostedt (Red Hat) 	 * to RB_LEN_TIME_EXTEND + 8, as 8 is the minimum size for padding.
2327d90fd774SSteven Rostedt (Red Hat) 	 * As length is a multiple of 4, we only need to worry if it
2328d90fd774SSteven Rostedt (Red Hat) 	 * is 12 (RB_LEN_TIME_EXTEND + 4).
2329d90fd774SSteven Rostedt (Red Hat) 	 */
2330d90fd774SSteven Rostedt (Red Hat) 	if (length == RB_LEN_TIME_EXTEND + RB_ALIGNMENT)
2331d90fd774SSteven Rostedt (Red Hat) 		length += RB_ALIGNMENT;
2332d90fd774SSteven Rostedt (Red Hat) 
2333d90fd774SSteven Rostedt (Red Hat) 	return length;
2334d90fd774SSteven Rostedt (Red Hat) }
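/*
 * Illustrative arithmetic (not part of the original file) for the function
 * above, assuming a configuration without RB_FORCE_8BYTE_ALIGNMENT, a
 * 4-byte event header, RB_ALIGNMENT == 4 and RB_LEN_TIME_EXTEND == 8:
 *
 *   request  6 bytes:  6 + 4 = 10, aligned up to 12 == RB_LEN_TIME_EXTEND + 4,
 *                      so it is bumped to 16 (room for a TIME_EXTEND or padding)
 *   request  8 bytes:  8 + 4 = 12, same bump to 16
 *   request 13 bytes: 13 + 4 = 17, aligned up to 20, no bump needed
 */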
2335d90fd774SSteven Rostedt (Red Hat) 
23369826b273SSteven Rostedt (Red Hat) #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
23379826b273SSteven Rostedt (Red Hat) static inline bool sched_clock_stable(void)
23389826b273SSteven Rostedt (Red Hat) {
23399826b273SSteven Rostedt (Red Hat) 	return true;
23409826b273SSteven Rostedt (Red Hat) }
23419826b273SSteven Rostedt (Red Hat) #endif
23429826b273SSteven Rostedt (Red Hat) 
2343a4543a2fSSteven Rostedt (Red Hat) static inline int
2344a4543a2fSSteven Rostedt (Red Hat) rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
2345d90fd774SSteven Rostedt (Red Hat) 		  struct ring_buffer_event *event)
2346d90fd774SSteven Rostedt (Red Hat) {
2347d90fd774SSteven Rostedt (Red Hat) 	unsigned long new_index, old_index;
2348d90fd774SSteven Rostedt (Red Hat) 	struct buffer_page *bpage;
2349d90fd774SSteven Rostedt (Red Hat) 	unsigned long index;
2350d90fd774SSteven Rostedt (Red Hat) 	unsigned long addr;
2351d90fd774SSteven Rostedt (Red Hat) 
2352d90fd774SSteven Rostedt (Red Hat) 	new_index = rb_event_index(event);
2353d90fd774SSteven Rostedt (Red Hat) 	old_index = new_index + rb_event_ts_length(event);
2354d90fd774SSteven Rostedt (Red Hat) 	addr = (unsigned long)event;
2355d90fd774SSteven Rostedt (Red Hat) 	addr &= PAGE_MASK;
2356d90fd774SSteven Rostedt (Red Hat) 
23578573636eSSteven Rostedt (Red Hat) 	bpage = READ_ONCE(cpu_buffer->tail_page);
2358d90fd774SSteven Rostedt (Red Hat) 
2359d90fd774SSteven Rostedt (Red Hat) 	if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
2360d90fd774SSteven Rostedt (Red Hat) 		unsigned long write_mask =
2361d90fd774SSteven Rostedt (Red Hat) 			local_read(&bpage->write) & ~RB_WRITE_MASK;
2362d90fd774SSteven Rostedt (Red Hat) 		unsigned long event_length = rb_event_length(event);
2363d90fd774SSteven Rostedt (Red Hat) 		/*
2364d90fd774SSteven Rostedt (Red Hat) 		 * This is on the tail page. It is possible that
2365d90fd774SSteven Rostedt (Red Hat) 		 * a write could come in and move the tail page
2366d90fd774SSteven Rostedt (Red Hat) 		 * and write to the next page. That is fine
2367d90fd774SSteven Rostedt (Red Hat) 		 * because we just shorten what is on this page.
2368d90fd774SSteven Rostedt (Red Hat) 		 */
2369d90fd774SSteven Rostedt (Red Hat) 		old_index += write_mask;
2370d90fd774SSteven Rostedt (Red Hat) 		new_index += write_mask;
2371d90fd774SSteven Rostedt (Red Hat) 		index = local_cmpxchg(&bpage->write, old_index, new_index);
2372d90fd774SSteven Rostedt (Red Hat) 		if (index == old_index) {
2373d90fd774SSteven Rostedt (Red Hat) 			/* update counters */
2374d90fd774SSteven Rostedt (Red Hat) 			local_sub(event_length, &cpu_buffer->entries_bytes);
2375d90fd774SSteven Rostedt (Red Hat) 			return 1;
2376d90fd774SSteven Rostedt (Red Hat) 		}
2377d90fd774SSteven Rostedt (Red Hat) 	}
2378d90fd774SSteven Rostedt (Red Hat) 
2379d90fd774SSteven Rostedt (Red Hat) 	/* could not discard */
2380d90fd774SSteven Rostedt (Red Hat) 	return 0;
2381d90fd774SSteven Rostedt (Red Hat) }
2382d90fd774SSteven Rostedt (Red Hat) 
2383d90fd774SSteven Rostedt (Red Hat) static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
2384d90fd774SSteven Rostedt (Red Hat) {
2385d90fd774SSteven Rostedt (Red Hat) 	local_inc(&cpu_buffer->committing);
2386d90fd774SSteven Rostedt (Red Hat) 	local_inc(&cpu_buffer->commits);
2387d90fd774SSteven Rostedt (Red Hat) }
2388d90fd774SSteven Rostedt (Red Hat) 
2389d90fd774SSteven Rostedt (Red Hat) static void
2390d90fd774SSteven Rostedt (Red Hat) rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
2391d90fd774SSteven Rostedt (Red Hat) {
2392d90fd774SSteven Rostedt (Red Hat) 	unsigned long max_count;
2393d90fd774SSteven Rostedt (Red Hat) 
2394d90fd774SSteven Rostedt (Red Hat) 	/*
2395d90fd774SSteven Rostedt (Red Hat) 	 * We only race with interrupts and NMIs on this CPU.
2396d90fd774SSteven Rostedt (Red Hat) 	 * If we own the commit event, then we can commit
2397d90fd774SSteven Rostedt (Red Hat) 	 * all others that interrupted us, since the interruptions
2398d90fd774SSteven Rostedt (Red Hat) 	 * are in stack format (they finish before they come
2399d90fd774SSteven Rostedt (Red Hat) 	 * back to us). This allows us to do a simple loop to
2400d90fd774SSteven Rostedt (Red Hat) 	 * assign the commit to the tail.
2401d90fd774SSteven Rostedt (Red Hat) 	 */
2402d90fd774SSteven Rostedt (Red Hat)  again:
2403d90fd774SSteven Rostedt (Red Hat) 	max_count = cpu_buffer->nr_pages * 100;
2404d90fd774SSteven Rostedt (Red Hat) 
24058573636eSSteven Rostedt (Red Hat) 	while (cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page)) {
2406d90fd774SSteven Rostedt (Red Hat) 		if (RB_WARN_ON(cpu_buffer, !(--max_count)))
2407d90fd774SSteven Rostedt (Red Hat) 			return;
2408d90fd774SSteven Rostedt (Red Hat) 		if (RB_WARN_ON(cpu_buffer,
2409d90fd774SSteven Rostedt (Red Hat) 			       rb_is_reader_page(cpu_buffer->tail_page)))
2410d90fd774SSteven Rostedt (Red Hat) 			return;
2411d90fd774SSteven Rostedt (Red Hat) 		local_set(&cpu_buffer->commit_page->page->commit,
2412d90fd774SSteven Rostedt (Red Hat) 			  rb_page_write(cpu_buffer->commit_page));
2413d90fd774SSteven Rostedt (Red Hat) 		rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
241470004986SSteven Rostedt (Red Hat) 		/* Only update the write stamp if the page has an event */
241570004986SSteven Rostedt (Red Hat) 		if (rb_page_write(cpu_buffer->commit_page))
2416d90fd774SSteven Rostedt (Red Hat) 			cpu_buffer->write_stamp =
2417d90fd774SSteven Rostedt (Red Hat) 				cpu_buffer->commit_page->page->time_stamp;
2418d90fd774SSteven Rostedt (Red Hat) 		/* add barrier to keep gcc from optimizing too much */
2419d90fd774SSteven Rostedt (Red Hat) 		barrier();
2420d90fd774SSteven Rostedt (Red Hat) 	}
2421d90fd774SSteven Rostedt (Red Hat) 	while (rb_commit_index(cpu_buffer) !=
2422d90fd774SSteven Rostedt (Red Hat) 	       rb_page_write(cpu_buffer->commit_page)) {
2423d90fd774SSteven Rostedt (Red Hat) 
2424d90fd774SSteven Rostedt (Red Hat) 		local_set(&cpu_buffer->commit_page->page->commit,
2425d90fd774SSteven Rostedt (Red Hat) 			  rb_page_write(cpu_buffer->commit_page));
2426d90fd774SSteven Rostedt (Red Hat) 		RB_WARN_ON(cpu_buffer,
2427d90fd774SSteven Rostedt (Red Hat) 			   local_read(&cpu_buffer->commit_page->page->commit) &
2428d90fd774SSteven Rostedt (Red Hat) 			   ~RB_WRITE_MASK);
2429d90fd774SSteven Rostedt (Red Hat) 		barrier();
2430d90fd774SSteven Rostedt (Red Hat) 	}
2431d90fd774SSteven Rostedt (Red Hat) 
2432d90fd774SSteven Rostedt (Red Hat) 	/* again, keep gcc from optimizing */
2433d90fd774SSteven Rostedt (Red Hat) 	barrier();
2434d90fd774SSteven Rostedt (Red Hat) 
2435d90fd774SSteven Rostedt (Red Hat) 	/*
2436d90fd774SSteven Rostedt (Red Hat) 	 * If an interrupt came in just after the first while loop
2437d90fd774SSteven Rostedt (Red Hat) 	 * and pushed the tail page forward, we will be left with
2438d90fd774SSteven Rostedt (Red Hat) 	 * a dangling commit that will never go forward.
2439d90fd774SSteven Rostedt (Red Hat) 	 */
24408573636eSSteven Rostedt (Red Hat) 	if (unlikely(cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page)))
2441d90fd774SSteven Rostedt (Red Hat) 		goto again;
2442d90fd774SSteven Rostedt (Red Hat) }
2443d90fd774SSteven Rostedt (Red Hat) 
2444d90fd774SSteven Rostedt (Red Hat) static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
2445d90fd774SSteven Rostedt (Red Hat) {
2446d90fd774SSteven Rostedt (Red Hat) 	unsigned long commits;
2447d90fd774SSteven Rostedt (Red Hat) 
2448d90fd774SSteven Rostedt (Red Hat) 	if (RB_WARN_ON(cpu_buffer,
2449d90fd774SSteven Rostedt (Red Hat) 		       !local_read(&cpu_buffer->committing)))
2450d90fd774SSteven Rostedt (Red Hat) 		return;
2451d90fd774SSteven Rostedt (Red Hat) 
2452d90fd774SSteven Rostedt (Red Hat)  again:
2453d90fd774SSteven Rostedt (Red Hat) 	commits = local_read(&cpu_buffer->commits);
2454d90fd774SSteven Rostedt (Red Hat) 	/* synchronize with interrupts */
2455d90fd774SSteven Rostedt (Red Hat) 	barrier();
2456d90fd774SSteven Rostedt (Red Hat) 	if (local_read(&cpu_buffer->committing) == 1)
2457d90fd774SSteven Rostedt (Red Hat) 		rb_set_commit_to_write(cpu_buffer);
2458d90fd774SSteven Rostedt (Red Hat) 
2459d90fd774SSteven Rostedt (Red Hat) 	local_dec(&cpu_buffer->committing);
2460d90fd774SSteven Rostedt (Red Hat) 
2461d90fd774SSteven Rostedt (Red Hat) 	/* synchronize with interrupts */
2462d90fd774SSteven Rostedt (Red Hat) 	barrier();
2463d90fd774SSteven Rostedt (Red Hat) 
2464d90fd774SSteven Rostedt (Red Hat) 	/*
2465d90fd774SSteven Rostedt (Red Hat) 	 * Need to account for interrupts coming in between the
2466d90fd774SSteven Rostedt (Red Hat) 	 * updating of the commit page and the clearing of the
2467d90fd774SSteven Rostedt (Red Hat) 	 * committing counter.
2468d90fd774SSteven Rostedt (Red Hat) 	 */
2469d90fd774SSteven Rostedt (Red Hat) 	if (unlikely(local_read(&cpu_buffer->commits) != commits) &&
2470d90fd774SSteven Rostedt (Red Hat) 	    !local_read(&cpu_buffer->committing)) {
2471d90fd774SSteven Rostedt (Red Hat) 		local_inc(&cpu_buffer->committing);
2472d90fd774SSteven Rostedt (Red Hat) 		goto again;
2473d90fd774SSteven Rostedt (Red Hat) 	}
2474d90fd774SSteven Rostedt (Red Hat) }
2475d90fd774SSteven Rostedt (Red Hat) 
2476d90fd774SSteven Rostedt (Red Hat) static inline void rb_event_discard(struct ring_buffer_event *event)
2477d90fd774SSteven Rostedt (Red Hat) {
2478d90fd774SSteven Rostedt (Red Hat) 	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
2479d90fd774SSteven Rostedt (Red Hat) 		event = skip_time_extend(event);
2480d90fd774SSteven Rostedt (Red Hat) 
2481d90fd774SSteven Rostedt (Red Hat) 	/* array[0] holds the actual length for the discarded event */
2482d90fd774SSteven Rostedt (Red Hat) 	event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
2483d90fd774SSteven Rostedt (Red Hat) 	event->type_len = RINGBUF_TYPE_PADDING;
2484d90fd774SSteven Rostedt (Red Hat) 	/* time delta must be non zero */
2485d90fd774SSteven Rostedt (Red Hat) 	if (!event->time_delta)
2486d90fd774SSteven Rostedt (Red Hat) 		event->time_delta = 1;
2487d90fd774SSteven Rostedt (Red Hat) }
2488d90fd774SSteven Rostedt (Red Hat) 
2489*babe3fceSSteven Rostedt (Red Hat) static __always_inline bool
2490d90fd774SSteven Rostedt (Red Hat) rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
2491d90fd774SSteven Rostedt (Red Hat) 		   struct ring_buffer_event *event)
2492d90fd774SSteven Rostedt (Red Hat) {
2493d90fd774SSteven Rostedt (Red Hat) 	unsigned long addr = (unsigned long)event;
2494d90fd774SSteven Rostedt (Red Hat) 	unsigned long index;
2495d90fd774SSteven Rostedt (Red Hat) 
2496d90fd774SSteven Rostedt (Red Hat) 	index = rb_event_index(event);
2497d90fd774SSteven Rostedt (Red Hat) 	addr &= PAGE_MASK;
2498d90fd774SSteven Rostedt (Red Hat) 
2499d90fd774SSteven Rostedt (Red Hat) 	return cpu_buffer->commit_page->page == (void *)addr &&
2500d90fd774SSteven Rostedt (Red Hat) 		rb_commit_index(cpu_buffer) == index;
2501d90fd774SSteven Rostedt (Red Hat) }
2502d90fd774SSteven Rostedt (Red Hat) 
2503*babe3fceSSteven Rostedt (Red Hat) static __always_inline void
2504a4543a2fSSteven Rostedt (Red Hat) rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2505d90fd774SSteven Rostedt (Red Hat) 		      struct ring_buffer_event *event)
2506d90fd774SSteven Rostedt (Red Hat) {
2507d90fd774SSteven Rostedt (Red Hat) 	u64 delta;
2508d90fd774SSteven Rostedt (Red Hat) 
2509d90fd774SSteven Rostedt (Red Hat) 	/*
2510d90fd774SSteven Rostedt (Red Hat) 	 * The first event in the commit queue updates the
2511d90fd774SSteven Rostedt (Red Hat) 	 * time stamp.
2512d90fd774SSteven Rostedt (Red Hat) 	 */
2513d90fd774SSteven Rostedt (Red Hat) 	if (rb_event_is_commit(cpu_buffer, event)) {
2514d90fd774SSteven Rostedt (Red Hat) 		/*
2515d90fd774SSteven Rostedt (Red Hat) 		 * A commit event that is first on a page
2516d90fd774SSteven Rostedt (Red Hat) 		 * updates the write timestamp with the page stamp
2517d90fd774SSteven Rostedt (Red Hat) 		 */
2518d90fd774SSteven Rostedt (Red Hat) 		if (!rb_event_index(event))
2519d90fd774SSteven Rostedt (Red Hat) 			cpu_buffer->write_stamp =
2520d90fd774SSteven Rostedt (Red Hat) 				cpu_buffer->commit_page->page->time_stamp;
2521d90fd774SSteven Rostedt (Red Hat) 		else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
2522d90fd774SSteven Rostedt (Red Hat) 			delta = event->array[0];
2523d90fd774SSteven Rostedt (Red Hat) 			delta <<= TS_SHIFT;
2524d90fd774SSteven Rostedt (Red Hat) 			delta += event->time_delta;
2525d90fd774SSteven Rostedt (Red Hat) 			cpu_buffer->write_stamp += delta;
2526d90fd774SSteven Rostedt (Red Hat) 		} else
2527d90fd774SSteven Rostedt (Red Hat) 			cpu_buffer->write_stamp += event->time_delta;
2528d90fd774SSteven Rostedt (Red Hat) 	}
2529d90fd774SSteven Rostedt (Red Hat) }
2530d90fd774SSteven Rostedt (Red Hat) 
2531d90fd774SSteven Rostedt (Red Hat) static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
2532d90fd774SSteven Rostedt (Red Hat) 		      struct ring_buffer_event *event)
2533d90fd774SSteven Rostedt (Red Hat) {
2534d90fd774SSteven Rostedt (Red Hat) 	local_inc(&cpu_buffer->entries);
2535d90fd774SSteven Rostedt (Red Hat) 	rb_update_write_stamp(cpu_buffer, event);
2536d90fd774SSteven Rostedt (Red Hat) 	rb_end_commit(cpu_buffer);
2537d90fd774SSteven Rostedt (Red Hat) }
2538d90fd774SSteven Rostedt (Red Hat) 
2539d90fd774SSteven Rostedt (Red Hat) static __always_inline void
2540d90fd774SSteven Rostedt (Red Hat) rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
2541d90fd774SSteven Rostedt (Red Hat) {
2542d90fd774SSteven Rostedt (Red Hat) 	bool pagebusy;
2543d90fd774SSteven Rostedt (Red Hat) 
2544d90fd774SSteven Rostedt (Red Hat) 	if (buffer->irq_work.waiters_pending) {
2545d90fd774SSteven Rostedt (Red Hat) 		buffer->irq_work.waiters_pending = false;
2546d90fd774SSteven Rostedt (Red Hat) 		/* irq_work_queue() supplies its own memory barriers */
2547d90fd774SSteven Rostedt (Red Hat) 		irq_work_queue(&buffer->irq_work.work);
2548d90fd774SSteven Rostedt (Red Hat) 	}
2549d90fd774SSteven Rostedt (Red Hat) 
2550d90fd774SSteven Rostedt (Red Hat) 	if (cpu_buffer->irq_work.waiters_pending) {
2551d90fd774SSteven Rostedt (Red Hat) 		cpu_buffer->irq_work.waiters_pending = false;
2552d90fd774SSteven Rostedt (Red Hat) 		/* irq_work_queue() supplies its own memory barriers */
2553d90fd774SSteven Rostedt (Red Hat) 		irq_work_queue(&cpu_buffer->irq_work.work);
2554d90fd774SSteven Rostedt (Red Hat) 	}
2555d90fd774SSteven Rostedt (Red Hat) 
2556d90fd774SSteven Rostedt (Red Hat) 	pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
2557d90fd774SSteven Rostedt (Red Hat) 
2558d90fd774SSteven Rostedt (Red Hat) 	if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) {
2559d90fd774SSteven Rostedt (Red Hat) 		cpu_buffer->irq_work.wakeup_full = true;
2560d90fd774SSteven Rostedt (Red Hat) 		cpu_buffer->irq_work.full_waiters_pending = false;
2561d90fd774SSteven Rostedt (Red Hat) 		/* irq_work_queue() supplies its own memory barriers */
2562d90fd774SSteven Rostedt (Red Hat) 		irq_work_queue(&cpu_buffer->irq_work.work);
2563d90fd774SSteven Rostedt (Red Hat) 	}
2564d90fd774SSteven Rostedt (Red Hat) }
2565d90fd774SSteven Rostedt (Red Hat) 
2566d90fd774SSteven Rostedt (Red Hat) /*
2567d90fd774SSteven Rostedt (Red Hat)  * The lock and unlock are done within a preempt disable section.
2568d90fd774SSteven Rostedt (Red Hat)  * The current_context per_cpu variable can only be modified
2569d90fd774SSteven Rostedt (Red Hat)  * by the current task between lock and unlock. But it can
2570d90fd774SSteven Rostedt (Red Hat)  * be modified more than once via an interrupt. To pass this
2571d90fd774SSteven Rostedt (Red Hat)  * information from the lock to the unlock without having to
2572d90fd774SSteven Rostedt (Red Hat)  * access the 'in_interrupt()' functions again (which do show
2573d90fd774SSteven Rostedt (Red Hat)  * a bit of overhead in something as critical as function tracing),
2574d90fd774SSteven Rostedt (Red Hat)  * we use a bitmask trick.
2575d90fd774SSteven Rostedt (Red Hat)  *
2576d90fd774SSteven Rostedt (Red Hat)  *  bit 0 =  NMI context
2577d90fd774SSteven Rostedt (Red Hat)  *  bit 1 =  IRQ context
2578d90fd774SSteven Rostedt (Red Hat)  *  bit 2 =  SoftIRQ context
2579d90fd774SSteven Rostedt (Red Hat)  *  bit 3 =  normal context.
2580d90fd774SSteven Rostedt (Red Hat)  *
2581d90fd774SSteven Rostedt (Red Hat)  * This works because this is the order of contexts that can
2582d90fd774SSteven Rostedt (Red Hat)  * preempt other contexts. A SoftIRQ never preempts an IRQ
2583d90fd774SSteven Rostedt (Red Hat)  * context.
2584d90fd774SSteven Rostedt (Red Hat)  *
2585d90fd774SSteven Rostedt (Red Hat)  * When the context is determined, the corresponding bit is
2586d90fd774SSteven Rostedt (Red Hat)  * checked and set (if it was set, then a recursion of that context
2587d90fd774SSteven Rostedt (Red Hat)  * happened).
2588d90fd774SSteven Rostedt (Red Hat)  *
2589d90fd774SSteven Rostedt (Red Hat)  * On unlock, we need to clear this bit. To do so, just subtract
2590d90fd774SSteven Rostedt (Red Hat)  * 1 from the current_context and AND it to itself.
2591d90fd774SSteven Rostedt (Red Hat)  *
2592d90fd774SSteven Rostedt (Red Hat)  * (binary)
2593d90fd774SSteven Rostedt (Red Hat)  *  101 - 1 = 100
2594d90fd774SSteven Rostedt (Red Hat)  *  101 & 100 = 100 (clearing bit zero)
2595d90fd774SSteven Rostedt (Red Hat)  *
2596d90fd774SSteven Rostedt (Red Hat)  *  1010 - 1 = 1001
2597d90fd774SSteven Rostedt (Red Hat)  *  1010 & 1001 = 1000 (clearing bit 1)
2598d90fd774SSteven Rostedt (Red Hat)  *
2599d90fd774SSteven Rostedt (Red Hat)  * The least significant bit can be cleared this way, and it
2600d90fd774SSteven Rostedt (Red Hat)  * just so happens that it is the same bit corresponding to
2601d90fd774SSteven Rostedt (Red Hat)  * the current context.
2602d90fd774SSteven Rostedt (Red Hat)  */
2603d90fd774SSteven Rostedt (Red Hat) 
2604d90fd774SSteven Rostedt (Red Hat) static __always_inline int
2605d90fd774SSteven Rostedt (Red Hat) trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
2606d90fd774SSteven Rostedt (Red Hat) {
2607d90fd774SSteven Rostedt (Red Hat) 	unsigned int val = cpu_buffer->current_context;
2608d90fd774SSteven Rostedt (Red Hat) 	int bit;
2609d90fd774SSteven Rostedt (Red Hat) 
2610d90fd774SSteven Rostedt (Red Hat) 	if (in_interrupt()) {
2611d90fd774SSteven Rostedt (Red Hat) 		if (in_nmi())
2612d90fd774SSteven Rostedt (Red Hat) 			bit = RB_CTX_NMI;
2613d90fd774SSteven Rostedt (Red Hat) 		else if (in_irq())
2614d90fd774SSteven Rostedt (Red Hat) 			bit = RB_CTX_IRQ;
2615d90fd774SSteven Rostedt (Red Hat) 		else
2616d90fd774SSteven Rostedt (Red Hat) 			bit = RB_CTX_SOFTIRQ;
2617d90fd774SSteven Rostedt (Red Hat) 	} else
2618d90fd774SSteven Rostedt (Red Hat) 		bit = RB_CTX_NORMAL;
2619d90fd774SSteven Rostedt (Red Hat) 
2620d90fd774SSteven Rostedt (Red Hat) 	if (unlikely(val & (1 << bit)))
2621d90fd774SSteven Rostedt (Red Hat) 		return 1;
2622d90fd774SSteven Rostedt (Red Hat) 
2623d90fd774SSteven Rostedt (Red Hat) 	val |= (1 << bit);
2624d90fd774SSteven Rostedt (Red Hat) 	cpu_buffer->current_context = val;
2625d90fd774SSteven Rostedt (Red Hat) 
2626d90fd774SSteven Rostedt (Red Hat) 	return 0;
2627d90fd774SSteven Rostedt (Red Hat) }
2628d90fd774SSteven Rostedt (Red Hat) 
2629d90fd774SSteven Rostedt (Red Hat) static __always_inline void
2630d90fd774SSteven Rostedt (Red Hat) trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
2631d90fd774SSteven Rostedt (Red Hat) {
2632d90fd774SSteven Rostedt (Red Hat) 	cpu_buffer->current_context &= cpu_buffer->current_context - 1;
2633d90fd774SSteven Rostedt (Red Hat) }
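/*
 * Illustrative walk-through (not part of the original file) of the
 * current_context bits described above, for a normal-context write that
 * is interrupted by an IRQ which also writes:
 *
 *   normal lock:   0000 -> 1000   (bit 3, RB_CTX_NORMAL)
 *   IRQ lock:      1000 -> 1010   (bit 1, RB_CTX_IRQ)
 *   IRQ unlock:    1010 & (1010 - 1) = 1010 & 1001 = 1000
 *   normal unlock: 1000 & (1000 - 1) = 1000 & 0111 = 0000
 *
 * Had the IRQ handler recursed into the ring buffer a second time, bit 1
 * would already have been set and trace_recursive_lock() would return 1,
 * making ring_buffer_lock_reserve() bail out.
 */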
2634d90fd774SSteven Rostedt (Red Hat) 
2635d90fd774SSteven Rostedt (Red Hat) /**
2636d90fd774SSteven Rostedt (Red Hat)  * ring_buffer_unlock_commit - commit a reserved event
2637d90fd774SSteven Rostedt (Red Hat)  * @buffer: The buffer to commit to
2638d90fd774SSteven Rostedt (Red Hat)  * @event: The event pointer to commit.
2639d90fd774SSteven Rostedt (Red Hat)  *
2640d90fd774SSteven Rostedt (Red Hat)  * This commits the data to the ring buffer, and releases any locks held.
2641d90fd774SSteven Rostedt (Red Hat)  *
2642d90fd774SSteven Rostedt (Red Hat)  * Must be paired with ring_buffer_lock_reserve.
2643d90fd774SSteven Rostedt (Red Hat)  */
2644d90fd774SSteven Rostedt (Red Hat) int ring_buffer_unlock_commit(struct ring_buffer *buffer,
2645d90fd774SSteven Rostedt (Red Hat) 			      struct ring_buffer_event *event)
2646d90fd774SSteven Rostedt (Red Hat) {
2647d90fd774SSteven Rostedt (Red Hat) 	struct ring_buffer_per_cpu *cpu_buffer;
2648d90fd774SSteven Rostedt (Red Hat) 	int cpu = raw_smp_processor_id();
2649d90fd774SSteven Rostedt (Red Hat) 
2650d90fd774SSteven Rostedt (Red Hat) 	cpu_buffer = buffer->buffers[cpu];
2651d90fd774SSteven Rostedt (Red Hat) 
2652d90fd774SSteven Rostedt (Red Hat) 	rb_commit(cpu_buffer, event);
2653d90fd774SSteven Rostedt (Red Hat) 
2654d90fd774SSteven Rostedt (Red Hat) 	rb_wakeups(buffer, cpu_buffer);
2655d90fd774SSteven Rostedt (Red Hat) 
2656d90fd774SSteven Rostedt (Red Hat) 	trace_recursive_unlock(cpu_buffer);
2657d90fd774SSteven Rostedt (Red Hat) 
2658d90fd774SSteven Rostedt (Red Hat) 	preempt_enable_notrace();
2659d90fd774SSteven Rostedt (Red Hat) 
2660d90fd774SSteven Rostedt (Red Hat) 	return 0;
2661d90fd774SSteven Rostedt (Red Hat) }
2662d90fd774SSteven Rostedt (Red Hat) EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
2663a4543a2fSSteven Rostedt (Red Hat) 
26649826b273SSteven Rostedt (Red Hat) static noinline void
26659826b273SSteven Rostedt (Red Hat) rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
26669826b273SSteven Rostedt (Red Hat) 		    struct rb_event_info *info)
26679826b273SSteven Rostedt (Red Hat) {
26689826b273SSteven Rostedt (Red Hat) 	WARN_ONCE(info->delta > (1ULL << 59),
26699826b273SSteven Rostedt (Red Hat) 		  KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
26709826b273SSteven Rostedt (Red Hat) 		  (unsigned long long)info->delta,
26719826b273SSteven Rostedt (Red Hat) 		  (unsigned long long)info->ts,
26729826b273SSteven Rostedt (Red Hat) 		  (unsigned long long)cpu_buffer->write_stamp,
26739826b273SSteven Rostedt (Red Hat) 		  sched_clock_stable() ? "" :
26749826b273SSteven Rostedt (Red Hat) 		  "If you just came from a suspend/resume,\n"
26759826b273SSteven Rostedt (Red Hat) 		  "please switch to the trace global clock:\n"
26769826b273SSteven Rostedt (Red Hat) 		  "  echo global > /sys/kernel/debug/tracing/trace_clock\n");
26779826b273SSteven Rostedt (Red Hat) 	info->add_timestamp = 1;
26789826b273SSteven Rostedt (Red Hat) }
26799826b273SSteven Rostedt (Red Hat) 
26806634ff26SSteven Rostedt static struct ring_buffer_event *
26816634ff26SSteven Rostedt __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
2682fcc742eaSSteven Rostedt (Red Hat) 		  struct rb_event_info *info)
26836634ff26SSteven Rostedt {
26846634ff26SSteven Rostedt 	struct ring_buffer_event *event;
2685fcc742eaSSteven Rostedt (Red Hat) 	struct buffer_page *tail_page;
26866634ff26SSteven Rostedt 	unsigned long tail, write;
2687b7dc42fdSSteven Rostedt (Red Hat) 
2688b7dc42fdSSteven Rostedt (Red Hat) 	/*
2689b7dc42fdSSteven Rostedt (Red Hat) 	 * If the time delta since the last event is too big to
2690b7dc42fdSSteven Rostedt (Red Hat) 	 * hold in the time field of the event, then we append a
2691b7dc42fdSSteven Rostedt (Red Hat) 	 * TIME EXTEND event ahead of the data event.
2692b7dc42fdSSteven Rostedt (Red Hat) 	 */
2693b7dc42fdSSteven Rostedt (Red Hat) 	if (unlikely(info->add_timestamp))
2694b7dc42fdSSteven Rostedt (Red Hat) 		info->length += RB_LEN_TIME_EXTEND;
269569d1b839SSteven Rostedt 
26968573636eSSteven Rostedt (Red Hat) 	/* Don't let the compiler play games with cpu_buffer->tail_page */
26978573636eSSteven Rostedt (Red Hat) 	tail_page = info->tail_page = READ_ONCE(cpu_buffer->tail_page);
2698fcc742eaSSteven Rostedt (Red Hat) 	write = local_add_return(info->length, &tail_page->write);
269977ae365eSSteven Rostedt 
270077ae365eSSteven Rostedt 	/* set write to only the index of the write */
270177ae365eSSteven Rostedt 	write &= RB_WRITE_MASK;
2702fcc742eaSSteven Rostedt (Red Hat) 	tail = write - info->length;
27036634ff26SSteven Rostedt 
2704b7dc42fdSSteven Rostedt (Red Hat) 	/*
2705b7dc42fdSSteven Rostedt (Red Hat) 	 * If this is the first commit on the page, then it has the same
2706b7dc42fdSSteven Rostedt (Red Hat) 	 * timestamp as the page itself.
2707b7dc42fdSSteven Rostedt (Red Hat) 	 */
2708b7dc42fdSSteven Rostedt (Red Hat) 	if (!tail)
2709b7dc42fdSSteven Rostedt (Red Hat) 		info->delta = 0;
2710b7dc42fdSSteven Rostedt (Red Hat) 
27116634ff26SSteven Rostedt 	/* See if we shot pass the end of this buffer page */
2712747e94aeSSteven Rostedt 	if (unlikely(write > BUF_PAGE_SIZE))
2713fcc742eaSSteven Rostedt (Red Hat) 		return rb_move_tail(cpu_buffer, tail, info);
27146634ff26SSteven Rostedt 
27156634ff26SSteven Rostedt 	/* We reserved something on the buffer */
2716b7dc42fdSSteven Rostedt (Red Hat) 
27176634ff26SSteven Rostedt 	event = __rb_page_index(tail_page, tail);
27181744a21dSVegard Nossum 	kmemcheck_annotate_bitfield(event, bitfield);
2719fcc742eaSSteven Rostedt (Red Hat) 	rb_update_event(cpu_buffer, event, info);
27206634ff26SSteven Rostedt 
27216634ff26SSteven Rostedt 	local_inc(&tail_page->entries);
27226634ff26SSteven Rostedt 
2723b7dc42fdSSteven Rostedt (Red Hat) 	/*
2724b7dc42fdSSteven Rostedt (Red Hat) 	 * If this is the first commit on the page, then update
2725b7dc42fdSSteven Rostedt (Red Hat) 	 * its timestamp.
2726b7dc42fdSSteven Rostedt (Red Hat) 	 */
2727b7dc42fdSSteven Rostedt (Red Hat) 	if (!tail)
2728b7dc42fdSSteven Rostedt (Red Hat) 		tail_page->page->time_stamp = info->ts;
2729b7dc42fdSSteven Rostedt (Red Hat) 
2730c64e148aSVaibhav Nagarnaik 	/* account for these added bytes */
2731fcc742eaSSteven Rostedt (Red Hat) 	local_add(info->length, &cpu_buffer->entries_bytes);
2732c64e148aSVaibhav Nagarnaik 
27336634ff26SSteven Rostedt 	return event;
27346634ff26SSteven Rostedt }
27356634ff26SSteven Rostedt 
2736fa7ffb39SSteven Rostedt (Red Hat) static __always_inline struct ring_buffer_event *
273762f0b3ebSSteven Rostedt rb_reserve_next_event(struct ring_buffer *buffer,
273862f0b3ebSSteven Rostedt 		      struct ring_buffer_per_cpu *cpu_buffer,
27391cd8d735SSteven Rostedt 		      unsigned long length)
27407a8e76a3SSteven Rostedt {
27417a8e76a3SSteven Rostedt 	struct ring_buffer_event *event;
2742fcc742eaSSteven Rostedt (Red Hat) 	struct rb_event_info info;
2743818e3dd3SSteven Rostedt 	int nr_loops = 0;
2744b7dc42fdSSteven Rostedt (Red Hat) 	u64 diff;
27457a8e76a3SSteven Rostedt 
2746fa743953SSteven Rostedt 	rb_start_commit(cpu_buffer);
2747fa743953SSteven Rostedt 
274885bac32cSSteven Rostedt #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
274962f0b3ebSSteven Rostedt 	/*
275062f0b3ebSSteven Rostedt 	 * Due to the ability to swap a cpu buffer from a buffer
275162f0b3ebSSteven Rostedt 	 * it is possible it was swapped before we committed.
275262f0b3ebSSteven Rostedt 	 * (committing stops a swap). We check for it here and
275362f0b3ebSSteven Rostedt 	 * if it happened, we have to fail the write.
275462f0b3ebSSteven Rostedt 	 */
275562f0b3ebSSteven Rostedt 	barrier();
275662f0b3ebSSteven Rostedt 	if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) {
275762f0b3ebSSteven Rostedt 		local_dec(&cpu_buffer->committing);
275862f0b3ebSSteven Rostedt 		local_dec(&cpu_buffer->commits);
275962f0b3ebSSteven Rostedt 		return NULL;
276062f0b3ebSSteven Rostedt 	}
276185bac32cSSteven Rostedt #endif
2762b7dc42fdSSteven Rostedt (Red Hat) 
2763fcc742eaSSteven Rostedt (Red Hat) 	info.length = rb_calculate_event_length(length);
2764a4543a2fSSteven Rostedt (Red Hat)  again:
2765b7dc42fdSSteven Rostedt (Red Hat) 	info.add_timestamp = 0;
2766b7dc42fdSSteven Rostedt (Red Hat) 	info.delta = 0;
2767b7dc42fdSSteven Rostedt (Red Hat) 
2768818e3dd3SSteven Rostedt 	/*
2769818e3dd3SSteven Rostedt 	 * We allow for interrupts to reenter here and do a trace.
2770818e3dd3SSteven Rostedt 	 * If one does, it will cause this original code to loop
2771818e3dd3SSteven Rostedt 	 * back here. Even with heavy interrupts happening, this
2772818e3dd3SSteven Rostedt 	 * should only happen a few times in a row. If this happens
2773818e3dd3SSteven Rostedt 	 * 1000 times in a row, there must be either an interrupt
2774818e3dd3SSteven Rostedt 	 * storm or we have something buggy.
2775818e3dd3SSteven Rostedt 	 * Bail!
2776818e3dd3SSteven Rostedt 	 */
27773e89c7bbSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
2778fa743953SSteven Rostedt 		goto out_fail;
2779818e3dd3SSteven Rostedt 
2780b7dc42fdSSteven Rostedt (Red Hat) 	info.ts = rb_time_stamp(cpu_buffer->buffer);
2781b7dc42fdSSteven Rostedt (Red Hat) 	diff = info.ts - cpu_buffer->write_stamp;
2782b7dc42fdSSteven Rostedt (Red Hat) 
2783b7dc42fdSSteven Rostedt (Red Hat) 	/* make sure this diff is calculated here */
2784b7dc42fdSSteven Rostedt (Red Hat) 	barrier();
2785b7dc42fdSSteven Rostedt (Red Hat) 
2786b7dc42fdSSteven Rostedt (Red Hat) 	/* Did the write stamp get updated already? */
2787b7dc42fdSSteven Rostedt (Red Hat) 	if (likely(info.ts >= cpu_buffer->write_stamp)) {
2788b7dc42fdSSteven Rostedt (Red Hat) 		info.delta = diff;
2789b7dc42fdSSteven Rostedt (Red Hat) 		if (unlikely(test_time_stamp(info.delta)))
2790b7dc42fdSSteven Rostedt (Red Hat) 			rb_handle_timestamp(cpu_buffer, &info);
2791b7dc42fdSSteven Rostedt (Red Hat) 	}
2792b7dc42fdSSteven Rostedt (Red Hat) 
2793fcc742eaSSteven Rostedt (Red Hat) 	event = __rb_reserve_next(cpu_buffer, &info);
2794fcc742eaSSteven Rostedt (Red Hat) 
2795bd1b7cd3SSteven Rostedt (Red Hat) 	if (unlikely(PTR_ERR(event) == -EAGAIN)) {
2796bd1b7cd3SSteven Rostedt (Red Hat) 		if (info.add_timestamp)
2797bd1b7cd3SSteven Rostedt (Red Hat) 			info.length -= RB_LEN_TIME_EXTEND;
2798bf41a158SSteven Rostedt 		goto again;
2799bd1b7cd3SSteven Rostedt (Red Hat) 	}
28007a8e76a3SSteven Rostedt 
2801fa743953SSteven Rostedt 	if (!event)
2802fa743953SSteven Rostedt 		goto out_fail;
2803bf41a158SSteven Rostedt 
28047a8e76a3SSteven Rostedt 	return event;
2805fa743953SSteven Rostedt 
2806fa743953SSteven Rostedt  out_fail:
2807fa743953SSteven Rostedt 	rb_end_commit(cpu_buffer);
2808fa743953SSteven Rostedt 	return NULL;
28097a8e76a3SSteven Rostedt }
28107a8e76a3SSteven Rostedt 
28117a8e76a3SSteven Rostedt /**
28127a8e76a3SSteven Rostedt  * ring_buffer_lock_reserve - reserve a part of the buffer
28137a8e76a3SSteven Rostedt  * @buffer: the ring buffer to reserve from
28147a8e76a3SSteven Rostedt  * @length: the length of the data to reserve (excluding event header)
28157a8e76a3SSteven Rostedt  *
28167a8e76a3SSteven Rostedt  * Returns a reserved event on the ring buffer to copy directly to.
28177a8e76a3SSteven Rostedt  * The user of this interface will need to get the body to write into
28187a8e76a3SSteven Rostedt  * and can use the ring_buffer_event_data() interface.
28197a8e76a3SSteven Rostedt  *
28207a8e76a3SSteven Rostedt  * The length is the length of the data needed, not the event length
28217a8e76a3SSteven Rostedt  * which also includes the event header.
28227a8e76a3SSteven Rostedt  *
28237a8e76a3SSteven Rostedt  * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
28247a8e76a3SSteven Rostedt  * If NULL is returned, then nothing has been allocated or locked.
28257a8e76a3SSteven Rostedt  */
28267a8e76a3SSteven Rostedt struct ring_buffer_event *
28270a987751SArnaldo Carvalho de Melo ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
28287a8e76a3SSteven Rostedt {
28297a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
28307a8e76a3SSteven Rostedt 	struct ring_buffer_event *event;
28315168ae50SSteven Rostedt 	int cpu;
28327a8e76a3SSteven Rostedt 
2833bf41a158SSteven Rostedt 	/* If we are tracing schedule, we don't want to recurse */
28345168ae50SSteven Rostedt 	preempt_disable_notrace();
2835bf41a158SSteven Rostedt 
28363205f806SSteven Rostedt (Red Hat) 	if (unlikely(atomic_read(&buffer->record_disabled)))
283758a09ec6SSteven Rostedt (Red Hat) 		goto out;
2838261842b7SSteven Rostedt 
28397a8e76a3SSteven Rostedt 	cpu = raw_smp_processor_id();
28407a8e76a3SSteven Rostedt 
28413205f806SSteven Rostedt (Red Hat) 	if (unlikely(!cpumask_test_cpu(cpu, buffer->cpumask)))
2842d769041fSSteven Rostedt 		goto out;
28437a8e76a3SSteven Rostedt 
28447a8e76a3SSteven Rostedt 	cpu_buffer = buffer->buffers[cpu];
28457a8e76a3SSteven Rostedt 
28463205f806SSteven Rostedt (Red Hat) 	if (unlikely(atomic_read(&cpu_buffer->record_disabled)))
2847d769041fSSteven Rostedt 		goto out;
28487a8e76a3SSteven Rostedt 
28493205f806SSteven Rostedt (Red Hat) 	if (unlikely(length > BUF_MAX_DATA_SIZE))
2850bf41a158SSteven Rostedt 		goto out;
28517a8e76a3SSteven Rostedt 
285258a09ec6SSteven Rostedt (Red Hat) 	if (unlikely(trace_recursive_lock(cpu_buffer)))
285358a09ec6SSteven Rostedt (Red Hat) 		goto out;
285458a09ec6SSteven Rostedt (Red Hat) 
285562f0b3ebSSteven Rostedt 	event = rb_reserve_next_event(buffer, cpu_buffer, length);
28567a8e76a3SSteven Rostedt 	if (!event)
285758a09ec6SSteven Rostedt (Red Hat) 		goto out_unlock;
28587a8e76a3SSteven Rostedt 
28597a8e76a3SSteven Rostedt 	return event;
28607a8e76a3SSteven Rostedt 
286158a09ec6SSteven Rostedt (Red Hat)  out_unlock:
286258a09ec6SSteven Rostedt (Red Hat) 	trace_recursive_unlock(cpu_buffer);
2863d769041fSSteven Rostedt  out:
28645168ae50SSteven Rostedt 	preempt_enable_notrace();
28657a8e76a3SSteven Rostedt 	return NULL;
28667a8e76a3SSteven Rostedt }
2867c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
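/*
 * Illustrative usage sketch (not part of the original file): a minimal
 * writer pairing ring_buffer_lock_reserve() with ring_buffer_unlock_commit()
 * as the kernel-doc above describes.  The payload struct and the
 * caller-supplied 'buffer' pointer are assumptions made for the example;
 * the declarations come from <linux/ring_buffer.h>.
 */
struct example_sample {
	u64	ts;
	int	value;
};

static int example_write_sample(struct ring_buffer *buffer, u64 ts, int value)
{
	struct ring_buffer_event *event;
	struct example_sample *entry;

	/* Reserve room for the payload only; the event header is added for us. */
	event = ring_buffer_lock_reserve(buffer, sizeof(*entry));
	if (!event)
		return -EBUSY;	/* nothing was reserved or locked */

	entry = ring_buffer_event_data(event);
	entry->ts = ts;
	entry->value = value;

	/* Preemption is still disabled here; the commit re-enables it. */
	return ring_buffer_unlock_commit(buffer, event);
}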
28687a8e76a3SSteven Rostedt 
2869a1863c21SSteven Rostedt /*
2870a1863c21SSteven Rostedt  * Decrement the entry count of the page that an event is on.
2871a1863c21SSteven Rostedt  * The event does not even need to exist, only the pointer
2872a1863c21SSteven Rostedt  * to the page it is on. This may only be called before the commit
2873a1863c21SSteven Rostedt  * takes place.
2874a1863c21SSteven Rostedt  */
2875a1863c21SSteven Rostedt static inline void
2876a1863c21SSteven Rostedt rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer,
2877a1863c21SSteven Rostedt 		   struct ring_buffer_event *event)
2878a1863c21SSteven Rostedt {
2879a1863c21SSteven Rostedt 	unsigned long addr = (unsigned long)event;
2880a1863c21SSteven Rostedt 	struct buffer_page *bpage = cpu_buffer->commit_page;
2881a1863c21SSteven Rostedt 	struct buffer_page *start;
2882a1863c21SSteven Rostedt 
2883a1863c21SSteven Rostedt 	addr &= PAGE_MASK;
2884a1863c21SSteven Rostedt 
2885a1863c21SSteven Rostedt 	/* Do the likely case first */
2886a1863c21SSteven Rostedt 	if (likely(bpage->page == (void *)addr)) {
2887a1863c21SSteven Rostedt 		local_dec(&bpage->entries);
2888a1863c21SSteven Rostedt 		return;
2889a1863c21SSteven Rostedt 	}
2890a1863c21SSteven Rostedt 
2891a1863c21SSteven Rostedt 	/*
2892a1863c21SSteven Rostedt 	 * Because the commit page may be on the reader page we
2893a1863c21SSteven Rostedt 	 * start with the next page and check for the end of the loop there.
2894a1863c21SSteven Rostedt 	 */
2895a1863c21SSteven Rostedt 	rb_inc_page(cpu_buffer, &bpage);
2896a1863c21SSteven Rostedt 	start = bpage;
2897a1863c21SSteven Rostedt 	do {
2898a1863c21SSteven Rostedt 		if (bpage->page == (void *)addr) {
2899a1863c21SSteven Rostedt 			local_dec(&bpage->entries);
2900a1863c21SSteven Rostedt 			return;
2901a1863c21SSteven Rostedt 		}
2902a1863c21SSteven Rostedt 		rb_inc_page(cpu_buffer, &bpage);
2903a1863c21SSteven Rostedt 	} while (bpage != start);
2904a1863c21SSteven Rostedt 
2905a1863c21SSteven Rostedt 	/* commit not part of this buffer?? */
2906a1863c21SSteven Rostedt 	RB_WARN_ON(cpu_buffer, 1);
2907a1863c21SSteven Rostedt }
2908a1863c21SSteven Rostedt 
29097a8e76a3SSteven Rostedt /**
2910fa1b47ddSSteven Rostedt  * ring_buffer_commit_discard - discard an event that has not been committed
2911fa1b47ddSSteven Rostedt  * @buffer: the ring buffer
2912fa1b47ddSSteven Rostedt  * @event: non committed event to discard
2913fa1b47ddSSteven Rostedt  *
2914dc892f73SSteven Rostedt  * Sometimes an event that is in the ring buffer needs to be ignored.
2915dc892f73SSteven Rostedt  * This function lets the user discard an event in the ring buffer
2916dc892f73SSteven Rostedt  * and then that event will not be read later.
2917dc892f73SSteven Rostedt  *
2918dc892f73SSteven Rostedt  * This function only works if it is called before the item has been
2919dc892f73SSteven Rostedt  * committed. It will try to free the event from the ring buffer
2920fa1b47ddSSteven Rostedt  * if another event has not been added behind it.
2921fa1b47ddSSteven Rostedt  *
2922fa1b47ddSSteven Rostedt  * If another event has been added behind it, it will set the event
2923fa1b47ddSSteven Rostedt  * up as discarded, and perform the commit.
2924fa1b47ddSSteven Rostedt  *
2925fa1b47ddSSteven Rostedt  * If this function is called, do not call ring_buffer_unlock_commit on
2926fa1b47ddSSteven Rostedt  * the event.
2927fa1b47ddSSteven Rostedt  */
2928fa1b47ddSSteven Rostedt void ring_buffer_discard_commit(struct ring_buffer *buffer,
2929fa1b47ddSSteven Rostedt 				struct ring_buffer_event *event)
2930fa1b47ddSSteven Rostedt {
2931fa1b47ddSSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
2932fa1b47ddSSteven Rostedt 	int cpu;
2933fa1b47ddSSteven Rostedt 
2934fa1b47ddSSteven Rostedt 	/* The event is discarded regardless */
2935f3b9aae1SFrederic Weisbecker 	rb_event_discard(event);
2936fa1b47ddSSteven Rostedt 
2937fa743953SSteven Rostedt 	cpu = smp_processor_id();
2938fa743953SSteven Rostedt 	cpu_buffer = buffer->buffers[cpu];
2939fa743953SSteven Rostedt 
2940fa1b47ddSSteven Rostedt 	/*
2941fa1b47ddSSteven Rostedt 	 * This must only be called if the event has not been
2942fa1b47ddSSteven Rostedt 	 * committed yet. Thus we can assume that preemption
2943fa1b47ddSSteven Rostedt 	 * is still disabled.
2944fa1b47ddSSteven Rostedt 	 */
2945fa743953SSteven Rostedt 	RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
2946fa1b47ddSSteven Rostedt 
2947a1863c21SSteven Rostedt 	rb_decrement_entry(cpu_buffer, event);
29480f2541d2SSteven Rostedt 	if (rb_try_to_discard(cpu_buffer, event))
2949fa1b47ddSSteven Rostedt 		goto out;
2950fa1b47ddSSteven Rostedt 
2951fa1b47ddSSteven Rostedt 	/*
2952fa1b47ddSSteven Rostedt 	 * The commit is still visible by the reader, so we
2953a1863c21SSteven Rostedt 	 * must still update the timestamp.
2954fa1b47ddSSteven Rostedt 	 */
2955a1863c21SSteven Rostedt 	rb_update_write_stamp(cpu_buffer, event);
2956fa1b47ddSSteven Rostedt  out:
2957fa743953SSteven Rostedt 	rb_end_commit(cpu_buffer);
2958fa1b47ddSSteven Rostedt 
295958a09ec6SSteven Rostedt (Red Hat) 	trace_recursive_unlock(cpu_buffer);
2960f3b9aae1SFrederic Weisbecker 
29615168ae50SSteven Rostedt 	preempt_enable_notrace();
2962fa1b47ddSSteven Rostedt 
2963fa1b47ddSSteven Rostedt }
2964fa1b47ddSSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
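/*
 * Illustrative usage sketch (not part of the original file): discarding a
 * reserved event instead of committing it, as the kernel-doc above
 * describes.  The filter condition and payload are assumptions made for
 * the example.
 */
static int example_write_filtered(struct ring_buffer *buffer, int value)
{
	struct ring_buffer_event *event;
	int *entry;

	event = ring_buffer_lock_reserve(buffer, sizeof(*entry));
	if (!event)
		return -EBUSY;

	entry = ring_buffer_event_data(event);
	*entry = value;

	if (value < 0) {
		/* Drop it; do not call ring_buffer_unlock_commit() after this. */
		ring_buffer_discard_commit(buffer, event);
		return 0;
	}

	return ring_buffer_unlock_commit(buffer, event);
}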
2965fa1b47ddSSteven Rostedt 
2966fa1b47ddSSteven Rostedt /**
29677a8e76a3SSteven Rostedt  * ring_buffer_write - write data to the buffer without reserving
29687a8e76a3SSteven Rostedt  * @buffer: The ring buffer to write to.
29697a8e76a3SSteven Rostedt  * @length: The length of the data being written (excluding the event header)
29707a8e76a3SSteven Rostedt  * @data: The data to write to the buffer.
29717a8e76a3SSteven Rostedt  *
29727a8e76a3SSteven Rostedt  * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
29737a8e76a3SSteven Rostedt  * one function. If you already have the data to write to the buffer, it
29747a8e76a3SSteven Rostedt  * may be easier to simply call this function.
29757a8e76a3SSteven Rostedt  *
29767a8e76a3SSteven Rostedt  * Note, like ring_buffer_lock_reserve, the length is the length of the data
29777a8e76a3SSteven Rostedt  * and not the length of the event which would hold the header.
29787a8e76a3SSteven Rostedt  */
29797a8e76a3SSteven Rostedt int ring_buffer_write(struct ring_buffer *buffer,
29807a8e76a3SSteven Rostedt 		      unsigned long length,
29817a8e76a3SSteven Rostedt 		      void *data)
29827a8e76a3SSteven Rostedt {
29837a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
29847a8e76a3SSteven Rostedt 	struct ring_buffer_event *event;
29857a8e76a3SSteven Rostedt 	void *body;
29867a8e76a3SSteven Rostedt 	int ret = -EBUSY;
29875168ae50SSteven Rostedt 	int cpu;
29887a8e76a3SSteven Rostedt 
29895168ae50SSteven Rostedt 	preempt_disable_notrace();
2990bf41a158SSteven Rostedt 
299152fbe9cdSLai Jiangshan 	if (atomic_read(&buffer->record_disabled))
299252fbe9cdSLai Jiangshan 		goto out;
299352fbe9cdSLai Jiangshan 
29947a8e76a3SSteven Rostedt 	cpu = raw_smp_processor_id();
29957a8e76a3SSteven Rostedt 
29969e01c1b7SRusty Russell 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
2997d769041fSSteven Rostedt 		goto out;
29987a8e76a3SSteven Rostedt 
29997a8e76a3SSteven Rostedt 	cpu_buffer = buffer->buffers[cpu];
30007a8e76a3SSteven Rostedt 
30017a8e76a3SSteven Rostedt 	if (atomic_read(&cpu_buffer->record_disabled))
30027a8e76a3SSteven Rostedt 		goto out;
30037a8e76a3SSteven Rostedt 
3004be957c44SSteven Rostedt 	if (length > BUF_MAX_DATA_SIZE)
3005be957c44SSteven Rostedt 		goto out;
3006be957c44SSteven Rostedt 
3007985e871bSSteven Rostedt (Red Hat) 	if (unlikely(trace_recursive_lock(cpu_buffer)))
3008985e871bSSteven Rostedt (Red Hat) 		goto out;
3009985e871bSSteven Rostedt (Red Hat) 
301062f0b3ebSSteven Rostedt 	event = rb_reserve_next_event(buffer, cpu_buffer, length);
30117a8e76a3SSteven Rostedt 	if (!event)
3012985e871bSSteven Rostedt (Red Hat) 		goto out_unlock;
30137a8e76a3SSteven Rostedt 
30147a8e76a3SSteven Rostedt 	body = rb_event_data(event);
30157a8e76a3SSteven Rostedt 
30167a8e76a3SSteven Rostedt 	memcpy(body, data, length);
30177a8e76a3SSteven Rostedt 
30187a8e76a3SSteven Rostedt 	rb_commit(cpu_buffer, event);
30197a8e76a3SSteven Rostedt 
302015693458SSteven Rostedt (Red Hat) 	rb_wakeups(buffer, cpu_buffer);
302115693458SSteven Rostedt (Red Hat) 
30227a8e76a3SSteven Rostedt 	ret = 0;
3023985e871bSSteven Rostedt (Red Hat) 
3024985e871bSSteven Rostedt (Red Hat)  out_unlock:
3025985e871bSSteven Rostedt (Red Hat) 	trace_recursive_unlock(cpu_buffer);
3026985e871bSSteven Rostedt (Red Hat) 
30277a8e76a3SSteven Rostedt  out:
30285168ae50SSteven Rostedt 	preempt_enable_notrace();
30297a8e76a3SSteven Rostedt 
30307a8e76a3SSteven Rostedt 	return ret;
30317a8e76a3SSteven Rostedt }
3032c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_write);
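
/*
 * Illustrative usage sketch (hypothetical code, not part of ring_buffer.c):
 * ring_buffer_write() copies a ready-made payload in a single call, so the
 * reserve/commit pair is not needed. The example_rb pointer and example_*
 * name are hypothetical.
 */
static int example_write_blob(struct ring_buffer *example_rb)
{
	char payload[] = "hello";

	/* The length is the payload size only; the event header is added internally */
	return ring_buffer_write(example_rb, sizeof(payload), payload);
}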
30337a8e76a3SSteven Rostedt 
3034da58834cSYaowei Bai static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
3035bf41a158SSteven Rostedt {
3036bf41a158SSteven Rostedt 	struct buffer_page *reader = cpu_buffer->reader_page;
303777ae365eSSteven Rostedt 	struct buffer_page *head = rb_set_head_page(cpu_buffer);
3038bf41a158SSteven Rostedt 	struct buffer_page *commit = cpu_buffer->commit_page;
3039bf41a158SSteven Rostedt 
304077ae365eSSteven Rostedt 	/* In case of error, head will be NULL */
304177ae365eSSteven Rostedt 	if (unlikely(!head))
3042da58834cSYaowei Bai 		return true;
304377ae365eSSteven Rostedt 
3044bf41a158SSteven Rostedt 	return reader->read == rb_page_commit(reader) &&
3045bf41a158SSteven Rostedt 		(commit == reader ||
3046bf41a158SSteven Rostedt 		 (commit == head &&
3047bf41a158SSteven Rostedt 		  head->read == rb_page_commit(commit)));
3048bf41a158SSteven Rostedt }
3049bf41a158SSteven Rostedt 
30507a8e76a3SSteven Rostedt /**
30517a8e76a3SSteven Rostedt  * ring_buffer_record_disable - stop all writes into the buffer
30527a8e76a3SSteven Rostedt  * @buffer: The ring buffer to stop writes to.
30537a8e76a3SSteven Rostedt  *
30547a8e76a3SSteven Rostedt  * This prevents all writes to the buffer. Any attempt to write
30557a8e76a3SSteven Rostedt  * to the buffer after this will fail and return NULL.
30567a8e76a3SSteven Rostedt  *
30577a8e76a3SSteven Rostedt  * The caller should call synchronize_sched() after this.
30587a8e76a3SSteven Rostedt  */
30597a8e76a3SSteven Rostedt void ring_buffer_record_disable(struct ring_buffer *buffer)
30607a8e76a3SSteven Rostedt {
30617a8e76a3SSteven Rostedt 	atomic_inc(&buffer->record_disabled);
30627a8e76a3SSteven Rostedt }
3063c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
30647a8e76a3SSteven Rostedt 
30657a8e76a3SSteven Rostedt /**
30667a8e76a3SSteven Rostedt  * ring_buffer_record_enable - enable writes to the buffer
30677a8e76a3SSteven Rostedt  * @buffer: The ring buffer to enable writes
30687a8e76a3SSteven Rostedt  *
30697a8e76a3SSteven Rostedt  * Note, multiple disables will need the same number of enables
3070c41b20e7SAdam Buchbinder  * to truly enable the writing (much like preempt_disable).
30717a8e76a3SSteven Rostedt  */
30727a8e76a3SSteven Rostedt void ring_buffer_record_enable(struct ring_buffer *buffer)
30737a8e76a3SSteven Rostedt {
30747a8e76a3SSteven Rostedt 	atomic_dec(&buffer->record_disabled);
30757a8e76a3SSteven Rostedt }
3076c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
30777a8e76a3SSteven Rostedt 
30787a8e76a3SSteven Rostedt /**
3079499e5470SSteven Rostedt  * ring_buffer_record_off - stop all writes into the buffer
3080499e5470SSteven Rostedt  * @buffer: The ring buffer to stop writes to.
3081499e5470SSteven Rostedt  *
3082499e5470SSteven Rostedt  * This prevents all writes to the buffer. Any attempt to write
3083499e5470SSteven Rostedt  * to the buffer after this will fail and return NULL.
3084499e5470SSteven Rostedt  *
3085499e5470SSteven Rostedt  * This is different than ring_buffer_record_disable() as
308687abb3b1SWang Tianhong  * it works like an on/off switch, whereas the disable() version
3087499e5470SSteven Rostedt  * must be paired with an enable().
3088499e5470SSteven Rostedt  */
3089499e5470SSteven Rostedt void ring_buffer_record_off(struct ring_buffer *buffer)
3090499e5470SSteven Rostedt {
3091499e5470SSteven Rostedt 	unsigned int rd;
3092499e5470SSteven Rostedt 	unsigned int new_rd;
3093499e5470SSteven Rostedt 
3094499e5470SSteven Rostedt 	do {
3095499e5470SSteven Rostedt 		rd = atomic_read(&buffer->record_disabled);
3096499e5470SSteven Rostedt 		new_rd = rd | RB_BUFFER_OFF;
3097499e5470SSteven Rostedt 	} while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
3098499e5470SSteven Rostedt }
3099499e5470SSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_record_off);
3100499e5470SSteven Rostedt 
3101499e5470SSteven Rostedt /**
3102499e5470SSteven Rostedt  * ring_buffer_record_on - restart writes into the buffer
3103499e5470SSteven Rostedt  * @buffer: The ring buffer to start writes to.
3104499e5470SSteven Rostedt  *
3105499e5470SSteven Rostedt  * This enables all writes to the buffer that was disabled by
3106499e5470SSteven Rostedt  * ring_buffer_record_off().
3107499e5470SSteven Rostedt  *
3108499e5470SSteven Rostedt  * This is different than ring_buffer_record_enable() as
310987abb3b1SWang Tianhong  * it works like an on/off switch, whereas the enable() version
3110499e5470SSteven Rostedt  * must be paired with a disable().
3111499e5470SSteven Rostedt  */
3112499e5470SSteven Rostedt void ring_buffer_record_on(struct ring_buffer *buffer)
3113499e5470SSteven Rostedt {
3114499e5470SSteven Rostedt 	unsigned int rd;
3115499e5470SSteven Rostedt 	unsigned int new_rd;
3116499e5470SSteven Rostedt 
3117499e5470SSteven Rostedt 	do {
3118499e5470SSteven Rostedt 		rd = atomic_read(&buffer->record_disabled);
3119499e5470SSteven Rostedt 		new_rd = rd & ~RB_BUFFER_OFF;
3120499e5470SSteven Rostedt 	} while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
3121499e5470SSteven Rostedt }
3122499e5470SSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_record_on);
3123499e5470SSteven Rostedt 
3124499e5470SSteven Rostedt /**
3125499e5470SSteven Rostedt  * ring_buffer_record_is_on - return true if the ring buffer can write
3126499e5470SSteven Rostedt  * @buffer: The ring buffer to see if write is enabled
3127499e5470SSteven Rostedt  *
3128499e5470SSteven Rostedt  * Returns true if the ring buffer is in a state that it accepts writes.
3129499e5470SSteven Rostedt  */
3130499e5470SSteven Rostedt int ring_buffer_record_is_on(struct ring_buffer *buffer)
3131499e5470SSteven Rostedt {
3132499e5470SSteven Rostedt 	return !atomic_read(&buffer->record_disabled);
3133499e5470SSteven Rostedt }
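
/*
 * Illustrative usage sketch (hypothetical code, not part of ring_buffer.c):
 * record_disable()/record_enable() nest like a counter, while
 * record_off()/record_on() act as a single switch. The example_rb pointer
 * and example_* name are hypothetical.
 */
static void example_pause_and_resume(struct ring_buffer *example_rb)
{
	/* Counted: two disables need two enables before writes resume */
	ring_buffer_record_disable(example_rb);
	ring_buffer_record_disable(example_rb);
	ring_buffer_record_enable(example_rb);
	ring_buffer_record_enable(example_rb);

	/* Switch: a single record_on() undoes any number of record_off() calls */
	ring_buffer_record_off(example_rb);
	ring_buffer_record_off(example_rb);
	ring_buffer_record_on(example_rb);

	WARN_ON(!ring_buffer_record_is_on(example_rb));
}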
3134499e5470SSteven Rostedt 
3135499e5470SSteven Rostedt /**
31367a8e76a3SSteven Rostedt  * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
31377a8e76a3SSteven Rostedt  * @buffer: The ring buffer to stop writes to.
31387a8e76a3SSteven Rostedt  * @cpu: The CPU buffer to stop
31397a8e76a3SSteven Rostedt  *
31407a8e76a3SSteven Rostedt  * This prevents all writes to the buffer. Any attempt to write
31417a8e76a3SSteven Rostedt  * to the buffer after this will fail and return NULL.
31427a8e76a3SSteven Rostedt  *
31437a8e76a3SSteven Rostedt  * The caller should call synchronize_sched() after this.
31447a8e76a3SSteven Rostedt  */
31457a8e76a3SSteven Rostedt void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
31467a8e76a3SSteven Rostedt {
31477a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
31487a8e76a3SSteven Rostedt 
31499e01c1b7SRusty Russell 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
31508aabee57SSteven Rostedt 		return;
31517a8e76a3SSteven Rostedt 
31527a8e76a3SSteven Rostedt 	cpu_buffer = buffer->buffers[cpu];
31537a8e76a3SSteven Rostedt 	atomic_inc(&cpu_buffer->record_disabled);
31547a8e76a3SSteven Rostedt }
3155c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
31567a8e76a3SSteven Rostedt 
31577a8e76a3SSteven Rostedt /**
31587a8e76a3SSteven Rostedt  * ring_buffer_record_enable_cpu - enable writes to the buffer
31597a8e76a3SSteven Rostedt  * @buffer: The ring buffer to enable writes
31607a8e76a3SSteven Rostedt  * @cpu: The CPU to enable.
31617a8e76a3SSteven Rostedt  *
31627a8e76a3SSteven Rostedt  * Note, multiple disables will need the same number of enables
3163c41b20e7SAdam Buchbinder  * to truly enable the writing (much like preempt_disable).
31647a8e76a3SSteven Rostedt  */
31657a8e76a3SSteven Rostedt void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
31667a8e76a3SSteven Rostedt {
31677a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
31687a8e76a3SSteven Rostedt 
31699e01c1b7SRusty Russell 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
31708aabee57SSteven Rostedt 		return;
31717a8e76a3SSteven Rostedt 
31727a8e76a3SSteven Rostedt 	cpu_buffer = buffer->buffers[cpu];
31737a8e76a3SSteven Rostedt 	atomic_dec(&cpu_buffer->record_disabled);
31747a8e76a3SSteven Rostedt }
3175c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
31767a8e76a3SSteven Rostedt 
3177f6195aa0SSteven Rostedt /*
3178f6195aa0SSteven Rostedt  * The total number of entries in the ring buffer is the running counter
3179f6195aa0SSteven Rostedt  * of entries entered into the ring buffer, minus the sum of
3180f6195aa0SSteven Rostedt  * the entries read from the ring buffer and the number of
3181f6195aa0SSteven Rostedt  * entries that were overwritten.
3182f6195aa0SSteven Rostedt  */
3183f6195aa0SSteven Rostedt static inline unsigned long
3184f6195aa0SSteven Rostedt rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
3185f6195aa0SSteven Rostedt {
3186f6195aa0SSteven Rostedt 	return local_read(&cpu_buffer->entries) -
3187f6195aa0SSteven Rostedt 		(local_read(&cpu_buffer->overrun) + cpu_buffer->read);
3188f6195aa0SSteven Rostedt }
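
/*
 * Worked example (illustrative numbers only): if a CPU buffer has had 1000
 * events written, 200 consumed by readers and 50 lost to overwrite, then
 * rb_num_of_entries() reports 1000 - (50 + 200) = 750 entries still left
 * to read.
 */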
3189f6195aa0SSteven Rostedt 
31907a8e76a3SSteven Rostedt /**
3191c64e148aSVaibhav Nagarnaik  * ring_buffer_oldest_event_ts - get the oldest event timestamp from the buffer
3192c64e148aSVaibhav Nagarnaik  * @buffer: The ring buffer
3193c64e148aSVaibhav Nagarnaik  * @cpu: The per CPU buffer to read from.
3194c64e148aSVaibhav Nagarnaik  */
319550ecf2c3SYoshihiro YUNOMAE u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
3196c64e148aSVaibhav Nagarnaik {
3197c64e148aSVaibhav Nagarnaik 	unsigned long flags;
3198c64e148aSVaibhav Nagarnaik 	struct ring_buffer_per_cpu *cpu_buffer;
3199c64e148aSVaibhav Nagarnaik 	struct buffer_page *bpage;
3200da830e58SLinus Torvalds 	u64 ret = 0;
3201c64e148aSVaibhav Nagarnaik 
3202c64e148aSVaibhav Nagarnaik 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
3203c64e148aSVaibhav Nagarnaik 		return 0;
3204c64e148aSVaibhav Nagarnaik 
3205c64e148aSVaibhav Nagarnaik 	cpu_buffer = buffer->buffers[cpu];
32067115e3fcSLinus Torvalds 	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3207c64e148aSVaibhav Nagarnaik 	/*
3208c64e148aSVaibhav Nagarnaik 	 * if the tail is on reader_page, the oldest time stamp is on the reader
3209c64e148aSVaibhav Nagarnaik 	 * page
3210c64e148aSVaibhav Nagarnaik 	 */
3211c64e148aSVaibhav Nagarnaik 	if (cpu_buffer->tail_page == cpu_buffer->reader_page)
3212c64e148aSVaibhav Nagarnaik 		bpage = cpu_buffer->reader_page;
3213c64e148aSVaibhav Nagarnaik 	else
3214c64e148aSVaibhav Nagarnaik 		bpage = rb_set_head_page(cpu_buffer);
321554f7be5bSSteven Rostedt 	if (bpage)
3216c64e148aSVaibhav Nagarnaik 		ret = bpage->page->time_stamp;
32177115e3fcSLinus Torvalds 	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3218c64e148aSVaibhav Nagarnaik 
3219c64e148aSVaibhav Nagarnaik 	return ret;
3220c64e148aSVaibhav Nagarnaik }
3221c64e148aSVaibhav Nagarnaik EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);
3222c64e148aSVaibhav Nagarnaik 
3223c64e148aSVaibhav Nagarnaik /**
3224c64e148aSVaibhav Nagarnaik  * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer
3225c64e148aSVaibhav Nagarnaik  * @buffer: The ring buffer
3226c64e148aSVaibhav Nagarnaik  * @cpu: The per CPU buffer to read from.
3227c64e148aSVaibhav Nagarnaik  */
3228c64e148aSVaibhav Nagarnaik unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu)
3229c64e148aSVaibhav Nagarnaik {
3230c64e148aSVaibhav Nagarnaik 	struct ring_buffer_per_cpu *cpu_buffer;
3231c64e148aSVaibhav Nagarnaik 	unsigned long ret;
3232c64e148aSVaibhav Nagarnaik 
3233c64e148aSVaibhav Nagarnaik 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
3234c64e148aSVaibhav Nagarnaik 		return 0;
3235c64e148aSVaibhav Nagarnaik 
3236c64e148aSVaibhav Nagarnaik 	cpu_buffer = buffer->buffers[cpu];
3237c64e148aSVaibhav Nagarnaik 	ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes;
3238c64e148aSVaibhav Nagarnaik 
3239c64e148aSVaibhav Nagarnaik 	return ret;
3240c64e148aSVaibhav Nagarnaik }
3241c64e148aSVaibhav Nagarnaik EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu);
3242c64e148aSVaibhav Nagarnaik 
3243c64e148aSVaibhav Nagarnaik /**
32447a8e76a3SSteven Rostedt  * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
32457a8e76a3SSteven Rostedt  * @buffer: The ring buffer
32467a8e76a3SSteven Rostedt  * @cpu: The per CPU buffer to get the entries from.
32477a8e76a3SSteven Rostedt  */
32487a8e76a3SSteven Rostedt unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
32497a8e76a3SSteven Rostedt {
32507a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
32517a8e76a3SSteven Rostedt 
32529e01c1b7SRusty Russell 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
32538aabee57SSteven Rostedt 		return 0;
32547a8e76a3SSteven Rostedt 
32557a8e76a3SSteven Rostedt 	cpu_buffer = buffer->buffers[cpu];
3256554f786eSSteven Rostedt 
3257f6195aa0SSteven Rostedt 	return rb_num_of_entries(cpu_buffer);
32587a8e76a3SSteven Rostedt }
3259c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
32607a8e76a3SSteven Rostedt 
32617a8e76a3SSteven Rostedt /**
3262884bfe89SSlava Pestov  * ring_buffer_overrun_cpu - get the number of overruns caused by the ring
3263884bfe89SSlava Pestov  * buffer wrapping around (only if RB_FL_OVERWRITE is on).
32647a8e76a3SSteven Rostedt  * @buffer: The ring buffer
32657a8e76a3SSteven Rostedt  * @cpu: The per CPU buffer to get the number of overruns from
32667a8e76a3SSteven Rostedt  */
32677a8e76a3SSteven Rostedt unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
32687a8e76a3SSteven Rostedt {
32697a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
32708aabee57SSteven Rostedt 	unsigned long ret;
32717a8e76a3SSteven Rostedt 
32729e01c1b7SRusty Russell 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
32738aabee57SSteven Rostedt 		return 0;
32747a8e76a3SSteven Rostedt 
32757a8e76a3SSteven Rostedt 	cpu_buffer = buffer->buffers[cpu];
327677ae365eSSteven Rostedt 	ret = local_read(&cpu_buffer->overrun);
3277554f786eSSteven Rostedt 
3278554f786eSSteven Rostedt 	return ret;
32797a8e76a3SSteven Rostedt }
3280c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
32817a8e76a3SSteven Rostedt 
32827a8e76a3SSteven Rostedt /**
3283884bfe89SSlava Pestov  * ring_buffer_commit_overrun_cpu - get the number of overruns caused by
3284884bfe89SSlava Pestov  * commits failing due to the buffer wrapping around while there are uncommitted
3285884bfe89SSlava Pestov  * events, such as during an interrupt storm.
3286f0d2c681SSteven Rostedt  * @buffer: The ring buffer
3287f0d2c681SSteven Rostedt  * @cpu: The per CPU buffer to get the number of overruns from
3288f0d2c681SSteven Rostedt  */
3289f0d2c681SSteven Rostedt unsigned long
3290f0d2c681SSteven Rostedt ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
3291f0d2c681SSteven Rostedt {
3292f0d2c681SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
3293f0d2c681SSteven Rostedt 	unsigned long ret;
3294f0d2c681SSteven Rostedt 
3295f0d2c681SSteven Rostedt 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
3296f0d2c681SSteven Rostedt 		return 0;
3297f0d2c681SSteven Rostedt 
3298f0d2c681SSteven Rostedt 	cpu_buffer = buffer->buffers[cpu];
329977ae365eSSteven Rostedt 	ret = local_read(&cpu_buffer->commit_overrun);
3300f0d2c681SSteven Rostedt 
3301f0d2c681SSteven Rostedt 	return ret;
3302f0d2c681SSteven Rostedt }
3303f0d2c681SSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
3304f0d2c681SSteven Rostedt 
3305f0d2c681SSteven Rostedt /**
3306884bfe89SSlava Pestov  * ring_buffer_dropped_events_cpu - get the number of dropped events caused by
3307884bfe89SSlava Pestov  * the ring buffer filling up (only if RB_FL_OVERWRITE is off).
3308884bfe89SSlava Pestov  * @buffer: The ring buffer
3309884bfe89SSlava Pestov  * @cpu: The per CPU buffer to get the number of overruns from
3310884bfe89SSlava Pestov  */
3311884bfe89SSlava Pestov unsigned long
3312884bfe89SSlava Pestov ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu)
3313884bfe89SSlava Pestov {
3314884bfe89SSlava Pestov 	struct ring_buffer_per_cpu *cpu_buffer;
3315884bfe89SSlava Pestov 	unsigned long ret;
3316884bfe89SSlava Pestov 
3317884bfe89SSlava Pestov 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
3318884bfe89SSlava Pestov 		return 0;
3319884bfe89SSlava Pestov 
3320884bfe89SSlava Pestov 	cpu_buffer = buffer->buffers[cpu];
3321884bfe89SSlava Pestov 	ret = local_read(&cpu_buffer->dropped_events);
3322884bfe89SSlava Pestov 
3323884bfe89SSlava Pestov 	return ret;
3324884bfe89SSlava Pestov }
3325884bfe89SSlava Pestov EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu);
3326884bfe89SSlava Pestov 
3327884bfe89SSlava Pestov /**
3328ad964704SSteven Rostedt (Red Hat)  * ring_buffer_read_events_cpu - get the number of events successfully read
3329ad964704SSteven Rostedt (Red Hat)  * @buffer: The ring buffer
3330ad964704SSteven Rostedt (Red Hat)  * @cpu: The per CPU buffer to get the number of events read
3331ad964704SSteven Rostedt (Red Hat)  */
3332ad964704SSteven Rostedt (Red Hat) unsigned long
3333ad964704SSteven Rostedt (Red Hat) ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu)
3334ad964704SSteven Rostedt (Red Hat) {
3335ad964704SSteven Rostedt (Red Hat) 	struct ring_buffer_per_cpu *cpu_buffer;
3336ad964704SSteven Rostedt (Red Hat) 
3337ad964704SSteven Rostedt (Red Hat) 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
3338ad964704SSteven Rostedt (Red Hat) 		return 0;
3339ad964704SSteven Rostedt (Red Hat) 
3340ad964704SSteven Rostedt (Red Hat) 	cpu_buffer = buffer->buffers[cpu];
3341ad964704SSteven Rostedt (Red Hat) 	return cpu_buffer->read;
3342ad964704SSteven Rostedt (Red Hat) }
3343ad964704SSteven Rostedt (Red Hat) EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu);
3344ad964704SSteven Rostedt (Red Hat) 
3345ad964704SSteven Rostedt (Red Hat) /**
33467a8e76a3SSteven Rostedt  * ring_buffer_entries - get the number of entries in a buffer
33477a8e76a3SSteven Rostedt  * @buffer: The ring buffer
33487a8e76a3SSteven Rostedt  *
33497a8e76a3SSteven Rostedt  * Returns the total number of entries in the ring buffer
33507a8e76a3SSteven Rostedt  * (all CPU entries)
33517a8e76a3SSteven Rostedt  */
33527a8e76a3SSteven Rostedt unsigned long ring_buffer_entries(struct ring_buffer *buffer)
33537a8e76a3SSteven Rostedt {
33547a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
33557a8e76a3SSteven Rostedt 	unsigned long entries = 0;
33567a8e76a3SSteven Rostedt 	int cpu;
33577a8e76a3SSteven Rostedt 
33587a8e76a3SSteven Rostedt 	/* if you care about this being correct, lock the buffer */
33597a8e76a3SSteven Rostedt 	for_each_buffer_cpu(buffer, cpu) {
33607a8e76a3SSteven Rostedt 		cpu_buffer = buffer->buffers[cpu];
3361f6195aa0SSteven Rostedt 		entries += rb_num_of_entries(cpu_buffer);
33627a8e76a3SSteven Rostedt 	}
33637a8e76a3SSteven Rostedt 
33647a8e76a3SSteven Rostedt 	return entries;
33657a8e76a3SSteven Rostedt }
3366c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_entries);
33677a8e76a3SSteven Rostedt 
33687a8e76a3SSteven Rostedt /**
336967b394f7SJiri Olsa  * ring_buffer_overruns - get the number of overruns in buffer
33707a8e76a3SSteven Rostedt  * @buffer: The ring buffer
33717a8e76a3SSteven Rostedt  *
33727a8e76a3SSteven Rostedt  * Returns the total number of overruns in the ring buffer
33737a8e76a3SSteven Rostedt  * (all CPU entries)
33747a8e76a3SSteven Rostedt  */
33757a8e76a3SSteven Rostedt unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
33767a8e76a3SSteven Rostedt {
33777a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
33787a8e76a3SSteven Rostedt 	unsigned long overruns = 0;
33797a8e76a3SSteven Rostedt 	int cpu;
33807a8e76a3SSteven Rostedt 
33817a8e76a3SSteven Rostedt 	/* if you care about this being correct, lock the buffer */
33827a8e76a3SSteven Rostedt 	for_each_buffer_cpu(buffer, cpu) {
33837a8e76a3SSteven Rostedt 		cpu_buffer = buffer->buffers[cpu];
338477ae365eSSteven Rostedt 		overruns += local_read(&cpu_buffer->overrun);
33857a8e76a3SSteven Rostedt 	}
33867a8e76a3SSteven Rostedt 
33877a8e76a3SSteven Rostedt 	return overruns;
33887a8e76a3SSteven Rostedt }
3389c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_overruns);
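
/*
 * Illustrative usage sketch (hypothetical code, not part of ring_buffer.c):
 * combining the whole-buffer and per-cpu statistics accessors above to
 * report buffer usage. The example_rb pointer and example_* name are
 * hypothetical.
 */
static void example_report_stats(struct ring_buffer *example_rb)
{
	int cpu;

	pr_info("total entries: %lu, total overruns: %lu\n",
		ring_buffer_entries(example_rb),
		ring_buffer_overruns(example_rb));

	for_each_online_cpu(cpu)
		pr_info("cpu%d: %lu entries, %lu bytes, %lu overruns, %lu dropped\n",
			cpu,
			ring_buffer_entries_cpu(example_rb, cpu),
			ring_buffer_bytes_cpu(example_rb, cpu),
			ring_buffer_overrun_cpu(example_rb, cpu),
			ring_buffer_dropped_events_cpu(example_rb, cpu));
}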
33907a8e76a3SSteven Rostedt 
3391642edba5SSteven Rostedt static void rb_iter_reset(struct ring_buffer_iter *iter)
33927a8e76a3SSteven Rostedt {
33937a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
33947a8e76a3SSteven Rostedt 
3395d769041fSSteven Rostedt 	/* Iterator usage is expected to have record disabled */
3396d769041fSSteven Rostedt 	iter->head_page = cpu_buffer->reader_page;
33976f807acdSSteven Rostedt 	iter->head = cpu_buffer->reader_page->read;
3398651e22f2SSteven Rostedt (Red Hat) 
3399651e22f2SSteven Rostedt (Red Hat) 	iter->cache_reader_page = iter->head_page;
340024607f11SSteven Rostedt (Red Hat) 	iter->cache_read = cpu_buffer->read;
3401651e22f2SSteven Rostedt (Red Hat) 
3402d769041fSSteven Rostedt 	if (iter->head)
3403d769041fSSteven Rostedt 		iter->read_stamp = cpu_buffer->read_stamp;
3404d769041fSSteven Rostedt 	else
3405abc9b56dSSteven Rostedt 		iter->read_stamp = iter->head_page->page->time_stamp;
3406642edba5SSteven Rostedt }
3407f83c9d0fSSteven Rostedt 
3408642edba5SSteven Rostedt /**
3409642edba5SSteven Rostedt  * ring_buffer_iter_reset - reset an iterator
3410642edba5SSteven Rostedt  * @iter: The iterator to reset
3411642edba5SSteven Rostedt  *
3412642edba5SSteven Rostedt  * Resets the iterator, so that it will start from the beginning
3413642edba5SSteven Rostedt  * again.
3414642edba5SSteven Rostedt  */
3415642edba5SSteven Rostedt void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
3416642edba5SSteven Rostedt {
3417554f786eSSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
3418642edba5SSteven Rostedt 	unsigned long flags;
3419642edba5SSteven Rostedt 
3420554f786eSSteven Rostedt 	if (!iter)
3421554f786eSSteven Rostedt 		return;
3422554f786eSSteven Rostedt 
3423554f786eSSteven Rostedt 	cpu_buffer = iter->cpu_buffer;
3424554f786eSSteven Rostedt 
34255389f6faSThomas Gleixner 	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3426642edba5SSteven Rostedt 	rb_iter_reset(iter);
34275389f6faSThomas Gleixner 	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
34287a8e76a3SSteven Rostedt }
3429c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
34307a8e76a3SSteven Rostedt 
34317a8e76a3SSteven Rostedt /**
34327a8e76a3SSteven Rostedt  * ring_buffer_iter_empty - check if an iterator has no more to read
34337a8e76a3SSteven Rostedt  * @iter: The iterator to check
34347a8e76a3SSteven Rostedt  */
34357a8e76a3SSteven Rostedt int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
34367a8e76a3SSteven Rostedt {
34377a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
34387a8e76a3SSteven Rostedt 
34397a8e76a3SSteven Rostedt 	cpu_buffer = iter->cpu_buffer;
34407a8e76a3SSteven Rostedt 
3441bf41a158SSteven Rostedt 	return iter->head_page == cpu_buffer->commit_page &&
3442bf41a158SSteven Rostedt 		iter->head == rb_commit_index(cpu_buffer);
34437a8e76a3SSteven Rostedt }
3444c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
34457a8e76a3SSteven Rostedt 
34467a8e76a3SSteven Rostedt static void
34477a8e76a3SSteven Rostedt rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
34487a8e76a3SSteven Rostedt 		     struct ring_buffer_event *event)
34497a8e76a3SSteven Rostedt {
34507a8e76a3SSteven Rostedt 	u64 delta;
34517a8e76a3SSteven Rostedt 
3452334d4169SLai Jiangshan 	switch (event->type_len) {
34537a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_PADDING:
34547a8e76a3SSteven Rostedt 		return;
34557a8e76a3SSteven Rostedt 
34567a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_TIME_EXTEND:
34577a8e76a3SSteven Rostedt 		delta = event->array[0];
34587a8e76a3SSteven Rostedt 		delta <<= TS_SHIFT;
34597a8e76a3SSteven Rostedt 		delta += event->time_delta;
34607a8e76a3SSteven Rostedt 		cpu_buffer->read_stamp += delta;
34617a8e76a3SSteven Rostedt 		return;
34627a8e76a3SSteven Rostedt 
34637a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_TIME_STAMP:
34647a8e76a3SSteven Rostedt 		/* FIXME: not implemented */
34657a8e76a3SSteven Rostedt 		return;
34667a8e76a3SSteven Rostedt 
34677a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_DATA:
34687a8e76a3SSteven Rostedt 		cpu_buffer->read_stamp += event->time_delta;
34697a8e76a3SSteven Rostedt 		return;
34707a8e76a3SSteven Rostedt 
34717a8e76a3SSteven Rostedt 	default:
34727a8e76a3SSteven Rostedt 		BUG();
34737a8e76a3SSteven Rostedt 	}
34747a8e76a3SSteven Rostedt 	return;
34757a8e76a3SSteven Rostedt }
34767a8e76a3SSteven Rostedt 
34777a8e76a3SSteven Rostedt static void
34787a8e76a3SSteven Rostedt rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
34797a8e76a3SSteven Rostedt 			  struct ring_buffer_event *event)
34807a8e76a3SSteven Rostedt {
34817a8e76a3SSteven Rostedt 	u64 delta;
34827a8e76a3SSteven Rostedt 
3483334d4169SLai Jiangshan 	switch (event->type_len) {
34847a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_PADDING:
34857a8e76a3SSteven Rostedt 		return;
34867a8e76a3SSteven Rostedt 
34877a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_TIME_EXTEND:
34887a8e76a3SSteven Rostedt 		delta = event->array[0];
34897a8e76a3SSteven Rostedt 		delta <<= TS_SHIFT;
34907a8e76a3SSteven Rostedt 		delta += event->time_delta;
34917a8e76a3SSteven Rostedt 		iter->read_stamp += delta;
34927a8e76a3SSteven Rostedt 		return;
34937a8e76a3SSteven Rostedt 
34947a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_TIME_STAMP:
34957a8e76a3SSteven Rostedt 		/* FIXME: not implemented */
34967a8e76a3SSteven Rostedt 		return;
34977a8e76a3SSteven Rostedt 
34987a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_DATA:
34997a8e76a3SSteven Rostedt 		iter->read_stamp += event->time_delta;
35007a8e76a3SSteven Rostedt 		return;
35017a8e76a3SSteven Rostedt 
35027a8e76a3SSteven Rostedt 	default:
35037a8e76a3SSteven Rostedt 		BUG();
35047a8e76a3SSteven Rostedt 	}
35057a8e76a3SSteven Rostedt 	return;
35067a8e76a3SSteven Rostedt }
35077a8e76a3SSteven Rostedt 
3508d769041fSSteven Rostedt static struct buffer_page *
3509d769041fSSteven Rostedt rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
35107a8e76a3SSteven Rostedt {
3511d769041fSSteven Rostedt 	struct buffer_page *reader = NULL;
351266a8cb95SSteven Rostedt 	unsigned long overwrite;
3513d769041fSSteven Rostedt 	unsigned long flags;
3514818e3dd3SSteven Rostedt 	int nr_loops = 0;
351577ae365eSSteven Rostedt 	int ret;
3516d769041fSSteven Rostedt 
35173e03fb7fSSteven Rostedt 	local_irq_save(flags);
35180199c4e6SThomas Gleixner 	arch_spin_lock(&cpu_buffer->lock);
3519d769041fSSteven Rostedt 
3520d769041fSSteven Rostedt  again:
3521818e3dd3SSteven Rostedt 	/*
3522818e3dd3SSteven Rostedt 	 * This should normally only loop twice. But because the
3523818e3dd3SSteven Rostedt 	 * start of the reader inserts an empty page, it causes
3524818e3dd3SSteven Rostedt 	 * a case where we will loop three times. There should be no
3525818e3dd3SSteven Rostedt 	 * reason to loop four times (that I know of).
3526818e3dd3SSteven Rostedt 	 */
35273e89c7bbSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
3528818e3dd3SSteven Rostedt 		reader = NULL;
3529818e3dd3SSteven Rostedt 		goto out;
3530818e3dd3SSteven Rostedt 	}
3531818e3dd3SSteven Rostedt 
3532d769041fSSteven Rostedt 	reader = cpu_buffer->reader_page;
3533d769041fSSteven Rostedt 
3534d769041fSSteven Rostedt 	/* If there's more to read, return this page */
3535bf41a158SSteven Rostedt 	if (cpu_buffer->reader_page->read < rb_page_size(reader))
3536d769041fSSteven Rostedt 		goto out;
3537d769041fSSteven Rostedt 
3538d769041fSSteven Rostedt 	/* Never should we have an index greater than the size */
35393e89c7bbSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer,
35403e89c7bbSSteven Rostedt 		       cpu_buffer->reader_page->read > rb_page_size(reader)))
35413e89c7bbSSteven Rostedt 		goto out;
3542d769041fSSteven Rostedt 
3543d769041fSSteven Rostedt 	/* check if we caught up to the tail */
3544d769041fSSteven Rostedt 	reader = NULL;
3545bf41a158SSteven Rostedt 	if (cpu_buffer->commit_page == cpu_buffer->reader_page)
3546d769041fSSteven Rostedt 		goto out;
35477a8e76a3SSteven Rostedt 
3548a5fb8331SSteven Rostedt 	/* Don't bother swapping if the ring buffer is empty */
3549a5fb8331SSteven Rostedt 	if (rb_num_of_entries(cpu_buffer) == 0)
3550a5fb8331SSteven Rostedt 		goto out;
3551a5fb8331SSteven Rostedt 
35527a8e76a3SSteven Rostedt 	/*
3553d769041fSSteven Rostedt 	 * Reset the reader page to size zero.
35547a8e76a3SSteven Rostedt 	 */
355577ae365eSSteven Rostedt 	local_set(&cpu_buffer->reader_page->write, 0);
355677ae365eSSteven Rostedt 	local_set(&cpu_buffer->reader_page->entries, 0);
355777ae365eSSteven Rostedt 	local_set(&cpu_buffer->reader_page->page->commit, 0);
3558ff0ff84aSSteven Rostedt 	cpu_buffer->reader_page->real_end = 0;
3559d769041fSSteven Rostedt 
356077ae365eSSteven Rostedt  spin:
356177ae365eSSteven Rostedt 	/*
356277ae365eSSteven Rostedt 	 * Splice the empty reader page into the list around the head.
356377ae365eSSteven Rostedt 	 */
356477ae365eSSteven Rostedt 	reader = rb_set_head_page(cpu_buffer);
356554f7be5bSSteven Rostedt 	if (!reader)
356654f7be5bSSteven Rostedt 		goto out;
35670e1ff5d7SSteven Rostedt 	cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
3568d769041fSSteven Rostedt 	cpu_buffer->reader_page->list.prev = reader->list.prev;
3569bf41a158SSteven Rostedt 
35703adc54faSSteven Rostedt 	/*
35713adc54faSSteven Rostedt 	 * cpu_buffer->pages just needs to point to the buffer; it
35723adc54faSSteven Rostedt 	 *  has no specific buffer page to point to. Let's move it out
357325985edcSLucas De Marchi 	 *  of our way so we don't accidentally swap it.
35743adc54faSSteven Rostedt 	 */
35753adc54faSSteven Rostedt 	cpu_buffer->pages = reader->list.prev;
35763adc54faSSteven Rostedt 
357777ae365eSSteven Rostedt 	/* The reader page will be pointing to the new head */
357877ae365eSSteven Rostedt 	rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
3579d769041fSSteven Rostedt 
3580d769041fSSteven Rostedt 	/*
358166a8cb95SSteven Rostedt 	 * We want to make sure we read the overruns after we set up our
358266a8cb95SSteven Rostedt 	 * pointers to the next object. The writer side does a
358366a8cb95SSteven Rostedt 	 * cmpxchg to cross pages which acts as the mb on the writer
358466a8cb95SSteven Rostedt 	 * side. Note, the reader will constantly fail the swap
358566a8cb95SSteven Rostedt 	 * while the writer is updating the pointers, so this
358666a8cb95SSteven Rostedt 	 * guarantees that the overwrite recorded here is the one we
358766a8cb95SSteven Rostedt 	 * want to compare with the last_overrun.
358866a8cb95SSteven Rostedt 	 */
358966a8cb95SSteven Rostedt 	smp_mb();
359066a8cb95SSteven Rostedt 	overwrite = local_read(&(cpu_buffer->overrun));
359166a8cb95SSteven Rostedt 
359266a8cb95SSteven Rostedt 	/*
359377ae365eSSteven Rostedt 	 * Here's the tricky part.
359477ae365eSSteven Rostedt 	 *
359577ae365eSSteven Rostedt 	 * We need to move the pointer past the header page.
359677ae365eSSteven Rostedt 	 * But we can only do that if a writer is not currently
359777ae365eSSteven Rostedt 	 * moving it. The page before the header page has the
359877ae365eSSteven Rostedt 	 * flag bit '1' set if it is pointing to the page we want,
359977ae365eSSteven Rostedt 	 * but if the writer is in the process of moving it
360077ae365eSSteven Rostedt 	 * then it will be '2' or already moved '0'.
3601d769041fSSteven Rostedt 	 */
3602d769041fSSteven Rostedt 
360377ae365eSSteven Rostedt 	ret = rb_head_page_replace(reader, cpu_buffer->reader_page);
360477ae365eSSteven Rostedt 
360577ae365eSSteven Rostedt 	/*
360677ae365eSSteven Rostedt 	 * If we did not convert it, then we must try again.
360777ae365eSSteven Rostedt 	 */
360877ae365eSSteven Rostedt 	if (!ret)
360977ae365eSSteven Rostedt 		goto spin;
361077ae365eSSteven Rostedt 
361177ae365eSSteven Rostedt 	/*
361277ae365eSSteven Rostedt 	 * Yeah! We succeeded in replacing the page.
361377ae365eSSteven Rostedt 	 *
361477ae365eSSteven Rostedt 	 * Now make the new head point back to the reader page.
361577ae365eSSteven Rostedt 	 */
36165ded3dc6SDavid Sharp 	rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
36177a8e76a3SSteven Rostedt 	rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
3618d769041fSSteven Rostedt 
3619d769041fSSteven Rostedt 	/* Finally update the reader page to the new head */
3620d769041fSSteven Rostedt 	cpu_buffer->reader_page = reader;
3621b81f472aSSteven Rostedt (Red Hat) 	cpu_buffer->reader_page->read = 0;
3622d769041fSSteven Rostedt 
362366a8cb95SSteven Rostedt 	if (overwrite != cpu_buffer->last_overrun) {
362466a8cb95SSteven Rostedt 		cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
362566a8cb95SSteven Rostedt 		cpu_buffer->last_overrun = overwrite;
362666a8cb95SSteven Rostedt 	}
362766a8cb95SSteven Rostedt 
3628d769041fSSteven Rostedt 	goto again;
3629d769041fSSteven Rostedt 
3630d769041fSSteven Rostedt  out:
3631b81f472aSSteven Rostedt (Red Hat) 	/* Update the read_stamp on the first event */
3632b81f472aSSteven Rostedt (Red Hat) 	if (reader && reader->read == 0)
3633b81f472aSSteven Rostedt (Red Hat) 		cpu_buffer->read_stamp = reader->page->time_stamp;
3634b81f472aSSteven Rostedt (Red Hat) 
36350199c4e6SThomas Gleixner 	arch_spin_unlock(&cpu_buffer->lock);
36363e03fb7fSSteven Rostedt 	local_irq_restore(flags);
3637d769041fSSteven Rostedt 
3638d769041fSSteven Rostedt 	return reader;
36397a8e76a3SSteven Rostedt }
36407a8e76a3SSteven Rostedt 
3641d769041fSSteven Rostedt static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
3642d769041fSSteven Rostedt {
3643d769041fSSteven Rostedt 	struct ring_buffer_event *event;
3644d769041fSSteven Rostedt 	struct buffer_page *reader;
3645d769041fSSteven Rostedt 	unsigned length;
3646d769041fSSteven Rostedt 
3647d769041fSSteven Rostedt 	reader = rb_get_reader_page(cpu_buffer);
3648d769041fSSteven Rostedt 
3649d769041fSSteven Rostedt 	/* This function should not be called when buffer is empty */
36503e89c7bbSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, !reader))
36513e89c7bbSSteven Rostedt 		return;
3652d769041fSSteven Rostedt 
3653d769041fSSteven Rostedt 	event = rb_reader_event(cpu_buffer);
36547a8e76a3SSteven Rostedt 
3655a1863c21SSteven Rostedt 	if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
3656e4906effSSteven Rostedt 		cpu_buffer->read++;
36577a8e76a3SSteven Rostedt 
36587a8e76a3SSteven Rostedt 	rb_update_read_stamp(cpu_buffer, event);
36597a8e76a3SSteven Rostedt 
3660d769041fSSteven Rostedt 	length = rb_event_length(event);
36616f807acdSSteven Rostedt 	cpu_buffer->reader_page->read += length;
36627a8e76a3SSteven Rostedt }
36637a8e76a3SSteven Rostedt 
36647a8e76a3SSteven Rostedt static void rb_advance_iter(struct ring_buffer_iter *iter)
36657a8e76a3SSteven Rostedt {
36667a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
36677a8e76a3SSteven Rostedt 	struct ring_buffer_event *event;
36687a8e76a3SSteven Rostedt 	unsigned length;
36697a8e76a3SSteven Rostedt 
36707a8e76a3SSteven Rostedt 	cpu_buffer = iter->cpu_buffer;
36717a8e76a3SSteven Rostedt 
36727a8e76a3SSteven Rostedt 	/*
36737a8e76a3SSteven Rostedt 	 * Check if we are at the end of the buffer.
36747a8e76a3SSteven Rostedt 	 */
3675bf41a158SSteven Rostedt 	if (iter->head >= rb_page_size(iter->head_page)) {
3676ea05b57cSSteven Rostedt 		/* discarded commits can make the page empty */
3677ea05b57cSSteven Rostedt 		if (iter->head_page == cpu_buffer->commit_page)
36783e89c7bbSSteven Rostedt 			return;
3679d769041fSSteven Rostedt 		rb_inc_iter(iter);
36807a8e76a3SSteven Rostedt 		return;
36817a8e76a3SSteven Rostedt 	}
36827a8e76a3SSteven Rostedt 
36837a8e76a3SSteven Rostedt 	event = rb_iter_head_event(iter);
36847a8e76a3SSteven Rostedt 
36857a8e76a3SSteven Rostedt 	length = rb_event_length(event);
36867a8e76a3SSteven Rostedt 
36877a8e76a3SSteven Rostedt 	/*
36887a8e76a3SSteven Rostedt 	 * This should not be called to advance the header if we are
36897a8e76a3SSteven Rostedt 	 * at the tail of the buffer.
36907a8e76a3SSteven Rostedt 	 */
36913e89c7bbSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer,
3692f536aafcSSteven Rostedt 		       (iter->head_page == cpu_buffer->commit_page) &&
36933e89c7bbSSteven Rostedt 		       (iter->head + length > rb_commit_index(cpu_buffer))))
36943e89c7bbSSteven Rostedt 		return;
36957a8e76a3SSteven Rostedt 
36967a8e76a3SSteven Rostedt 	rb_update_iter_read_stamp(iter, event);
36977a8e76a3SSteven Rostedt 
36987a8e76a3SSteven Rostedt 	iter->head += length;
36997a8e76a3SSteven Rostedt 
37007a8e76a3SSteven Rostedt 	/* check for end of page padding */
3701bf41a158SSteven Rostedt 	if ((iter->head >= rb_page_size(iter->head_page)) &&
3702bf41a158SSteven Rostedt 	    (iter->head_page != cpu_buffer->commit_page))
3703771e0384SSteven Rostedt 		rb_inc_iter(iter);
37047a8e76a3SSteven Rostedt }
37057a8e76a3SSteven Rostedt 
370666a8cb95SSteven Rostedt static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
370766a8cb95SSteven Rostedt {
370866a8cb95SSteven Rostedt 	return cpu_buffer->lost_events;
370966a8cb95SSteven Rostedt }
371066a8cb95SSteven Rostedt 
3711f83c9d0fSSteven Rostedt static struct ring_buffer_event *
371266a8cb95SSteven Rostedt rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
371366a8cb95SSteven Rostedt 	       unsigned long *lost_events)
37147a8e76a3SSteven Rostedt {
37157a8e76a3SSteven Rostedt 	struct ring_buffer_event *event;
3716d769041fSSteven Rostedt 	struct buffer_page *reader;
3717818e3dd3SSteven Rostedt 	int nr_loops = 0;
37187a8e76a3SSteven Rostedt 
37197a8e76a3SSteven Rostedt  again:
3720818e3dd3SSteven Rostedt 	/*
372169d1b839SSteven Rostedt 	 * We repeat when a time extend is encountered.
372269d1b839SSteven Rostedt 	 * Since the time extend is always attached to a data event,
372369d1b839SSteven Rostedt 	 * we should never loop more than once.
372469d1b839SSteven Rostedt 	 * (We never hit the following condition more than twice).
3725818e3dd3SSteven Rostedt 	 */
372669d1b839SSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3727818e3dd3SSteven Rostedt 		return NULL;
3728818e3dd3SSteven Rostedt 
3729d769041fSSteven Rostedt 	reader = rb_get_reader_page(cpu_buffer);
3730d769041fSSteven Rostedt 	if (!reader)
37317a8e76a3SSteven Rostedt 		return NULL;
37327a8e76a3SSteven Rostedt 
3733d769041fSSteven Rostedt 	event = rb_reader_event(cpu_buffer);
37347a8e76a3SSteven Rostedt 
3735334d4169SLai Jiangshan 	switch (event->type_len) {
37367a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_PADDING:
37372d622719STom Zanussi 		if (rb_null_event(event))
3738bf41a158SSteven Rostedt 			RB_WARN_ON(cpu_buffer, 1);
37392d622719STom Zanussi 		/*
37402d622719STom Zanussi 		 * Because the writer could be discarding every
37412d622719STom Zanussi 		 * event it creates (which would probably be bad)
37422d622719STom Zanussi 		 * if we were to go back to "again" then we may never
37432d622719STom Zanussi 		 * catch up, and will trigger the warn on, or lock
37442d622719STom Zanussi 		 * the box. Return the padding, and we will release
37452d622719STom Zanussi 		 * the current locks, and try again.
37462d622719STom Zanussi 		 */
37472d622719STom Zanussi 		return event;
37487a8e76a3SSteven Rostedt 
37497a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_TIME_EXTEND:
37507a8e76a3SSteven Rostedt 		/* Internal data, OK to advance */
3751d769041fSSteven Rostedt 		rb_advance_reader(cpu_buffer);
37527a8e76a3SSteven Rostedt 		goto again;
37537a8e76a3SSteven Rostedt 
37547a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_TIME_STAMP:
37557a8e76a3SSteven Rostedt 		/* FIXME: not implemented */
3756d769041fSSteven Rostedt 		rb_advance_reader(cpu_buffer);
37577a8e76a3SSteven Rostedt 		goto again;
37587a8e76a3SSteven Rostedt 
37597a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_DATA:
37607a8e76a3SSteven Rostedt 		if (ts) {
37617a8e76a3SSteven Rostedt 			*ts = cpu_buffer->read_stamp + event->time_delta;
3762d8eeb2d3SRobert Richter 			ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
376337886f6aSSteven Rostedt 							 cpu_buffer->cpu, ts);
37647a8e76a3SSteven Rostedt 		}
376566a8cb95SSteven Rostedt 		if (lost_events)
376666a8cb95SSteven Rostedt 			*lost_events = rb_lost_events(cpu_buffer);
37677a8e76a3SSteven Rostedt 		return event;
37687a8e76a3SSteven Rostedt 
37697a8e76a3SSteven Rostedt 	default:
37707a8e76a3SSteven Rostedt 		BUG();
37717a8e76a3SSteven Rostedt 	}
37727a8e76a3SSteven Rostedt 
37737a8e76a3SSteven Rostedt 	return NULL;
37747a8e76a3SSteven Rostedt }
3775c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_peek);
37767a8e76a3SSteven Rostedt 
3777f83c9d0fSSteven Rostedt static struct ring_buffer_event *
3778f83c9d0fSSteven Rostedt rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
37797a8e76a3SSteven Rostedt {
37807a8e76a3SSteven Rostedt 	struct ring_buffer *buffer;
37817a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
37827a8e76a3SSteven Rostedt 	struct ring_buffer_event *event;
3783818e3dd3SSteven Rostedt 	int nr_loops = 0;
37847a8e76a3SSteven Rostedt 
37857a8e76a3SSteven Rostedt 	cpu_buffer = iter->cpu_buffer;
37867a8e76a3SSteven Rostedt 	buffer = cpu_buffer->buffer;
37877a8e76a3SSteven Rostedt 
3788492a74f4SSteven Rostedt 	/*
3789492a74f4SSteven Rostedt 	 * Check if someone performed a consuming read to
3790492a74f4SSteven Rostedt 	 * the buffer. A consuming read invalidates the iterator
3791492a74f4SSteven Rostedt 	 * and we need to reset the iterator in this case.
3792492a74f4SSteven Rostedt 	 */
3793492a74f4SSteven Rostedt 	if (unlikely(iter->cache_read != cpu_buffer->read ||
3794492a74f4SSteven Rostedt 		     iter->cache_reader_page != cpu_buffer->reader_page))
3795492a74f4SSteven Rostedt 		rb_iter_reset(iter);
3796492a74f4SSteven Rostedt 
37977a8e76a3SSteven Rostedt  again:
37983c05d748SSteven Rostedt 	if (ring_buffer_iter_empty(iter))
37993c05d748SSteven Rostedt 		return NULL;
38003c05d748SSteven Rostedt 
3801818e3dd3SSteven Rostedt 	/*
3802021de3d9SSteven Rostedt (Red Hat) 	 * We repeat when a time extend is encountered or we hit
3803021de3d9SSteven Rostedt (Red Hat) 	 * the end of the page. Since the time extend is always attached
3804021de3d9SSteven Rostedt (Red Hat) 	 * to a data event, we should never loop more than three times.
3805021de3d9SSteven Rostedt (Red Hat) 	 * Once for going to the next page, once on a time extend, and
3806021de3d9SSteven Rostedt (Red Hat) 	 * finally once to get the event.
3807021de3d9SSteven Rostedt (Red Hat) 	 * (We never hit the following condition more than thrice).
3808818e3dd3SSteven Rostedt 	 */
3809021de3d9SSteven Rostedt (Red Hat) 	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3))
3810818e3dd3SSteven Rostedt 		return NULL;
3811818e3dd3SSteven Rostedt 
38127a8e76a3SSteven Rostedt 	if (rb_per_cpu_empty(cpu_buffer))
38137a8e76a3SSteven Rostedt 		return NULL;
38147a8e76a3SSteven Rostedt 
381510e83fd0SSteven Rostedt (Red Hat) 	if (iter->head >= rb_page_size(iter->head_page)) {
38163c05d748SSteven Rostedt 		rb_inc_iter(iter);
38173c05d748SSteven Rostedt 		goto again;
38183c05d748SSteven Rostedt 	}
38193c05d748SSteven Rostedt 
38207a8e76a3SSteven Rostedt 	event = rb_iter_head_event(iter);
38217a8e76a3SSteven Rostedt 
3822334d4169SLai Jiangshan 	switch (event->type_len) {
38237a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_PADDING:
38242d622719STom Zanussi 		if (rb_null_event(event)) {
3825d769041fSSteven Rostedt 			rb_inc_iter(iter);
38267a8e76a3SSteven Rostedt 			goto again;
38272d622719STom Zanussi 		}
38282d622719STom Zanussi 		rb_advance_iter(iter);
38292d622719STom Zanussi 		return event;
38307a8e76a3SSteven Rostedt 
38317a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_TIME_EXTEND:
38327a8e76a3SSteven Rostedt 		/* Internal data, OK to advance */
38337a8e76a3SSteven Rostedt 		rb_advance_iter(iter);
38347a8e76a3SSteven Rostedt 		goto again;
38357a8e76a3SSteven Rostedt 
38367a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_TIME_STAMP:
38377a8e76a3SSteven Rostedt 		/* FIXME: not implemented */
38387a8e76a3SSteven Rostedt 		rb_advance_iter(iter);
38397a8e76a3SSteven Rostedt 		goto again;
38407a8e76a3SSteven Rostedt 
38417a8e76a3SSteven Rostedt 	case RINGBUF_TYPE_DATA:
38427a8e76a3SSteven Rostedt 		if (ts) {
38437a8e76a3SSteven Rostedt 			*ts = iter->read_stamp + event->time_delta;
384437886f6aSSteven Rostedt 			ring_buffer_normalize_time_stamp(buffer,
384537886f6aSSteven Rostedt 							 cpu_buffer->cpu, ts);
38467a8e76a3SSteven Rostedt 		}
38477a8e76a3SSteven Rostedt 		return event;
38487a8e76a3SSteven Rostedt 
38497a8e76a3SSteven Rostedt 	default:
38507a8e76a3SSteven Rostedt 		BUG();
38517a8e76a3SSteven Rostedt 	}
38527a8e76a3SSteven Rostedt 
38537a8e76a3SSteven Rostedt 	return NULL;
38547a8e76a3SSteven Rostedt }
3855c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
38567a8e76a3SSteven Rostedt 
3857289a5a25SSteven Rostedt (Red Hat) static inline bool rb_reader_lock(struct ring_buffer_per_cpu *cpu_buffer)
38588d707e8eSSteven Rostedt {
3859289a5a25SSteven Rostedt (Red Hat) 	if (likely(!in_nmi())) {
3860289a5a25SSteven Rostedt (Red Hat) 		raw_spin_lock(&cpu_buffer->reader_lock);
3861289a5a25SSteven Rostedt (Red Hat) 		return true;
3862289a5a25SSteven Rostedt (Red Hat) 	}
3863289a5a25SSteven Rostedt (Red Hat) 
38648d707e8eSSteven Rostedt 	/*
38658d707e8eSSteven Rostedt 	 * If an NMI die dumps out the content of the ring buffer,
3866289a5a25SSteven Rostedt (Red Hat) 	 * trylock must be used to prevent a deadlock if the NMI
3867289a5a25SSteven Rostedt (Red Hat) 	 * preempted a task that holds the ring buffer locks. If
3868289a5a25SSteven Rostedt (Red Hat) 	 * we get the lock then all is fine, if not, then continue
3869289a5a25SSteven Rostedt (Red Hat) 	 * to do the read, but this can corrupt the ring buffer,
3870289a5a25SSteven Rostedt (Red Hat) 	 * so it must be permanently disabled from future writes.
3871289a5a25SSteven Rostedt (Red Hat) 	 * Reading from NMI is a one-shot deal.
38728d707e8eSSteven Rostedt 	 */
3873289a5a25SSteven Rostedt (Red Hat) 	if (raw_spin_trylock(&cpu_buffer->reader_lock))
3874289a5a25SSteven Rostedt (Red Hat) 		return true;
38758d707e8eSSteven Rostedt 
3876289a5a25SSteven Rostedt (Red Hat) 	/* Continue without locking, but disable the ring buffer */
3877289a5a25SSteven Rostedt (Red Hat) 	atomic_inc(&cpu_buffer->record_disabled);
3878289a5a25SSteven Rostedt (Red Hat) 	return false;
3879289a5a25SSteven Rostedt (Red Hat) }
3880289a5a25SSteven Rostedt (Red Hat) 
3881289a5a25SSteven Rostedt (Red Hat) static inline void
3882289a5a25SSteven Rostedt (Red Hat) rb_reader_unlock(struct ring_buffer_per_cpu *cpu_buffer, bool locked)
3883289a5a25SSteven Rostedt (Red Hat) {
3884289a5a25SSteven Rostedt (Red Hat) 	if (likely(locked))
3885289a5a25SSteven Rostedt (Red Hat) 		raw_spin_unlock(&cpu_buffer->reader_lock);
3886289a5a25SSteven Rostedt (Red Hat) 	return;
38878d707e8eSSteven Rostedt }
38888d707e8eSSteven Rostedt 
38897a8e76a3SSteven Rostedt /**
3890f83c9d0fSSteven Rostedt  * ring_buffer_peek - peek at the next event to be read
3891f83c9d0fSSteven Rostedt  * @buffer: The ring buffer to read
3892f83c9d0fSSteven Rostedt  * @cpu: The cpu to peek at
3893f83c9d0fSSteven Rostedt  * @ts: The timestamp counter of this event.
389466a8cb95SSteven Rostedt  * @lost_events: a variable to store if events were lost (may be NULL)
3895f83c9d0fSSteven Rostedt  *
3896f83c9d0fSSteven Rostedt  * This will return the event that will be read next, but does
3897f83c9d0fSSteven Rostedt  * not consume the data.
3898f83c9d0fSSteven Rostedt  */
3899f83c9d0fSSteven Rostedt struct ring_buffer_event *
390066a8cb95SSteven Rostedt ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
390166a8cb95SSteven Rostedt 		 unsigned long *lost_events)
3902f83c9d0fSSteven Rostedt {
3903f83c9d0fSSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
39048aabee57SSteven Rostedt 	struct ring_buffer_event *event;
3905f83c9d0fSSteven Rostedt 	unsigned long flags;
3906289a5a25SSteven Rostedt (Red Hat) 	bool dolock;
3907f83c9d0fSSteven Rostedt 
3908554f786eSSteven Rostedt 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
39098aabee57SSteven Rostedt 		return NULL;
3910554f786eSSteven Rostedt 
39112d622719STom Zanussi  again:
39128d707e8eSSteven Rostedt 	local_irq_save(flags);
3913289a5a25SSteven Rostedt (Red Hat) 	dolock = rb_reader_lock(cpu_buffer);
391466a8cb95SSteven Rostedt 	event = rb_buffer_peek(cpu_buffer, ts, lost_events);
3915469535a5SRobert Richter 	if (event && event->type_len == RINGBUF_TYPE_PADDING)
3916469535a5SRobert Richter 		rb_advance_reader(cpu_buffer);
3917289a5a25SSteven Rostedt (Red Hat) 	rb_reader_unlock(cpu_buffer, dolock);
39188d707e8eSSteven Rostedt 	local_irq_restore(flags);
3919f83c9d0fSSteven Rostedt 
39201b959e18SSteven Rostedt 	if (event && event->type_len == RINGBUF_TYPE_PADDING)
39212d622719STom Zanussi 		goto again;
39222d622719STom Zanussi 
3923f83c9d0fSSteven Rostedt 	return event;
3924f83c9d0fSSteven Rostedt }
3925f83c9d0fSSteven Rostedt 
3926f83c9d0fSSteven Rostedt /**
3927f83c9d0fSSteven Rostedt  * ring_buffer_iter_peek - peek at the next event to be read
3928f83c9d0fSSteven Rostedt  * @iter: The ring buffer iterator
3929f83c9d0fSSteven Rostedt  * @ts: The timestamp counter of this event.
3930f83c9d0fSSteven Rostedt  *
3931f83c9d0fSSteven Rostedt  * This will return the event that will be read next, but does
3932f83c9d0fSSteven Rostedt  * not increment the iterator.
3933f83c9d0fSSteven Rostedt  */
3934f83c9d0fSSteven Rostedt struct ring_buffer_event *
3935f83c9d0fSSteven Rostedt ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3936f83c9d0fSSteven Rostedt {
3937f83c9d0fSSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3938f83c9d0fSSteven Rostedt 	struct ring_buffer_event *event;
3939f83c9d0fSSteven Rostedt 	unsigned long flags;
3940f83c9d0fSSteven Rostedt 
39412d622719STom Zanussi  again:
39425389f6faSThomas Gleixner 	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3943f83c9d0fSSteven Rostedt 	event = rb_iter_peek(iter, ts);
39445389f6faSThomas Gleixner 	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3945f83c9d0fSSteven Rostedt 
39461b959e18SSteven Rostedt 	if (event && event->type_len == RINGBUF_TYPE_PADDING)
39472d622719STom Zanussi 		goto again;
39482d622719STom Zanussi 
3949f83c9d0fSSteven Rostedt 	return event;
3950f83c9d0fSSteven Rostedt }
3951f83c9d0fSSteven Rostedt 
3952f83c9d0fSSteven Rostedt /**
39537a8e76a3SSteven Rostedt  * ring_buffer_consume - return an event and consume it
39547a8e76a3SSteven Rostedt  * @buffer: The ring buffer to get the next event from
395566a8cb95SSteven Rostedt  * @cpu: the cpu to read the buffer from
395666a8cb95SSteven Rostedt  * @ts: a variable to store the timestamp (may be NULL)
395766a8cb95SSteven Rostedt  * @lost_events: a variable to store if events were lost (may be NULL)
39587a8e76a3SSteven Rostedt  *
39597a8e76a3SSteven Rostedt  * Returns the next event in the ring buffer, and that event is consumed.
39607a8e76a3SSteven Rostedt  * Meaning, that sequential reads will keep returning a different event,
39617a8e76a3SSteven Rostedt  * and eventually empty the ring buffer if the producer is slower.
39627a8e76a3SSteven Rostedt  */
39637a8e76a3SSteven Rostedt struct ring_buffer_event *
396466a8cb95SSteven Rostedt ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
396566a8cb95SSteven Rostedt 		    unsigned long *lost_events)
39667a8e76a3SSteven Rostedt {
3967554f786eSSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
3968554f786eSSteven Rostedt 	struct ring_buffer_event *event = NULL;
3969f83c9d0fSSteven Rostedt 	unsigned long flags;
3970289a5a25SSteven Rostedt (Red Hat) 	bool dolock;
39717a8e76a3SSteven Rostedt 
39722d622719STom Zanussi  again:
3973554f786eSSteven Rostedt 	/* might be called in atomic */
3974554f786eSSteven Rostedt 	preempt_disable();
39757a8e76a3SSteven Rostedt 
3976554f786eSSteven Rostedt 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
3977554f786eSSteven Rostedt 		goto out;
3978554f786eSSteven Rostedt 
3979554f786eSSteven Rostedt 	cpu_buffer = buffer->buffers[cpu];
39808d707e8eSSteven Rostedt 	local_irq_save(flags);
3981289a5a25SSteven Rostedt (Red Hat) 	dolock = rb_reader_lock(cpu_buffer);
39827a8e76a3SSteven Rostedt 
398366a8cb95SSteven Rostedt 	event = rb_buffer_peek(cpu_buffer, ts, lost_events);
398466a8cb95SSteven Rostedt 	if (event) {
398566a8cb95SSteven Rostedt 		cpu_buffer->lost_events = 0;
3986d769041fSSteven Rostedt 		rb_advance_reader(cpu_buffer);
398766a8cb95SSteven Rostedt 	}
39887a8e76a3SSteven Rostedt 
3989289a5a25SSteven Rostedt (Red Hat) 	rb_reader_unlock(cpu_buffer, dolock);
39908d707e8eSSteven Rostedt 	local_irq_restore(flags);
3991f83c9d0fSSteven Rostedt 
3992554f786eSSteven Rostedt  out:
3993554f786eSSteven Rostedt 	preempt_enable();
3994554f786eSSteven Rostedt 
39951b959e18SSteven Rostedt 	if (event && event->type_len == RINGBUF_TYPE_PADDING)
39962d622719STom Zanussi 		goto again;
39972d622719STom Zanussi 
39987a8e76a3SSteven Rostedt 	return event;
39997a8e76a3SSteven Rostedt }
4000c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_consume);
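
/*
 * Illustrative usage sketch (hypothetical code, not part of ring_buffer.c):
 * a consuming read loop that drains one CPU buffer with
 * ring_buffer_consume(). The example_rb pointer and example_* name are
 * hypothetical.
 */
static void example_drain_cpu(struct ring_buffer *example_rb, int cpu)
{
	struct ring_buffer_event *event;
	unsigned long lost_events;
	u64 ts;

	while ((event = ring_buffer_consume(example_rb, cpu, &ts, &lost_events))) {
		void *data = ring_buffer_event_data(event);

		pr_info("ts=%llu len=%u lost=%lu data=%p\n",
			(unsigned long long)ts,
			ring_buffer_event_length(event), lost_events, data);
	}
}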
40017a8e76a3SSteven Rostedt 
40027a8e76a3SSteven Rostedt /**
400372c9ddfdSDavid Miller  * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
40047a8e76a3SSteven Rostedt  * @buffer: The ring buffer to read from
40057a8e76a3SSteven Rostedt  * @cpu: The cpu buffer to iterate over
40067a8e76a3SSteven Rostedt  *
400772c9ddfdSDavid Miller  * This performs the initial preparations necessary to iterate
400872c9ddfdSDavid Miller  * through the buffer.  Memory is allocated, buffer recording
400972c9ddfdSDavid Miller  * is disabled, and the iterator pointer is returned to the caller.
40107a8e76a3SSteven Rostedt  *
401172c9ddfdSDavid Miller  * Disabling buffer recording prevents the reading from being
401272c9ddfdSDavid Miller  * corrupted. This is not a consuming read, so a producer is not
401372c9ddfdSDavid Miller  * expected.
401472c9ddfdSDavid Miller  *
401572c9ddfdSDavid Miller  * After a sequence of ring_buffer_read_prepare calls, the user is
4016d611851bSzhangwei(Jovi)  * expected to make at least one call to ring_buffer_read_prepare_sync.
401772c9ddfdSDavid Miller  * Afterwards, ring_buffer_read_start is invoked to get things going
401872c9ddfdSDavid Miller  * for real.
401972c9ddfdSDavid Miller  *
4020d611851bSzhangwei(Jovi)  * This overall must be paired with ring_buffer_read_finish.
40217a8e76a3SSteven Rostedt  */
40227a8e76a3SSteven Rostedt struct ring_buffer_iter *
402372c9ddfdSDavid Miller ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
40247a8e76a3SSteven Rostedt {
40257a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
40268aabee57SSteven Rostedt 	struct ring_buffer_iter *iter;
40277a8e76a3SSteven Rostedt 
40289e01c1b7SRusty Russell 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
40298aabee57SSteven Rostedt 		return NULL;
40307a8e76a3SSteven Rostedt 
40317a8e76a3SSteven Rostedt 	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
40327a8e76a3SSteven Rostedt 	if (!iter)
40338aabee57SSteven Rostedt 		return NULL;
40347a8e76a3SSteven Rostedt 
40357a8e76a3SSteven Rostedt 	cpu_buffer = buffer->buffers[cpu];
40367a8e76a3SSteven Rostedt 
40377a8e76a3SSteven Rostedt 	iter->cpu_buffer = cpu_buffer;
40387a8e76a3SSteven Rostedt 
403983f40318SVaibhav Nagarnaik 	atomic_inc(&buffer->resize_disabled);
40407a8e76a3SSteven Rostedt 	atomic_inc(&cpu_buffer->record_disabled);
404172c9ddfdSDavid Miller 
404272c9ddfdSDavid Miller 	return iter;
404372c9ddfdSDavid Miller }
404472c9ddfdSDavid Miller EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
404572c9ddfdSDavid Miller 
404672c9ddfdSDavid Miller /**
404772c9ddfdSDavid Miller  * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls
404872c9ddfdSDavid Miller  *
404972c9ddfdSDavid Miller  * All previously invoked ring_buffer_read_prepare calls to prepare
405072c9ddfdSDavid Miller  * iterators will be synchronized.  Afterwards, ring_buffer_read_start
405172c9ddfdSDavid Miller  * calls on those iterators are allowed.
405272c9ddfdSDavid Miller  */
405372c9ddfdSDavid Miller void
405472c9ddfdSDavid Miller ring_buffer_read_prepare_sync(void)
405572c9ddfdSDavid Miller {
40567a8e76a3SSteven Rostedt 	synchronize_sched();
405772c9ddfdSDavid Miller }
405872c9ddfdSDavid Miller EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
405972c9ddfdSDavid Miller 
406072c9ddfdSDavid Miller /**
406172c9ddfdSDavid Miller  * ring_buffer_read_start - start a non consuming read of the buffer
406272c9ddfdSDavid Miller  * @iter: The iterator returned by ring_buffer_read_prepare
406372c9ddfdSDavid Miller  *
406472c9ddfdSDavid Miller  * This finalizes the startup of an iteration through the buffer.
406572c9ddfdSDavid Miller  * The iterator comes from a call to ring_buffer_read_prepare and
406672c9ddfdSDavid Miller  * an intervening ring_buffer_read_prepare_sync must have been
406772c9ddfdSDavid Miller  * performed.
406872c9ddfdSDavid Miller  *
4069d611851bSzhangwei(Jovi)  * Must be paired with ring_buffer_read_finish.
407072c9ddfdSDavid Miller  */
407172c9ddfdSDavid Miller void
407272c9ddfdSDavid Miller ring_buffer_read_start(struct ring_buffer_iter *iter)
407372c9ddfdSDavid Miller {
407472c9ddfdSDavid Miller 	struct ring_buffer_per_cpu *cpu_buffer;
407572c9ddfdSDavid Miller 	unsigned long flags;
407672c9ddfdSDavid Miller 
407772c9ddfdSDavid Miller 	if (!iter)
407872c9ddfdSDavid Miller 		return;
407972c9ddfdSDavid Miller 
408072c9ddfdSDavid Miller 	cpu_buffer = iter->cpu_buffer;
40817a8e76a3SSteven Rostedt 
40825389f6faSThomas Gleixner 	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
40830199c4e6SThomas Gleixner 	arch_spin_lock(&cpu_buffer->lock);
4084642edba5SSteven Rostedt 	rb_iter_reset(iter);
40850199c4e6SThomas Gleixner 	arch_spin_unlock(&cpu_buffer->lock);
40865389f6faSThomas Gleixner 	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
40877a8e76a3SSteven Rostedt }
4088c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_read_start);
40897a8e76a3SSteven Rostedt 
40907a8e76a3SSteven Rostedt /**
4091d611851bSzhangwei(Jovi)  * ring_buffer_read_finish - finish reading the iterator of the buffer
40927a8e76a3SSteven Rostedt  * @iter: The iterator retrieved by ring_buffer_read_prepare
40937a8e76a3SSteven Rostedt  *
40947a8e76a3SSteven Rostedt  * This re-enables the recording to the buffer, and frees the
40957a8e76a3SSteven Rostedt  * iterator.
40967a8e76a3SSteven Rostedt  */
40977a8e76a3SSteven Rostedt void
40987a8e76a3SSteven Rostedt ring_buffer_read_finish(struct ring_buffer_iter *iter)
40997a8e76a3SSteven Rostedt {
41007a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
41019366c1baSSteven Rostedt 	unsigned long flags;
41027a8e76a3SSteven Rostedt 
4103659f451fSSteven Rostedt 	/*
4104659f451fSSteven Rostedt 	 * Ring buffer is disabled from recording, here's a good place
4105659f451fSSteven Rostedt 	 * to check the integrity of the ring buffer.
41069366c1baSSteven Rostedt 	 * Must prevent readers from trying to read, as the check
41079366c1baSSteven Rostedt 	 * clears the HEAD page and readers require it.
4108659f451fSSteven Rostedt 	 */
41099366c1baSSteven Rostedt 	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4110659f451fSSteven Rostedt 	rb_check_pages(cpu_buffer);
41119366c1baSSteven Rostedt 	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4112659f451fSSteven Rostedt 
41137a8e76a3SSteven Rostedt 	atomic_dec(&cpu_buffer->record_disabled);
411483f40318SVaibhav Nagarnaik 	atomic_dec(&cpu_buffer->buffer->resize_disabled);
41157a8e76a3SSteven Rostedt 	kfree(iter);
41167a8e76a3SSteven Rostedt }
4117c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
41187a8e76a3SSteven Rostedt 
41197a8e76a3SSteven Rostedt /**
41207a8e76a3SSteven Rostedt  * ring_buffer_read - read the next item in the ring buffer by the iterator
41217a8e76a3SSteven Rostedt  * @iter: The ring buffer iterator
41227a8e76a3SSteven Rostedt  * @ts: The time stamp of the event read.
41237a8e76a3SSteven Rostedt  *
41247a8e76a3SSteven Rostedt  * This reads the next event in the ring buffer and increments the iterator.
41257a8e76a3SSteven Rostedt  */
41267a8e76a3SSteven Rostedt struct ring_buffer_event *
41277a8e76a3SSteven Rostedt ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
41287a8e76a3SSteven Rostedt {
41297a8e76a3SSteven Rostedt 	struct ring_buffer_event *event;
4130f83c9d0fSSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
4131f83c9d0fSSteven Rostedt 	unsigned long flags;
41327a8e76a3SSteven Rostedt 
41335389f6faSThomas Gleixner 	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
41347e9391cfSSteven Rostedt  again:
4135f83c9d0fSSteven Rostedt 	event = rb_iter_peek(iter, ts);
41367a8e76a3SSteven Rostedt 	if (!event)
4137f83c9d0fSSteven Rostedt 		goto out;
41387a8e76a3SSteven Rostedt 
41397e9391cfSSteven Rostedt 	if (event->type_len == RINGBUF_TYPE_PADDING)
41407e9391cfSSteven Rostedt 		goto again;
41417e9391cfSSteven Rostedt 
41427a8e76a3SSteven Rostedt 	rb_advance_iter(iter);
4143f83c9d0fSSteven Rostedt  out:
41445389f6faSThomas Gleixner 	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
41457a8e76a3SSteven Rostedt 
41467a8e76a3SSteven Rostedt 	return event;
41477a8e76a3SSteven Rostedt }
4148c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_read);
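
/*
 * A minimal sketch (not part of this file) of the full non consuming
 * read sequence documented above: prepare, sync, start, iterate with
 * ring_buffer_read(), then finish.  The buffer pointer, cpu number and
 * example_iterate_cpu() name are assumptions for illustration only.
 */
static void example_iterate_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_iter *iter;
	struct ring_buffer_event *event;
	u64 ts;

	iter = ring_buffer_read_prepare(buffer, cpu);
	if (!iter)
		return;

	/* Let in-flight writers see that recording is disabled */
	ring_buffer_read_prepare_sync();
	ring_buffer_read_start(iter);

	/* Walk the buffer without consuming it */
	while ((event = ring_buffer_read(iter, &ts)))
		pr_info("cpu %d: %u byte event at %llu\n", cpu,
			ring_buffer_event_length(event),
			(unsigned long long)ts);

	/* Re-enables recording and resizing, and frees the iterator */
	ring_buffer_read_finish(iter);
}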
41497a8e76a3SSteven Rostedt 
41507a8e76a3SSteven Rostedt /**
41517a8e76a3SSteven Rostedt  * ring_buffer_size - return the size of the ring buffer (in bytes)
41527a8e76a3SSteven Rostedt  * @buffer: The ring buffer.
 * @cpu: The per CPU buffer to get the size of.
41537a8e76a3SSteven Rostedt  */
4154438ced17SVaibhav Nagarnaik unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu)
41557a8e76a3SSteven Rostedt {
4156438ced17SVaibhav Nagarnaik 	/*
4157438ced17SVaibhav Nagarnaik 	 * Earlier, this method returned
4158438ced17SVaibhav Nagarnaik 	 *	BUF_PAGE_SIZE * buffer->nr_pages
4159438ced17SVaibhav Nagarnaik 	 * Since the nr_pages field is now removed, we have converted this to
4160438ced17SVaibhav Nagarnaik 	 * return the per cpu buffer value.
4161438ced17SVaibhav Nagarnaik 	 */
4162438ced17SVaibhav Nagarnaik 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
4163438ced17SVaibhav Nagarnaik 		return 0;
4164438ced17SVaibhav Nagarnaik 
4165438ced17SVaibhav Nagarnaik 	return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages;
41667a8e76a3SSteven Rostedt }
4167c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_size);
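
/*
 * Sketch only (not part of this file): recovering the old whole-buffer
 * size by summing the per cpu values.  for_each_online_cpu() is used
 * here because for_each_buffer_cpu() is private to this file; cpus that
 * are not in the buffer's cpumask simply contribute zero.
 */
static unsigned long example_total_size(struct ring_buffer *buffer)
{
	unsigned long total = 0;
	int cpu;

	for_each_online_cpu(cpu)
		total += ring_buffer_size(buffer, cpu);

	return total;
}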
41687a8e76a3SSteven Rostedt 
41697a8e76a3SSteven Rostedt static void
41707a8e76a3SSteven Rostedt rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
41717a8e76a3SSteven Rostedt {
417277ae365eSSteven Rostedt 	rb_head_page_deactivate(cpu_buffer);
417377ae365eSSteven Rostedt 
41747a8e76a3SSteven Rostedt 	cpu_buffer->head_page
41753adc54faSSteven Rostedt 		= list_entry(cpu_buffer->pages, struct buffer_page, list);
4176bf41a158SSteven Rostedt 	local_set(&cpu_buffer->head_page->write, 0);
4177778c55d4SSteven Rostedt 	local_set(&cpu_buffer->head_page->entries, 0);
4178abc9b56dSSteven Rostedt 	local_set(&cpu_buffer->head_page->page->commit, 0);
41797a8e76a3SSteven Rostedt 
41806f807acdSSteven Rostedt 	cpu_buffer->head_page->read = 0;
4181bf41a158SSteven Rostedt 
4182bf41a158SSteven Rostedt 	cpu_buffer->tail_page = cpu_buffer->head_page;
4183bf41a158SSteven Rostedt 	cpu_buffer->commit_page = cpu_buffer->head_page;
4184bf41a158SSteven Rostedt 
4185bf41a158SSteven Rostedt 	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
41865040b4b7SVaibhav Nagarnaik 	INIT_LIST_HEAD(&cpu_buffer->new_pages);
4187bf41a158SSteven Rostedt 	local_set(&cpu_buffer->reader_page->write, 0);
4188778c55d4SSteven Rostedt 	local_set(&cpu_buffer->reader_page->entries, 0);
4189abc9b56dSSteven Rostedt 	local_set(&cpu_buffer->reader_page->page->commit, 0);
41906f807acdSSteven Rostedt 	cpu_buffer->reader_page->read = 0;
4191d769041fSSteven Rostedt 
4192c64e148aSVaibhav Nagarnaik 	local_set(&cpu_buffer->entries_bytes, 0);
419377ae365eSSteven Rostedt 	local_set(&cpu_buffer->overrun, 0);
4194884bfe89SSlava Pestov 	local_set(&cpu_buffer->commit_overrun, 0);
4195884bfe89SSlava Pestov 	local_set(&cpu_buffer->dropped_events, 0);
4196e4906effSSteven Rostedt 	local_set(&cpu_buffer->entries, 0);
4197fa743953SSteven Rostedt 	local_set(&cpu_buffer->committing, 0);
4198fa743953SSteven Rostedt 	local_set(&cpu_buffer->commits, 0);
419977ae365eSSteven Rostedt 	cpu_buffer->read = 0;
4200c64e148aSVaibhav Nagarnaik 	cpu_buffer->read_bytes = 0;
420169507c06SSteven Rostedt 
420269507c06SSteven Rostedt 	cpu_buffer->write_stamp = 0;
420369507c06SSteven Rostedt 	cpu_buffer->read_stamp = 0;
420477ae365eSSteven Rostedt 
420566a8cb95SSteven Rostedt 	cpu_buffer->lost_events = 0;
420666a8cb95SSteven Rostedt 	cpu_buffer->last_overrun = 0;
420766a8cb95SSteven Rostedt 
420877ae365eSSteven Rostedt 	rb_head_page_activate(cpu_buffer);
42097a8e76a3SSteven Rostedt }
42107a8e76a3SSteven Rostedt 
42117a8e76a3SSteven Rostedt /**
42127a8e76a3SSteven Rostedt  * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
42137a8e76a3SSteven Rostedt  * @buffer: The ring buffer to reset a per cpu buffer of
42147a8e76a3SSteven Rostedt  * @cpu: The CPU buffer to be reset
42157a8e76a3SSteven Rostedt  */
42167a8e76a3SSteven Rostedt void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
42177a8e76a3SSteven Rostedt {
42187a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
42197a8e76a3SSteven Rostedt 	unsigned long flags;
42207a8e76a3SSteven Rostedt 
42219e01c1b7SRusty Russell 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
42228aabee57SSteven Rostedt 		return;
42237a8e76a3SSteven Rostedt 
422483f40318SVaibhav Nagarnaik 	atomic_inc(&buffer->resize_disabled);
422541ede23eSSteven Rostedt 	atomic_inc(&cpu_buffer->record_disabled);
422641ede23eSSteven Rostedt 
422783f40318SVaibhav Nagarnaik 	/* Make sure all commits have finished */
422883f40318SVaibhav Nagarnaik 	synchronize_sched();
422983f40318SVaibhav Nagarnaik 
42305389f6faSThomas Gleixner 	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4231f83c9d0fSSteven Rostedt 
423241b6a95dSSteven Rostedt 	if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
423341b6a95dSSteven Rostedt 		goto out;
423441b6a95dSSteven Rostedt 
42350199c4e6SThomas Gleixner 	arch_spin_lock(&cpu_buffer->lock);
42367a8e76a3SSteven Rostedt 
42377a8e76a3SSteven Rostedt 	rb_reset_cpu(cpu_buffer);
42387a8e76a3SSteven Rostedt 
42390199c4e6SThomas Gleixner 	arch_spin_unlock(&cpu_buffer->lock);
4240f83c9d0fSSteven Rostedt 
424141b6a95dSSteven Rostedt  out:
42425389f6faSThomas Gleixner 	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
424341ede23eSSteven Rostedt 
424441ede23eSSteven Rostedt 	atomic_dec(&cpu_buffer->record_disabled);
424583f40318SVaibhav Nagarnaik 	atomic_dec(&buffer->resize_disabled);
42467a8e76a3SSteven Rostedt }
4247c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
42487a8e76a3SSteven Rostedt 
42497a8e76a3SSteven Rostedt /**
42507a8e76a3SSteven Rostedt  * ring_buffer_reset - reset a ring buffer
42517a8e76a3SSteven Rostedt  * @buffer: The ring buffer to reset all cpu buffers
42527a8e76a3SSteven Rostedt  */
42537a8e76a3SSteven Rostedt void ring_buffer_reset(struct ring_buffer *buffer)
42547a8e76a3SSteven Rostedt {
42557a8e76a3SSteven Rostedt 	int cpu;
42567a8e76a3SSteven Rostedt 
42577a8e76a3SSteven Rostedt 	for_each_buffer_cpu(buffer, cpu)
4258d769041fSSteven Rostedt 		ring_buffer_reset_cpu(buffer, cpu);
42597a8e76a3SSteven Rostedt }
4260c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_reset);
42617a8e76a3SSteven Rostedt 
42627a8e76a3SSteven Rostedt /**
42637a8e76a3SSteven Rostedt  * ring_buffer_empty - is the ring buffer empty?
42647a8e76a3SSteven Rostedt  * @buffer: The ring buffer to test
42657a8e76a3SSteven Rostedt  */
42663d4e204dSYaowei Bai bool ring_buffer_empty(struct ring_buffer *buffer)
42677a8e76a3SSteven Rostedt {
42687a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
4269d4788207SSteven Rostedt 	unsigned long flags;
4270289a5a25SSteven Rostedt (Red Hat) 	bool dolock;
42717a8e76a3SSteven Rostedt 	int cpu;
4272d4788207SSteven Rostedt 	int ret;
42737a8e76a3SSteven Rostedt 
42747a8e76a3SSteven Rostedt 	/* yes this is racy, but if you don't like the race, lock the buffer */
42757a8e76a3SSteven Rostedt 	for_each_buffer_cpu(buffer, cpu) {
42767a8e76a3SSteven Rostedt 		cpu_buffer = buffer->buffers[cpu];
42778d707e8eSSteven Rostedt 		local_irq_save(flags);
4278289a5a25SSteven Rostedt (Red Hat) 		dolock = rb_reader_lock(cpu_buffer);
4279d4788207SSteven Rostedt 		ret = rb_per_cpu_empty(cpu_buffer);
4280289a5a25SSteven Rostedt (Red Hat) 		rb_reader_unlock(cpu_buffer, dolock);
42818d707e8eSSteven Rostedt 		local_irq_restore(flags);
42828d707e8eSSteven Rostedt 
4283d4788207SSteven Rostedt 		if (!ret)
42843d4e204dSYaowei Bai 			return false;
42857a8e76a3SSteven Rostedt 	}
4286554f786eSSteven Rostedt 
42873d4e204dSYaowei Bai 	return true;
42887a8e76a3SSteven Rostedt }
4289c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_empty);
42907a8e76a3SSteven Rostedt 
42917a8e76a3SSteven Rostedt /**
42927a8e76a3SSteven Rostedt  * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
42937a8e76a3SSteven Rostedt  * @buffer: The ring buffer
42947a8e76a3SSteven Rostedt  * @cpu: The CPU buffer to test
42957a8e76a3SSteven Rostedt  */
42963d4e204dSYaowei Bai bool ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
42977a8e76a3SSteven Rostedt {
42987a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer;
4299d4788207SSteven Rostedt 	unsigned long flags;
4300289a5a25SSteven Rostedt (Red Hat) 	bool dolock;
43018aabee57SSteven Rostedt 	int ret;
43027a8e76a3SSteven Rostedt 
43039e01c1b7SRusty Russell 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
43043d4e204dSYaowei Bai 		return true;
43057a8e76a3SSteven Rostedt 
43067a8e76a3SSteven Rostedt 	cpu_buffer = buffer->buffers[cpu];
43078d707e8eSSteven Rostedt 	local_irq_save(flags);
4308289a5a25SSteven Rostedt (Red Hat) 	dolock = rb_reader_lock(cpu_buffer);
4309554f786eSSteven Rostedt 	ret = rb_per_cpu_empty(cpu_buffer);
4310289a5a25SSteven Rostedt (Red Hat) 	rb_reader_unlock(cpu_buffer, dolock);
43118d707e8eSSteven Rostedt 	local_irq_restore(flags);
4312554f786eSSteven Rostedt 
4313554f786eSSteven Rostedt 	return ret;
43147a8e76a3SSteven Rostedt }
4315c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
43167a8e76a3SSteven Rostedt 
431785bac32cSSteven Rostedt #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
43187a8e76a3SSteven Rostedt /**
43197a8e76a3SSteven Rostedt  * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
43207a8e76a3SSteven Rostedt  * @buffer_a: One buffer to swap with
43217a8e76a3SSteven Rostedt  * @buffer_b: The other buffer to swap with
43227a8e76a3SSteven Rostedt  *
43237a8e76a3SSteven Rostedt  * This function is useful for tracers that want to take a "snapshot"
43247a8e76a3SSteven Rostedt  * of a CPU buffer and have another back up buffer lying around.
43257a8e76a3SSteven Rostedt  * It is expected that the tracer handles the cpu buffer not being
43267a8e76a3SSteven Rostedt  * used at the moment.
43277a8e76a3SSteven Rostedt  */
43287a8e76a3SSteven Rostedt int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
43297a8e76a3SSteven Rostedt 			 struct ring_buffer *buffer_b, int cpu)
43307a8e76a3SSteven Rostedt {
43317a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer_a;
43327a8e76a3SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer_b;
4333554f786eSSteven Rostedt 	int ret = -EINVAL;
4334554f786eSSteven Rostedt 
43359e01c1b7SRusty Russell 	if (!cpumask_test_cpu(cpu, buffer_a->cpumask) ||
43369e01c1b7SRusty Russell 	    !cpumask_test_cpu(cpu, buffer_b->cpumask))
4337554f786eSSteven Rostedt 		goto out;
43387a8e76a3SSteven Rostedt 
4339438ced17SVaibhav Nagarnaik 	cpu_buffer_a = buffer_a->buffers[cpu];
4340438ced17SVaibhav Nagarnaik 	cpu_buffer_b = buffer_b->buffers[cpu];
4341438ced17SVaibhav Nagarnaik 
43427a8e76a3SSteven Rostedt 	/* At least make sure the two buffers are somewhat the same */
4343438ced17SVaibhav Nagarnaik 	if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages)
4344554f786eSSteven Rostedt 		goto out;
4345554f786eSSteven Rostedt 
4346554f786eSSteven Rostedt 	ret = -EAGAIN;
43477a8e76a3SSteven Rostedt 
434897b17efeSSteven Rostedt 	if (atomic_read(&buffer_a->record_disabled))
4349554f786eSSteven Rostedt 		goto out;
435097b17efeSSteven Rostedt 
435197b17efeSSteven Rostedt 	if (atomic_read(&buffer_b->record_disabled))
4352554f786eSSteven Rostedt 		goto out;
435397b17efeSSteven Rostedt 
435497b17efeSSteven Rostedt 	if (atomic_read(&cpu_buffer_a->record_disabled))
4355554f786eSSteven Rostedt 		goto out;
435697b17efeSSteven Rostedt 
435797b17efeSSteven Rostedt 	if (atomic_read(&cpu_buffer_b->record_disabled))
4358554f786eSSteven Rostedt 		goto out;
435997b17efeSSteven Rostedt 
43607a8e76a3SSteven Rostedt 	/*
43617a8e76a3SSteven Rostedt 	 * We can't do a synchronize_sched here because this
43627a8e76a3SSteven Rostedt 	 * function can be called in atomic context.
43637a8e76a3SSteven Rostedt 	 * Normally this will be called from the same CPU as cpu.
43647a8e76a3SSteven Rostedt 	 * If not it's up to the caller to protect this.
43657a8e76a3SSteven Rostedt 	 */
43667a8e76a3SSteven Rostedt 	atomic_inc(&cpu_buffer_a->record_disabled);
43677a8e76a3SSteven Rostedt 	atomic_inc(&cpu_buffer_b->record_disabled);
43687a8e76a3SSteven Rostedt 
436998277991SSteven Rostedt 	ret = -EBUSY;
437098277991SSteven Rostedt 	if (local_read(&cpu_buffer_a->committing))
437198277991SSteven Rostedt 		goto out_dec;
437298277991SSteven Rostedt 	if (local_read(&cpu_buffer_b->committing))
437398277991SSteven Rostedt 		goto out_dec;
437498277991SSteven Rostedt 
43757a8e76a3SSteven Rostedt 	buffer_a->buffers[cpu] = cpu_buffer_b;
43767a8e76a3SSteven Rostedt 	buffer_b->buffers[cpu] = cpu_buffer_a;
43777a8e76a3SSteven Rostedt 
43787a8e76a3SSteven Rostedt 	cpu_buffer_b->buffer = buffer_a;
43797a8e76a3SSteven Rostedt 	cpu_buffer_a->buffer = buffer_b;
43807a8e76a3SSteven Rostedt 
438198277991SSteven Rostedt 	ret = 0;
438298277991SSteven Rostedt 
438398277991SSteven Rostedt out_dec:
43847a8e76a3SSteven Rostedt 	atomic_dec(&cpu_buffer_a->record_disabled);
43857a8e76a3SSteven Rostedt 	atomic_dec(&cpu_buffer_b->record_disabled);
4386554f786eSSteven Rostedt out:
4387554f786eSSteven Rostedt 	return ret;
43887a8e76a3SSteven Rostedt }
4389c4f50183SRobert Richter EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
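
/*
 * A rough sketch (not part of this file) of the "snapshot" pattern the
 * comment above describes: a live buffer is swapped with a spare of the
 * same per cpu size, after which the old data can be read from the
 * spare at leisure.  Both buffer pointers are illustrative assumptions.
 */
static int example_snapshot_cpu(struct ring_buffer *live,
				struct ring_buffer *spare, int cpu)
{
	int ret;

	ret = ring_buffer_swap_cpu(live, spare, cpu);
	if (ret)	/* -EINVAL, -EAGAIN or -EBUSY: caller may retry */
		return ret;

	/* "spare" now holds what was recorded on this cpu */
	return 0;
}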
439085bac32cSSteven Rostedt #endif /* CONFIG_RING_BUFFER_ALLOW_SWAP */
43917a8e76a3SSteven Rostedt 
43928789a9e7SSteven Rostedt /**
43938789a9e7SSteven Rostedt  * ring_buffer_alloc_read_page - allocate a page to read from buffer
43948789a9e7SSteven Rostedt  * @buffer: the buffer to allocate for.
4395d611851bSzhangwei(Jovi)  * @cpu: the cpu buffer to allocate.
43968789a9e7SSteven Rostedt  *
43978789a9e7SSteven Rostedt  * This function is used in conjunction with ring_buffer_read_page.
43988789a9e7SSteven Rostedt  * When reading a full page from the ring buffer, these functions
43998789a9e7SSteven Rostedt  * can be used to speed up the process. The calling function should
44008789a9e7SSteven Rostedt  * allocate a few pages first with this function. Then when it
44018789a9e7SSteven Rostedt  * needs to get pages from the ring buffer, it passes the result
44028789a9e7SSteven Rostedt  * of this function into ring_buffer_read_page, which will swap
44038789a9e7SSteven Rostedt  * the page that was allocated, with the read page of the buffer.
44048789a9e7SSteven Rostedt  *
44058789a9e7SSteven Rostedt  * Returns:
44068789a9e7SSteven Rostedt  *  The page allocated, or NULL on error.
44078789a9e7SSteven Rostedt  */
44087ea59064SVaibhav Nagarnaik void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu)
44098789a9e7SSteven Rostedt {
4410044fa782SSteven Rostedt 	struct buffer_data_page *bpage;
44117ea59064SVaibhav Nagarnaik 	struct page *page;
44128789a9e7SSteven Rostedt 
4413d7ec4bfeSVaibhav Nagarnaik 	page = alloc_pages_node(cpu_to_node(cpu),
4414d7ec4bfeSVaibhav Nagarnaik 				GFP_KERNEL | __GFP_NORETRY, 0);
44157ea59064SVaibhav Nagarnaik 	if (!page)
44168789a9e7SSteven Rostedt 		return NULL;
44178789a9e7SSteven Rostedt 
44187ea59064SVaibhav Nagarnaik 	bpage = page_address(page);
44198789a9e7SSteven Rostedt 
4420ef7a4a16SSteven Rostedt 	rb_init_page(bpage);
4421ef7a4a16SSteven Rostedt 
4422044fa782SSteven Rostedt 	return bpage;
44238789a9e7SSteven Rostedt }
4424d6ce96daSSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
44258789a9e7SSteven Rostedt 
44268789a9e7SSteven Rostedt /**
44278789a9e7SSteven Rostedt  * ring_buffer_free_read_page - free an allocated read page
44288789a9e7SSteven Rostedt  * @buffer: the buffer the page was allocated for
44298789a9e7SSteven Rostedt  * @data: the page to free
44308789a9e7SSteven Rostedt  *
44318789a9e7SSteven Rostedt  * Free a page allocated from ring_buffer_alloc_read_page.
44328789a9e7SSteven Rostedt  */
44338789a9e7SSteven Rostedt void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
44348789a9e7SSteven Rostedt {
44358789a9e7SSteven Rostedt 	free_page((unsigned long)data);
44368789a9e7SSteven Rostedt }
4437d6ce96daSSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
44388789a9e7SSteven Rostedt 
44398789a9e7SSteven Rostedt /**
44408789a9e7SSteven Rostedt  * ring_buffer_read_page - extract a page from the ring buffer
44418789a9e7SSteven Rostedt  * @buffer: buffer to extract from
44428789a9e7SSteven Rostedt  * @data_page: the page to use allocated from ring_buffer_alloc_read_page
4443ef7a4a16SSteven Rostedt  * @len: amount to extract
44448789a9e7SSteven Rostedt  * @cpu: the cpu of the buffer to extract
44458789a9e7SSteven Rostedt  * @full: should the extraction only happen when the page is full.
44468789a9e7SSteven Rostedt  *
44478789a9e7SSteven Rostedt  * This function will pull out a page from the ring buffer and consume it.
44488789a9e7SSteven Rostedt  * @data_page must be the address of the variable that was returned
44498789a9e7SSteven Rostedt  * from ring_buffer_alloc_read_page. This is because the page might be used
44508789a9e7SSteven Rostedt  * to swap with a page in the ring buffer.
44518789a9e7SSteven Rostedt  *
44528789a9e7SSteven Rostedt  * for example:
4453d611851bSzhangwei(Jovi)  *	rpage = ring_buffer_alloc_read_page(buffer, cpu);
44548789a9e7SSteven Rostedt  *	if (!rpage)
44558789a9e7SSteven Rostedt  *		return error;
4456ef7a4a16SSteven Rostedt  *	ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0);
4457667d2412SLai Jiangshan  *	if (ret >= 0)
4458667d2412SLai Jiangshan  *		process_page(rpage, ret);
44598789a9e7SSteven Rostedt  *
44608789a9e7SSteven Rostedt  * When @full is set, the read will fail (return a negative value) unless
44618789a9e7SSteven Rostedt  * the writer is off the reader page.
44628789a9e7SSteven Rostedt  *
44638789a9e7SSteven Rostedt  * Note: it is up to the calling functions to handle sleeps and wakeups.
44648789a9e7SSteven Rostedt  *  The ring buffer can be used anywhere in the kernel and can not
44658789a9e7SSteven Rostedt  *  blindly call wake_up. The layer that uses the ring buffer must be
44668789a9e7SSteven Rostedt  *  responsible for that.
44678789a9e7SSteven Rostedt  *
44688789a9e7SSteven Rostedt  * Returns:
4469667d2412SLai Jiangshan  *  >=0 if data has been transferred, returns the offset of consumed data.
4470667d2412SLai Jiangshan  *  <0 if no data has been transferred.
44718789a9e7SSteven Rostedt  */
44728789a9e7SSteven Rostedt int ring_buffer_read_page(struct ring_buffer *buffer,
4473ef7a4a16SSteven Rostedt 			  void **data_page, size_t len, int cpu, int full)
44748789a9e7SSteven Rostedt {
44758789a9e7SSteven Rostedt 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
44768789a9e7SSteven Rostedt 	struct ring_buffer_event *event;
4477044fa782SSteven Rostedt 	struct buffer_data_page *bpage;
4478ef7a4a16SSteven Rostedt 	struct buffer_page *reader;
4479ff0ff84aSSteven Rostedt 	unsigned long missed_events;
44808789a9e7SSteven Rostedt 	unsigned long flags;
4481ef7a4a16SSteven Rostedt 	unsigned int commit;
4482667d2412SLai Jiangshan 	unsigned int read;
44834f3640f8SSteven Rostedt 	u64 save_timestamp;
4484667d2412SLai Jiangshan 	int ret = -1;
44858789a9e7SSteven Rostedt 
4486554f786eSSteven Rostedt 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
4487554f786eSSteven Rostedt 		goto out;
4488554f786eSSteven Rostedt 
4489474d32b6SSteven Rostedt 	/*
4490474d32b6SSteven Rostedt 	 * If len is not big enough to hold the page header, then
4491474d32b6SSteven Rostedt 	 * we can not copy anything.
4492474d32b6SSteven Rostedt 	 */
4493474d32b6SSteven Rostedt 	if (len <= BUF_PAGE_HDR_SIZE)
4494554f786eSSteven Rostedt 		goto out;
4495474d32b6SSteven Rostedt 
4496474d32b6SSteven Rostedt 	len -= BUF_PAGE_HDR_SIZE;
4497474d32b6SSteven Rostedt 
44988789a9e7SSteven Rostedt 	if (!data_page)
4499554f786eSSteven Rostedt 		goto out;
45008789a9e7SSteven Rostedt 
4501044fa782SSteven Rostedt 	bpage = *data_page;
4502044fa782SSteven Rostedt 	if (!bpage)
4503554f786eSSteven Rostedt 		goto out;
45048789a9e7SSteven Rostedt 
45055389f6faSThomas Gleixner 	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
45068789a9e7SSteven Rostedt 
4507ef7a4a16SSteven Rostedt 	reader = rb_get_reader_page(cpu_buffer);
4508ef7a4a16SSteven Rostedt 	if (!reader)
4509554f786eSSteven Rostedt 		goto out_unlock;
45108789a9e7SSteven Rostedt 
4511ef7a4a16SSteven Rostedt 	event = rb_reader_event(cpu_buffer);
4512667d2412SLai Jiangshan 
4513ef7a4a16SSteven Rostedt 	read = reader->read;
4514ef7a4a16SSteven Rostedt 	commit = rb_page_commit(reader);
4515ef7a4a16SSteven Rostedt 
451666a8cb95SSteven Rostedt 	/* Check if any events were dropped */
4517ff0ff84aSSteven Rostedt 	missed_events = cpu_buffer->lost_events;
451866a8cb95SSteven Rostedt 
45198789a9e7SSteven Rostedt 	/*
4520474d32b6SSteven Rostedt 	 * If this page has been partially read or
4521474d32b6SSteven Rostedt 	 * if len is not big enough to read the rest of the page or
4522474d32b6SSteven Rostedt 	 * a writer is still on the page, then
4523474d32b6SSteven Rostedt 	 * we must copy the data from the page to the buffer.
4524474d32b6SSteven Rostedt 	 * Otherwise, we can simply swap the page with the one passed in.
45258789a9e7SSteven Rostedt 	 */
4526474d32b6SSteven Rostedt 	if (read || (len < (commit - read)) ||
4527ef7a4a16SSteven Rostedt 	    cpu_buffer->reader_page == cpu_buffer->commit_page) {
4528667d2412SLai Jiangshan 		struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
4529474d32b6SSteven Rostedt 		unsigned int rpos = read;
4530474d32b6SSteven Rostedt 		unsigned int pos = 0;
4531ef7a4a16SSteven Rostedt 		unsigned int size;
45328789a9e7SSteven Rostedt 
45338789a9e7SSteven Rostedt 		if (full)
4534554f786eSSteven Rostedt 			goto out_unlock;
45358789a9e7SSteven Rostedt 
4536ef7a4a16SSteven Rostedt 		if (len > (commit - read))
4537ef7a4a16SSteven Rostedt 			len = (commit - read);
4538ef7a4a16SSteven Rostedt 
453969d1b839SSteven Rostedt 		/* Always keep the time extend and data together */
454069d1b839SSteven Rostedt 		size = rb_event_ts_length(event);
4541ef7a4a16SSteven Rostedt 
4542ef7a4a16SSteven Rostedt 		if (len < size)
4543554f786eSSteven Rostedt 			goto out_unlock;
4544ef7a4a16SSteven Rostedt 
45454f3640f8SSteven Rostedt 		/* save the current timestamp, since the user will need it */
45464f3640f8SSteven Rostedt 		save_timestamp = cpu_buffer->read_stamp;
45474f3640f8SSteven Rostedt 
4548ef7a4a16SSteven Rostedt 		/* Need to copy one event at a time */
4549ef7a4a16SSteven Rostedt 		do {
4550e1e35927SDavid Sharp 			/* We need the size of one event, because
4551e1e35927SDavid Sharp 			 * rb_advance_reader only advances by one event,
4552e1e35927SDavid Sharp 			 * whereas rb_event_ts_length may include the size of
4553e1e35927SDavid Sharp 			 * one or two events.
4554e1e35927SDavid Sharp 			 * We have already ensured there's enough space if this
4555e1e35927SDavid Sharp 			 * is a time extend. */
4556e1e35927SDavid Sharp 			size = rb_event_length(event);
4557474d32b6SSteven Rostedt 			memcpy(bpage->data + pos, rpage->data + rpos, size);
4558ef7a4a16SSteven Rostedt 
4559ef7a4a16SSteven Rostedt 			len -= size;
4560ef7a4a16SSteven Rostedt 
4561ef7a4a16SSteven Rostedt 			rb_advance_reader(cpu_buffer);
4562474d32b6SSteven Rostedt 			rpos = reader->read;
4563474d32b6SSteven Rostedt 			pos += size;
4564ef7a4a16SSteven Rostedt 
456518fab912SHuang Ying 			if (rpos >= commit)
456618fab912SHuang Ying 				break;
456718fab912SHuang Ying 
4568ef7a4a16SSteven Rostedt 			event = rb_reader_event(cpu_buffer);
456969d1b839SSteven Rostedt 			/* Always keep the time extend and data together */
457069d1b839SSteven Rostedt 			size = rb_event_ts_length(event);
4571e1e35927SDavid Sharp 		} while (len >= size);
4572667d2412SLai Jiangshan 
4573667d2412SLai Jiangshan 		/* update bpage */
4574ef7a4a16SSteven Rostedt 		local_set(&bpage->commit, pos);
45754f3640f8SSteven Rostedt 		bpage->time_stamp = save_timestamp;
4576ef7a4a16SSteven Rostedt 
4577474d32b6SSteven Rostedt 		/* we copied everything to the beginning */
4578474d32b6SSteven Rostedt 		read = 0;
45798789a9e7SSteven Rostedt 	} else {
4580afbab76aSSteven Rostedt 		/* update the entry counter */
458177ae365eSSteven Rostedt 		cpu_buffer->read += rb_page_entries(reader);
4582c64e148aSVaibhav Nagarnaik 		cpu_buffer->read_bytes += BUF_PAGE_SIZE;
4583afbab76aSSteven Rostedt 
45848789a9e7SSteven Rostedt 		/* swap the pages */
4585044fa782SSteven Rostedt 		rb_init_page(bpage);
4586ef7a4a16SSteven Rostedt 		bpage = reader->page;
4587ef7a4a16SSteven Rostedt 		reader->page = *data_page;
4588ef7a4a16SSteven Rostedt 		local_set(&reader->write, 0);
4589778c55d4SSteven Rostedt 		local_set(&reader->entries, 0);
4590ef7a4a16SSteven Rostedt 		reader->read = 0;
4591044fa782SSteven Rostedt 		*data_page = bpage;
4592ff0ff84aSSteven Rostedt 
4593ff0ff84aSSteven Rostedt 		/*
4594ff0ff84aSSteven Rostedt 		 * Use the real_end for the data size,
4595ff0ff84aSSteven Rostedt 		 * This gives us a chance to store the lost events
4596ff0ff84aSSteven Rostedt 		 * on the page.
4597ff0ff84aSSteven Rostedt 		 */
4598ff0ff84aSSteven Rostedt 		if (reader->real_end)
4599ff0ff84aSSteven Rostedt 			local_set(&bpage->commit, reader->real_end);
4600ef7a4a16SSteven Rostedt 	}
4601ef7a4a16SSteven Rostedt 	ret = read;
4602ef7a4a16SSteven Rostedt 
460366a8cb95SSteven Rostedt 	cpu_buffer->lost_events = 0;
46042711ca23SSteven Rostedt 
46052711ca23SSteven Rostedt 	commit = local_read(&bpage->commit);
460666a8cb95SSteven Rostedt 	/*
460766a8cb95SSteven Rostedt 	 * Set a flag in the commit field if we lost events
460866a8cb95SSteven Rostedt 	 */
4609ff0ff84aSSteven Rostedt 	if (missed_events) {
4610ff0ff84aSSteven Rostedt 		/* If there is room at the end of the page to save the
4611ff0ff84aSSteven Rostedt 		 * missed events, then record it there.
4612ff0ff84aSSteven Rostedt 		 */
4613ff0ff84aSSteven Rostedt 		if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) {
4614ff0ff84aSSteven Rostedt 			memcpy(&bpage->data[commit], &missed_events,
4615ff0ff84aSSteven Rostedt 			       sizeof(missed_events));
4616ff0ff84aSSteven Rostedt 			local_add(RB_MISSED_STORED, &bpage->commit);
46172711ca23SSteven Rostedt 			commit += sizeof(missed_events);
4618ff0ff84aSSteven Rostedt 		}
461966a8cb95SSteven Rostedt 		local_add(RB_MISSED_EVENTS, &bpage->commit);
4620ff0ff84aSSteven Rostedt 	}
462166a8cb95SSteven Rostedt 
46222711ca23SSteven Rostedt 	/*
46232711ca23SSteven Rostedt 	 * This page may be off to user land. Zero it out here.
46242711ca23SSteven Rostedt 	 */
46252711ca23SSteven Rostedt 	if (commit < BUF_PAGE_SIZE)
46262711ca23SSteven Rostedt 		memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
46272711ca23SSteven Rostedt 
4628554f786eSSteven Rostedt  out_unlock:
46295389f6faSThomas Gleixner 	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
46308789a9e7SSteven Rostedt 
4631554f786eSSteven Rostedt  out:
46328789a9e7SSteven Rostedt 	return ret;
46338789a9e7SSteven Rostedt }
4634d6ce96daSSteven Rostedt EXPORT_SYMBOL_GPL(ring_buffer_read_page);
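
/*
 * Sketch only (not part of this file) of the alloc/read/free cycle the
 * comment above walks through, assuming PAGE_SIZE sized reads.  The
 * example_read_full_page() name is an illustrative assumption.
 */
static int example_read_full_page(struct ring_buffer *buffer, int cpu)
{
	void *rpage;
	int ret;

	rpage = ring_buffer_alloc_read_page(buffer, cpu);
	if (!rpage)
		return -ENOMEM;

	/* full == 0: take whatever the reader page currently holds */
	ret = ring_buffer_read_page(buffer, &rpage, PAGE_SIZE, cpu, 0);
	if (ret >= 0)
		pr_info("cpu %d: page data begins at offset %d\n", cpu, ret);

	ring_buffer_free_read_page(buffer, rpage);

	return ret;	/* >= 0: offset of consumed data, < 0: no data */
}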
46358789a9e7SSteven Rostedt 
463659222efeSSteven Rostedt #ifdef CONFIG_HOTPLUG_CPU
463709c9e84dSFrederic Weisbecker static int rb_cpu_notify(struct notifier_block *self,
4638554f786eSSteven Rostedt 			 unsigned long action, void *hcpu)
4639554f786eSSteven Rostedt {
4640554f786eSSteven Rostedt 	struct ring_buffer *buffer =
4641554f786eSSteven Rostedt 		container_of(self, struct ring_buffer, cpu_notify);
4642554f786eSSteven Rostedt 	long cpu = (long)hcpu;
46439b94a8fbSSteven Rostedt (Red Hat) 	long nr_pages_same;
46449b94a8fbSSteven Rostedt (Red Hat) 	int cpu_i;
46459b94a8fbSSteven Rostedt (Red Hat) 	unsigned long nr_pages;
4646554f786eSSteven Rostedt 
4647554f786eSSteven Rostedt 	switch (action) {
4648554f786eSSteven Rostedt 	case CPU_UP_PREPARE:
4649554f786eSSteven Rostedt 	case CPU_UP_PREPARE_FROZEN:
46503f237a79SRusty Russell 		if (cpumask_test_cpu(cpu, buffer->cpumask))
4651554f786eSSteven Rostedt 			return NOTIFY_OK;
4652554f786eSSteven Rostedt 
4653438ced17SVaibhav Nagarnaik 		nr_pages = 0;
4654438ced17SVaibhav Nagarnaik 		nr_pages_same = 1;
4655438ced17SVaibhav Nagarnaik 		/* check if all cpu sizes are same */
4656438ced17SVaibhav Nagarnaik 		for_each_buffer_cpu(buffer, cpu_i) {
4657438ced17SVaibhav Nagarnaik 			/* fill in the size from first enabled cpu */
4658438ced17SVaibhav Nagarnaik 			if (nr_pages == 0)
4659438ced17SVaibhav Nagarnaik 				nr_pages = buffer->buffers[cpu_i]->nr_pages;
4660438ced17SVaibhav Nagarnaik 			if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
4661438ced17SVaibhav Nagarnaik 				nr_pages_same = 0;
4662438ced17SVaibhav Nagarnaik 				break;
4663438ced17SVaibhav Nagarnaik 			}
4664438ced17SVaibhav Nagarnaik 		}
4665438ced17SVaibhav Nagarnaik 		/* allocate minimum pages, user can later expand it */
4666438ced17SVaibhav Nagarnaik 		if (!nr_pages_same)
4667438ced17SVaibhav Nagarnaik 			nr_pages = 2;
4668554f786eSSteven Rostedt 		buffer->buffers[cpu] =
4669438ced17SVaibhav Nagarnaik 			rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
4670554f786eSSteven Rostedt 		if (!buffer->buffers[cpu]) {
4671554f786eSSteven Rostedt 			WARN(1, "failed to allocate ring buffer on CPU %ld\n",
4672554f786eSSteven Rostedt 			     cpu);
4673554f786eSSteven Rostedt 			return NOTIFY_OK;
4674554f786eSSteven Rostedt 		}
4675554f786eSSteven Rostedt 		smp_wmb();
46763f237a79SRusty Russell 		cpumask_set_cpu(cpu, buffer->cpumask);
4677554f786eSSteven Rostedt 		break;
4678554f786eSSteven Rostedt 	case CPU_DOWN_PREPARE:
4679554f786eSSteven Rostedt 	case CPU_DOWN_PREPARE_FROZEN:
4680554f786eSSteven Rostedt 		/*
4681554f786eSSteven Rostedt 		 * Do nothing.
4682554f786eSSteven Rostedt 		 *  If we were to free the buffer, then the user would
4683554f786eSSteven Rostedt 		 *  lose any trace that was in the buffer.
4684554f786eSSteven Rostedt 		 */
4685554f786eSSteven Rostedt 		break;
4686554f786eSSteven Rostedt 	default:
4687554f786eSSteven Rostedt 		break;
4688554f786eSSteven Rostedt 	}
4689554f786eSSteven Rostedt 	return NOTIFY_OK;
4690554f786eSSteven Rostedt }
4691554f786eSSteven Rostedt #endif
46926c43e554SSteven Rostedt (Red Hat) 
46936c43e554SSteven Rostedt (Red Hat) #ifdef CONFIG_RING_BUFFER_STARTUP_TEST
46946c43e554SSteven Rostedt (Red Hat) /*
46956c43e554SSteven Rostedt (Red Hat)  * This is a basic integrity check of the ring buffer.
46966c43e554SSteven Rostedt (Red Hat)  * Late in the boot cycle this test will run when configured in.
46976c43e554SSteven Rostedt (Red Hat)  * It will kick off a thread per CPU that will go into a loop
46986c43e554SSteven Rostedt (Red Hat)  * writing to the per cpu ring buffer various sizes of data.
46996c43e554SSteven Rostedt (Red Hat)  * Some of the data will be large items, some small.
47006c43e554SSteven Rostedt (Red Hat)  *
47016c43e554SSteven Rostedt (Red Hat)  * Another thread is created that goes into a spin, sending out
47026c43e554SSteven Rostedt (Red Hat)  * IPIs to the other CPUs to also write into the ring buffer.
47036c43e554SSteven Rostedt (Red Hat)  * This is to test the nesting ability of the buffer.
47046c43e554SSteven Rostedt (Red Hat)  *
47056c43e554SSteven Rostedt (Red Hat)  * Basic stats are recorded and reported. If something in the
47066c43e554SSteven Rostedt (Red Hat)  * ring buffer should happen that's not expected, a big warning
47076c43e554SSteven Rostedt (Red Hat)  * is displayed and all ring buffers are disabled.
47086c43e554SSteven Rostedt (Red Hat)  */
47096c43e554SSteven Rostedt (Red Hat) static struct task_struct *rb_threads[NR_CPUS] __initdata;
47106c43e554SSteven Rostedt (Red Hat) 
47116c43e554SSteven Rostedt (Red Hat) struct rb_test_data {
47126c43e554SSteven Rostedt (Red Hat) 	struct ring_buffer	*buffer;
47136c43e554SSteven Rostedt (Red Hat) 	unsigned long		events;
47146c43e554SSteven Rostedt (Red Hat) 	unsigned long		bytes_written;
47156c43e554SSteven Rostedt (Red Hat) 	unsigned long		bytes_alloc;
47166c43e554SSteven Rostedt (Red Hat) 	unsigned long		bytes_dropped;
47176c43e554SSteven Rostedt (Red Hat) 	unsigned long		events_nested;
47186c43e554SSteven Rostedt (Red Hat) 	unsigned long		bytes_written_nested;
47196c43e554SSteven Rostedt (Red Hat) 	unsigned long		bytes_alloc_nested;
47206c43e554SSteven Rostedt (Red Hat) 	unsigned long		bytes_dropped_nested;
47216c43e554SSteven Rostedt (Red Hat) 	int			min_size_nested;
47226c43e554SSteven Rostedt (Red Hat) 	int			max_size_nested;
47236c43e554SSteven Rostedt (Red Hat) 	int			max_size;
47246c43e554SSteven Rostedt (Red Hat) 	int			min_size;
47256c43e554SSteven Rostedt (Red Hat) 	int			cpu;
47266c43e554SSteven Rostedt (Red Hat) 	int			cnt;
47276c43e554SSteven Rostedt (Red Hat) };
47286c43e554SSteven Rostedt (Red Hat) 
47296c43e554SSteven Rostedt (Red Hat) static struct rb_test_data rb_data[NR_CPUS] __initdata;
47306c43e554SSteven Rostedt (Red Hat) 
47316c43e554SSteven Rostedt (Red Hat) /* 1 meg per cpu */
47326c43e554SSteven Rostedt (Red Hat) #define RB_TEST_BUFFER_SIZE	1048576
47336c43e554SSteven Rostedt (Red Hat) 
47346c43e554SSteven Rostedt (Red Hat) static char rb_string[] __initdata =
47356c43e554SSteven Rostedt (Red Hat) 	"abcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()?+\\"
47366c43e554SSteven Rostedt (Red Hat) 	"?+|:';\",.<>/?abcdefghijklmnopqrstuvwxyz1234567890"
47376c43e554SSteven Rostedt (Red Hat) 	"!@#$%^&*()?+\\?+|:';\",.<>/?abcdefghijklmnopqrstuv";
47386c43e554SSteven Rostedt (Red Hat) 
47396c43e554SSteven Rostedt (Red Hat) static bool rb_test_started __initdata;
47406c43e554SSteven Rostedt (Red Hat) 
47416c43e554SSteven Rostedt (Red Hat) struct rb_item {
47426c43e554SSteven Rostedt (Red Hat) 	int size;
47436c43e554SSteven Rostedt (Red Hat) 	char str[];
47446c43e554SSteven Rostedt (Red Hat) };
47456c43e554SSteven Rostedt (Red Hat) 
47466c43e554SSteven Rostedt (Red Hat) static __init int rb_write_something(struct rb_test_data *data, bool nested)
47476c43e554SSteven Rostedt (Red Hat) {
47486c43e554SSteven Rostedt (Red Hat) 	struct ring_buffer_event *event;
47496c43e554SSteven Rostedt (Red Hat) 	struct rb_item *item;
47506c43e554SSteven Rostedt (Red Hat) 	bool started;
47516c43e554SSteven Rostedt (Red Hat) 	int event_len;
47526c43e554SSteven Rostedt (Red Hat) 	int size;
47536c43e554SSteven Rostedt (Red Hat) 	int len;
47546c43e554SSteven Rostedt (Red Hat) 	int cnt;
47556c43e554SSteven Rostedt (Red Hat) 
47566c43e554SSteven Rostedt (Red Hat) 	/* Have nested writes different than what is written */
47576c43e554SSteven Rostedt (Red Hat) 	cnt = data->cnt + (nested ? 27 : 0);
47586c43e554SSteven Rostedt (Red Hat) 
47596c43e554SSteven Rostedt (Red Hat) 	/* Multiply cnt by ~e, to make some unique increment */
47606c43e554SSteven Rostedt (Red Hat) 	size = (data->cnt * 68 / 25) % (sizeof(rb_string) - 1);
47616c43e554SSteven Rostedt (Red Hat) 
47626c43e554SSteven Rostedt (Red Hat) 	len = size + sizeof(struct rb_item);
47636c43e554SSteven Rostedt (Red Hat) 
47646c43e554SSteven Rostedt (Red Hat) 	started = rb_test_started;
47656c43e554SSteven Rostedt (Red Hat) 	/* read rb_test_started before checking buffer enabled */
47666c43e554SSteven Rostedt (Red Hat) 	smp_rmb();
47676c43e554SSteven Rostedt (Red Hat) 
47686c43e554SSteven Rostedt (Red Hat) 	event = ring_buffer_lock_reserve(data->buffer, len);
47696c43e554SSteven Rostedt (Red Hat) 	if (!event) {
47706c43e554SSteven Rostedt (Red Hat) 		/* Ignore dropped events before test starts. */
47716c43e554SSteven Rostedt (Red Hat) 		if (started) {
47726c43e554SSteven Rostedt (Red Hat) 			if (nested)
47736c43e554SSteven Rostedt (Red Hat) 				data->bytes_dropped_nested += len;
47746c43e554SSteven Rostedt (Red Hat) 			else
47756c43e554SSteven Rostedt (Red Hat) 				data->bytes_dropped += len;
47766c43e554SSteven Rostedt (Red Hat) 		}
47776c43e554SSteven Rostedt (Red Hat) 		return len;
47786c43e554SSteven Rostedt (Red Hat) 	}
47796c43e554SSteven Rostedt (Red Hat) 
47806c43e554SSteven Rostedt (Red Hat) 	event_len = ring_buffer_event_length(event);
47816c43e554SSteven Rostedt (Red Hat) 
47826c43e554SSteven Rostedt (Red Hat) 	if (RB_WARN_ON(data->buffer, event_len < len))
47836c43e554SSteven Rostedt (Red Hat) 		goto out;
47846c43e554SSteven Rostedt (Red Hat) 
47856c43e554SSteven Rostedt (Red Hat) 	item = ring_buffer_event_data(event);
47866c43e554SSteven Rostedt (Red Hat) 	item->size = size;
47876c43e554SSteven Rostedt (Red Hat) 	memcpy(item->str, rb_string, size);
47886c43e554SSteven Rostedt (Red Hat) 
47896c43e554SSteven Rostedt (Red Hat) 	if (nested) {
47906c43e554SSteven Rostedt (Red Hat) 		data->bytes_alloc_nested += event_len;
47916c43e554SSteven Rostedt (Red Hat) 		data->bytes_written_nested += len;
47926c43e554SSteven Rostedt (Red Hat) 		data->events_nested++;
47936c43e554SSteven Rostedt (Red Hat) 		if (!data->min_size_nested || len < data->min_size_nested)
47946c43e554SSteven Rostedt (Red Hat) 			data->min_size_nested = len;
47956c43e554SSteven Rostedt (Red Hat) 		if (len > data->max_size_nested)
47966c43e554SSteven Rostedt (Red Hat) 			data->max_size_nested = len;
47976c43e554SSteven Rostedt (Red Hat) 	} else {
47986c43e554SSteven Rostedt (Red Hat) 		data->bytes_alloc += event_len;
47996c43e554SSteven Rostedt (Red Hat) 		data->bytes_written += len;
48006c43e554SSteven Rostedt (Red Hat) 		data->events++;
48016c43e554SSteven Rostedt (Red Hat) 		if (!data->min_size || len < data->min_size)
48026c43e554SSteven Rostedt (Red Hat) 			data->min_size = len;
48036c43e554SSteven Rostedt (Red Hat) 		if (len > data->max_size)
48046c43e554SSteven Rostedt (Red Hat) 			data->max_size = len;
48056c43e554SSteven Rostedt (Red Hat) 	}
48066c43e554SSteven Rostedt (Red Hat) 
48076c43e554SSteven Rostedt (Red Hat)  out:
48086c43e554SSteven Rostedt (Red Hat) 	ring_buffer_unlock_commit(data->buffer, event);
48096c43e554SSteven Rostedt (Red Hat) 
48106c43e554SSteven Rostedt (Red Hat) 	return 0;
48116c43e554SSteven Rostedt (Red Hat) }
48126c43e554SSteven Rostedt (Red Hat) 
48136c43e554SSteven Rostedt (Red Hat) static __init int rb_test(void *arg)
48146c43e554SSteven Rostedt (Red Hat) {
48156c43e554SSteven Rostedt (Red Hat) 	struct rb_test_data *data = arg;
48166c43e554SSteven Rostedt (Red Hat) 
48176c43e554SSteven Rostedt (Red Hat) 	while (!kthread_should_stop()) {
48186c43e554SSteven Rostedt (Red Hat) 		rb_write_something(data, false);
48196c43e554SSteven Rostedt (Red Hat) 		data->cnt++;
48206c43e554SSteven Rostedt (Red Hat) 
48216c43e554SSteven Rostedt (Red Hat) 		set_current_state(TASK_INTERRUPTIBLE);
48226c43e554SSteven Rostedt (Red Hat) 		/* Now sleep between a min of 100-300us and a max of 1ms */
48236c43e554SSteven Rostedt (Red Hat) 		usleep_range(((data->cnt % 3) + 1) * 100, 1000);
48246c43e554SSteven Rostedt (Red Hat) 	}
48256c43e554SSteven Rostedt (Red Hat) 
48266c43e554SSteven Rostedt (Red Hat) 	return 0;
48276c43e554SSteven Rostedt (Red Hat) }
48286c43e554SSteven Rostedt (Red Hat) 
48296c43e554SSteven Rostedt (Red Hat) static __init void rb_ipi(void *ignore)
48306c43e554SSteven Rostedt (Red Hat) {
48316c43e554SSteven Rostedt (Red Hat) 	struct rb_test_data *data;
48326c43e554SSteven Rostedt (Red Hat) 	int cpu = smp_processor_id();
48336c43e554SSteven Rostedt (Red Hat) 
48346c43e554SSteven Rostedt (Red Hat) 	data = &rb_data[cpu];
48356c43e554SSteven Rostedt (Red Hat) 	rb_write_something(data, true);
48366c43e554SSteven Rostedt (Red Hat) }
48376c43e554SSteven Rostedt (Red Hat) 
48386c43e554SSteven Rostedt (Red Hat) static __init int rb_hammer_test(void *arg)
48396c43e554SSteven Rostedt (Red Hat) {
48406c43e554SSteven Rostedt (Red Hat) 	while (!kthread_should_stop()) {
48416c43e554SSteven Rostedt (Red Hat) 
48426c43e554SSteven Rostedt (Red Hat) 		/* Send an IPI to all cpus to write data! */
48436c43e554SSteven Rostedt (Red Hat) 		smp_call_function(rb_ipi, NULL, 1);
48446c43e554SSteven Rostedt (Red Hat) 		/* No sleep, but for non preempt, let others run */
48456c43e554SSteven Rostedt (Red Hat) 		schedule();
48466c43e554SSteven Rostedt (Red Hat) 	}
48476c43e554SSteven Rostedt (Red Hat) 
48486c43e554SSteven Rostedt (Red Hat) 	return 0;
48496c43e554SSteven Rostedt (Red Hat) }
48506c43e554SSteven Rostedt (Red Hat) 
48516c43e554SSteven Rostedt (Red Hat) static __init int test_ringbuffer(void)
48526c43e554SSteven Rostedt (Red Hat) {
48536c43e554SSteven Rostedt (Red Hat) 	struct task_struct *rb_hammer;
48546c43e554SSteven Rostedt (Red Hat) 	struct ring_buffer *buffer;
48556c43e554SSteven Rostedt (Red Hat) 	int cpu;
48566c43e554SSteven Rostedt (Red Hat) 	int ret = 0;
48576c43e554SSteven Rostedt (Red Hat) 
48586c43e554SSteven Rostedt (Red Hat) 	pr_info("Running ring buffer tests...\n");
48596c43e554SSteven Rostedt (Red Hat) 
48606c43e554SSteven Rostedt (Red Hat) 	buffer = ring_buffer_alloc(RB_TEST_BUFFER_SIZE, RB_FL_OVERWRITE);
48616c43e554SSteven Rostedt (Red Hat) 	if (WARN_ON(!buffer))
48626c43e554SSteven Rostedt (Red Hat) 		return 0;
48636c43e554SSteven Rostedt (Red Hat) 
48646c43e554SSteven Rostedt (Red Hat) 	/* Disable buffer so that threads can't write to it yet */
48656c43e554SSteven Rostedt (Red Hat) 	ring_buffer_record_off(buffer);
48666c43e554SSteven Rostedt (Red Hat) 
48676c43e554SSteven Rostedt (Red Hat) 	for_each_online_cpu(cpu) {
48686c43e554SSteven Rostedt (Red Hat) 		rb_data[cpu].buffer = buffer;
48696c43e554SSteven Rostedt (Red Hat) 		rb_data[cpu].cpu = cpu;
48706c43e554SSteven Rostedt (Red Hat) 		rb_data[cpu].cnt = cpu;
48716c43e554SSteven Rostedt (Red Hat) 		rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu],
48726c43e554SSteven Rostedt (Red Hat) 						 "rbtester/%d", cpu);
48736c43e554SSteven Rostedt (Red Hat) 		if (WARN_ON(!rb_threads[cpu])) {
48746c43e554SSteven Rostedt (Red Hat) 			pr_cont("FAILED\n");
48756c43e554SSteven Rostedt (Red Hat) 			ret = -1;
48766c43e554SSteven Rostedt (Red Hat) 			goto out_free;
48776c43e554SSteven Rostedt (Red Hat) 		}
48786c43e554SSteven Rostedt (Red Hat) 
48796c43e554SSteven Rostedt (Red Hat) 		kthread_bind(rb_threads[cpu], cpu);
48806c43e554SSteven Rostedt (Red Hat)  		wake_up_process(rb_threads[cpu]);
48816c43e554SSteven Rostedt (Red Hat) 	}
48826c43e554SSteven Rostedt (Red Hat) 
48836c43e554SSteven Rostedt (Red Hat) 	/* Now create the rb hammer! */
48846c43e554SSteven Rostedt (Red Hat) 	rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer");
48856c43e554SSteven Rostedt (Red Hat) 	if (WARN_ON(!rb_hammer)) {
48866c43e554SSteven Rostedt (Red Hat) 		pr_cont("FAILED\n");
48876c43e554SSteven Rostedt (Red Hat) 		ret = -1;
48886c43e554SSteven Rostedt (Red Hat) 		goto out_free;
48896c43e554SSteven Rostedt (Red Hat) 	}
48906c43e554SSteven Rostedt (Red Hat) 
48916c43e554SSteven Rostedt (Red Hat) 	ring_buffer_record_on(buffer);
48926c43e554SSteven Rostedt (Red Hat) 	/*
48936c43e554SSteven Rostedt (Red Hat) 	 * Show buffer is enabled before setting rb_test_started.
48946c43e554SSteven Rostedt (Red Hat) 	 * Yes there's a small race window where events could be
48956c43e554SSteven Rostedt (Red Hat) 	 * dropped and the thread wont catch it. But when a ring
48966c43e554SSteven Rostedt (Red Hat) 	 * buffer gets enabled, there will always be some kind of
48976c43e554SSteven Rostedt (Red Hat) 	 * delay before other CPUs see it. Thus, we don't care about
48986c43e554SSteven Rostedt (Red Hat) 	 * those dropped events. We care about events dropped after
48996c43e554SSteven Rostedt (Red Hat) 	 * the threads see that the buffer is active.
49006c43e554SSteven Rostedt (Red Hat) 	 */
49016c43e554SSteven Rostedt (Red Hat) 	smp_wmb();
49026c43e554SSteven Rostedt (Red Hat) 	rb_test_started = true;
49036c43e554SSteven Rostedt (Red Hat) 
49046c43e554SSteven Rostedt (Red Hat) 	set_current_state(TASK_INTERRUPTIBLE);
49056c43e554SSteven Rostedt (Red Hat) 	/* Just run for 10 seconds */
49066c43e554SSteven Rostedt (Red Hat) 	schedule_timeout(10 * HZ);
49076c43e554SSteven Rostedt (Red Hat) 
49086c43e554SSteven Rostedt (Red Hat) 	kthread_stop(rb_hammer);
49096c43e554SSteven Rostedt (Red Hat) 
49106c43e554SSteven Rostedt (Red Hat)  out_free:
49116c43e554SSteven Rostedt (Red Hat) 	for_each_online_cpu(cpu) {
49126c43e554SSteven Rostedt (Red Hat) 		if (!rb_threads[cpu])
49136c43e554SSteven Rostedt (Red Hat) 			break;
49146c43e554SSteven Rostedt (Red Hat) 		kthread_stop(rb_threads[cpu]);
49156c43e554SSteven Rostedt (Red Hat) 	}
49166c43e554SSteven Rostedt (Red Hat) 	if (ret) {
49176c43e554SSteven Rostedt (Red Hat) 		ring_buffer_free(buffer);
49186c43e554SSteven Rostedt (Red Hat) 		return ret;
49196c43e554SSteven Rostedt (Red Hat) 	}
49206c43e554SSteven Rostedt (Red Hat) 
49216c43e554SSteven Rostedt (Red Hat) 	/* Report! */
49226c43e554SSteven Rostedt (Red Hat) 	pr_info("finished\n");
49236c43e554SSteven Rostedt (Red Hat) 	for_each_online_cpu(cpu) {
49246c43e554SSteven Rostedt (Red Hat) 		struct ring_buffer_event *event;
49256c43e554SSteven Rostedt (Red Hat) 		struct rb_test_data *data = &rb_data[cpu];
49266c43e554SSteven Rostedt (Red Hat) 		struct rb_item *item;
49276c43e554SSteven Rostedt (Red Hat) 		unsigned long total_events;
49286c43e554SSteven Rostedt (Red Hat) 		unsigned long total_dropped;
49296c43e554SSteven Rostedt (Red Hat) 		unsigned long total_written;
49306c43e554SSteven Rostedt (Red Hat) 		unsigned long total_alloc;
49316c43e554SSteven Rostedt (Red Hat) 		unsigned long total_read = 0;
49326c43e554SSteven Rostedt (Red Hat) 		unsigned long total_size = 0;
49336c43e554SSteven Rostedt (Red Hat) 		unsigned long total_len = 0;
49346c43e554SSteven Rostedt (Red Hat) 		unsigned long total_lost = 0;
49356c43e554SSteven Rostedt (Red Hat) 		unsigned long lost;
49366c43e554SSteven Rostedt (Red Hat) 		int big_event_size;
49376c43e554SSteven Rostedt (Red Hat) 		int small_event_size;
49386c43e554SSteven Rostedt (Red Hat) 
49396c43e554SSteven Rostedt (Red Hat) 		ret = -1;
49406c43e554SSteven Rostedt (Red Hat) 
49416c43e554SSteven Rostedt (Red Hat) 		total_events = data->events + data->events_nested;
49426c43e554SSteven Rostedt (Red Hat) 		total_written = data->bytes_written + data->bytes_written_nested;
49436c43e554SSteven Rostedt (Red Hat) 		total_alloc = data->bytes_alloc + data->bytes_alloc_nested;
49446c43e554SSteven Rostedt (Red Hat) 		total_dropped = data->bytes_dropped + data->bytes_dropped_nested;
49456c43e554SSteven Rostedt (Red Hat) 
49466c43e554SSteven Rostedt (Red Hat) 		big_event_size = data->max_size + data->max_size_nested;
49476c43e554SSteven Rostedt (Red Hat) 		small_event_size = data->min_size + data->min_size_nested;
49486c43e554SSteven Rostedt (Red Hat) 
49496c43e554SSteven Rostedt (Red Hat) 		pr_info("CPU %d:\n", cpu);
49506c43e554SSteven Rostedt (Red Hat) 		pr_info("              events:    %ld\n", total_events);
49516c43e554SSteven Rostedt (Red Hat) 		pr_info("       dropped bytes:    %ld\n", total_dropped);
49526c43e554SSteven Rostedt (Red Hat) 		pr_info("       alloced bytes:    %ld\n", total_alloc);
49536c43e554SSteven Rostedt (Red Hat) 		pr_info("       written bytes:    %ld\n", total_written);
49546c43e554SSteven Rostedt (Red Hat) 		pr_info("       biggest event:    %d\n", big_event_size);
49556c43e554SSteven Rostedt (Red Hat) 		pr_info("      smallest event:    %d\n", small_event_size);
49566c43e554SSteven Rostedt (Red Hat) 
49576c43e554SSteven Rostedt (Red Hat) 		if (RB_WARN_ON(buffer, total_dropped))
49586c43e554SSteven Rostedt (Red Hat) 			break;
49596c43e554SSteven Rostedt (Red Hat) 
49606c43e554SSteven Rostedt (Red Hat) 		ret = 0;
49616c43e554SSteven Rostedt (Red Hat) 
49626c43e554SSteven Rostedt (Red Hat) 		while ((event = ring_buffer_consume(buffer, cpu, NULL, &lost))) {
49636c43e554SSteven Rostedt (Red Hat) 			total_lost += lost;
49646c43e554SSteven Rostedt (Red Hat) 			item = ring_buffer_event_data(event);
49656c43e554SSteven Rostedt (Red Hat) 			total_len += ring_buffer_event_length(event);
49666c43e554SSteven Rostedt (Red Hat) 			total_size += item->size + sizeof(struct rb_item);
49676c43e554SSteven Rostedt (Red Hat) 			if (memcmp(&item->str[0], rb_string, item->size) != 0) {
49686c43e554SSteven Rostedt (Red Hat) 				pr_info("FAILED!\n");
49696c43e554SSteven Rostedt (Red Hat) 				pr_info("buffer had: %.*s\n", item->size, item->str);
49706c43e554SSteven Rostedt (Red Hat) 				pr_info("expected:   %.*s\n", item->size, rb_string);
49716c43e554SSteven Rostedt (Red Hat) 				RB_WARN_ON(buffer, 1);
49726c43e554SSteven Rostedt (Red Hat) 				ret = -1;
49736c43e554SSteven Rostedt (Red Hat) 				break;
49746c43e554SSteven Rostedt (Red Hat) 			}
49756c43e554SSteven Rostedt (Red Hat) 			total_read++;
49766c43e554SSteven Rostedt (Red Hat) 		}
49776c43e554SSteven Rostedt (Red Hat) 		if (ret)
49786c43e554SSteven Rostedt (Red Hat) 			break;
49796c43e554SSteven Rostedt (Red Hat) 
49806c43e554SSteven Rostedt (Red Hat) 		ret = -1;
49816c43e554SSteven Rostedt (Red Hat) 
49826c43e554SSteven Rostedt (Red Hat) 		pr_info("         read events:   %ld\n", total_read);
49836c43e554SSteven Rostedt (Red Hat) 		pr_info("         lost events:   %ld\n", total_lost);
49846c43e554SSteven Rostedt (Red Hat) 		pr_info("        total events:   %ld\n", total_lost + total_read);
49856c43e554SSteven Rostedt (Red Hat) 		pr_info("  recorded len bytes:   %ld\n", total_len);
49866c43e554SSteven Rostedt (Red Hat) 		pr_info(" recorded size bytes:   %ld\n", total_size);
49876c43e554SSteven Rostedt (Red Hat) 		if (total_lost)
49886c43e554SSteven Rostedt (Red Hat) 			pr_info(" With dropped events, record len and size may not match\n"
49896c43e554SSteven Rostedt (Red Hat) 				" alloced and written from above\n");
49906c43e554SSteven Rostedt (Red Hat) 		if (!total_lost) {
49916c43e554SSteven Rostedt (Red Hat) 			if (RB_WARN_ON(buffer, total_len != total_alloc ||
49926c43e554SSteven Rostedt (Red Hat) 				       total_size != total_written))
49936c43e554SSteven Rostedt (Red Hat) 				break;
49946c43e554SSteven Rostedt (Red Hat) 		}
49956c43e554SSteven Rostedt (Red Hat) 		if (RB_WARN_ON(buffer, total_lost + total_read != total_events))
49966c43e554SSteven Rostedt (Red Hat) 			break;
49976c43e554SSteven Rostedt (Red Hat) 
49986c43e554SSteven Rostedt (Red Hat) 		ret = 0;
49996c43e554SSteven Rostedt (Red Hat) 	}
50006c43e554SSteven Rostedt (Red Hat) 	if (!ret)
50016c43e554SSteven Rostedt (Red Hat) 		pr_info("Ring buffer PASSED!\n");
50026c43e554SSteven Rostedt (Red Hat) 
50036c43e554SSteven Rostedt (Red Hat) 	ring_buffer_free(buffer);
50046c43e554SSteven Rostedt (Red Hat) 	return 0;
50056c43e554SSteven Rostedt (Red Hat) }
50066c43e554SSteven Rostedt (Red Hat) 
50076c43e554SSteven Rostedt (Red Hat) late_initcall(test_ringbuffer);
50086c43e554SSteven Rostedt (Red Hat) #endif /* CONFIG_RING_BUFFER_STARTUP_TEST */
5009