xref: /linux-6.15/fs/jbd2/commit.c (revision fd3b3d7f)
1f5166768STheodore Ts'o // SPDX-License-Identifier: GPL-2.0+
2470decc6SDave Kleikamp /*
3f7f4bccbSMingming Cao  * linux/fs/jbd2/commit.c
4470decc6SDave Kleikamp  *
5470decc6SDave Kleikamp  * Written by Stephen C. Tweedie <[email protected]>, 1998
6470decc6SDave Kleikamp  *
7470decc6SDave Kleikamp  * Copyright 1998 Red Hat corp --- All Rights Reserved
8470decc6SDave Kleikamp  *
9470decc6SDave Kleikamp  * Journal commit routines for the generic filesystem journaling code;
10470decc6SDave Kleikamp  * part of the ext2fs journaling system.
11470decc6SDave Kleikamp  */
12470decc6SDave Kleikamp 
13470decc6SDave Kleikamp #include <linux/time.h>
14470decc6SDave Kleikamp #include <linux/fs.h>
15f7f4bccbSMingming Cao #include <linux/jbd2.h>
16470decc6SDave Kleikamp #include <linux/errno.h>
17470decc6SDave Kleikamp #include <linux/slab.h>
18470decc6SDave Kleikamp #include <linux/mm.h>
19470decc6SDave Kleikamp #include <linux/pagemap.h>
208e85fb3fSJohann Lombardi #include <linux/jiffies.h>
21818d276cSGirish Shilamkar #include <linux/crc32.h>
22cd1aac32SAneesh Kumar K.V #include <linux/writeback.h>
23cd1aac32SAneesh Kumar K.V #include <linux/backing-dev.h>
24fd98496fSTheodore Ts'o #include <linux/bio.h>
250e3d2a63STheodore Ts'o #include <linux/blkdev.h>
2639e3ac25SBrian King #include <linux/bitops.h>
27879c5e6bSTheodore Ts'o #include <trace/events/jbd2.h>
28470decc6SDave Kleikamp 
29470decc6SDave Kleikamp /*
30b34090e5SJan Kara  * IO end handler for temporary buffer_heads handling writes to the journal.
31470decc6SDave Kleikamp  */
journal_end_buffer_io_sync(struct buffer_head * bh,int uptodate)32470decc6SDave Kleikamp static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
33470decc6SDave Kleikamp {
34b34090e5SJan Kara 	struct buffer_head *orig_bh = bh->b_private;
35b34090e5SJan Kara 
36470decc6SDave Kleikamp 	BUFFER_TRACE(bh, "");
37470decc6SDave Kleikamp 	if (uptodate)
38470decc6SDave Kleikamp 		set_buffer_uptodate(bh);
39470decc6SDave Kleikamp 	else
40470decc6SDave Kleikamp 		clear_buffer_uptodate(bh);
41b34090e5SJan Kara 	if (orig_bh) {
42b34090e5SJan Kara 		clear_bit_unlock(BH_Shadow, &orig_bh->b_state);
434e857c58SPeter Zijlstra 		smp_mb__after_atomic();
44b34090e5SJan Kara 		wake_up_bit(&orig_bh->b_state, BH_Shadow);
45b34090e5SJan Kara 	}
46470decc6SDave Kleikamp 	unlock_buffer(bh);
47470decc6SDave Kleikamp }
48470decc6SDave Kleikamp 
49470decc6SDave Kleikamp /*
5087c89c23SJan Kara  * When an ext4 file is truncated, it is possible that some pages are not
5187c89c23SJan Kara  * successfully freed, because they are attached to a committing transaction.
52470decc6SDave Kleikamp  * After the transaction commits, these pages are left on the LRU, with no
53470decc6SDave Kleikamp  * ->mapping, and with attached buffers.  These pages are trivially reclaimable
54470decc6SDave Kleikamp  * by the VM, but their apparent absence upsets the VM accounting, and it makes
55470decc6SDave Kleikamp  * the numbers in /proc/meminfo look odd.
56470decc6SDave Kleikamp  *
57470decc6SDave Kleikamp  * So here, we have a buffer which has just come off the forget list.  Look to
58470decc6SDave Kleikamp  * see if we can strip all buffers from the backing page.
59470decc6SDave Kleikamp  *
60*fd3b3d7fSKemeng Shi  * Called under j_list_lock. The caller provided us with a ref against the
61*fd3b3d7fSKemeng Shi  * buffer, and we drop that here.
62470decc6SDave Kleikamp  */
release_buffer_page(struct buffer_head * bh)63470decc6SDave Kleikamp static void release_buffer_page(struct buffer_head *bh)
64470decc6SDave Kleikamp {
6573122255SMatthew Wilcox (Oracle) 	struct folio *folio;
66470decc6SDave Kleikamp 
67470decc6SDave Kleikamp 	if (buffer_dirty(bh))
68470decc6SDave Kleikamp 		goto nope;
69470decc6SDave Kleikamp 	if (atomic_read(&bh->b_count) != 1)
70470decc6SDave Kleikamp 		goto nope;
710d22fe2fSMatthew Wilcox (Oracle) 	folio = bh->b_folio;
7273122255SMatthew Wilcox (Oracle) 	if (folio->mapping)
73470decc6SDave Kleikamp 		goto nope;
74470decc6SDave Kleikamp 
75470decc6SDave Kleikamp 	/* OK, it's a truncated page */
7673122255SMatthew Wilcox (Oracle) 	if (!folio_trylock(folio))
77470decc6SDave Kleikamp 		goto nope;
78470decc6SDave Kleikamp 
7973122255SMatthew Wilcox (Oracle) 	folio_get(folio);
80470decc6SDave Kleikamp 	__brelse(bh);
8168189fefSMatthew Wilcox (Oracle) 	try_to_free_buffers(folio);
8273122255SMatthew Wilcox (Oracle) 	folio_unlock(folio);
8373122255SMatthew Wilcox (Oracle) 	folio_put(folio);
84470decc6SDave Kleikamp 	return;
85470decc6SDave Kleikamp 
86470decc6SDave Kleikamp nope:
87470decc6SDave Kleikamp 	__brelse(bh);
88470decc6SDave Kleikamp }
89470decc6SDave Kleikamp 
/*
 * Store the v2/v3 checksum in a commit block.
 *
 * Nothing is touched unless the journal uses v2 or v3 checksums.  Otherwise
 * the checksum type, size and first checksum word of the commit header are
 * zeroed, the checksum is computed over j_blocksize bytes of the block, and
 * the result is written big-endian into h_chksum[0].
 */
static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh)
{
	struct commit_header *hdr = (struct commit_header *)bh->b_data;
	__u32 sum;

	if (!jbd2_journal_has_csum_v2or3(j))
		return;

	/* The checksum fields are zeroed before the block is summed. */
	hdr->h_chksum_type = 0;
	hdr->h_chksum_size = 0;
	hdr->h_chksum[0] = 0;

	sum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
	hdr->h_chksum[0] = cpu_to_be32(sum);
}
1051f56c589SDarrick J. Wong 
106470decc6SDave Kleikamp /*
107818d276cSGirish Shilamkar  * Done it all: now submit the commit record.  We should have
108470decc6SDave Kleikamp  * cleaned up our previous buffers by now, so if we are in abort
109470decc6SDave Kleikamp  * mode we can now just skip the rest of the journal write
110470decc6SDave Kleikamp  * entirely.
111470decc6SDave Kleikamp  *
112470decc6SDave Kleikamp  * Returns 1 if the journal needs to be aborted or 0 on success
113470decc6SDave Kleikamp  */
journal_submit_commit_record(journal_t * journal,transaction_t * commit_transaction,struct buffer_head ** cbh,__u32 crc32_sum)114818d276cSGirish Shilamkar static int journal_submit_commit_record(journal_t *journal,
115818d276cSGirish Shilamkar 					transaction_t *commit_transaction,
116818d276cSGirish Shilamkar 					struct buffer_head **cbh,
117818d276cSGirish Shilamkar 					__u32 crc32_sum)
118470decc6SDave Kleikamp {
119818d276cSGirish Shilamkar 	struct commit_header *tmp;
120470decc6SDave Kleikamp 	struct buffer_head *bh;
121b42d1d6bSArnd Bergmann 	struct timespec64 now;
1226a3afb6aSZhang Yi 	blk_opf_t write_flags = REQ_OP_WRITE | JBD2_JOURNAL_REQ_FLAGS;
123470decc6SDave Kleikamp 
1246cba611eSZhang Huan 	*cbh = NULL;
1256cba611eSZhang Huan 
126470decc6SDave Kleikamp 	if (is_journal_aborted(journal))
127470decc6SDave Kleikamp 		return 0;
128470decc6SDave Kleikamp 
12932ab6715SJan Kara 	bh = jbd2_journal_get_descriptor_buffer(commit_transaction,
13032ab6715SJan Kara 						JBD2_COMMIT_BLOCK);
131e5a120aeSJan Kara 	if (!bh)
132470decc6SDave Kleikamp 		return 1;
133470decc6SDave Kleikamp 
134818d276cSGirish Shilamkar 	tmp = (struct commit_header *)bh->b_data;
135b42d1d6bSArnd Bergmann 	ktime_get_coarse_real_ts64(&now);
136736603abSTheodore Ts'o 	tmp->h_commit_sec = cpu_to_be64(now.tv_sec);
137736603abSTheodore Ts'o 	tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec);
138818d276cSGirish Shilamkar 
13956316a0dSDarrick J. Wong 	if (jbd2_has_feature_checksum(journal)) {
140818d276cSGirish Shilamkar 		tmp->h_chksum_type 	= JBD2_CRC32_CHKSUM;
141818d276cSGirish Shilamkar 		tmp->h_chksum_size 	= JBD2_CRC32_CHKSUM_SIZE;
142818d276cSGirish Shilamkar 		tmp->h_chksum[0] 	= cpu_to_be32(crc32_sum);
143470decc6SDave Kleikamp 	}
144e5a120aeSJan Kara 	jbd2_commit_block_csum_set(journal, bh);
145470decc6SDave Kleikamp 
146e5a120aeSJan Kara 	BUFFER_TRACE(bh, "submit commit block");
147818d276cSGirish Shilamkar 	lock_buffer(bh);
14845a90bfdSTheodore Ts'o 	clear_buffer_dirty(bh);
149818d276cSGirish Shilamkar 	set_buffer_uptodate(bh);
150818d276cSGirish Shilamkar 	bh->b_end_io = journal_end_buffer_io_sync;
151818d276cSGirish Shilamkar 
152818d276cSGirish Shilamkar 	if (journal->j_flags & JBD2_BARRIER &&
15356316a0dSDarrick J. Wong 	    !jbd2_has_feature_async_commit(journal))
154f3ed5df3SRitesh Harjani (IBM) 		write_flags |= REQ_PREFLUSH | REQ_FUA;
155470decc6SDave Kleikamp 
156f3ed5df3SRitesh Harjani (IBM) 	submit_bh(write_flags, bh);
157818d276cSGirish Shilamkar 	*cbh = bh;
158f3ed5df3SRitesh Harjani (IBM) 	return 0;
159818d276cSGirish Shilamkar }
160470decc6SDave Kleikamp 
161818d276cSGirish Shilamkar /*
162818d276cSGirish Shilamkar  * This function along with journal_submit_commit_record
163818d276cSGirish Shilamkar  * allows to write the commit record asynchronously.
164818d276cSGirish Shilamkar  */
journal_wait_on_commit_record(journal_t * journal,struct buffer_head * bh)165fd98496fSTheodore Ts'o static int journal_wait_on_commit_record(journal_t *journal,
166fd98496fSTheodore Ts'o 					 struct buffer_head *bh)
167818d276cSGirish Shilamkar {
168818d276cSGirish Shilamkar 	int ret = 0;
169818d276cSGirish Shilamkar 
170818d276cSGirish Shilamkar 	clear_buffer_dirty(bh);
171818d276cSGirish Shilamkar 	wait_on_buffer(bh);
172818d276cSGirish Shilamkar 
173818d276cSGirish Shilamkar 	if (unlikely(!buffer_uptodate(bh)))
174818d276cSGirish Shilamkar 		ret = -EIO;
175818d276cSGirish Shilamkar 	put_bh(bh);            /* One for getblk() */
176818d276cSGirish Shilamkar 
177818d276cSGirish Shilamkar 	return ret;
178818d276cSGirish Shilamkar }
179818d276cSGirish Shilamkar 
180ff780b91SHarshad Shirwadkar /* Send all the data buffers related to an inode */
jbd2_submit_inode_data(journal_t * journal,struct jbd2_inode * jinode)181f30ff35fSJan Kara int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode)
182ff780b91SHarshad Shirwadkar {
183ff780b91SHarshad Shirwadkar 	if (!jinode || !(jinode->i_flags & JI_WRITE_DATA))
184ff780b91SHarshad Shirwadkar 		return 0;
185ff780b91SHarshad Shirwadkar 
186ff780b91SHarshad Shirwadkar 	trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
187f30ff35fSJan Kara 	return journal->j_submit_inode_data_buffers(jinode);
188ff780b91SHarshad Shirwadkar 
189ff780b91SHarshad Shirwadkar }
190ff780b91SHarshad Shirwadkar EXPORT_SYMBOL(jbd2_submit_inode_data);
191ff780b91SHarshad Shirwadkar 
/*
 * Wait for writeback of the dirty data range recorded in @jinode.
 *
 * Returns 0 straight away when @jinode is NULL, is not flagged JI_WAIT_DATA,
 * or has no VFS inode or mapping.  Otherwise returns whatever
 * filemap_fdatawait_range_keep_errors() reports for the range described by
 * i_dirty_start and i_dirty_end.
 */
int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode)
{
	struct address_space *mapping;

	if (!jinode || !(jinode->i_flags & JI_WAIT_DATA))
		return 0;
	if (!jinode->i_vfs_inode)
		return 0;

	mapping = jinode->i_vfs_inode->i_mapping;
	if (!mapping)
		return 0;

	return filemap_fdatawait_range_keep_errors(mapping,
						   jinode->i_dirty_start,
						   jinode->i_dirty_end);
}
EXPORT_SYMBOL(jbd2_wait_inode_data);
202ff780b91SHarshad Shirwadkar 
203cd1aac32SAneesh Kumar K.V /*
204c851ed54SJan Kara  * Submit all the data buffers of inode associated with the transaction to
205c851ed54SJan Kara  * disk.
206c851ed54SJan Kara  *
207c851ed54SJan Kara  * We are in a committing transaction. Therefore no new inode can be added to
208c851ed54SJan Kara  * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently
209c851ed54SJan Kara  * operate on from being released while we write out pages.
210c851ed54SJan Kara  */
journal_submit_data_buffers(journal_t * journal,transaction_t * commit_transaction)211cd1aac32SAneesh Kumar K.V static int journal_submit_data_buffers(journal_t *journal,
212c851ed54SJan Kara 		transaction_t *commit_transaction)
213c851ed54SJan Kara {
214c851ed54SJan Kara 	struct jbd2_inode *jinode;
215c851ed54SJan Kara 	int err, ret = 0;
216c851ed54SJan Kara 
217c851ed54SJan Kara 	spin_lock(&journal->j_list_lock);
218c851ed54SJan Kara 	list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
21941617e1aSJan Kara 		if (!(jinode->i_flags & JI_WRITE_DATA))
22041617e1aSJan Kara 			continue;
221cb0d9d47SJan Kara 		jinode->i_flags |= JI_COMMIT_RUNNING;
222c851ed54SJan Kara 		spin_unlock(&journal->j_list_lock);
223342af94eSMauricio Faria de Oliveira 		/* submit the inode data buffers. */
224879c5e6bSTheodore Ts'o 		trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
225342af94eSMauricio Faria de Oliveira 		if (journal->j_submit_inode_data_buffers) {
226342af94eSMauricio Faria de Oliveira 			err = journal->j_submit_inode_data_buffers(jinode);
227c851ed54SJan Kara 			if (!ret)
228c851ed54SJan Kara 				ret = err;
229342af94eSMauricio Faria de Oliveira 		}
230c851ed54SJan Kara 		spin_lock(&journal->j_list_lock);
231c851ed54SJan Kara 		J_ASSERT(jinode->i_transaction == commit_transaction);
232cb0d9d47SJan Kara 		jinode->i_flags &= ~JI_COMMIT_RUNNING;
233cb0d9d47SJan Kara 		smp_mb();
234c851ed54SJan Kara 		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
235c851ed54SJan Kara 	}
236c851ed54SJan Kara 	spin_unlock(&journal->j_list_lock);
237c851ed54SJan Kara 	return ret;
238c851ed54SJan Kara }
239c851ed54SJan Kara 
jbd2_journal_finish_inode_data_buffers(struct jbd2_inode * jinode)240aa3c0c61SMauricio Faria de Oliveira int jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
241aa3c0c61SMauricio Faria de Oliveira {
242aa3c0c61SMauricio Faria de Oliveira 	struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
243aa3c0c61SMauricio Faria de Oliveira 
244aa3c0c61SMauricio Faria de Oliveira 	return filemap_fdatawait_range_keep_errors(mapping,
245aa3c0c61SMauricio Faria de Oliveira 						   jinode->i_dirty_start,
246aa3c0c61SMauricio Faria de Oliveira 						   jinode->i_dirty_end);
247aa3c0c61SMauricio Faria de Oliveira }
248aa3c0c61SMauricio Faria de Oliveira 
249c851ed54SJan Kara /*
250c851ed54SJan Kara  * Wait for data submitted for writeout, refile inodes to proper
251c851ed54SJan Kara  * transaction if needed.
252c851ed54SJan Kara  *
253c851ed54SJan Kara  */
journal_finish_inode_data_buffers(journal_t * journal,transaction_t * commit_transaction)254c851ed54SJan Kara static int journal_finish_inode_data_buffers(journal_t *journal,
255c851ed54SJan Kara 		transaction_t *commit_transaction)
256c851ed54SJan Kara {
257c851ed54SJan Kara 	struct jbd2_inode *jinode, *next_i;
258c851ed54SJan Kara 	int err, ret = 0;
259c851ed54SJan Kara 
260cd1aac32SAneesh Kumar K.V 	/* For locking, see the comment in journal_submit_data_buffers() */
261c851ed54SJan Kara 	spin_lock(&journal->j_list_lock);
262c851ed54SJan Kara 	list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
26341617e1aSJan Kara 		if (!(jinode->i_flags & JI_WAIT_DATA))
26441617e1aSJan Kara 			continue;
265cb0d9d47SJan Kara 		jinode->i_flags |= JI_COMMIT_RUNNING;
266c851ed54SJan Kara 		spin_unlock(&journal->j_list_lock);
267342af94eSMauricio Faria de Oliveira 		/* wait for the inode data buffers writeout. */
268342af94eSMauricio Faria de Oliveira 		if (journal->j_finish_inode_data_buffers) {
269342af94eSMauricio Faria de Oliveira 			err = journal->j_finish_inode_data_buffers(jinode);
270c851ed54SJan Kara 			if (!ret)
271c851ed54SJan Kara 				ret = err;
272342af94eSMauricio Faria de Oliveira 		}
2736c02757cSYe Bin 		cond_resched();
274c851ed54SJan Kara 		spin_lock(&journal->j_list_lock);
275cb0d9d47SJan Kara 		jinode->i_flags &= ~JI_COMMIT_RUNNING;
276cb0d9d47SJan Kara 		smp_mb();
277c851ed54SJan Kara 		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
278c851ed54SJan Kara 	}
279c851ed54SJan Kara 
280c851ed54SJan Kara 	/* Now refile inode to proper lists */
281c851ed54SJan Kara 	list_for_each_entry_safe(jinode, next_i,
282c851ed54SJan Kara 				 &commit_transaction->t_inode_list, i_list) {
283c851ed54SJan Kara 		list_del(&jinode->i_list);
284c851ed54SJan Kara 		if (jinode->i_next_transaction) {
285c851ed54SJan Kara 			jinode->i_transaction = jinode->i_next_transaction;
286c851ed54SJan Kara 			jinode->i_next_transaction = NULL;
287c851ed54SJan Kara 			list_add(&jinode->i_list,
288c851ed54SJan Kara 				&jinode->i_transaction->t_inode_list);
289c851ed54SJan Kara 		} else {
290c851ed54SJan Kara 			jinode->i_transaction = NULL;
2916ba0e7dcSRoss Zwisler 			jinode->i_dirty_start = 0;
2926ba0e7dcSRoss Zwisler 			jinode->i_dirty_end = 0;
293c851ed54SJan Kara 		}
294c851ed54SJan Kara 	}
295c851ed54SJan Kara 	spin_unlock(&journal->j_list_lock);
296c851ed54SJan Kara 
297c851ed54SJan Kara 	return ret;
298c851ed54SJan Kara }
299c851ed54SJan Kara 
/*
 * Fold the contents of @bh into a running big-endian CRC32.
 *
 * The buffer's data is mapped through its folio for the duration of the
 * computation.  Returns crc32_be() of @crc32_sum continued over b_size bytes.
 */
static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
{
	char *data = kmap_local_folio(bh->b_folio, bh_offset(bh));
	__u32 crc = crc32_be(crc32_sum, data, bh->b_size);

	kunmap_local(data);
	return crc;
}
311818d276cSGirish Shilamkar 
/*
 * Record a block number in a journal block tag.
 *
 * The low 32 bits of @block always go to t_blocknr; the high 32 bits are
 * written to t_blocknr_high only when the journal has the 64bit feature.
 * Both fields are stored big-endian.
 */
static void write_tag_block(journal_t *j, journal_block_tag_t *tag,
				   unsigned long long block)
{
	tag->t_blocknr = cpu_to_be32((u32)block);

	if (!jbd2_has_feature_64bit(j))
		return;

	tag->t_blocknr_high = cpu_to_be32(block >> 32);
}
319b517bea1SZach Brown 
/*
 * Store the checksum of a journalled data block in its tag.
 *
 * Nothing is done unless the journal uses v2 or v3 checksums.  The checksum
 * covers the big-endian @sequence followed by b_size bytes of the buffer's
 * data, seeded with j_csum_seed.  With the csum3 feature the full 32-bit
 * value is stored through the tag3 layout; otherwise it is stored in the
 * 16-bit t_checksum field of the plain tag.
 */
static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
				    struct buffer_head *bh, __u32 sequence)
{
	__be32 be_seq = cpu_to_be32(sequence);
	__u8 *data;
	__u32 sum;

	if (!jbd2_journal_has_csum_v2or3(j))
		return;

	data = kmap_local_folio(bh->b_folio, bh_offset(bh));
	sum = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&be_seq, sizeof(be_seq));
	sum = jbd2_chksum(j, sum, data, bh->b_size);
	kunmap_local(data);

	if (!jbd2_has_feature_csum3(j)) {
		tag->t_checksum = cpu_to_be16(sum);
		return;
	}

	((journal_block_tag3_t *)tag)->t_checksum = cpu_to_be32(sum);
}
342470decc6SDave Kleikamp /*
343f7f4bccbSMingming Cao  * jbd2_journal_commit_transaction
344470decc6SDave Kleikamp  *
345470decc6SDave Kleikamp  * The primary function for committing a transaction to the log.  This
346470decc6SDave Kleikamp  * function is called by the journal thread to begin a complete commit.
347470decc6SDave Kleikamp  */
jbd2_journal_commit_transaction(journal_t * journal)348f7f4bccbSMingming Cao void jbd2_journal_commit_transaction(journal_t *journal)
349470decc6SDave Kleikamp {
3508e85fb3fSJohann Lombardi 	struct transaction_stats_s stats;
351470decc6SDave Kleikamp 	transaction_t *commit_transaction;
352e5a120aeSJan Kara 	struct journal_head *jh;
353e5a120aeSJan Kara 	struct buffer_head *descriptor;
354470decc6SDave Kleikamp 	struct buffer_head **wbuf = journal->j_wbuf;
355470decc6SDave Kleikamp 	int bufs;
356abe48a52SKemeng Shi 	int escape;
357470decc6SDave Kleikamp 	int err;
35818eba7aaSMingming Cao 	unsigned long long blocknr;
359e07f7183SJosef Bacik 	ktime_t start_time;
360e07f7183SJosef Bacik 	u64 commit_time;
361470decc6SDave Kleikamp 	char *tagp = NULL;
362470decc6SDave Kleikamp 	journal_block_tag_t *tag = NULL;
363470decc6SDave Kleikamp 	int space_left = 0;
364470decc6SDave Kleikamp 	int first_tag = 0;
365470decc6SDave Kleikamp 	int tag_flag;
366794446c6SDmitry Monakhov 	int i;
367b517bea1SZach Brown 	int tag_bytes = journal_tag_bytes(journal);
368818d276cSGirish Shilamkar 	struct buffer_head *cbh = NULL; /* For transactional checksums */
369818d276cSGirish Shilamkar 	__u32 crc32_sum = ~0;
37082f04ab4SJens Axboe 	struct blk_plug plug;
3713339578fSJan Kara 	/* Tail of the journal */
3723339578fSJan Kara 	unsigned long first_block;
3733339578fSJan Kara 	tid_t first_tid;
3743339578fSJan Kara 	int update_tail;
3753caa487fSDarrick J. Wong 	int csum_size = 0;
376f5113effSJan Kara 	LIST_HEAD(io_bufs);
377e5a120aeSJan Kara 	LIST_HEAD(log_bufs);
3783caa487fSDarrick J. Wong 
379db9ee220SDarrick J. Wong 	if (jbd2_journal_has_csum_v2or3(journal))
3803caa487fSDarrick J. Wong 		csum_size = sizeof(struct jbd2_journal_block_tail);
381470decc6SDave Kleikamp 
382470decc6SDave Kleikamp 	/*
383470decc6SDave Kleikamp 	 * First job: lock down the current transaction and wait for
384470decc6SDave Kleikamp 	 * all outstanding updates to complete.
385470decc6SDave Kleikamp 	 */
386470decc6SDave Kleikamp 
387f7f4bccbSMingming Cao 	/* Do we need to erase the effects of a prior jbd2_journal_flush? */
388f7f4bccbSMingming Cao 	if (journal->j_flags & JBD2_FLUSHED) {
389cb3b3bf2SJan Kara 		jbd2_debug(3, "super block updated\n");
3906fa7aa50STejun Heo 		mutex_lock_io(&journal->j_checkpoint_mutex);
39179feb521SJan Kara 		/*
39279feb521SJan Kara 		 * We hold j_checkpoint_mutex so tail cannot change under us.
39379feb521SJan Kara 		 * We don't need any special data guarantees for writing sb
39479feb521SJan Kara 		 * since journal is empty and it is ok for write to be
39579feb521SJan Kara 		 * flushed only with transaction commit.
39679feb521SJan Kara 		 */
39779feb521SJan Kara 		jbd2_journal_update_sb_log_tail(journal,
39879feb521SJan Kara 						journal->j_tail_sequence,
3996a3afb6aSZhang Yi 						journal->j_tail, 0);
400a78bb11dSJan Kara 		mutex_unlock(&journal->j_checkpoint_mutex);
401470decc6SDave Kleikamp 	} else {
402cb3b3bf2SJan Kara 		jbd2_debug(3, "superblock not updated\n");
403470decc6SDave Kleikamp 	}
404470decc6SDave Kleikamp 
405470decc6SDave Kleikamp 	J_ASSERT(journal->j_running_transaction != NULL);
406470decc6SDave Kleikamp 	J_ASSERT(journal->j_committing_transaction == NULL);
407470decc6SDave Kleikamp 
408ff780b91SHarshad Shirwadkar 	write_lock(&journal->j_state_lock);
409ff780b91SHarshad Shirwadkar 	journal->j_flags |= JBD2_FULL_COMMIT_ONGOING;
410ff780b91SHarshad Shirwadkar 	while (journal->j_flags & JBD2_FAST_COMMIT_ONGOING) {
411ff780b91SHarshad Shirwadkar 		DEFINE_WAIT(wait);
412ff780b91SHarshad Shirwadkar 
413ff780b91SHarshad Shirwadkar 		prepare_to_wait(&journal->j_fc_wait, &wait,
414ff780b91SHarshad Shirwadkar 				TASK_UNINTERRUPTIBLE);
415ff780b91SHarshad Shirwadkar 		write_unlock(&journal->j_state_lock);
416ff780b91SHarshad Shirwadkar 		schedule();
417ff780b91SHarshad Shirwadkar 		write_lock(&journal->j_state_lock);
418ff780b91SHarshad Shirwadkar 		finish_wait(&journal->j_fc_wait, &wait);
419cc80586aSHarshad Shirwadkar 		/*
420cc80586aSHarshad Shirwadkar 		 * TODO: by blocking fast commits here, we are increasing
421cc80586aSHarshad Shirwadkar 		 * fsync() latency slightly. Strictly speaking, we don't need
422cc80586aSHarshad Shirwadkar 		 * to block fast commits until the transaction enters T_FLUSH
423cc80586aSHarshad Shirwadkar 		 * state. So an optimization is possible where we block new fast
424cc80586aSHarshad Shirwadkar 		 * commits here and wait for existing ones to complete
425cc80586aSHarshad Shirwadkar 		 * just before we enter T_FLUSH. That way, the existing fast
426cc80586aSHarshad Shirwadkar 		 * commits and this full commit can proceed parallely.
427cc80586aSHarshad Shirwadkar 		 */
428ff780b91SHarshad Shirwadkar 	}
429ff780b91SHarshad Shirwadkar 	write_unlock(&journal->j_state_lock);
430ff780b91SHarshad Shirwadkar 
431470decc6SDave Kleikamp 	commit_transaction = journal->j_running_transaction;
432470decc6SDave Kleikamp 
433879c5e6bSTheodore Ts'o 	trace_jbd2_start_commit(journal, commit_transaction);
434cb3b3bf2SJan Kara 	jbd2_debug(1, "JBD2: starting commit of transaction %d\n",
435470decc6SDave Kleikamp 			commit_transaction->t_tid);
436470decc6SDave Kleikamp 
437a931da6aSTheodore Ts'o 	write_lock(&journal->j_state_lock);
438ff780b91SHarshad Shirwadkar 	journal->j_fc_off = 0;
4393ca841c1SPaul Gortmaker 	J_ASSERT(commit_transaction->t_state == T_RUNNING);
440470decc6SDave Kleikamp 	commit_transaction->t_state = T_LOCKED;
441470decc6SDave Kleikamp 
442879c5e6bSTheodore Ts'o 	trace_jbd2_commit_locking(journal, commit_transaction);
443bf699327STheodore Ts'o 	stats.run.rs_wait = commit_transaction->t_max_wait;
4449fff24aaSTheodore Ts'o 	stats.run.rs_request_delay = 0;
445bf699327STheodore Ts'o 	stats.run.rs_locked = jiffies;
4469fff24aaSTheodore Ts'o 	if (commit_transaction->t_requested)
4479fff24aaSTheodore Ts'o 		stats.run.rs_request_delay =
4489fff24aaSTheodore Ts'o 			jbd2_time_diff(commit_transaction->t_requested,
4499fff24aaSTheodore Ts'o 				       stats.run.rs_locked);
450bf699327STheodore Ts'o 	stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
451bf699327STheodore Ts'o 					      stats.run.rs_locked);
4528e85fb3fSJohann Lombardi 
4534f981868SRitesh Harjani 	// waits for any t_updates to finish
4544f981868SRitesh Harjani 	jbd2_journal_wait_updates(journal);
455470decc6SDave Kleikamp 
45696f1e097SJan Kara 	commit_transaction->t_state = T_SWITCH;
457470decc6SDave Kleikamp 
458a51dca9cSTheodore Ts'o 	J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
459470decc6SDave Kleikamp 			journal->j_max_transaction_buffers);
460470decc6SDave Kleikamp 
461470decc6SDave Kleikamp 	/*
462470decc6SDave Kleikamp 	 * First thing we are allowed to do is to discard any remaining
463470decc6SDave Kleikamp 	 * BJ_Reserved buffers.  Note, it is _not_ permissible to assume
464470decc6SDave Kleikamp 	 * that there are no such buffers: if a large filesystem
465470decc6SDave Kleikamp 	 * operation like a truncate needs to split itself over multiple
466f7f4bccbSMingming Cao 	 * transactions, then it may try to do a jbd2_journal_restart() while
467470decc6SDave Kleikamp 	 * there are still BJ_Reserved buffers outstanding.  These must
468470decc6SDave Kleikamp 	 * be released cleanly from the current transaction.
469470decc6SDave Kleikamp 	 *
470470decc6SDave Kleikamp 	 * In this case, the filesystem must still reserve write access
471470decc6SDave Kleikamp 	 * again before modifying the buffer in the new transaction, but
472470decc6SDave Kleikamp 	 * we do not require it to remember exactly which old buffers it
473470decc6SDave Kleikamp 	 * has reserved.  This is consistent with the existing behaviour
474f7f4bccbSMingming Cao 	 * that multiple jbd2_journal_get_write_access() calls to the same
47525985edcSLucas De Marchi 	 * buffer are perfectly permissible.
47623e3d7f7SYe Bin 	 * We use journal->j_state_lock here to serialize processing of
47723e3d7f7SYe Bin 	 * t_reserved_list with eviction of buffers from journal_unmap_buffer().
478470decc6SDave Kleikamp 	 */
479470decc6SDave Kleikamp 	while (commit_transaction->t_reserved_list) {
480470decc6SDave Kleikamp 		jh = commit_transaction->t_reserved_list;
481470decc6SDave Kleikamp 		JBUFFER_TRACE(jh, "reserved, unused: refile");
482470decc6SDave Kleikamp 		/*
483f7f4bccbSMingming Cao 		 * A jbd2_journal_get_undo_access()+jbd2_journal_release_buffer() may
484470decc6SDave Kleikamp 		 * leave undo-committed data.
485470decc6SDave Kleikamp 		 */
486470decc6SDave Kleikamp 		if (jh->b_committed_data) {
487470decc6SDave Kleikamp 			struct buffer_head *bh = jh2bh(jh);
488470decc6SDave Kleikamp 
48946417064SThomas Gleixner 			spin_lock(&jh->b_state_lock);
490af1e76d6SMingming Cao 			jbd2_free(jh->b_committed_data, bh->b_size);
491470decc6SDave Kleikamp 			jh->b_committed_data = NULL;
49246417064SThomas Gleixner 			spin_unlock(&jh->b_state_lock);
493470decc6SDave Kleikamp 		}
494f7f4bccbSMingming Cao 		jbd2_journal_refile_buffer(journal, jh);
495470decc6SDave Kleikamp 	}
496470decc6SDave Kleikamp 
49723e3d7f7SYe Bin 	write_unlock(&journal->j_state_lock);
498470decc6SDave Kleikamp 	/*
499470decc6SDave Kleikamp 	 * Now try to drop any written-back buffers from the journal's
500470decc6SDave Kleikamp 	 * checkpoint lists.  We do this *before* commit because it potentially
501470decc6SDave Kleikamp 	 * frees some memory
502470decc6SDave Kleikamp 	 */
503470decc6SDave Kleikamp 	spin_lock(&journal->j_list_lock);
50426770a71SYe Bin 	__jbd2_journal_clean_checkpoint_list(journal, JBD2_SHRINK_BUSY_STOP);
505470decc6SDave Kleikamp 	spin_unlock(&journal->j_list_lock);
506470decc6SDave Kleikamp 
507cb3b3bf2SJan Kara 	jbd2_debug(3, "JBD2: commit phase 1\n");
508470decc6SDave Kleikamp 
509470decc6SDave Kleikamp 	/*
5101ba37268SYongqiang Yang 	 * Clear revoked flag to reflect there is no revoked buffers
5111ba37268SYongqiang Yang 	 * in the next transaction which is going to be started.
5121ba37268SYongqiang Yang 	 */
5131ba37268SYongqiang Yang 	jbd2_clear_buffer_revoked_flags(journal);
5141ba37268SYongqiang Yang 
5151ba37268SYongqiang Yang 	/*
516470decc6SDave Kleikamp 	 * Switch to a new revoke table.
517470decc6SDave Kleikamp 	 */
518f7f4bccbSMingming Cao 	jbd2_journal_switch_revoke_table(journal);
519470decc6SDave Kleikamp 
520a89573ceSZhang Yi 	write_lock(&journal->j_state_lock);
5218f7d89f3SJan Kara 	/*
5228f7d89f3SJan Kara 	 * Reserved credits cannot be claimed anymore, free them
5238f7d89f3SJan Kara 	 */
5248f7d89f3SJan Kara 	atomic_sub(atomic_read(&journal->j_reserved_credits),
5258f7d89f3SJan Kara 		   &commit_transaction->t_outstanding_credits);
5268f7d89f3SJan Kara 
527879c5e6bSTheodore Ts'o 	trace_jbd2_commit_flushing(journal, commit_transaction);
528bf699327STheodore Ts'o 	stats.run.rs_flushing = jiffies;
529bf699327STheodore Ts'o 	stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked,
530bf699327STheodore Ts'o 					     stats.run.rs_flushing);
5318e85fb3fSJohann Lombardi 
532470decc6SDave Kleikamp 	commit_transaction->t_state = T_FLUSH;
533470decc6SDave Kleikamp 	journal->j_committing_transaction = commit_transaction;
534470decc6SDave Kleikamp 	journal->j_running_transaction = NULL;
535e07f7183SJosef Bacik 	start_time = ktime_get();
536470decc6SDave Kleikamp 	commit_transaction->t_log_start = journal->j_head;
53734fc8768SAndrew Perepechko 	wake_up_all(&journal->j_wait_transaction_locked);
538a931da6aSTheodore Ts'o 	write_unlock(&journal->j_state_lock);
539470decc6SDave Kleikamp 
540cb3b3bf2SJan Kara 	jbd2_debug(3, "JBD2: commit phase 2a\n");
541470decc6SDave Kleikamp 
542470decc6SDave Kleikamp 	/*
543470decc6SDave Kleikamp 	 * Now start flushing things to disk, in the order they appear
544470decc6SDave Kleikamp 	 * on the transaction lists.  Data blocks go first.
545470decc6SDave Kleikamp 	 */
546cd1aac32SAneesh Kumar K.V 	err = journal_submit_data_buffers(journal, commit_transaction);
547c851ed54SJan Kara 	if (err)
548c851ed54SJan Kara 		jbd2_journal_abort(journal, err);
549470decc6SDave Kleikamp 
55082f04ab4SJens Axboe 	blk_start_plug(&plug);
5519bcf976cSJan Kara 	jbd2_journal_write_revoke_records(commit_transaction, &log_bufs);
552470decc6SDave Kleikamp 
553cb3b3bf2SJan Kara 	jbd2_debug(3, "JBD2: commit phase 2b\n");
554470decc6SDave Kleikamp 
555470decc6SDave Kleikamp 	/*
556470decc6SDave Kleikamp 	 * Way to go: we have now written out all of the data for a
557470decc6SDave Kleikamp 	 * transaction!  Now comes the tricky part: we need to write out
558470decc6SDave Kleikamp 	 * metadata.  Loop over the transaction's entire buffer list:
559470decc6SDave Kleikamp 	 */
560a931da6aSTheodore Ts'o 	write_lock(&journal->j_state_lock);
561470decc6SDave Kleikamp 	commit_transaction->t_state = T_COMMIT;
562a931da6aSTheodore Ts'o 	write_unlock(&journal->j_state_lock);
563470decc6SDave Kleikamp 
564879c5e6bSTheodore Ts'o 	trace_jbd2_commit_logging(journal, commit_transaction);
565bf699327STheodore Ts'o 	stats.run.rs_logging = jiffies;
566bf699327STheodore Ts'o 	stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing,
567bf699327STheodore Ts'o 					       stats.run.rs_logging);
5689f356e5aSJan Kara 	stats.run.rs_blocks = commit_transaction->t_nr_buffers;
569bf699327STheodore Ts'o 	stats.run.rs_blocks_logged = 0;
5708e85fb3fSJohann Lombardi 
5711dfc3220SJosef Bacik 	J_ASSERT(commit_transaction->t_nr_buffers <=
572a51dca9cSTheodore Ts'o 		 atomic_read(&commit_transaction->t_outstanding_credits));
5731dfc3220SJosef Bacik 
574470decc6SDave Kleikamp 	bufs = 0;
575e5a120aeSJan Kara 	descriptor = NULL;
576470decc6SDave Kleikamp 	while (commit_transaction->t_buffers) {
577470decc6SDave Kleikamp 
578470decc6SDave Kleikamp 		/* Find the next buffer to be journaled... */
579470decc6SDave Kleikamp 
580470decc6SDave Kleikamp 		jh = commit_transaction->t_buffers;
581470decc6SDave Kleikamp 
582470decc6SDave Kleikamp 		/* If we're in abort mode, we just un-journal the buffer and
5837ad7445fSHidehiro Kawai 		   release it. */
584470decc6SDave Kleikamp 
585470decc6SDave Kleikamp 		if (is_journal_aborted(journal)) {
5867ad7445fSHidehiro Kawai 			clear_buffer_jbddirty(jh2bh(jh));
587470decc6SDave Kleikamp 			JBUFFER_TRACE(jh, "journal is aborting: refile");
588e06c8227SJoel Becker 			jbd2_buffer_abort_trigger(jh,
589e06c8227SJoel Becker 						  jh->b_frozen_data ?
590e06c8227SJoel Becker 						  jh->b_frozen_triggers :
591e06c8227SJoel Becker 						  jh->b_triggers);
592f7f4bccbSMingming Cao 			jbd2_journal_refile_buffer(journal, jh);
593470decc6SDave Kleikamp 			/* If that was the last one, we need to clean up
594470decc6SDave Kleikamp 			 * any descriptor buffers which may have been
595470decc6SDave Kleikamp 			 * already allocated, even if we are now
596470decc6SDave Kleikamp 			 * aborting. */
597470decc6SDave Kleikamp 			if (!commit_transaction->t_buffers)
598470decc6SDave Kleikamp 				goto start_journal_io;
599470decc6SDave Kleikamp 			continue;
600470decc6SDave Kleikamp 		}
601470decc6SDave Kleikamp 
602470decc6SDave Kleikamp 		/* Make sure we have a descriptor block in which to
603470decc6SDave Kleikamp 		   record the metadata buffer. */
604470decc6SDave Kleikamp 
605470decc6SDave Kleikamp 		if (!descriptor) {
606470decc6SDave Kleikamp 			J_ASSERT (bufs == 0);
607470decc6SDave Kleikamp 
608cb3b3bf2SJan Kara 			jbd2_debug(4, "JBD2: get descriptor\n");
609470decc6SDave Kleikamp 
61032ab6715SJan Kara 			descriptor = jbd2_journal_get_descriptor_buffer(
61132ab6715SJan Kara 							commit_transaction,
61232ab6715SJan Kara 							JBD2_DESCRIPTOR_BLOCK);
613470decc6SDave Kleikamp 			if (!descriptor) {
614a7fa2bafSJan Kara 				jbd2_journal_abort(journal, -EIO);
615470decc6SDave Kleikamp 				continue;
616470decc6SDave Kleikamp 			}
617470decc6SDave Kleikamp 
618cb3b3bf2SJan Kara 			jbd2_debug(4, "JBD2: got buffer %llu (%p)\n",
619e5a120aeSJan Kara 				(unsigned long long)descriptor->b_blocknr,
620e5a120aeSJan Kara 				descriptor->b_data);
621e5a120aeSJan Kara 			tagp = &descriptor->b_data[sizeof(journal_header_t)];
622e5a120aeSJan Kara 			space_left = descriptor->b_size -
623e5a120aeSJan Kara 						sizeof(journal_header_t);
624470decc6SDave Kleikamp 			first_tag = 1;
625e5a120aeSJan Kara 			set_buffer_jwrite(descriptor);
626e5a120aeSJan Kara 			set_buffer_dirty(descriptor);
627e5a120aeSJan Kara 			wbuf[bufs++] = descriptor;
628470decc6SDave Kleikamp 
629470decc6SDave Kleikamp 			/* Record it so that we can wait for IO
630470decc6SDave Kleikamp                            completion later */
631e5a120aeSJan Kara 			BUFFER_TRACE(descriptor, "ph3: file as descriptor");
632e5a120aeSJan Kara 			jbd2_file_log_bh(&log_bufs, descriptor);
633470decc6SDave Kleikamp 		}
634470decc6SDave Kleikamp 
635470decc6SDave Kleikamp 		/* Where is the buffer to be written? */
636470decc6SDave Kleikamp 
637f7f4bccbSMingming Cao 		err = jbd2_journal_next_log_block(journal, &blocknr);
638470decc6SDave Kleikamp 		/* If the block mapping failed, just abandon the buffer
639470decc6SDave Kleikamp 		   and repeat this loop: we'll fall into the
640470decc6SDave Kleikamp 		   refile-on-abort condition above. */
641470decc6SDave Kleikamp 		if (err) {
642a7fa2bafSJan Kara 			jbd2_journal_abort(journal, err);
643470decc6SDave Kleikamp 			continue;
644470decc6SDave Kleikamp 		}
645470decc6SDave Kleikamp 
646470decc6SDave Kleikamp 		/*
647470decc6SDave Kleikamp 		 * start_this_handle() uses t_outstanding_credits to determine
6480db45889SJan Kara 		 * the free space in the log.
649470decc6SDave Kleikamp 		 */
650a51dca9cSTheodore Ts'o 		atomic_dec(&commit_transaction->t_outstanding_credits);
651470decc6SDave Kleikamp 
652470decc6SDave Kleikamp 		/* Bump b_count to prevent truncate from stumbling over
653470decc6SDave Kleikamp                    the shadowed buffer!  @@@ This can go if we ever get
654f5113effSJan Kara                    rid of the shadow pairing of buffers. */
655470decc6SDave Kleikamp 		atomic_inc(&jh2bh(jh)->b_count);
656470decc6SDave Kleikamp 
657470decc6SDave Kleikamp 		/*
658f5113effSJan Kara 		 * Make a temporary IO buffer with which to write it out
659f5113effSJan Kara 		 * (this will requeue the metadata buffer to BJ_Shadow).
660470decc6SDave Kleikamp 		 */
661f5113effSJan Kara 		set_bit(BH_JWrite, &jh2bh(jh)->b_state);
662470decc6SDave Kleikamp 		JBUFFER_TRACE(jh, "ph3: write metadata");
663abe48a52SKemeng Shi 		escape = jbd2_journal_write_metadata_buffer(commit_transaction,
664f5113effSJan Kara 						jh, &wbuf[bufs], blocknr);
665f5113effSJan Kara 		jbd2_file_log_bh(&io_bufs, wbuf[bufs]);
666470decc6SDave Kleikamp 
667470decc6SDave Kleikamp 		/* Record the new block's tag in the current descriptor
668470decc6SDave Kleikamp                    buffer */
669470decc6SDave Kleikamp 
670470decc6SDave Kleikamp 		tag_flag = 0;
671abe48a52SKemeng Shi 		if (escape)
672f7f4bccbSMingming Cao 			tag_flag |= JBD2_FLAG_ESCAPE;
673470decc6SDave Kleikamp 		if (!first_tag)
674f7f4bccbSMingming Cao 			tag_flag |= JBD2_FLAG_SAME_UUID;
675470decc6SDave Kleikamp 
676470decc6SDave Kleikamp 		tag = (journal_block_tag_t *) tagp;
677db9ee220SDarrick J. Wong 		write_tag_block(journal, tag, jh2bh(jh)->b_blocknr);
6788f888ef8SDarrick J. Wong 		tag->t_flags = cpu_to_be16(tag_flag);
679f5113effSJan Kara 		jbd2_block_tag_csum_set(journal, tag, wbuf[bufs],
680c3900875SDarrick J. Wong 					commit_transaction->t_tid);
681b517bea1SZach Brown 		tagp += tag_bytes;
682b517bea1SZach Brown 		space_left -= tag_bytes;
683f5113effSJan Kara 		bufs++;
684470decc6SDave Kleikamp 
685470decc6SDave Kleikamp 		if (first_tag) {
686470decc6SDave Kleikamp 			memcpy (tagp, journal->j_uuid, 16);
687470decc6SDave Kleikamp 			tagp += 16;
688470decc6SDave Kleikamp 			space_left -= 16;
689470decc6SDave Kleikamp 			first_tag = 0;
690470decc6SDave Kleikamp 		}
691470decc6SDave Kleikamp 
692470decc6SDave Kleikamp 		/* If there's no more to do, or if the descriptor is full,
693470decc6SDave Kleikamp 		   let the IO rip! */
694470decc6SDave Kleikamp 
695470decc6SDave Kleikamp 		if (bufs == journal->j_wbufsize ||
696470decc6SDave Kleikamp 		    commit_transaction->t_buffers == NULL ||
6973caa487fSDarrick J. Wong 		    space_left < tag_bytes + 16 + csum_size) {
698470decc6SDave Kleikamp 
699cb3b3bf2SJan Kara 			jbd2_debug(4, "JBD2: Submit %d IOs\n", bufs);
700470decc6SDave Kleikamp 
701470decc6SDave Kleikamp 			/* Write an end-of-descriptor marker before
702470decc6SDave Kleikamp                            submitting the IOs.  "tag" still points to
703470decc6SDave Kleikamp                            the last tag we set up. */
704470decc6SDave Kleikamp 
7058f888ef8SDarrick J. Wong 			tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG);
706470decc6SDave Kleikamp start_journal_io:
7076e876c3dSluojiajun 			if (descriptor)
7086e876c3dSluojiajun 				jbd2_descriptor_block_csum_set(journal,
7096e876c3dSluojiajun 							descriptor);
7106e876c3dSluojiajun 
711470decc6SDave Kleikamp 			for (i = 0; i < bufs; i++) {
712470decc6SDave Kleikamp 				struct buffer_head *bh = wbuf[i];
7136a3afb6aSZhang Yi 
714818d276cSGirish Shilamkar 				/*
715818d276cSGirish Shilamkar 				 * Compute checksum.
716818d276cSGirish Shilamkar 				 */
71756316a0dSDarrick J. Wong 				if (jbd2_has_feature_checksum(journal)) {
718818d276cSGirish Shilamkar 					crc32_sum =
719818d276cSGirish Shilamkar 					    jbd2_checksum_data(crc32_sum, bh);
720818d276cSGirish Shilamkar 				}
721818d276cSGirish Shilamkar 
722470decc6SDave Kleikamp 				lock_buffer(bh);
723470decc6SDave Kleikamp 				clear_buffer_dirty(bh);
724470decc6SDave Kleikamp 				set_buffer_uptodate(bh);
725470decc6SDave Kleikamp 				bh->b_end_io = journal_end_buffer_io_sync;
7266a3afb6aSZhang Yi 				submit_bh(REQ_OP_WRITE | JBD2_JOURNAL_REQ_FLAGS,
7276a3afb6aSZhang Yi 					  bh);
728470decc6SDave Kleikamp 			}
729470decc6SDave Kleikamp 			cond_resched();
730470decc6SDave Kleikamp 
731470decc6SDave Kleikamp 			/* Force a new descriptor to be generated next
732470decc6SDave Kleikamp                            time round the loop. */
733470decc6SDave Kleikamp 			descriptor = NULL;
734470decc6SDave Kleikamp 			bufs = 0;
735470decc6SDave Kleikamp 		}
736470decc6SDave Kleikamp 	}
737470decc6SDave Kleikamp 
738c851ed54SJan Kara 	err = journal_finish_inode_data_buffers(journal, commit_transaction);
739e9e34f4eSHidehiro Kawai 	if (err) {
740e9e34f4eSHidehiro Kawai 		printk(KERN_WARNING
7416e969ef3SBaokun Li 			"JBD2: Detected IO errors %d while flushing file data on %s\n",
7426e969ef3SBaokun Li 			err, journal->j_devname);
743e9e34f4eSHidehiro Kawai 		err = 0;
744e9e34f4eSHidehiro Kawai 	}
745c851ed54SJan Kara 
7463339578fSJan Kara 	/*
7473339578fSJan Kara 	 * Get current oldest transaction in the log before we issue flush
7483339578fSJan Kara 	 * to the filesystem device. After the flush we can be sure that
7493339578fSJan Kara 	 * blocks of all older transactions are checkpointed to persistent
7503339578fSJan Kara 	 * storage and we will be safe to update journal start in the
7513339578fSJan Kara 	 * superblock with the numbers we get here.
7523339578fSJan Kara 	 */
7533339578fSJan Kara 	update_tail =
7543339578fSJan Kara 		jbd2_journal_get_log_tail(journal, &first_tid, &first_block);
7553339578fSJan Kara 
756bbd2be36SJan Kara 	write_lock(&journal->j_state_lock);
7573339578fSJan Kara 	if (update_tail) {
7583339578fSJan Kara 		long freed = first_block - journal->j_tail;
7593339578fSJan Kara 
7603339578fSJan Kara 		if (first_block < journal->j_tail)
7613339578fSJan Kara 			freed += journal->j_last - journal->j_first;
7623339578fSJan Kara 		/* Update tail only if we free significant amount of space */
7634aa99c71SJan Kara 		if (freed < journal->j_max_transaction_buffers)
7643339578fSJan Kara 			update_tail = 0;
7653339578fSJan Kara 	}
766bbd2be36SJan Kara 	J_ASSERT(commit_transaction->t_state == T_COMMIT);
767bbd2be36SJan Kara 	commit_transaction->t_state = T_COMMIT_DFLUSH;
768bbd2be36SJan Kara 	write_unlock(&journal->j_state_lock);
7693339578fSJan Kara 
770818d276cSGirish Shilamkar 	/*
771818d276cSGirish Shilamkar 	 * If the journal is not located on the file system device,
772818d276cSGirish Shilamkar 	 * then we must flush the file system device before we issue
773a0851ea9SZhang Yi 	 * the commit record and update the journal tail sequence.
774818d276cSGirish Shilamkar 	 */
775a0851ea9SZhang Yi 	if ((commit_transaction->t_need_data_flush || update_tail) &&
776818d276cSGirish Shilamkar 	    (journal->j_fs_dev != journal->j_dev) &&
777818d276cSGirish Shilamkar 	    (journal->j_flags & JBD2_BARRIER))
778c6bf3f0eSChristoph Hellwig 		blkdev_issue_flush(journal->j_fs_dev);
779818d276cSGirish Shilamkar 
780818d276cSGirish Shilamkar 	/* Done it all: now write the commit record asynchronously. */
78156316a0dSDarrick J. Wong 	if (jbd2_has_feature_async_commit(journal)) {
782818d276cSGirish Shilamkar 		err = journal_submit_commit_record(journal, commit_transaction,
783470decc6SDave Kleikamp 						 &cbh, crc32_sum);
784470decc6SDave Kleikamp 		if (err)
785d0a186e0Szhangyi (F) 			jbd2_journal_abort(journal, err);
786470decc6SDave Kleikamp 	}
787470decc6SDave Kleikamp 
78882f04ab4SJens Axboe 	blk_finish_plug(&plug);
78982f04ab4SJens Axboe 
790470decc6SDave Kleikamp 	/* Lo and behold: we have just managed to send a transaction to
791470decc6SDave Kleikamp            the log.  Before we can commit it, wait for the IO so far to
792470decc6SDave Kleikamp            complete.  Control buffers being written are on the
793470decc6SDave Kleikamp            transaction's t_log_list queue, and metadata buffers are on
794f5113effSJan Kara            the io_bufs list.
795470decc6SDave Kleikamp 
796470decc6SDave Kleikamp 	   Wait for the buffers in reverse order.  That way we are
797470decc6SDave Kleikamp 	   less likely to be woken up until all IOs have completed, and
798470decc6SDave Kleikamp 	   so we incur less scheduling load.
799470decc6SDave Kleikamp 	*/
800470decc6SDave Kleikamp 
801cb3b3bf2SJan Kara 	jbd2_debug(3, "JBD2: commit phase 3\n");
802470decc6SDave Kleikamp 
803f5113effSJan Kara 	while (!list_empty(&io_bufs)) {
804f5113effSJan Kara 		struct buffer_head *bh = list_entry(io_bufs.prev,
805f5113effSJan Kara 						    struct buffer_head,
806f5113effSJan Kara 						    b_assoc_buffers);
807470decc6SDave Kleikamp 
808470decc6SDave Kleikamp 		wait_on_buffer(bh);
809f5113effSJan Kara 		cond_resched();
810470decc6SDave Kleikamp 
811470decc6SDave Kleikamp 		if (unlikely(!buffer_uptodate(bh)))
812470decc6SDave Kleikamp 			err = -EIO;
813f5113effSJan Kara 		jbd2_unfile_log_bh(bh);
814015c6033SJan Kara 		stats.run.rs_blocks_logged++;
815470decc6SDave Kleikamp 
816470decc6SDave Kleikamp 		/*
817f5113effSJan Kara 		 * The list contains temporary buffer heads created by
818f5113effSJan Kara 		 * jbd2_journal_write_metadata_buffer().
819470decc6SDave Kleikamp 		 */
820470decc6SDave Kleikamp 		BUFFER_TRACE(bh, "dumping temporary bh");
821470decc6SDave Kleikamp 		__brelse(bh);
822470decc6SDave Kleikamp 		J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
823470decc6SDave Kleikamp 		free_buffer_head(bh);
824470decc6SDave Kleikamp 
825f5113effSJan Kara 		/* We also have to refile the corresponding shadowed buffer */
826470decc6SDave Kleikamp 		jh = commit_transaction->t_shadow_list->b_tprev;
827470decc6SDave Kleikamp 		bh = jh2bh(jh);
828f5113effSJan Kara 		clear_buffer_jwrite(bh);
829470decc6SDave Kleikamp 		J_ASSERT_BH(bh, buffer_jbddirty(bh));
830b34090e5SJan Kara 		J_ASSERT_BH(bh, !buffer_shadow(bh));
831470decc6SDave Kleikamp 
832470decc6SDave Kleikamp 		/* The metadata is now released for reuse, but we need
833470decc6SDave Kleikamp                    to remember it against this transaction so that when
834470decc6SDave Kleikamp                    we finally commit, we can do any checkpointing
835470decc6SDave Kleikamp                    required. */
836470decc6SDave Kleikamp 		JBUFFER_TRACE(jh, "file as BJ_Forget");
837f7f4bccbSMingming Cao 		jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget);
838470decc6SDave Kleikamp 		JBUFFER_TRACE(jh, "brelse shadowed buffer");
839470decc6SDave Kleikamp 		__brelse(bh);
840470decc6SDave Kleikamp 	}
841470decc6SDave Kleikamp 
842470decc6SDave Kleikamp 	J_ASSERT (commit_transaction->t_shadow_list == NULL);
843470decc6SDave Kleikamp 
844cb3b3bf2SJan Kara 	jbd2_debug(3, "JBD2: commit phase 4\n");
845470decc6SDave Kleikamp 
846470decc6SDave Kleikamp 	/* Here we wait for the revoke record and descriptor record buffers */
847e5a120aeSJan Kara 	while (!list_empty(&log_bufs)) {
848470decc6SDave Kleikamp 		struct buffer_head *bh;
849470decc6SDave Kleikamp 
850e5a120aeSJan Kara 		bh = list_entry(log_bufs.prev, struct buffer_head, b_assoc_buffers);
851470decc6SDave Kleikamp 		wait_on_buffer(bh);
852e5a120aeSJan Kara 		cond_resched();
853470decc6SDave Kleikamp 
854470decc6SDave Kleikamp 		if (unlikely(!buffer_uptodate(bh)))
855470decc6SDave Kleikamp 			err = -EIO;
856470decc6SDave Kleikamp 
857470decc6SDave Kleikamp 		BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
858470decc6SDave Kleikamp 		clear_buffer_jwrite(bh);
859e5a120aeSJan Kara 		jbd2_unfile_log_bh(bh);
860015c6033SJan Kara 		stats.run.rs_blocks_logged++;
861470decc6SDave Kleikamp 		__brelse(bh);		/* One for getblk */
862470decc6SDave Kleikamp 		/* AKPM: bforget here */
863470decc6SDave Kleikamp 	}
864470decc6SDave Kleikamp 
86577e841deSHidehiro Kawai 	if (err)
86677e841deSHidehiro Kawai 		jbd2_journal_abort(journal, err);
86777e841deSHidehiro Kawai 
868cb3b3bf2SJan Kara 	jbd2_debug(3, "JBD2: commit phase 5\n");
869bbd2be36SJan Kara 	write_lock(&journal->j_state_lock);
870bbd2be36SJan Kara 	J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
871bbd2be36SJan Kara 	commit_transaction->t_state = T_COMMIT_JFLUSH;
872bbd2be36SJan Kara 	write_unlock(&journal->j_state_lock);
873470decc6SDave Kleikamp 
87456316a0dSDarrick J. Wong 	if (!jbd2_has_feature_async_commit(journal)) {
875818d276cSGirish Shilamkar 		err = journal_submit_commit_record(journal, commit_transaction,
876818d276cSGirish Shilamkar 						&cbh, crc32_sum);
877818d276cSGirish Shilamkar 		if (err)
878d0a186e0Szhangyi (F) 			jbd2_journal_abort(journal, err);
879818d276cSGirish Shilamkar 	}
8806cba611eSZhang Huan 	if (cbh)
881fd98496fSTheodore Ts'o 		err = journal_wait_on_commit_record(journal, cbh);
882015c6033SJan Kara 	stats.run.rs_blocks_logged++;
88356316a0dSDarrick J. Wong 	if (jbd2_has_feature_async_commit(journal) &&
884f73bee49SJan Kara 	    journal->j_flags & JBD2_BARRIER) {
885c6bf3f0eSChristoph Hellwig 		blkdev_issue_flush(journal->j_dev);
886f73bee49SJan Kara 	}
887470decc6SDave Kleikamp 
888470decc6SDave Kleikamp 	if (err)
889a7fa2bafSJan Kara 		jbd2_journal_abort(journal, err);
890470decc6SDave Kleikamp 
8919f356e5aSJan Kara 	WARN_ON_ONCE(
8929f356e5aSJan Kara 		atomic_read(&commit_transaction->t_outstanding_credits) < 0);
8939f356e5aSJan Kara 
8943339578fSJan Kara 	/*
8953339578fSJan Kara 	 * Now disk caches for filesystem device are flushed so we are safe to
8963339578fSJan Kara 	 * erase checkpointed transactions from the log by updating journal
8973339578fSJan Kara 	 * superblock.
8983339578fSJan Kara 	 */
8993339578fSJan Kara 	if (update_tail)
9003339578fSJan Kara 		jbd2_update_log_tail(journal, first_tid, first_block);
9013339578fSJan Kara 
902470decc6SDave Kleikamp 	/* End of a transaction!  Finally, we can do checkpoint
903470decc6SDave Kleikamp            processing: any buffers committed as a result of this
904470decc6SDave Kleikamp            transaction can be removed from any checkpoint list it was on
905470decc6SDave Kleikamp            before. */
906470decc6SDave Kleikamp 
907cb3b3bf2SJan Kara 	jbd2_debug(3, "JBD2: commit phase 6\n");
908470decc6SDave Kleikamp 
909c851ed54SJan Kara 	J_ASSERT(list_empty(&commit_transaction->t_inode_list));
910470decc6SDave Kleikamp 	J_ASSERT(commit_transaction->t_buffers == NULL);
911470decc6SDave Kleikamp 	J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
912470decc6SDave Kleikamp 	J_ASSERT(commit_transaction->t_shadow_list == NULL);
913470decc6SDave Kleikamp 
914470decc6SDave Kleikamp restart_loop:
915470decc6SDave Kleikamp 	/*
916470decc6SDave Kleikamp 	 * As there are other places (journal_unmap_buffer()) adding buffers
917470decc6SDave Kleikamp 	 * to this list we have to be careful and hold the j_list_lock.
918470decc6SDave Kleikamp 	 */
919470decc6SDave Kleikamp 	spin_lock(&journal->j_list_lock);
920470decc6SDave Kleikamp 	while (commit_transaction->t_forget) {
921470decc6SDave Kleikamp 		transaction_t *cp_transaction;
922470decc6SDave Kleikamp 		struct buffer_head *bh;
923de1b7941SJan Kara 		int try_to_free = 0;
92493108ebbSJan Kara 		bool drop_ref;
925470decc6SDave Kleikamp 
926470decc6SDave Kleikamp 		jh = commit_transaction->t_forget;
927470decc6SDave Kleikamp 		spin_unlock(&journal->j_list_lock);
928470decc6SDave Kleikamp 		bh = jh2bh(jh);
929de1b7941SJan Kara 		/*
930de1b7941SJan Kara 		 * Get a reference so that bh cannot be freed before we are
931de1b7941SJan Kara 		 * done with it.
932de1b7941SJan Kara 		 */
933de1b7941SJan Kara 		get_bh(bh);
93446417064SThomas Gleixner 		spin_lock(&jh->b_state_lock);
93523e2af35Sdingdinghua 		J_ASSERT_JH(jh,	jh->b_transaction == commit_transaction);
936470decc6SDave Kleikamp 
937470decc6SDave Kleikamp 		/*
938470decc6SDave Kleikamp 		 * If there is undo-protected committed data against
939470decc6SDave Kleikamp 		 * this buffer, then we can remove it now.  If it is a
940470decc6SDave Kleikamp 		 * buffer needing such protection, the old frozen_data
941470decc6SDave Kleikamp 		 * field now points to a committed version of the
942470decc6SDave Kleikamp 		 * buffer, so rotate that field to the new committed
943470decc6SDave Kleikamp 		 * data.
944470decc6SDave Kleikamp 		 *
945470decc6SDave Kleikamp 		 * Otherwise, we can just throw away the frozen data now.
946e06c8227SJoel Becker 		 *
947e06c8227SJoel Becker 		 * We also know that the frozen data has already fired
948e06c8227SJoel Becker 		 * its triggers if they exist, so we can clear that too.
949470decc6SDave Kleikamp 		 */
950470decc6SDave Kleikamp 		if (jh->b_committed_data) {
951af1e76d6SMingming Cao 			jbd2_free(jh->b_committed_data, bh->b_size);
952470decc6SDave Kleikamp 			jh->b_committed_data = NULL;
953470decc6SDave Kleikamp 			if (jh->b_frozen_data) {
954470decc6SDave Kleikamp 				jh->b_committed_data = jh->b_frozen_data;
955470decc6SDave Kleikamp 				jh->b_frozen_data = NULL;
956e06c8227SJoel Becker 				jh->b_frozen_triggers = NULL;
957470decc6SDave Kleikamp 			}
958470decc6SDave Kleikamp 		} else if (jh->b_frozen_data) {
959af1e76d6SMingming Cao 			jbd2_free(jh->b_frozen_data, bh->b_size);
960470decc6SDave Kleikamp 			jh->b_frozen_data = NULL;
961e06c8227SJoel Becker 			jh->b_frozen_triggers = NULL;
962470decc6SDave Kleikamp 		}
963470decc6SDave Kleikamp 
964470decc6SDave Kleikamp 		spin_lock(&journal->j_list_lock);
965470decc6SDave Kleikamp 		cp_transaction = jh->b_cp_transaction;
966470decc6SDave Kleikamp 		if (cp_transaction) {
967470decc6SDave Kleikamp 			JBUFFER_TRACE(jh, "remove from old cp transaction");
9688e85fb3fSJohann Lombardi 			cp_transaction->t_chp_stats.cs_dropped++;
969f7f4bccbSMingming Cao 			__jbd2_journal_remove_checkpoint(jh);
970470decc6SDave Kleikamp 		}
971470decc6SDave Kleikamp 
972470decc6SDave Kleikamp 		/* Only re-checkpoint the buffer_head if it is marked
973470decc6SDave Kleikamp 		 * dirty.  If the buffer was added to the BJ_Forget list
974f7f4bccbSMingming Cao 		 * by jbd2_journal_forget, it may no longer be dirty and
975470decc6SDave Kleikamp 		 * there's no point in keeping a checkpoint record for
976470decc6SDave Kleikamp 		 * it. */
977470decc6SDave Kleikamp 
978b794e7a6SJan Kara 		/*
9796a66a7deSzhangyi (F) 		 * A buffer which has been freed while still being journaled
9806a66a7deSzhangyi (F) 		 * by a previous transaction, refile the buffer to BJ_Forget of
9816a66a7deSzhangyi (F) 		 * the running transaction. If the just committed transaction
9826a66a7deSzhangyi (F) 		 * contains "add to orphan" operation, we can completely
9836a66a7deSzhangyi (F) 		 * invalidate the buffer now. We are rather thorough in that
9846a66a7deSzhangyi (F) 		 * since the buffer may be still accessible when blocksize <
9856a66a7deSzhangyi (F) 		 * pagesize and it is attached to the last partial page.
986b794e7a6SJan Kara 		 */
9876a66a7deSzhangyi (F) 		if (buffer_freed(bh) && !jh->b_next_transaction) {
988c96dceeaSzhangyi (F) 			struct address_space *mapping;
989c96dceeaSzhangyi (F) 
990470decc6SDave Kleikamp 			clear_buffer_freed(bh);
991470decc6SDave Kleikamp 			clear_buffer_jbddirty(bh);
992c96dceeaSzhangyi (F) 
993c96dceeaSzhangyi (F) 			/*
994c96dceeaSzhangyi (F) 			 * Block device buffers need to stay mapped all the
995c96dceeaSzhangyi (F) 			 * time, so it is enough to clear buffer_jbddirty and
996c96dceeaSzhangyi (F) 			 * buffer_freed bits. For the file mapping buffers (i.e.
997c96dceeaSzhangyi (F) 			 * journalled data) we need to unmap buffer and clear
998c96dceeaSzhangyi (F) 			 * more bits. We also need to be careful about the check
999c96dceeaSzhangyi (F) 			 * because the data page mapping can get cleared under
1000780f66e5Szhangyi (F) 			 * our hands. Note that if mapping == NULL, we don't
1001780f66e5Szhangyi (F) 			 * need to make buffer unmapped because the page is
1002780f66e5Szhangyi (F) 			 * already detached from the mapping and buffers cannot
1003780f66e5Szhangyi (F) 			 * get reused.
1004c96dceeaSzhangyi (F) 			 */
10050d22fe2fSMatthew Wilcox (Oracle) 			mapping = READ_ONCE(bh->b_folio->mapping);
1006c96dceeaSzhangyi (F) 			if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) {
1007b794e7a6SJan Kara 				clear_buffer_mapped(bh);
1008b794e7a6SJan Kara 				clear_buffer_new(bh);
1009b794e7a6SJan Kara 				clear_buffer_req(bh);
1010b794e7a6SJan Kara 				bh->b_bdev = NULL;
1011b794e7a6SJan Kara 			}
1012c96dceeaSzhangyi (F) 		}
1013470decc6SDave Kleikamp 
1014470decc6SDave Kleikamp 		if (buffer_jbddirty(bh)) {
1015470decc6SDave Kleikamp 			JBUFFER_TRACE(jh, "add to new checkpointing trans");
1016f7f4bccbSMingming Cao 			__jbd2_journal_insert_checkpoint(jh, commit_transaction);
10177ad7445fSHidehiro Kawai 			if (is_journal_aborted(journal))
10187ad7445fSHidehiro Kawai 				clear_buffer_jbddirty(bh);
1019470decc6SDave Kleikamp 		} else {
1020470decc6SDave Kleikamp 			J_ASSERT_BH(bh, !buffer_dirty(bh));
1021de1b7941SJan Kara 			/*
1022de1b7941SJan Kara 			 * The buffer on BJ_Forget list and not jbddirty means
1023470decc6SDave Kleikamp 			 * it has been freed by this transaction and hence it
1024470decc6SDave Kleikamp 			 * could not have been reallocated until this
1025470decc6SDave Kleikamp 			 * transaction has committed. *BUT* it could be
1026470decc6SDave Kleikamp 			 * reallocated once we have written all the data to
1027470decc6SDave Kleikamp 			 * disk and before we process the buffer on BJ_Forget
1028de1b7941SJan Kara 			 * list.
1029de1b7941SJan Kara 			 */
1030de1b7941SJan Kara 			if (!jh->b_next_transaction)
1031de1b7941SJan Kara 				try_to_free = 1;
1032470decc6SDave Kleikamp 		}
1033de1b7941SJan Kara 		JBUFFER_TRACE(jh, "refile or unfile buffer");
103493108ebbSJan Kara 		drop_ref = __jbd2_journal_refile_buffer(jh);
103546417064SThomas Gleixner 		spin_unlock(&jh->b_state_lock);
103693108ebbSJan Kara 		if (drop_ref)
103793108ebbSJan Kara 			jbd2_journal_put_journal_head(jh);
1038de1b7941SJan Kara 		if (try_to_free)
1039de1b7941SJan Kara 			release_buffer_page(bh);	/* Drops bh reference */
1040de1b7941SJan Kara 		else
1041de1b7941SJan Kara 			__brelse(bh);
1042470decc6SDave Kleikamp 		cond_resched_lock(&journal->j_list_lock);
1043470decc6SDave Kleikamp 	}
1044470decc6SDave Kleikamp 	spin_unlock(&journal->j_list_lock);
1045470decc6SDave Kleikamp 	/*
1046f5a7a6b0SJan Kara 	 * This is a bit sleazy.  We use j_list_lock to protect transition
1047f5a7a6b0SJan Kara 	 * of a transaction into T_FINISHED state and calling
1048f5a7a6b0SJan Kara 	 * __jbd2_journal_drop_transaction(). Otherwise we could race with
1049f5a7a6b0SJan Kara 	 * other checkpointing code processing the transaction...
1050470decc6SDave Kleikamp 	 */
1051a931da6aSTheodore Ts'o 	write_lock(&journal->j_state_lock);
1052470decc6SDave Kleikamp 	spin_lock(&journal->j_list_lock);
1053470decc6SDave Kleikamp 	/*
1054470decc6SDave Kleikamp 	 * Now recheck if some buffers did not get attached to the transaction
1055470decc6SDave Kleikamp 	 * while the lock was dropped...
1056470decc6SDave Kleikamp 	 */
1057470decc6SDave Kleikamp 	if (commit_transaction->t_forget) {
1058470decc6SDave Kleikamp 		spin_unlock(&journal->j_list_lock);
1059a931da6aSTheodore Ts'o 		write_unlock(&journal->j_state_lock);
1060470decc6SDave Kleikamp 		goto restart_loop;
1061470decc6SDave Kleikamp 	}
1062470decc6SDave Kleikamp 
1063d4e839d4STheodore Ts'o 	/* Add the transaction to the checkpoint list.
1064d4e839d4STheodore Ts'o 	 * __jbd2_journal_remove_checkpoint() cannot destroy the transaction
1065d4e839d4STheodore Ts'o 	 * under us because it is not marked as T_FINISHED yet */
1066d4e839d4STheodore Ts'o 	if (journal->j_checkpoint_transactions == NULL) {
1067d4e839d4STheodore Ts'o 		journal->j_checkpoint_transactions = commit_transaction;
1068d4e839d4STheodore Ts'o 		commit_transaction->t_cpnext = commit_transaction;
1069d4e839d4STheodore Ts'o 		commit_transaction->t_cpprev = commit_transaction;
1070d4e839d4STheodore Ts'o 	} else {
1071d4e839d4STheodore Ts'o 		commit_transaction->t_cpnext =
1072d4e839d4STheodore Ts'o 			journal->j_checkpoint_transactions;
1073d4e839d4STheodore Ts'o 		commit_transaction->t_cpprev =
1074d4e839d4STheodore Ts'o 			commit_transaction->t_cpnext->t_cpprev;
1075d4e839d4STheodore Ts'o 		commit_transaction->t_cpnext->t_cpprev =
1076d4e839d4STheodore Ts'o 			commit_transaction;
1077d4e839d4STheodore Ts'o 		commit_transaction->t_cpprev->t_cpnext =
1078d4e839d4STheodore Ts'o 				commit_transaction;
1079d4e839d4STheodore Ts'o 	}
1080d4e839d4STheodore Ts'o 	spin_unlock(&journal->j_list_lock);
1081d4e839d4STheodore Ts'o 
1082470decc6SDave Kleikamp 	/* Done with this transaction! */
1083470decc6SDave Kleikamp 
1084cb3b3bf2SJan Kara 	jbd2_debug(3, "JBD2: commit phase 7\n");
1085470decc6SDave Kleikamp 
1086bbd2be36SJan Kara 	J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);
1087470decc6SDave Kleikamp 
10888e85fb3fSJohann Lombardi 	commit_transaction->t_start = jiffies;
1089bf699327STheodore Ts'o 	stats.run.rs_logging = jbd2_time_diff(stats.run.rs_logging,
10908e85fb3fSJohann Lombardi 					      commit_transaction->t_start);
10918e85fb3fSJohann Lombardi 
10928e85fb3fSJohann Lombardi 	/*
1093bf699327STheodore Ts'o 	 * File the transaction statistics
10948e85fb3fSJohann Lombardi 	 */
10958e85fb3fSJohann Lombardi 	stats.ts_tid = commit_transaction->t_tid;
10968dd42046STheodore Ts'o 	stats.run.rs_handle_count =
10978dd42046STheodore Ts'o 		atomic_read(&commit_transaction->t_handle_count);
1098bf699327STheodore Ts'o 	trace_jbd2_run_stats(journal->j_fs_dev->bd_dev,
1099bf699327STheodore Ts'o 			     commit_transaction->t_tid, &stats.run);
110042cf3452STheodore Ts'o 	stats.ts_requested = (commit_transaction->t_requested) ? 1 : 0;
11018e85fb3fSJohann Lombardi 
1102794446c6SDmitry Monakhov 	commit_transaction->t_state = T_COMMIT_CALLBACK;
1103470decc6SDave Kleikamp 	J_ASSERT(commit_transaction == journal->j_committing_transaction);
11047c73ddb7SZhang Yi 	WRITE_ONCE(journal->j_commit_sequence, commit_transaction->t_tid);
1105470decc6SDave Kleikamp 	journal->j_committing_transaction = NULL;
1106e07f7183SJosef Bacik 	commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
1107470decc6SDave Kleikamp 
1108e07f7183SJosef Bacik 	/*
1109e07f7183SJosef Bacik 	 * weight the commit time higher than the average time so we don't
1110e07f7183SJosef Bacik 	 * react too strongly to vast changes in the commit time
1111e07f7183SJosef Bacik 	 */
1112e07f7183SJosef Bacik 	if (likely(journal->j_average_commit_time))
1113e07f7183SJosef Bacik 		journal->j_average_commit_time = (commit_time +
1114e07f7183SJosef Bacik 				journal->j_average_commit_time*3) / 4;
1115e07f7183SJosef Bacik 	else
1116e07f7183SJosef Bacik 		journal->j_average_commit_time = commit_time;
1117794446c6SDmitry Monakhov 
1118a931da6aSTheodore Ts'o 	write_unlock(&journal->j_state_lock);
11196c20ec85STheodore Ts'o 
1120fb68407bSAneesh Kumar K.V 	if (journal->j_commit_callback)
1121fb68407bSAneesh Kumar K.V 		journal->j_commit_callback(journal, commit_transaction);
1122ff780b91SHarshad Shirwadkar 	if (journal->j_fc_cleanup_callback)
1123e85c81baSXin Yin 		journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid);
1124fb68407bSAneesh Kumar K.V 
1125879c5e6bSTheodore Ts'o 	trace_jbd2_end_commit(journal, commit_transaction);
1126cb3b3bf2SJan Kara 	jbd2_debug(1, "JBD2: commit %d complete, head %d\n",
1127470decc6SDave Kleikamp 		  journal->j_commit_sequence, journal->j_tail_sequence);
1128470decc6SDave Kleikamp 
1129794446c6SDmitry Monakhov 	write_lock(&journal->j_state_lock);
1130ff780b91SHarshad Shirwadkar 	journal->j_flags &= ~JBD2_FULL_COMMIT_ONGOING;
1131ff780b91SHarshad Shirwadkar 	journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
1132794446c6SDmitry Monakhov 	spin_lock(&journal->j_list_lock);
1133794446c6SDmitry Monakhov 	commit_transaction->t_state = T_FINISHED;
1134d4e839d4STheodore Ts'o 	/* Check if the transaction can be dropped now that we are finished */
1135be222553SZhang Yi 	if (commit_transaction->t_checkpoint_list == NULL) {
1136794446c6SDmitry Monakhov 		__jbd2_journal_drop_transaction(journal, commit_transaction);
1137794446c6SDmitry Monakhov 		jbd2_journal_free_transaction(commit_transaction);
1138794446c6SDmitry Monakhov 	}
1139794446c6SDmitry Monakhov 	spin_unlock(&journal->j_list_lock);
1140794446c6SDmitry Monakhov 	write_unlock(&journal->j_state_lock);
1141470decc6SDave Kleikamp 	wake_up(&journal->j_wait_done_commit);
1142ff780b91SHarshad Shirwadkar 	wake_up(&journal->j_fc_wait);
114342cf3452STheodore Ts'o 
114442cf3452STheodore Ts'o 	/*
114542cf3452STheodore Ts'o 	 * Calculate overall stats
114642cf3452STheodore Ts'o 	 */
114742cf3452STheodore Ts'o 	spin_lock(&journal->j_history_lock);
114842cf3452STheodore Ts'o 	journal->j_stats.ts_tid++;
114942cf3452STheodore Ts'o 	journal->j_stats.ts_requested += stats.ts_requested;
115042cf3452STheodore Ts'o 	journal->j_stats.run.rs_wait += stats.run.rs_wait;
115142cf3452STheodore Ts'o 	journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay;
115242cf3452STheodore Ts'o 	journal->j_stats.run.rs_running += stats.run.rs_running;
115342cf3452STheodore Ts'o 	journal->j_stats.run.rs_locked += stats.run.rs_locked;
115442cf3452STheodore Ts'o 	journal->j_stats.run.rs_flushing += stats.run.rs_flushing;
115542cf3452STheodore Ts'o 	journal->j_stats.run.rs_logging += stats.run.rs_logging;
115642cf3452STheodore Ts'o 	journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count;
115742cf3452STheodore Ts'o 	journal->j_stats.run.rs_blocks += stats.run.rs_blocks;
115842cf3452STheodore Ts'o 	journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged;
115942cf3452STheodore Ts'o 	spin_unlock(&journal->j_history_lock);
1160470decc6SDave Kleikamp }
1161