1f5166768STheodore Ts'o // SPDX-License-Identifier: GPL-2.0+
2470decc6SDave Kleikamp /*
3f7f4bccbSMingming Cao * linux/fs/jbd2/commit.c
4470decc6SDave Kleikamp *
5470decc6SDave Kleikamp * Written by Stephen C. Tweedie <[email protected]>, 1998
6470decc6SDave Kleikamp *
7470decc6SDave Kleikamp * Copyright 1998 Red Hat corp --- All Rights Reserved
8470decc6SDave Kleikamp *
9470decc6SDave Kleikamp * Journal commit routines for the generic filesystem journaling code;
10470decc6SDave Kleikamp * part of the ext2fs journaling system.
11470decc6SDave Kleikamp */
12470decc6SDave Kleikamp
13470decc6SDave Kleikamp #include <linux/time.h>
14470decc6SDave Kleikamp #include <linux/fs.h>
15f7f4bccbSMingming Cao #include <linux/jbd2.h>
16470decc6SDave Kleikamp #include <linux/errno.h>
17470decc6SDave Kleikamp #include <linux/slab.h>
18470decc6SDave Kleikamp #include <linux/mm.h>
19470decc6SDave Kleikamp #include <linux/pagemap.h>
208e85fb3fSJohann Lombardi #include <linux/jiffies.h>
21818d276cSGirish Shilamkar #include <linux/crc32.h>
22cd1aac32SAneesh Kumar K.V #include <linux/writeback.h>
23cd1aac32SAneesh Kumar K.V #include <linux/backing-dev.h>
24fd98496fSTheodore Ts'o #include <linux/bio.h>
250e3d2a63STheodore Ts'o #include <linux/blkdev.h>
2639e3ac25SBrian King #include <linux/bitops.h>
27879c5e6bSTheodore Ts'o #include <trace/events/jbd2.h>
28470decc6SDave Kleikamp
29470decc6SDave Kleikamp /*
30b34090e5SJan Kara * IO end handler for temporary buffer_heads handling writes to the journal.
31470decc6SDave Kleikamp */
journal_end_buffer_io_sync(struct buffer_head * bh,int uptodate)32470decc6SDave Kleikamp static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
33470decc6SDave Kleikamp {
34b34090e5SJan Kara struct buffer_head *orig_bh = bh->b_private;
35b34090e5SJan Kara
36470decc6SDave Kleikamp BUFFER_TRACE(bh, "");
37470decc6SDave Kleikamp if (uptodate)
38470decc6SDave Kleikamp set_buffer_uptodate(bh);
39470decc6SDave Kleikamp else
40470decc6SDave Kleikamp clear_buffer_uptodate(bh);
41b34090e5SJan Kara if (orig_bh) {
42b34090e5SJan Kara clear_bit_unlock(BH_Shadow, &orig_bh->b_state);
434e857c58SPeter Zijlstra smp_mb__after_atomic();
44b34090e5SJan Kara wake_up_bit(&orig_bh->b_state, BH_Shadow);
45b34090e5SJan Kara }
46470decc6SDave Kleikamp unlock_buffer(bh);
47470decc6SDave Kleikamp }
48470decc6SDave Kleikamp
49470decc6SDave Kleikamp /*
5087c89c23SJan Kara * When an ext4 file is truncated, it is possible that some pages are not
5187c89c23SJan Kara * successfully freed, because they are attached to a committing transaction.
52470decc6SDave Kleikamp * After the transaction commits, these pages are left on the LRU, with no
53470decc6SDave Kleikamp * ->mapping, and with attached buffers. These pages are trivially reclaimable
54470decc6SDave Kleikamp * by the VM, but their apparent absence upsets the VM accounting, and it makes
55470decc6SDave Kleikamp * the numbers in /proc/meminfo look odd.
56470decc6SDave Kleikamp *
57470decc6SDave Kleikamp * So here, we have a buffer which has just come off the forget list. Look to
58470decc6SDave Kleikamp * see if we can strip all buffers from the backing page.
59470decc6SDave Kleikamp *
60*fd3b3d7fSKemeng Shi * Called under j_list_lock. The caller provided us with a ref against the
61*fd3b3d7fSKemeng Shi * buffer, and we drop that here.
62470decc6SDave Kleikamp */
release_buffer_page(struct buffer_head * bh)63470decc6SDave Kleikamp static void release_buffer_page(struct buffer_head *bh)
64470decc6SDave Kleikamp {
6573122255SMatthew Wilcox (Oracle) struct folio *folio;
66470decc6SDave Kleikamp
67470decc6SDave Kleikamp if (buffer_dirty(bh))
68470decc6SDave Kleikamp goto nope;
69470decc6SDave Kleikamp if (atomic_read(&bh->b_count) != 1)
70470decc6SDave Kleikamp goto nope;
710d22fe2fSMatthew Wilcox (Oracle) folio = bh->b_folio;
7273122255SMatthew Wilcox (Oracle) if (folio->mapping)
73470decc6SDave Kleikamp goto nope;
74470decc6SDave Kleikamp
75470decc6SDave Kleikamp /* OK, it's a truncated page */
7673122255SMatthew Wilcox (Oracle) if (!folio_trylock(folio))
77470decc6SDave Kleikamp goto nope;
78470decc6SDave Kleikamp
7973122255SMatthew Wilcox (Oracle) folio_get(folio);
80470decc6SDave Kleikamp __brelse(bh);
8168189fefSMatthew Wilcox (Oracle) try_to_free_buffers(folio);
8273122255SMatthew Wilcox (Oracle) folio_unlock(folio);
8373122255SMatthew Wilcox (Oracle) folio_put(folio);
84470decc6SDave Kleikamp return;
85470decc6SDave Kleikamp
86470decc6SDave Kleikamp nope:
87470decc6SDave Kleikamp __brelse(bh);
88470decc6SDave Kleikamp }
89470decc6SDave Kleikamp
/*
 * Store the block checksum into the commit header held in @bh.
 *
 * Does nothing unless the journal uses v2 or v3 checksums.  Otherwise the
 * checksum type, size and first checksum word are zeroed, the checksum is
 * computed over j_blocksize bytes of the block starting from the journal's
 * checksum seed, and the result is written big-endian into h_chksum[0].
 */
static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh)
{
	struct commit_header *hdr = (struct commit_header *)bh->b_data;
	__u32 csum;

	if (!jbd2_journal_has_csum_v2or3(j))
		return;

	/* The checksum is taken with these fields cleared. */
	hdr->h_chksum_type = 0;
	hdr->h_chksum_size = 0;
	hdr->h_chksum[0] = 0;

	csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
	hdr->h_chksum[0] = cpu_to_be32(csum);
}
1051f56c589SDarrick J. Wong
106470decc6SDave Kleikamp /*
107818d276cSGirish Shilamkar * Done it all: now submit the commit record. We should have
108470decc6SDave Kleikamp * cleaned up our previous buffers by now, so if we are in abort
109470decc6SDave Kleikamp * mode we can now just skip the rest of the journal write
110470decc6SDave Kleikamp * entirely.
111470decc6SDave Kleikamp *
112470decc6SDave Kleikamp * Returns 1 if the journal needs to be aborted or 0 on success
113470decc6SDave Kleikamp */
journal_submit_commit_record(journal_t * journal,transaction_t * commit_transaction,struct buffer_head ** cbh,__u32 crc32_sum)114818d276cSGirish Shilamkar static int journal_submit_commit_record(journal_t *journal,
115818d276cSGirish Shilamkar transaction_t *commit_transaction,
116818d276cSGirish Shilamkar struct buffer_head **cbh,
117818d276cSGirish Shilamkar __u32 crc32_sum)
118470decc6SDave Kleikamp {
119818d276cSGirish Shilamkar struct commit_header *tmp;
120470decc6SDave Kleikamp struct buffer_head *bh;
121b42d1d6bSArnd Bergmann struct timespec64 now;
1226a3afb6aSZhang Yi blk_opf_t write_flags = REQ_OP_WRITE | JBD2_JOURNAL_REQ_FLAGS;
123470decc6SDave Kleikamp
1246cba611eSZhang Huan *cbh = NULL;
1256cba611eSZhang Huan
126470decc6SDave Kleikamp if (is_journal_aborted(journal))
127470decc6SDave Kleikamp return 0;
128470decc6SDave Kleikamp
12932ab6715SJan Kara bh = jbd2_journal_get_descriptor_buffer(commit_transaction,
13032ab6715SJan Kara JBD2_COMMIT_BLOCK);
131e5a120aeSJan Kara if (!bh)
132470decc6SDave Kleikamp return 1;
133470decc6SDave Kleikamp
134818d276cSGirish Shilamkar tmp = (struct commit_header *)bh->b_data;
135b42d1d6bSArnd Bergmann ktime_get_coarse_real_ts64(&now);
136736603abSTheodore Ts'o tmp->h_commit_sec = cpu_to_be64(now.tv_sec);
137736603abSTheodore Ts'o tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec);
138818d276cSGirish Shilamkar
13956316a0dSDarrick J. Wong if (jbd2_has_feature_checksum(journal)) {
140818d276cSGirish Shilamkar tmp->h_chksum_type = JBD2_CRC32_CHKSUM;
141818d276cSGirish Shilamkar tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE;
142818d276cSGirish Shilamkar tmp->h_chksum[0] = cpu_to_be32(crc32_sum);
143470decc6SDave Kleikamp }
144e5a120aeSJan Kara jbd2_commit_block_csum_set(journal, bh);
145470decc6SDave Kleikamp
146e5a120aeSJan Kara BUFFER_TRACE(bh, "submit commit block");
147818d276cSGirish Shilamkar lock_buffer(bh);
14845a90bfdSTheodore Ts'o clear_buffer_dirty(bh);
149818d276cSGirish Shilamkar set_buffer_uptodate(bh);
150818d276cSGirish Shilamkar bh->b_end_io = journal_end_buffer_io_sync;
151818d276cSGirish Shilamkar
152818d276cSGirish Shilamkar if (journal->j_flags & JBD2_BARRIER &&
15356316a0dSDarrick J. Wong !jbd2_has_feature_async_commit(journal))
154f3ed5df3SRitesh Harjani (IBM) write_flags |= REQ_PREFLUSH | REQ_FUA;
155470decc6SDave Kleikamp
156f3ed5df3SRitesh Harjani (IBM) submit_bh(write_flags, bh);
157818d276cSGirish Shilamkar *cbh = bh;
158f3ed5df3SRitesh Harjani (IBM) return 0;
159818d276cSGirish Shilamkar }
160470decc6SDave Kleikamp
161818d276cSGirish Shilamkar /*
162818d276cSGirish Shilamkar * This function along with journal_submit_commit_record
163818d276cSGirish Shilamkar * allows to write the commit record asynchronously.
164818d276cSGirish Shilamkar */
journal_wait_on_commit_record(journal_t * journal,struct buffer_head * bh)165fd98496fSTheodore Ts'o static int journal_wait_on_commit_record(journal_t *journal,
166fd98496fSTheodore Ts'o struct buffer_head *bh)
167818d276cSGirish Shilamkar {
168818d276cSGirish Shilamkar int ret = 0;
169818d276cSGirish Shilamkar
170818d276cSGirish Shilamkar clear_buffer_dirty(bh);
171818d276cSGirish Shilamkar wait_on_buffer(bh);
172818d276cSGirish Shilamkar
173818d276cSGirish Shilamkar if (unlikely(!buffer_uptodate(bh)))
174818d276cSGirish Shilamkar ret = -EIO;
175818d276cSGirish Shilamkar put_bh(bh); /* One for getblk() */
176818d276cSGirish Shilamkar
177818d276cSGirish Shilamkar return ret;
178818d276cSGirish Shilamkar }
179818d276cSGirish Shilamkar
180ff780b91SHarshad Shirwadkar /* Send all the data buffers related to an inode */
jbd2_submit_inode_data(journal_t * journal,struct jbd2_inode * jinode)181f30ff35fSJan Kara int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode)
182ff780b91SHarshad Shirwadkar {
183ff780b91SHarshad Shirwadkar if (!jinode || !(jinode->i_flags & JI_WRITE_DATA))
184ff780b91SHarshad Shirwadkar return 0;
185ff780b91SHarshad Shirwadkar
186ff780b91SHarshad Shirwadkar trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
187f30ff35fSJan Kara return journal->j_submit_inode_data_buffers(jinode);
188ff780b91SHarshad Shirwadkar
189ff780b91SHarshad Shirwadkar }
190ff780b91SHarshad Shirwadkar EXPORT_SYMBOL(jbd2_submit_inode_data);
191ff780b91SHarshad Shirwadkar
/*
 * Wait for writeback of the dirty range recorded in @jinode.
 *
 * Returns 0 without waiting when @jinode is NULL, does not have JI_WAIT_DATA
 * set, or has no VFS inode / mapping attached.  Otherwise returns the result
 * of filemap_fdatawait_range_keep_errors() over
 * [i_dirty_start, i_dirty_end].
 */
int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode)
{
	struct inode *inode;

	if (!jinode || !(jinode->i_flags & JI_WAIT_DATA))
		return 0;

	inode = jinode->i_vfs_inode;
	if (!inode || !inode->i_mapping)
		return 0;

	return filemap_fdatawait_range_keep_errors(inode->i_mapping,
						   jinode->i_dirty_start,
						   jinode->i_dirty_end);
}
EXPORT_SYMBOL(jbd2_wait_inode_data);
202ff780b91SHarshad Shirwadkar
203cd1aac32SAneesh Kumar K.V /*
204c851ed54SJan Kara * Submit all the data buffers of inode associated with the transaction to
205c851ed54SJan Kara * disk.
206c851ed54SJan Kara *
207c851ed54SJan Kara * We are in a committing transaction. Therefore no new inode can be added to
208c851ed54SJan Kara * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently
209c851ed54SJan Kara * operate on from being released while we write out pages.
210c851ed54SJan Kara */
journal_submit_data_buffers(journal_t * journal,transaction_t * commit_transaction)211cd1aac32SAneesh Kumar K.V static int journal_submit_data_buffers(journal_t *journal,
212c851ed54SJan Kara transaction_t *commit_transaction)
213c851ed54SJan Kara {
214c851ed54SJan Kara struct jbd2_inode *jinode;
215c851ed54SJan Kara int err, ret = 0;
216c851ed54SJan Kara
217c851ed54SJan Kara spin_lock(&journal->j_list_lock);
218c851ed54SJan Kara list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
21941617e1aSJan Kara if (!(jinode->i_flags & JI_WRITE_DATA))
22041617e1aSJan Kara continue;
221cb0d9d47SJan Kara jinode->i_flags |= JI_COMMIT_RUNNING;
222c851ed54SJan Kara spin_unlock(&journal->j_list_lock);
223342af94eSMauricio Faria de Oliveira /* submit the inode data buffers. */
224879c5e6bSTheodore Ts'o trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
225342af94eSMauricio Faria de Oliveira if (journal->j_submit_inode_data_buffers) {
226342af94eSMauricio Faria de Oliveira err = journal->j_submit_inode_data_buffers(jinode);
227c851ed54SJan Kara if (!ret)
228c851ed54SJan Kara ret = err;
229342af94eSMauricio Faria de Oliveira }
230c851ed54SJan Kara spin_lock(&journal->j_list_lock);
231c851ed54SJan Kara J_ASSERT(jinode->i_transaction == commit_transaction);
232cb0d9d47SJan Kara jinode->i_flags &= ~JI_COMMIT_RUNNING;
233cb0d9d47SJan Kara smp_mb();
234c851ed54SJan Kara wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
235c851ed54SJan Kara }
236c851ed54SJan Kara spin_unlock(&journal->j_list_lock);
237c851ed54SJan Kara return ret;
238c851ed54SJan Kara }
239c851ed54SJan Kara
jbd2_journal_finish_inode_data_buffers(struct jbd2_inode * jinode)240aa3c0c61SMauricio Faria de Oliveira int jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
241aa3c0c61SMauricio Faria de Oliveira {
242aa3c0c61SMauricio Faria de Oliveira struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
243aa3c0c61SMauricio Faria de Oliveira
244aa3c0c61SMauricio Faria de Oliveira return filemap_fdatawait_range_keep_errors(mapping,
245aa3c0c61SMauricio Faria de Oliveira jinode->i_dirty_start,
246aa3c0c61SMauricio Faria de Oliveira jinode->i_dirty_end);
247aa3c0c61SMauricio Faria de Oliveira }
248aa3c0c61SMauricio Faria de Oliveira
249c851ed54SJan Kara /*
250c851ed54SJan Kara * Wait for data submitted for writeout, refile inodes to proper
251c851ed54SJan Kara * transaction if needed.
252c851ed54SJan Kara *
253c851ed54SJan Kara */
journal_finish_inode_data_buffers(journal_t * journal,transaction_t * commit_transaction)254c851ed54SJan Kara static int journal_finish_inode_data_buffers(journal_t *journal,
255c851ed54SJan Kara transaction_t *commit_transaction)
256c851ed54SJan Kara {
257c851ed54SJan Kara struct jbd2_inode *jinode, *next_i;
258c851ed54SJan Kara int err, ret = 0;
259c851ed54SJan Kara
260cd1aac32SAneesh Kumar K.V /* For locking, see the comment in journal_submit_data_buffers() */
261c851ed54SJan Kara spin_lock(&journal->j_list_lock);
262c851ed54SJan Kara list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
26341617e1aSJan Kara if (!(jinode->i_flags & JI_WAIT_DATA))
26441617e1aSJan Kara continue;
265cb0d9d47SJan Kara jinode->i_flags |= JI_COMMIT_RUNNING;
266c851ed54SJan Kara spin_unlock(&journal->j_list_lock);
267342af94eSMauricio Faria de Oliveira /* wait for the inode data buffers writeout. */
268342af94eSMauricio Faria de Oliveira if (journal->j_finish_inode_data_buffers) {
269342af94eSMauricio Faria de Oliveira err = journal->j_finish_inode_data_buffers(jinode);
270c851ed54SJan Kara if (!ret)
271c851ed54SJan Kara ret = err;
272342af94eSMauricio Faria de Oliveira }
2736c02757cSYe Bin cond_resched();
274c851ed54SJan Kara spin_lock(&journal->j_list_lock);
275cb0d9d47SJan Kara jinode->i_flags &= ~JI_COMMIT_RUNNING;
276cb0d9d47SJan Kara smp_mb();
277c851ed54SJan Kara wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
278c851ed54SJan Kara }
279c851ed54SJan Kara
280c851ed54SJan Kara /* Now refile inode to proper lists */
281c851ed54SJan Kara list_for_each_entry_safe(jinode, next_i,
282c851ed54SJan Kara &commit_transaction->t_inode_list, i_list) {
283c851ed54SJan Kara list_del(&jinode->i_list);
284c851ed54SJan Kara if (jinode->i_next_transaction) {
285c851ed54SJan Kara jinode->i_transaction = jinode->i_next_transaction;
286c851ed54SJan Kara jinode->i_next_transaction = NULL;
287c851ed54SJan Kara list_add(&jinode->i_list,
288c851ed54SJan Kara &jinode->i_transaction->t_inode_list);
289c851ed54SJan Kara } else {
290c851ed54SJan Kara jinode->i_transaction = NULL;
2916ba0e7dcSRoss Zwisler jinode->i_dirty_start = 0;
2926ba0e7dcSRoss Zwisler jinode->i_dirty_end = 0;
293c851ed54SJan Kara }
294c851ed54SJan Kara }
295c851ed54SJan Kara spin_unlock(&journal->j_list_lock);
296c851ed54SJan Kara
297c851ed54SJan Kara return ret;
298c851ed54SJan Kara }
299c851ed54SJan Kara
/*
 * Fold the b_size bytes of @bh's data into the running big-endian CRC32
 * @crc32_sum and return the updated value.  The folio is mapped only for
 * the duration of the computation.
 */
static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
{
	char *data = kmap_local_folio(bh->b_folio, bh_offset(bh));

	crc32_sum = crc32_be(crc32_sum, data, bh->b_size);
	kunmap_local(data);

	return crc32_sum;
}
311818d276cSGirish Shilamkar
/*
 * Store @block into @tag in big-endian form: the low 32 bits always go into
 * t_blocknr, the high 32 bits into t_blocknr_high only when the journal has
 * the 64bit feature.
 */
static void write_tag_block(journal_t *j, journal_block_tag_t *tag,
			    unsigned long long block)
{
	tag->t_blocknr = cpu_to_be32(block & (u32)~0);

	if (!jbd2_has_feature_64bit(j))
		return;

	tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
}
319b517bea1SZach Brown
/*
 * Compute the checksum of the data block in @bh and store it in @tag.
 *
 * Does nothing unless the journal uses v2 or v3 checksums.  The checksum
 * starts from the journal's checksum seed, covers the big-endian @sequence
 * and then the b_size bytes of the buffer.  With the csum3 feature the full
 * 32-bit value is stored through the tag3 layout; otherwise the value is
 * stored through cpu_to_be16() into the 16-bit t_checksum field.
 */
static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
				    struct buffer_head *bh, __u32 sequence)
{
	__be32 seq_be;
	__u32 csum;
	__u8 *data;

	if (!jbd2_journal_has_csum_v2or3(j))
		return;

	seq_be = cpu_to_be32(sequence);
	data = kmap_local_folio(bh->b_folio, bh_offset(bh));
	csum = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq_be, sizeof(seq_be));
	csum = jbd2_chksum(j, csum, data, bh->b_size);
	kunmap_local(data);

	if (!jbd2_has_feature_csum3(j)) {
		tag->t_checksum = cpu_to_be16(csum);
		return;
	}

	((journal_block_tag3_t *)tag)->t_checksum = cpu_to_be32(csum);
}
342470decc6SDave Kleikamp /*
343f7f4bccbSMingming Cao * jbd2_journal_commit_transaction
344470decc6SDave Kleikamp *
345470decc6SDave Kleikamp * The primary function for committing a transaction to the log. This
346470decc6SDave Kleikamp * function is called by the journal thread to begin a complete commit.
347470decc6SDave Kleikamp */
jbd2_journal_commit_transaction(journal_t * journal)348f7f4bccbSMingming Cao void jbd2_journal_commit_transaction(journal_t *journal)
349470decc6SDave Kleikamp {
3508e85fb3fSJohann Lombardi struct transaction_stats_s stats;
351470decc6SDave Kleikamp transaction_t *commit_transaction;
352e5a120aeSJan Kara struct journal_head *jh;
353e5a120aeSJan Kara struct buffer_head *descriptor;
354470decc6SDave Kleikamp struct buffer_head **wbuf = journal->j_wbuf;
355470decc6SDave Kleikamp int bufs;
356abe48a52SKemeng Shi int escape;
357470decc6SDave Kleikamp int err;
35818eba7aaSMingming Cao unsigned long long blocknr;
359e07f7183SJosef Bacik ktime_t start_time;
360e07f7183SJosef Bacik u64 commit_time;
361470decc6SDave Kleikamp char *tagp = NULL;
362470decc6SDave Kleikamp journal_block_tag_t *tag = NULL;
363470decc6SDave Kleikamp int space_left = 0;
364470decc6SDave Kleikamp int first_tag = 0;
365470decc6SDave Kleikamp int tag_flag;
366794446c6SDmitry Monakhov int i;
367b517bea1SZach Brown int tag_bytes = journal_tag_bytes(journal);
368818d276cSGirish Shilamkar struct buffer_head *cbh = NULL; /* For transactional checksums */
369818d276cSGirish Shilamkar __u32 crc32_sum = ~0;
37082f04ab4SJens Axboe struct blk_plug plug;
3713339578fSJan Kara /* Tail of the journal */
3723339578fSJan Kara unsigned long first_block;
3733339578fSJan Kara tid_t first_tid;
3743339578fSJan Kara int update_tail;
3753caa487fSDarrick J. Wong int csum_size = 0;
376f5113effSJan Kara LIST_HEAD(io_bufs);
377e5a120aeSJan Kara LIST_HEAD(log_bufs);
3783caa487fSDarrick J. Wong
379db9ee220SDarrick J. Wong if (jbd2_journal_has_csum_v2or3(journal))
3803caa487fSDarrick J. Wong csum_size = sizeof(struct jbd2_journal_block_tail);
381470decc6SDave Kleikamp
382470decc6SDave Kleikamp /*
383470decc6SDave Kleikamp * First job: lock down the current transaction and wait for
384470decc6SDave Kleikamp * all outstanding updates to complete.
385470decc6SDave Kleikamp */
386470decc6SDave Kleikamp
387f7f4bccbSMingming Cao /* Do we need to erase the effects of a prior jbd2_journal_flush? */
388f7f4bccbSMingming Cao if (journal->j_flags & JBD2_FLUSHED) {
389cb3b3bf2SJan Kara jbd2_debug(3, "super block updated\n");
3906fa7aa50STejun Heo mutex_lock_io(&journal->j_checkpoint_mutex);
39179feb521SJan Kara /*
39279feb521SJan Kara * We hold j_checkpoint_mutex so tail cannot change under us.
39379feb521SJan Kara * We don't need any special data guarantees for writing sb
39479feb521SJan Kara * since journal is empty and it is ok for write to be
39579feb521SJan Kara * flushed only with transaction commit.
39679feb521SJan Kara */
39779feb521SJan Kara jbd2_journal_update_sb_log_tail(journal,
39879feb521SJan Kara journal->j_tail_sequence,
3996a3afb6aSZhang Yi journal->j_tail, 0);
400a78bb11dSJan Kara mutex_unlock(&journal->j_checkpoint_mutex);
401470decc6SDave Kleikamp } else {
402cb3b3bf2SJan Kara jbd2_debug(3, "superblock not updated\n");
403470decc6SDave Kleikamp }
404470decc6SDave Kleikamp
405470decc6SDave Kleikamp J_ASSERT(journal->j_running_transaction != NULL);
406470decc6SDave Kleikamp J_ASSERT(journal->j_committing_transaction == NULL);
407470decc6SDave Kleikamp
408ff780b91SHarshad Shirwadkar write_lock(&journal->j_state_lock);
409ff780b91SHarshad Shirwadkar journal->j_flags |= JBD2_FULL_COMMIT_ONGOING;
410ff780b91SHarshad Shirwadkar while (journal->j_flags & JBD2_FAST_COMMIT_ONGOING) {
411ff780b91SHarshad Shirwadkar DEFINE_WAIT(wait);
412ff780b91SHarshad Shirwadkar
413ff780b91SHarshad Shirwadkar prepare_to_wait(&journal->j_fc_wait, &wait,
414ff780b91SHarshad Shirwadkar TASK_UNINTERRUPTIBLE);
415ff780b91SHarshad Shirwadkar write_unlock(&journal->j_state_lock);
416ff780b91SHarshad Shirwadkar schedule();
417ff780b91SHarshad Shirwadkar write_lock(&journal->j_state_lock);
418ff780b91SHarshad Shirwadkar finish_wait(&journal->j_fc_wait, &wait);
419cc80586aSHarshad Shirwadkar /*
420cc80586aSHarshad Shirwadkar * TODO: by blocking fast commits here, we are increasing
421cc80586aSHarshad Shirwadkar * fsync() latency slightly. Strictly speaking, we don't need
422cc80586aSHarshad Shirwadkar * to block fast commits until the transaction enters T_FLUSH
423cc80586aSHarshad Shirwadkar * state. So an optimization is possible where we block new fast
424cc80586aSHarshad Shirwadkar * commits here and wait for existing ones to complete
425cc80586aSHarshad Shirwadkar * just before we enter T_FLUSH. That way, the existing fast
426cc80586aSHarshad Shirwadkar * commits and this full commit can proceed in parallel.
427cc80586aSHarshad Shirwadkar */
428ff780b91SHarshad Shirwadkar }
429ff780b91SHarshad Shirwadkar write_unlock(&journal->j_state_lock);
430ff780b91SHarshad Shirwadkar
431470decc6SDave Kleikamp commit_transaction = journal->j_running_transaction;
432470decc6SDave Kleikamp
433879c5e6bSTheodore Ts'o trace_jbd2_start_commit(journal, commit_transaction);
434cb3b3bf2SJan Kara jbd2_debug(1, "JBD2: starting commit of transaction %d\n",
435470decc6SDave Kleikamp commit_transaction->t_tid);
436470decc6SDave Kleikamp
437a931da6aSTheodore Ts'o write_lock(&journal->j_state_lock);
438ff780b91SHarshad Shirwadkar journal->j_fc_off = 0;
4393ca841c1SPaul Gortmaker J_ASSERT(commit_transaction->t_state == T_RUNNING);
440470decc6SDave Kleikamp commit_transaction->t_state = T_LOCKED;
441470decc6SDave Kleikamp
442879c5e6bSTheodore Ts'o trace_jbd2_commit_locking(journal, commit_transaction);
443bf699327STheodore Ts'o stats.run.rs_wait = commit_transaction->t_max_wait;
4449fff24aaSTheodore Ts'o stats.run.rs_request_delay = 0;
445bf699327STheodore Ts'o stats.run.rs_locked = jiffies;
4469fff24aaSTheodore Ts'o if (commit_transaction->t_requested)
4479fff24aaSTheodore Ts'o stats.run.rs_request_delay =
4489fff24aaSTheodore Ts'o jbd2_time_diff(commit_transaction->t_requested,
4499fff24aaSTheodore Ts'o stats.run.rs_locked);
450bf699327STheodore Ts'o stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
451bf699327STheodore Ts'o stats.run.rs_locked);
4528e85fb3fSJohann Lombardi
4534f981868SRitesh Harjani // waits for any t_updates to finish
4544f981868SRitesh Harjani jbd2_journal_wait_updates(journal);
455470decc6SDave Kleikamp
45696f1e097SJan Kara commit_transaction->t_state = T_SWITCH;
457470decc6SDave Kleikamp
458a51dca9cSTheodore Ts'o J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
459470decc6SDave Kleikamp journal->j_max_transaction_buffers);
460470decc6SDave Kleikamp
461470decc6SDave Kleikamp /*
462470decc6SDave Kleikamp * First thing we are allowed to do is to discard any remaining
463470decc6SDave Kleikamp * BJ_Reserved buffers. Note, it is _not_ permissible to assume
464470decc6SDave Kleikamp * that there are no such buffers: if a large filesystem
465470decc6SDave Kleikamp * operation like a truncate needs to split itself over multiple
466f7f4bccbSMingming Cao * transactions, then it may try to do a jbd2_journal_restart() while
467470decc6SDave Kleikamp * there are still BJ_Reserved buffers outstanding. These must
468470decc6SDave Kleikamp * be released cleanly from the current transaction.
469470decc6SDave Kleikamp *
470470decc6SDave Kleikamp * In this case, the filesystem must still reserve write access
471470decc6SDave Kleikamp * again before modifying the buffer in the new transaction, but
472470decc6SDave Kleikamp * we do not require it to remember exactly which old buffers it
473470decc6SDave Kleikamp * has reserved. This is consistent with the existing behaviour
474f7f4bccbSMingming Cao * that multiple jbd2_journal_get_write_access() calls to the same
47525985edcSLucas De Marchi * buffer are perfectly permissible.
47623e3d7f7SYe Bin * We use journal->j_state_lock here to serialize processing of
47723e3d7f7SYe Bin * t_reserved_list with eviction of buffers from journal_unmap_buffer().
478470decc6SDave Kleikamp */
479470decc6SDave Kleikamp while (commit_transaction->t_reserved_list) {
480470decc6SDave Kleikamp jh = commit_transaction->t_reserved_list;
481470decc6SDave Kleikamp JBUFFER_TRACE(jh, "reserved, unused: refile");
482470decc6SDave Kleikamp /*
483f7f4bccbSMingming Cao * A jbd2_journal_get_undo_access()+jbd2_journal_release_buffer() may
484470decc6SDave Kleikamp * leave undo-committed data.
485470decc6SDave Kleikamp */
486470decc6SDave Kleikamp if (jh->b_committed_data) {
487470decc6SDave Kleikamp struct buffer_head *bh = jh2bh(jh);
488470decc6SDave Kleikamp
48946417064SThomas Gleixner spin_lock(&jh->b_state_lock);
490af1e76d6SMingming Cao jbd2_free(jh->b_committed_data, bh->b_size);
491470decc6SDave Kleikamp jh->b_committed_data = NULL;
49246417064SThomas Gleixner spin_unlock(&jh->b_state_lock);
493470decc6SDave Kleikamp }
494f7f4bccbSMingming Cao jbd2_journal_refile_buffer(journal, jh);
495470decc6SDave Kleikamp }
496470decc6SDave Kleikamp
49723e3d7f7SYe Bin write_unlock(&journal->j_state_lock);
498470decc6SDave Kleikamp /*
499470decc6SDave Kleikamp * Now try to drop any written-back buffers from the journal's
500470decc6SDave Kleikamp * checkpoint lists. We do this *before* commit because it potentially
501470decc6SDave Kleikamp * frees some memory
502470decc6SDave Kleikamp */
503470decc6SDave Kleikamp spin_lock(&journal->j_list_lock);
50426770a71SYe Bin __jbd2_journal_clean_checkpoint_list(journal, JBD2_SHRINK_BUSY_STOP);
505470decc6SDave Kleikamp spin_unlock(&journal->j_list_lock);
506470decc6SDave Kleikamp
507cb3b3bf2SJan Kara jbd2_debug(3, "JBD2: commit phase 1\n");
508470decc6SDave Kleikamp
509470decc6SDave Kleikamp /*
5101ba37268SYongqiang Yang * Clear the revoked flags to reflect that there are no revoked buffers
5111ba37268SYongqiang Yang * in the next transaction, which is about to be started.
5121ba37268SYongqiang Yang */
5131ba37268SYongqiang Yang jbd2_clear_buffer_revoked_flags(journal);
5141ba37268SYongqiang Yang
5151ba37268SYongqiang Yang /*
516470decc6SDave Kleikamp * Switch to a new revoke table.
517470decc6SDave Kleikamp */
518f7f4bccbSMingming Cao jbd2_journal_switch_revoke_table(journal);
519470decc6SDave Kleikamp
520a89573ceSZhang Yi write_lock(&journal->j_state_lock);
5218f7d89f3SJan Kara /*
5228f7d89f3SJan Kara * Reserved credits cannot be claimed anymore, free them
5238f7d89f3SJan Kara */
5248f7d89f3SJan Kara atomic_sub(atomic_read(&journal->j_reserved_credits),
5258f7d89f3SJan Kara &commit_transaction->t_outstanding_credits);
5268f7d89f3SJan Kara
527879c5e6bSTheodore Ts'o trace_jbd2_commit_flushing(journal, commit_transaction);
528bf699327STheodore Ts'o stats.run.rs_flushing = jiffies;
529bf699327STheodore Ts'o stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked,
530bf699327STheodore Ts'o stats.run.rs_flushing);
5318e85fb3fSJohann Lombardi
532470decc6SDave Kleikamp commit_transaction->t_state = T_FLUSH;
533470decc6SDave Kleikamp journal->j_committing_transaction = commit_transaction;
534470decc6SDave Kleikamp journal->j_running_transaction = NULL;
535e07f7183SJosef Bacik start_time = ktime_get();
536470decc6SDave Kleikamp commit_transaction->t_log_start = journal->j_head;
53734fc8768SAndrew Perepechko wake_up_all(&journal->j_wait_transaction_locked);
538a931da6aSTheodore Ts'o write_unlock(&journal->j_state_lock);
539470decc6SDave Kleikamp
540cb3b3bf2SJan Kara jbd2_debug(3, "JBD2: commit phase 2a\n");
541470decc6SDave Kleikamp
542470decc6SDave Kleikamp /*
543470decc6SDave Kleikamp * Now start flushing things to disk, in the order they appear
544470decc6SDave Kleikamp * on the transaction lists. Data blocks go first.
545470decc6SDave Kleikamp */
546cd1aac32SAneesh Kumar K.V err = journal_submit_data_buffers(journal, commit_transaction);
547c851ed54SJan Kara if (err)
548c851ed54SJan Kara jbd2_journal_abort(journal, err);
549470decc6SDave Kleikamp
55082f04ab4SJens Axboe blk_start_plug(&plug);
5519bcf976cSJan Kara jbd2_journal_write_revoke_records(commit_transaction, &log_bufs);
552470decc6SDave Kleikamp
553cb3b3bf2SJan Kara jbd2_debug(3, "JBD2: commit phase 2b\n");
554470decc6SDave Kleikamp
555470decc6SDave Kleikamp /*
556470decc6SDave Kleikamp * Way to go: we have now written out all of the data for a
557470decc6SDave Kleikamp * transaction! Now comes the tricky part: we need to write out
558470decc6SDave Kleikamp * metadata. Loop over the transaction's entire buffer list:
559470decc6SDave Kleikamp */
560a931da6aSTheodore Ts'o write_lock(&journal->j_state_lock);
561470decc6SDave Kleikamp commit_transaction->t_state = T_COMMIT;
562a931da6aSTheodore Ts'o write_unlock(&journal->j_state_lock);
563470decc6SDave Kleikamp
564879c5e6bSTheodore Ts'o trace_jbd2_commit_logging(journal, commit_transaction);
565bf699327STheodore Ts'o stats.run.rs_logging = jiffies;
566bf699327STheodore Ts'o stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing,
567bf699327STheodore Ts'o stats.run.rs_logging);
5689f356e5aSJan Kara stats.run.rs_blocks = commit_transaction->t_nr_buffers;
569bf699327STheodore Ts'o stats.run.rs_blocks_logged = 0;
5708e85fb3fSJohann Lombardi
5711dfc3220SJosef Bacik J_ASSERT(commit_transaction->t_nr_buffers <=
572a51dca9cSTheodore Ts'o atomic_read(&commit_transaction->t_outstanding_credits));
5731dfc3220SJosef Bacik
574470decc6SDave Kleikamp bufs = 0;
575e5a120aeSJan Kara descriptor = NULL;
576470decc6SDave Kleikamp while (commit_transaction->t_buffers) {
577470decc6SDave Kleikamp
578470decc6SDave Kleikamp /* Find the next buffer to be journaled... */
579470decc6SDave Kleikamp
580470decc6SDave Kleikamp jh = commit_transaction->t_buffers;
581470decc6SDave Kleikamp
582470decc6SDave Kleikamp /* If we're in abort mode, we just un-journal the buffer and
5837ad7445fSHidehiro Kawai release it. */
584470decc6SDave Kleikamp
585470decc6SDave Kleikamp if (is_journal_aborted(journal)) {
5867ad7445fSHidehiro Kawai clear_buffer_jbddirty(jh2bh(jh));
587470decc6SDave Kleikamp JBUFFER_TRACE(jh, "journal is aborting: refile");
588e06c8227SJoel Becker jbd2_buffer_abort_trigger(jh,
589e06c8227SJoel Becker jh->b_frozen_data ?
590e06c8227SJoel Becker jh->b_frozen_triggers :
591e06c8227SJoel Becker jh->b_triggers);
592f7f4bccbSMingming Cao jbd2_journal_refile_buffer(journal, jh);
593470decc6SDave Kleikamp /* If that was the last one, we need to clean up
594470decc6SDave Kleikamp * any descriptor buffers which may have been
595470decc6SDave Kleikamp * already allocated, even if we are now
596470decc6SDave Kleikamp * aborting. */
597470decc6SDave Kleikamp if (!commit_transaction->t_buffers)
598470decc6SDave Kleikamp goto start_journal_io;
599470decc6SDave Kleikamp continue;
600470decc6SDave Kleikamp }
601470decc6SDave Kleikamp
602470decc6SDave Kleikamp /* Make sure we have a descriptor block in which to
603470decc6SDave Kleikamp record the metadata buffer. */
604470decc6SDave Kleikamp
605470decc6SDave Kleikamp if (!descriptor) {
606470decc6SDave Kleikamp J_ASSERT (bufs == 0);
607470decc6SDave Kleikamp
608cb3b3bf2SJan Kara jbd2_debug(4, "JBD2: get descriptor\n");
609470decc6SDave Kleikamp
61032ab6715SJan Kara descriptor = jbd2_journal_get_descriptor_buffer(
61132ab6715SJan Kara commit_transaction,
61232ab6715SJan Kara JBD2_DESCRIPTOR_BLOCK);
613470decc6SDave Kleikamp if (!descriptor) {
614a7fa2bafSJan Kara jbd2_journal_abort(journal, -EIO);
615470decc6SDave Kleikamp continue;
616470decc6SDave Kleikamp }
617470decc6SDave Kleikamp
618cb3b3bf2SJan Kara jbd2_debug(4, "JBD2: got buffer %llu (%p)\n",
619e5a120aeSJan Kara (unsigned long long)descriptor->b_blocknr,
620e5a120aeSJan Kara descriptor->b_data);
621e5a120aeSJan Kara tagp = &descriptor->b_data[sizeof(journal_header_t)];
622e5a120aeSJan Kara space_left = descriptor->b_size -
623e5a120aeSJan Kara sizeof(journal_header_t);
624470decc6SDave Kleikamp first_tag = 1;
625e5a120aeSJan Kara set_buffer_jwrite(descriptor);
626e5a120aeSJan Kara set_buffer_dirty(descriptor);
627e5a120aeSJan Kara wbuf[bufs++] = descriptor;
628470decc6SDave Kleikamp
629470decc6SDave Kleikamp /* Record it so that we can wait for IO
630470decc6SDave Kleikamp completion later */
631e5a120aeSJan Kara BUFFER_TRACE(descriptor, "ph3: file as descriptor");
632e5a120aeSJan Kara jbd2_file_log_bh(&log_bufs, descriptor);
633470decc6SDave Kleikamp }
634470decc6SDave Kleikamp
635470decc6SDave Kleikamp /* Where is the buffer to be written? */
636470decc6SDave Kleikamp
637f7f4bccbSMingming Cao err = jbd2_journal_next_log_block(journal, &blocknr);
638470decc6SDave Kleikamp /* If the block mapping failed, just abandon the buffer
639470decc6SDave Kleikamp and repeat this loop: we'll fall into the
640470decc6SDave Kleikamp refile-on-abort condition above. */
641470decc6SDave Kleikamp if (err) {
642a7fa2bafSJan Kara jbd2_journal_abort(journal, err);
643470decc6SDave Kleikamp continue;
644470decc6SDave Kleikamp }
645470decc6SDave Kleikamp
646470decc6SDave Kleikamp /*
647470decc6SDave Kleikamp * start_this_handle() uses t_outstanding_credits to determine
6480db45889SJan Kara * the free space in the log.
649470decc6SDave Kleikamp */
650a51dca9cSTheodore Ts'o atomic_dec(&commit_transaction->t_outstanding_credits);
651470decc6SDave Kleikamp
652470decc6SDave Kleikamp /* Bump b_count to prevent truncate from stumbling over
653470decc6SDave Kleikamp the shadowed buffer! @@@ This can go if we ever get
654f5113effSJan Kara rid of the shadow pairing of buffers. */
655470decc6SDave Kleikamp atomic_inc(&jh2bh(jh)->b_count);
656470decc6SDave Kleikamp
657470decc6SDave Kleikamp /*
658f5113effSJan Kara * Make a temporary IO buffer with which to write it out
659f5113effSJan Kara * (this will requeue the metadata buffer to BJ_Shadow).
660470decc6SDave Kleikamp */
661f5113effSJan Kara set_bit(BH_JWrite, &jh2bh(jh)->b_state);
662470decc6SDave Kleikamp JBUFFER_TRACE(jh, "ph3: write metadata");
663abe48a52SKemeng Shi escape = jbd2_journal_write_metadata_buffer(commit_transaction,
664f5113effSJan Kara jh, &wbuf[bufs], blocknr);
665f5113effSJan Kara jbd2_file_log_bh(&io_bufs, wbuf[bufs]);
666470decc6SDave Kleikamp
667470decc6SDave Kleikamp /* Record the new block's tag in the current descriptor
668470decc6SDave Kleikamp buffer */
669470decc6SDave Kleikamp
670470decc6SDave Kleikamp tag_flag = 0;
671abe48a52SKemeng Shi if (escape)
672f7f4bccbSMingming Cao tag_flag |= JBD2_FLAG_ESCAPE;
673470decc6SDave Kleikamp if (!first_tag)
674f7f4bccbSMingming Cao tag_flag |= JBD2_FLAG_SAME_UUID;
675470decc6SDave Kleikamp
676470decc6SDave Kleikamp tag = (journal_block_tag_t *) tagp;
677db9ee220SDarrick J. Wong write_tag_block(journal, tag, jh2bh(jh)->b_blocknr);
6788f888ef8SDarrick J. Wong tag->t_flags = cpu_to_be16(tag_flag);
679f5113effSJan Kara jbd2_block_tag_csum_set(journal, tag, wbuf[bufs],
680c3900875SDarrick J. Wong commit_transaction->t_tid);
681b517bea1SZach Brown tagp += tag_bytes;
682b517bea1SZach Brown space_left -= tag_bytes;
683f5113effSJan Kara bufs++;
684470decc6SDave Kleikamp
685470decc6SDave Kleikamp if (first_tag) {
686470decc6SDave Kleikamp memcpy (tagp, journal->j_uuid, 16);
687470decc6SDave Kleikamp tagp += 16;
688470decc6SDave Kleikamp space_left -= 16;
689470decc6SDave Kleikamp first_tag = 0;
690470decc6SDave Kleikamp }
691470decc6SDave Kleikamp
692470decc6SDave Kleikamp /* If there's no more to do, or if the descriptor is full,
693470decc6SDave Kleikamp let the IO rip! */
694470decc6SDave Kleikamp
695470decc6SDave Kleikamp if (bufs == journal->j_wbufsize ||
696470decc6SDave Kleikamp commit_transaction->t_buffers == NULL ||
6973caa487fSDarrick J. Wong space_left < tag_bytes + 16 + csum_size) {
698470decc6SDave Kleikamp
699cb3b3bf2SJan Kara jbd2_debug(4, "JBD2: Submit %d IOs\n", bufs);
700470decc6SDave Kleikamp
701470decc6SDave Kleikamp /* Write an end-of-descriptor marker before
702470decc6SDave Kleikamp submitting the IOs. "tag" still points to
703470decc6SDave Kleikamp the last tag we set up. */
704470decc6SDave Kleikamp
7058f888ef8SDarrick J. Wong tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG);
706470decc6SDave Kleikamp start_journal_io:
7076e876c3dSluojiajun if (descriptor)
7086e876c3dSluojiajun jbd2_descriptor_block_csum_set(journal,
7096e876c3dSluojiajun descriptor);
7106e876c3dSluojiajun
711470decc6SDave Kleikamp for (i = 0; i < bufs; i++) {
712470decc6SDave Kleikamp struct buffer_head *bh = wbuf[i];
7136a3afb6aSZhang Yi
714818d276cSGirish Shilamkar /*
715818d276cSGirish Shilamkar * Compute checksum.
716818d276cSGirish Shilamkar */
71756316a0dSDarrick J. Wong if (jbd2_has_feature_checksum(journal)) {
718818d276cSGirish Shilamkar crc32_sum =
719818d276cSGirish Shilamkar jbd2_checksum_data(crc32_sum, bh);
720818d276cSGirish Shilamkar }
721818d276cSGirish Shilamkar
722470decc6SDave Kleikamp lock_buffer(bh);
723470decc6SDave Kleikamp clear_buffer_dirty(bh);
724470decc6SDave Kleikamp set_buffer_uptodate(bh);
725470decc6SDave Kleikamp bh->b_end_io = journal_end_buffer_io_sync;
7266a3afb6aSZhang Yi submit_bh(REQ_OP_WRITE | JBD2_JOURNAL_REQ_FLAGS,
7276a3afb6aSZhang Yi bh);
728470decc6SDave Kleikamp }
729470decc6SDave Kleikamp cond_resched();
730470decc6SDave Kleikamp
731470decc6SDave Kleikamp /* Force a new descriptor to be generated next
732470decc6SDave Kleikamp time round the loop. */
733470decc6SDave Kleikamp descriptor = NULL;
734470decc6SDave Kleikamp bufs = 0;
735470decc6SDave Kleikamp }
736470decc6SDave Kleikamp }
737470decc6SDave Kleikamp
738c851ed54SJan Kara err = journal_finish_inode_data_buffers(journal, commit_transaction);
739e9e34f4eSHidehiro Kawai if (err) {
740e9e34f4eSHidehiro Kawai printk(KERN_WARNING
7416e969ef3SBaokun Li "JBD2: Detected IO errors %d while flushing file data on %s\n",
7426e969ef3SBaokun Li err, journal->j_devname);
743e9e34f4eSHidehiro Kawai err = 0;
744e9e34f4eSHidehiro Kawai }
745c851ed54SJan Kara
7463339578fSJan Kara /*
7473339578fSJan Kara * Get current oldest transaction in the log before we issue flush
7483339578fSJan Kara * to the filesystem device. After the flush we can be sure that
7493339578fSJan Kara * blocks of all older transactions are checkpointed to persistent
7503339578fSJan Kara * storage and we will be safe to update journal start in the
7513339578fSJan Kara * superblock with the numbers we get here.
7523339578fSJan Kara */
7533339578fSJan Kara update_tail =
7543339578fSJan Kara jbd2_journal_get_log_tail(journal, &first_tid, &first_block);
7553339578fSJan Kara
756bbd2be36SJan Kara write_lock(&journal->j_state_lock);
7573339578fSJan Kara if (update_tail) {
7583339578fSJan Kara long freed = first_block - journal->j_tail;
7593339578fSJan Kara
7603339578fSJan Kara if (first_block < journal->j_tail)
7613339578fSJan Kara freed += journal->j_last - journal->j_first;
7623339578fSJan Kara /* Update tail only if we free significant amount of space */
7634aa99c71SJan Kara if (freed < journal->j_max_transaction_buffers)
7643339578fSJan Kara update_tail = 0;
7653339578fSJan Kara }
766bbd2be36SJan Kara J_ASSERT(commit_transaction->t_state == T_COMMIT);
767bbd2be36SJan Kara commit_transaction->t_state = T_COMMIT_DFLUSH;
768bbd2be36SJan Kara write_unlock(&journal->j_state_lock);
7693339578fSJan Kara
770818d276cSGirish Shilamkar /*
771818d276cSGirish Shilamkar * If the journal is not located on the file system device,
772818d276cSGirish Shilamkar * then we must flush the file system device before we issue
773a0851ea9SZhang Yi * the commit record and update the journal tail sequence.
774818d276cSGirish Shilamkar */
775a0851ea9SZhang Yi if ((commit_transaction->t_need_data_flush || update_tail) &&
776818d276cSGirish Shilamkar (journal->j_fs_dev != journal->j_dev) &&
777818d276cSGirish Shilamkar (journal->j_flags & JBD2_BARRIER))
778c6bf3f0eSChristoph Hellwig blkdev_issue_flush(journal->j_fs_dev);
779818d276cSGirish Shilamkar
780818d276cSGirish Shilamkar /* Done it all: now write the commit record asynchronously. */
78156316a0dSDarrick J. Wong if (jbd2_has_feature_async_commit(journal)) {
782818d276cSGirish Shilamkar err = journal_submit_commit_record(journal, commit_transaction,
783470decc6SDave Kleikamp &cbh, crc32_sum);
784470decc6SDave Kleikamp if (err)
785d0a186e0Szhangyi (F) jbd2_journal_abort(journal, err);
786470decc6SDave Kleikamp }
787470decc6SDave Kleikamp
78882f04ab4SJens Axboe blk_finish_plug(&plug);
78982f04ab4SJens Axboe
790470decc6SDave Kleikamp /* Lo and behold: we have just managed to send a transaction to
791470decc6SDave Kleikamp the log. Before we can commit it, wait for the IO so far to
792470decc6SDave Kleikamp complete. Control buffers being written are on the
793470decc6SDave Kleikamp log_bufs list, and metadata buffers are on
794f5113effSJan Kara the io_bufs list.
795470decc6SDave Kleikamp
796470decc6SDave Kleikamp Wait for the buffers in reverse order. That way we are
797470decc6SDave Kleikamp less likely to be woken up until all IOs have completed, and
798470decc6SDave Kleikamp so we incur less scheduling load.
799470decc6SDave Kleikamp */
800470decc6SDave Kleikamp
801cb3b3bf2SJan Kara jbd2_debug(3, "JBD2: commit phase 3\n");
802470decc6SDave Kleikamp
803f5113effSJan Kara while (!list_empty(&io_bufs)) {
804f5113effSJan Kara struct buffer_head *bh = list_entry(io_bufs.prev,
805f5113effSJan Kara struct buffer_head,
806f5113effSJan Kara b_assoc_buffers);
807470decc6SDave Kleikamp
808470decc6SDave Kleikamp wait_on_buffer(bh);
809f5113effSJan Kara cond_resched();
810470decc6SDave Kleikamp
811470decc6SDave Kleikamp if (unlikely(!buffer_uptodate(bh)))
812470decc6SDave Kleikamp err = -EIO;
813f5113effSJan Kara jbd2_unfile_log_bh(bh);
814015c6033SJan Kara stats.run.rs_blocks_logged++;
815470decc6SDave Kleikamp
816470decc6SDave Kleikamp /*
817f5113effSJan Kara * The list contains temporary buffer heads created by
818f5113effSJan Kara * jbd2_journal_write_metadata_buffer().
819470decc6SDave Kleikamp */
820470decc6SDave Kleikamp BUFFER_TRACE(bh, "dumping temporary bh");
821470decc6SDave Kleikamp __brelse(bh);
822470decc6SDave Kleikamp J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
823470decc6SDave Kleikamp free_buffer_head(bh);
824470decc6SDave Kleikamp
825f5113effSJan Kara /* We also have to refile the corresponding shadowed buffer */
826470decc6SDave Kleikamp jh = commit_transaction->t_shadow_list->b_tprev;
827470decc6SDave Kleikamp bh = jh2bh(jh);
828f5113effSJan Kara clear_buffer_jwrite(bh);
829470decc6SDave Kleikamp J_ASSERT_BH(bh, buffer_jbddirty(bh));
830b34090e5SJan Kara J_ASSERT_BH(bh, !buffer_shadow(bh));
831470decc6SDave Kleikamp
832470decc6SDave Kleikamp /* The metadata is now released for reuse, but we need
833470decc6SDave Kleikamp to remember it against this transaction so that when
834470decc6SDave Kleikamp we finally commit, we can do any checkpointing
835470decc6SDave Kleikamp required. */
836470decc6SDave Kleikamp JBUFFER_TRACE(jh, "file as BJ_Forget");
837f7f4bccbSMingming Cao jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget);
838470decc6SDave Kleikamp JBUFFER_TRACE(jh, "brelse shadowed buffer");
839470decc6SDave Kleikamp __brelse(bh);
840470decc6SDave Kleikamp }
841470decc6SDave Kleikamp
842470decc6SDave Kleikamp J_ASSERT (commit_transaction->t_shadow_list == NULL);
843470decc6SDave Kleikamp
844cb3b3bf2SJan Kara jbd2_debug(3, "JBD2: commit phase 4\n");
845470decc6SDave Kleikamp
846470decc6SDave Kleikamp /* Here we wait for the revoke record and descriptor record buffers */
847e5a120aeSJan Kara while (!list_empty(&log_bufs)) {
848470decc6SDave Kleikamp struct buffer_head *bh;
849470decc6SDave Kleikamp
850e5a120aeSJan Kara bh = list_entry(log_bufs.prev, struct buffer_head, b_assoc_buffers);
851470decc6SDave Kleikamp wait_on_buffer(bh);
852e5a120aeSJan Kara cond_resched();
853470decc6SDave Kleikamp
854470decc6SDave Kleikamp if (unlikely(!buffer_uptodate(bh)))
855470decc6SDave Kleikamp err = -EIO;
856470decc6SDave Kleikamp
857470decc6SDave Kleikamp BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
858470decc6SDave Kleikamp clear_buffer_jwrite(bh);
859e5a120aeSJan Kara jbd2_unfile_log_bh(bh);
860015c6033SJan Kara stats.run.rs_blocks_logged++;
861470decc6SDave Kleikamp __brelse(bh); /* One for getblk */
862470decc6SDave Kleikamp /* AKPM: bforget here */
863470decc6SDave Kleikamp }
864470decc6SDave Kleikamp
86577e841deSHidehiro Kawai if (err)
86677e841deSHidehiro Kawai jbd2_journal_abort(journal, err);
86777e841deSHidehiro Kawai
868cb3b3bf2SJan Kara jbd2_debug(3, "JBD2: commit phase 5\n");
869bbd2be36SJan Kara write_lock(&journal->j_state_lock);
870bbd2be36SJan Kara J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
871bbd2be36SJan Kara commit_transaction->t_state = T_COMMIT_JFLUSH;
872bbd2be36SJan Kara write_unlock(&journal->j_state_lock);
873470decc6SDave Kleikamp
87456316a0dSDarrick J. Wong if (!jbd2_has_feature_async_commit(journal)) {
875818d276cSGirish Shilamkar err = journal_submit_commit_record(journal, commit_transaction,
876818d276cSGirish Shilamkar &cbh, crc32_sum);
877818d276cSGirish Shilamkar if (err)
878d0a186e0Szhangyi (F) jbd2_journal_abort(journal, err);
879818d276cSGirish Shilamkar }
8806cba611eSZhang Huan if (cbh)
881fd98496fSTheodore Ts'o err = journal_wait_on_commit_record(journal, cbh);
882015c6033SJan Kara stats.run.rs_blocks_logged++;
88356316a0dSDarrick J. Wong if (jbd2_has_feature_async_commit(journal) &&
884f73bee49SJan Kara journal->j_flags & JBD2_BARRIER) {
885c6bf3f0eSChristoph Hellwig blkdev_issue_flush(journal->j_dev);
886f73bee49SJan Kara }
887470decc6SDave Kleikamp
888470decc6SDave Kleikamp if (err)
889a7fa2bafSJan Kara jbd2_journal_abort(journal, err);
890470decc6SDave Kleikamp
8919f356e5aSJan Kara WARN_ON_ONCE(
8929f356e5aSJan Kara atomic_read(&commit_transaction->t_outstanding_credits) < 0);
8939f356e5aSJan Kara
8943339578fSJan Kara /*
8953339578fSJan Kara * Now disk caches for filesystem device are flushed so we are safe to
8963339578fSJan Kara * erase checkpointed transactions from the log by updating journal
8973339578fSJan Kara * superblock.
8983339578fSJan Kara */
8993339578fSJan Kara if (update_tail)
9003339578fSJan Kara jbd2_update_log_tail(journal, first_tid, first_block);
9013339578fSJan Kara
902470decc6SDave Kleikamp /* End of a transaction! Finally, we can do checkpoint
903470decc6SDave Kleikamp processing: any buffers committed as a result of this
904470decc6SDave Kleikamp transaction can be removed from any checkpoint list it was on
905470decc6SDave Kleikamp before. */
906470decc6SDave Kleikamp
907cb3b3bf2SJan Kara jbd2_debug(3, "JBD2: commit phase 6\n");
908470decc6SDave Kleikamp
909c851ed54SJan Kara J_ASSERT(list_empty(&commit_transaction->t_inode_list));
910470decc6SDave Kleikamp J_ASSERT(commit_transaction->t_buffers == NULL);
911470decc6SDave Kleikamp J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
912470decc6SDave Kleikamp J_ASSERT(commit_transaction->t_shadow_list == NULL);
913470decc6SDave Kleikamp
914470decc6SDave Kleikamp restart_loop:
915470decc6SDave Kleikamp /*
916470decc6SDave Kleikamp * As there are other places (journal_unmap_buffer()) adding buffers
917470decc6SDave Kleikamp * to this list we have to be careful and hold the j_list_lock.
918470decc6SDave Kleikamp */
919470decc6SDave Kleikamp spin_lock(&journal->j_list_lock);
920470decc6SDave Kleikamp while (commit_transaction->t_forget) {
921470decc6SDave Kleikamp transaction_t *cp_transaction;
922470decc6SDave Kleikamp struct buffer_head *bh;
923de1b7941SJan Kara int try_to_free = 0;
92493108ebbSJan Kara bool drop_ref;
925470decc6SDave Kleikamp
926470decc6SDave Kleikamp jh = commit_transaction->t_forget;
927470decc6SDave Kleikamp spin_unlock(&journal->j_list_lock);
928470decc6SDave Kleikamp bh = jh2bh(jh);
929de1b7941SJan Kara /*
930de1b7941SJan Kara * Get a reference so that bh cannot be freed before we are
931de1b7941SJan Kara * done with it.
932de1b7941SJan Kara */
933de1b7941SJan Kara get_bh(bh);
93446417064SThomas Gleixner spin_lock(&jh->b_state_lock);
93523e2af35Sdingdinghua J_ASSERT_JH(jh, jh->b_transaction == commit_transaction);
936470decc6SDave Kleikamp
937470decc6SDave Kleikamp /*
938470decc6SDave Kleikamp * If there is undo-protected committed data against
939470decc6SDave Kleikamp * this buffer, then we can remove it now. If it is a
940470decc6SDave Kleikamp * buffer needing such protection, the old frozen_data
941470decc6SDave Kleikamp * field now points to a committed version of the
942470decc6SDave Kleikamp * buffer, so rotate that field to the new committed
943470decc6SDave Kleikamp * data.
944470decc6SDave Kleikamp *
945470decc6SDave Kleikamp * Otherwise, we can just throw away the frozen data now.
946e06c8227SJoel Becker *
947e06c8227SJoel Becker * We also know that the frozen data has already fired
948e06c8227SJoel Becker * its triggers if they exist, so we can clear that too.
949470decc6SDave Kleikamp */
950470decc6SDave Kleikamp if (jh->b_committed_data) {
951af1e76d6SMingming Cao jbd2_free(jh->b_committed_data, bh->b_size);
952470decc6SDave Kleikamp jh->b_committed_data = NULL;
953470decc6SDave Kleikamp if (jh->b_frozen_data) {
954470decc6SDave Kleikamp jh->b_committed_data = jh->b_frozen_data;
955470decc6SDave Kleikamp jh->b_frozen_data = NULL;
956e06c8227SJoel Becker jh->b_frozen_triggers = NULL;
957470decc6SDave Kleikamp }
958470decc6SDave Kleikamp } else if (jh->b_frozen_data) {
959af1e76d6SMingming Cao jbd2_free(jh->b_frozen_data, bh->b_size);
960470decc6SDave Kleikamp jh->b_frozen_data = NULL;
961e06c8227SJoel Becker jh->b_frozen_triggers = NULL;
962470decc6SDave Kleikamp }
963470decc6SDave Kleikamp
964470decc6SDave Kleikamp spin_lock(&journal->j_list_lock);
965470decc6SDave Kleikamp cp_transaction = jh->b_cp_transaction;
966470decc6SDave Kleikamp if (cp_transaction) {
967470decc6SDave Kleikamp JBUFFER_TRACE(jh, "remove from old cp transaction");
9688e85fb3fSJohann Lombardi cp_transaction->t_chp_stats.cs_dropped++;
969f7f4bccbSMingming Cao __jbd2_journal_remove_checkpoint(jh);
970470decc6SDave Kleikamp }
971470decc6SDave Kleikamp
972470decc6SDave Kleikamp /* Only re-checkpoint the buffer_head if it is marked
973470decc6SDave Kleikamp * dirty. If the buffer was added to the BJ_Forget list
974f7f4bccbSMingming Cao * by jbd2_journal_forget, it may no longer be dirty and
975470decc6SDave Kleikamp * there's no point in keeping a checkpoint record for
976470decc6SDave Kleikamp * it. */
977470decc6SDave Kleikamp
978b794e7a6SJan Kara /*
9796a66a7deSzhangyi (F) * A buffer which has been freed while still being journaled
9806a66a7deSzhangyi (F) * by a previous transaction, refile the buffer to BJ_Forget of
9816a66a7deSzhangyi (F) * the running transaction. If the just committed transaction
9826a66a7deSzhangyi (F) * contains "add to orphan" operation, we can completely
9836a66a7deSzhangyi (F) * invalidate the buffer now. We are rather thorough in that
9846a66a7deSzhangyi (F) * since the buffer may be still accessible when blocksize <
9856a66a7deSzhangyi (F) * pagesize and it is attached to the last partial page.
986b794e7a6SJan Kara */
9876a66a7deSzhangyi (F) if (buffer_freed(bh) && !jh->b_next_transaction) {
988c96dceeaSzhangyi (F) struct address_space *mapping;
989c96dceeaSzhangyi (F)
990470decc6SDave Kleikamp clear_buffer_freed(bh);
991470decc6SDave Kleikamp clear_buffer_jbddirty(bh);
992c96dceeaSzhangyi (F)
993c96dceeaSzhangyi (F) /*
994c96dceeaSzhangyi (F) * Block device buffers need to stay mapped all the
995c96dceeaSzhangyi (F) * time, so it is enough to clear buffer_jbddirty and
996c96dceeaSzhangyi (F) * buffer_freed bits. For the file mapping buffers (i.e.
997c96dceeaSzhangyi (F) * journalled data) we need to unmap buffer and clear
998c96dceeaSzhangyi (F) * more bits. We also need to be careful about the check
999c96dceeaSzhangyi (F) * because the data page mapping can get cleared under
1000780f66e5Szhangyi (F) * our hands. Note that if mapping == NULL, we don't
1001780f66e5Szhangyi (F) * need to make buffer unmapped because the page is
1002780f66e5Szhangyi (F) * already detached from the mapping and buffers cannot
1003780f66e5Szhangyi (F) * get reused.
1004c96dceeaSzhangyi (F) */
10050d22fe2fSMatthew Wilcox (Oracle) mapping = READ_ONCE(bh->b_folio->mapping);
1006c96dceeaSzhangyi (F) if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) {
1007b794e7a6SJan Kara clear_buffer_mapped(bh);
1008b794e7a6SJan Kara clear_buffer_new(bh);
1009b794e7a6SJan Kara clear_buffer_req(bh);
1010b794e7a6SJan Kara bh->b_bdev = NULL;
1011b794e7a6SJan Kara }
1012c96dceeaSzhangyi (F) }
1013470decc6SDave Kleikamp
1014470decc6SDave Kleikamp if (buffer_jbddirty(bh)) {
1015470decc6SDave Kleikamp JBUFFER_TRACE(jh, "add to new checkpointing trans");
1016f7f4bccbSMingming Cao __jbd2_journal_insert_checkpoint(jh, commit_transaction);
10177ad7445fSHidehiro Kawai if (is_journal_aborted(journal))
10187ad7445fSHidehiro Kawai clear_buffer_jbddirty(bh);
1019470decc6SDave Kleikamp } else {
1020470decc6SDave Kleikamp J_ASSERT_BH(bh, !buffer_dirty(bh));
1021de1b7941SJan Kara /*
1022de1b7941SJan Kara * The buffer on BJ_Forget list and not jbddirty means
1023470decc6SDave Kleikamp * it has been freed by this transaction and hence it
1024470decc6SDave Kleikamp * could not have been reallocated until this
1025470decc6SDave Kleikamp * transaction has committed. *BUT* it could be
1026470decc6SDave Kleikamp * reallocated once we have written all the data to
1027470decc6SDave Kleikamp * disk and before we process the buffer on BJ_Forget
1028de1b7941SJan Kara * list.
1029de1b7941SJan Kara */
1030de1b7941SJan Kara if (!jh->b_next_transaction)
1031de1b7941SJan Kara try_to_free = 1;
1032470decc6SDave Kleikamp }
1033de1b7941SJan Kara JBUFFER_TRACE(jh, "refile or unfile buffer");
103493108ebbSJan Kara drop_ref = __jbd2_journal_refile_buffer(jh);
103546417064SThomas Gleixner spin_unlock(&jh->b_state_lock);
103693108ebbSJan Kara if (drop_ref)
103793108ebbSJan Kara jbd2_journal_put_journal_head(jh);
1038de1b7941SJan Kara if (try_to_free)
1039de1b7941SJan Kara release_buffer_page(bh); /* Drops bh reference */
1040de1b7941SJan Kara else
1041de1b7941SJan Kara __brelse(bh);
1042470decc6SDave Kleikamp cond_resched_lock(&journal->j_list_lock);
1043470decc6SDave Kleikamp }
1044470decc6SDave Kleikamp spin_unlock(&journal->j_list_lock);
1045470decc6SDave Kleikamp /*
1046f5a7a6b0SJan Kara * This is a bit sleazy. We use j_list_lock to protect transition
1047f5a7a6b0SJan Kara * of a transaction into T_FINISHED state and calling
1048f5a7a6b0SJan Kara * __jbd2_journal_drop_transaction(). Otherwise we could race with
1049f5a7a6b0SJan Kara * other checkpointing code processing the transaction...
1050470decc6SDave Kleikamp */
1051a931da6aSTheodore Ts'o write_lock(&journal->j_state_lock);
1052470decc6SDave Kleikamp spin_lock(&journal->j_list_lock);
1053470decc6SDave Kleikamp /*
1054470decc6SDave Kleikamp * Now recheck if some buffers did not get attached to the transaction
1055470decc6SDave Kleikamp * while the lock was dropped...
1056470decc6SDave Kleikamp */
1057470decc6SDave Kleikamp if (commit_transaction->t_forget) {
1058470decc6SDave Kleikamp spin_unlock(&journal->j_list_lock);
1059a931da6aSTheodore Ts'o write_unlock(&journal->j_state_lock);
1060470decc6SDave Kleikamp goto restart_loop;
1061470decc6SDave Kleikamp }
1062470decc6SDave Kleikamp
1063d4e839d4STheodore Ts'o /* Add the transaction to the checkpoint list.
1064d4e839d4STheodore Ts'o * __jbd2_journal_remove_checkpoint() cannot destroy the transaction
1065d4e839d4STheodore Ts'o * under us because it is not marked as T_FINISHED yet. */
1066d4e839d4STheodore Ts'o if (journal->j_checkpoint_transactions == NULL) {
1067d4e839d4STheodore Ts'o journal->j_checkpoint_transactions = commit_transaction;
1068d4e839d4STheodore Ts'o commit_transaction->t_cpnext = commit_transaction;
1069d4e839d4STheodore Ts'o commit_transaction->t_cpprev = commit_transaction;
1070d4e839d4STheodore Ts'o } else {
1071d4e839d4STheodore Ts'o commit_transaction->t_cpnext =
1072d4e839d4STheodore Ts'o journal->j_checkpoint_transactions;
1073d4e839d4STheodore Ts'o commit_transaction->t_cpprev =
1074d4e839d4STheodore Ts'o commit_transaction->t_cpnext->t_cpprev;
1075d4e839d4STheodore Ts'o commit_transaction->t_cpnext->t_cpprev =
1076d4e839d4STheodore Ts'o commit_transaction;
1077d4e839d4STheodore Ts'o commit_transaction->t_cpprev->t_cpnext =
1078d4e839d4STheodore Ts'o commit_transaction;
1079d4e839d4STheodore Ts'o }
1080d4e839d4STheodore Ts'o spin_unlock(&journal->j_list_lock);
1081d4e839d4STheodore Ts'o
1082470decc6SDave Kleikamp /* Done with this transaction! */
1083470decc6SDave Kleikamp
1084cb3b3bf2SJan Kara jbd2_debug(3, "JBD2: commit phase 7\n");
1085470decc6SDave Kleikamp
1086bbd2be36SJan Kara J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);
1087470decc6SDave Kleikamp
10888e85fb3fSJohann Lombardi commit_transaction->t_start = jiffies;
1089bf699327STheodore Ts'o stats.run.rs_logging = jbd2_time_diff(stats.run.rs_logging,
10908e85fb3fSJohann Lombardi commit_transaction->t_start);
10918e85fb3fSJohann Lombardi
10928e85fb3fSJohann Lombardi /*
1093bf699327STheodore Ts'o * File the transaction statistics
10948e85fb3fSJohann Lombardi */
10958e85fb3fSJohann Lombardi stats.ts_tid = commit_transaction->t_tid;
10968dd42046STheodore Ts'o stats.run.rs_handle_count =
10978dd42046STheodore Ts'o atomic_read(&commit_transaction->t_handle_count);
1098bf699327STheodore Ts'o trace_jbd2_run_stats(journal->j_fs_dev->bd_dev,
1099bf699327STheodore Ts'o commit_transaction->t_tid, &stats.run);
110042cf3452STheodore Ts'o stats.ts_requested = (commit_transaction->t_requested) ? 1 : 0;
11018e85fb3fSJohann Lombardi
1102794446c6SDmitry Monakhov commit_transaction->t_state = T_COMMIT_CALLBACK;
1103470decc6SDave Kleikamp J_ASSERT(commit_transaction == journal->j_committing_transaction);
11047c73ddb7SZhang Yi WRITE_ONCE(journal->j_commit_sequence, commit_transaction->t_tid);
1105470decc6SDave Kleikamp journal->j_committing_transaction = NULL;
1106e07f7183SJosef Bacik commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
1107470decc6SDave Kleikamp
1108e07f7183SJosef Bacik /*
1109e07f7183SJosef Bacik * weight the commit time higher than the average time so we don't
1110e07f7183SJosef Bacik * react too strongly to vast changes in the commit time
1111e07f7183SJosef Bacik */
1112e07f7183SJosef Bacik if (likely(journal->j_average_commit_time))
1113e07f7183SJosef Bacik journal->j_average_commit_time = (commit_time +
1114e07f7183SJosef Bacik journal->j_average_commit_time*3) / 4;
1115e07f7183SJosef Bacik else
1116e07f7183SJosef Bacik journal->j_average_commit_time = commit_time;
1117794446c6SDmitry Monakhov
1118a931da6aSTheodore Ts'o write_unlock(&journal->j_state_lock);
11196c20ec85STheodore Ts'o
1120fb68407bSAneesh Kumar K.V if (journal->j_commit_callback)
1121fb68407bSAneesh Kumar K.V journal->j_commit_callback(journal, commit_transaction);
1122ff780b91SHarshad Shirwadkar if (journal->j_fc_cleanup_callback)
1123e85c81baSXin Yin journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid);
1124fb68407bSAneesh Kumar K.V
1125879c5e6bSTheodore Ts'o trace_jbd2_end_commit(journal, commit_transaction);
1126cb3b3bf2SJan Kara jbd2_debug(1, "JBD2: commit %d complete, head %d\n",
1127470decc6SDave Kleikamp journal->j_commit_sequence, journal->j_tail_sequence);
1128470decc6SDave Kleikamp
1129794446c6SDmitry Monakhov write_lock(&journal->j_state_lock);
1130ff780b91SHarshad Shirwadkar journal->j_flags &= ~JBD2_FULL_COMMIT_ONGOING;
1131ff780b91SHarshad Shirwadkar journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
1132794446c6SDmitry Monakhov spin_lock(&journal->j_list_lock);
1133794446c6SDmitry Monakhov commit_transaction->t_state = T_FINISHED;
1134d4e839d4STheodore Ts'o /* Check if the transaction can be dropped now that we are finished */
1135be222553SZhang Yi if (commit_transaction->t_checkpoint_list == NULL) {
1136794446c6SDmitry Monakhov __jbd2_journal_drop_transaction(journal, commit_transaction);
1137794446c6SDmitry Monakhov jbd2_journal_free_transaction(commit_transaction);
1138794446c6SDmitry Monakhov }
1139794446c6SDmitry Monakhov spin_unlock(&journal->j_list_lock);
1140794446c6SDmitry Monakhov write_unlock(&journal->j_state_lock);
1141470decc6SDave Kleikamp wake_up(&journal->j_wait_done_commit);
1142ff780b91SHarshad Shirwadkar wake_up(&journal->j_fc_wait);
114342cf3452STheodore Ts'o
114442cf3452STheodore Ts'o /*
114542cf3452STheodore Ts'o * Calculate overall stats
114642cf3452STheodore Ts'o */
114742cf3452STheodore Ts'o spin_lock(&journal->j_history_lock);
114842cf3452STheodore Ts'o journal->j_stats.ts_tid++;
114942cf3452STheodore Ts'o journal->j_stats.ts_requested += stats.ts_requested;
115042cf3452STheodore Ts'o journal->j_stats.run.rs_wait += stats.run.rs_wait;
115142cf3452STheodore Ts'o journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay;
115242cf3452STheodore Ts'o journal->j_stats.run.rs_running += stats.run.rs_running;
115342cf3452STheodore Ts'o journal->j_stats.run.rs_locked += stats.run.rs_locked;
115442cf3452STheodore Ts'o journal->j_stats.run.rs_flushing += stats.run.rs_flushing;
115542cf3452STheodore Ts'o journal->j_stats.run.rs_logging += stats.run.rs_logging;
115642cf3452STheodore Ts'o journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count;
115742cf3452STheodore Ts'o journal->j_stats.run.rs_blocks += stats.run.rs_blocks;
115842cf3452STheodore Ts'o journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged;
115942cf3452STheodore Ts'o spin_unlock(&journal->j_history_lock);
1160470decc6SDave Kleikamp }
1161