bcachefs: Split out journal pins by btree level
This lets us flush the journal to go read-only more effectively.

Flushing the journal and going read-only requires halting mutually recursive processes, which strictly speaking are not guaranteed to terminate.

Flushing btree node journal pins will kick off a btree node write, and btree node writes on completion must do another btree update to the parent node to update the 'sectors_written' field for that node's key.

If the parent node is full and requires a split or compaction, that's going to generate a whole bunch of additional btree updates - alloc info, LRU btree, and more - which then have to be flushed, and the cycle repeats.

This process will terminate much more effectively if we tweak journal reclaim to flush btree updates leaf to root: i.e., don't flush updates for a given btree node (kicking off a write, and consuming space within that node up to the next block boundary) if there might still be unflushed updates in child nodes.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
1c316eb57c
commit
1e690efa72
2 changed files with 22 additions and 20 deletions
|
@ -384,12 +384,16 @@ void bch2_journal_pin_drop(struct journal *j,
|
|||
spin_unlock(&j->lock);
|
||||
}
|
||||
|
||||
static enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn)
|
||||
static enum journal_pin_type journal_pin_type(struct journal_entry_pin *pin,
|
||||
journal_pin_flush_fn fn)
|
||||
{
|
||||
if (fn == bch2_btree_node_flush0 ||
|
||||
fn == bch2_btree_node_flush1)
|
||||
return JOURNAL_PIN_TYPE_btree;
|
||||
else if (fn == bch2_btree_key_cache_journal_flush)
|
||||
fn == bch2_btree_node_flush1) {
|
||||
unsigned idx = fn == bch2_btree_node_flush1;
|
||||
struct btree *b = container_of(pin, struct btree, writes[idx].journal);
|
||||
|
||||
return JOURNAL_PIN_TYPE_btree0 - b->c.level;
|
||||
} else if (fn == bch2_btree_key_cache_journal_flush)
|
||||
return JOURNAL_PIN_TYPE_key_cache;
|
||||
else
|
||||
return JOURNAL_PIN_TYPE_other;
|
||||
|
@ -441,7 +445,7 @@ void bch2_journal_pin_copy(struct journal *j,
|
|||
|
||||
bool reclaim = __journal_pin_drop(j, dst);
|
||||
|
||||
bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(flush_fn));
|
||||
bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(dst, flush_fn));
|
||||
|
||||
if (reclaim)
|
||||
bch2_journal_reclaim_fast(j);
|
||||
|
@ -465,7 +469,7 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
|
|||
|
||||
bool reclaim = __journal_pin_drop(j, pin);
|
||||
|
||||
bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(flush_fn));
|
||||
bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(pin, flush_fn));
|
||||
|
||||
if (reclaim)
|
||||
bch2_journal_reclaim_fast(j);
|
||||
|
@ -587,7 +591,7 @@ static size_t journal_flush_pins(struct journal *j,
|
|||
spin_lock(&j->lock);
|
||||
/* Pin might have been dropped or rearmed: */
|
||||
if (likely(!err && !j->flush_in_progress_dropped))
|
||||
list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(flush_fn)]);
|
||||
list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(pin, flush_fn)]);
|
||||
j->flush_in_progress = NULL;
|
||||
j->flush_in_progress_dropped = false;
|
||||
spin_unlock(&j->lock);
|
||||
|
@ -869,18 +873,13 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
|
|||
|
||||
mutex_lock(&j->reclaim_lock);
|
||||
|
||||
if (journal_flush_pins_or_still_flushing(j, seq_to_flush,
|
||||
BIT(JOURNAL_PIN_TYPE_key_cache)|
|
||||
BIT(JOURNAL_PIN_TYPE_other))) {
|
||||
*did_work = true;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (journal_flush_pins_or_still_flushing(j, seq_to_flush,
|
||||
BIT(JOURNAL_PIN_TYPE_btree))) {
|
||||
*did_work = true;
|
||||
goto unlock;
|
||||
}
|
||||
for (int type = JOURNAL_PIN_TYPE_NR - 1;
|
||||
type >= 0;
|
||||
--type)
|
||||
if (journal_flush_pins_or_still_flushing(j, seq_to_flush, BIT(type))) {
|
||||
*did_work = true;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (seq_to_flush > journal_cur_seq(j))
|
||||
bch2_journal_entry_close(j);
|
||||
|
|
|
@ -53,7 +53,10 @@ struct journal_buf {
|
|||
*/
|
||||
|
||||
enum journal_pin_type {
|
||||
JOURNAL_PIN_TYPE_btree,
|
||||
JOURNAL_PIN_TYPE_btree3,
|
||||
JOURNAL_PIN_TYPE_btree2,
|
||||
JOURNAL_PIN_TYPE_btree1,
|
||||
JOURNAL_PIN_TYPE_btree0,
|
||||
JOURNAL_PIN_TYPE_key_cache,
|
||||
JOURNAL_PIN_TYPE_other,
|
||||
JOURNAL_PIN_TYPE_NR,
|
||||
|
|
Loading…
Add table
Reference in a new issue