hi!
this time: concurrent block/inode allocation for EXT3 against 2.5.65.
it should be applied on top of ext2-concurrent-balloc, because it uses
ext2_set_bit_atomic() and ext2_clear_bit_atomic().
to see a real improvement, you should use 2.5.65-mm4, in which Andrew Morton
pushed the BKL down into JBD.
1) each group has its own spinlock, which is used for group counter
   modifications (see the sketch after this list)
2) sb->s_free_blocks_count isn't used any more. ext3_statfs() and
   find_group_orlov() loop over the groups to count free blocks, as
   sketched below
3) sb->s_free_blocks_count is recalculated at mount/umount/sync_super time
in order to check consistency and to avoid fsck warnings
4) reserved blocks are distributed over the last groups
5) ext3_new_block() tries to use non-reserved blocks first; if that fails,
   it retries using reserved blocks
6) ext3_new_block() and ext3_free_blocks() do not modify sb->s_free_blocks,
   and therefore do not call mark_buffer_dirty() on the superblock's
   buffer_head. this should reduce I/O a bit
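for reference, a simplified sketch of the per-group info all this relies
on (the real header changes are in the patch; the exact layout there may
differ slightly from this):

	struct ext3_bg_info {
		spinlock_t bg_balloc_lock;	/* guards bg_free_blocks_count
						 * updates for this group */
		unsigned long bg_reserved;	/* reserved blocks assigned
						 * to this group */
	};
	/* EXT3_SB(sb)->s_bgi[] holds one of these per group */

	/* the bitmap helpers just wrap the ext2 ones from
	 * ext2-concurrent-balloc, which take the per-group lock on
	 * arches without atomic bitops: */
	#define ext3_set_bit_atomic(lock, nr, addr) \
			ext2_set_bit_atomic(lock, nr, addr)
	#define ext3_clear_bit_atomic(lock, nr, addr) \
			ext2_clear_bit_atomic(lock, nr, addr)

and counting free blocks becomes a loop over the group descriptors,
roughly:

	static unsigned long count_free_blocks(struct super_block *sb)
	{
		unsigned long free = 0;
		int i;

		for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
			struct ext3_group_desc *gdp =
				ext3_get_group_desc(sb, i, NULL);
			if (gdp)
				free += le16_to_cpu(gdp->bg_free_blocks_count);
		}
		return free;
	}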
diff -puNr linux-2.5.65/fs/ext3/balloc.c edited/fs/ext3/balloc.c
--- linux-2.5.65/fs/ext3/balloc.c Thu Feb 20 16:19:06 2003
+++ edited/fs/ext3/balloc.c Mon Mar 24 16:17:40 2003
@@ -118,7 +118,6 @@ void ext3_free_blocks (handle_t *handle,
printk ("ext3_free_blocks: nonexistent device");
return;
}
- lock_super (sb);
es = EXT3_SB(sb)->s_es;
if (block < le32_to_cpu(es->s_first_data_block) ||
block + count < block ||
@@ -184,11 +183,6 @@ do_more:
if (err)
goto error_return;
- BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access");
- err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
- if (err)
- goto error_return;
-
for (i = 0; i < count; i++) {
/*
* An HJ special. This is expensive...
@@ -208,18 +202,15 @@ do_more:
}
#endif
BUFFER_TRACE(bitmap_bh, "clear bit");
- if (!ext3_clear_bit (bit + i, bitmap_bh->b_data)) {
+ if (!ext3_clear_bit_atomic (&EXT3_SB(sb)->s_bgi[block_group].bg_balloc_lock,
+ bit + i, bitmap_bh->b_data)) {
ext3_error (sb, __FUNCTION__,
"bit already cleared for block %lu",
block + i);
BUFFER_TRACE(bitmap_bh, "bit already cleared");
- } else {
+ } else
dquot_freed_blocks++;
- gdp->bg_free_blocks_count =
- cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)+1);
- es->s_free_blocks_count =
- cpu_to_le32(le32_to_cpu(es->s_free_blocks_count)+1);
- }
+
/* @@@ This prevents newly-allocated data from being
* freed and then reallocated within the same
* transaction.
@@ -244,6 +235,11 @@ do_more:
ext3_set_bit(bit + i, bh2jh(bitmap_bh)->b_committed_data);
}
+ spin_lock(&EXT3_SB(sb)->s_bgi[block_group].bg_balloc_lock);
+ gdp->bg_free_blocks_count =
+ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + dquot_freed_blocks);
+ spin_unlock(&EXT3_SB(sb)->s_bgi[block_group].bg_balloc_lock);
+
/* We dirtied the bitmap block */
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
err = ext3_journal_dirty_metadata(handle, bitmap_bh);
@@ -253,11 +249,6 @@ do_more:
ret = ext3_journal_dirty_metadata(handle, gd_bh);
if (!err) err = ret;
- /* And the superblock */
- BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "dirtied superblock");
- ret = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
- if (!err) err = ret;
-
if (overflow && !err) {
block += count;
count = overflow;
@@ -267,7 +258,6 @@ do_more:
error_return:
brelse(bitmap_bh);
ext3_std_error(sb, err);
- unlock_super(sb);
if (dquot_freed_blocks)
DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
return;
@@ -367,6 +357,61 @@ static int find_next_usable_block(int st
return -1;
}
+
+int
+ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
+ struct buffer_head *bitmap_bh, int goal,
+ int *errp)
+{
+ int i, fatal = 0;
+
+ *errp = 0;
+
+ if (goal >= 0 && ext3_test_allocatable(goal, bitmap_bh))
+ goto got;
+
+repeat:
+ goal = find_next_usable_block(goal, bitmap_bh,
+ EXT3_BLOCKS_PER_GROUP(sb));
+ if (goal < 0)
+ return -1;
+
+ for (i = 0;
+ i < 7 && goal > 0 && ext3_test_allocatable(goal - 1, bitmap_bh);
+ i++, goal--);
+
+got:
+ /* Make sure we use undo access for the bitmap, because it is
+ * critical that we do the frozen_data COW on bitmap buffers in
+ * all cases even if the buffer is in BJ_Forget state in the
+ * committing transaction. */
+ BUFFER_TRACE(bitmap_bh, "get undo access for marking new block");
+ fatal = ext3_journal_get_undo_access(handle, bitmap_bh);
+ if (fatal) {
+ *errp = fatal;
+ return -1;
+ }
+
+ if (ext3_set_bit_atomic(&EXT3_SB(sb)->s_bgi[group].bg_balloc_lock,
+ goal, bitmap_bh->b_data)) {
+ /* already allocated by concurrent thread -bzzz */
+ goal++;
+ if (goal >= EXT3_BLOCKS_PER_GROUP(sb))
+ return -1;
+ goto repeat;
+ }
+
+ BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for bitmap block");
+ fatal = ext3_journal_dirty_metadata(handle, bitmap_bh);
+ if (fatal) {
+ *errp = fatal;
+ return -1;
+ }
+
+ return goal;
+}
+
+
/*
* ext3_new_block uses a goal block to assist allocation. If the goal is
* free, or there is a free block within 32 blocks of the goal, that block
@@ -387,6 +432,7 @@ ext3_new_block(handle_t *handle, struct
int target_block; /* tmp */
int fatal = 0, err;
int performed_allocation = 0;
+ int free, use_reserve = 0;
struct super_block *sb;
struct ext3_group_desc *gdp;
struct ext3_super_block *es;
@@ -408,16 +454,7 @@ ext3_new_block(handle_t *handle, struct
return 0;
}
- lock_super(sb);
es = EXT3_SB(sb)->s_es;
- if (le32_to_cpu(es->s_free_blocks_count) <=
- le32_to_cpu(es->s_r_blocks_count) &&
- ((EXT3_SB(sb)->s_resuid != current->fsuid) &&
- (EXT3_SB(sb)->s_resgid == 0 ||
- !in_group_p(EXT3_SB(sb)->s_resgid)) &&
- !capable(CAP_SYS_RESOURCE)))
- goto out;
-
ext3_debug("goal=%lu.\n", goal);
/*
@@ -431,40 +468,28 @@ ext3_new_block(handle_t *handle, struct
gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
if (!gdp)
goto io_error;
-
- if (le16_to_cpu(gdp->bg_free_blocks_count) > 0) {
+
+ free = le16_to_cpu(gdp->bg_free_blocks_count);
+ free -= EXT3_SB(sb)->s_bgi[group_no].bg_reserved;
+ if (free > 0) {
ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
EXT3_BLOCKS_PER_GROUP(sb));
-#ifdef EXT3FS_DEBUG
- if (ret_block)
- goal_attempts++;
-#endif
bitmap_bh = read_block_bitmap(sb, group_no);
if (!bitmap_bh)
- goto io_error;
-
- ext3_debug("goal is at %d:%d.\n", group_no, ret_block);
-
- if (ext3_test_allocatable(ret_block, bitmap_bh)) {
-#ifdef EXT3FS_DEBUG
- goal_hits++;
- ext3_debug("goal bit allocated.\n");
-#endif
- goto got_block;
- }
-
- ret_block = find_next_usable_block(ret_block, bitmap_bh,
- EXT3_BLOCKS_PER_GROUP(sb));
+ goto io_error;
+ ret_block = ext3_try_to_allocate(sb, handle, group_no, bitmap_bh,
+ ret_block, &fatal);
+ if (fatal)
+ goto out;
if (ret_block >= 0)
- goto search_back;
+ goto allocated;
}
-
- ext3_debug("Bit not found in block group %d.\n", group_no);
-
+
/*
* Now search the rest of the groups. We assume that
* i and gdp correctly point to the last group visited.
*/
+repeat:
for (bit = 0; bit < EXT3_SB(sb)->s_groups_count; bit++) {
group_no++;
if (group_no >= EXT3_SB(sb)->s_groups_count)
@@ -474,57 +499,47 @@ ext3_new_block(handle_t *handle, struct
*errp = -EIO;
goto out;
}
- if (le16_to_cpu(gdp->bg_free_blocks_count) > 0) {
- brelse(bitmap_bh);
- bitmap_bh = read_block_bitmap(sb, group_no);
- if (!bitmap_bh)
- goto io_error;
- ret_block = find_next_usable_block(-1, bitmap_bh,
- EXT3_BLOCKS_PER_GROUP(sb));
- if (ret_block >= 0)
- goto search_back;
- }
- }
+ free = le16_to_cpu(gdp->bg_free_blocks_count);
+ if (!use_reserve)
+ free -= EXT3_SB(sb)->s_bgi[group_no].bg_reserved;
+ if (free <= 0)
+ continue;
+ brelse(bitmap_bh);
+ bitmap_bh = read_block_bitmap(sb, group_no);
+ if (!bitmap_bh)
+ goto io_error;
+ ret_block = ext3_try_to_allocate(sb, handle, group_no,
+ bitmap_bh, -1, &fatal);
+ if (fatal)
+ goto out;
+ if (ret_block >= 0)
+ goto allocated;
+ }
+
+ if (!use_reserve &&
+ (EXT3_SB(sb)->s_resuid == current->fsuid ||
+ (EXT3_SB(sb)->s_resgid != 0 && in_group_p(EXT3_SB(sb)->s_resgid)) ||
+ capable(CAP_SYS_RESOURCE))) {
+ use_reserve = 1;
+ group_no = 0;
+ goto repeat;
+ }
+
/* No space left on the device */
+ *errp = -ENOSPC;
goto out;
-search_back:
- /*
- * We have succeeded in finding a free byte in the block
- * bitmap. Now search backwards up to 7 bits to find the
- * start of this group of free blocks.
- */
- for ( bit = 0;
- bit < 7 && ret_block > 0 &&
- ext3_test_allocatable(ret_block - 1, bitmap_bh);
- bit++, ret_block--)
- ;
-
-got_block:
+allocated:
ext3_debug("using block group %d(%d)\n",
group_no, gdp->bg_free_blocks_count);
- /* Make sure we use undo access for the bitmap, because it is
- critical that we do the frozen_data COW on bitmap buffers in
- all cases even if the buffer is in BJ_Forget state in the
- committing transaction. */
- BUFFER_TRACE(bitmap_bh, "get undo access for marking new block");
- fatal = ext3_journal_get_undo_access(handle, bitmap_bh);
- if (fatal)
- goto out;
-
BUFFER_TRACE(gdp_bh, "get_write_access");
fatal = ext3_journal_get_write_access(handle, gdp_bh);
if (fatal)
goto out;
- BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access");
- fatal = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
- if (fatal)
- goto out;
-
target_block = ret_block + group_no * EXT3_BLOCKS_PER_GROUP(sb)
+ le32_to_cpu(es->s_first_data_block);
@@ -536,11 +551,6 @@ got_block:
"Allocating block in system zone - "
"block = %u", target_block);
- /* The superblock lock should guard against anybody else beating
- * us to this point! */
- J_ASSERT_BH(bitmap_bh, !ext3_test_bit(ret_block, bitmap_bh->b_data));
- BUFFER_TRACE(bitmap_bh, "setting bitmap bit");
- ext3_set_bit(ret_block, bitmap_bh->b_data);
...