concurrent inode allocation for ext2 against 2.5.64

concurrent inode allocation for ext2 against 2.5.64

Post by Alex Tomas » Sun, 16 Mar 2003 23:20:07



hi!

here is the patch for ext2 concurrent inode allocation. it should be applied
on top of the previous concurrent-balloc patch. tested on a dual P3 for several
hours of stress-test + fsck. hope someone tests it on big iron ;)

diff -uNr linux/fs/ext2/ialloc.c edited/fs/ext2/ialloc.c
--- linux/fs/ext2/ialloc.c      Sat Mar 15 23:34:17 2003
+++ edited/fs/ext2/ialloc.c     Sat Mar 15 23:05:19 2003
@@ -63,6 +63,52 @@
        return bh;
 }

+void ext2_reserve_inode (struct super_block * sb, int group, int dir)
+{
+       struct ext2_group_desc * desc;
+       struct buffer_head *bh;
+
+       desc = ext2_get_group_desc(sb, group, &bh);
+       if (!desc) {
+               ext2_error(sb, "ext2_reserve_inode",
+                       "can't get descriptor for group %d", group);
+       return;
+       }
+
+       spin_lock(&EXT2_SB(sb)->s_bgi[group].ialloc_lock);
+       desc->bg_free_inodes_count =
+               cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) - 1);
+       if (dir)
+               desc->bg_used_dirs_count =
+                       cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) + 1);
+       spin_unlock(&EXT2_SB(sb)->s_bgi[group].ialloc_lock);
+
+       mark_buffer_dirty(bh);
+}
+
+void ext2_release_inode (struct super_block * sb, int group, int dir)
+{
+       struct ext2_group_desc * desc;
+       struct buffer_head *bh;
+
+       desc = ext2_get_group_desc(sb, group, &bh);
+       if (!desc) {
+               ext2_error(sb, "ext2_release_inode",
+                       "can't get descriptor for group %d", group);
+               return;
+       }
+
+       spin_lock(&EXT2_SB(sb)->s_bgi[group].ialloc_lock);
+       desc->bg_free_inodes_count =
+               cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1);
+       if (dir)
+               desc->bg_used_dirs_count =
+                       cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1);
+       spin_unlock(&EXT2_SB(sb)->s_bgi[group].ialloc_lock);
+
+       mark_buffer_dirty(bh);
+}
+
 /*
  * NOTE! When we get the inode, we're the only people
  * that have access to it, and as such there are no
@@ -85,10 +131,8 @@
        int is_directory;
        unsigned long ino;
        struct buffer_head *bitmap_bh = NULL;
-       struct buffer_head *bh2;
        unsigned long block_group;
        unsigned long bit;
-       struct ext2_group_desc * desc;
        struct ext2_super_block * es;

        ino = inode->i_ino;
@@ -105,7 +149,6 @@
                DQUOT_DROP(inode);
        }

-       lock_super (sb);
        es = EXT2_SB(sb)->s_es;
        is_directory = S_ISDIR(inode->i_mode);

@@ -126,32 +169,17 @@
                goto error_return;

        /* Ok, now we can actually update the inode bitmaps.. */
-       if (!ext2_clear_bit(bit, bitmap_bh->b_data))
+       if (!ext2_clear_bit_atomic(&EXT2_SB(sb)->s_bgi[block_group].ialloc_lock,
+                               bit, (void *) bitmap_bh->b_data))
                ext2_error (sb, "ext2_free_inode",
                              "bit already cleared for inode %lu", ino);
-       else {
-               desc = ext2_get_group_desc (sb, block_group, &bh2);
-               if (desc) {
-                       desc->bg_free_inodes_count =
-                               cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1);
-                       if (is_directory) {
-                               desc->bg_used_dirs_count =
-                                       cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1);
-                               EXT2_SB(sb)->s_dir_count--;
-                       }
-               }
-               mark_buffer_dirty(bh2);
-               es->s_free_inodes_count =
-                       cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) + 1);
-               mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
-       }
+       else
+               ext2_release_inode(sb, block_group, is_directory);
        mark_buffer_dirty(bitmap_bh);
        if (sb->s_flags & MS_SYNCHRONOUS)
                sync_dirty_buffer(bitmap_bh);
-       sb->s_dirt = 1;
 error_return:
        brelse(bitmap_bh);
-       unlock_super (sb);
 }

 /*
@@ -211,9 +239,8 @@
  */
 static int find_group_dir(struct super_block *sb, struct inode *parent)
 {
-       struct ext2_super_block * es = EXT2_SB(sb)->s_es;
        int ngroups = EXT2_SB(sb)->s_groups_count;
-       int avefreei = le32_to_cpu(es->s_free_inodes_count) / ngroups;
+       int avefreei = ext2_count_free_inodes(sb) / ngroups;
        struct ext2_group_desc *desc, *best_desc = NULL;
        struct buffer_head *bh, *best_bh = NULL;
        int group, best_group = -1;
@@ -234,11 +261,9 @@
        }
        if (!best_desc)
                return -1;
-       best_desc->bg_free_inodes_count =
-               cpu_to_le16(le16_to_cpu(best_desc->bg_free_inodes_count) - 1);
-       best_desc->bg_used_dirs_count =
-               cpu_to_le16(le16_to_cpu(best_desc->bg_used_dirs_count) + 1);
-       mark_buffer_dirty(best_bh);
+
+       ext2_reserve_inode(sb, best_group, 1);
+
        return best_group;
 }

@@ -277,11 +302,12 @@
        struct ext2_super_block *es = sbi->s_es;
        int ngroups = sbi->s_groups_count;
        int inodes_per_group = EXT2_INODES_PER_GROUP(sb);
-       int avefreei = le32_to_cpu(es->s_free_inodes_count) / ngroups;
+       int freei = ext2_count_free_inodes(sb);
+       int avefreei = freei / ngroups;
        int free_blocks = ext2_count_free_blocks(sb);
        int avefreeb = free_blocks / ngroups;
        int blocks_per_dir;
-       int ndirs = sbi->s_dir_count;
+       int ndirs = ext2_count_dirs(sb);
        int max_debt, max_dirs, min_blocks, min_inodes;
        int group = -1, i;
        struct ext2_group_desc *desc;
@@ -364,12 +390,8 @@
        return -1;

 found:
-       desc->bg_free_inodes_count =
-               cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) - 1);
-       desc->bg_used_dirs_count =
-               cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) + 1);
-       sbi->s_dir_count++;
-       mark_buffer_dirty(bh);
+       ext2_reserve_inode(sb, group, 1);
+
        return group;
 }

@@ -431,9 +453,8 @@
        return -1;

 found:
-       desc->bg_free_inodes_count =
-               cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) - 1);
-       mark_buffer_dirty(bh);
+       ext2_reserve_inode(sb, group, 0);
+
        return group;
 }

@@ -456,7 +477,6 @@
                return ERR_PTR(-ENOMEM);

        ei = EXT2_I(inode);
-       lock_super (sb);
        es = EXT2_SB(sb)->s_es;
 repeat:
        if (S_ISDIR(mode)) {
@@ -480,7 +500,12 @@
                                      EXT2_INODES_PER_GROUP(sb));
        if (i >= EXT2_INODES_PER_GROUP(sb))
                goto bad_count;
-       ext2_set_bit(i, bitmap_bh->b_data);
+       if (ext2_set_bit_atomic(&EXT2_SB(sb)->s_bgi[group].ialloc_lock,
+                       i, (void *) bitmap_bh->b_data)) {
+               brelse(bitmap_bh);
+               ext2_release_inode(sb, group, S_ISDIR(mode));
+               goto repeat;
+       }

        mark_buffer_dirty(bitmap_bh);
        if (sb->s_flags & MS_SYNCHRONOUS)
@@ -497,9 +524,7 @@
                goto fail2;
        }

-       es->s_free_inodes_count =
-               cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1);
-
+       spin_lock(&EXT2_SB(sb)->s_bgi[group].ialloc_lock);
        if (S_ISDIR(mode)) {
                if (EXT2_SB(sb)->s_bgi[group].debts < 255)
                        EXT2_SB(sb)->s_bgi[group].debts++;
@@ -507,9 +532,8 @@
                if (EXT2_SB(sb)->s_bgi[group].debts)
                        EXT2_SB(sb)->s_bgi[group].debts--;
        }
-
-       mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
-       sb->s_dirt = 1;
+       spin_unlock(&EXT2_SB(sb)->s_bgi[group].ialloc_lock);
+
        inode->i_uid = current->fsuid;
        if (test_opt (sb, GRPID))
                inode->i_gid = dir->i_gid;
@@ -552,7 +576,6 @@
        inode->i_generation = EXT2_SB(sb)->s_next_generation++;
        insert_inode_hash(inode);

-       unlock_super(sb);
        if(DQUOT_ALLOC_INODE(inode)) {
                DQUOT_DROP(inode);
                goto fail3;
@@ -574,15 +597,8 @@
        return ERR_PTR(err);

 fail2:
-       desc = ext2_get_group_desc (sb, group, &bh2);
-       desc->bg_free_inodes_count =
-               cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1);
-       if (S_ISDIR(mode))
-               desc->bg_used_dirs_count =
-                       cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1);
-       mark_buffer_dirty(bh2);
+       ext2_release_inode(sb, group, S_ISDIR(mode));
 fail:
-       unlock_super(sb);
        make_bad_inode(inode);
        iput(inode);
        return ERR_PTR(err);
@@ -605,16 +621,19 @@

 unsigned long ext2_count_free_inodes (struct super_block * sb)
 {
+       struct ext2_group_desc *desc;
+       unsigned long desc_count = 0;
+       int i;  
+
 #ifdef EXT2FS_DEBUG
        struct ext2_super_block * es;
-       unsigned long desc_count = 0, bitmap_count = 0;
+       unsigned long bitmap_count = 0;
        struct buffer_head *bitmap_bh = NULL;
        int i;

        lock_super (sb);
        es = EXT2_SB(sb)->s_es;
        for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
-               struct ext2_group_desc *desc;
                unsigned x;

                desc = ext2_get_group_desc (sb, i, NULL);
@@ -637,7 +656,13 @@
        unlock_super(sb);
        return desc_count;
 #else
-       return le32_to_cpu(EXT2_SB(sb)->s_es->s_free_inodes_count);
+       for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
+               desc = ext2_get_group_desc (sb, i, NULL);
+               if (!desc)
+                       continue;
+               desc_count += le16_to_cpu(desc->bg_free_inodes_count);
+       }
+       return desc_count;
 #endif
 }

diff -uNr linux/fs/ext2/super.c edited/fs/ext2/super.c
--- linux/fs/ext2/super.c       Sat Mar 15 23:34:17 2003
+++ edited/fs/ext2/super.c      Sat Mar 15 22:15:51 2003
@@ -510,6 +510,7 @@

        /* restore free blocks counter in SB -bzzz */
        es->s_free_blocks_count = total_free = ext2_count_free_blocks(sb);
+       es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));

        /* distribute reserved blocks over groups -bzzz */
        for(i = sbi->s_groups_count-1; reserved && total_free && i >= 0; i--) {
@@ -802,6 +803,7 @@
                sbi->s_bgi[i].debts = 0;
                sbi->s_bgi[i].reserved = 0;
                spin_lock_init(&sbi->s_bgi[i].balloc_lock);
+               spin_lock_init(&sbi->s_bgi[i].ialloc_lock);
        }
        for (i = 0; i < db_count; i++) {
                block = descriptor_loc(sb, logic_sb_block, i);
@@ -869,6 +871,7 @@
 static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es)
 {
        es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
+       es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
        es->s_wtime = cpu_to_le32(get_seconds());
        mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
        sync_dirty_buffer(EXT2_SB(sb)->s_sbh);
@@ -898,6 +901,7 @@
                        es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) &
                                                  ~EXT2_VALID_FS);
                        es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
+                       es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
                        es->s_mtime = cpu_to_le32(get_seconds());
                        ext2_sync_super(sb, es);
                } else
diff -uNr linux/include/linux/ext2_fs_sb.h edited/include/linux/ext2_fs_sb.h
--- linux/include/linux/ext2_fs_sb.h    Sat Mar 15 23:34:18 2003
+++ edited/include/linux/ext2_fs_sb.h   Sat Mar 15 21:38:35 2003
@@ -19,6 +19,7 @@
 struct ext2_bg_info {
        u8 debts;
        spinlock_t balloc_lock;
+       spinlock_t ialloc_lock;
        unsigned int reserved;
 } ____cacheline_aligned_in_smp;
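
For readers who have not met the ext2_set_bit_atomic()/ext2_clear_bit_atomic()
helpers the patch relies on: architectures whose ext2 bitops are already atomic
can simply ignore the lock argument, while the others wrap a non-atomic bitop in
the spinlock that is passed in. A minimal sketch of that second variant follows;
it is an illustration of the idea only (it ignores the little-endian bit
numbering the real ext2_* bitops handle on big-endian machines) and the
function names are hypothetical, not the actual asm-*/bitops.h code.

#include <linux/spinlock.h>
#include <asm/bitops.h>

static inline int sketch_ext2_set_bit_atomic(spinlock_t *lock, int nr, void *addr)
{
        int was_set;

        spin_lock(lock);
        was_set = __test_and_set_bit(nr, addr); /* non-atomic variant; the lock provides atomicity */
        spin_unlock(lock);
        return was_set;         /* non-zero: somebody else already owns this inode */
}

static inline int sketch_ext2_clear_bit_atomic(spinlock_t *lock, int nr, void *addr)
{
        int was_set;

        spin_lock(lock);
        was_set = __test_and_clear_bit(nr, addr);
        spin_unlock(lock);
        return was_set;         /* zero: the bit was already clear, i.e. a double free */
}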


concurrent inode allocation for ext2 against 2.5.64

Post by William Lee Irwin III » Sun, 16 Mar 2003 23:20:10



> here is the patch for ext2 concurrent inode allocation. it should be applied
> on top of the previous concurrent-balloc patch. tested on a dual P3 for several
> hours of stress-test + fsck. hope someone tests it on big iron ;)

benching now

concurrent inode allocation for ext2 against 2.5.64

Post by Andrew Morton » Mon, 17 Mar 2003 00:00:14



> hi!

> here is the patch for ext2 concurrent inode allocation. it should be applied
> on top of the previous concurrent-balloc patch. tested on a dual P3 for several
> hours of stress-test + fsck. hope someone tests it on big iron ;)

> ...
> +void ext2_reserve_inode (struct super_block * sb, int group, int dir)
> +{

This can have static scope.  And, please, no spaces after the function name,
nor after the `*' thingy.  ext2 is all over the place in this regard and I'm
slowly trying to get it consistent.

I'm not sure that skipping setting s_dirt is desirable.  Sure, we haven't
actually altered the superblock.  But we sort-of "virtually dirtied" it.  The
superblock is now out-of-date and we should sync it.

It could be that not writing the superblock for a week is an OK thing to do.
inode and block allocation counts are something which fsck can trivially fix
up.  But at the cost of a single sector write per five seconds I think it's
best to keep the superblock more up-to-date.

I'll make the same change to the block allocator patches.
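
For context on why that flag matters: once sb->s_dirt is set, the periodic
superblock sync (the five-second pass mentioned above) invokes the filesystem's
write_super method, and with these patches ext2's sync path re-derives the
counters from the group descriptors before writing. A rough sketch of that
flow, condensed from the super.c hunks in the original patch (not verbatim
kernel code):

static void sketch_ext2_write_super(struct super_block *sb)
{
        struct ext2_super_block *es = EXT2_SB(sb)->s_es;

        if (!(sb->s_flags & MS_RDONLY)) {
                /* recompute the "logically dirtied" counts from the
                 * per-group descriptors, then push the superblock out */
                es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
                es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
                es->s_wtime = cpu_to_le32(get_seconds());
                mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
                sync_dirty_buffer(EXT2_SB(sb)->s_sbh);
        }
        sb->s_dirt = 0;
}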

Quote:>  struct ext2_bg_info {
>    u8 debts;
>    spinlock_t balloc_lock;
> +  spinlock_t ialloc_lock;
>    unsigned int reserved;
>  } ____cacheline_aligned_in_smp;

hm, I wonder if this should be in a separate cacheline.  We may as well use a
single lock if they're this close together.  Bill, can you test that
sometime?
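
For concreteness, the two layouts being weighed are roughly the ones below. The
first is what the patch does; the second is a hedged sketch of the alternative
being suggested, with a made-up struct tag and field name. Since
____cacheline_aligned_in_smp pads and aligns each array element to a cacheline
on SMP builds, the two locks in the first layout normally share a line:

/* as in the patch: two locks, one cacheline per group */
struct ext2_bg_info {
        u8 debts;
        spinlock_t balloc_lock;
        spinlock_t ialloc_lock;
        unsigned int reserved;
} ____cacheline_aligned_in_smp;

/* alternative to measure: a single lock covering both block and inode
 * allocation in the group -- fewer locks bouncing between CPUs, but block
 * and inode allocators working in the same group then serialize on it.
 * (tag and field name are illustrative only) */
struct ext2_bg_info_single_lock {
        u8 debts;
        spinlock_t alloc_lock;
        unsigned int reserved;
} ____cacheline_aligned_in_smp;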

diff -puN fs/ext2/ialloc.c~ext2-ialloc-no-lock_super-fixes fs/ext2/ialloc.c
--- 25/fs/ext2/ialloc.c~ext2-ialloc-no-lock_super-fixes 2003-03-15 13:36:14.000000000 -0800

        return bh;
 }

-void ext2_reserve_inode (struct super_block * sb, int group, int dir)
+/*
+ * Speculatively reserve an inode in a blockgroup which used to have some
+ * spare ones.  Later, when we come to actually claim the inode in the bitmap
+ * it may be that it was taken.  In that case the allocator will undo this
+ * reservation and try again.
+ *
+ * The inode allocator does not physically alter the superblock.  But we still
+ * set sb->s_dirt, because the superblock was "logically" altered - we need to
+ * go and add up the free inodes counts again and flush out the superblock.
+ */
+static void ext2_reserve_inode(struct super_block *sb, int group, int dir)
 {
        struct ext2_group_desc * desc;

        if (!desc) {
                ext2_error(sb, "ext2_reserve_inode",
                        "can't get descriptor for group %d", group);
-       return;
+               return;
        }


                desc->bg_used_dirs_count =
                        cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) + 1);
        spin_unlock(&EXT2_SB(sb)->s_bgi[group].ialloc_lock);
-
+       sb->s_dirt = 1;
        mark_buffer_dirty(bh);
 }

-void ext2_release_inode (struct super_block * sb, int group, int dir)
+static void ext2_release_inode(struct super_block *sb, int group, int dir)
 {
        struct ext2_group_desc * desc;

                desc->bg_used_dirs_count =
                        cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1);
        spin_unlock(&EXT2_SB(sb)->s_bgi[group].ialloc_lock);
-
+       sb->s_dirt = 1;
        mark_buffer_dirty(bh);
 }

_
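
Putting the comment above together with the allocator changes in the original
patch, the claim path in ext2_new_inode() now follows roughly the pattern
below. This is a simplified sketch only -- quota handling, the debts logic and
most error paths are omitted; see the full patch earlier in the thread for the
real code.

/* hedged sketch of the reserve -> claim -> undo-and-retry flow */
static long sketch_claim_inode(struct super_block *sb, struct inode *dir, int mode)
{
        struct buffer_head *bitmap_bh;
        int group, i;

repeat:
        /* find_group_*() picks a group and, under that group's ialloc_lock,
         * already decrements bg_free_inodes_count: the speculative reservation */
        group = S_ISDIR(mode) ? find_group_dir(sb, dir) : find_group_other(sb, dir);
        if (group == -1)
                return -1;                      /* no free inodes anywhere */

        bitmap_bh = read_inode_bitmap(sb, group);
        i = ext2_find_first_zero_bit(bitmap_bh->b_data, EXT2_INODES_PER_GROUP(sb));

        if (ext2_set_bit_atomic(&EXT2_SB(sb)->s_bgi[group].ialloc_lock,
                                i, (void *) bitmap_bh->b_data)) {
                /* lost the race for this bit: undo the reservation and
                 * go pick a group again */
                brelse(bitmap_bh);
                ext2_release_inode(sb, group, S_ISDIR(mode));
                goto repeat;
        }

        mark_buffer_dirty(bitmap_bh);           /* the bit is ours now */
        brelse(bitmap_bh);
        return group * EXT2_INODES_PER_GROUP(sb) + i + 1;      /* inode number */
}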


concurrent inode allocation for ext2 against 2.5.64

Post by William Lee Irwin III » Mon, 17 Mar 2003 00:10:08


On Sun, Mar 16, 2003 at 12:01:38AM +0300, Alex Tomas wrote:
> here is the patch for ext2 concurrent inode allocation. it should be applied
> on top of the previous concurrent-balloc patch. tested on a dual P3 for several
> hours of stress-test + fsck. hope someone tests it on big iron ;)

32x/48GB NUMA-Q

Throughput 257.986 MB/sec 128 procs
dbench 128  95.36s user 4833.06s system 2832% cpu 2:53.97 total

vma      samples  %-age       symbol name
c01dc9ac 4532033  21.4566     .text.lock.dec_and_lock
c0169c0b 3835802  18.1603     .text.lock.dcache
c0106ff4 1741849  8.24666     default_idle
c0264fe0 1506547  7.13264     sync_buffer
c01dc920 1344198  6.36401     atomic_dec_and_lock
c01dc7c0 1059649  5.01683     __copy_to_user_ll
c015142c 468551   2.21832     vfs_read
c01dc828 405337   1.91904     __copy_from_user_ll
c02651b0 305363   1.44572     add_event_entry
c01688dc 305154   1.44473     d_instantiate
c015272a 241552   1.14361     .text.lock.file_table
c0168d3c 231119   1.09422     d_lookup
c0119ddc 219611   1.03973     scheduler_tick
c01686a0 213012   1.00849     d_alloc
c015f66c 199435   0.94421     path_lookup
c0152530 185470   0.878094    file_move
c0152334 184847   0.875145    __fput
c015162c 180988   0.856874    vfs_write
c011fb0c 177776   0.841668    profile_hook
c0167e78 165423   0.783183    prune_dcache
c0264ef8 154338   0.730702    add_sample
c01677b8 151599   0.717734    dput
c0169234 149571   0.708133    d_rehash
c0122960 134989   0.639096    current_kernel_time
c0119890 134442   0.636506    load_balance

procs -----------memory---------- ---swap-- -----io---- --system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id wa
 1  0      0 49095776   4928  27296    0    0     2     0   33     2  0  0 100  0
 0  0      0 49092768   4960  27552    0    0    80     0 1028    50  0  0 100  0
 7  0      0 49059424   4960  27616    0    0    16     0 1029   221  0  1 99  0
 1  0      0 49055008   4960  27616    0    0     0     0 1026    39  0  0 100  0
133 17      0 48628832  24672 342528    0    0   344     0 1059  6752  4 76 17  4
151 17      0 48356064  49888 580608    0    0   444     0  925   144  8 92  0  0
151 21      0 48115424  73952 783968    0    0   488     0  993   257  9 90  0  1
139 25      0 47834816 101248 1026272    0    0   460     0 1013   378  5 95  0  0
142 22      0 47626176 122208 1206304    0    0   720     0 1019   265  6 94  0  0
87 13      0 47268832 150624 1564096    0    0  1084     0 1150  1809  2 88  1  9
 0  0      0 47327264 150880 1564960    0    0    28     0 1079   374  0  4 96  0
 0  0      0 47331040 150880 1564960    0    0     0     0 1030    69  0  0 100  0
 0  0      0 47329824 150880 1565088    0    0     0     0 1025    21  0  0 100  0
 0  0      0 47329824 150880 1565088    0    0     0     0 1026    20  0  0 100  0
 0  0      0 47329696 150880 1565120    0    0     0     0 1025    22  0  0 100  0
 0  0      0 47330144 150880 1565152    0    0     0     0 1040    58  0  0 100  0
 0  0      0 47330016 150880 1565184    0    0    28     0 1033    40  0  0 100  0
 0  0      0 47329952 150880 1565216    0    0     0     0 1030    33  0  0 100  0
 0  0      0 47329952 150880 1565216    0    0     0     0 1051    93  0  0 100  0
 0  0      0 47330144 150880 1565216    0    0     0     0 1027    22  0  0 100  0
 0  0      0 47330144 150880 1565216    0    0     0     0 1044    62  0  0 100  0
procs -----------memory---------- ---swap-- -----io---- --system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id wa
 1  0      0 47330144 150880 1565248    0    0     0     0 1039   103  0  0 100  0
 0  0      0 47330144 150880 1565248    0    0     0     0 1034    28  0  0 100  0
 0  0      0 47330400 150912 1565280    0    0     8     0 1043    60  0  0 100  0
 0  0      0 47330400 150912 1565280    0    0     0     0 1025    22  0  0 100  0
 0  0      0 47330400 150912 1565280    0    0     0     0 1032    43  0  0 100  0
 0  0      0 47330400 150912 1565280    0    0     0     0 1029    32  0  0 100  0
 5  0      0 47328544 150912 1565344    0    0     4     0 1032    49  0  0 100  0
 0  0      0 47296224 150912 1565344    0    0     0     0 1026   257  0  1 99  0
12  1      0 47129952 150912 1565408    0    0     4     0 1025  1506  2 21 77  0
41  1      0 46885216 173728 1857312    0    0   136     0 1013  1233  2 42 54  2
51  2      0 46698976 194240 2019328    0    0    72     0  996   118 10 85  6  0
62  3      0 46523360 216576 2169952    0    0   140     0 1123   112 10 89  0  1
59  4      0 46364064 235136 2306176    0    0   392     0 1209   177  9 91  0  0
51  5      0 46216288 252224 2433792    0    0   316     0  983   130  5 94  0  0
60  1      0 46053024 268448 2578720    0    0   276     0 1050   221  2 98  0  0
77  1      0 45922400 281376 2698528    0    0   248     0 1052   229  3 97  0  0
96  5      0 45632288 307648 2961568    0    0   248     0 1047   277  3 97  0  0
114  5      0 45373024 335776 3192096    0    0   176     0 1059   346  2 98  0  0
110  2      0 45211424 355840 3330720    0    0    68     0 1042   241  2 98  0  0
111  7      0 45117216 367744 3410240    0    0    44     0 1036   223  1 99  0  0
127  1      0 44932320 384096 3579360    0    0    52     0 1298   501  2 98  0  0
procs -----------memory---------- ---swap-- -----io---- --system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id wa
129  3      0 44775584 401280 3718752    0    0    72     0 1043   305  2 98  0  0
127  1      0 44657376 416512 3818784    0    0    60     0 1044   223  2 98  0  0
127  1      0 44554784 430304 3905664    0    0    56     0 1043   227  2 98  0  0
100 25      0 44485600 440736 3962816    0    0    40     0 1035   243  2 98  0  0
123 19      0 44390144 451712 4045696    0    0    32     0 1034   216  2 98  0  0
124  7      0 44269792 464192 4150816    0    0    32     0 1033   212  2 98  0  0
125  0      0 44189792 473280 4220160    0    0    32     0 1038   220  3 97  0  0
123  1      0 44147296 478816 4255712    0    0    28     0 1031   217  2 98  0  0
126  0      0 44147552 483072 4249472    0    0    12     0 1035   194  2 98  0  0
125  0      0 44157472 485888 4235264    0    0     0     0 1029   188  2 98  0  0
127  0      0 44205536 489184 4182304    0    0     0     0 1018   198  1 99  0  0
121  0      0 44223776 491328 4162272    0    0     0     0 1029   220  1 99  0  0
122  0      0 44231584 493504 4152736    0    0     0     0 1025   321  1 99  0  0
119  0      0 44241952 496544 4137760    0    0     0     0 1031   238  2 98  0  0
121  0      0 44228640 499904 4148128    0    0     0     0 1022   193  2 98  0  0
120  0      0 44213024 502880 4161152    0    0     0     0 1028   195  2 98  0  0
123  0      0 44234400 505280 4135968    0    0     0     0 1026   174  2 98  0  0
116  0      0 44222304 507328 4147360    0    0     0     0 1026   207  1 99  0  0
118  0      0 44215264 509792 4153216    0    0     0     0 1035   236  2 98  0  0
118  0      0 44218336 512960 4146592    0    0     0     0 1023   192  2 98  0  0
120  0      0 44219552 516192 4141984    0    0     8     0 1023   202  2 98  0  0
procs -----------memory---------- ---swap-- -----io---- --system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id wa
118  0      0 44234848 518304 4124768    0    0     0     0 1032   202  1 99  0  0
118  0      0 44224480 522400 4133024    0    0     0     0 1023   239  2 98  0  0
116  0      0 44248544 525504 4105376    0    0     0     0 1034   253  2 98  0  0
118  0      0 44226720 529152 4124640    0    0     0     0 1018   241  1 98  0  0
116  1      0 44202912 533920 4143808    0    0     8     0 1028   207  2 98  0  0
115  0      0 44180192 538080 4162208    0    0    12     0 1029   217  2 98  0  0
114  0      0 44169632 541760 4169536    0    0     8     0 1035   211  2 98  0  0
117  0      0 44161696 545312 4174048    0    0     0     0 1020   198  2 98  0  0
117  0      0 44183264 548640 4149696    0    0     0     0 1030   205  2 98  0  0
119  0      0 44173792 551328 4155936    0    0     0     0 1025   192  2 98  0  0
115  0      0 44184736 553344 4143680    0    0     0     0 1026   211  1 99  0  0
115  0      0 44173216 555840 4154048    0    0     0     0 1028   245  2 98  0  0
115  0      0 44158112 558912 4166176    0    0     0     0 1025   208  2 98  0  0
117  0      0 44173216 561888 4146144    0    0     0     0 1036   239  2 98  0  0
113  0      0 44169888 564128 4148992    0    0     0     0 1019   259  1 99  0  0
114  0      0 44162016 566304 4156160    0    0     0     0 1026   232  2 98  0  0
113  0      0 44171936 568864 4142912    0    0     0     0 1035   241  2 98  0  0
114  0      0 44185376 570656 4127296    0    0     4     0 1023   237  2 98  0  0
112  0      0 44209056 572064 4102080    0    0     0     0 1027   234  2 98  0  0
114  0      0 44255392 573504 4055296    0    0     0     0 1025   223  2 98  0  0
115  0      0 44264544 574720 4044608    0    0     0     0 1030   242  1 99  0  0
procs -----------memory---------- ---swap-- -----io---- --system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id wa
99  0      0 44282656 575936 4026048    0    0     0     0 1036   258  1 99  0  0
111  0      0 44282656 578144 4025920    0    0     0     0 1024   273  2 98  0  0
111  0      0 44285920 580864 4019616    0    0     0     0 1043   240  2 98  0  0
111  0      0 44269856 583712 4032864    0    0     0     0 1031   209  2 98  0  0
112  0      0 44290080 586112 4009312    0    0     0     0 1027   253  2 98  0  0
111  0      0 44320608 588128 3977472    0    0     0     0 1025   222  1 99  0  0
104  0      0 44324704 590816 3971520    0    0     0     0 1028   253  1 98  0  0
108  0      0 44302048 593184 3994496    0    0     0     0 1027   247  2 98  0  0
109  0      0 44299360 596672 3993216    0    0     0     0
...


concurrent inode allocation for ext2 against 2.5.64

Post by William Lee Irwin III » Mon, 17 Mar 2003 00:10:10



>>  struct ext2_bg_info {
>>        u8 debts;
>>        spinlock_t balloc_lock;
>> +      spinlock_t ialloc_lock;
>>        unsigned int reserved;
>>  } ____cacheline_aligned_in_smp;

> hm, I wonder if this should be in a separate cacheline.  We may as well use a
> single lock if they're this close together.  Bill, can you test that
> sometime?

Benching now.

-- wli

concurrent inode allocation for ext2 against 2.5.64

Post by William Lee Irwin III » Mon, 17 Mar 2003 00:30:23



Quote:> c01dc9ac 4532033  21.4566     .text.lock.dec_and_lock
> c0169c0b 3835802  18.1603     .text.lock.dcache
> c0106ff4 1741849  8.24666     default_idle

More detailed wrt. atomic_dec_and_lock():

c01dc920 1344198  6.36401     atomic_dec_and_lock
 c01dc920 567      0.0421813
 c01dc921 503      0.0374201
 c01dc923 8        0.00059515
 c01dc924 204      0.0151763
 c01dc925 199      0.0148044
 c01dc928 211      0.0156971
 c01dc92b 335      0.0249219
 c01dc92d 27983    2.08176
 c01dc930 118      0.00877847
 c01dc932 206      0.0153251
 c01dc934 181      0.0134653
 c01dc936 98       0.00729059
 c01dc93a 317972   23.6551
 c01dc93c 5        0.000371969
 c01dc93e 216      0.0160691
 c01dc942 43       0.00319893
 c01dc949 176692   13.1448
 c01dc962 749      0.055721
 c01dc965 809754   60.2407
 c01dc967 4        0.000297575
 c01dc96a 6005     0.446735
 c01dc971 56       0.00416605
 c01dc97f 2        0.000148788
 c01dc9a1 228      0.0169618
 c01dc9a3 217      0.0161435
 c01dc9a6 9        0.000669544
 c01dc9a7 1328     0.098795
 c01dc9a8 141      0.0104895
 c01dc9ab 164      0.0122006

c01dc920 <atomic_dec_and_lock>:
c01dc920:       55                      push   %ebp
c01dc921:       89 e5                   mov    %esp,%ebp
c01dc923:       56                      push   %esi
c01dc924:       53                      push   %ebx
c01dc925:       8b 75 08                mov    0x8(%ebp),%esi
c01dc928:       8b 5d 0c                mov    0xc(%ebp),%ebx
c01dc92b:       8b 16                   mov    (%esi),%edx
c01dc92d:       8d 4a ff                lea    0xffffffff(%edx),%ecx
c01dc930:       85 c9                   test   %ecx,%ecx
c01dc932:       74 0e                   je     c01dc942 <atomic_dec_and_lock+0x22>
c01dc934:       89 d0                   mov    %edx,%eax
c01dc936:       f0 0f b1 0e             lock cmpxchg %ecx,(%esi)
c01dc93a:       89 c1                   mov    %eax,%ecx
c01dc93c:       39 d1                   cmp    %edx,%ecx
c01dc93e:       75 eb                   jne    c01dc92b <atomic_dec_and_lock+0xb>
c01dc940:       eb 5f                   jmp    c01dc9a1 <atomic_dec_and_lock+0x81>
c01dc942:       81 7b 04 ad 4e ad de    cmpl   $0xdead4ead,0x4(%ebx)
c01dc949:       74 17                   je     c01dc962 <atomic_dec_and_lock+0x42>
c01dc94b:       68 42 c9 1d c0          push   $0xc01dc942
c01dc950:       68 4c d1 2e c0          push   $0xc02ed14c
c01dc955:       e8 4e 28 f4 ff          call   c011f1a8 <printk>
c01dc95a:       0f 0b                   ud2a  
c01dc95c:       7b 00                   jnp    c01dc95e <atomic_dec_and_lock+0x3e>
c01dc95e:       35 d1 2e c0 f0          xor    $0xf0c02ed1,%eax
c01dc963:       fe 0b                   decb   (%ebx)
c01dc965:       78 45                   js     c01dc9ac <.text.lock.dec_and_lock>
c01dc967:       f0 ff 0e                lock decl (%esi)
c01dc96a:       0f 94 c0                sete   %al
c01dc96d:       84 c0                   test   %al,%al
c01dc96f:       74 07                   je     c01dc978 <atomic_dec_and_lock+0x58>
c01dc971:       b8 01 00 00 00          mov    $0x1,%eax
c01dc976:       eb 2b                   jmp    c01dc9a3 <atomic_dec_and_lock+0x83>
c01dc978:       81 7b 04 ad 4e ad de    cmpl   $0xdead4ead,0x4(%ebx)
c01dc97f:       74 0f                   je     c01dc990 <atomic_dec_and_lock+0x70>
c01dc981:       0f 0b                   ud2a  
c01dc983:       4a                      dec    %edx
c01dc984:       00 35 d1 2e c0 8d       add    %dh,0x8dc02ed1
c01dc98a:       b4 26                   mov    $0x26,%ah
c01dc98c:       00 00                   add    %al,(%eax)
c01dc98e:       00 00                   add    %al,(%eax)
c01dc990:       8a 03                   mov    (%ebx),%al
c01dc992:       84 c0                   test   %al,%al
c01dc994:       7e 08                   jle    c01dc99e <atomic_dec_and_lock+0x7e>
c01dc996:       0f 0b                   ud2a  
c01dc998:       4c                      dec    %esp
c01dc999:       00 35 d1 2e c0 c6       add    %dh,0xc6c02ed1
c01dc99f:       03 01                   add    (%ecx),%eax
c01dc9a1:       31 c0                   xor    %eax,%eax
c01dc9a3:       8d 65 f8                lea    0xfffffff8(%ebp),%esp
c01dc9a6:       5b                      pop    %ebx
c01dc9a7:       5e                      pop    %esi
c01dc9a8:       89 ec                   mov    %ebp,%esp
c01dc9aa:       5d                      pop    %ebp
c01dc9ab:       c3                      ret    
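
For readers decoding the profile: atomic_dec_and_lock(atomic, lock) decrements
the counter locklessly as long as the result stays non-zero, and only falls
back to taking the spinlock when the count is about to reach zero (it then
returns 1 with the lock held). In a dbench run these calls come mainly from
dput() dropping dentry refcounts against dcache_lock, so the hot
.text.lock.dec_and_lock samples above are the contended spin on that fallback.
A rough C sketch of the algorithm follows -- the disassembly above is the
hand-tuned ia32 version built with spinlock debugging (hence the 0xdead4ead
magic compares), and the sketch uses the later atomic_cmpxchg() helper purely
for brevity:

#include <asm/atomic.h>
#include <linux/spinlock.h>

int sketch_atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
{
        int counter, newcount;

        /* fast path: cmpxchg-based decrement while we are not the caller
         * who takes the count to zero -- no lock needed */
        for (;;) {
                counter = atomic_read(atomic);
                newcount = counter - 1;
                if (!newcount)
                        break;                  /* would hit zero: go slow */
                if (atomic_cmpxchg(atomic, counter, newcount) == counter)
                        return 0;               /* decremented, lock never taken */
                /* raced with another CPU, re-read and retry */
        }

        /* slow path: take the lock, then re-check under it */
        spin_lock(lock);
        if (atomic_dec_and_test(atomic))
                return 1;                       /* hit zero: return with lock held */
        spin_unlock(lock);
        return 0;
}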


concurrent inode allocation for ext2 against 2.5.64

Post by Andrew Morton » Mon, 17 Mar 2003 00:40:17




> > here is the patch for ext2 concurrent inode allocation. it should be applied
> > on top of the previous concurrent-balloc patch. tested on a dual P3 for several
> > hours of stress-test + fsck. hope someone tests it on big iron ;)

> 32x/48GB NUMA-Q

> Throughput 257.986 MB/sec 128 procs
> dbench 128  95.36s user 4833.06s system 2832% cpu 2:53.97 total

> vma      samples  %-age       symbol name
> c01dc9ac 4532033  21.4566     .text.lock.dec_and_lock
> c0169c0b 3835802  18.1603     .text.lock.dcache
> c0106ff4 1741849  8.24666     default_idle

Looks like it's gone nuts when 128 processes all try to close lots of
files at the same time.

One possible reason for this leaping out is that all the instances are now
achieving more uniform runtimes.   You can tell that by comparing the dbench
dots.


concurrent inode allocation for ext2 against 2.5.64

Post by William Lee Irwin III » Mon, 17 Mar 2003 01:10:04



>> 32x/48GB NUMA-Q
>> Throughput 257.986 MB/sec 128 procs
>> dbench 128  95.36s user 4833.06s system 2832% cpu 2:53.97 total
>> vma      samples  %-age       symbol name
>> c01dc9ac 4532033  21.4566     .text.lock.dec_and_lock
>> c0169c0b 3835802  18.1603     .text.lock.dcache
>> c0106ff4 1741849  8.24666     default_idle

> Looks like it's gone nuts when 128 processes all try to close lots of
> files at the same time.
> One possible reason for this leaping out is that all the instances are now
> achieving more uniform runtimes.   You can tell that by comparing the dbench
> dots.

For some reason this version of dbench doesn't produce dots. I logged
what it did produce, though. It looks something like this:

8  103.63 MB/sec^M 128      4008  105.52 MB/sec^M 128      4397  108.04 MB/sec^M 128  
    4811  109.90 MB/sec^M 128      5243  111.89 MB/sec^M 128      5637  114.19 MB/sec
 128      6039  117.42 MB/sec^M 128      6421  120.99 MB/sec^M 128      6779  124.12 M
B/sec^M 128      7120  127.06 MB/sec^M 128      7467  128.75 MB/sec^M 128      7799  1
30.19 MB/sec^M 128      8146  131.55 MB/sec^M 128      8551  132.97 MB/sec^M 128      
8975  134.09 MB/sec^M 128      9374  135.67 MB/sec^M 128      9737  137.73 MB/sec^M 12
8     10123  140.34 MB/sec^M 128     10503  142.81 MB/sec^M 128     10847  145.13 MB/s
ec^M 128     11161  146.17 MB/sec^M 128     11511  147.09 MB/sec^M 128     11857  147.
92 MB/sec^M 128     12293  149.22 MB/sec^M 128     12711  149.91 MB/sec^M 128     1309
6  151.01 MB/sec^M 128     13470  152.52 MB/sec^M 128     13808  154.25 MB/sec^M 128  
   14176  156.10 MB/sec^M 128     14517  157.65 MB/sec^M 128     14842  158.75 MB/sec
 128     15200  159.51 MB/sec^M 128     15558  159.99 MB/sec^M 128     15947  160.84 M
B/sec^M 128     16372  161.64 MB/sec^M 128     16805  162.56 MB/sec^M 128     17175  1
63.49 MB/sec^M 128     17523  164.99 MB/sec^M 128     17884  166.28 MB/sec^M 128     1
8237  167.82 MB/sec^M 128     18575  168.78 MB/sec^M 128     18919  169.10 MB/sec^M 12
8     19246  169.26 MB/sec^M 128     19600  169.73 MB/sec^M 128     19983  170.34 MB/s
ec^M 128     20398  170.91 MB/sec^M 128     20782  171.59 MB/sec^M 128     21126  172.
44 MB/sec^M 128     21456  173.34 MB/sec^M 128     21792  174.53 MB/sec^M 128     2213
8  175.44 MB/sec^M 128     22499  176.01 MB/sec^M 128     22821  176.11 MB/sec^M 128  

... and dos2unix just annihilated the log from the last run ...

-- wli

concurrent inode allocation for ext2 against 2.5.64

Post by William Lee Irwin III » Mon, 17 Mar 2003 01:10:16



>> Looks like it's gone nuts when 128 processes all try to close lots of
>> files at the same time.
>> One possible reason for this leaping out is that all the instances are now
>> achieving more uniform runtimes.   You can tell that by comparing the dbench
>> dots.


Quote:> For some reason this version of dbench doesn't produce dots. I logged
> what it did produce, though. It looks something like this:

There's a problem with the old dbench:

Throughput 82.0899 MB/sec (NB=102.612 MB/sec  820.899 MBit/sec)
$ (time ./dbench/dbench 128) |& tee -a ~/dbench.output.15
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++128 clients started
********************************************************************************************************************************
Throughput 1841.38 MB/sec (NB=2301.72 MB/sec  18413.8 MBit/sec)
./dbench/dbench 128  73.31s user 6.75s system 1802% cpu 4.440 total


concurrent inode allocation for ext2 against 2.5.64

Post by William Lee Irwin III » Mon, 17 Mar 2003 01:30:19



Quote:> There's a problem with the old dbench:

oh, this is hilarious!!!

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++128 clients started
********************************************************************************************************************************
Throughput 1841.38 MB/sec (NB=2301.72 MB/sec  18413.8 MBit/sec)
./dbench/dbench 128  73.31s user 6.75s system 1802% cpu 4.440 total
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++512 clients started
********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
Throughput 5860.34 MB/sec (NB=7325.42 MB/sec  58603.4 MBit/sec)
./dbench/dbench 512  293.72s user 14.36s system 2306% cpu 13.360 total
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++1024 clients started
****************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
Throughput 5872.46 MB/sec (NB=7340.57 MB/sec  58724.6 MBit/sec)
./dbench/dbench 1024  587.43s user 35.85s system 2424% cpu 25.711 total
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++2048 clients started
********************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
Throughput 5167.57 MB/sec (NB=6459.46 MB/sec  51675.7 MBit/sec)
./dbench/dbench 2048  1176.73s user 101.47s system 2525% cpu 50.619 total
...


concurrent inode allocation for ext2 against 2.5.64

Post by William Lee Irwin III » Mon, 17 Mar 2003 01:50:13


On Sat, Mar 15, 2003 at 02:02:41PM -0800, William Lee Irwin III wrote:

> 32x/48GB NUMA-Q
> Throughput 257.986 MB/sec 128 procs
> dbench 128  95.36s user 4833.06s system 2832% cpu 2:53.97 total

re-ran baseline ext2, 128 procs, properly tuned this time:

$ (time dbench 128) |& tee -a ~/dbench.output.16
zsh: correct '~/dbench.output.16' to '~/dbench.output.15' [nyae]? n
128 clients started
   0     62477  107.03 MB/sec
Throughput 107.029 MB/sec 128 procs
dbench 128  119.44s user 2839.98s system 698% cpu 7:03.78 total

vma      samples  %-age       symbol name
c0106ff4 26762866 40.8866     default_idle
c01dc2d0 7167801  10.9505     __copy_to_user_ll
c0108150 4402233  6.72544     .text.lock.semaphore
c0264af0 2656195  4.05796     sync_buffer
c01dc338 2512097  3.83782     __copy_from_user_ll
c0119088 2172605  3.31916     try_to_wake_up
c011a1ec 1437722  2.19646     schedule
c0138fb8 1418731  2.16744     do_page_cache_readahead
c0107d1c 1268960  1.93863     __down
c011c52f 1264234  1.93141     .text.lock.sched
c011fb0c 902784   1.37921     profile_hook
c0119ddc 890528   1.36049     scheduler_tick
c0264cc0 791480   1.20917     add_event_entry
c0119890 769438   1.1755      load_balance
c013ef74 606094   0.925951    check_highmem_ptes
c010f6e8 438562   0.670006    timer_interrupt
c01135c0 393512   0.601182    mark_offset_tsc
c0152d40 352086   0.537894    __find_get_block_slow
c0168d3c 322928   0.493348    d_lookup
c0264a08 318410   0.486446    add_sample
c011a73c 305988   0.467468    __wake_up_common
c01333a8 305702   0.467032    find_get_page
c0122960 276553   0.4225      current_kernel_time
c01860d4 256093   0.391242    ext2_new_block
c0153ee4 248623   0.37983     __find_get_block

procs -----------memory---------- ---swap-- -----io---- --system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id wa
 1  0      0 49095136   5024  27232    0    0     2     0   34     2  0  0 100  0
 0  0      0 49091872   5056  27488    0    0    76     0 1036    50  0  0 100  0
 0  0      0 49091872   5056  27488    0    0     0     0 1025     6  0  0 100  0
 0  0      0 49091872   5056  27488    0    0     0     0 1027     6  0  0 100  0
 0  0      0 49091872   5056  27488    0    0     0     0 1025     6  0  0 100  0
 0  0      0 49091872   5056  27488    0    0     0     0 1024     6  0  0 100  0
 0  0      0 49091872   5056  27488    0    0     0     0 1025     9  0  0 100  0
 0  0      0 49052256   5056  27552    0    0    16     0 1029   227  0  1 99  0
 1  1      0 49045344   5184  28032    0    0   440     0 1049   198  0  1 98  0
18  0      0 48853856  10432 119008    0    0  1252     0 1132 13658  2 53 39  5
10  0      0 48742944  14752 234944    0    0    52     0 1042 15298  0 25 75  0
14  0      0 48605088  19584 357952    0    0   104     0 1044 14654  1 25 75  0
16  0      0 48464672  25248 485088    0    0    32     0 1034 14526  0 24 75  0
13  0      0 48336480  31520 604576    0    0    36     0 1034 14563  0 24 76  0
16  0      0 48218720  38816 713056    0    0    44     0 1036 15012  0 24 76  0
16  0      0 48102176  47488 819488    0    0    40     0 1038 15476  0 23 76  0
13  0      0 47996832  56512 914304    0    0    84     0 1041 15528  0 23 77  0
14  0      0 47894944  66176 1005120    0    0    52     0 1039 15668  0 23 77  0
16  0      0 47788064  77472 1098496    0    0    52     0 1039 15592  0 23 77  0
17  0      0 47689952  87520 1184992    0    0    52     0 1040 15604  0 23 77  0
14  0      0 47589408  97792 1273504    0    0    56     0 1040 15722  0 22 77  0
procs -----------memory---------- ---swap-- -----io---- --system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id wa
19  0      0 47490656 107840 1360608    0    0    84     0 1044 15449  0 23 76  0
17  0      0 47390560 117728 1448896    0    0    52     0 1039 15471  0 22 77  0
15  0      0 47291232 128032 1536000    0    0    52     0 1041 15535  0 23 77  0
20  0      0 47191840 138336 1623520    0    0    84     0 1039 15617  0 23 77  0
15  0      0 47097504 148256 1706720    0    0    56     0 1041 15142  0 22 77  0
17  0      0 46992992 158880 1798400    0    0    48     0 1039 15524  0 23 77  0
18  0      0 46894176 168832 1885888    0    0    52     0 1040 15428  0 23 77  0
16  0      0 46793888 177824 1975200    0    0    44     0 1038 15375  0 23 77  0
20  0      0 46686112 187360 2071424    0    0    76     0 1038 15385  0 24 76  0
17  0      0 46585696 194816 2162080    0    0    40     0 1036 15212  0 23 77  0
16  0      0 46487904 201408 2249632    0    0    40     0 1040 15178  1 24 75  0
20  0      0 46386976 207232 2340864    0    0    44     0 1037 15033  1 24 76  0
20  0      0 46299360 211264 2420864    0    0    36     0 1034 14621  1 24 75  0
17  0      0 46226208 215232 2485024    0    0    68     0 1037 14667  1 23 76  0
18  0      0 46166112 218944 2536608    0    0    32     0 1032 14402  1 24 75  0
19  0      0 46115040 223008 2578848    0    0    28     0 1034 14493  1 24 75  0
19  0      0 46073056 227616 2613248    0    0    28     0 1033 14744  1 24 75  0
26  0      0 46035616 232544 2643776    0    0    28     0 1035 14793  1 25 74  0
13  0      0 45989472 237824 2683424    0    0    28     0 1034 14835  1 24 75  0
18  0      0 45945824 242912 2720192    0    0    52     0 1032 15222  1 23 76  0
18  0      0 45903456 247776 2754752    0    0    52     0 1034 14905  1 24 75  0
procs -----------memory---------- ---swap-- -----io---- --system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id wa
11  1      0 45889248 250624 2766816    0    0   328     0 1108  4651  0 14 83  2
16  0      0 45866528 254144 2785216    0    0   192     0 1076  9108  1 18 80  1
22  0      0 45838368 258176 2808160    0    0   140     0 1063 10561  1 19 79  1
11  1      0 45819424 262144 2822272    0    0   176     0 1067 10427  1 19 79  1
15  0      0 45806688 265664 2830528    0    0   136     0 1056 12192  1 20 79  1
14  0      0 45798432 269248 2834464    0    0     4     0 1027 15099  1 23 76  0
14  0      0 45801696 272224 2827328    0    0     4     0 1027 15070  1 24 75  0
15  0      0 45814624 275200 2810592    0    0     0     0 1026 15164  1 23 75  0
17  0      0 45835744 276960 2787840    0    0     4     0 1027 15178  1 24 75  0
17  0      0 45866272 278112 2755264    0    0     8     0 1028 15182  1 23 76  0
16  0      0 45914272 279392 2705504    0    0     4     0 1031 15705  1 23 76  0
18  0      0 45952864 280544 2666208    0    0     0     0 1028 15809  1 23 76  0
16  0      0 45973664 282528 2644256    0    0    32     0 1026 14703  1 23 76  0
17  0      0 45973664 284672 2642784    0    0     0     0 1027 15353  1 23 76  0
17  0      0 45960928 286656 2653952    0    0     0     0 1027 15140  1 23 76  0
18  0      0 45941280 289344 2670784    0    0     0     0 1025 15291  1 23 76  0
16  0      0 45922080 292544 2686784    0    0     0     0 1027 15222  1 23 76  0
20  0      0 45893792 295808 2712320    0    0    32     0 1028 15287  1 23 76  0
17  0      0 45860448 299456 2741632    0    0     0     0 1026 15279  1 22 77  0
14  0      0 45831392 303168 2766176    0    0    20     0 1032 14554  1 22 77  0
 7  1      0 45797920 306592 2795904    0    0    72     0 1044 13144  1 18 80  0
procs -----------memory---------- ---swap-- -----io---- --system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id wa
10  0      0 45773984 309312 2816512    0    0    52     0 1033 14444  1 19 80  0
 9  0      0 45761952 311936 2824768    0    0    32     0 1028 15371  1 20 79  0
 9  0      0 45764128 313760 2820032    0    0     0     0 1027 15656  1 20 79  0
10  0      0 45777376 316224 2803968    0    0     0     0 1027 15641  1 20 79  0
10  0      0 45784352 318112 2795360    0    0     0     0 1028 15397  1 20 79  0
16  0      0 45792096 319456 2786784    0    0     0     0 1026 15494  1 20 78  0
11  0      0 45812768 320704 2763040    0    0     0     0 1025 15468  1 20 79  0
17  0      0 45840800 321728 2734336    0    0     0     0 1026 15582  1 19 80  0
16  0      0 45866400 322720 2707776    0    0     0     0 1028 15609  1 19 80  0
12  0      0 45889312 323744 2683648    0    0     0     0 1029 15356  1 20 79  0
12  0      0 45902944 324992 2669664    0    0    32     0 1031 15858  1 19 80  0
15  0      0 45902112 326560 2669344    0    0     0     0 1027 15677  1 19 80  0
11  0      0 45892896 328640 2677216    0    0     4     0 1027 15445  1 20 79  0
12  0      0 45871072 332448 2695328    0    0     0     0 1028 15546  1 20 80  0
14  0      0 45837920 336928 2724352    0    0     0     0 1028 15501  1 20 79  0
16  0      0 45804320 340480 2754816    0    0     4     0 1029 15249  1 21 78  0
12  0      0 45764128 344704 2790528    0    0    32     0 1027 15469  1 20 79  0
11  0      0 45728928 348736 2821152    0    0    16     0 1031 14738  1 19 80  0
14  0      0 45707040 351904 2839584    0    0    68     0 1030 15199  1 19 80  0
11  0      0 45691040 354432 2852640    0    0     4     0 1028 15327  1 19 80  0
15  0      0 45682592 356032 2858272    0    0     4     0 1031 15790  1 20 79  0
procs -----------memory---------- ---swap-- -----io---- --system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id wa
10  0      0 45687584 357952 2850880    0    0     0     0 1037 15894  1 19 80  0
16  0      0 45701152 359392 2835616    0    0     0     0 1031 15942  1 19 80  0
11  0      0 45721120 360704 2813984    0    0     0     0 1026 15509  1 20 79  0
14  0      0 45738528 361824 2795680    0    0     0     0 1027 15805  1 20 79  0
10  0      0 45764704 362720 2768192    0    0     0     0 1025 15973  1 19 80  0
11  0      0 45794336 363296 2737568    0    0     0     0 1027 15875  1 19 80  0
10  0      0 45828512 364160 2702656    0    0     0     0 1027 15848  1 18 81  0
16  0      0 45842976 365312 2687872    0    0     4
...


concurrent inode allocation for ext2 against 2.5.64

Post by Martin J. Bligh » Mon, 17 Mar 2003 10:30:15


Quote:> here is the patch for ext2 concurrent inode allocation. it should be applied
> on top of the previous concurrent-balloc patch. tested on a dual P3 for several
> hours of stress-test + fsck. hope someone tests it on big iron ;)

OK ... now you're scaring me. This puppy is accelerating too fast ;-)
I'm sure 196% on SDET is against some rule or other ... ;-)
(16x NUMA-Q off a single disk on node 0)

              2.5.64-mjb3  baseline
         2.5.64-mjb3-ext2  with concurrent balloc
    2.5.64-mjb3-ext2_plus  with concurrent balloc + ialloc

dbench32:
        2.5.64-mjb3
Throughput 187.637 MB/sec (NB=234.546 MB/sec  1876.37 MBit/sec)  32 procs
        2.5.64-mjb3-ext2
Throughput 378.664 MB/sec (NB=473.33 MB/sec  3786.64 MBit/sec)  32 procs
        2.5.64-mjb3-ext2-plus
Throughput 514.092 MB/sec (NB=642.615 MB/sec  5140.92 MBit/sec)  32 procs

DISCLAIMER: SPEC(tm) and the benchmark name SDET(tm) are registered
trademarks of the Standard Performance Evaluation Corporation. This
benchmarking was performed for research purposes only, and the run results
are non-compliant and not-comparable with any published results.

Results are shown as percentages of the first set displayed

SDET 1  (see disclaimer)
                           Throughput    Std. Dev
              2.5.64-mjb3       100.0%         1.8%
         2.5.64-mjb3-ext2       102.0%         1.1%
    2.5.64-mjb3-ext2_plus       105.4%         0.7%

SDET 2  (see disclaimer)
                           Throughput    Std. Dev
              2.5.64-mjb3       100.0%         3.7%
         2.5.64-mjb3-ext2       106.1%         3.1%
    2.5.64-mjb3-ext2_plus       105.3%         3.3%

SDET 4  (see disclaimer)
                           Throughput    Std. Dev
              2.5.64-mjb3       100.0%         1.5%
         2.5.64-mjb3-ext2       101.1%         2.1%
    2.5.64-mjb3-ext2_plus       103.7%         1.9%

SDET 8  (see disclaimer)
                           Throughput    Std. Dev
              2.5.64-mjb3       100.0%         0.2%
         2.5.64-mjb3-ext2       113.3%         0.7%
    2.5.64-mjb3-ext2_plus       118.8%         0.2%

SDET 16  (see disclaimer)
                           Throughput    Std. Dev
              2.5.64-mjb3       100.0%         1.1%
         2.5.64-mjb3-ext2       167.1%         0.8%
    2.5.64-mjb3-ext2_plus       187.7%         0.6%

SDET 32  (see disclaimer)
                           Throughput    Std. Dev
              2.5.64-mjb3       100.0%         0.9%
         2.5.64-mjb3-ext2       170.7%         0.1%
    2.5.64-mjb3-ext2_plus       196.3%         0.2%

SDET 64  (see disclaimer)
                           Throughput    Std. Dev
              2.5.64-mjb3       100.0%         0.7%
         2.5.64-mjb3-ext2       157.2%         0.5%
    2.5.64-mjb3-ext2_plus       177.4%         0.4%

SDET 128  (see disclaimer)
                           Throughput    Std. Dev
              2.5.64-mjb3       100.0%         0.3%
         2.5.64-mjb3-ext2       151.3%         0.8%
    2.5.64-mjb3-ext2_plus       161.3%         0.1%


concurrent inode allocation for ext2 against 2.5.64

Post by William Lee Irwin III » Mon, 17 Mar 2003 11:00:08



>> hm, I wonder if this should be in a separate cacheline.  We may as well use a
>> single lock if they're this close together.  Bill, can you test that
>> sometime?


Quote:> Benching now.

Sorry, this should have hit the list earlier.

Throughput 294.388 MB/sec 128 procs
dbench 128  87.22s user 4286.79s system 2984% cpu 2:26.58 total

(the "before" picture was ca. 257MB/s)

vmstat and oprofile info vanished, not sure why. A rerun is possible.

-- wli

[PATCH] concurrent block allocation for ext2 against 2.5.64


Sadly, we are constantly diverging the ext2/ext3 codebases.  Lots of
features are going into ext3, but lots of fixes/improvements are only
going into ext2.  Is ext3 holding BKL for doing journal_start() still?

Looking at ext3_prepare_write() we grab the BKL for doing journal_start()
and for journal_stop(), but I don't _think_ we need BKL for journal_stop()
do we?  We may or may not need it for the journal_data case, but that is
not even working right now I think.

It also seems we are getting BKL in ext3_truncate(), which likely isn't
needed past journal_start(), although we do need to have superblock-only
lock for ext3_orphan_add/del.

Cheers, Andreas
--
Andreas Dilger
http://sourceforge.net/projects/ext2resize/
http://www-mddsp.enel.ucalgary.ca/People/adilger/
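
To make the locking being questioned concrete, the bracketing looks roughly
like the sketch below. This is a paraphrase, not a quote of fs/ext3/inode.c;
the journal handle calls are shown with the generic JBD names and the variable
names are illustrative stand-ins:

        handle_t *handle;

        /* prepare_write side: BKL taken just around starting the handle */
        lock_kernel();
        handle = journal_start(journal, needed_blocks);
        unlock_kernel();
        if (IS_ERR(handle))
                return PTR_ERR(handle);

        /* ... buffer/block work proceeds without the BKL ... */

        /* commit_write side: the open question above is whether the BKL
         * is needed around stopping the handle at all */
        lock_kernel();
        journal_stop(handle);
        unlock_kernel();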

