dcache scalability patch (2.4.17)

dcache scalability patch (2.4.17)

Post by Maneesh Soni » Sat, 13 Jul 2002 23:10:10



Here is the dcache scalability patch (cleaned up) as discussed in
the previous post to lkml by Dipankar. The patch uses RCU for doing fast
dcache lookup. It also does lazy updates to the lru list of dentries to
avoid doing write operations while doing lookup.

Following changes were done in this version

o Removed the d_next_hash hack. Dentries are unhashed using list_del
  instead of list_del_init, and that too using d_drop interface  
o Used d_drop_locked and d_unhashed instead of directly manipulating hash list
  using list macros
o Changed DCACHE_UNLINKED to DCACHE_UNHASHED

Regards,
Maneesh

diff -urN linux-2.4.17-base/fs/autofs4/root.c linux-2.4.17-dc8/fs/autofs4/root.c
--- linux-2.4.17-base/fs/autofs4/root.c Tue Oct 24 10:27:38 2000
+++ linux-2.4.17-dc8/fs/autofs4/root.c  Fri Jul 12 10:59:38 2002
@@ -403,7 +403,7 @@
                spin_unlock(&dcache_lock);
                return -ENOTEMPTY;
        }
-       list_del_init(&dentry->d_hash);
+       d_drop_locked(dentry);
        spin_unlock(&dcache_lock);

        dput(ino->dentry);
diff -urN linux-2.4.17-base/fs/dcache.c linux-2.4.17-dc8/fs/dcache.c
--- linux-2.4.17-base/fs/dcache.c       Fri Dec 21 23:11:55 2001
+++ linux-2.4.17-dc8/fs/dcache.c        Fri Jul 12 16:18:39 2002
@@ -25,6 +25,7 @@
 #include <linux/module.h>

 #include <asm/uaccess.h>
+#include <linux/rcupdate.h>

 #define DCACHE_PARANOIA 1
 /* #define DCACHE_DEBUG 1 */
@@ -55,14 +56,21 @@
 /* Statistics gathering. */
 struct dentry_stat_t dentry_stat = {0, 0, 45, 0,};

+static void d_callback(void *arg)
+{
+       struct dentry * dentry = (struct dentry *)arg;
+
+       if (dname_external(dentry))
+               kfree((void *) dentry->d_name.name);
+       kmem_cache_free(dentry_cache, dentry);
+}
+
 /* no dcache_lock, please */
 static inline void d_free(struct dentry *dentry)
 {
        if (dentry->d_op && dentry->d_op->d_release)
                dentry->d_op->d_release(dentry);
-       if (dname_external(dentry))
-               kfree(dentry->d_name.name);
-       kmem_cache_free(dentry_cache, dentry);
+       call_rcu(&dentry->d_rcu, d_callback, dentry);
        dentry_stat.nr_dentry--;
 }

@@ -124,9 +132,13 @@
        if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
                return;

-       /* dput on a free dentry? */
-       if (!list_empty(&dentry->d_lru))
-               BUG();
+       spin_lock(&dentry->d_lock);
+        if (atomic_read(&dentry->d_count)) {
+                spin_unlock(&dentry->d_lock);
+                spin_unlock(&dcache_lock);
+                return;
+        }
+
        /*
         * AV: ->d_delete() is _NOT_ allowed to block now.
         */
@@ -135,18 +147,28 @@
                        goto unhash_it;
        }
        /* Unreachable? Get rid of it */
-       if (list_empty(&dentry->d_hash))
+       if (d_unhashed(dentry))
                goto kill_it;
-       list_add(&dentry->d_lru, &dentry_unused);
-       dentry_stat.nr_unused++;
+
+       if (list_empty(&dentry->d_lru)) {
+               dentry->d_vfs_flags &= ~DCACHE_REFERENCED;
+               list_add(&dentry->d_lru, &dentry_unused);
+               dentry_stat.nr_unused++;
+       }
+       spin_unlock(&dentry->d_lock);
        spin_unlock(&dcache_lock);
        return;

 unhash_it:
-       list_del_init(&dentry->d_hash);
+       __d_drop(dentry);

 kill_it: {
                struct dentry *parent;
+               spin_unlock(&dentry->d_lock);
+               if (!list_empty(&dentry->d_lru)) {
+                       list_del(&dentry->d_lru);
+                       dentry_stat.nr_unused--;
+               }
                list_del(&dentry->d_child);
                /* drops the lock, at that point nobody can reach this dentry */
                dentry_iput(dentry);
@@ -177,7 +199,7 @@
         * If it's already been dropped, return OK.
         */
        spin_lock(&dcache_lock);
-       if (list_empty(&dentry->d_hash)) {
+       if (d_unhashed(dentry)) {
                spin_unlock(&dcache_lock);
                return 0;
        }
@@ -201,15 +223,18 @@
         * we might still populate it if it was a
         * working directory or similar).
         */
+       spin_lock(&dentry->d_lock);
        if (atomic_read(&dentry->d_count) > 1) {
                if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
+                       spin_unlock(&dentry->d_lock);
                        spin_unlock(&dcache_lock);
                        return -EBUSY;
                }
        }
-
-       list_del_init(&dentry->d_hash);
+       __d_drop(dentry);
+       spin_unlock(&dentry->d_lock);
        spin_unlock(&dcache_lock);
+
        return 0;
 }

@@ -217,11 +242,14 @@

 static inline struct dentry * __dget_locked(struct dentry *dentry)
 {
+       spin_lock(&dentry->d_lock);
        atomic_inc(&dentry->d_count);
+       dentry->d_vfs_flags |= DCACHE_REFERENCED;
        if (atomic_read(&dentry->d_count) == 1) {
                dentry_stat.nr_unused--;
                list_del_init(&dentry->d_lru);
        }
+       spin_unlock(&dentry->d_lock);
        return dentry;
 }

@@ -252,8 +280,8 @@
                tmp = next;
                next = tmp->next;
                alias = list_entry(tmp, struct dentry, d_alias);
-               if (!list_empty(&alias->d_hash)) {
-                       __dget_locked(alias);
+               if (!d_unhashed(alias)) {
+                       dget(alias);
                        spin_unlock(&dcache_lock);
                        return alias;
                }
@@ -263,7 +291,7 @@
 }

 /*
- *     Try to kill dentries associated with this inode.
+ *     Try to kill dentries associated with this inode.
  * WARNING: you must own a reference to inode.
  */
 void d_prune_aliases(struct inode *inode)
@@ -274,13 +302,16 @@
        tmp = head;
        while ((tmp = tmp->next) != head) {
                struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
+               spin_lock(&dentry->d_lock);
                if (!atomic_read(&dentry->d_count)) {
-                       __dget_locked(dentry);
+                       __dget(dentry);
+                       __d_drop(dentry);
+                       spin_unlock(&dentry->d_lock);
                        spin_unlock(&dcache_lock);
-                       d_drop(dentry);
                        dput(dentry);
                        goto restart;
                }
+               spin_unlock(&dentry->d_lock);
        }
        spin_unlock(&dcache_lock);
 }
@@ -295,7 +326,8 @@
 {
        struct dentry * parent;

-       list_del_init(&dentry->d_hash);
+       __d_drop(dentry);
+       spin_unlock(&dentry->d_lock);
        list_del(&dentry->d_child);
        dentry_iput(dentry);
        parent = dentry->d_parent;
@@ -330,19 +362,20 @@
                if (tmp == &dentry_unused)
                        break;
                list_del_init(tmp);
+               dentry_stat.nr_unused--;
                dentry = list_entry(tmp, struct dentry, d_lru);

+               spin_lock(&dentry->d_lock);
                /* If the dentry was recently referenced, don't free it. */
                if (dentry->d_vfs_flags & DCACHE_REFERENCED) {
                        dentry->d_vfs_flags &= ~DCACHE_REFERENCED;
-                       list_add(&dentry->d_lru, &dentry_unused);
+                       if (!atomic_read(&dentry->d_count)) {
+                               list_add(&dentry->d_lru, &dentry_unused);
+                               dentry_stat.nr_unused++;
+                       }
+                       spin_unlock(&dentry->d_lock);
                        continue;
                }
-               dentry_stat.nr_unused--;
-
-               /* Unused dentry with a count? */
-               if (atomic_read(&dentry->d_count))
-                       BUG();

                prune_one_dentry(dentry);
                if (!--count)
@@ -405,10 +438,13 @@
                dentry = list_entry(tmp, struct dentry, d_lru);
                if (dentry->d_sb != sb)
                        continue;
-               if (atomic_read(&dentry->d_count))
-                       continue;
+               spin_lock(&dentry->d_lock);
                dentry_stat.nr_unused--;
                list_del_init(tmp);
+               if (atomic_read(&dentry->d_count)) {
+                       spin_unlock(&dentry->d_lock);
+                       continue;
+               }
                prune_one_dentry(dentry);
                goto repeat;
        }
@@ -488,11 +524,13 @@
                struct list_head *tmp = next;
                struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
                next = tmp->next;
+               list_del_init(&dentry->d_lru);
+               spin_lock(&dentry->d_lock);
                if (!atomic_read(&dentry->d_count)) {
-                       list_del(&dentry->d_lru);
                        list_add(&dentry->d_lru, dentry_unused.prev);
                        found++;
                }
+               spin_unlock(&dentry->d_lock);
                /*
                 * Descend a level if the d_subdirs list is non-empty.
                 */
@@ -606,8 +644,9 @@
        str[name->len] = 0;

        atomic_set(&dentry->d_count, 1);
-       dentry->d_vfs_flags = 0;
+       dentry->d_vfs_flags = DCACHE_UNHASHED;
        dentry->d_flags = 0;
+       dentry->d_lock = SPIN_LOCK_UNLOCKED;
        dentry->d_inode = NULL;
        dentry->d_parent = NULL;
        dentry->d_sb = NULL;
@@ -708,8 +747,9 @@
        const unsigned char *str = name->name;
        struct list_head *head = d_hash(parent,hash);
        struct list_head *tmp;
+       struct dentry * found = NULL;

-       spin_lock(&dcache_lock);
+       /* rcu_read_lock(); for pre-emptible kernel */
        tmp = head->next;
        for (;;) {
                struct dentry * dentry = list_entry(tmp, struct dentry, d_hash);
@@ -729,13 +769,16 @@
                        if (memcmp(dentry->d_name.name, str, len))
                                continue;
                }
-               __dget_locked(dentry);
-               dentry->d_vfs_flags |= DCACHE_REFERENCED;
-               spin_unlock(&dcache_lock);
-               return dentry;
+               spin_lock(&dentry->d_lock);
+               if (!(dentry->d_vfs_flags & DCACHE_UNHASHED)) {
+                       found = __dget(dentry);
+               }
+               spin_unlock(&dentry->d_lock);
+               /* rcu_read_unlock(); for pre-emptible kernel */
+               return found;
        }
-       spin_unlock(&dcache_lock);
-       return NULL;
+       /* rcu_read_unlock(); for pre-emptible kernel */
+       return found;
 }

 /**
@@ -774,7 +817,7 @@
        lhp = base = d_hash(dparent, dentry->d_name.hash);
        while ((lhp = lhp->next) != base) {
                if (dentry == list_entry(lhp, struct dentry, d_hash)) {
-                       __dget_locked(dentry);
+                       dget(dentry);
                        spin_unlock(&dcache_lock);
                        return 1;
                }
@@ -834,9 +877,12 @@
 void d_rehash(struct dentry * entry)
 {
        struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
-       if (!list_empty(&entry->d_hash)) BUG();
        spin_lock(&dcache_lock);
+       spin_lock(&entry->d_lock);
+       if (!list_empty(&entry->d_hash) && !d_unhashed(entry)) BUG();
        list_add(&entry->d_hash, list);
+       entry->d_vfs_flags &= ~DCACHE_UNHASHED;
+       spin_unlock(&entry->d_lock);
        spin_unlock(&dcache_lock);
 }

@@ -909,7 +955,7 @@
        list_add(&dentry->d_hash, &target->d_hash);

        /* Unhash the target: dput() will then get rid of it */
-       list_del_init(&target->d_hash);
+       d_drop_locked(target);

        list_del(&dentry->d_child);
        list_del(&target->d_child);
@@ -951,7 +997,7 @@

        *--end = '\0';
        buflen--;
-       if (!IS_ROOT(dentry) && list_empty(&dentry->d_hash)) {
+       if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
                buflen -= 10;
                end -= 10;
                memcpy(end, " (deleted)", 10);
@@ -1034,7 +1080,7 @@
        error = -ENOENT;
        /* Has the current directory has been unlinked? */
        spin_lock(&dcache_lock);
-       if (pwd->d_parent == pwd || !list_empty(&pwd->d_hash)) {
+       if (pwd->d_parent == pwd || !d_unhashed(pwd)) {
                unsigned long len;
                char * cwd;

diff -urN linux-2.4.17-base/fs/intermezzo/journal.c linux-2.4.17-dc8/fs/intermezzo/journal.c
--- linux-2.4.17-base/fs/intermezzo/journal.c   Fri Dec 21 23:11:55 2001
+++ linux-2.4.17-dc8/fs/intermezzo/journal.c    Mon Jul  8 16:18:43 2002
@@ -186,7 +186,7 @@
...

read more »

 
 
 

dcache scalability patch (2.4.17)

Post by Christoph Hellwig » Sat, 13 Jul 2002 23:20:06


> diff -urN linux-2.4.17-base/fs/dcache.c linux-2.4.17-dc8/fs/dcache.c
> --- linux-2.4.17-base/fs/dcache.c  Fri Dec 21 23:11:55 2001
> +++ linux-2.4.17-dc8/fs/dcache.c   Fri Jul 12 16:18:39 2002

>  #include <linux/module.h>

>  #include <asm/uaccess.h>
> +#include <linux/rcupdate.h>

Please try to include <linux/*.h> before <asm/*.h> headers.

Quote:> +static void d_callback(void *arg)
> +{
> +  struct dentry * dentry = (struct dentry *)arg;
> +
> +  if (dname_external(dentry))
> +          kfree((void *) dentry->d_name.name);
> +  kmem_cache_free(dentry_cache, dentry);
> +}

why do you cast to void * before calling kfree?

Quote:> -  /* dput on a free dentry? */
> -  if (!list_empty(&dentry->d_lru))
> -          BUG();
> +  spin_lock(&dentry->d_lock);
> +        if (atomic_read(&dentry->d_count)) {
> +                spin_unlock(&dentry->d_lock);
> +                spin_unlock(&dcache_lock);
> +                return;
> +        }
> +

Please use tabs instead of eight spaces in kernel code.

Another implementation detail is whether we shouldn't spin on a bit of
->d_vfs_flags instead of increasing struct dentry further.  Maybe the
spin_lock_bit interface that wli prototypes might be a good choice.

Else the patch looks fine to me, although I'm wondering why you target 2.4.17
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

dcache scalability patch (2.4.17)

Post by Alexander Viro » Sat, 13 Jul 2002 23:40:06



> Here is the dcache scalability patch (cleaned up) as discussed in
> the previous post to lkml by Dipankar. The patch uses RCU for doing fast
> dcache lookup. It also does lazy updates to lru list of dentries to
> avoid doing write operations while doing lookup.

Where is
        * version for 2.5.<current>
        * analysis of benefits in real-world situations for 2.5 version?

Patch adds complexity and unless you can show that it gives significant
benefits outside of pathological situations, it's not going in.

Note: measurements on 2.4 do not make sense; reduction of cacheline
bouncing between 2.4 and 2.5 will change the results anyway and
if any of these patches are going to be applied to 2.4, reduction of
cacheline bouncing on ->d_count is going to go in before that one.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

dcache scalability patch (2.4.17)

Post by Dipankar Sarma » Sun, 14 Jul 2002 01:10:08




> > Here is the dcache scalability patch (cleaned up) as discussed in
> > the previous post to lkml by Dipankar. The patch uses RCU for doing fast
> > dcache lookup. It also does lazy updates to lru list of dentries to
> > avoid doing write operations while doing lookup.

> Where is
>    * version for 2.5.<current>
>    * analysis of benefits in real-world situations for 2.5 version?

I know that 2.5 patches are available, but Maneesh will probably
respond to this on Monday.

I am working on getting 2.5 measurements done. BTW, would you consider
specweb99 reasonably real-world ? If not, do you have any suggestions
for benchmarks ? I suspect that dbench wouldn't cut it ;-).

Quote:

> Patch adds complexity and unless you can show that it gives significant
> benefits outside of pathological situations, it's not going in.

Fair enough.

Quote:

> Note: measurements on 2.4 do not make sense; reduction of cacheline
> bouncing between 2.4 and 2.5 will change the results anyway and

Quite possible. Our performance measurements have been far
behind and we are catching up now. You may expect 2.5 numbers soon.

Quote:> if any of these patches are going to be applied to 2.4, reduction of
> cacheline bouncing on ->d_count is going to go in before that one.

That is an issue we need to work on. We can do some cache event
profiling to understand the extent of the d_count cacheline bouncing.
At the same time, it seems that the dcache_lock cacheline is also
bouncing around and it is probably more shared than the dentries
for / or /usr. One thing for sure - RCU based lookup of dcache
makes it difficult to optimize on dget()s. We will have to figure
out a way to do this.

Thanks
--

Linux Technology Center, IBM Software Lab, Bangalore, India.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

dcache scalability patch (2.4.17)

Post by Dipankar Sarma » Sun, 14 Jul 2002 01:20:11



> Else the patch looks fine to me, although I'm wondering why you target 2.4.17

Just to clarify from the project standpoint - we are *not* targeting
2.4.X. 2.4.X is what was used for ongoing performance measurement
work and we had to hop on to that bandwagon. It was just a proof
of concept.

We will publish the 2.5 stuff soon.

Thanks
--

Linux Technology Center, IBM Software Lab, Bangalore, India.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

dcache scalability patch (2.4.17)

Post by Paul Menage » Sun, 14 Jul 2002 02:40:11


Quote:

>Note: measurements on 2.4 do not make sense; reduction of cacheline
>bouncing between 2.4 and 2.5 will change the results anyway and
>if any of these patches are going to be applied to 2.4, reduction of
>cacheline bouncing on ->d_count is going to go in before that one.

I think there are some other possibilities for cache-bounce removal in
struct dentry. The most obvious one is d_vfs_flags - not only does it
get written to on every d_lookup (to set DCACHE_REFERENCED) but it also
shares a cache line (on Pentium IV) with d_op, d_iname and part of
d_name (along with d_sb and d_fsdata, but these don't seem to be so
crucial).

Some quick stats gathering suggested that DCACHE_REFERENCED is already
set 95%-98% of the time, so this cache bounce is not even doing anything
useful. I submitted this patch a while ago making the DCACHE_REFERENCED
bit setting be conditional on it not being already set, which didn't
generate any interest. One problem with this patch would be the
additional branch prediction misses (on some architectures?) that would
work against the benefits of not dirtying a cache line.

Maybe we should have a function definition something like the following:

static __inline__ void __ensure_bit_set(int nr, volatile unsigned long * addr)
{
#if defined (CONFIG_BIG_SMP) || defined(ARCH_HAVE_PREDICATED_WRITE)
        if(!test_bit(nr, addr))
#endif
                set_bit(nr, addr);

Quote:}      

so that architectures that support conditional writes (arm and ia64?) and
for SMP systems with enough processors that cache-bouncing is an issue,
the test can be performed, and for others where the branch prediction
miss would hurt us more than the cache dirtying it would just do it
unconditionally.

--- linux-2.5.13/fs/dcache.c    Thu May  2 17:22:42 2002

                        if (memcmp(dentry->d_name.name, str, len))
                                continue;
                }
-               dentry->d_vfs_flags |= DCACHE_REFERENCED;
+               if(!(dentry->d_vfs_flags & DCACHE_REFERENCED))
+                       dentry->d_vfs_flags |= DCACHE_REFERENCED;
                return dentry;
        }
        return NULL;

Perhaps another solution is to rearrange struct dentry - put the
volatile stuff in one cache line (or set of lines), and the constant
stuff in another. So probably d_count, d_lru and d_vfs_flags
would be in the volatile line, and most other stuff in the the other.

It would probably make sense to ensure that d_name and d_iname share a
cache line if possible, even on smaller cache-line architectures, and
maybe also d_hash and d_parent on that same line.

Paul

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

dcache scalability patch (2.4.17)

Post by Dipankar Sarma » Sun, 14 Jul 2002 03:00:13




> > Where is
> >       * version for 2.5.<current>
> >       * analysis of benefits in real-world situations for 2.5 version?

> I know that 2.5 patches are available, but Maneesh will probably
> respond to this on Monday.

> I am working on getting 2.5 measurements done. BTW, would you consider
> specweb99 reasonably real-world ? If not, do you have any suggestions
> for benchmarks ? I suspect that dbench wouldn't cut it ;-).

Hi Al,

Mark Hahn made a good point over private email that real-worldness
also includes hardware. I will dig around and see if we can work out
a setup with different dual/4CPU hardware to do webserver benchmarks
and analyze results for 2.5 patches.

Thanks
--

Linux Technology Center, IBM Software Lab, Bangalore, India.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

dcache scalability patch (2.4.17)

Post by Hanna Linder » Sun, 14 Jul 2002 03:10:06



Quote:

> Hi Al,

> Mark Hahn made a good point over private email that real-worldness
> also includes hardware. I will dig around and see if we can work out
> a setup with different dual/4CPU hardware to do webserver benchmarks
> and analyze results for 2.5 patches.

Dipankar,

        I just loaded a 2-way PII 400MHz with 256MB of RAM which
is a lot more real-world than most of the other systems we test. It
is on the net (behind the IBM firewall of course) so you can use
it if you want.

Hanna

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

dcache scalability patch (2.4.17)

Post by Hanna Linder » Sun, 14 Jul 2002 08:00:10




>> Here is the dcache scalability patch (cleaned up) as discussed in
>> the previous post to lkml by Dipankar. The patch uses RCU for doing fast
>> dcache lookup. It also does lazy updates to lru list of dentries to
>> avoid doing write operations while doing lookup.

> Where is
>    * version for 2.5.<current>
>    * analysis of benefits in real-world situations for 2.5 version?

> Patch adds complexity and unless you can show that it gives significant
> benefits outside of pathological situations, it's not going in.

Here are the slides where I presented, among other things, some
performance results of fastwalk compared to using rcu with lazy
updating of the d_lru list. The results are similar to what Dipankar
just published but there are a few more data points.

http://lse.sf.net/locking

Thanks.

Hanna

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

dcache scalability patch (2.4.17)

Post by Alexander Viro » Sun, 14 Jul 2002 18:00:05



> >Note: measurements on 2.4 do not make sense; reduction of cacheline
> >bouncing between 2.4 and 2.5 will change the results anyway and
> >if any of these patches are going to be applied to 2.4, reduction of
> >cacheline bouncing on ->d_count is going to go in before that one.

> I think there are some other possibilities for cache-bounce removal in
> struct dentry. The most obvious one is d_vfs_flags - not only does it
> get written to on every d_lookup (to set DCACHE_REFERENCED) but it also
> shares a cache line (on Pentium IV) with d_op, d_iname and part of
> d_name (along with d_sb and d_fsdata, but these don't seem to be so
> crucial).

> Some quick stats gathering suggested that DCACHE_REFERENCED is already
> set 95%-98% of the time, so this cache bounce is not even doing anything
> useful. I submitted this patch a while ago making the DCACHE_REFERENCED
> bit setting be conditional on it not being already set, which didn't
> generate any interest. One problem with this patch would be the
> additional branch prediction misses (on some architectures?) that would
> work against the benefits of not dirtying a cache line.

Frankly, I'd rather moved setting DCACHE_REFERENCED to dput().  We don't
care for that bit for dentries with positive ->d_count.

So I'd just do

vi fs/dcache.c -c '/|= DCACHE_R/d|/nr_un/pu|<|x'

and be done with that.  Linus?

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

dcache scalability patch (2.4.17)

Post by Paul Menage » Mon, 15 Jul 2002 02:30:09


Quote:

>Frankly, I'd rather moved setting DCACHE_REFERENCED to dput().  We don't
>care for that bit for dentries with positive ->d_count.

>So I'd just do

>vi fs/dcache.c -c '/|= DCACHE_R/d|/nr_un/pu|<|x'

>and be done with that.  Linus?

Some possibly minor issues with that:

- accessing foo/../bar, won't mark foo as referenced, even though it
might be being referenced frequently. Probably not a common case for foo
to be accessed exclusively in this way, but it could be fixed by marking
a dentry referenced when following ".."

- currently, negative dentries start off unreferenced and get marked
referenced the second and subsequent time that they're used. This would
change to starting off referenced (by the ref count set in lock_nd()
after the ->lookup()) but then not being marked referenced ever again,
as they're always looked at under dcache_lock, and no count is taken on
them. So used-once negative dentries would hang around longer, and
frequently-used negative dentries would be cleaned up sooner.

- referenced bit will be set possibly long after the reference is
actually taken/used, which might make dentry pruning a little less
accurate.

I was considering suggesting moving the reference bit setting to
unlock_nd(), since that's another place where we're already changing
d_count, but that still has the first two problems that I mentioned.
Either way, moving d_vfs_flags to the same cacheline as d_count would
probably be a good idea.

Paul

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

dcache scalability patch (2.4.17)

Post by Alexander Viro » Mon, 15 Jul 2002 02:40:06



> - accessing foo/../bar, won't mark foo as referenced, even though it
> might be being referenced frequently. Probably not a common case for foo
> to be accessed exclusively in this way, but it could be fixed by marking
> a dentry referenced when following ".."

It certainly will.  Look - until ->d_count hits zero referenced bit is
not touched or looked at.  At all.

Look at the code.  There is _no_ aging for dentries with positive ->d_count.
They start aging only when after they enter unused_list...

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

1. some 2.4.17 vs. 2.4.17-rmap8 vs. lowmem analysis


Yes, the rmap patch still has a known livelock. I haven't
quite tracked it down yet, but am looking into it whenever
I have the time.

regards,

Rik
--
Shortwave goes a long way:  irc.starchat.net  #swl

http://www.surriel.com/             http://distro.conectiva.com/

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

2. Get files through telnet?

3. Cryptoapi on 2.4.17 (2 patches)

4. FreeBSD and WEB Cam?

5. patent on O_ATOMICLOOKUP [Re: [PATCH] loopable tmpfs (2.4.17) ]

6. Mr BIOS and CtrlAlt+/-

7. [PATCH 2.4.17] Your suggestions for fast path walk

8. how to access ~username?

9. AX25 Patches for 2.4.17 and above - have they been included yet

10. AX25 Patches for 2.4.17 and above - have they been included y et

11. Oops with 2.4.17 + NFS patches

12. AX25 Patches for 2.4.17 and above - have they been included y et

13. kernel 2.4.17 with -rmap VM patch ROCKS!!!