direct-to-BIO writeback for writeback-mode ext3

direct-to-BIO writeback for writeback-mode ext3

Post by Andrew Morton » Sun, 02 Jun 2002 17:50:09



Turn on direct-to-BIO writeback for ext3 in data=writeback mode.

=====================================

--- 2.5.19/fs/ext3/inode.c~ext3-writepages      Sat Jun  1 01:18:10 2002

        releasepage:    ext3_releasepage,       /* BKL not held.  Don't need */
 };

+/* For writeback mode, we can use mpage_writepages() */
+
+static int
+ext3_writepages(struct address_space *mapping, int *nr_to_write)
+{
+       int ret;
+       int err;
+
+       ret = write_mapping_buffers(mapping);
+       err = mpage_writepages(mapping, nr_to_write, ext3_get_block);
+       if (!ret)
+               ret = err;
+       return ret;
+}
+
+struct address_space_operations ext3_writeback_aops = {
+       readpage:       ext3_readpage,          /* BKL not held.  Don't need */
+       readpages:      ext3_readpages,         /* BKL not held.  Don't need */
+       writepage:      ext3_writepage,         /* BKL not held.  We take it */
+       writepages:     ext3_writepages,        /* BKL not held.  Don't need */
+       sync_page:      block_sync_page,
+       prepare_write:  ext3_prepare_write,     /* BKL not held.  We take it */
+       commit_write:   ext3_commit_write,      /* BKL not held.  We take it */
+       bmap:           ext3_bmap,              /* BKL held */
+       flushpage:      ext3_flushpage,         /* BKL not held.  Don't need */
+       releasepage:    ext3_releasepage,       /* BKL not held.  Don't need */
+};
+
 /*
  * ext3_block_truncate_page() zeroes out a mapping from file offset `from'

        else if (S_ISREG(inode->i_mode)) {
                inode->i_op = &ext3_file_inode_operations;
                inode->i_fop = &ext3_file_operations;
-               inode->i_mapping->a_ops = &ext3_aops;
+               if (ext3_should_writeback_data(inode))
+                       inode->i_mapping->a_ops = &ext3_writeback_aops;
+               else
+                       inode->i_mapping->a_ops = &ext3_aops;
        } else if (S_ISDIR(inode->i_mode)) {
                inode->i_op = &ext3_dir_inode_operations;

                        inode->i_op = &ext3_fast_symlink_inode_operations;
                else {
                        inode->i_op = &page_symlink_inode_operations;
-                       inode->i_mapping->a_ops = &ext3_aops;
+                       if (ext3_should_writeback_data(inode))
+                               inode->i_mapping->a_ops = &ext3_writeback_aops;
+                       else
+                               inode->i_mapping->a_ops = &ext3_aops;
                }
        } else
                init_special_inode(inode, inode->i_mode,
--- 2.5.19/include/linux/ext3_jbd.h~ext3-writepages     Sat Jun  1 01:18:10 2002

        return (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA);
 }

+static inline int ext3_should_writeback_data(struct inode *inode)
+{
+       return !ext3_should_journal_data(inode) &&
+                       !ext3_should_order_data(inode);
+}

 #endif /* _LINUX_EXT3_JBD_H */
--- 2.5.19/include/linux/ext3_fs.h~ext3-writepages      Sat Jun  1 01:18:10 2002

 /* inode.c */
 extern struct address_space_operations ext3_aops;
+extern struct address_space_operations ext3_writeback_aops;

 /* namei.c */
 extern struct inode_operations ext3_dir_inode_operations;
--- 2.5.19/fs/ext3/namei.c~ext3-writepages      Sat Jun  1 01:18:10 2002

        if (!IS_ERR(inode)) {
                inode->i_op = &ext3_file_inode_operations;
                inode->i_fop = &ext3_file_operations;
-               inode->i_mapping->a_ops = &ext3_aops;
+               if (ext3_should_writeback_data(inode))
+                       inode->i_mapping->a_ops = &ext3_writeback_aops;
+               else
+                       inode->i_mapping->a_ops = &ext3_aops;
                ext3_mark_inode_dirty(handle, inode);
                err = ext3_add_nondir(handle, dentry, inode);

        if (l > sizeof (EXT3_I(inode)->i_data)) {
                inode->i_op = &page_symlink_inode_operations;
-               inode->i_mapping->a_ops = &ext3_aops;
+               if (ext3_should_writeback_data(inode))
+                       inode->i_mapping->a_ops = &ext3_writeback_aops;
+               else
+                       inode->i_mapping->a_ops = &ext3_aops;
                /*
                 * page_symlink() calls into ext3_prepare/commit_write.
                 * We have a transaction open.  All is sweetness.  It also sets

-
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

direct-to-BIO writeback for writeback-mode ext3

Post by Andreas Dilger » Mon, 03 Jun 2002 04:20:09



> Turn on direct-to-BIO writeback for ext3 in data=writeback mode.

A minor note on this (especially minor since I believe data=journal
doesn't even work in 2.5), but you should probably also change the
address ops in ext3/ioctl.c if you enable/disable per-inode data
journaling.

Cheers, Andreas
--
Andreas Dilger
http://www-mddsp.enel.ucalgary.ca/People/adilger/
http://sourceforge.net/projects/ext2resize/

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

direct-to-BIO writeback for writeback-mode ext3

Post by Andrew Morton » Mon, 03 Jun 2002 05:20:06




> > Turn on direct-to-BIO writeback for ext3 in data=writeback mode.

> A minor note on this (especially minor since I believe data=journal
> doesn't even work in 2.5), but you should probably also change the
> address ops in ext3/ioctl.c if you enable/disable per-inode data
> journaling.

hrm.  Actually, changing journalling mode against a file while
modifications are happening against it is almost certain to explode
if the timing is right.  ISTR that we have seen bug reports against
this on ext3-users.  This is just waaaay too hard to do.

But we can fix it by doing the opposite: create three separate
a_ops instances, one for each journalling mode.  Assign it at
new_inode/read_inode time.

This way, we don't have to do the `ext3_should_journal_data()'
tests all over the place and we just don't care if someone diddles
the journalling mode while the file is otherwise in use.

Another one for my todo list..

-
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

direct-to-BIO writeback for writeback-mode ext3

Post by Andreas Dilger » Mon, 03 Jun 2002 06:00:09





> > > Turn on direct-to-BIO writeback for ext3 in data=writeback mode.

> > A minor note on this (especially minor since I believe data=journal
> > doesn't even work in 2.5), but you should probably also change the
> > address ops in ext3/ioctl.c if you enable/disable per-inode data
> > journaling.

> hrm.  Actually, changing journalling mode against a file while
> modifications are happening against it is almost certain to explode
> if the timing is right.  ISTR that we have seen bug reports against
> this on ext3-users.  This is just waaaay too hard to do.

Actually, if you look at the code in ioctl.c for changing the journaling
mode of a file, it basically stops _all_ I/O to the filesystem and waits
for it to complete before changing the journal data flag, so it should
also be possible to change the aops pointer at the same time.  The "stop
all I/O" is one of the reasons why enabling data journaling on files is
only allowed for root/privileged users.

> But we can fix it by doing the opposite: create three separate
> a_ops instances, one for each journalling mode.  Assign it at
> new_inode/read_inode time.

Sure, as long as this doesn't increase the amount of code duplication.

Cheers, Andreas
--
Andreas Dilger
http://www-mddsp.enel.ucalgary.ca/People/adilger/
http://sourceforge.net/projects/ext2resize/

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/