2.5.32 IO performance issues

2.5.32 IO performance issues

Post by Badari Pulavarty » Sat, 31 Aug 2002 03:30:08



Hi,

I am having severe IO performance problems with 2.5.32 (2.5.31 works fine).
I was wondering what caused this.

As you can see, IO rate went from

                384MB/sec with 6% CPU utilization on 2.5.31
                        to
                120MB/sec with 19% CPU utilization on 2.5.32

Any idea ?

Thanks,
Badari

Hardware: 8x 700MHz P-III, 4 Qlogic FC controllers, 40 disks
Test: 40 dds on 40 raw devices (40 disks).

2.5.32 vmstat output:
---------------------

   procs                      memory    swap          io     system         cpu
 r  b  w   swpd   free   buff  cache  si  so    bi    bo   in    cs  us  sy  id
 3 36  0      0 3803972      0  23392   0   0 119962    24 1939  3592   0  18  81
 2 37  0      0 3804144      0  23392   0   0 121024     0 1938  3611   0  19  81
 0 39  0      0 3804776      0  23392   0   0 119834     0 1925  3553   0  19  81
 0 39  0      0 3804172      0  23392   0   0 120128     0 1932  3584   0  19  81
 1 38  0      0 3804680      0  23392   0   0 120346     0 1932  3585   0  19  81
 4 35  0      0 3804536      0  23392   0   0 120806     0 1940  3625   0  18  81
 1 38  1      0 3804212      0  23392   0   0 120512     0 1938  3607   0  19  81
 0 39  0      0 3804592      0  23392   0   0 120282     0 1931  3572   0  19  81
 0 39  0      0 3803944      0  23392   0   0 120755     0 1936  3617   0  19  81

2.5.31 vmstat output:
---------------------
   procs                      memory    swap          io     system         cpu
 r  b  w   swpd   free   buff  cache  si  so    bi    bo   in    cs  us  sy  id
 0 39  0      0 3803732      0  32080   0   0 384141    13 3966  6232   0   5  95
 0 39  0      0 3803700      0  32080   0   0 384230     3 3962  6248   0   6  94
 0 39  0      0 3803696      0  32080   0   0 384256     2 3966  6226   0   5  95
 0 39  0      0 3803696      0  32080   0   0 384179     0 3961  6236   0   5  95
 1 38  0      0 3803684      0  32080   0   0 384230     0 3967  6229   0   5  95
 0 39  0      0 3803680      0  32080   0   0 383782     0 3958  6233   0   6  94
 0 39  0      0 3803680      0  32080   0   0 384154     0 3964  6225   0   5  95
 0 39  0      0 3803676      0  32080   0   0 384077     7 3968  6252   0   5  95

Profile=2 output for 2.5.32:
---------------------------
# readprofile | sort -nr +2

151712 default_idle                             2370.5000
21622 __scsi_end_request                       122.8523
  4491 do_softirq                                23.3906
  2543 scsi_dispatch_cmd                          4.1826
   306 dio_bio_complete                           1.7386
  1798 __make_request                             1.5608
   756 rmqueue                                    0.9265
   198 scsi_queue_next_request                    0.6513
   100 dio_await_one                              0.6250
    25 __wake_up                                  0.5208
   499 scsi_request_fn                            0.4873
    15 scsi_sense_valid                           0.4688
    14 page_pool_alloc                            0.4375
   167 blk_rq_map_sg                              0.4349
   381 schedule                                   0.4252
   132 __set_page_dirty_buffers                   0.3750
    51 generic_unplug_device                      0.3542
    17 bio_put                                    0.3542
    78 scsi_finish_command                        0.3482
     5 cap_file_permission                        0.3125
   112 mempool_alloc                              0.2917
    23 bio_destructor                             0.2875
   215 get_user_pages                             0.2859
   117 bio_alloc                                  0.2812
    21 fget                                       0.2625
    12 dio_prep_bio                               0.2500
   194 blk_queue_bounce                           0.2497
   165 do_direct_IO                               0.2398
    25 cpu_idle                                   0.2232
     7 _alloc_pages                               0.2188
     2 syscall_exit                               0.1818
    23 kmem_cache_free                            0.1797
    44 __scsi_release_command                     0.1654
    13 dio_get_page                               0.1625
     5 __get_free_pages                           0.1562
     5 fput                                       0.1562
    32 add_timer                                  0.1429
    20 blk_run_queues                             0.1389
    42 kmem_cache_alloc                           0.1382
    23 add_entropy_words                          0.1307
    63 __alloc_pages                              0.1270
     8 scsi_free_sgtable                          0.1250
     8 dio_new_bio                                0.1250
     4 slab_pool_alloc                            0.1250
    25 scsi_init_cmd_errh                         0.1116
    10 mempool_free                               0.1111
186646 total                                      0.1034
     6 __generic_copy_to_user                     0.0938
     3 scsi_pool_free                             0.0938
     3 scsi_pool_alloc                            0.0938
     4 system_call                                0.0909
    27 scsi_decide_disposition                    0.0844
     9 submit_bio                                 0.0804
     9 __run_task_queue                           0.0804
    24 vfs_read                                   0.0789
    15 rw_raw_dev                                 0.0781
     6 bh_action                                  0.0750
     8 scsi_add_timer                             0.0714
    11 generic_file_direct_IO                     0.0683
    57 sd_init_command                            0.0660
     6 find_vma                                   0.0625
     6 blkdev_get_blocks                          0.0625
     5 max_block                                  0.0625
     4 dio_await_completion                       0.0625
     3 blkdev_direct_IO                           0.0625
     2 slab_pool_free                             0.0625
    18 vfs_write                                  0.0592
    10 batch_entropy_process                      0.0568
    15 get_more_blocks                            0.0551
    31 scsi_softirq                               0.0538
     4 dio_bio_submit                             0.0500
    35 timer_bh                                   0.0497
    12 scsi_alloc_sgtable                         0.0469
    20 generic_direct_IO                          0.0431
     4 find_extend_vma                            0.0417
     4 dio_bio_alloc                              0.0417
    20 sd_rw_intr                                 0.0368
     7 tasklet_hi_action                          0.0365
     4 dio_bio_reap                               0.0357
     9 generic_make_request                       0.0352
     2 sys_read                                   0.0312
     1 __wake_up_locked                           0.0312
     1 tqueue_bh                                  0.0312
    30 scsi_io_completion                         0.0280
     4 write_profile                              0.0227
     5 scsi_init_io                               0.0223
     5 dio_refill_pages                           0.0208
     1 in_group_p                                 0.0208
     1 credit_entropy_store                       0.0208
     2 .text.lock.mempool                         0.0196
     1 .text.lock.highmem                         0.0182
     4 mod_timer                                  0.0167
     1 atomic_dec_and_lock                        0.0161
     1 sys_write                                  0.0156
     1 free_pages                                 0.0156
     1 fd_install                                 0.0156
     1 strncpy_from_user                          0.0125
     1 scsi_get_request_dev                       0.0125
     2 __d_lookup                                 0.0089
     1 pfifo_fast_enqueue                         0.0089
     1 session_of_pgrp                            0.0078
     7 proc_pid_status                            0.0071
     2 page_remove_rmap                           0.0069
     2 i8042_interrupt                            0.0066
     1 scsi_eh_completed_normally                 0.0063
     4 number                                     0.0057
     1 skb_release_data                           0.0057
     1 tcp_v4_send_check                          0.0052
     1 locks_remove_flock                         0.0052
     1 eth_type_trans                             0.0052
     1 __find_get_block                           0.0048
     1 page_add_rmap                              0.0045
     1 __kfree_skb                                0.0045
     3 proc_pid_stat                              0.0043
     1 bh_lru_install                             0.0042
     4 vsnprintf                                  0.0038
     1 tcp_v4_checksum_init                       0.0037
     1 ksoftirqd                                  0.0037
     1 tcp_poll                                   0.0030
     1 may_open                                   0.0026
     1 dentry_open                                0.0026
     1 sd_open                                    0.0024
     5 link_path_walk                             0.0023
     2 kstat_read_proc                            0.0021
     1 do_anonymous_page                          0.0021
     1 tty_write                                  0.0015
     1 __free_pages_ok                            0.0013
     1 ext3_find_entry                            0.0012
     2 tulip_interrupt                            0.0010

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

2.5.32 IO performance issues

Post by Rik van Riel » Sat, 31 Aug 2002 03:50:09



> I am having severe IO performance problems with 2.5.32 (2.5.31 works fine).
> I was wondering what caused this.

> As you can see, IO rate went from

>            384MB/sec with 6% CPU utilization on 2.5.31
>                    to
>            120MB/sec with 19% CPU utilization on 2.5.32

> Any idea ?

384 MB/s is suspiciously fast.  What kind of disk subsystem
do you have to achieve that speed ?

Or did 2.5.31 keep the dirty data in memory, instead of
writing it to disk ? ;)

regards,

Rik
--
Bravely reimplemented by the knights who say "NIH".

http://www.surriel.com/             http://distro.conectiva.com/

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

2.5.32 IO performance issues

Post by Andrew Morton » Sat, 31 Aug 2002 03:50:12



> Hi,

> I am having severe IO performance problems with 2.5.32 (2.5.31 works fine).
> I was wondering what caused this.

> As you can see, IO rate went from

>                 384MB/sec with 6% CPU utilization on 2.5.31
>                         to
>                 120MB/sec with 19% CPU utilization on 2.5.32

> ...
> 151712 default_idle                             2370.5000
> 21622 __scsi_end_request                       122.8523

block-highmem is bust for scsi. (aic7xxx at least).  Does
http://www.zip.com.au/~akpm/linux/patches/2.5/2.5.32/2.5.32-mm2/broke...
fix it?
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
 
 
 

2.5.32 IO performance issues

Post by Dave Hansen » Sat, 31 Aug 2002 04:10:09




>>I am having severe IO performance problems with 2.5.32 (2.5.31 works fine).
>>I was wondering what caused this.

>>As you can see, IO rate went from

>>                384MB/sec with 6% CPU utilization on 2.5.31
>>                        to
>>                120MB/sec with 19% CPU utilization on 2.5.32

>>Any idea ?

> 384 MB/s is suspiciously fast.  What kind of disk subsystem
> do you have to achieve that speed ?

Oh, that's nothing! :)

> Hardware: 8x 700MHz P-III, 4 Qlogic FC controllers, 40 disks
> Test: 40 dds on 40 raw devices (40 disks).

One tray of 10 10k RPM disks, per controller.  The controllers are also
spread across 2 completely separate 64-bit/66MHz PCI busses (not bridged).

   Bus 10, device   8, function  0:
     SCSI storage controller: QLogic Corp. QLA2200 (#2) (rev 5).
       IRQ 23.
       Master Capable.  Latency=96.  Min Gnt=64.
       I/O at 0xb000 [0xb0ff].
       Non-prefetchable 32 bit memory at 0xfe1ff000 [0xfe1fffff].
   Bus 10, device   9, function  0:
     SCSI storage controller: QLogic Corp. QLA2200 (#3) (rev 5).
       IRQ 24.
       Master Capable.  Latency=96.  Min Gnt=64.
       I/O at 0xb100 [0xb1ff].
       Non-prefetchable 32 bit memory at 0xfe1fe000 [0xfe1fefff].
   Bus 13, device   6, function  0:
     SCSI storage controller: QLogic Corp. QLA2200 (#4) (rev 5).
       IRQ 21.
       Master Capable.  Latency=96.  Min Gnt=64.
       I/O at 0xd000 [0xd0ff].
       Non-prefetchable 32 bit memory at 0xfebff000 [0xfebfffff].
   Bus 13, device   7, function  0:
     SCSI storage controller: QLogic Corp. QLA2200 (#5) (rev 5).
       IRQ 22.
       Master Capable.  Latency=96.  Min Gnt=64.
       I/O at 0xd100 [0xd1ff].
       Non-prefetchable 32 bit memory at 0xfebfe000 [0xfebfefff].

--
Dave Hansen

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

2.5.32 IO performance issues

Post by Badari Pulavarty » Sat, 31 Aug 2002 06:00:11



> > Hi,

> > I am having severe IO performance problems with 2.5.32 (2.5.31 works fine).
> > I was wondering what caused this.

> > As you can see, IO rate went from

> >                 384MB/sec with 6% CPU utilization on 2.5.31
> >                         to
> >                 120MB/sec with 19% CPU utilization on 2.5.32

> > ...
> > 151712 default_idle                             2370.5000
> > 21622 __scsi_end_request                       122.8523

> block-highmem is bust for scsi. (aic7xxx at least).  Does
> http://www.zip.com.au/~akpm/linux/patches/2.5/2.5.32/2.5.32-mm2/broke...
> fix it?

Hmm !! This patch fixed it. I remember you gave me this patch for 2.5.31. But 2.5.31
was doing fine without it. But 2.5.32 seems to need it.

   procs                      memory    swap          io     system         cpu
 r  b  w   swpd   free   buff  cache  si  so    bi    bo   in    cs  us  sy  id
 1 38  0      0 3811160      0  23532   0   0 384461     0 3981  6346   0   5  95
 0 39  0      0 3811152      0  23532   0   0 384499     0 3967  6354   0   5  95
 0 39  0      0 3811148      0  23532   0   0 384154     0 3974  6341   0   5  95
 0 39  0      0 3811144      0  23532   0   0 384013     0 3964  6344   0   5  95
 0 39  0      0 3811204      0  23532   0   0 384154     0 3966  6357   0   5  95
 0 39  0      0 3811176      0  23532   0   0 384256     0 3973  6341   0   5  95
 0 39  0      0 3811168      0  23532   0   0 384320     0 3969  6329   0   5  95
 0 39  0      0 3811152      0  23532   0   0 384294     0 3980  6335   0   5  95

Here is the profile output with this patch:

# readprofile  | sort -nr +2
221145 default_idle                             3455.3906
  3163 do_softirq                                16.4740
  1489 __scsi_end_request                         8.4602
  1696 scsi_dispatch_cmd                          2.7895
   371 dio_bio_complete                           2.1080
   509 blk_rq_map_sg                              1.3255
   198 dio_await_one                              1.2375
    57 dio_prep_bio                               1.1875
    56 bio_put                                    1.1667
    71 __generic_copy_to_user                     1.1094
   630 do_direct_IO                               0.9157
    42 __wake_up                                  0.8750
   297 __set_page_dirty_buffers                   0.8438
   622 schedule                                   0.6942
   629 __make_request                             0.5460
   402 get_user_pages                             0.5346
    17 scsi_sense_valid                           0.5312
    42 bio_destructor                             0.5250
   198 bio_alloc                                  0.4760
    32 dio_get_page                               0.4000

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

2.5.32 IO performance issues

Post by Andrew Morton » Sat, 31 Aug 2002 07:40:04




> > > block-highmem is bust for scsi. (aic7xxx at least).  Does
> > > http://www.zip.com.au/~akpm/linux/patches/2.5/2.5.32/2.5.32-mm2/broke...
> > > fix it?

> > Hmm !! This patch fixed it. I remember you gave me this patch for 2.5.31. But 2.5.31
> > was doing fine without it. But 2.5.32 seem to need it.

> The above patch works fine to get back to the previous (pre-2.5.32) state.
> But, it makes no sense to modify the bounce_limit based on the type of
> storage that is attached to an adapter.

I agree.  Hence the name "scsi_hack" ;)

> We want to allow high mem for block devices other than SCSI direct access
> devices (TYPE_DISK), such as CD ROM (SDpnt->type TYPE_ROM), WORM devices
> (TYPE_WORM), and optical disks (TYPE_MOD).

> So it is better to patch scsi_initialize_merge_fn:

> --- 1.16/drivers/scsi/scsi_merge.c      Fri Jul  5 09:43:00 2002
> +++ edited/drivers/scsi/scsi_merge.c    Thu Aug 29 14:30:12 2002

>          * Enable highmem I/O, if appropriate.
>          */
>         bounce_limit = BLK_BOUNCE_HIGH;
> -       if (SHpnt->highmem_io && (SDpnt->type == TYPE_DISK)) {
> +       if (SHpnt->highmem_io) {
>                 if (!PCI_DMA_BUS_IS_PHYS)
>                         /* Platforms with virtual-DMA translation
>                          * hardware have no practical limit.

That will certainly fix it.  But who added the TYPE_DISK check,
and why???
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
 
 
 

2.5.32 IO performance issues

Post by Patrick Mansfield » Sat, 31 Aug 2002 08:00:10



> > block-highmem is bust for scsi. (aic7xxx at least).  Does
> > http://www.zip.com.au/~akpm/linux/patches/2.5/2.5.32/2.5.32-mm2/broke...
> > fix it?

> Hmm !! This patch fixed it. I remember you gave me this patch for 2.5.31. But 2.5.31
> was doing fine without it. But 2.5.32 seem to need it.

The above patch works fine to get back to the previous (pre-2.5.32) state.
But, it makes no sense to modify the bounce_limit based on the type of
storage that is attached to an adapter.

We want to allow high mem for block devices other than SCSI direct access
devices (TYPE_DISK), such as CD ROM (SDpnt->type TYPE_ROM), WORM devices
(TYPE_WORM), and optical disks (TYPE_MOD).

So it is better to patch scsi_initialize_merge_fn:

--- 1.16/drivers/scsi/scsi_merge.c      Fri Jul  5 09:43:00 2002

         * Enable highmem I/O, if appropriate.
         */
        bounce_limit = BLK_BOUNCE_HIGH;
-       if (SHpnt->highmem_io && (SDpnt->type == TYPE_DISK)) {
+       if (SHpnt->highmem_io) {
                if (!PCI_DMA_BUS_IS_PHYS)
                        /* Platforms with virtual-DMA translation
                         * hardware have no practical limit.

-- Patrick Mansfield
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

2.5.32 IO performance issues

Post by Jens Axboe » Sun, 01 Sep 2002 18:20:08



> > We want to allow high mem for block devices other than SCSI direct access
> > devices (TYPE_DISK), such as CD ROM (SDpnt->type TYPE_ROM), WORM devices
> > (TYPE_WORM), and optical disks (TYPE_MOD).

> > So it is better to patch scsi_initialize_merge_fn:

> > --- 1.16/drivers/scsi/scsi_merge.c      Fri Jul  5 09:43:00 2002
> > +++ edited/drivers/scsi/scsi_merge.c    Thu Aug 29 14:30:12 2002

> >          * Enable highmem I/O, if appropriate.
> >          */
> >         bounce_limit = BLK_BOUNCE_HIGH;
> > -       if (SHpnt->highmem_io && (SDpnt->type == TYPE_DISK)) {
> > +       if (SHpnt->highmem_io) {
> >                 if (!PCI_DMA_BUS_IS_PHYS)
> >                         /* Platforms with virtual-DMA translation
> >                          * hardware have no practical limit.

> That will certainly fix it.  But who added the TYPE_DISK check,
> and why???

I guess that block-highmem has been around long enough, that I can use
the term 'historically' at least in the kernel sense :-)

This extra check was added for IDE because each device type driver
(ide-disk, ide-cd, etc) needed to be updated to not assume virtual
mappings of request data was valid. I only did that for ide-disk, since
this is the only one where bounce buffering really hurt performance
wise. So while ide-cd and ide-tape etc could have been updated, I deemed
it uninteresting and not worthwhile.

Now, this was just carried straight into the scsi counterparts,
conveniently, because of laziness. A quick glance at sr shows that it
too can avoid bouncing easily (no changes needed). st may need some
changes, though. So again, for scsi it was a matter of not impacting
existing code in 2.4 too much.

So TYPE_DISK check can be killed in 2.5 if someone does the work of
checking that it is safe. I'm not so sure it will make eg your SCSI
CD-ROM that much faster :-)

--
Jens Axboe

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/