mm.h documentation

mm.h documentation

Post by Rik van Rie » Mon, 18 Jun 2001 11:30:08



Hi Linus,

please consider the attached patch for inclusion
in the next -pre kernel, all it does is add and
update the documentation in mm.h

ObDisclaimer: -ac users have been running it for ages

cheers,

Rik
--
Executive summary of a recent Microsoft press release:
   "we are concerned about the GNU General Public License (GPL)"

                http://www.surriel.com/
http://www.conectiva.com/ http://distro.conectiva.com/

--- mm.h.orig   Wed Mar  7 15:36:32 2001
+++ mm.h        Wed Mar  7 19:30:44 2001
@@ -39,32 +39,37 @@
  * library, the executable area etc).
  */
 struct vm_area_struct {
-       struct mm_struct * vm_mm;       /* VM area parameters */
-       unsigned long vm_start;
-       unsigned long vm_end;
+       struct mm_struct * vm_mm;       /* The address space we belong to. */
+       unsigned long vm_start;         /* Our start address within vm_mm. */
+       unsigned long vm_end;           /* Our end address within vm_mm. */

        /* linked list of VM areas per task, sorted by address */
        struct vm_area_struct *vm_next;

-       pgprot_t vm_page_prot;
-       unsigned long vm_flags;
+       pgprot_t vm_page_prot;          /* Access permissions of this VMA. */
+       unsigned long vm_flags;         /* Flags, listed below. */

        /* AVL tree of VM areas per task, sorted by address */
        short vm_avl_height;
        struct vm_area_struct * vm_avl_left;
        struct vm_area_struct * vm_avl_right;

-       /* For areas with an address space and backing store,
+       /*
+        * For areas with an address space and backing store,
         * one of the address_space->i_mmap{,shared} lists,
         * for shm areas, the list of attaches, otherwise unused.
         */
        struct vm_area_struct *vm_next_share;
        struct vm_area_struct **vm_pprev_share;

+       /* Function pointers to deal with this struct. */
        struct vm_operations_struct * vm_ops;
-       unsigned long vm_pgoff;         /* offset in PAGE_SIZE units, *not* PAGE_CACHE_SIZE */
-       struct file * vm_file;
-       unsigned long vm_raend;
+
+       /* Information about our backing store: */
+       unsigned long vm_pgoff;         /* Offset (within vm_file) in PAGE_SIZE
+                                          units, *not* PAGE_CACHE_SIZE */
+       struct file * vm_file;          /* File we map to (can be NULL). */
+       unsigned long vm_raend;         /* XXX: put full readahead info here. */
        void * vm_private_data;         /* was vm_pte (shared mem) */
 };

@@ -90,6 +95,7 @@
 #define VM_LOCKED      0x00002000
 #define VM_IO           0x00004000     /* Memory mapped I/O or similar */

+                                       /* Used by sys_madvise() */
 #define VM_SEQ_READ    0x00008000      /* App will access data sequentially */
 #define VM_RAND_READ   0x00010000      /* App will not benefit from clustered reads */

@@ -124,37 +130,144 @@
 };

 /*
+ * Each physical page in the system has a struct page associated with
+ * it to keep track of whatever it is we are using the page for at the
+ * moment. Note that we have no way to track which tasks are using
+ * a page.
+ *
  * Try to keep the most commonly accessed fields in single cache lines
  * here (16 bytes or greater).  This ordering should be particularly
  * beneficial on 32-bit processors.
  *
  * The first line is data used in page cache lookup, the second line
  * is used for linear searches (eg. clock algorithm scans).
+ *
+ * TODO: make this structure smaller, it could be as small as 32 bytes.
  */
 typedef struct page {
-       struct list_head list;
-       struct address_space *mapping;
-       unsigned long index;
-       struct page *next_hash;
-       atomic_t count;
-       unsigned long flags;    /* atomic flags, some possibly updated asynchronously */
-       struct list_head lru;
-       unsigned long age;
-       wait_queue_head_t wait;
-       struct page **pprev_hash;
-       struct buffer_head * buffers;
-       void *virtual; /* non-NULL if kmapped */
-       struct zone_struct *zone;
+       struct list_head list;          /* ->mapping has some page lists. */
+       struct address_space *mapping;  /* The inode (or ...) we belong to. */
+       unsigned long index;            /* Our offset within mapping. */
+       struct page *next_hash;         /* Next page sharing our hash bucket in
+                                          the pagecache hash table. */
+       atomic_t count;                 /* Usage count, see below. */
+       unsigned long flags;            /* atomic flags, some possibly
+                                          updated asynchronously */
+       struct list_head lru;           /* Pageout list, eg. active_list;
+                                          protected by pagemap_lru_lock !! */
+       unsigned long age;              /* Page aging counter. */
+       wait_queue_head_t wait;         /* Page locked?  Stand in line... */
+       struct page **pprev_hash;       /* Complement to *next_hash. */
+       struct buffer_head * buffers;   /* Buffer maps us to a disk block. */
+       void *virtual;                  /* Kernel virtual address (NULL if
+                                          not kmapped, ie. highmem) */
+       struct zone_struct *zone;       /* Memory zone we are in. */
 } mem_map_t;

+/*
+ * Methods to modify the page usage count.
+ *
+ * What counts for a page usage:
+ * - cache mapping   (page->mapping)
+ * - disk mapping    (page->buffers)
+ * - page mapped in a task's page tables, each mapping
+ *   is counted separately
+ *
+ * Also, many kernel routines increase the page count before a critical
+ * routine so they can be sure the page doesn't go away from under them.
+ */
 #define get_page(p)            atomic_inc(&(p)->count)
 #define put_page(p)            __free_page(p)
 #define put_page_testzero(p)   atomic_dec_and_test(&(p)->count)
 #define page_count(p)          atomic_read(&(p)->count)
 #define set_page_count(p,v)    atomic_set(&(p)->count, v)

-/* Page flag bit values */
-#define PG_locked               0
+/*
+ * Various page->flags bits:
+ *
+ * PG_reserved is set for special pages, which can never be swapped
+ * out. Some of them might not even exist (eg. empty_bad_page)...
+ *
+ * Multiple processes may "see" the same page. E.g. for untouched
+ * mappings of /dev/null, all processes see the same page full of
+ * zeroes, and text pages of executables and shared libraries have
+ * only one copy in memory, at most, normally.
+ *
+ * For the non-reserved pages, page->count denotes a reference count.
+ *   page->count == 0 means the page is free.
+ *   page->count == 1 means the page is used for exactly one purpose
+ *   (e.g. a private data page of one process).
+ *
+ * A page may be used for kmalloc() or anyone else who does a
+ * __get_free_page(). In this case the page->count is at least 1, and
+ * all other fields are unused but should be 0 or NULL. The
+ * management of this page is the responsibility of the one who uses
+ * it.
+ *
+ * The other pages (we may call them "process pages") are completely
+ * managed by the Linux memory manager: I/O, buffers, swapping etc.
+ * The following discussion applies only to them.
+ *
+ * A page may belong to an inode's memory mapping. In this case,
+ * page->mapping is the pointer to the inode, and page->offset is the
+ * file offset of the page (not necessarily a multiple of PAGE_SIZE).
+ *
+ * A page may have buffers allocated to it. In this case,
+ * page->buffers is a circular list of these buffer heads. Else,
+ * page->buffers == NULL.
+ *
+ * For pages belonging to inodes, the page->count is the number of
+ * attaches, plus 1 if buffers are allocated to the page, plus one
+ * for the page cache itself.
+ *
+ * All pages belonging to an inode are in these doubly linked lists:
+ * mapping->clean_pages, mapping->dirty_pages and mapping->locked_pages;
+ * using the page->list list_head. These fields are also used for
+ * freelist managemet (when page->count==0).
+ *
+ * There is also a hash table mapping (inode,offset) to the page
+ * in memory if present. The lists for this hash table use the fields
+ * page->next_hash and page->pprev_hash.
+ *
+ * All process pages can do I/O:
+ * - inode pages may need to be read from disk,
+ * - inode pages which have been modified and are MAP_SHARED may need
+ *   to be written to disk,
+ * - private pages which have been modified may need to be swapped out
+ *   to swap space and (later) to be read back into memory.
+ * During disk I/O, PG_locked is used. This bit is set before I/O
+ * and reset when I/O completes. page->wait is a wait queue of all
+ * tasks waiting for the I/O on this page to complete.
+ * PG_uptodate tells whether the page's contents is valid.
+ * When a read completes, the page becomes uptodate, unless a disk I/O
+ * error happened.
+ *
+ * For choosing which pages to swap out, inode pages carry a
+ * PG_referenced bit, which is set any time the system accesses
+ * that page through the (inode,offset) hash table. This referenced
+ * bit, together with the referenced bit in the page tables, is used
+ * to manipulate page->age and move the page across the active,
+ * inactive_dirty and inactive_clean lists.
+ *
+ * Note that the referenced bit, the page->lru list_head and the
+ * active, inactive_dirty and inactive_clean lists are protected by
+ * the pagemap_lru_lock, and *NOT* by the usual PG_locked bit!
+ *
+ * PG_skip is used on sparc/sparc64 architectures to "skip" certain
+ * parts of the address space.
+ *
+ * PG_error is set to indicate that an I/O error occurred on this page.
+ *
+ * PG_arch_1 is an architecture specific page state bit.  The generic
+ * code guarentees that this bit is cleared for a page when it first
+ * is entered into the page cache.
+ *
+ * PG_highmem pages are not permanently mapped into the kernel virtual
+ * address space, they need to be kmapped separately for doing IO on
+ * the pages. The struct page (these bits with information) are always
+ * mapped into kernel address space...
+ */
+#define PG_locked               0      /* Page is locked. Don't touch. */
 #define PG_error                1
 #define PG_referenced           2
 #define PG_uptodate             3
@@ -254,81 +367,7 @@
 #define NOPAGE_SIGBUS  (NULL)
 #define NOPAGE_OOM     ((struct page *) (-1))

-
-/*
- * Various page->flags bits:
- *
- * PG_reserved is set for a page which must never be accessed (which
- * may not even be present).
- *
- * PG_DMA has been removed, page->zone now tells exactly wether the
- * page is suited to do DMAing into.
- *
- * Multiple processes may "see" the same page. E.g. for untouched
- * mappings of /dev/null, all processes see the same page full of
- * zeroes, and text pages of executables and shared libraries have
- * only one copy in memory, at most, normally.
- *
- * For the non-reserved ...

read more »

 
 
 

mm.h documentation

Post by Pete Wyckof » Tue, 19 Jun 2001 22:40:08



Quote:>  typedef struct page {
[..]
> +  unsigned long index;            /* Our offset within mapping. */

[..]

Quote:> + * A page may belong to an inode's memory mapping. In this case,
> + * page->mapping is the pointer to the inode, and page->offset is the
> + * file offset of the page (not necessarily a multiple of PAGE_SIZE).

Minor nit.

The field offset was renamed to index some time ago, but I can't
figure out if the usage changed.  Can you fix the comment and educate
us?

                -- Pete
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

mm.h documentation

Post by Ben LaHais » Wed, 20 Jun 2001 00:30:12



> Minor nit.

> The field offset was renamed to index some time ago, but I can't
> figure out if the usage changed.  Can you fix the comment and educate
> us?

Offset was used to indicate the offset in bytes of the page in the object
page cache.  Index is the index of the page, ie in pages, not bytes.

                -ben

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in

More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

 
 
 

1. Need nroff/mm documentation

Please excuse if this is an FAQ -- direct me to it if it is, but I'm
looking for online documentation on the nroff/troff and mm macros.
The man pages don't give all of the macro specifications, and the
SYSV Documenter's Workbench manuals don't belong to me! Is there
any documentation available that I can download on these macros,
so I can have my own copy?

Thanks for any help.

--mdw


UNIX-SQA, Data General Corporation RTP        Office: (919)248-6070
  "You're a jerk, Dent. A complete kneebiter."

2. Newbie Setup: Partition hda1 / floweth over

3. Questions - Re: [PATCH] documentation for mm.h

4. Apache: blocking virtual hosts

5. documentation for mm.h

6. hcon

7. documentation mm.h, mmzone.h and swap.h

8. more details for Suse

9. How to convert Unix date to DD/MM/YYYY HH:MM:SS

10. Changing all date fields from mm/dd/yy to dd/mm/yy

11. Files modified since yy/mm/dd hh:mm

12. error: MM:mm starting apache 1.3.9

13. how to convert given no. into unix dd:mm:yyyy -hh:mm:ss format