/*
 * High memory handling common code and variables.
 *
 * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
 *          Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
 *
 * Redesigned the x86 32-bit VM architecture to deal with
 * 64-bit physical space. With current x86 CPUs this
 * means up to 64 Gigabytes physical RAM.
 *
 * Rewrote high memory support to move the page cache into
 * high memory. Implemented permanent (schedulable) kmaps
 * based on Linus' idea.
 *
 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
 */
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/swap.h>
#include <linux/slab.h>
/*
 * Virtual_count is not a pure "count".
 *  0 means that it is not mapped, and has not been mapped
 *    since a TLB flush - it is usable.
 *  1 means that there are no users, but it has been mapped
 *    since the last TLB flush - so we can't use it.
 *  n means that there are (n-1) current users of it.
 */
static int pkmap_count[LAST_PKMAP];
static unsigned int last_pkmap_nr;
static spinlock_t kmap_lock = SPIN_LOCK_UNLOCKED;

pte_t * pkmap_page_table;

static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait);
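
/*
 * In terms of the counting scheme above, a pkmap slot typically moves
 * through:
 *
 *   kmap_high()             : 0 or 1 -> 2   (first user maps or reuses the slot)
 *   kmap_high()             : n -> n+1      (further users of the same page)
 *   kunmap_high()           : n -> n-1      (last user leaves the count at 1)
 *   flush_all_zero_pkmaps() : 1 -> 0        (entry cleared and TLB flushed,
 *                                            slot becomes reusable)
 */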
static void flush_all_zero_pkmaps(void)
{
	int i;

	flush_cache_all();

	for (i = 0; i < LAST_PKMAP; i++) {
		struct page *page;
		pte_t pte;
		/*
		 * zero means we don't have anything to do,
		 * >1 means that it is still in use. Only
		 * a count of 1 means that it is free but
		 * needs to be unmapped
		 */
		if (pkmap_count[i] != 1)
			continue;
		pkmap_count[i] = 0;
		pte = ptep_get_and_clear(pkmap_page_table+i);
		if (pte_none(pte))
			BUG();
		page = pte_page(pte);
		page->virtual = NULL;
	}
	flush_tlb_all();
}
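
/*
 * map_new_virtual() finds a free slot in the pkmap area for @page and
 * installs the mapping.  Called with kmap_lock held; it may drop the
 * lock and sleep on pkmap_map_wait when every slot is in use.
 */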
static inline unsigned long map_new_virtual(struct page *page)
{
	unsigned long vaddr;
	int count;

start:
	count = LAST_PKMAP;
	/* Find an empty entry */
	for (;;) {
		last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK;
		if (!last_pkmap_nr) {
			flush_all_zero_pkmaps();
			count = LAST_PKMAP;
		}
		if (!pkmap_count[last_pkmap_nr])
			break;	/* Found a usable entry */
		if (--count)
			continue;

		/*
		 * Sleep for somebody else to unmap their entries
		 */
		{
			DECLARE_WAITQUEUE(wait, current);

			current->state = TASK_UNINTERRUPTIBLE;
			add_wait_queue(&pkmap_map_wait, &wait);
			spin_unlock(&kmap_lock);
			schedule();
			remove_wait_queue(&pkmap_map_wait, &wait);
			spin_lock(&kmap_lock);

			/* Somebody else might have mapped it while we slept */
			if (page->virtual)
				return (unsigned long) page->virtual;

			/* Re-start */
			goto start;
		}
	}
	vaddr = PKMAP_ADDR(last_pkmap_nr);
	set_pte(&(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));

	pkmap_count[last_pkmap_nr] = 1;
	page->virtual = (void *) vaddr;

	return vaddr;
}
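
/*
 * kmap_high() returns the kernel virtual address of @page, creating a
 * permanent kmap entry if the page is not currently mapped.  Each call
 * takes a reference that must be dropped with kunmap_high().
 */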
void *kmap_high(struct page *page)
{
	unsigned long vaddr;

	/*
	 * For highmem pages, we can't trust "virtual" until
	 * after we have the lock.
	 *
	 * We cannot call this from interrupts, as it may block
	 */
	spin_lock(&kmap_lock);
	vaddr = (unsigned long) page->virtual;
	if (!vaddr)
		vaddr = map_new_virtual(page);
	pkmap_count[PKMAP_NR(vaddr)]++;
	if (pkmap_count[PKMAP_NR(vaddr)] < 2)
		BUG();
	spin_unlock(&kmap_lock);
	return (void*) vaddr;
}
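
/*
 * Sketch of typical use, via the arch kmap()/kunmap() wrappers in
 * <asm/highmem.h> that end up here for highmem pages (process context
 * only, since the mapping may sleep):
 *
 *	char *vaddr = kmap(page);
 *	memset(vaddr, 0, PAGE_SIZE);
 *	kunmap(page);
 */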
void kunmap_high(struct page *page)
{
	unsigned long vaddr;
	unsigned long nr;

	spin_lock(&kmap_lock);
	vaddr = (unsigned long) page->virtual;
	if (!vaddr)
		BUG();
	nr = PKMAP_NR(vaddr);

	/*
	 * A count must never go down to zero
	 * without a TLB flush!
	 */
	switch (--pkmap_count[nr]) {
	case 0:
		BUG();
	case 1:
		wake_up(&pkmap_map_wait);
	}
	spin_unlock(&kmap_lock);
}
/*
 * Simple bounce buffer support for highmem pages.
 * This will be moved to the block layer in 2.5.
 */
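
/*
 * Overview of the bounce path: create_bounce() allocates a lowmem page
 * and a shadow buffer_head for a highmem bh.  For WRITE the data is
 * copied down immediately (copy_from_high_bh); for READ it is copied
 * back up in the completion handler (copy_to_high_bh_irq), which runs
 * in IRQ context and therefore uses kmap_atomic().
 */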
static inline void copy_from_high_bh (struct buffer_head *to,
			 struct buffer_head *from)
{
	struct page *p_from;
	char *vfrom;
	unsigned long flags;

	p_from = from->b_page;

	/*
	 * Since this can be executed from IRQ context, reentrance
	 * on the same CPU must be avoided:
	 */
	__save_flags(flags);
	__cli();
	vfrom = kmap_atomic(p_from, KM_BOUNCE_WRITE);
	memcpy(to->b_data, vfrom + bh_offset(from), to->b_size);
	kunmap_atomic(vfrom, KM_BOUNCE_WRITE);
	__restore_flags(flags);
}
static inline void copy_to_high_bh_irq (struct buffer_head *to,
			 struct buffer_head *from)
{
	struct page *p_to;
	char *vto;
	unsigned long flags;

	p_to = to->b_page;
	__save_flags(flags);
	__cli();
	vto = kmap_atomic(p_to, KM_BOUNCE_READ);
	memcpy(vto + bh_offset(to), from->b_data, to->b_size);
	kunmap_atomic(vto, KM_BOUNCE_READ);
	__restore_flags(flags);
}
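
/*
 * Common completion work for a bounce bh: signal completion on the
 * original buffer_head, then free the bounce page and the bounce bh.
 */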
static inline void bounce_end_io (struct buffer_head *bh, int uptodate)
{
	struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private);

	bh_orig->b_end_io(bh_orig, uptodate);
	__free_page(bh->b_page);
	kmem_cache_free(bh_cachep, bh);
}
static void bounce_end_io_write (struct buffer_head *bh, int uptodate)
{
	bounce_end_io(bh, uptodate);
}
static void bounce_end_io_read (struct buffer_head *bh, int uptodate)
{
	struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private);

	if (uptodate)
		copy_to_high_bh_irq(bh_orig, bh);
	bounce_end_io(bh, uptodate);
}
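
/*
 * create_bounce() returns a buffer_head that is safe to hand to drivers
 * which cannot reach highmem: the original bh is returned unchanged if
 * its page is in lowmem, otherwise a bounce bh backed by a freshly
 * allocated lowmem page is set up and returned in its place.
 */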
struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig)
{
	struct page *page;
	struct buffer_head *bh;

	if (!PageHighMem(bh_orig->b_page))
		return bh_orig;

repeat_bh:
	bh = kmem_cache_alloc(bh_cachep, SLAB_BUFFER);
	if (!bh) {
		wakeup_bdflush(1);	/* Sets task->state to TASK_RUNNING */
		goto repeat_bh;
	}
	/*
	 * This is wasteful for 1k buffers, but this is a stopgap measure
	 * and we are being ineffective anyway. This approach simplifies
	 * things immensely. On boxes with more than 4GB RAM this should
	 * not be an issue anyway.
	 */
repeat_page:
	page = alloc_page(GFP_BUFFER);
	if (!page) {
		wakeup_bdflush(1);	/* Sets task->state to TASK_RUNNING */
		goto repeat_page;
	}
	set_bh_page(bh, page, 0);

	bh->b_next = NULL;
	bh->b_blocknr = bh_orig->b_blocknr;
	bh->b_size = bh_orig->b_size;
	bh->b_list = -1;
	bh->b_dev = bh_orig->b_dev;
	bh->b_count = bh_orig->b_count;
	bh->b_rdev = bh_orig->b_rdev;
	bh->b_state = bh_orig->b_state;
	bh->b_flushtime = jiffies;
	bh->b_next_free = NULL;
	bh->b_prev_free = NULL;
	/* bh->b_this_page */
	bh->b_reqnext = NULL;
	bh->b_pprev = NULL;
	/* bh->b_page */
	if (rw == WRITE) {
		bh->b_end_io = bounce_end_io_write;
		copy_from_high_bh(bh, bh_orig);
	} else
		bh->b_end_io = bounce_end_io_read;
	bh->b_private = (void *)bh_orig;
	bh->b_rsector = bh_orig->b_rsector;
	memset(&bh->b_wait, -1, sizeof(bh->b_wait));

	return bh;
}
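
/*
 * The block layer is expected to submit the returned bounce bh in place
 * of the original and let the bounce_end_io_* handlers above propagate
 * completion back to bh_orig via the b_private link set up here.
 */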