XV6學習(15)Lab mmap: Mmap
程式碼在Github上。
這一個實驗是要實現最基礎的mmap功能。mmap即記憶體映射文件,將一個文件直接映射到記憶體當中,之後對文件的讀寫就可以直接通過對記憶體進行讀寫來進行,而對文件的同步則由作業系統來負責完成。使用mmap可以避免對文件大量read和write操作帶來的內核緩衝區和用戶緩衝區之間的頻繁的數據拷貝。Kafka消息隊列等軟體就是藉助mmap來實現零拷貝(zero-copy)的。
首先定義vma結構體用於保存記憶體映射資訊,並在proc結構體中加入struct vma *vma指針,作為該進程vma鏈表的表頭:
// Number of slots in the global vma table (vma_list).
#define NVMA 16
// Virtual address given to the first mapping of a process; later mappings
// are placed after the previous one.
#define VMA_START (MAXVA / 2)
// One mmap'ed region of a file. Regions of a process form a singly linked
// list through `next`.
struct vma{
uint64 start; // first virtual address of the region (inclusive)
uint64 end; // one past the last virtual address (exclusive)
uint64 length; // size in bytes; 0 means this slot is not in use
uint64 off; // file offset that corresponds to `start`
int permission; // PTE_* bits used when a page of the region is mapped
int flags; // mmap flags as passed by the caller (e.g. MAP_PRIVATE)
struct file *file; // backing file; holds its own reference
struct vma *next; // next region of the same process, or 0
struct spinlock lock; // taken while a slot is being claimed or released
};
// Per-process state (excerpt: only the field added for mmap is shown)
struct proc {
...
struct vma *vma; // head of this process's list of mapped regions; 0 when it has none
...
};
之後實現對vma
分配的程式碼:
struct vma vma_list[NVMA];
struct vma* vma_alloc(){
for(int i = 0; i < NVMA; i++){
acquire(&vma_list[i].lock);
if(vma_list[i].length == 0){
return &vma_list[i];
}else{
release(&vma_list[i].lock);
}
}
panic("no enough vma");
}
實現mmap
系統調用,這個函數主要就是申請一個vma
,之後查找一塊空閑記憶體,填入相關資訊,將vma
插入到進程的vma
鏈表中去:
uint64
sys_mmap(void)
{
uint64 addr;
int length, prot, flags, fd, offset;
if(argaddr(0, &addr) < 0 || argint(1, &length) < 0 || argint(2, &prot) < 0 || argint(3, &flags) < 0 || argint(4, &fd) < 0 || argint(5, &offset) < 0){
return -1;
}
if(addr != 0)
panic("mmap: addr not 0");
if(offset != 0)
panic("mmap: offset not 0");
struct proc *p = myproc();
struct file* f = p->ofile[fd];
int pte_flag = PTE_U;
if (prot & PROT_WRITE) {
if(!f->writable && !(flags & MAP_PRIVATE)) return -1; // map to a unwritable file with PROT_WRITE
pte_flag |= PTE_W;
}
if (prot & PROT_READ) {
if(!f->readable) return -1; // map to a unreadable file with PROT_READ
pte_flag |= PTE_R;
}
struct vma* v = vma_alloc();
v->permission = pte_flag;
v->length = length;
v->off = offset;
v->file = myproc()->ofile[fd];
v->flags = flags;
filedup(f);
struct vma* pv = p->vma;
if(pv == 0){
v->start = VMA_START;
v->end = v->start + length;
p->vma = v;
}else{
while(pv->next) pv = pv->next;
v->start = PGROUNDUP(pv->end);
v->end = v->start + length;
pv->next = v;
v->next = 0;
}
addr = v->start;
printf("mmap: [%p, %p)\n", addr, v->end);
release(&v->lock);
return addr;
}
接下來就可以在usertrap
中對缺頁中斷進行處理:查找進程的vma
鏈表,判斷該地址是否為映射地址,如果不是就說明出錯,直接返回;如果在vma
鏈表中,就可以申請並映射一個頁面,之後根據vma
從對應的文件中讀取數據:
// Page-fault handler for mmap'ed regions.
//
// va is the faulting address and scause the trap cause (13 and 15 are
// treated as read and write faults respectively). On success a zeroed page
// is mapped at the page containing va and filled from the backing file.
//
// Returns 0 on success, or
//   -1  va does not belong to any mapping of the process
//   -2  read fault on a mapping without PTE_R
//   -3  write fault on a mapping without PTE_W
//   -4  out of physical memory
//   -5  the page could not be installed in the page table
int
mmap_handler(uint64 va, int scause)
{
  struct proc *p = myproc();
  struct vma *area;

  // Find the region that contains the faulting address.
  for(area = p->vma; area != 0; area = area->next){
    if(area->start <= va && va < area->end)
      break;
  }
  if(area == 0)
    return -1;

  switch(scause){
  case 13:
    if((area->permission & PTE_R) == 0)
      return -2;
    break;
  case 15:
    if((area->permission & PTE_W) == 0)
      return -3;
    break;
  }

  uint64 page_va = PGROUNDDOWN(va);
  char *page = kalloc();
  if(page == 0)
    return -4;
  // Zero first so that bytes past the end of the file read as 0.
  memset(page, 0, PGSIZE);

  if(mappages(p->pagetable, page_va, PGSIZE, (uint64)page, area->permission) != 0){
    kfree(page);
    return -5;
  }

  // Fill the page through its kernel address (user_dst == 0).
  struct inode *ip = area->file->ip;
  ilock(ip);
  readi(ip, 0, (uint64)page, area->off + page_va - area->start, PGSIZE);
  iunlock(ip);
  return 0;
}
之後就是munmap的實現,同樣先從鏈表中找到對應的vma結構體,之後根據三種不同情況(頭部、尾部、整個)來寫回並釋放對應的頁面並更新vma資訊,如果整個區域都被釋放就將vma和文件釋放。
// Write back and unmap every page of [addr, stop) that is currently mapped.
// Pages that were never faulted in have nothing to write and nothing to
// free, so they are skipped.
static void
munmap_pages(struct proc *p, struct vma *v, uint64 addr, uint64 stop)
{
  for(uint64 a = addr; a < stop; a += PGSIZE){
    if(walkaddr(p->pagetable, a) == 0)
      continue;

    uint64 n = stop - a;
    if(n > PGSIZE)
      n = PGSIZE;

    // writeback() derives the file offset from the vma's start and off, so
    // describe this page as a one-page region that begins at the page
    // itself. That keeps the offset right wherever the page sits in v.
    struct vma page = *v;
    page.start = a;
    page.off = v->off + (a - v->start);
    writeback(&page, a, n);

    uvmunmap(p->pagetable, a, 1, 1);
  }
}

// munmap(addr, length) system call.
//
// Removes [addr, addr + length) from the mapping that contains addr. The
// range must be the head, the tail, or the whole of that mapping; punching a
// hole in the middle is not supported. Mapped pages are written back (when
// the mapping requires it) and freed; when the whole mapping goes away its
// file reference and its vma slot are released as well.
//
// Returns 0 on success, -1 if the range is invalid or not mapped.
uint64
sys_munmap(void)
{
  uint64 addr;
  int length;

  if(argaddr(0, &addr) < 0 || argint(1, &length) < 0){
    return -1;
  }
  if(length <= 0)
    return -1;

  struct proc *p = myproc();
  struct vma *v = p->vma;
  struct vma *pre = 0;
  while(v != 0){
    if(addr >= v->start && addr < v->end)
      break; // found
    pre = v;
    v = v->next;
  }
  if(v == 0)
    return -1; // not mapped
  printf("munmap: %p %d\n", addr, length);

  uint64 len = length;
  uint64 stop = addr + len;
  // The range is user-controlled: it must be page aligned, stay inside the
  // mapping, and leave a page-aligned remainder.
  if(addr % PGSIZE != 0 || stop > v->end)
    return -1;
  if(stop != v->end && stop % PGSIZE != 0)
    return -1;
  if(addr != v->start && stop != v->end)
    return -1; // hole in the middle of the mapping: unsupported

  munmap_pages(p, v, addr, stop);

  if(addr == v->start && stop == v->end){
    // Whole mapping: unlink it and hand the slot back.
    fileclose(v->file);
    if(pre == 0)
      p->vma = v->next;
    else
      pre->next = v->next;
    v->next = 0;
    acquire(&v->lock);
    v->length = 0;
    release(&v->lock);
  }else if(addr == v->start){
    // Head removed: the mapping now begins later, both in memory and in
    // the file.
    v->start += len;
    v->off += len;
    v->length -= len;
  }else{
    // Tail removed.
    v->length -= len;
    v->end -= len;
  }
  return 0;
}
寫回函數先判斷是否需要寫回,當需要寫回時就仿照filewrite
的實現,將數據寫回到對應的文件當中去,這裡的實現是直接寫回所有頁面,但實際可以根據PTE_D
來判斷記憶體是否被寫入,如果沒有寫入就不用寫回:
// Write n bytes of the mapping v, starting at the page-aligned user address
// addr, back to the backing file.
//
// Nothing is written for mappings that are not writable or are MAP_PRIVATE.
// Every page in the range is written, modified or not. The data is copied
// from the current process's address space (user_src == 1); a page that
// cannot be copied, e.g. because it was never faulted in, is skipped.
void
writeback(struct vma* v, uint64 addr, int n)
{
  if(!(v->permission & PTE_W) || (v->flags & MAP_PRIVATE)) // no need to writeback
    return;
  if((addr % PGSIZE) != 0)
    panic("unmap: not aligned");
  printf("starting writeback: %p %d\n", addr, n);

  struct file* f = v->file;
  // File offset that corresponds to addr.
  uint64 base = v->off + (addr - v->start);
  // Same per-transaction limit as filewrite(), so that one chunk cannot
  // overflow the log.
  int max = ((MAXOPBLOCKS-1-1-2) / 2) * BSIZE;
  int i = 0;
  while(i < n){
    int n1 = n - i;
    if(n1 > max)
      n1 = max;
    // Never let a chunk cross a page boundary, so that a failure can be
    // attributed to exactly one page.
    int room = PGSIZE - (i % PGSIZE);
    if(n1 > room)
      n1 = room;

    begin_op();
    ilock(f->ip);
    printf("%p %d %d\n", addr + i, (int)(base + i), n1);
    int r = writei(f->ip, 1, addr + i, base + i, n1);
    iunlock(f->ip);
    end_op();

    if(r > 0){
      i += r;
    }else{
      // Nothing written: give up on this page and move to the next one
      // instead of retrying forever.
      i = (i / PGSIZE + 1) * PGSIZE;
    }
  }
}
最後就是在fork當中複製vma到子進程,在exit中將當前進程的vma鏈表釋放,並且在exit時要對頁面進行寫回:
// fork (excerpt): the part added for mmap gives the child its own copy of
// every mapping descriptor of the parent.
int
fork(void)
{
...
np->state = RUNNABLE;
// NOTE(review): the child is marked RUNNABLE above, before its vma list is
// built below. Confirm that np->lock is still held here (the elided code is
// not visible); otherwise the child could run with an incomplete list.
np->vma = 0;
struct vma *pv = p->vma;
struct vma *pre = 0; // last descriptor appended to the child's list
while(pv){
// vma_alloc() returns the slot with its lock held; it is released below
// once the slot has been filled in.
struct vma *vma = vma_alloc();
vma->start = pv->start;
vma->end = pv->end;
vma->off = pv->off;
vma->length = pv->length;
vma->permission = pv->permission;
vma->flags = pv->flags;
vma->file = pv->file;
// The child's descriptor holds its own reference to the file.
filedup(vma->file);
vma->next = 0;
// Append, so the child's list keeps the parent's order.
if(pre == 0){
np->vma = vma;
}else{
pre->next = vma;
}
pre = vma;
release(&vma->lock);
pv = pv->next;
}
...
}
// exit (excerpt): before the process goes away, every mapping it still owns
// is written back, unmapped, and its descriptor returned to the pool.
void
exit(int status)
{
  struct proc *p = myproc();
  if(p == initproc)
    panic("init exiting");
  // munmap all mmap vma
  struct vma* v = p->vma;
  struct vma* pv;
  while(v){
    writeback(v, v->start, v->length);
    uvmunmap(p->pagetable, v->start, PGROUNDUP(v->length) / PGSIZE, 1);
    fileclose(v->file);
    pv = v->next; // remember the successor before the slot is recycled
    acquire(&v->lock);
    v->next = 0;
    v->length = 0; // length 0 marks the slot as free
    release(&v->lock);
    v = pv;
  }
  // Every descriptor is back in the pool and may be claimed by another
  // process, so the list head must not keep pointing at them.
  p->vma = 0;
...
}