AbydOS开发日记 (5) - 基本内存映射

3/18/2024 · OS · C++ · MMU

# 基本内存布局

由于大部分的 RV64 SoC 都将 DRAM 放置在 0x80000000 以上,其下的空间保留给 IO,如 QEMU 的 virt,其布局如下:

// Memory map of QEMU's `virt` machine, indexed by device ID through designated
// initializers. Each entry appears to be a { base address, size } pair
// (MemMapEntry itself is declared elsewhere — confirm against the QEMU source).
// Everything listed before VIRT_DRAM sits below 0x80000000.
static const MemMapEntry virt_memmap[] = {
    [VIRT_DEBUG] =        {        0x0,         0x100 },
    [VIRT_MROM] =         {     0x1000,        0xf000 },
    [VIRT_TEST] =         {   0x100000,        0x1000 },
    [VIRT_RTC] =          {   0x101000,        0x1000 },
    [VIRT_CLINT] =        {  0x2000000,       0x10000 },
    [VIRT_ACLINT_SSWI] =  {  0x2F00000,        0x4000 },
    [VIRT_PCIE_PIO] =     {  0x3000000,       0x10000 },
    [VIRT_PLATFORM_BUS] = {  0x4000000,     0x2000000 },
    // VIRT_PLIC and VIRT_APLIC_M share the same base address in this table.
    [VIRT_PLIC] =         {  0xc000000, VIRT_PLIC_SIZE(VIRT_CPUS_MAX * 2) },
    [VIRT_APLIC_M] =      {  0xc000000, APLIC_SIZE(VIRT_CPUS_MAX) },
    [VIRT_APLIC_S] =      {  0xd000000, APLIC_SIZE(VIRT_CPUS_MAX) },
    [VIRT_UART0] =        { 0x10000000,         0x100 },
    [VIRT_VIRTIO] =       { 0x10001000,        0x1000 },
    [VIRT_FW_CFG] =       { 0x10100000,          0x18 },
    [VIRT_FLASH] =        { 0x20000000,     0x4000000 },
    [VIRT_IMSIC_M] =      { 0x24000000, VIRT_IMSIC_MAX_SIZE },
    [VIRT_IMSIC_S] =      { 0x28000000, VIRT_IMSIC_MAX_SIZE },
    [VIRT_PCIE_ECAM] =    { 0x30000000,    0x10000000 },
    [VIRT_PCIE_MMIO] =    { 0x40000000,    0x40000000 },
    // DRAM starts at 2 GiB; the size is 0x0 here (presumably filled in at
    // machine creation — verify in QEMU).
    [VIRT_DRAM] =         { 0x80000000,           0x0 }, 
};

由此,我们可以简单地将低 2G 空间直接映射,然后将 (可能不连续的) DRAM 从 0x80000000 开始进行映射,最后把 DRAM 的一小部分重映射到 VMA 的高地址空间。如下表所示:

| VMA Start | VMA End | PMA Start | Usage |
| --- | --- | --- | --- |
| 0x0 | 0x7FFFFFFF | 0x0 | Directly mapped IO |
| 0x80000000 | - | 0x80000000, ... | Mapped continuous kernel heap memory |
| KernelStackEnd | VMA.LowerTop | (To be allocated) | Kernel stack |

# MMU 控制实现

# 基础定义

基于上篇描述的 MMU 控制方法,首先做一点抽象,创建一个基类 MMUBase

/**
 * @brief Architecture-neutral interface for driving an MMU.
 *
 * Concrete back-ends derive from this class and are used through MMUBase
 * pointers, so the destructor is virtual: deleting a derived object through a
 * base pointer must run the derived destructor.
 */
class MMUBase
{
  public:
    /// Protection flags accepted by map(); combine them with bitwise OR.
    static constexpr int PROT_NONE = 0, PROT_R = 1, PROT_W = 2, PROT_X = 4, PROT_U = 8, PROT_G = 16;

    /// Virtual so that `delete` through an MMUBase* destroys the full object.
    virtual ~MMUBase() = default;

    /**
     * @brief Set MMU state
     * @note The function will take effect immediately!
     * @param enable true to enable, false to disable
     * @return true if success, false if failed
     */
    virtual bool enable(bool enable) = 0;

    /**
     * @brief switch the ASID to this
     * @note The function will not sfence!
     */
    virtual void switchASID() = 0;

    /**
     * @brief Map `size` bytes starting at virtual address `vaddr` onto `paddr`.
     * @param vaddr virtual start address
     * @param paddr physical start address
     * @param size  length of the region in bytes
     * @param prot  bitwise OR of the PROT_* flags
     * @return implementation-defined status code
     */
    virtual int map(uintptr_t vaddr, uintptr_t paddr, size_t size, int prot) = 0;

    /**
     * @brief Remove the mapping of `size` bytes starting at `vaddr`.
     * @return implementation-defined status code
     */
    virtual int unmap(uintptr_t vaddr, size_t size) = 0;

    /// Make preceding map()/unmap() changes take effect (details are up to the back-end).
    virtual void apply() = 0;

    /// Boundary of the lower part of the virtual address space, as defined by the back-end.
    virtual size_t getVMALowerTop() = 0;
    /// Boundary of the upper part of the virtual address space, as defined by the back-end.
    virtual size_t getVMAUpperBottom() = 0;
};

这个基类是操作 MMU 的接口,其后可以对应 RV32 或 RV64 的实现。目前先开发 RV64 的实现;又由于 RV64 有三种分页模式变体 (Sv39、Sv48、Sv57),因此再实现一个专用基类 RV64MMUBase:

class RV64MMUBase : public MMUBase
{

  public:
    /**
     * Turn address translation on or off by programming the satp CSR.
     *
     * Disabling writes 0 to satp and always reports success. Enabling writes
     * the cached _satp image and reads it back; if the CSR does not hold the
     * same value the function returns false without fencing. Every successful
     * path ends with sfence_vma().
     *
     * @param enable true to load the cached satp image, false to write 0
     * @return true on success, false if the read-back value differs
     */
    bool enable(bool enable)
    {
        if (!enable)
        {
            csr_write(CSR_SATP, 0);
            sfence_vma();
            return true;
        }

        // Raw 64-bit image of the satp bit-field structure.
        const uint64_t wanted = *(uint64_t *)(&_satp);
        csr_write(CSR_SATP, wanted);
        const bool accepted = (csr_read(CSR_SATP) == wanted);
        if (!accepted)
            return false;

        sfence_vma();
        return true;
    }

    /**
     * Load this object's cached satp image into the satp CSR.
     * No sfence.vma is issued here.
     */
    void switchASID()
    {
        const uint64_t image = *(uint64_t *)(&_satp);
        csr_write(CSR_SATP, image); // intentionally not followed by sfence.vma
    }

  protected:
// 查看省略的代码

    /**
     * Bit-field view of a virtual address: a 12-bit page offset followed by
     * five 9-bit virtual page numbers (vpn0 is the lowest).
     */
    struct vaddr_t
    {
        uint64_t offset : 12;
        uint64_t vpn0 : 9;
        uint64_t vpn1 : 9;
        uint64_t vpn2 : 9;
        uint64_t vpn3 : 9;
        uint64_t vpn4 : 9;

        /**
         * Return the page-table index used at walk step `level`, where
         * level 0 is the root table.
         *
         * @tparam sz virtual address width: 39, 48 or 57; any other value yields 0
         * @param level walk step, counted from the root
         * @return the VPN field for that step, or 0 if `level` is outside the
         *         number of steps the mode has
         */
        template <uint8_t sz> uint64_t getVPN(int level)
        {
            // Number of walk steps for the mode: 3, 4 or 5 (0 = unknown mode).
            constexpr int depth = (sz == 39) ? 3 : (sz == 48) ? 4 : (sz == 57) ? 5 : 0;
            if (level < 0 || level >= depth)
                return 0;

            // The root step consumes the highest VPN field, the last step vpn0.
            switch (depth - 1 - level)
            {
            case 0:
                return vpn0;
            case 1:
                return vpn1;
            case 2:
                return vpn2;
            case 3:
                return vpn3;
            case 4:
                return vpn4;
            default:
                return 0;
            }
        }
    };

    // Bit-field view of a physical address: a 12-bit page offset, four 9-bit
    // PPN pieces, an 8-bit top piece (56 address bits in total) and 8 unused
    // bits that pad the structure to 64 bits.
    // pte_t::ppn() casts a uintptr_t* to paddr_t*, so the code relies on the
    // compiler allocating these fields starting from the least-significant bit.
    struct paddr_t
    {
        uint64_t offset : 12; // byte offset inside a 4 KiB page
        uint64_t ppn0 : 9;
        uint64_t ppn1 : 9;
        uint64_t ppn2 : 9;
        uint64_t ppn3 : 9;
        uint64_t ppn4 : 8;
        uint64_t reserved : 8; // padding above the 56 address bits
    };

    // Translation modes; the constructor stores the chosen value in the
    // 4-bit `mode` field of the satp image.
    enum MMUMode_t
    {
        BARE = 0, // no translation
        // 1-7 reserved for future use
        SV39 = 8,
        SV48 = 9,
        SV57 = 10,
        SV64 = 11, // Not defined in the current RISC-V specs
    };
    /**
     * One 64-bit page-table entry: ten flag/software bits, a 44-bit physical
     * page number split into five pieces, and ten high bits that are kept at
     * zero because no extension using them is enabled.
     */
    struct pte_t
    {
        uint64_t v : 1;   // valid
        uint64_t r : 1;   // readable
        uint64_t w : 1;   // writable
        uint64_t x : 1;   // executable
        uint64_t u : 1;   // user accessible
        uint64_t g : 1;   // global
        uint64_t a : 1;   // accessed
        uint64_t d : 1;   // dirty
        uint64_t rsw : 2; // reserved for software
        uint64_t ppn0 : 9;
        uint64_t ppn1 : 9;
        uint64_t ppn2 : 9;
        uint64_t ppn3 : 9;
        uint64_t ppn4 : 8;
        uint64_t reserved : 10; // extensions off

        /**
         * Normalise the entry for the given paging mode and return a copy.
         *
         * Only the reserved high bits are cleared. Sv39, Sv48 and Sv57 all
         * carry a 44-bit PPN (56-bit physical address); the modes differ in
         * the virtual address width, not in the PTE's physical page number.
         * Clearing ppn3/ppn4 for the narrower modes would therefore truncate
         * physical addresses at or above 2^39 (Sv39) or 2^48 (Sv48).
         *
         * @tparam sz virtual address width; kept for interface compatibility
         */
        template <uint8_t sz> auto fit()
        {
            this->reserved = 0;
            return *this;
        }

        /**
         * Store the physical page number of `addr`; the low 12 offset bits and
         * anything above bit 55 are ignored.
         */
        void ppn(uintptr_t addr)
        {
            const uint64_t frame = static_cast<uint64_t>(addr) >> 12;
            ppn0 = frame & 0x1FF;
            ppn1 = (frame >> 9) & 0x1FF;
            ppn2 = (frame >> 18) & 0x1FF;
            ppn3 = (frame >> 27) & 0x1FF;
            ppn4 = (frame >> 36) & 0xFF;
        }

        /**
         * @return the page-aligned physical address held in the PPN fields.
         */
        uintptr_t paddr()
        {
            // Widen each piece first: a narrow bit-field promotes to int, which
            // would overflow on the larger shifts.
            const uint64_t frame = static_cast<uint64_t>(ppn0) | (static_cast<uint64_t>(ppn1) << 9) |
                                   (static_cast<uint64_t>(ppn2) << 18) | (static_cast<uint64_t>(ppn3) << 27) |
                                   (static_cast<uint64_t>(ppn4) << 36);
            return static_cast<uintptr_t>(frame << 12);
        }
    };

    // Bit-field image of the 64-bit satp CSR value. enable() and switchASID()
    // reinterpret this structure as a uint64_t and write it to CSR_SATP.
    struct satp_t
    {
        uint64_t ppn : 44;  // page number of the root page table (set by setPPN)
        uint64_t asid : 16; // address-space identifier
        uint64_t mode : 4;  // translation mode, one of MMUMode_t
    };

    /**
     * Build the cached satp image for the given translation mode and ASID.
     *
     * The root-table page number starts at 0 and is filled in later through
     * setPPN(). It is set explicitly because _satp has no initializer and
     * setPPN() can refuse an address, in which case enable()/switchASID()
     * would otherwise write an indeterminate value to the CSR.
     *
     * @param mode translation mode stored in satp.mode
     * @param asid address-space identifier stored in satp.asid
     */
    RV64MMUBase(MMUMode_t mode, uint16_t asid)
    {
        _satp.ppn = 0;
        _satp.asid = asid;
        _satp.mode = mode;
    }

    /**
     * Record the root page table address in the cached satp image.
     *
     * @param addr address of the root table; must be a multiple of 4096
     * @return true if the address was accepted, false if it is not 4 KiB
     *         aligned (the cached image is left untouched in that case)
     */
    bool setPPN(uintptr_t addr)
    {
        const bool pageAligned = (addr & 4095) == 0;
        if (pageAligned)
            _satp.ppn = addr2page4K(addr);
        return pageAligned;
    }

  private: // data
    satp_t _satp;
};

这里利用了 C++17 的 if constexpr 特性,对不同的实现上的 PTE 提供实质上不同的函数,而无需特化函数模板。并且,enable() 和 switchASID() 方法在该层实现。

接下来,编写一个模板类,同样利用 if constexpr 避免特化,实现 map 和 unmap 等。定义和一些简单方法如下:

template <uint8_t sz> class RV64MMU : public RV64MMUBase
{
  private:
    /// Translate the address-width template argument into the matching
    /// MMUMode_t value; unknown widths fall back to BARE.
    static constexpr auto _mmutype()
    {
        switch (sz)
        {
        case 39:
            return MMUMode_t::SV39;
        case 48:
            return MMUMode_t::SV48;
        case 57:
            return MMUMode_t::SV57;
        case 64:
            return MMUMode_t::SV64;
        default:
            return MMUMode_t::BARE;
        }
    }

  public:
    /**
     * Create an address space: allocate the 512-entry root page table on a
     * 4 KiB boundary, clear it, and record its address in the satp image.
     *
     * The table is cleared explicitly because nothing visible here guarantees
     * that alignedMalloc() returns zeroed memory, and stale bytes would be
     * read as valid entries by the table walk.
     * NOTE(review): allocation failure cannot be reported from a constructor
     * without exceptions; _ptes stays nullptr in that case.
     *
     * @param asid address-space identifier for this table tree
     */
    RV64MMU(uint16_t asid) : RV64MMUBase(_mmutype(), asid)
    {
        constexpr size_t kEntries = 512;
        _ptes = alignedMalloc<pte_t>(kEntries * sizeof(pte_t), 4096);
        if (_ptes != nullptr)
        {
            for (size_t i = 0; i < kEntries; ++i)
                _ptes[i] = pte_t{};
        }
        setPPN((uintptr_t)_ptes);
    }
    ~RV64MMU()
    {
        alignedFree(_ptes);
    }

    /**
     * @return 2^(sz-1) for the 39/48/57-bit modes (the size of the lower half
     *         of the virtual address space), 0 for any other width.
     */
    size_t getVMALowerTop() override
    {
        constexpr bool knownWidth = (sz == 39) || (sz == 48) || (sz == 57);
        if constexpr (knownWidth)
            return 1ULL << (sz - 1);
        else
            return 0;
    }

    /**
     * @return 2^64 - 2^(sz-1) for the 39/48/57-bit modes (the first address of
     *         the upper half of the virtual address space), 0 for any other width.
     */
    size_t getVMAUpperBottom() override
    {
        constexpr bool knownWidth = (sz == 39) || (sz == 48) || (sz == 57);
        if constexpr (knownWidth)
            return ~((1ULL << (sz - 1)) - 1); // all bits from (sz-1) upward set
        else
            return 0;
    }

    ...
};

# Map()

接下来实现 map()。首先考虑页表的创建,可以使用虚拟地址 vaddr 和当前页表级数 level 作为参数,递归创建 (注意标记遍历路径上的有效位):

/**
 * Return the entry for `vaddr` at walk step `level` (0 = root table),
 * allocating any missing intermediate tables on the way.
 *
 * Newly allocated tables are cleared before being linked in, because nothing
 * visible here guarantees that alignedMalloc() returns zeroed memory.
 *
 * @param level walk step of the wanted entry, counted from the root
 * @param vaddr virtual address being mapped
 * @return pointer to the entry, or nullptr if a table could not be allocated
 *         or if a leaf (superpage) entry already covers `vaddr` at a higher
 *         level, in which case there is no table to descend into
 */
pte_t *_createPTE(int level, uintptr_t vaddr)
    {
        const auto poff = ((vaddr_t *)&vaddr)->getVPN<sz>(level);
        if (level == 0)
            return _ptes + poff; // The root table is created by the constructor

        pte_t *parent = _createPTE(level - 1, vaddr); // Make sure the parent entry exists first
        if (parent == nullptr)
            return nullptr;

        // A valid entry with any of R/W/X set maps memory directly; its PPN is
        // data, not a page table, and must not be followed.
        if (parent->v && (parent->r || parent->w || parent->x))
            return nullptr;

        if (parent->paddr() != 0) // This level already exists, reuse it
            return (pte_t *)(parent->paddr()) + poff;

        constexpr size_t kEntries = 512;
        pte_t *table = alignedMalloc<pte_t>(kEntries * sizeof(pte_t), 4096);
        if (table == nullptr)
            return nullptr; // Leave the parent untouched instead of linking physical address 0

        for (size_t i = 0; i < kEntries; ++i)
            table[i] = pte_t{};

        parent->ppn((uintptr_t)table);
        parent->v = 1;
        parent->r = 0;
        parent->w = 0;
        parent->x = 0; // R=W=X=0 marks the entry as a pointer to the next level
        parent->template fit<sz>();
        return table + poff;
    }

然后实现一个整块 map 的函数,如下:

template <uint8_t blocksz> int _map(uintptr_t vaddr, uintptr_t paddr, int prot)
    {
        printf("* Mapping %lx to %lx with prot %i\n", vaddr, paddr, prot);

        auto level = _calcLevel<blocksz>();
        if (level < 0)
            return level;

        auto pte = _createPTE(level, vaddr);
        // printf("PTE got: %lx\n", (uintptr_t)pte);

        if (pte->v)
            return K_EALREADY;
        pte->v = 1;
        pte->r = prot & PROT_R ? 1 : 0;
        pte->w = prot & PROT_W ? 1 : 0;
        pte->x = prot & PROT_X ? 1 : 0;
        pte->u = prot & PROT_U ? 1 : 0;
        pte->g = prot & PROT_G ? 1 : 0;

        pte->ppn(paddr);
        pte->template fit<sz>();
        // printf("Now PTE value: %lx\n", *(uintptr_t *)pte);
        return 0;
    }

这里的 _calcLevel() 函数用于计算级数,接受块大小作为模板参数,也是一个 constexpr 函数。

最后实现 map(),先做参数校验,然后从 va 开始,循环从大到小匹配块大小并调用相应的 _map():

    /**
     * Map `size` bytes starting at `vaddr` onto `paddr`, using the largest
     * block size that fits at each step.
     *
     * @param vaddr virtual start address, 4 KiB aligned
     * @param paddr physical start address, 4 KiB aligned
     * @param size  length in bytes, a multiple of 4 KiB; 0 is a no-op
     * @param prot  bitwise OR of the PROT_* flags
     * @return 0 on success; K_EINVAL for misaligned arguments;
     *         K_EINVALID_ADDR if either range wraps around; K_ENOSPC for the
     *         R/W/X combinations 000, 010 and 110; otherwise the first
     *         non-zero status returned by _map()
     */
    int map(uintptr_t vaddr, uintptr_t paddr, size_t size, int prot) override
    {
        if (vaddr & 0xFFF || paddr & 0xFFF || size & 0xFFF) // Not aligned
            return K_EINVAL;
        if (size == 0)
            return 0;                                     // No need to map with size == 0
        if (vaddr + size < vaddr || paddr + size < paddr) // overflow
            return K_EINVALID_ADDR;

        // Write-without-read encodings are reserved, and R=W=X=0 denotes a
        // pointer to the next table level rather than a mapping.
        auto prott = prot & (PROT_R | PROT_W | PROT_X);
        if (prott == 0b000 || prott == 0b010 || prott == 0b110)
            return K_ENOSPC;

        auto rc = 0;
        // Divide the memory into blocks of size 256T,512G, 1G, 2M, and 4K
        for (uintptr_t vcaddr = vaddr, pcaddr = paddr; vcaddr < vaddr + size;)
        {
            if (rc)
                return rc;
            if constexpr (sz >= 57) // Only SV57 and SV64 support 256T
            {
                // A superpage needs BOTH addresses aligned to its size; a
                // leaf whose physical address is misaligned is not a valid
                // mapping, so the virtual alignment alone is not enough.
                if (((vcaddr | pcaddr) & 0xFFFFFFFFFFFF) == 0) // 256T aligned
                {
                    if (size - (vcaddr - vaddr) >= 1ULL << 48) // There are more than 256T to map
                    {
                        rc = _map<48>(vcaddr, pcaddr, prot);
                        vcaddr += 1ULL << 48;
                        pcaddr += 1ULL << 48;
                        continue;
                    }
                }
            }

            ...
        }
        return rc;
    }

# Unmap()

unmap 与 map 相似,也是循环匹配块大小并删除 (V=0)。实现上,如果删除的不是一个 4K 块,就释放下级 PTE 簇的内存,也是通过递归实现:

    /**
     * Locate the entry for `vaddr` at walk step `level` (0 = root table)
     * without creating anything.
     *
     * Precondition: every entry above `level` on the path holds the address
     * of the next table; this function does not check validity bits.
     *
     * @param level walk step of the wanted entry, counted from the root
     * @param vaddr virtual address to look up
     * @return pointer to the entry
     */
    pte_t *_getPTE(int level, uintptr_t vaddr)
    {
        const auto index = ((vaddr_t *)&vaddr)->getVPN<sz>(level);
        // The root table is known; any deeper table is found through the
        // entry one step closer to the root.
        pte_t *table = (level == 0) ? _ptes : (pte_t *)(_getPTE(level - 1, vaddr)->paddr());
        return table + index;
    }

    int _removePTE(int level, uintptr_t vaddr)
    {
        auto pte = _getPTE(level, vaddr);
        if (!pte->v)
            return K_EALREADY;
        pte->v = 0;
        pte->ppn(0);
        pte->template fit<sz>();
        if (level != _getMaxLevel())
        { // Next level already unused, free it
            auto pteBase = (pte_t *)(pte->paddr());
            alignedFree(pteBase);
        }
        return (level == 0 ? 0 : _removePTE(level - 1, vaddr));
    }

    /**
     * Unmap one block of 2^blocksz bytes at `vaddr`.
     *
     * @tparam blocksz log2 of the block size; converted to a walk level by _calcLevel()
     * @param vaddr virtual address of the block
     * @return the negative value from _calcLevel() for an unsupported block
     *         size, otherwise whatever _removePTE() returns
     */
    template <uint8_t blocksz> int _unmap(uintptr_t vaddr)
    {
        printf("* Unmapping %lx\n", vaddr);

        const auto depth = _calcLevel<blocksz>();
        if (depth >= 0)
            return _removePTE(depth, vaddr);
        return depth;
    }

# 测试

    // Precondition: the MMU has already been enabled through the `sysmmu` object.

    // One page of heap memory, plus a second virtual address for the same page
    // formed by setting the high address bits.
    auto page = alignedMalloc<long>(4096, 4096);
    const uintptr_t origAddr = (uintptr_t)page;
    const uintptr_t aliasAddr = origAddr | 0xFFFFFFC000000000;
    long *const alias = (long *)aliasAddr;

    printf("Original addr of a: 0x%lx\n", origAddr);
    *page = 1145141919810;
    printf("Original value of a: %li\n", *page);

    // Create the alias and read/write the page through it.
    sysmmu->map(aliasAddr, origAddr, 4096, MMUBase::PROT_W | MMUBase::PROT_R);
    sysmmu->apply();
    printf("Mapped addr of a: 0x%lx\n", aliasAddr);
    printf("Mapped value of a: %li\n", *alias);
    *alias = 1919810114514;
    printf("We modified from mapped, now the original value is %li\n", *page);

    // Remove the alias again; the original address must keep working.
    printf("Now we unmap it\n");
    sysmmu->unmap(aliasAddr, 4096);
    sysmmu->apply();
    printf("Original value of a: %li\n", *page);

    // The alias is gone, so this store is expected to fault (the log shows a hang).
    printf("Accessing to mapped a (hang!)\n");
    *alias = 1;

编译运行,典型的 log 如下:

Original addr of a: 0x802c4000
Original value of a: 1145141919810
* Mapping ffffffc0802c4000 to 802c4000 with prot 3
Mapped addr of a: 0xffffffc0802c4000
Mapped value of a: 1145141919810
We modified from mapped, now the original value is 1919810114514
Now we unmap it
* Unmapping ffffffc0802c4000
Original value of a: 1919810114514
Accessing to mapped a (hang!)

提示

此处代码仍存在一些问题,已经在新提交中修复,敬请参阅。