回顾
用变量实现寄存器和内存
#include <stdint.h>
// Architectural state, sized according to the RISC-V manual.
uint32_t R[32]; // registers
uint32_t PC;    // address of the instruction to fetch next
// Simulated memory: 64 bytes.
uint8_t M[64];
为什么不使用int32_t和int8_t?
C语言标准规定, 有符号数溢出是undefined behavior, 但无符号数不会溢出
6.5 Expressions
5 If an exceptional condition occurs during the evaluation of an expression (that is,
if the result is not mathematically defined or not in the range of representable
values for its type), the behavior is undefined.
6.2.5 Types
9 A computation involving unsigned operands can never overflow, because a result that
cannot be represented by the resulting unsigned integer type is reduced modulo the
number that is one greater than the largest value that can be represented by the
resulting type.
用语句实现指令的语义
指令周期(instruction cycle): 执行一条指令的步骤
取指(fetch): 从PC所指示的内存位置读取一条指令
译码(decode): 按手册解析指令的操作码(opcode)和操作数(operand)
执行(execute): 按解析出的操作码, 对操作数进行处理
更新PC: 让PC指向下一条指令
模拟器逐条模拟指令的执行过程: 不断地执行语句、更新状态, 直到程序结束
简化演示代码
#include <stdbool.h>
bool halt = false;
while (!halt) {
inst_cycle();
}
具体实现示例代码
RTFM后得知:
 31           20 19 15 14 12 11  7 6       0
+---------------+-----+-----+-----+---------+
|   imm[11:0]   | rs1 | 000 | rd  | 0010011 | ADDI
+---------------+-----+-----+-----+---------+
+---------------+-----+-----+-----+---------+
| 000000000001  |00000| 000 |00000| 1110011 | EBREAK
+---------------+-----+-----+-----+---------+
/*
 * Implementation: execute one instruction cycle.
 *
 * Fetches the 32-bit instruction at M[PC], decodes and executes it, then
 * advances PC by 4.
 *
 * Supported instructions:
 *   addi   - R[rd] = R[rs1] + sign-extended imm[11:0]; a write to register 0
 *            is discarded.
 *   ebreak - R[10] selects the command: 0 prints the low byte of R[11],
 *            1 sets halt so that the driver loop ends; any other value is
 *            reported as unsupported.
 * Any other instruction is reported as unsupported and skipped.
 *
 * If PC does not leave room for a full 4-byte instruction inside M, an error
 * is printed, halt is set and PC is left unchanged.
 */
void inst_cycle() {
    // Refuse to fetch past the end of memory.
    if (PC > sizeof M - 4) {
        printf("PC out of bounds\n");
        halt = true;
        return;
    }

    // Fetch: assemble the little-endian word byte by byte. Casting &M[PC] to
    // uint32_t * would violate strict aliasing and would depend on the host's
    // alignment rules and byte order.
    uint32_t inst = (uint32_t)M[PC]
                  | ((uint32_t)M[PC + 1] << 8)
                  | ((uint32_t)M[PC + 2] << 16)
                  | ((uint32_t)M[PC + 3] << 24);

    if (((inst & 0x7f) == 0x13) && ((inst >> 12) & 0x7) == 0) { // addi
        if (((inst >> 7) & 0x1f) != 0) { // register 0 must stay 0
            // imm[11:0] is inst[31:20]: bits 30:20 are the low 11 bits and
            // bit 31 is the sign bit with weight -2^11, hence 2048.
            R[(inst >> 7) & 0x1f] = R[(inst >> 15) & 0x1f] +
                (((inst >> 20) & 0x7ff) - ((inst & 0x80000000) ? 2048 : 0));
        }
    } else if (inst == 0x00100073) { // ebreak
        if (R[10] == 0) { putchar(R[11] & 0xff); }
        else if (R[10] == 1) { halt = true; } // ends the driver loop
        else { printf("Unsupported ebreak command\n"); }
    } else { printf("Unsupported instuction\n"); }
    PC += 4; // every instruction is four bytes long
}
初始状态
根据手册, 初始状态如下:
R[0] = 0, 0号寄存器恒为0
PC = 0, 与自制运行时环境共同约定
M中存放程序, 与自制运行时环境共同约定, 由模拟器加载程序
示例代码
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
uint32_t R[32], PC;
// The compiled program is already stored in memory.
uint8_t M[64] = {
    0x13, 0x05, 0x00, 0x00, 0x93, 0x05, 0x10, 0x04, 0x73, 0x00, 0x10, 0x00,
    0x13, 0x05, 0x10, 0x00, 0x93, 0x05, 0x00, 0x00, 0x73, 0x00, 0x10, 0x00,
    0x6f, 0x00, 0x00, 0x00,
};
bool halt = false; // set by ebreak (with R[10] == 1) to stop the simulation

/*
 * Execute one instruction cycle: fetch the 32-bit instruction at M[PC],
 * decode and execute it, then advance PC by 4.
 *
 * Supported instructions:
 *   addi   - R[rd] = R[rs1] + sign-extended imm[11:0]; a write to register 0
 *            is discarded.
 *   ebreak - R[10] selects the command: 0 prints the low byte of R[11],
 *            1 sets halt; any other value is reported as unsupported.
 * Any other instruction is reported as unsupported and skipped.
 *
 * If PC does not leave room for a full 4-byte instruction inside M, an error
 * is printed, halt is set and PC is left unchanged.
 */
void inst_cycle() {
    // Refuse to fetch past the end of memory.
    if (PC > sizeof M - 4) {
        printf("PC out of bounds\n");
        halt = true;
        return;
    }

    // Fetch: assemble the little-endian word byte by byte. Casting &M[PC] to
    // uint32_t * would violate strict aliasing and would depend on the host's
    // alignment rules and byte order.
    uint32_t inst = (uint32_t)M[PC]
                  | ((uint32_t)M[PC + 1] << 8)
                  | ((uint32_t)M[PC + 2] << 16)
                  | ((uint32_t)M[PC + 3] << 24);

    if (((inst & 0x7f) == 0x13) && ((inst >> 12) & 0x7) == 0) { // addi
        if (((inst >> 7) & 0x1f) != 0) { // register 0 must stay 0
            // imm[11:0] is inst[31:20]: bits 30:20 are the low 11 bits and
            // bit 31 is the sign bit with weight -2^11, hence 2048.
            R[(inst >> 7) & 0x1f] = R[(inst >> 15) & 0x1f] +
                (((inst >> 20) & 0x7ff) - ((inst & 0x80000000) ? 2048 : 0));
        }
    } else if (inst == 0x00100073) { // ebreak
        if (R[10] == 0) { putchar(R[11] & 0xff); }
        else if (R[10] == 1) { halt = true; }
        else { printf("Unsupported ebreak command\n"); }
    } else { printf("Unsupported instuction\n"); }
    PC += 4; // every instruction is four bytes long
}
// Entry point: put the machine into its initial state, then execute
// instruction cycles until the program raises halt.
int main() {
    // Explicit reset of the initial state. It could be left out, because
    // global variables without an initializer start out as 0.
    R[0] = 0;
    PC = 0;

    for (;;) {
        if (halt) {
            break;
        }
        inst_cycle();
    }
    return 0;
}
从文件读入程序代码
// ...
uint8_t M[1024]; // simulated memory, filled from the program file at startup

/*
 * Entry point: load the program image named by argv[1] into M, then execute
 * instruction cycles until halt is set.
 *
 * This version deliberately performs no error checking (missing argument,
 * file that cannot be opened, short or oversized image); the
 * defensive-programming version of main in this document adds those checks.
 */
int main(int argc, char *argv[]) {
    PC = 0; R[0] = 0;
    // "rb": the image is raw machine code, so it must not pass through
    // text-mode translation on platforms that distinguish text from binary.
    FILE *fp = fopen(argv[1], "rb");
    // sizeof M keeps the read limit tied to the actual size of the buffer.
    fread(M, 1, sizeof M, fp);
    fclose(fp);
    while (!halt) { inst_cycle(); }
    return 0;
}
//linux命令
# Extract the instruction sequence of the executable prog into prog.bin
riscv64-linux-gnu-objcopy -j .text -O binary prog prog.bin
# Build the emulator from yemu.c and, if that succeeds, run it on prog.bin
gcc -o yemu yemu.c && ./yemu prog.bin
防御性编程
不相信外界的输入/其他函数传递的参数, 通过断言提前拦截非预期情况
示例代码
#include <assert.h>
// ...
/*
 * Entry point with defensive checks: every assumption about the command line
 * and the program file is asserted before it is relied upon, so an unexpected
 * situation is caught at the point where it arises.
 *
 * Loads the file named by argv[1] into M and executes instruction cycles
 * until halt is set.
 */
int main(int argc, char *argv[]) {
    PC = 0; R[0] = 0;
    assert(argc >= 2); // at least one argument is required
    // "rb": the image is raw machine code, not text.
    FILE *fp = fopen(argv[1], "rb");
    assert(fp != NULL); // argv[1] must be a file that can be opened
    int ret = fseek(fp, 0, SEEK_END);
    assert(ret == 0); // fseek() must succeed (it reports failure as nonzero)
    long fsize = ftell(fp);
    assert(fsize != -1); // ftell() must succeed
    rewind(fp);
    assert(fsize <= (long)sizeof M); // the program must not exceed the size of M
    // fread() returns a size_t element count, so keep it in a size_t.
    size_t nread = fread(M, 1, sizeof M, fp);
    assert(nread == (size_t)fsize); // the whole program must have been read
    fclose(fp);
    while (!halt) { inst_cycle(); }
    return 0;
}
编写可复用的代码
拒绝Copy-Paste
Copy-Paste = 编写相似代码时, 复制旧代码并稍作修改
上述代码不言自明本身就不怎么样, 不言自证就更难了
需要看很久的代码, 基本上都很难做到不言自证
当你粘贴出上百行这样的代码, 你很可能会改漏几处
哪天你发现了一个共性的问题(例如立即数忘记符号扩展), 所有粘贴的代码都要修改
改漏了 = bug
粘贴一时爽, 调试火葬场
编写可复用的代码
通过变量, 函数, 宏等方式消除重复/相似的代码
代码修改前
// Copy-paste style: every branch repeats the same field-extraction
// expressions. In each branch the immediate is imm[11:0] = inst[31:20];
// bits 30:20 are its low 11 bits and bit 31 is the sign bit with weight
// -2^11, so 2048 is subtracted when bit 31 is set.
if (((inst & 0x7f) == 0x13) && ((inst >> 12) & 0x7) == 0) { // addi
    if (((inst >> 7) & 0x1f) != 0) {
        R[(inst >> 7) & 0x1f] = R[(inst >> 15) & 0x1f] +
            (((inst >> 20) & 0x7ff) - ((inst & 0x80000000) ? 2048 : 0));
    }
} else if (((inst & 0x7f) == 0x13) && ((inst >> 12) & 0x7) == 0x4) { // xori
    if (((inst >> 7) & 0x1f) != 0) {
        R[(inst >> 7) & 0x1f] = R[(inst >> 15) & 0x1f] ^
            (((inst >> 20) & 0x7ff) - ((inst & 0x80000000) ? 2048 : 0));
    }
} else if (((inst & 0x7f) == 0x13) && ((inst >> 12) & 0x7) == 0x6) { // ori
    if (((inst >> 7) & 0x1f) != 0) {
        R[(inst >> 7) & 0x1f] = R[(inst >> 15) & 0x1f] |
            (((inst >> 20) & 0x7ff) - ((inst & 0x80000000) ? 2048 : 0));
    }
// andi is funct3 0x7. Leaving the pasted 0x4 from the xori branch here would
// make this branch unreachable, which is exactly the kind of slip that
// copy-paste invites.
} else if (((inst & 0x7f) == 0x13) && ((inst >> 12) & 0x7) == 0x7) { // andi
    if (((inst >> 7) & 0x1f) != 0) {
        R[(inst >> 7) & 0x1f] = R[(inst >> 15) & 0x1f] &
            (((inst >> 20) & 0x7ff) - ((inst & 0x80000000) ? 2048 : 0));
    }
} else if (...) { ... }
// After the rewrite: each instruction field is decoded once into a named
// variable, and the branches only differ in the operator they apply.

// Fetch: assemble the little-endian word byte by byte. Casting &M[PC] to
// uint32_t * would violate strict aliasing and would depend on the host's
// alignment rules and byte order.
uint32_t inst = (uint32_t)M[PC]
              | ((uint32_t)M[PC + 1] << 8)
              | ((uint32_t)M[PC + 2] << 16)
              | ((uint32_t)M[PC + 3] << 24);
uint32_t opcode = inst & 0x7f;
uint32_t funct3 = (inst >> 12) & 0x7;
uint32_t rd     = (inst >> 7 ) & 0x1f;
uint32_t rs1    = (inst >> 15) & 0x1f;
// imm[11:0] is inst[31:20]: bits 30:20 are the low 11 bits and bit 31 is the
// sign bit with weight -2^11, so 2048 is subtracted when it is set. The
// unsigned subtraction wraps, which yields the sign-extended 32-bit value.
uint32_t imm    = ((inst >> 20) & 0x7ff) - ((inst & 0x80000000) ? 2048 : 0);
if (opcode == 0x13) {
    if      (funct3 == 0x0) { R[rd] = R[rs1] + imm; } // addi
    else if (funct3 == 0x4) { R[rd] = R[rs1] ^ imm; } // xori
    else if (funct3 == 0x6) { R[rd] = R[rs1] | imm; } // ori
    else if (funct3 == 0x7) { R[rd] = R[rs1] & imm; } // andi
    else { panic("Unsupported funct3 = %d", funct3); }
    R[0] = 0; // if the instruction wrote R[0], reset it to 0 here
} else if (...) { ... }
PC += 4;
引入中间变量, 不言自明 ✅
对齐的代码更容易阅读并发现错误, 不言自证 ✅
进一步优化代码
// One 32-bit instruction, viewable either as a whole word (bytes) or through
// a per-format struct of bit-fields, so that fields can be read by name
// instead of with shifts and masks.
// NOTE(review): the order in which bit-fields are packed into a word is
// implementation-defined. This layout assumes the first declared field
// occupies the least-significant bits (opcode = inst[6:0], rd = inst[11:7],
// funct3 = inst[14:12], rs1 = inst[19:15], imm11_0 = inst[31:20]); confirm
// for the target compiler.
typedef union {
// Fields of the format used by ADDI: imm[11:0] | rs1 | funct3 | rd | opcode.
struct {
uint32_t opcode : 7;
uint32_t rd : 5;
uint32_t funct3 : 3;
uint32_t rs1 : 5;
// Declared with a signed type so that reading it is expected to yield the
// sign-extended immediate. NOTE(review): confirm the compiler treats an
// int32_t bit-field as signed.
int32_t imm11_0 : 12;
} I;
// Placeholder for a second format; its fields are elided here.
struct { /* ... */ } R;
// The whole instruction as a single 32-bit value.
uint32_t bytes;
} inst_t;
inst_t *inst = (inst_t *)&M[PC];
uint32_t rd = inst->I.rd;
uint32_t rs1 = inst->I.rs1;
uint32_t imm = (int32_t)inst->I.imm11_0;
if (inst->I.opcode == 0b0010011) {
switch (inst->I.funct3) {
case 0b000: R[rd] = R[rs1] + imm; break; // addi
case 0b100: R[rd] = R[rs1] ^ imm; break; // xori
case 0b110: R[rd] = R[rs1] | imm; break; // ori
case 0b111: R[rd] = R[rs1] & imm; break; // andi
default: panic("Unsupported funct3 = %d", inst->I.funct3);
}
R[0] = 0; // 若指令写入了R[0], 此处将其重置为0
} else if (inst->bytes == 0x00100073) { ... }
这篇文章写得深入浅出,让我这个小白也看懂了!