AFL学习

记录自己学习AFL的过程。

AFL安装

我的系统是ubuntu 16.04 64bits

1
2
sudo apt-get install clang
sudo apt-get install llvm

AFL官网下载最新版本:

1
wget https://lcamtuf.coredump.cx/afl/releases/afl-latest.tgz

解压后进入AFL目录执行以下命令:

1
2
make
sudo make install

AFL简介和模式

AFL 的设计就是在 dumb fuzzer 之上加了一层基于边界(edge)覆盖率的反馈机制,在此过程中 AFL 会维护一个语料库队列 queue,包含了初始测试用例及变异后有新状态产生的测试用例,对应的变异操作分为确定性策略和随机性策略两类,而状态的分析,即边界覆盖率的统计工作,需依赖插桩来完成。
AFL分为三种模式,编译层级的llvm mode,汇编级别的普通模式,运行时级别的qemu mode。
普通模式:对gcc做一层的简单封装,指定-B prefix选项替换gcc的as汇编器。也就是后面介绍的afl-as汇编。
llvm模式:通过编写pass来实现tuple信息的记录,对每个基本块都会插入探针。
qemu模式:通过patch源码来实现afl-as.h中的操作。

1
2
3
4
5
6
7
8
9
10
11
12
//afl-qemu-cpu-inl.h
/* This snippet kicks in when the instruction pointer is positioned at
_start and does the usual forkserver stuff, not very different from
regular instrumentation injected via afl-as.h. */

#define AFL_QEMU_CPU_SNIPPET2 do { \
if(itb->pc == afl_entry_point) { \
afl_setup(); \
afl_forkserver(cpu); \
} \
afl_maybe_log(itb->pc); \
} while (0)

AFL初始化

ALF维护了两个bitemap,trace_bits和virgin_bits,另外还有两个bitmap,virgin_tmout和virgin_crash。

1
2
3
4
5
//afl-fuzz.c
EXP_ST u8* trace_bits; /* SHM with instrumentation bitmap */ //记录当前tuple的信息,位于共享内存上
EXP_ST u8 virgin_bits[MAP_SIZE], /* Regions yet untouched by fuzzing */ //记录总的tuple的信息
virgin_tmout[MAP_SIZE], /* Bits we haven't seen in tmouts */ //所有目标程序出现timeout的tuple信息
virgin_crash[MAP_SIZE]; /* Bits we haven't seen in crashes */ //所有目标程序出现crash的tuple信息

afl-as汇编器

源码->编译->汇编,gcc中的-B选项用于设置编译器的搜索路径,将其设置为afl-as的路径。
afl-as分析源代码,在分支处插入桩代码,然后调用真正的as汇编器进行汇编,相当于是做了一个hook as的操作。
R(MAP_SIZE)生成一个0~MAP_SIZE的随机数,运行时保存在ECX中,同于标识一个代码块。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
//afl-as.c
if (line[0] == '\t') {

if (line[1] == 'j' && line[2] != 'm' && R(100) < inst_ratio) {

fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
R(MAP_SIZE)); //根据架构的不同插入不同的插桩代码

ins_lines++;

}

continue;

}

比如trampoline_fmt_32:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
//afl-as.h
static const u8* trampoline_fmt_32 =

"\n"
"/* --- AFL TRAMPOLINE (32-BIT) --- */\n"
"\n"
".align 4\n"
"\n"
"leal -16(%%esp), %%esp\n"
"movl %%edi, 0(%%esp)\n"
"movl %%edx, 4(%%esp)\n"
"movl %%ecx, 8(%%esp)\n"
"movl %%eax, 12(%%esp)\n" //保存edi、edx、ecx、eax寄存器
"movl $0x%08x, %%ecx\n" //ecx = "0x%08x"
"call __afl_maybe_log\n" //调用__afl_maybe_log
"movl 12(%%esp), %%eax\n"
"movl 8(%%esp), %%ecx\n"
"movl 4(%%esp), %%edx\n"
"movl 0(%%esp), %%edi\n" //恢复寄存器
"leal 16(%%esp), %%esp\n"
"\n"
"/* --- END --- */\n"
"\n";

afl fork server

由于fuzz过程需要不断的fork和执行target的操作,AFL实现了fork server机制,启动target进程后,target会运行一个fork server,fuzzer不负责fork子进程,而是与fork server通信,由fork server来完成fork子进程和执行target的操作。
为什么要有fork server的存在呢?因为正常情况下我们的想法是构造一个测试样例,然后执行execve(),输入给目标程序,但execve()函数执行之后除非出错,否则不会返回,且运行程序会替换当前进程,pid不变,那么我们就无法通过pid来标识不同的测试用例。所以fuzzer先fork一个子进程,执行execve()来运行fork server,fork server是fuzzer的子进程,然后fork server负责fork子进程和执行target的操作。这样fuzzer是目标程序的父父进程,然后fuzzer和fork server通过建立两个管道st_pipe, ctl_pipe用于传递状态和命令。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
//afl-fuzz.c
EXP_ST void init_forkserver(char** argv) {

static struct itimerval it;
int st_pipe[2], ctl_pipe[2]; //使用两个管道进行命令和状态传递
int status;
s32 rlen;

ACTF("Spinning up the fork server...");

if (pipe(st_pipe) || pipe(ctl_pipe)) PFATAL("pipe() failed");

forksrv_pid = fork(); //父进程为fuzzer,子进程为fork server

if (forksrv_pid < 0) PFATAL("fork() failed");

if (!forksrv_pid) {

struct rlimit r;

/* Umpf. On OpenBSD, the default fd limit for root users is set to
soft 128. Let's try to fix that... */

if (!getrlimit(RLIMIT_NOFILE, &r) && r.rlim_cur < FORKSRV_FD + 2) {

r.rlim_cur = FORKSRV_FD + 2;
setrlimit(RLIMIT_NOFILE, &r); /* Ignore errors */

}

if (mem_limit) {

r.rlim_max = r.rlim_cur = ((rlim_t)mem_limit) << 20;

...

dup2(dev_null_fd, 1);
dup2(dev_null_fd, 2);

if (out_file) {

dup2(dev_null_fd, 0);

} else {

dup2(out_fd, 0);
close(out_fd);

}

/* Set up control and status pipes, close the unneeded original fds. */
//为管道分配id
if (dup2(ctl_pipe[0], FORKSRV_FD) < 0) PFATAL("dup2() failed");
if (dup2(st_pipe[1], FORKSRV_FD + 1) < 0) PFATAL("dup2() failed");

close(ctl_pipe[0]);
close(ctl_pipe[1]);
close(st_pipe[0]);
close(st_pipe[1]);

close(out_dir_fd);
close(dev_null_fd);
close(dev_urandom_fd);
close(fileno(plot_file));

...

}

/* Close the unneeded endpoints. */

close(ctl_pipe[0]);
close(st_pipe[1]);

fsrv_ctl_fd = ctl_pipe[1];
fsrv_st_fd = st_pipe[0];

/* Wait for the fork server to come up, but don't wait too long. */

it.it_value.tv_sec = ((exec_tmout * FORK_WAIT_MULT) / 1000);
it.it_value.tv_usec = ((exec_tmout * FORK_WAIT_MULT) % 1000) * 1000;

setitimer(ITIMER_REAL, &it, NULL);

rlen = read(fsrv_st_fd, &status, 4);

it.it_value.tv_sec = 0;
it.it_value.tv_usec = 0;

setitimer(ITIMER_REAL, &it, NULL);

/* If we have a four-byte "hello" message from the server, we're all set.
Otherwise, try to figure out what went wrong. */
//读取状态管道的信息,检查是否正常
if (rlen == 4) {
OKF("All right - fork server is up.");
return;
}
...
FATAL("Fork server handshake failed");
}

fuzzer与fork server、target的通信

fuzzer与fork server

首先向状态管道写入4字节,通知fuzzer已经准备好,fuzzer会读取状态管道的信息,检查长度是否等于4来确定fork server是否准备完毕。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
//afl-as.h
"__afl_forkserver:\n"
"\n"
" /* Enter the fork server mode to avoid the overhead of execve() calls. */\n"
"\n"
" pushl %eax\n"
" pushl %ecx\n"
" pushl %edx\n"
"\n"
" /* Phone home and tell the parent that we're OK. (Note that signals with\n"
" no SA_RESTART will mess it up). If this fails, assume that the fd is\n"
" closed because we were execve()d from an instrumented binary, or because\n"
" the parent doesn't want to use the fork server. */\n"
"\n"
" pushl $4 /* length */\n"
" pushl $__afl_temp /* data */\n"
" pushl $" STRINGIFY((FORKSRV_FD + 1)) " /* file desc */\n"
" call write\n"
" addl $12, %esp\n"
"\n"
" cmpl $4, %eax\n" //成功写入
" jne __afl_fork_resume\n"
"\n"
"__afl_fork_wait_loop:\n"
"\n"
"__afl_fork_wait_loop:\n"
"\n"

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
//afl-fuzz.c
rlen = read(fsrv_st_fd, &status, 4);

it.it_value.tv_sec = 0;
it.it_value.tv_usec = 0;

setitimer(ITIMER_REAL, &it, NULL);

/* If we have a four-byte "hello" message from the server, we're all set.
Otherwise, try to figure out what went wrong. */
//读取状态管道的信息,检查是否正常
if (rlen == 4) {
OKF("All right - fork server is up.");
return;
}

fork server向状态管道成功写入4字节后,然后执行__afl_fork_wait_loop,读取命令管道,等待fuzzer通知其开始fork.

1
2
3
4
5
6
7
8
9
10
11
//afl-as.h
"__afl_fork_wait_loop:\n"
"\n"
" /* Wait for parent by reading from the pipe. Abort if read fails. */\n"
"\n"
" pushl $4 /* length */\n"
" pushl $__afl_temp /* data */\n"
" pushl $" STRINGIFY(FORKSRV_FD) " /* file desc */\n"
" call read\n"
" addl $12, %esp\n"
"\n"

一旦fork接收到fuzzer的信息,fork server进行fork,子进程是target进程,父进程是fork server。成功fork后,会跳转到__afl_fork_resume。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
//afl-as.h
" cmpl $4, %eax\n"
" jne __afl_die\n"
"\n"
" /* Once woken up, create a clone of our process. This is an excellent use\n"
" case for syscall(__NR_clone, 0, CLONE_PARENT), but glibc boneheadedly\n"
" caches getpid() results and offers no way to update the value, breaking\n"
" abort(), raise(), and a bunch of other things :-( */\n"
"\n"
" call fork\n"
"\n"
" cmpl $0, %eax\n"
" jl __afl_die\n"
" je __afl_fork_resume\n"
"\n"

__afl_fork_resume会关闭不需要的状态和命令管道,然后继续执行。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
//afl-as.h
"__afl_fork_resume:\n"
"\n"
" /* In child process: close fds, resume execution. */\n"
"\n"
" pushl $" STRINGIFY(FORKSRV_FD) "\n"
" call close\n"
"\n"
" pushl $" STRINGIFY((FORKSRV_FD + 1)) "\n"
" call close\n"
"\n"
" addl $8, %esp\n"
"\n"
" popl %edx\n"
" popl %ecx\n"
" popl %eax\n"
" jmp __afl_store\n"
"\n"

然后将fork的子进程的pid通过状态管道发送给fuzzer,调用waitpid等待子进程执行完毕,然后将target子进程的结束状态写入状态管道,告知fuzzer。之后再次进入__afl_fork_wait_loop。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
//afl-as.h
" /* In parent process: write PID to pipe, then wait for child. */\n"
"\n"
" movl %eax, __afl_fork_pid\n"
"\n"
" pushl $4 /* length */\n"
" pushl $__afl_fork_pid /* data */\n"
" pushl $" STRINGIFY((FORKSRV_FD + 1)) " /* file desc */\n"
" call write\n"
" addl $12, %esp\n"
"\n"
" pushl $0 /* no flags */\n"
" pushl $__afl_temp /* status */\n"
" pushl __afl_fork_pid /* PID */\n"
" call waitpid\n"
" addl $12, %esp\n"
"\n"
" cmpl $0, %eax\n"
" jle __afl_die\n"
"\n"
" /* Relay wait status to pipe, then loop back. */\n"
"\n"
" pushl $4 /* length */\n"
" pushl $__afl_temp /* data */\n"
" pushl $" STRINGIFY((FORKSRV_FD + 1)) " /* file desc */\n"
" call write\n"
" addl $12, %esp\n"
"\n"
" jmp __afl_fork_wait_loop\n"

fuzzer侧通过调用run_target方法来执行测试用例,在run_target中,fuzzer会读取管道内的内容来获取target的状态。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
//afl-fuzz.c
static u8 run_target(char** argv, u32 timeout) {

static struct itimerval it;
static u32 prev_timed_out = 0;

int status = 0;
u32 tb4;

child_timed_out = 0;

/* After this memset, trace_bits[] are effectively volatile, so we
must prevent any earlier operations from venturing into that
territory. */

memset(trace_bits, 0, MAP_SIZE);
MEM_BARRIER();

/* If we're running in "dumb" mode, we can't rely on the fork server
logic compiled into the target program, so we will just keep calling
execve(). There is a bit of code duplication between here and
init_forkserver(), but c'est la vie. */
//初始化fork server
if (dumb_mode == 1 || no_forkserver) { //dumb模式下不生成fork server,fuzzer直接execve()执行目标程序

child_pid = fork();

... ...

/* Isolate the process and configure standard descriptors. If out_file is
specified, stdin is /dev/null; otherwise, out_fd is cloned instead. */

setsid();

dup2(dev_null_fd, 1);
dup2(dev_null_fd, 2);

if (out_file) {

dup2(dev_null_fd, 0);

} else {

dup2(out_fd, 0);
close(out_fd);

}

/* On Linux, would be faster to use O_CLOEXEC. Maybe TODO. */

close(dev_null_fd);
close(out_dir_fd);
close(dev_urandom_fd);
close(fileno(plot_file));

... ...

execv(target_path, argv);

/* Use a distinctive bitmap value to tell the parent about execv()
falling through. */

*(u32*)trace_bits = EXEC_FAIL_SIG;
exit(0);

}

} else {

s32 res;

/* In non-dumb mode, we have the fork server up and running, so simply
tell it to have at it, and then read back PID. */
//通过命令管道通知fork server fork target子进程
if ((res = write(fsrv_ctl_fd, &prev_timed_out, 4)) != 4) {

if (stop_soon) return 0;
RPFATAL(res, "Unable to request new process from fork server (OOM?)");

}
//通过状态管道读取target子进程的pid
if ((res = read(fsrv_st_fd, &child_pid, 4)) != 4) {

if (stop_soon) return 0;
RPFATAL(res, "Unable to request new process from fork server (OOM?)");

}

if (child_pid <= 0) FATAL("Fork server is misbehaving (OOM?)");

}

/* Configure timeout, as requested by user, then wait for child to terminate. */

it.it_value.tv_sec = (timeout / 1000);
it.it_value.tv_usec = (timeout % 1000) * 1000;

setitimer(ITIMER_REAL, &it, NULL);

/* The SIGALRM handler simply kills the child_pid and sets child_timed_out. */

if (dumb_mode == 1 || no_forkserver) {

if (waitpid(child_pid, &status, 0) <= 0) PFATAL("waitpid() failed");

} else {

s32 res;
//通过状态管道获取target子进程退出的状态
if ((res = read(fsrv_st_fd, &status, 4)) != 4) {

if (stop_soon) return 0;
RPFATAL(res, "Unable to communicate with fork server (OOM?)");

}

}

... ...

return FAULT_NONE;

}

fuzzer与target

AFL使用共享内存来完成fuzzer和target之间的信息传递,创建共享内存并写入环境变量,之后fork的子进程可以通过环境变量获取这块共享内存的shm_id。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
//afl-fuzz.c
EXP_ST void setup_shm(void) {

u8* shm_str;

if (!in_bitmap) memset(virgin_bits, 255, MAP_SIZE);

memset(virgin_tmout, 255, MAP_SIZE);
memset(virgin_crash, 255, MAP_SIZE);

shm_id = shmget(IPC_PRIVATE, MAP_SIZE, IPC_CREAT | IPC_EXCL | 0600); //分配共享内存,大小为64K

if (shm_id < 0) PFATAL("shmget() failed");

atexit(remove_shm);

shm_str = alloc_printf("%d", shm_id);

/* If somebody is asking us to fuzz instrumented binaries in dumb mode,
we don't want them to detect instrumentation, since we won't be sending
fork server commands. This should be replaced with better auto-detection
later on, perhaps? */

if (!dumb_mode) setenv(SHM_ENV_VAR, shm_str, 1); //将这块共享内存的标识符写入环境变量

ck_free(shm_str);

trace_bits = shmat(shm_id, NULL, 0); //保存共享内存的地址

if (!trace_bits) PFATAL("shmat() failed");

}

target调用shmat将这块共享内存映射到自己的进程空间中,然后保存在__afl_area_ptr中。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
//afl-as.h
" pushl $0 /* shmat flags */\n"
" pushl $0 /* requested addr */\n"
" pushl %eax /* SHM ID */\n"
" call shmat\n"
" addl $12, %esp\n"
"\n"
" cmpl $-1, %eax\n"
" je __afl_setup_abort\n"
"\n"
" /* Store the address of the SHM region. */\n"
"\n"
" movl %eax, __afl_area_ptr\n"
" movl %eax, %edx\n"
"\n"
" popl %ecx\n"
" popl %eax\n"
"\n"

target会在afl_maybe_log检查这块共享内存是否已经映射完成,afl_area_ptr就是上面共享内存映射到target地址空间中的地址。

1
2
3
4
5
6
7
8
9
10
11
12
//afl-as.h
"__afl_maybe_log:\n"
"\n"
" lahf\n"
" seto %al\n"
"\n"
" /* Check if SHM region is already mapped. */\n"
"\n"
" movl __afl_area_ptr, %edx\n" //获取共享内存的地址
" testl %edx, %edx\n"
" je __afl_setup\n"
"\n"

tuple信息统计和语料库更迭

分支信息的记录

AFL使用二元tuple来记录跳转的源地址和目的地址,从而获得target的代码覆盖情况。当有新的tuple出现或已有的tuple出现新的命中组时则说明产生了新状态,相应的测试用例会加入到语料库中。
afl_maybe_log()中,将共享内存的地址保存后,edi保存的是前一次跳转的位置afl_prev_loc,与ecx异或后保存在edi中,ecx是上文中提到的标识代码块的随机数。然后以edx为基址,edi为下标,将edx[edi]进行加一操作,edx就是上文中共享内存的基址。
ecx右移一位后,被赋值给了__afl_prev_loc。
因此,AFL为每一个代码块都生成一个0~MAP_SIZE的随机数,标识其位置,将分支处的源代码块和目的代码块进行异或,作为该分支的key,来保存该分支的执行次数。用于保存执行次数的实际上是一个哈希表,大小为MAP_SIZE=64K。可能会存在碰撞的问题。
cur_location右移一位后,再赋值给prev_location,是为了标识源和目的代码块相同或两者颠倒的情况,如果不右移,则不能区分A->B和B->A,以及A->A和B->B这种分支。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
//afl-as.h
//cur_location = <COMPILE_TIME_RANDOM>;
//shared_mem[cur_location ^ prev_location]++;
//prev_location = cur_location >> 1;
#ifndef COVERAGE_ONLY
" movl __afl_prev_loc, %edi\n" //edi = prev_location
" xorl %ecx, %edi\n" //ecx = cur_location edi = prev_location ^ cur_location
" shrl $1, %ecx\n" // ecx = ecx >> 1
" movl %ecx, __afl_prev_loc\n" //prev_location = cur_location >> 1
#else
" movl %ecx, %edi\n"
#endif /* ^!COVERAGE_ONLY */
"\n"
#ifdef SKIP_COUNTS
" orb $1, (%edx, %edi, 1)\n"
#else
" incb (%edx, %edi, 1)\n" // shmt[prev_location ^ cur_location]++

分支信息统计

为了统计边界覆盖率,AFL将每个tuple的命中数使用1字节来保存,初始值为0xFF,命中数分为8个组别,若无此类新状态产生,则其值为1,若出现其值则置为0。

1
1,2,3,4-7,8-15,16-31,32-127,128-255

执行完目标程序后,调用claassify_count对当前tuple的状态进行统计,trace_bits记录了这一信息:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
//afl-fuzz.c
#ifdef __x86_64__
classify_counts((u64*)trace_bits);
#else
classify_counts((u32*)trace_bits);
#endif /* ^__x86_64__ */

/* Destructively classify execution counts in a trace. This is used as a
preprocessing step for any newly acquired traces. Called on every exec,
must be fast. */

static const u8 count_class_lookup8[256] = {

[0] = 0,
[1] = 1,
[2] = 2,
[3] = 4,
[4 ... 7] = 8,
[8 ... 15] = 16,
[16 ... 31] = 32,
[32 ... 127] = 64,
[128 ... 255] = 128

};

在统计完成后,has_new_bits函数通过比对trace_bits和virgin_bits来判断是否有新状态产生,位运算来提高比对效率。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
//afl-fuzz.c
static inline u8 has_new_bits(u8* virgin_map) {

#ifdef __x86_64__

u64* current = (u64*)trace_bits;
u64* virgin = (u64*)virgin_map;

u32 i = (MAP_SIZE >> 3);

#else

u32* current = (u32*)trace_bits;
u32* virgin = (u32*)virgin_map;

u32 i = (MAP_SIZE >> 2);

#endif /* ^__x86_64__ */

u8 ret = 0;

while (i--) {

/* Optimize for (*current & *virgin) == 0 - i.e., no bits in current bitmap
that have not been already cleared from the virgin map - since this will
almost always be the case. */

if (unlikely(*current) && unlikely(*current & *virgin)) {

if (likely(ret < 2)) {

u8* cur = (u8*)current;
u8* vir = (u8*)virgin;

/* Looks like we have not found any new bytes yet; see if any non-zero
bytes in current[] are pristine in virgin[]. */

#ifdef __x86_64__

if ((cur[0] && vir[0] == 0xff) || (cur[1] && vir[1] == 0xff) ||
(cur[2] && vir[2] == 0xff) || (cur[3] && vir[3] == 0xff) ||
(cur[4] && vir[4] == 0xff) || (cur[5] && vir[5] == 0xff) ||
(cur[6] && vir[6] == 0xff) || (cur[7] && vir[7] == 0xff)) ret = 2;
else ret = 1;

#else

if ((cur[0] && vir[0] == 0xff) || (cur[1] && vir[1] == 0xff) ||
(cur[2] && vir[2] == 0xff) || (cur[3] && vir[3] == 0xff)) ret = 2;
else ret = 1;

#endif /* ^__x86_64__ */

}

*virgin &= ~*current;

}

current++;
virgin++;

}

if (ret && virgin_map == virgin_bits) bitmap_changed = 1;

return ret;

}

AFL希望维护一组最小测试用例集,这个集合能够触发目前bitmap中的所有路径,且长度更为精简、执行速度更快。AFL使用top_rated数组来保存各个tuple状态当前相应的最优测试用例,如果某个测试用例触发了新状态,或有更有利的长度和执行速度,则它就是top_rated[i]。
执行完测试用例后,fuzzer都会调用update_bitmap_score函数判断当前测试用例是否优于top_rated中已有的测试用例。
trace_mini的大小是MAP_SIZE/8,它的每个bit对应了bitmap中的一个字节,如果这个测试用例访问了该状态,对应的bit就会置为1,用来标识该测试用例覆盖分支的能力。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
//afl-fuzz.c
static void update_bitmap_score(struct queue_entry* q) {

u32 i;
u64 fav_factor = q->exec_us * q->len; //执行速度*测试用例长度

/* For every byte set in trace_bits[], see if there is a previous winner,
and how it compares to us. */

for (i = 0; i < MAP_SIZE; i++)

if (trace_bits[i]) {

if (top_rated[i]) { //如果当前状态有top_rated[i]

/* Faster-executing or smaller test cases are favored. */

if (fav_factor > top_rated[i]->exec_us * top_rated[i]->len) continue;

/* Looks like we're going to win. Decrease ref count for the
previous winner, discard its trace_bits[] if necessary. */

if (!--top_rated[i]->tc_ref) { //tc_ref标识该测试用例入选top_rated的次数,原来的top_rated[i]的入选次数减一
ck_free(top_rated[i]->trace_mini);
top_rated[i]->trace_mini = 0;
}

}

/* Insert ourselves as the new winner. */

top_rated[i] = q; //当前testcase入选top_rated
q->tc_ref++; //q的入选次数加一

if (!q->trace_mini) {
q->trace_mini = ck_alloc(MAP_SIZE >> 3);
minimize_bits(q->trace_mini, trace_bits);
}

score_changed = 1;

}

}

文件变异

文件变异有两种策略:确定性策略和随机性策略,确定性策略包含bitflip,整数加减,整数替换、字典插入和替换。这类策略比较耗时,是为了产生简洁有效的测试用例。

确定性策略

bitflip

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
//afl-fuzz.c
/* Single walking bit. */
stage_short = "flip1";
stage_name = "bitflip 1/1"; //每次翻转一个bit

/* Two walking bits. */
stage_name = "bitflip 2/1";
stage_short = "flip2";

/* Four walking bits. */
stage_name = "bitflip 4/1";
stage_short = "flip4";

/* Walking byte. */
stage_name = "bitflip 8/8"; //依次对每个字节进行翻转
stage_short = "flip8";

/* Two walking bytes. */
stage_name = "bitflip 16/8"; //依次对每个word进行翻转
stage_short = "flip16";

/* Four walking bytes. */
stage_name = "bitflip 32/8"; //依次对每个dword进行翻转
stage_short = "flip32";

生成token

在bitflip 1/1中,如果连续多个字节的最低有效位进行翻转后,执行路径没有发生变化且和之前的执行路径不相同,则AFL认为这一段连续的字节上一个token。比如文件起始的魔数IHDR,对这四个字节做bitflip 1/1,程序执行路径与翻转前不同,且不同字节的翻转程序执行路径没有发生变化,则IHDR可能是一个文件的魔数。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
//xxxxxxxxIHDRxxxxxxxx
if (!dumb_mode && (stage_cur & 7) == 7) {

u32 cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);

if (stage_cur == stage_max - 1 && cksum == prev_cksum) {

/* If at end of file and we are still collecting a string, grab the
final character and force output. */

if (a_len < MAX_AUTO_EXTRA) a_collect[a_len] = out_buf[stage_cur >> 3];
a_len++;

if (a_len >= MIN_AUTO_EXTRA && a_len <= MAX_AUTO_EXTRA)
maybe_add_auto(a_collect, a_len);

AFL对token的长度做了限制:

1
2
3
4
5
//config.h
/* Length limits for auto-detected dictionary tokens: */

#define MIN_AUTO_EXTRA 3
#define MAX_AUTO_EXTRA 32

effector map

在进行bitflip 8/8时,AFL使用了一个effector map来记录每个字节对执行路径的影响。对一个字节进行翻转后,如果执行路径和之前的执行路径不同,则将其置为1,否则为0。如果一个byte完全翻转,都无法带来执行路径的变化,那么这个byte很有可能是只是文件中的数据,而不决定文件的结构,在后续的文件变异中,会参考effector map,跳过无效的字节,提高效率。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
//afl-fuzz.c
/* We also use this stage to pull off a simple trick: we identify
bytes that seem to have no effect on the current execution path
even when fully flipped - and we skip them during more expensive
deterministic stages, such as arithmetics or known ints. */

if (!eff_map[EFF_APOS(stage_cur)]) {

u32 cksum;

/* If in dumb mode or if the file is very short, just flag everything
without wasting time on checksums. */

if (!dumb_mode && len >= EFF_MIN_LEN) //非dumb模式且文件大小大于EFF_MIN_LEN
cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);
else
cksum = ~queue_cur->exec_cksum;

if (cksum != queue_cur->exec_cksum) {
eff_map[EFF_APOS(stage_cur)] = 1; //字节翻转带来新的执行路径
eff_cnt++;
}

}

另外,dumb模式下会对所有字符进行变异,当文件大小小于EFF_MIN_LEN时,认为所有字符都是有效的,EFF_MIN_LEN默认为128字节。如果有效字节数超过90%,则认为整个文件都是有效的。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
//config.h
/* Minimum input file length at which the effector logic kicks in: */
#define EFF_MIN_LEN 128
/* Maximum effector density past which everything is just fuzzed unconditionally (%): */
#define EFF_MAX_PERC 90
//afl-fuzz.c
if (eff_cnt != EFF_ALEN(len) &&
eff_cnt * 100 / EFF_ALEN(len) > EFF_MAX_PERC) {

memset(eff_map, 1, EFF_ALEN(len));

blocks_eff_select += EFF_ALEN(len);

} else {

blocks_eff_select += eff_cnt;

}

arithmetic

整数加减变换有三种,分别是对一个字节,一个word和一个dword做整数加减的变异。

1
2
3
4
5
6
7
8
9
10
11
12
//afl-fuzz.c
/* 8-bit arithmetics. */
stage_name = "arith 8/8";
stage_short = "arith8";
/* 16-bit arithmetics, both endians. */
if (len < 2) goto skip_arith;
stage_name = "arith 16/8";
stage_short = "arith16";
/* 32-bit arithmetics, both endians. */
if (len < 4) goto skip_arith;
stage_name = "arith 32/8";
stage_short = "arith32";

具体的,以arith 8/8”为例,j从1开始到ARITH_MAX对每个字节进行加减j。如果整数加/减法变异后与bitflip相同,则不进行重复的变异。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
//config.h
/* Maximum offset for integer addition / subtraction stages: */
#define ARITH_MAX 35

//afl-fuzz.c
/* Let's consult the effector map... */

if (!eff_map[EFF_APOS(i)]) { //如果整个字节都判断无效,则跳过该字节
stage_max -= 2 * ARITH_MAX;
continue;
}

stage_cur_byte = i;
for (j = 1; j <= ARITH_MAX; j++) {

u8 r = orig ^ (orig + j);

/* Do arithmetic operations only if the result couldn't be a product
of a bitflip. */

if (!could_be_bitflip(r)) { //如果加法变异后与bitflip相同,则不重复进行整数加法的变异

stage_cur_val = j;
out_buf[i] = orig + j; //加法变异

if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
stage_cur++;

} else stage_max--;

r = orig ^ (orig - j);

if (!could_be_bitflip(r)) { //如果减法变异后与bitflip相同,则不重复进行整数减法的变异

stage_cur_val = -j;
out_buf[i] = orig - j; //减法变异

if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
stage_cur++;

} else stage_max--;

out_buf[i] = orig;

}

interest

确定性策略的第三种是替换变异,一共分为三种,每次对8bit、16bit、32bit进行替换。

1
2
3
4
5
6
7
//afl-fuzz.c
stage_name = "interest 8/8";
stage_short = "int8";
stage_name = "interest 16/8";
stage_short = "int16";
stage_name = "interest 32/8";
stage_short = "int32";

替换的字符是预先定义好的,可以看到大都是一些类型边界的数字:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
//config.h
#define INTERESTING_8 \
-128, /* Overflow signed 8-bit when decremented */ \
-1, /* */ \
0, /* */ \
1, /* */ \
16, /* One-off with common buffer size */ \
32, /* One-off with common buffer size */ \
64, /* One-off with common buffer size */ \
100, /* One-off with common buffer size */ \
127 /* Overflow signed 8-bit when incremented */

#define INTERESTING_16 \
-32768, /* Overflow signed 16-bit when decremented */ \
-129, /* Overflow signed 8-bit */ \
128, /* Overflow signed 8-bit */ \
255, /* Overflow unsig 8-bit when incremented */ \
256, /* Overflow unsig 8-bit */ \
512, /* One-off with common buffer size */ \
1000, /* One-off with common buffer size */ \
1024, /* One-off with common buffer size */ \
4096, /* One-off with common buffer size */ \
32767 /* Overflow signed 16-bit when incremented */

#define INTERESTING_32 \
-2147483648LL, /* Overflow signed 32-bit when decremented */ \
-100663046, /* Large negative number (endian-agnostic) */ \
-32769, /* Overflow signed 16-bit */ \
32768, /* Overflow signed 16-bit */ \
65535, /* Overflow unsig 16-bit when incremented */ \
65536, /* Overflow unsig 16 bit */ \
100663045, /* Large positive number (endian-agnostic) */ \
2147483647 /* Overflow signed 32-bit when incremented */

//afl-fuzz.c
static s8 interesting_8[] = { INTERESTING_8 };
static s16 interesting_16[] = { INTERESTING_8, INTERESTING_16 };
static s32 interesting_32[] = { INTERESTING_8, INTERESTING_16, INTERESTING_32 };

同样的也会进行effector map的check:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
//afl-fuzz.c
/* Setting 8-bit integers. */

for (i = 0; i < len; i++) {

u8 orig = out_buf[i];

/* Let's consult the effector map... */

if (!eff_map[EFF_APOS(i)]) { //如果整个字节都判断无效,则跳过该字节
stage_max -= sizeof(interesting_8);
continue;
}

stage_cur_byte = i;

for (j = 0; j < sizeof(interesting_8); j++) {

/* Skip if the value could be a product of bitflips or arithmetics. */

if (could_be_bitflip(orig ^ (u8)interesting_8[j]) ||
could_be_arith(orig, (u8)interesting_8[j], 1)) { /如果与比特翻转和整数加减得到的变异相同,则跳过
stage_max--;
continue;
}

stage_cur_val = interesting_8[j];
out_buf[i] = interesting_8[j]; //替换变异

if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;

out_buf[i] = orig;
stage_cur++;

}

}

dictionary

user extras over

使用用户提供的tokens进行替换变异,extras_cnt是用户提供的tokens的数量,如果用户提供的tokens大于MAX_DET_EXTRAS,默认为200,则会随机挑选tokens进行变异,UR(extras_cnt)会产生一个0~extras_cnt的随机数,如果随机数大于MAX_DET_EXTRAS,则不使用当前tokens进行变异。同样的查找effector map如果替换的目标byte是无效的,则直接跳过该byte。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
//config.h
/* Maximum number of user-specified dictionary tokens to use in deterministic
steps; past this point, the "extras/user" step will be still carried out,
but with proportionally lower odds: */

#define MAX_DET_EXTRAS 200

//afl-fuzz.c
/* Skip extras probabilistically if extras_cnt > MAX_DET_EXTRAS. Also
skip them if there's no room to insert the payload, if the token
is redundant, or if its entire span has no bytes set in the effector
map. */

if ((extras_cnt > MAX_DET_EXTRAS && UR(extras_cnt) >= MAX_DET_EXTRAS) ||
extras[j].len > len - i ||
!memcmp(extras[j].data, out_buf + i, extras[j].len) ||
!memchr(eff_map + EFF_APOS(i), 1, EFF_SPAN_ALEN(i, extras[j].len))) {

stage_max--;
continue;

}

user extras insert

使用用户提供的tokens进行插入变异,从文件的第一个字节开始,遍历用户定义的token,依次进行插入变异,变异后的文件分为插入位置之前的部分、插入的token部分和尾部,依次复制到目标缓冲区中,得到变异的文件。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
//config.h
/* Maximum size of input file, in bytes (keep under 100MB): */

#define MAX_FILE (1 * 1024 * 1024)

//afl-fuzz.c

for (i = 0; i <= len; i++) {

stage_cur_byte = i;

for (j = 0; j < extras_cnt; j++) {

if (len + extras[j].len > MAX_FILE) { //如果token的长度大于MAX_FILE,则跳过该token,默认最大为1KB
stage_max--;
continue;
}

/* Insert token */
memcpy(ex_tmp + i, extras[j].data, extras[j].len);

/* Copy tail */
memcpy(ex_tmp + i + extras[j].len, out_buf + i, len - i);

if (common_fuzz_stuff(argv, ex_tmp, len + extras[j].len)) {
ck_free(ex_tmp);
goto abandon_entry;
}

stage_cur++;

}

/* Copy head */
ex_tmp[i] = out_buf[i];

}

auto extras over

使用收集到的tokens进行替换变异,在bitflip 1/1中进行了tokens的收集。这部分类似于user extras over,限制自动收集的token的数目为USE_AUTO_EXTRAS,默认为50。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
//config.h
#define USE_AUTO_EXTRAS 50

//afl-fuzz.c
for (j = 0; j < MIN(a_extras_cnt, USE_AUTO_EXTRAS); j++) {

/* See the comment in the earlier code; extras are sorted by size. */

if (a_extras[j].len > len - i ||
!memcmp(a_extras[j].data, out_buf + i, a_extras[j].len) ||
!memchr(eff_map + EFF_APOS(i), 1, EFF_SPAN_ALEN(i, a_extras[j].len))) {

stage_max--;
continue;

}

last_len = a_extras[j].len;
memcpy(out_buf + i, a_extras[j].data, last_len);

if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;

stage_cur++;

}

随机性策略(random havoc)

有以下几种随机变异的方式:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
//随机选取某个bit进行翻转
FLIP_BIT(out_buf, UR(temp_len << 3));
//随机选取一个字节进行随机的interest替换
out_buf[UR(temp_len)] = interesting_8[UR(sizeof(interesting_8))];
//随机选取字节序,随机选取一个word进行随机的interest替换替换
if (UR(2)) { //随机选取到小端字节序
*(u16*)(out_buf + UR(temp_len - 1)) = interesting_16[UR(sizeof(interesting_16) >> 1)];
}
else { //大端字节序
*(u16*)(out_buf + UR(temp_len - 1)) = SWAP16(interesting_16[UR(sizeof(interesting_16) >> 1)]);
}
//随机选取字节序,随机选取一个dword进行随机的interest替换替换
if (UR(2)) {
*(u32*)(out_buf + UR(temp_len - 3)) = interesting_32[UR(sizeof(interesting_32) >> 2)];
}
else {
*(u32*)(out_buf + UR(temp_len - 3)) = SWAP32(interesting_32[UR(sizeof(interesting_32) >> 2)]);
}
//随机选取一个字节减去一个随机数
out_buf[UR(temp_len)] -= 1 + UR(ARITH_MAX);
//随机选取一个字节加上一个随机数
out_buf[UR(temp_len)] += 1 + UR(ARITH_MAX);
//随机选取一个word,随机选取字节序,减去一个随机数
if (UR(2)) {
u32 pos = UR(temp_len - 1);
*(u16*)(out_buf + pos) -= 1 + UR(ARITH_MAX);
}
else {
u32 pos = UR(temp_len - 1);
u16 num = 1 + UR(ARITH_MAX);
*(u16*)(out_buf + pos) =
SWAP16(SWAP16(*(u16*)(out_buf + pos)) - num);
}
//随机选取一个word,随机选取字节序,加上一个随机数
if (UR(2)) {
u32 pos = UR(temp_len - 1);
*(u16*)(out_buf + pos) += 1 + UR(ARITH_MAX);
} else {
u32 pos = UR(temp_len - 1);
u16 num = 1 + UR(ARITH_MAX);
*(u16*)(out_buf + pos) =
SWAP16(SWAP16(*(u16*)(out_buf + pos)) + num);
}
//随机选取一个dword,随机选取字节序,减去一个随机数
if (UR(2)) {

u32 pos = UR(temp_len - 3);

*(u32*)(out_buf + pos) -= 1 + UR(ARITH_MAX);

} else {

u32 pos = UR(temp_len - 3);
u32 num = 1 + UR(ARITH_MAX);

*(u32*)(out_buf + pos) =
SWAP32(SWAP32(*(u32*)(out_buf + pos)) - num);

}
//随机选取一个dword,随机选取字节序,加上一个随机数
if (UR(2)) {

u32 pos = UR(temp_len - 3);

*(u32*)(out_buf + pos) += 1 + UR(ARITH_MAX);

} else {

u32 pos = UR(temp_len - 3);
u32 num = 1 + UR(ARITH_MAX);

*(u32*)(out_buf + pos) =
SWAP32(SWAP32(*(u32*)(out_buf + pos)) + num);

}
//随机选取某个字节,设置为一个随机数
out_buf[UR(temp_len)] ^= 1 + UR(255);
//随机删除一段bytes
del_len = choose_block_len(temp_len - 1);
del_from = UR(temp_len - del_len + 1);
memmove(out_buf + del_from, out_buf + del_from + del_len,
temp_len - del_from - del_len);
//随机选取一个位置,插入一段随机内容,其中75%的概率是插入原文中随机位置的内容,25%的概率是插入一段随机选取的数
//随机选取一个位置,替换一段随机内容,其中75%的概率是插入原文中随机位置的内容,25%的概率是插入一段随机选取的数
//随机选取一个token进行替换
//随机选取一个token进行插入

AFL会生成一个随机数来限制随机选取变异的组合数量:

1
2
3
4
5
//config.h
#define HAVOC_STACK_POW2 7

//afl-fuzz.c
u32 use_stacking = 1 << (1 + UR(HAVOC_STACK_POW2));

splice

最后进入splice,将两个input拼接在一起,然后继续执行havoc变异。具体的,对于当前input来说,在当前队列中随机选取一个input(但不是本身),当前input的头部拼接选取的input的尾部。然后再进行随机性策略的变异。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
//afl-fuzz.c
/* Pick a random queue entry and seek to it. Don't splice with yourself. */

do { tid = UR(queued_paths); } while (tid == current_entry);

splicing_with = tid;
target = queue;
... ...

/* Read the testcase into a new buffer. */

fd = open(target->fname, O_RDONLY);

if (fd < 0) PFATAL("Unable to open '%s'", target->fname);

new_buf = ck_alloc_nozero(target->len);

ck_read(fd, new_buf, target->len, target->fname);

close(fd);

/* Find a suitable splicing location, somewhere between the first and
the last differing byte. Bail out if the difference is just a single
byte or so. */

locate_diffs(in_buf, new_buf, MIN(len, target->len), &f_diff, &l_diff); //两个input进行比对,返回第一个和最后一个diff的偏移

if (f_diff < 0 || l_diff < 2 || f_diff == l_diff) { //如果没有找到diff或first和last相同
ck_free(new_buf);
goto retry_splicing;
}

/* Split somewhere between the first and last differing byte. */

split_at = f_diff + UR(l_diff - f_diff); //随机选取一个在first_diff和last_diff的位置作为拼接位置

/* Do the thing. */

len = target->len;
memcpy(new_buf, in_buf, split_at); //使用当前input的头部替换选取的input的头部
in_buf = new_buf;

ck_free(out_buf);
out_buf = ck_alloc_nozero(len);
memcpy(out_buf, in_buf, len);

goto havoc_stage;

}

对一个文件变异完成后,AFL会对文件队列的下一个进行变异处理。当队列中的全部文件都变异测试后,就完成了一个”cycle”。

参考

https://zhuanlan.zhihu.com/p/51443698

http://rk700.github.io/2017/12/28/afl-internals/

http://rk700.github.io/2018/01/04/afl-mutations/