wPI

wPI

Author: Karol Baryła (aka Lorak_)

---Writeup not finished----
wPI was a task from WPI CTF 2019. Task description:

My solution is not the intended one, but hey, it works. Brief steps to solution:

  • Analyzing binary in ghidra
  • Reversing functions to the point I got working C code
  • Re-write one function because I can't easily reverse it
  • Brute force flag by pair of chars

Binary isn't stripped, so we can easily locate main function, and Ghidra does great job decompiling it. That's what ghidra gives us:

undefined8 main(int param_1,long param_2)

{
  ushort *data;
  int iVar1;
  undefined8 uVar2;
  size_t len;
  long in_FS_OFFSET;
  uint local_148;
  uint local_144;
  uint local_140;
  int local_13c;
  ushort *local_138;
  int local_124;
  int local_120;
  int local_11c;
  MD5_CTX local_118;
  SHA256_CTX local_b8;
  ushort local_48 [8];
  uchar local_38 [40];
  long local_10;
  
  local_10 = *(long *)(in_FS_OFFSET + 0x28);
  InitializeTwoPowers();
  puts("Running...");
  if (param_1 == 2) {
    data = *(ushort **)(param_2 + 8);
    len = strlen((char *)data);
    if (len == 0x20) {
      MD5_Init(&local_118);
      len = strlen((char *)data);
      MD5_Update(&local_118,data,len);
      MD5_Final((uchar *)local_48,&local_118);
      local_144 = (uint)local_48[0];
      local_140 = local_144;
      local_13c = 0;
      local_138 = data;
      while (local_13c < 0x10) {
        local_148 = (uint)*local_138;
        local_140 = local_140 + local_148;
        findDigits(local_140,(long)&local_124,3);
        if (((local_124 != *(int *)(digits + (long)local_13c * 0xc)) ||
            (local_120 != *(int *)(digits + (long)local_13c * 0xc + 4))) ||
           (local_11c != *(int *)(digits + (long)local_13c * 0xc + 8))) {
          uVar2 = 0;
          goto LAB_001019d5;
        }
        local_138 = local_138 + 1;
        local_13c = local_13c + 1;
      }
      SHA256_Init(&local_b8);
      len = strlen((char *)data);
      SHA256_Update(&local_b8,data,len);
      SHA256_Final(local_38,&local_b8);
      iVar1 = strncmp((char *)local_38,hash,0x20);
      if (iVar1 == 0) {
        puts("Correct!");
      }
      uVar2 = 1;
    }
    else {
      uVar2 = 0;
    }
  }
  else {
    uVar2 = 0;
  }
LAB_001019d5:
  if (local_10 != *(long *)(in_FS_OFFSET + 0x28)) {
                    /* WARNING: Subroutine does not return */
    __stack_chk_fail();
  }
  return uVar2;
}

All we need to do here is some renaming and retyping to get more readable code. That's what I've got:


int main(int argc,char **argv)

{
  int iVar1;
  size_t len;
  long in_FS_OFFSET;
  uint data_fragment;
  uint digest_md5_2b;
  uint accumulator;
  int i;
  ushort *data_iterator;
  int calculated_digits [3];
  MD5_CTX md5_struct;
  SHA256_CTX local_b8;
  ushort md5_digest [8];
  uchar local_38 [40];
  long canary;
  ushort *data;
  
  canary = *(long *)(in_FS_OFFSET + 0x28);
  InitializeTwoPowers();
  puts("Running...");
  if (argc == 2) {
    data = (ushort *)argv[1];
    len = strlen((char *)data);
    if (len == 0x20) {
      MD5_Init(&md5_struct);
      len = strlen((char *)data);
      MD5_Update(&md5_struct,data,len);
      MD5_Final((uchar *)md5_digest,&md5_struct);
      digest_md5_2b = (uint)md5_digest[0];
      accumulator = digest_md5_2b;
      i = 0;
      data_iterator = data;
      while (i < 0x10) {
        data_fragment = (uint)*data_iterator;
        accumulator = accumulator + data_fragment;
        findDigits(accumulator,(long)calculated_digits,3);
        if (((calculated_digits[0] != *(int *)(digits + (long)i * 0xc)) ||
            (calculated_digits[1] != *(int *)(digits + (long)i * 0xc + 4))) ||
           (calculated_digits[2] != *(int *)(digits + (long)i * 0xc + 8))) {
          iVar1 = 0;
          goto exit_program;
        }
        data_iterator = data_iterator + 1;
        i = i + 1;
      }
      SHA256_Init(&local_b8);
      len = strlen((char *)data);
      SHA256_Update(&local_b8,data,len);
      SHA256_Final(local_38,&local_b8);
      iVar1 = strncmp((char *)local_38,hash,0x20);
      if (iVar1 == 0) {
        puts("Correct!");
      }
      iVar1 = 1;
    }
    else {
      iVar1 = 0;
    }
  }
  else {
    iVar1 = 0;
  }
exit_program:
  if (canary != *(long *)(in_FS_OFFSET + 0x28)) {
                    /* WARNING: Subroutine does not return */
    __stack_chk_fail();
  }
  return iVar1;
}

Now we can see what the binary does.

  • It takes argv[1] and calculates md5 sum of it.
  • It takes first 2 bytes of that hash and stores it in accumulator variable
  • It treats argv[1] as array of shorts (so first and second char are treated as one number, then third and fourth etc.)
  • For each such number it adds it to the accumulator, passes accumulator to findDigits function.
  • Result of findDigits is stored in calculatedDigits variable.
  • Result is checked against predefind array of numbers. If it's wrong, program exits.

We know the first 4 chars of flag: WPI{
It means we can bruteforce the first 2 bytes of md5, because first value of accumulator is md5[0:2] + input[0:2]. From that bruteforce we get more than 10 working values, but only one still works in second iteration (with input[2:4] added) At that point I didn't have working C code, so I had to do that with gdb loop, which took a loooot of time (about 2 hours IIRC) (I thought it would be much faster and planned on doing whole challenge that way, but reality verified my plans). I'll add gdb commands later, because I lost them (storing CTF files in /tmp isn't the best idea after all).

Placeholder for gdb script

Now that we have first 2 chars, we could do the rest of challange that way. But doing that in gdb would be way too slow, so I decided to completely reverse binary, get C code, and do that in C. First thing we need for that is findDigits source. Ghidra does pretty good job, but we need to help it a bit (again, retyping and renaming variables, changing series return type to double, and then some tweaks in final code is enough). That's what Ghidra creates initially:

void findDigits(int param_1,long param_2,int param_3)

{
  double extraout_XMM0_Qa;
  double extraout_XMM0_Qa_00;
  double extraout_XMM0_Qa_01;
  double extraout_XMM0_Qa_02;
  double dVar1;
  int local_34;
  double local_30;
  
  series(1,param_1);
  series(4,param_1);
  series(5,param_1);
  series(6,param_1);
  dVar1 = ((extraout_XMM0_Qa * 4.00000000 - (extraout_XMM0_Qa_00 + extraout_XMM0_Qa_00)) -
          extraout_XMM0_Qa_01) - extraout_XMM0_Qa_02;
  local_30 = (dVar1 - (double)(int)dVar1) + 1.00000000;
  local_34 = 0;
  while (local_34 < param_3) {
    local_30 = (local_30 - (double)(int)local_30) * 16.00000000;
    *(int *)((long)local_34 * 4 + param_2) = (int)local_30;
    local_34 = local_34 + 1;
  }
  return;
}

My code:

void findDigits(int value, int *to_calculate, int size){
	int i;
	double wynik;

	double series1 = series(1,value);
	double series4 = series(4,value);
	double series5 = series(5,value);
	double series6 = series(6,value);

	series6 = ((series1 * 4.00000000 - (series4 + series4)) - series5) - series6;
	wynik = (series6 - (double)(int)series6) + 1.00000000;

	for(int i = 0; i < size; i++) {
		wynik = (wynik - (double)(int)wynik) * 16.00000000;
		to_calculate[(long)i] = (int)wynik;
	}
	return;
}

Now let's do the same with series function. Initial code:

undefined  [16] series(int param_1,int param_2)

{
  ulong extraout_RAX;
  ulong uVar1;
  undefined8 in_RDX;
  undefined8 extraout_RDX;
  undefined8 extraout_RDX_00;
  double dVar2;
  int local_30;
  int local_2c;
  
  local_30 = 0;
  while (local_2c = param_2, local_30 < param_2) {
    ModPow16(ZEXT816((ulong)(double)(param_2 - local_30)),(double)(param_1 + local_30 * 8));
    local_30 = local_30 + 1;
    in_RDX = extraout_RDX;
  }
  while ((uVar1 = (ulong)(param_2 + 100U), local_2c <= (int)(param_2 + 100U) &&
         (dVar2 = pow(16.00000000,(double)(param_2 - local_2c)), uVar1 = extraout_RAX,
         in_RDX = extraout_RDX_00, 0.00000000 <= dVar2 / (double)(param_1 + local_2c * 8)))) {
    local_2c = local_2c + 1;
    in_RDX = extraout_RDX_00;
  }
  return CONCAT88(in_RDX,uVar1);
}

Tweaked code:

double series(int param_1,int param_2) {
	double retval;
	double modulo;
	int i;
	int j;
	double wynik;

	wynik = 0.00000000;
	i = 0;
	for (int i = 0; i < param_2; i++) {
		modulo = (double)(param_1 + i * 8);
		retval = ModPow16((double)(param_2 - i),modulo);
		wynik = retval / modulo + wynik;
		wynik = wynik - (double)(int)wynik; 
	}


	for(int j = param_2; j <= param_2 + 100; j++){
		modulo = pow(16.00000000,(double)(param_2 - j));
		modulo = modulo / (double)(param_1 + j * 8);
		if (modulo < 0.00000000) break;
		wynik = wynik + modulo;
		wynik = wynik - (double)(int)wynik;
	}

	return wynik;
}

Now, we see that this function calls ModPow16. Let's see what Ghidra can tell us about this function:

undefined  [16] ModPow16(undefined param_1 [16],double param_2)

{
  undefined auVar1 [16];
  int local_28;
  int local_24;
  double local_20;
  double local_18;
  double local_10;
  
  local_20 = SUB168(param_1,0);
  if (param_2 == 1.00000000) {
    auVar1 = (undefined  [16])0x0;
  }
  else {
    local_28 = 0;
    while ((local_28 < 0x20 && ((double)*(int *)(twoPowers + (long)local_28 * 4) <= local_20))) {
      local_28 = local_28 + 1;
    }
    local_18 = (double)*(int *)(twoPowers + (long)(local_28 + -1) * 4);
    local_10 = 1.00000000;
    local_24 = 1;
    while (local_24 <= local_28) {
      if (local_18 <= local_20) {
        local_10 = local_10 * 16.00000000 -
                   (double)(int)((local_10 * 16.00000000) / param_2) * param_2;
        local_20 = local_20 - local_18;
      }
      local_18 = local_18 * 0.50000000;
      if (1.00000000 <= local_18) {
        local_10 = local_10 * local_10 - (double)(int)((local_10 * local_10) / param_2) * param_2;
      }
      local_24 = local_24 + 1;
    }
    auVar1 = ZEXT816((ulong)local_10);
  }
  return auVar1;
}

It doesn't look good. I couldn't get ghidra to give us anything much better than this:

double ModPow16(double param_1,double param_2)

{
  unkfloat1 retval;
  int i;
  int local_24;
  double local_20;
  double max_2_pow;
  
  local_20 = SUB168((undefined  [16])(longdouble)param_1,0);
  if (param_2 != 1.00000000) {
    i = 0;
    while ((i < 0x20 && ((double)twoPowers[(long)i] <= local_20))) {
      i = i + 1;
    }
    max_2_pow = (double)twoPowers[(long)(i + -1)];
    local_24 = 1;
    while (retval = (unkfloat1)local_24, local_24 <= i) {
      if (max_2_pow <= local_20) {
        local_20 = local_20 - max_2_pow;
      }
      max_2_pow = max_2_pow * 0.50000000;
      local_24 = local_24 + 1;
    }
  }
  return (double)retval;
}

And this code is totally wrong and does something totally different than original function. GDB proved very useful here. I could run binary in gdb, break somwhere in main, and call that function myself to see what it returns. Turns out it counts (16.0 ^ param_1) % param_2. So I asked my friend who is more into alghoritms than me to implement it himself it, here's what he gave me:

double mod(double a, double b) {
	return a - (double)((long)(a/b)) * b;
}

double ModPow16(double exponent, double modulo) {
	double wynik = 1;
	if (exponent < 0.0) return 1.0;
	int exp = exponent;
	if (modulo != 1.000000000) {
		double pods = 16.0;
		while (exp > 0) {
			if (exp & 1) wynik = mod((wynik * pods), modulo);
			pods = mod(pods * pods, modulo);
			exp >>= 1;
		}
		wynik = mod(wynik, modulo);
		return wynik;
	}

		return 0.0;
}

If modulo is integer, this function works as original one. If it is not, then it gives different results but it doesn't matter - it is always called with modulo being integer. Now I could execute original plan - knowing initial value of accumulator, let's check what 2-char pair could come next. It took some time, because the bigger the accumulator is, the longer it takes to calculate series. Also there was some guessing, because it always gave more than 1 result, usually 4-5. Program to calculate possible next values:

#include <stddef.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <unistd.h>

int digits[48] = {0xE,0x8,0x9,0xB,0x8,0x7,0x8,0xA,0xD,0x7,0xC,0x7,0xE,0x7,0xC,0x3,0xA,0x3,0x4,0x2,0x5,0x2,0x7,0x5,0xB,0x5,0x4,0x1,0xA,0x6,0x2,0xE,0xD,0x1,0xC,0x5,0xF,0xD,0xF,0x3,0x3,0x1,0x2,0xD,0xB,0x7,0x4,0xF};


int twoPowers[32];


void InitializeTwoPowers(void){
  int i;

  twoPowers[0] = 1;
  i = 1;
  while (i < 32) {
    twoPowers[(long)i] =
         (int)((double)twoPowers[(long)(i + -1)] + (double)twoPowers[(long)(i + -1)]);
    i = i + 1;
  }
  return;
}


double mod(double a, double b) {
	return a - (double)((long)(a/b)) * b;
}

// 16 ^ exponent % modulo
// exponent i modulo są doublami
// ale zawsze są calkowite
double ModPow16(double exponent, double modulo) {
	double wynik = 1;
	if (exponent < 0.0) return 1.0;
	int exp = exponent;
	if (modulo != 1.000000000) {
		double pods = 16.0;
		while (exp > 0) {
			if (exp & 1) wynik = mod((wynik * pods), modulo);
			pods = mod(pods * pods, modulo);
			exp >>= 1;
		}
		wynik = mod(wynik, modulo);
		return wynik;
	}

		return 0.0;
}


//Tafunkcja nas interesuje
double series(int param_1,int param_2) {
	double retval;
	double modulo;
	int i;
	int j;
	double wynik;

	wynik = 0.00000000;
	i = 0;
	for (int i = 0; i < param_2; i++) {
		modulo = (double)(param_1 + i * 8);
		retval = ModPow16((double)(param_2 - i),modulo);
		wynik = retval / modulo + wynik;
		wynik = wynik - (double)(int)wynik; //Część ułamkowa wyniku
	}


	for(int j = param_2; j <= param_2 + 100; j++){
		modulo = pow(16.00000000,(double)(param_2 - j));
		modulo = modulo / (double)(param_1 + j * 8);
		if (modulo < 0.00000000) break;
		wynik = wynik + modulo;
		wynik = wynik - (double)(int)wynik;
	}

	return wynik;
}

void findDigits(int value, int *to_calculate, int size){
	int i;
	double wynik;

	double series1 = series(1,value);
	double series4 = series(4,value);
	double series5 = series(5,value);
	double series6 = series(6,value);

	series6 = ((series1 * 4.00000000 - (series4 + series4)) - series5) - series6;
	wynik = (series6 - (double)(int)series6) + 1.00000000;

	for(int i = 0; i < size; i++) {
		wynik = (wynik - (double)(int)wynik) * 16.00000000;
		to_calculate[(long)i] = (int)wynik;
	}
	return;
}


int main(int argc,char **argv){
	size_t len;
	uint data_fragment;
	uint accumulator;
	int i;
	ushort *data_iterator;
	int calculated_digits [3];
	ushort *data;

	InitializeTwoPowers();
	puts("Running...");

	accumulator = 0xd75e; //First 2 bytes of md5(data)
	accumulator += 'W' + ('P'<<8);
	accumulator += 'I' + ('{'<<8);
	accumulator += 'S' + ('w'<<8);
	accumulator += '3' + ('3'<<8);
	accumulator += 'T' + ('e'<<8);
	accumulator += 'R' + ('_'<<8);
	accumulator += 't' + ('H'<<8);
	accumulator += '@' + ('n'<<8);
	accumulator += '_' + ('a'<<8);
	accumulator += '+' + ('R'<<8);
	printf("Accumulator: %x\n", accumulator);
	i = 10;

	int begin = 0;
	for(int j = 0; j < 8; j++) {
		pid_t pid = fork();
		begin = j;
		if (pid != 0) break;
		if (pid == 0 && j == 7)  exit(0);
	}
	for (int a = 32 + begin * 12; a < 32 + (begin+1) * 12; a++){
		for(int b = 32; b < 127; b++){
			findDigits(accumulator + a + (b <<8), calculated_digits, 3);
			if (((calculated_digits[0] == digits[(long)i * 3]) &&
			(calculated_digits[1] == digits[(long)i * 3 + 1])) &&
			(calculated_digits[2] == digits[(long)i * 3 + 2])) {
				printf("Possible result: %c %c %x\n", (char)a, (char)b, a + (b <<8));
			}
			if(b == 32) {
				printf("Progress: (%d/12) (%d)\n", a - (32 + begin * 12) + 1, a);
			}
		}
	}

	printf("Task %d ended\n", begin);
	exit(0);


	return 0;
}

It runs 8 processes to increase speed. To run it, you must set i in line 127 to number of guessed pairs of chars, above it are those pairs (it's pretty obvious). Then compile it with gcc solve.c -O2 -o solve -lm, run, and it should print possible next pairs. After some waiting and guessing, we got the flag: WPI{Sw33TeR_tH@n_aH_cH3rRi3_P1e}

Turns out that's not how you are supposed to solve this task. We didn't go deep into what this functions actually calculate. Author of the task told me (after we solved it), that it was using this formula: link. So I guess the proper solution was to precalculate pi and search for indexes of digits from the predefined array.