死磕了那么久,还是没搞出来QAQ,还是太菜了。不过这个WP给了我一些做这种语言类题目的想法。按WP的方法复现了一下,确实可以直接出结果。

ez_cython

对于这类题目,以往我只会hook一下传入参数和返回值。但是这个WP通过构造自己的类使其返回了符号值从而直接打印流程,十分NB,具体代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import cy

class Symbol:
    """Symbolic operand that records arithmetic as text instead of computing it.

    Every supported operator returns a new ``Symbol`` whose name is the
    parenthesised expression, so passing instances through an opaque routine
    yields a readable trace of the operations it performs.
    """

    def __init__(self, name):
        self.name = name

    def __repr__(self) -> str:
        return self.name

    @staticmethod
    def _operand(value):
        """Render an operand: symbols by name, plain integers as hex."""
        return str(value) if isinstance(value, Symbol) else hex(value)

    def __add__(self, other):
        return Symbol(f"({self} + {self._operand(other)})")

    def __radd__(self, other):
        # Reflected form prints the symbol first; fine for a commutative operator.
        return self.__add__(other)

    def __xor__(self, other):
        return Symbol(f"({self} ^ {self._operand(other)})")

    def __rxor__(self, other):
        return self.__xor__(other)

    def __lshift__(self, other):
        # Shift amounts are printed as given (not converted to hex).
        return Symbol(f"({self} << {other})")

    def __rshift__(self, other):
        return Symbol(f"({self} >> {other})")

    def __and__(self, other):
        return Symbol(f"({self} & {self._operand(other)})")

    def __rand__(self, other):
        # Mirrors __radd__/__rxor__ so that `int & Symbol` is traced too.
        return self.__and__(other)


# Number of element assignments performed on any SA instance so far.
cout = 0


class SA:
    """List-like container of ``Symbol`` objects that logs every write.

    Reads return the stored symbol; writes print the assigned expression and
    replace the slot with a fresh ``new_<old name>`` symbol.
    """

    def __init__(self, nums):
        # Each entry is wrapped via str(), so existing Symbols keep their name.
        self.nums = [Symbol(str(num)) for num in nums]
        self.count = 0

    def __getitem__(self, index):
        return self.nums[index]

    def copy(self):
        return SA(self.nums)

    def __len__(self):
        return len(self.nums)

    def __setitem__(self, index, value):
        global cout
        # Print the expression being stored, then rename the slot so later
        # reads refer to the updated value rather than the full expression.
        print(f"new_{self.nums[index]} = {value}")
        self.nums[index] = Symbol(f'new_{self.nums[index]}')
        cout += 1

    def __eq__(self, value: object) -> bool:
        # Print both sides of the comparison the traced routine attempts.
        print(f"\n{self.nums} \n= \n{value}")
        return self.nums == value



# Run the compiled routine on 32 symbolic inputs named a[0]..a[31]; SA prints
# each assignment it receives while the routine executes.
symbolic_names = [f"a[{i}]" for i in range(32)]
inp = SA(symbolic_names)
output = cy.sub14514(inp)
print(output)
# 32 assignments are counted as one round.
print(f"round = {cout//32}")

根据输出可以一眼看出是xxtea

然后直接写XXTEA脚本就好QAQ

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
//main.cpp
#include <iostream>
#include <stdint.h>
#include "Arr.h"
// Key table read by deXXTEA as key[t]. It appears to hold 160 entries (5 groups of 32);
// deXXTEA starts at index 32 * 5 - 1 and steps backwards by one entry per word update.
unsigned int key[] = { 49, 49, 121, 121, 49, 49, 121, 121, 49, 49, 121, 121, 49, 49, 121, 121, 49, 49, 121, 121, 49, 49, 121, 121, 49, 49, 121, 121, 49, 49, 121, 121, 67, 67, 83, 83, 67, 67, 83, 83, 67, 67, 83, 83, 67, 67, 83, 83, 67, 67, 83, 83, 67, 67, 83, 83, 67, 67, 83, 83, 67, 67, 83, 83, 121, 121, 49, 49, 121, 121, 49, 49, 121, 121, 49, 49, 121, 121, 49, 49, 121, 121, 49, 49, 121, 121, 49, 49, 121, 121, 49, 49, 121, 121, 49, 49, 83, 83, 67, 67, 83, 83, 67, 67, 83, 83, 67, 67, 83, 83, 67, 67, 83, 83, 67, 67, 83, 83, 67, 67, 83, 83, 67, 67, 83, 83, 67, 67, 49, 49, 121, 121, 49, 49, 121, 121, 49, 49, 121, 121, 49, 49, 121, 121, 49, 49, 121, 121, 49, 49, 121, 121, 49, 49, 121, 121, 49, 49, 121, 121 };

/**
 * Reverses the XXTEA-like word mixing in place.
 *
 * Runs `60 / len + 4` rounds. Within a round the words are visited from the
 * last to the first, and each word has the mix of its two cyclic neighbours,
 * the running `sum` and one entry of the global `key` table subtracted.
 *
 * @param input words to decrypt; modified in place.
 * @return always 0.
 */
int deXXTEA(Arr& input) {
    const uint32_t delta = 0x9E3779B9;
    const uint32_t len = input.getsize();
    if (len == 0) {
        return 0;  // nothing to do; also keeps 60 / len below well-defined
    }
    const uint32_t round = 60 / len + 4;
    uint32_t sum = delta * round;  // wraps modulo 2^32 by design
    // Key entries are consumed backwards, one per word update. The start
    // index is fixed at 32 * 5 - 1, i.e. it assumes 32 words and 5 rounds.
    int t = 32 * 5 - 1;
    for (uint32_t i = 0; i < round; i++)
    {
        for (uint32_t now = len; now-- > 0;)
        {
            // Cyclic neighbours, spelled out instead of relying on the
            // int -> unsigned wrap of `now - 1` at index 0.
            const uint32_t prev = input[(now + len - 1) % len];
            const uint32_t next = input[(now + 1) % len];
            input[now] -= (((prev >> 3) ^ (next << 3)) + ((next >> 4) ^ (prev << 2)))
                          ^ ((sum ^ next) + (key[t] ^ prev));
            t--;
        }
        sum -= delta;
    }
    return 0;
}
// Decrypts the embedded ciphertext with deXXTEA and prints the low byte of
// each resulting word as a character.
int main(void) {
    uint32_t data[] = { 0xf4e984ac, 0xcaf0191d, 0x576ffc19, 0x2ef90939, 0x585c0e45, 0x2eaa8337, 0x617f6b89, 0xf55432a7, 0xa81cb817, 0xbf1ee8c3, 0x656309c6, 0x92ace1cc, 0x8efeb57e, 0x39c000bb, 0x4d650916, 0x6a78dba7, 0xeba5c0a3, 0x92fdb0f3, 0xf4b5dea2, 0x2e7098d9, 0x112df304, 0xc60e9667, 0xf5b523ec, 0x7c8eb381, 0x2720ac77, 0xb8939ede, 0x8428a41a, 0x288ac504, 0xca50bcfd, 0x2992ba6f, 0x4222d1a6, 0x876c84df
    };
    // Automatic storage: the buffer is released when main returns, whereas
    // the former `new Arr` was never deleted.
    Arr test(data, 32);
    deXXTEA(test);

    char str[33] = {};  // 32 characters plus the terminating NUL
    for (int i = 0; i < 32; i++)
    {
        // Each decrypted word carries one character; keep only its low byte.
        str[i] = static_cast<char>(test.getarr()[i]);
    }
    std::cout << str;
    return 0;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
//Arr.h
#pragma once
#include <initializer_list>
#include<algorithm>
#include<iostream>
#include <iomanip>
#include <cstring>
// Heap-allocated array of unsigned 32-bit words with index access and
// begin()/end() for range-for iteration.
class Arr
{
private:
    size_t size;        // number of elements in arr
    unsigned int* arr;  // buffer allocated with new[] and released in ~Arr
public:
    // Creates s elements.
    Arr(size_t s);
    // Creates s elements copied from p.
    Arr(uint32_t* p, unsigned s);
    // Creates one element per initializer value.
    Arr(std::initializer_list<unsigned int> init);
    // The destructor deletes arr, so an implicit member-wise copy would make
    // two objects delete the same buffer. Copying is therefore disabled.
    Arr(const Arr&) = delete;
    Arr& operator=(const Arr&) = delete;
    ~Arr();

    unsigned int getsize() const;
    unsigned int* getarr() const;

    friend std::ostream& operator<<(std::ostream& os, const Arr& obj);
    unsigned int& operator[](unsigned i);

    unsigned int* begin() const;
    unsigned int* end() const;
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
//Arr.cpp
#include "Arr.h"

// Allocates s elements and sets every one of them to zero.
Arr::Arr(size_t s)
    : size(s), arr(new uint32_t[s])
{
    std::fill(arr, arr + size, 0u);
}

// Allocates s elements through the size constructor, then copies the first
// s values from p over them.
Arr::Arr(uint32_t* p, unsigned s)
    : Arr(s)
{
    std::copy_n(p, size, arr);
}


// Allocates one element per initializer value and copies the values in order.
Arr::Arr(std::initializer_list<unsigned int> init)
    : size(init.size()), arr(new unsigned int[init.size()])
{
    unsigned int* out = arr;
    for (unsigned int value : init) {
        *out++ = value;
    }
}

// Releases the element buffer allocated by the constructors.
Arr::~Arr() { delete[] this->arr; }

// Returns the element count, converted from size_t to unsigned int.
unsigned int Arr::getsize() const
{
    const unsigned int count = static_cast<unsigned int>(size);
    return count;
}

unsigned int* Arr::getarr() const
{
return arr;
}

// Cyclic element access: indices past the end wrap to the front, and negative
// indices (which arrive here converted to large unsigned values) count back
// from the end, so arr[-1] is the last element.
//
// Precondition: size != 0 (the modulo below is undefined otherwise).
unsigned int& Arr::operator[](unsigned i)
{
    // Values above half of the unsigned range are treated as converted
    // negative indices. The former `(i + size) % size` only mapped them
    // correctly when the addition wrapped at the width of `unsigned` or when
    // size divided 2^32; with a 64-bit size_t and e.g. size 5, index -1
    // resolved to slot 0 instead of slot 4.
    const unsigned half_range = ~0u >> 1;
    if (i > half_range) {
        // 0u - i is the magnitude of the negative index.
        const size_t back = static_cast<size_t>(0u - i) % size;
        return arr[back == 0 ? 0 : size - back];
    }
    return arr[i % size];
}

unsigned int* Arr::begin() const
{
return arr;
}

// Pointer one past the last element, for range-for iteration.
unsigned int* Arr::end() const
{
    unsigned int* const first = arr;
    return first + size;
}

// Writes the element count followed by every element as an 8-digit,
// zero-padded, upper-case hexadecimal number, one per line.
std::ostream& operator<<(std::ostream& os, const Arr& obj)
{
    // std::hex, std::uppercase and the fill character stay set on a stream
    // after use. Save them so the caller's later output (including the next
    // "len:" line) is not silently switched to hex.
    const std::ios_base::fmtflags saved_flags = os.flags();
    const char saved_fill = os.fill();

    os << "len: " << obj.getsize() << std::endl;
    os << "element list:" << std::endl;
    for (unsigned i : obj) {
        os << "0x" << std::hex << std::uppercase << std::setw(8) << std::setfill('0') << i << std::endl;
    }

    os.flags(saved_flags);
    os.fill(saved_fill);
    return os;
}

我一开始是想使用frida和python setattr一起搞的,最后失败了QAQ。
我现在在想,会不会对于那些语言形成的混淆,其实可以用语言本身的特性去解决,至少对于解释型语言似乎重点都在混淆里面,那么对于Java的so文件,似乎也可以这样?如果加密是传入一个byte[]类型,那我继承byte生成自己的类传进去,似乎就没有问题了。

再往编译型语言看看,像rust,go似乎都是直接看汇编比看ida反编译的更好。但是如果他们写成dll似乎也可以被我们主动触发。而剩下的exe类型,还是直接让之后专门的反编译器来解决吧。

我觉得像这种语言层面的混淆,会随着语言变多/发展而愈来愈多,所以我觉得深入研究某个语言的内容是不划算的(除非专门研究某个语言的逆向,但是这样也可能遇到很多不开源的框架,对于框架来说,我们也很难分析)。那么对于这些内容,我们为了理清逻辑,更应该使用通解:HOOK?来获取具体流程。


关于我一开始的方法:frida_hook py38.dll的运算函数

hook到的永远是一个地址,且如果先传给python再解引用会导致一些bug,frida直接解引用也会导致bug。

所以还是要看一下具体的偏移

1
2
3
4
5
6
typedef struct _object {// fixed-size type
PyObject_HEAD
} PyObject;
typedef struct {// variable-size type
PyObject_VAR_HEAD
} PyVarObject;
1
2
3
4
5
6
7
8
9
10
11
12
13
typedef struct {// float is a fixed-size type
PyObject_HEAD
double ob_fval;
} PyFloatObject;

typedef struct _PyLongValue {// int is a variable-size type
uintptr_t lv_tag; /* Number of digits, sign and flags */
digit ob_digit[1];
} _PyLongValue;
/* The int object: the common object header followed by the long value. */
struct _longobject {
PyObject_HEAD
_PyLongValue long_value;
};

3. Object 的基本结构 - Think In Python

cpython/Include/cpython/longintrepr.h at main · python/cpython (github.com)

根据如上去查找PyObject_HEAD,发现:

1
/* Expands to one embedded PyObject member named ob_base. */
#define PyObject_HEAD                   PyObject ob_base;
1
2
3
4
5
/* Common object header: a reference count followed by a pointer to the type object. */
typedef struct _object {
_PyObject_HEAD_EXTRA
Py_ssize_t ob_refcnt;
struct _typeobject *ob_type;
} PyObject;

其中_PyObject_HEAD_EXTRA在普通(非调试)构建中展开为空,不占空间;Py_ssize_t在64位下为int64,struct _typeobject *是指针,也是64位。
所以这里的对象头一共有16字节。

1
2
3
4
/* Value part of an int object: a tag word followed by the digit array. */
typedef struct _PyLongValue {
uintptr_t lv_tag; /* Number of digits, sign and flags */
digit ob_digit[1];
} _PyLongValue;

lv_tag保存了

1
2
3
低2位:符号信息:0:正数,1:零,2:负数
第3低位:保留
其余高位:保存长整数中digit的个数,ndigits = lv_tag >> 3(64位下最大为(1<<61)-1)

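ob_digit为无符号整型数组,每个元素存放30位有效数据(64位构建),按低位在前的顺序拼接成整数的绝对值。注意:lv_tag是Python 3.12起的布局;3.8(py38.dll)中对应位置是PyVarObject的ob_size,直接保存digit个数(负数时为负值)。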

Python 3 的 int 类型详解(为什么 int 不存在溢出问题?) - 长安223 - 博客园 (cnblogs.com)

4. Long Object 初探 - Think In Python

那么我们可以直接使用以下转换函数来转换

1
2
3
4
5
6
7
8
9
10
11
12
/**
 * Decode the integer stored in a CPython int object into a NativePointer.
 *
 * Assumes the pre-3.12 object layout (refcount, type pointer, digit count,
 * then the digit array), which is what the original 0x10 / 0x18 offsets
 * correspond to on 64-bit builds -- TODO confirm against the hooked
 * interpreter version.
 *
 * Limitations: the result is a NativePointer, so anything above pointer width
 * is lost; a negative digit count (negative number) skips the loop and
 * yields 0.
 *
 * @param {NativePointer} Nptr address of the int object.
 * @returns {NativePointer} the decoded digits combined into one value.
 */
function getNumber(Nptr){
    const ps = Process.pointerSize;
    const is64 = ps === 8;
    // 64-bit builds use 30-bit digits in 4-byte cells; 32-bit builds use
    // 15-bit digits in 2-byte cells.
    const shift = is64 ? 30 : 15;
    const digitSize = is64 ? 4 : 2;
    // The header is three pointer-sized fields, so the count sits at 2 * ps
    // and the digits at 3 * ps (0x10 and 0x18 on 64-bit, as before).
    const sizeOffset = 2 * ps;
    const digitsOffset = 3 * ps;
    const length = Nptr.add(sizeOffset).readPointer().toInt32();
    let number = ptr(0);

    for (let i = 0; i < length; i++) {
        const cell = Nptr.add(digitsOffset + i * digitSize);
        const this_block = ptr(is64 ? cell.readU32() : cell.readU16());
        number = number.or(this_block.shl(shift * i));
    }
    return number;
}

根据测试,这个可以hook出正确的值,但是乘法不对,似乎和底层实现有关,QAQ有没有佬教教我

hook出的每轮的值

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
[*] Function PyNumber_And called with arguments: ['0x9e3779ce', '0xffffffff']
[*] Function PyNumber_And returned: 0x9e3779ce
[*] Function PyNumber_Xor called with arguments: ['0x13c6ef39', '0x10']
[*] Function PyNumber_Xor returned: 0x13c6ef29
[*] Function PyNumber_Xor called with arguments: ['0x0', '0x278dde738']
[*] Function PyNumber_Xor returned: 0x278dde738
[*] Function PyNumber_Add called with arguments: ['0x13c6ef29', '0x278dde738']
[*] Function PyNumber_Add returned: 0x28ca4d661
[*] Function PyNumber_Xor called with arguments: ['0x2', '0x9e3779b9']
[*] Function PyNumber_Xor returned: 0x9e3779bb
[*] Function PyNumber_Xor called with arguments: ['0x0', '0x3']
[*] Function PyNumber_Xor returned: 0x3
[*] Function PyNumber_Xor called with arguments: ['0x31', '0x9e3779ce']
[*] Function PyNumber_Xor returned: 0x9e3779ff
[*] Function PyNumber_Add called with arguments: ['0x9e3779bb', '0x9e3779ff']
[*] Function PyNumber_Add returned: 0x13c6ef3ba
[*] Function PyNumber_Xor called with arguments: ['0x28ca4d661', '0x13c6ef3ba']
[*] Function PyNumber_Xor returned: 0x3b0ca25db
[*] Function PyNumber_Add called with arguments: ['0x5', '0x3b0ca25db']
[*] Function PyNumber_Add returned: 0x3b0ca25e0
[*] Function PyNumber_And called with arguments: ['0x3b0ca25e0', '0xffffffff']

感觉还是很对的。