<?php
/**
 * x64disasm.php v0.0.0.1
 *
 * @description x64 disassembler
 * @author secator
 * @link https://secator.com/
 * @date 20.12.2024
 */

// Remove PHP's execution time limit for this script (0 means "no limit").
set_time_limit(0);

require 
'instructions.php';

/**
 * Computes how many bytes follow a ModR/M byte as part of the addressing
 * form: the optional SIB byte plus the optional displacement.
 *
 *   +---+---+---+---+---+---+---+---+
 *   |  mod  |    reg    |    rm     |
 *   +---+---+---+---+---+---+---+---+
 *
 * @param array  $ModRM ModR/M split into bit strings: [0] = mod (2 chars),
 *                      [1] = reg (3 chars), [2] = rm (3 chars)
 * @param string $data  raw machine code being decoded
 * @param int    $sib   position in $data where the SIB byte would be located
 *                      (the byte right after ModR/M)
 *
 * @return int|null number of extra bytes (0..5), or null when a SIB byte is
 *                  required but $data ends before it
 */
function modrm_offset($ModRM, $data, $sib) {
    $mod = $ModRM[0];
    $rm = $ModRM[2];

    // mod = 11: register operand, nothing follows ModR/M
    if ($mod === '11') {
        return 0;
    }

    // mod = 00, rm = 101: disp32 only, no SIB
    if ($mod === '00' && $rm === '101') {
        return 4;
    }

    $offset = 0;

    // rm = 100: a SIB byte follows
    if ($rm === '100') {
        if (!isset($data[$sib])) {
            return null;
        }
        $offset++;

        // With mod = 00 a SIB base field of 101 means "no base, disp32 follows".
        // The base field is the low three bits of the SIB byte.
        if ($mod === '00' && (ord($data[$sib]) & 0x07) === 0x05) {
            $offset += 4;
        }
    }

    if ($mod === '01') { // 1 byte, disp8
        $offset += 1;
    } elseif ($mod === '10') { // 4 bytes, disp32
        $offset += 4;
    }

    return $offset;
}

/**
 * Linearly disassembles a string of x64 machine code.
 *
 * Decoding is table driven. The global $instructions array maps an opcode key
 * to a list of "codes => instruction" variants; the codes string describes
 * what follows the opcode (bitness marker x16/x32/x64/xxx, required prefix
 * "^66", fixed next byte "|XX", ModR/M forms "/r", "/0".."/7", "/vsib",
 * "mm:rrr:bbb", trailing byte "$XX", immediates "ib", "iw", "id", ...).
 * For every instruction the matching variant is picked and its length is used
 * to find the start of the next instruction.
 *
 * Segment-override prefixes (26, 2E, 36, 3E, 64, 65) and LOCK (F0) are
 * skipped and do not produce an entry of their own. When the input ends in
 * the middle of a VEX/EVEX/XOP prefix the loop stops without recording the
 * truncated instruction.
 *
 * @param string $data raw machine code bytes
 *
 * @return array map "start offset => instruction", where the value is the
 *               entry taken from the instruction table, or the string
 *               'invalid' when the bytes could not be decoded
 */
function x64disasm($data) {
    global $instructions;

    // legacy prefixes that are of no interest here
    $ignored_prefixes = array('26', '2E', '36', '3E', '64', '65');  // es:, cs:, ss:, ds:, fs:, gs:
    // prefixes that are of interest because they affect the instruction size
    $legacy_prefixes = array('66', '67', 'F2', 'F3');
    // size in bytes of every immediate / offset code
    $codes_size = array(
        'cb' => 1,
        'cw' => 2,
        'cd' => 4,
        'cp' => 6,
        'co' => 8,
        'ct' => 10,

        'ib' => 1,
        'iw' => 2,
        'id' => 4,
        'io' => 8,

        'mo' => 8,
    );
    // matches every size code in a codes string (loop invariant, built once)
    $bytes_pattern = '/(^|\s)(' . implode('|', array_keys($codes_size)) . ')/';
    // ModR/M constraint written as [~][!][(]mm[)]:rrr:bbb
    $amx_pattern = '/(?<set>~)?(?<not>!)?\(?(?<m>[m01]{2})\)?:(?<r>[r01]{3}):(?<b>[b01]{3})/';

    // "pp" field of VEX/EVEX/XOP => implied legacy prefix
    $pp_prefixes = array('01' => '66', '10' => 'F3', '11' => 'F2');
    // opcode map selectors
    $xop_maps = array('01000' => 'X8', '01001' => 'X9', '01010' => 'XA');
    $vex_maps = array('00001' => '0F', '00010' => '0F38', '00011' => '0F3A');
    $evex_maps = array('001' => '0F', '010' => '0F38', '011' => '0F3A', '101' => 'MAP5', '110' => 'MAP6');
    // EVEX.L'L => vector length; '11' is not a valid length and is treated as 512
    $evex_vectors = array('00' => 128, '01' => 256, '10' => 512, '11' => 512);

    // list of disassembled instructions
    $return = array();
    // current position
    $offset = 0;
    // total number of bytes
    $all = strlen($data);

    do {
        $start = $offset;

        if ($offset >= $all) {
            break;
        }

        // default bitness priority
        $bits = array('xxx', 'x32', 'x64', 'x16');

        $bits_override = false; // 0x66 seen: 16 bit
        $size_override = false; // 0x67 seen

        $prefix = '';
        $rex = null;

        $first = sprintf('%02X', ord($data[$offset]));
        $offset++;

        // check the prefixes, there may be more than one in a row
        while (in_array($first, $legacy_prefixes, true)) {
            if ($first === '66') {
                $bits_override = true;
            }
            if ($first !== '67') {
                $prefix = $first;
            } else {
                $size_override = true;
            }
            if (!isset($data[$offset])) {
                break;
            }
            $first = sprintf('%02X', ord($data[$offset]));
            $offset++;
        }

        // POP or XOP? The next byte is only peeked at here; it is consumed
        // in the XOP branch below.
        $p0 = array();
        if ($first === '8F') {
            if (!isset($data[$offset])) {
                break;
            }
            preg_match(
                '|(?<R>[01]{1})(?<X>[01]{1})(?<B>[01]{1})(?<mmmmm>[01]{5})|',
                sprintf('%08b', ord($data[$offset])),
                $p0
            );

            // a map select of 8 or more means XOP, anything below is POP
            if (bindec($p0['mmmmm']) >= 8) {
                $first = 'XOP';
            }
        }

        // useless prefix?
        if (in_array($first, $ignored_prefixes, true)) {
            continue;
        }
        // LOCK, just as useless
        elseif ($first === 'F0') {
            continue;
        }
        // XOP
        elseif ($first === 'XOP') {
            // consume the payload byte that was peeked at above
            $offset++;

            $map = isset($xop_maps[$p0['mmmmm']]) ? $xop_maps[$p0['mmmmm']] : '';

            if (!isset($data[$offset])) {
                break;
            }

            preg_match(
                '|(?<W>[01]{1})(?<vvvv>[01]{4})(?<L>[01]{1})(?<pp>[01]{2})|',
                sprintf('%08b', ord($data[$offset])),
                $p1
            );
            $offset++;

            $vector = ($p1['L'] === '0') ? 128 : 256;
            $implied = isset($pp_prefixes[$p1['pp']]) ? $pp_prefixes[$p1['pp']] : '';

            if (!isset($data[$offset])) {
                break;
            }

            // table key: opcode[.prefix][.map]
            $opcode = array(sprintf('%02X', ord($data[$offset])));
            $offset++;

            if ($implied !== '') {
                $opcode[] = $implied;
            }
            if ($map !== '') {
                $opcode[] = $map;
            }
            $opcode = implode('.', $opcode);

            if (empty($instructions['XOP'][$opcode])) {
                $return[$start] = 'invalid';
                continue;
            }

            $current = $instructions['XOP'][$opcode];

            if (empty($current['W' . $p1['W']])) {
                $return[$start] = 'invalid';
                continue;
            }
            $current = $current['W' . $p1['W']];

            if (!empty($current[$vector])) {
                $current = $current[$vector];
            } elseif (!empty($current['L' . $p1['L']])) {
                $current = $current['L' . $p1['L']];
            } else {
                $return[$start] = 'invalid';
                continue;
            }
        }
        // EVEX/MVEX
        elseif ($first === '62') {
            if (!isset($data[$offset])) {
                break;
            }

            $matched = preg_match(
                '|(?<R>[01]{1})(?<X>[01]{1})(?<B>[01]{1})(?<R1>[01]{1})0(?<mmm>[01]{3})|',
                sprintf('%08b', ord($data[$offset])),
                $p0
            );
            $offset++;

            // The pattern requires bit 3 to be 0. When it is set there is no
            // map; the remaining payload bytes are still consumed and the
            // table lookup below is left to reject the instruction.
            $map = '';
            if ($matched && isset($evex_maps[$p0['mmm']])) {
                $map = $evex_maps[$p0['mmm']];
            }

            if (!isset($data[$offset])) {
                break;
            }

            preg_match(
                '|(?<W>[01]{1})(?<vvvv>[01]{4})(?<m>[01]{1})(?<pp>[01]{2})|',
                sprintf('%08b', ord($data[$offset])),
                $p1
            );
            $offset++;

            $implied = isset($pp_prefixes[$p1['pp']]) ? $pp_prefixes[$p1['pp']] : '';

            if (!isset($data[$offset])) {
                break;
            }

            preg_match(
                '|(?<z>[01]{1})(?<LL>[01]{2})(?<b>[01]{1})(?<V1>[01]{1})(?<aaa>[01]{3})|',
                sprintf('%08b', ord($data[$offset])),
                $p2
            );
            $offset++;

            $vector = $evex_vectors[$p2['LL']];

            if (!isset($data[$offset])) {
                break;
            }

            // table key: opcode[.prefix][.map]
            $opcode = array(sprintf('%02X', ord($data[$offset])));
            $offset++;

            if ($implied !== '') {
                $opcode[] = $implied;
            }
            if ($map !== '') {
                $opcode[] = $map;
            }
            $opcode = implode('.', $opcode);

            // bit 2 of the second payload byte: 0 selects the MVEX table, 1 the EVEX table
            $vx = ($p1['m'] === '0') ? 'MVEX' : 'EVEX';

            if (empty($instructions[$vx][$opcode])) {
                $return[$start] = 'invalid';
                continue;
            }

            $current = $instructions[$vx][$opcode];

            if (!empty($current['WIG'])) {
                $current = $current['WIG'];
            } elseif (!empty($current['W' . $p1['W']])) {
                $current = $current['W' . $p1['W']];
            } else {
                $return[$start] = 'invalid';
                continue;
            }

            if (!empty($current[$vector])) {
                $current = $current[$vector];
            } elseif (!empty($current['LIG'])) {
                $current = $current['LIG'];
            } elseif (!empty($current['512'])) {
                $current = $current['512'];
            } else {
                $return[$start] = 'invalid';
                continue;
            }
        }
        // VEX - 2 or 3 bytes
        elseif ($first === 'C5' || $first === 'C4') {
            if (!isset($data[$offset])) {
                break;
            }

            // VEX.W only exists in the 3-byte form
            $vex_w = '';

            if ($first === 'C4') {
                preg_match(
                    '|(?<R>[01]{1})(?<X>[01]{1})(?<B>[01]{1})(?<mmmmm>[01]{5})|',
                    sprintf('%08b', ord($data[$offset])),
                    $p1
                );
                $offset++;

                $map = isset($vex_maps[$p1['mmmmm']]) ? $vex_maps[$p1['mmmmm']] : '';

                if (!isset($data[$offset])) {
                    break;
                }

                preg_match(
                    '|(?<W>[01]{1})(?<vvvv>[01]{4})(?<L>[01]{1})(?<pp>[01]{2})|',
                    sprintf('%08b', ord($data[$offset])),
                    $p0
                );
                $offset++;

                $vex_w = $p0['W'];
            } else {
                preg_match(
                    '|(?<R>[01]{1})(?<vvvv>[01]{4})(?<L>[01]{1})(?<pp>[01]{2})|',
                    sprintf('%08b', ord($data[$offset])),
                    $p0
                );
                $offset++;

                // the 2-byte form always selects the 0F map
                $map = '0F';
            }

            $implied = isset($pp_prefixes[$p0['pp']]) ? $pp_prefixes[$p0['pp']] : '';

            if (!isset($data[$offset])) {
                break;
            }

            // table key: opcode[.prefix][.map]
            $opcode = array(sprintf('%02X', ord($data[$offset])));
            $offset++;

            if ($implied !== '') {
                $opcode[] = $implied;
            }
            if ($map !== '') {
                $opcode[] = $map;
            }
            $opcode = implode('.', $opcode);

            if (empty($instructions['VEX'][$opcode])) {
                $return[$start] = 'invalid';
                continue;
            }

            $current = $instructions['VEX'][$opcode];

            if (!empty($current['WIG'])) {
                $current = $current['WIG'];
            } elseif ($vex_w === '0' || $vex_w === '1') {
                if (empty($current['W' . $vex_w])) {
                    $return[$start] = 'invalid';
                    continue;
                }
                $current = $current['W' . $vex_w];
            } elseif (!empty($current['W0'])) {
                $current = $current['W0'];
            } elseif (!empty($current['W1'])) {
                $current = $current['W1'];
            } else {
                $return[$start] = 'invalid';
                continue;
            }

            // vector length: try the table keys for VEX.L in order of preference
            $length_keys = ($p0['L'] === '0')
                ? array('128', 'L0', 'LZ', 'LIG')
                : array('256', 'L1', 'LIG');

            $selected = null;
            foreach ($length_keys as $length_key) {
                if (!empty($current[$length_key])) {
                    $selected = $current[$length_key];
                    break;
                }
            }

            if ($selected === null) {
                $return[$start] = 'invalid';
                continue;
            }
            $current = $selected;
        } else {
            $opcode = $first;

            // REX, there may be more than one
            while (hexdec($opcode) >= 0x40 && hexdec($opcode) <= 0x4F) {
                preg_match(
                    '|(?<W>[01])(?<R>[01])(?<X>[01])(?<B>[01])|',
                    sprintf('%04b', hexdec($opcode) % 0x40),
                    $rex
                );

                if (!isset($data[$offset])) {
                    break;
                }
                $opcode = sprintf('%02X', ord($data[$offset]));
                $offset++;
            }

            $op2 = '';
            if (isset($data[$offset])) {
                $op2 = $opcode . '.' . sprintf('%02X', ord($data[$offset]));
            }

            // 2 bytes
            if (isset($instructions[$op2])) {
                $current = $instructions[$op2];
                $offset += 1;
            }
            // 1 byte
            elseif (isset($instructions[$opcode])) {
                $current = $instructions[$opcode];
            } else {
                $return[$start] = 'invalid';
                continue;
            }

            // change the bitness priority depending on 0x66 / REX.W
            if ($bits_override && empty($rex['W'])) {
                $bits = array('x16', 'x64', 'xxx');
            } elseif (!empty($rex['W'])) {
                $bits = array('x64', 'xxx', 'x32');
            }
        }

        // collect the first matching variant for every bitness
        $found = array();
        foreach ($current as $codes => $instruction) {
            // a codes string without a bitness marker is filed under ''
            $bit = preg_match('/x(16|32|64|xx)/', $codes, $marker) ? $marker[0] : '';

            if (isset($found[$bit])) {
                continue;
            }

            $offset_possible = $offset;

            // "^XX": the variant requires the legacy prefix XX
            if (strpos($codes, '^') !== false) {
                if ($prefix === '' || strpos($codes, '^' . $prefix) === false) {
                    continue;
                }
            }

            // "|XX": the variant requires the next byte to be XX
            if (strpos($codes, '|') !== false) {
                if (!isset($data[$offset_possible])) {
                    continue;
                }

                $next = sprintf('%02X', ord($data[$offset_possible]));

                if (strpos($codes, '|' . $next) === false) {
                    continue;
                }

                $offset_possible++;
            }

            // /r, /[0-7], /vsib, 11:rrr:bbb - ModRM
            if (strpos($codes, '/') !== false || strpos($codes, ':') !== false) {
                if (!isset($data[$offset_possible])) {
                    continue;
                }

                $ModRM = sscanf(sprintf('%08b', ord($data[$offset_possible])), '%2c%3c%3c');
                $offset_possible++;

                if (empty($ModRM)) {
                    continue;
                }

                if (preg_match($amx_pattern, $codes, $amx)) {
                    // ~11:rrr:bbb
                    if (!empty($amx['set'])) {
                        // force ModR/M.mod
                        $ModRM[0] = $amx['m'];
                    } elseif (!empty($amx['not'])) {
                        if ($ModRM[0] === $amx['m']) {
                            continue;
                        }
                    } elseif ($ModRM[0] !== $amx['m']) {
                        continue;
                    }

                    if ($amx['r'] !== 'rrr' && $ModRM[1] !== $amx['r']) {
                        continue;
                    }

                    if ($amx['b'] !== 'bbb' && $ModRM[2] !== $amx['b']) {
                        continue;
                    }
                } elseif (strpos($codes, '/r') === false && preg_match('|/([0-7])|', $codes, $rm)) {
                    // "/digit": ModR/M.reg must equal the digit
                    if ($ModRM[1] !== sprintf('%03b', $rm[1])) {
                        continue;
                    }
                }

                $end = modrm_offset($ModRM, $data, $offset_possible);

                // SIB byte is missing: the input is truncated, stop looking
                if (is_null($end)) {
                    break;
                }
                $offset_possible += $end;
            }

            // "$XX": the variant requires a fixed byte XX after the operands
            if (preg_match('|\$([0-9A-F]{2})|', $codes, $end)) {
                if (!isset($data[$offset_possible]) || sprintf('%02X', ord($data[$offset_possible])) !== $end[1]) {
                    continue;
                }
                $offset_possible += 1;
            }

            preg_match_all($bytes_pattern, $codes, $bytes);

            if (strpos($codes, '/is') !== false) { // 4 | 5
                $offset_possible += 1;
            }

            $found[$bit] = array(
                'instruction' => $instruction,
                'offset_possible' => $offset_possible,
                'bytes' => !empty($bytes[2]) ? $bytes[2] : array(),
            );

            if ($bit === 'xxx') {
                break;
            }
        }

        // pick by priority
        $chosen = null;
        foreach ($bits as $bit) {
            if (isset($found[$bit])) {
                $chosen = $found[$bit];
                break;
            }
        }

        // Nothing matched at all, or nothing matched the active bitness
        // priorities. Without this guard $offset would be overwritten with an
        // undefined value and decoding would restart at byte 0 forever.
        if ($chosen === null) {
            $return[$start] = 'invalid';
            continue;
        }

        $offset = $chosen['offset_possible'];

        foreach ($chosen['bytes'] as $code) {
            $size = $codes_size[$code];
            // a memory offset is halved by the 0x67 address-size override
            if ($code === 'mo' && $size_override) {
                $size = $size / 2;
            }

            $offset += $size;
        }

        // the instruction must end inside the input
        if (!isset($data[$offset - 1]) || empty($chosen['instruction'])) {
            $return[$start] = 'invalid';
        } else {
            $return[$start] = $chosen['instruction'];
        }

    } while (true);

    return $return;
}