1, 'cw' => 2, 'cd' => 4, 'cp' => 6, 'co' => 8, 'ct' => 10, 'ib' => 1, 'iw' => 2, 'id' => 4, 'io' => 8, 'mo' => 8, ); // здесь будет список дизассемблированных инструкций $return = array(); // текущая позиция $offset = 0; // сколько всего байт $all = strlen($data); do { $start = $offset; if ($offset >= $all) { break; } // битность по умолчанию $bits = array( 'xxx', 'x32', 'x64', 'x16', ); $bits_override = false; // 16 bit $size_override = false; $prefix = ''; $rex = null; $opcode = null; $first = sprintf('%02X', ord($data[$offset])); $offset++; // проверяем префиксы, их по порядку может быть больше одного while (in_array($first, $legacy_prefixes)) { if ($first == '66') { $bits_override = true; } if ($first != '67') { $prefix = $first; } else { $size_override = true; } if (!isset($data[$offset])) { break; } $first = sprintf('%02X', ord($data[$offset])); $offset++; } // POP или XOP? if ($first === '8F') { if (!isset($data[$offset])) { break; } $p0 = sprintf('%08b', ord($data[$offset])); preg_match('|(?[01]{1})(?[01]{1})(?[01]{1})(?[01]{5})|', $p0, $p0); if ($p0['mmmmm'] >= '1000') { $first = 'XOP'; } } // ненужный префикс? if (in_array($first, $null)) { continue; } // LOCK, такой же ненужный elseif ($first == 'F0') { continue; } // XOP elseif ($first === 'XOP') { $offset++; $xop = array( 'R' => '', 'X' => '', 'W' => '', 'B' => '', 'mmm' => '', 'vvvv' => '', 'vector' => '', 'prefix' => '', 'map' => '', ); // MAP switch ($p0['mmmmm']) { case '01000': { $xop['map'] = 'X8'; break; } case '01001': { $xop['map'] = 'X9'; break; } case '01010': { $xop['map'] = 'XA'; break; } } if (!isset($data[$offset])) { break; } $p1 = sprintf('%08b', ord($data[$offset])); $offset++; preg_match('|(?[01]{1})(?[01]{4})(?[01]{1})(?[01]{2})|', $p1, $p1); $xop['W'] = $p1['W']; switch ($p1['L']) { case '0': { $xop['vector'] = 128; break; } case '1': { $xop['vector'] = 256; break; } } switch ($p1['pp']) { case '01': { $xop['prefix'] = '66'; break; } case '10': { $xop['prefix'] = 'F3'; break; } case '11': { $xop['prefix'] = 'F2'; break; } } if (!isset($data[$offset])) { break; } $opcode = array(sprintf('%02X', ord($data[$offset]))); $offset++; if (!empty($xop['prefix'])) { $opcode[] = $xop['prefix']; } if (!empty($xop['map'])) { $opcode[] = $xop['map']; } $opcode = implode('.', $opcode); if (empty($instructions['XOP'][$opcode])) { $return[$start] = 'invalid'; continue; } $current = $instructions['XOP'][$opcode]; if (!empty($current['W' . $xop['W']])) { $current = $current['W' . $xop['W']]; } else { $return[$start] = 'invalid'; continue; } if ($xop['vector'] && !empty($current[$xop['vector']])) { $current = $current[$xop['vector']]; } elseif (!empty($current['L' . $p1['L']])) { $current = $current['L' . $p1['L']]; } else { $return[$start] = 'invalid'; continue; } } // EVEX/MVEX elseif ($first === '62') { $evex = array( 'R' => '', 'X' => '', 'W' => '', 'B' => '', 'mmm' => '', 'vvvv' => '', 'vector' => '', 'prefix' => '', 'map' => '', ); if (!isset($data[$offset])) { break; } $p0 = sprintf('%08b', ord($data[$offset])); $offset++; preg_match('|(?[01]{1})(?[01]{1})(?[01]{1})(?[01]{1})0(?[01]{3})|', $p0, $p0); $evex['R'] = $p0['R']; $evex['X'] = $p0['X']; $evex['B'] = $p0['B']; switch ($p0['mmm']) { case '001': { $evex['map'] = '0F'; break; } case '010': { $evex['map'] = '0F38'; break; } case '011': { $evex['map'] = '0F3A'; break; } case '101': { $evex['map'] = 'MAP5'; break; } case '110': { $evex['map'] = 'MAP6'; break; } } if (!isset($data[$offset])) { break; } $p1 = sprintf('%08b', ord($data[$offset])); $offset++; preg_match('|(?[01]{1})(?[01]{4})(?[01]{1})(?[01]{2})|', $p1, $p1); $evex['W'] = $p1['W']; switch ($p1['pp']) { case '01': { $evex['prefix'] = '66'; break; } case '10': { $evex['prefix'] = 'F3'; break; } case '11': { $evex['prefix'] = 'F2'; break; } } if (!isset($data[$offset])) { break; } $p2 = sprintf('%08b', ord($data[$offset])); $offset++; preg_match('|(?[01]{1})(?[01]{2})(?[01]{1})(?[01]{1})(?[01]{3})|', $p2, $p2); switch ($p2['LL']) { case '00': { $evex['vector'] = 128; break; } case '01': { $evex['vector'] = 256; break; } case '10': { $evex['vector'] = 512; break; } case '11': { // invalid! p2[b] = 1 ?? $evex['vector'] = 512; break; } } if (!isset($data[$offset])) { break; } $opcode = array(sprintf('%02X', ord($data[$offset]))); $offset++; if (!empty($evex['prefix'])) { $opcode[] = $evex['prefix']; } if (!empty($evex['map'])) { $opcode[] = $evex['map']; } $opcode = implode('.', $opcode); $vx = empty($p1['m']) ? 'MVEX' : 'EVEX'; if (empty($instructions[$vx][$opcode])) { $return[$start] = 'invalid'; continue; } $current = $instructions[$vx][$opcode]; if (!empty($current['WIG'])) { $current = $current['WIG']; } elseif ($evex['W'] === '0' || $evex['W'] === '1') { $current = $current['W' . $evex['W']]; } else { $return[$start] = 'invalid'; continue; } if (!empty($current[$evex['vector']])) { $current = $current[$evex['vector']]; } elseif (!empty($current['LIG'])) { $current = $current['LIG']; } elseif (!empty($current['512'])) { $current = $current['512']; } else { $return[$start] = 'invalid'; continue; } if (empty($current)) { $return[$start] = 'invalid'; continue; } } // VEX - 2 или 3 байта elseif ($first === 'C5' || $first === 'C4') { $vex = array( 'byte' => 3, 'R' => '', 'X' => '', 'W' => '', 'B' => '', 'mmmmm' => '', 'vvvv' => '', // register name 'vector' => '', 'prefix' => '', 'map' => '', ); if (!isset($data[$offset])) { break; } if ($first === 'C4') { $p1 = sprintf('%08b', ord($data[$offset])); $offset++; preg_match('|(?[01]{1})(?[01]{1})(?[01]{1})(?[01]{5})|', $p1, $p1); $vex['R'] = $p1['R']; $vex['X'] = $p1['X']; $vex['B'] = $p1['B']; switch ($p1['mmmmm']) { case '00001': { $vex['map'] = '0F'; break; } case '00010': { $vex['map'] = '0F38'; break; } case '00011': { $vex['map'] = '0F3A'; break; } } if (!isset($data[$offset])) { break; } $p0 = sprintf('%08b', ord($data[$offset])); $offset++; preg_match('|(?[01]{1})(?[01]{4})(?[01]{1})(?[01]{2})|', $p0, $p0); $vex['W'] = $p0['W']; } else { $vex['byte'] = 2; $p0 = sprintf('%08b', ord($data[$offset])); $offset++; preg_match('|(?[01]{1})(?[01]{4})(?[01]{1})(?[01]{2})|', $p0, $p0); $vex['R'] = $p0['R']; $vex['map'] = '0F'; } switch ($p0['pp']) { case '01': { $vex['prefix'] = '66'; break; } case '10': { $vex['prefix'] = 'F3'; break; } case '11': { $vex['prefix'] = 'F2'; break; } } if (!isset($data[$offset])) { break; } $opcode = array(sprintf('%02X', ord($data[$offset]))); $offset++; if (!empty($vex['prefix'])) { $opcode[] = $vex['prefix']; } if (!empty($vex['map'])) { $opcode[] = $vex['map']; } $opcode = implode('.', $opcode); if (empty($instructions['VEX'][$opcode])) { $return[$start] = 'invalid'; continue; } $current = $instructions['VEX'][$opcode]; if (!empty($current['WIG'])) { $current = $current['WIG']; } elseif ($vex['W'] === '0' || $vex['W'] === '1') { if (empty($current['W' . $vex['W']])) { print_r($current); } $current = $current['W' . $vex['W']]; } elseif (!empty($current['W0'])) { $current = $current['W0']; } elseif (!empty($current['W1'])) { $current = $current['W1']; } else { $return[$start] = 'invalid'; continue; } if ($p0['L'] == 0) { if (!empty($current['128'])) { $current = $current['128']; } elseif (!empty($current['L0'])) { $current = $current['L0']; } elseif (!empty($current['LZ'])) { $current = $current['LZ']; } elseif (!empty($current['LIG'])) { $current = $current['LIG']; } else { $return[$start] = 'invalid'; continue; } } else { if (!empty($current['256'])) { $current = $current['256']; } elseif (!empty($current['L1'])) { $current = $current['L1']; } elseif (!empty($current['LIG'])) { $current = $current['LIG']; } else { $return[$start] = 'invalid'; continue; } } if (empty($current)) { $return[$start] = 'invalid'; continue; } } else { $opcode = $first; // REX, может быть больше одного while ($opcode >= '40' && $opcode <= '4F') { $rex = sprintf('%04b', hexdec($opcode) % 0x40); preg_match('|(?[01])(?[01])(?[01])(?[01])|', $rex, $rex); if (!isset($data[$offset])) { break; } $opcode = sprintf('%02X', ord($data[$offset])); $offset++; } $op2 = ''; if (isset($data[$offset])) { $op2 = $opcode . '.' . sprintf('%02X', ord($data[$offset])); } // 2 байта if (isset($instructions[$op2])) { $current = $instructions[$op2]; $offset += 1; } // 1 байт elseif (isset($instructions[$opcode])) { $current = $instructions[$opcode]; } else { $return[$start] = 'invalid'; continue; } // меняем приоритет битности в зависимости от REX if ($bits_override && empty($rex['W'])) { $bits = array( 'x16', 'x64', 'xxx', ); } elseif (!empty($rex['W'])) { $bits = array( 'x64', 'xxx', 'x32', ); } } $found = array(); foreach ($current as $codes => $instruction) { // находим все варианты для разной битности preg_match('/x(16|32|64|xx)/', $codes, $bit); $bit = $bit[0]; if (isset($found[$bit])) { continue; } $ModRM = null; $offset_possible = $offset; if (strpos($codes, '^') !== false) { if (empty($prefix)) { continue; } if (strpos($codes, '^' . $prefix) === false) { continue; } } if (strpos($codes, '|') !== false) { if (!isset($data[$offset_possible])) { continue; } $next = sprintf('%02X', ord($data[$offset_possible])); if (strpos($codes, '|' . $next) === false) { continue; } $offset_possible++; } // /r, /[0-7], /vsib, 11:rrr:bbb - ModRM if (strpos($codes, '/') !== false || strpos($codes, ':') !== false) { if (empty($ModRM) && isset($data[$offset_possible])) { $ModRM = sscanf(sprintf('%08b', ord($data[$offset_possible])), '%2c%3c%3c'); $offset_possible++; } if (empty($ModRM)) { continue; } if (preg_match("/(?~)?(?!)?\(?(?[m01]{2})\)?:(?[r01]{3}):(?[b01]{3})/", $codes, $amx)) { // ~11:rrr:bbb if (!empty($amx['set'])) { // устанавливаем ModR/M.mod $ModRM[0] = $amx['m']; } elseif (!empty($amx['not'])) { if ($ModRM[0] == $amx['m']) { continue; } } else { if ($ModRM[0] != $amx['m']) { continue; } } if ($amx['r'] != 'rrr') { if ($ModRM[1] != $amx['r']) { continue; } } if ($amx['b'] != 'bbb') { if ($ModRM[2] != $amx['b']) { continue; } } } elseif (strpos($codes, '/r') !== false) { } elseif (preg_match('|/([0-7])|', $codes, $rm)) { $rm = sprintf('%03b', $rm[1]); if ($ModRM[1] != $rm) { continue; } } elseif (strpos($codes, '/vsib') !== false) { } $end = modrm_offset($ModRM, $data, $offset_possible); if (is_null($end)) { break; } $offset_possible += $end; } if (preg_match('|\$([0-9A-F]{2})|', $codes, $end)) { if (!isset($data[$offset_possible]) || sprintf('%02X', ord($data[$offset_possible])) != $end[1]) { continue; } $offset_possible += 1; } preg_match_all('/(^|\s)(' . implode('|', array_keys($codes_size)) . ')/', $codes, $bytes); if (strpos($codes, '/is') !== false) { // 4 | 5 $offset_possible += 1; } $found[$bit] = array( 'instruction' => $instruction, 'offset_possible' => $offset_possible, 'codes' => $codes, 'ModRM' => $ModRM, 'bytes' => (!empty($bytes[2])) ? $bytes[2] : '', ); if ($bit == 'xxx') { break; } } if (empty($found)) { $return[$start] = 'invalid'; continue; } // по приоритету выбираем foreach ($bits as $bit) { if (isset($found[$bit])) { $found = $found[$bit]; break; } } $offset = $found['offset_possible']; if (!empty($found['bytes'])) { if (is_string($found['bytes'])) { $found['bytes'] = array($found['bytes']); } foreach ($found['bytes'] as $byte) { if ($byte === 'mo' && $size_override) { $byte = $codes_size[$byte]; $byte /= 2; } else { $byte = $codes_size[$byte]; } $offset += $byte; } } if (!isset($data[$offset-1]) || empty($found['instruction'])) { $return[$start] = 'invalid'; } else { $return[$start] = $found['instruction']; } } while (true); return $return; }