2020-05-09 13:09:40 +01:00
/*
 * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
2021-01-17 20:28:43 +01:00
# include <AK/Debug.h>
2022-03-27 19:04:48 +02:00
# include <AK/Demangle.h>
2021-05-19 14:35:34 +02:00
# include <AK/OwnPtr.h>
2020-08-09 21:55:32 -04:00
# include <AK/QuickSort.h>
# include <AK/Vector.h>
2020-08-05 22:35:35 +02:00
# include <LibCore/ArgsParser.h>
2021-11-23 11:32:25 +01:00
# include <LibCore/MappedFile.h>
2022-01-16 14:24:21 +01:00
# include <LibCore/System.h>
2020-12-25 02:14:56 +01:00
# include <LibELF/Image.h>
2022-01-13 20:54:35 +01:00
# include <LibMain/Main.h>
2020-04-11 13:16:17 +02:00
# include <LibX86/Disassembler.h>
2020-08-16 12:39:06 -04:00
# include <LibX86/ELFSymbolProvider.h>
2020-08-08 22:08:13 -04:00
# include <string.h>
2020-04-11 13:16:17 +02:00
2022-01-13 20:54:35 +01:00
// Entry point of the disassembler utility.
//
// Takes one positional argument, `path`. If the file at `path` is non-empty it is memory-mapped
// and decoded as a stream of x86 instructions, one output line per instruction (plus continuation
// lines for instructions longer than 7 bytes).
//
// If the mapped data starts with the ELF magic and parses as a valid ELF image, only the `.text`
// section is disassembled (when one exists), addresses are offset by that section's address, and
// the output is annotated with the names of the symbols covering each instruction.
//
// Returns 0 on success; errors from stat() or from mapping the file are propagated via TRY().
ErrorOr<int> serenity_main(Main::Arguments args)
{
    StringView path {};

    Core::ArgsParser args_parser;
    args_parser.set_general_help(
        " Disassemble an executable, and show human-readable "
        " assembly code for each function. ");
    args_parser.add_positional_argument(path, " Path to i386 binary file ", " path ");
    args_parser.parse(args);

    // Only map the file when it has content; otherwise asm_data/asm_size stay null/0
    // and the instruction stream below is constructed over an empty range.
    OwnPtr<Core::MappedFile const> file;
    u8 const* asm_data = nullptr;
    size_t asm_size = 0;
    if ((TRY(Core::System::stat(path))).st_size > 0) {
        file = TRY(Core::MappedFile::map(path));
        asm_data = static_cast<u8 const*>(file->data());
        asm_size = file->size();
    }

    // A symbol covers the half-open address range [value, value + size).
    struct Symbol {
        size_t value;
        size_t size;
        StringView name;

        size_t address() const { return value; }
        size_t address_end() const { return value + size; }

        bool contains(size_t virtual_address) const { return address() <= virtual_address && virtual_address < address_end(); }
    };
    Vector<Symbol> symbols;

    // Added to stream offsets to obtain the address that is printed and matched against symbols.
    size_t file_offset = 0;
    Vector<Symbol>::Iterator current_symbol = symbols.begin();
    OwnPtr<X86::ELFSymbolProvider> symbol_provider; // nullptr for non-ELF disassembly.
    OwnPtr<ELF::Image> elf;

    // The ELF identification bytes are binary data, so compare them as raw bytes.
    static constexpr u8 elf_magic[] = { 0x7f, 'E', 'L', 'F' };
    if (asm_size >= sizeof(elf_magic) && memcmp(asm_data, elf_magic, sizeof(elf_magic)) == 0) {
        elf = make<ELF::Image>(asm_data, asm_size);
        if (elf->is_valid()) {
            symbol_provider = make<X86::ELFSymbolProvider>(*elf);

            // Narrow the disassembled range to the .text section, if there is one.
            elf->for_each_section_of_type(SHT_PROGBITS, [&](ELF::Image::Section const& section) {
                // FIXME: Disassemble all SHT_PROGBITS sections, not just .text.
                if (section.name() != ".text")
                    return IterationDecision::Continue;
                asm_data = reinterpret_cast<u8 const*>(section.raw_data());
                asm_size = section.size();
                file_offset = section.address();
                return IterationDecision::Break;
            });

            symbols.ensure_capacity(elf->symbol_count() + 1);
            symbols.append({ 0, 0, StringView() }); // Sentinel.
            elf->for_each_symbol([&](ELF::Image::Symbol const& symbol) {
                symbols.append({ symbol.value(), symbol.size(), symbol.name() });
                return IterationDecision::Continue;
            });

            // Order by address, then size, then name, so the loop below can walk the symbols
            // front to back while the instruction address increases.
            quick_sort(symbols, [](auto& a, auto& b) {
                if (a.value != b.value)
                    return a.value < b.value;
                if (a.size != b.size)
                    return a.size < b.size;
                return a.name < b.name;
            });

            if constexpr (DISASM_DUMP_DEBUG) {
                for (size_t i = 0; i < symbols.size(); ++i)
                    dbgln(" {}: {:p}, {} ", symbols[i].name, symbols[i].value, symbols[i].size);
            }
        }
    }

    X86::SimpleInstructionStream stream(asm_data, asm_size);
    X86::Disassembler disassembler(stream);

    bool is_first_symbol = true;
    bool current_instruction_is_in_symbol = false;

    for (;;) {
        // Capture the offset before decoding so it refers to the start of the instruction.
        auto offset = stream.offset();
        auto insn = disassembler.next();
        if (!insn.has_value())
            break;

        // Prefix regions of instructions belonging to a symbol with the symbol's name.
        // Separate regions of instructions belonging to distinct symbols with newlines,
        // and separate regions of instructions not belonging to symbols from regions belonging to symbols with newlines.
        // Interesting cases:
        // - More than 1 symbol covering a region of instructions (ICF, D1/D2)
        // - Symbols of size 0 that don't cover any instructions but are at an address (want to print them, separated from instructions both before and after)
        // Invariant: current_symbol is the largest symbol containing insn, or it is the largest symbol that has an address less than the instruction's address.
        size_t virtual_offset = file_offset + offset;
        if (current_symbol < symbols.end() && !current_symbol->contains(virtual_offset)) {
            if (!is_first_symbol && current_instruction_is_in_symbol) {
                // The previous instruction was part of a symbol that doesn't cover the current instruction, so separate it from the current instruction with a newline.
                outln();
                current_instruction_is_in_symbol = (current_symbol + 1 < symbols.end() && (current_symbol + 1)->contains(virtual_offset));
            }

            // Try to find symbol covering current instruction, if one exists.
            // Symbols that start at or before this address but do not cover it are skipped,
            // and (except before the first instruction) announced in parentheses.
            while (current_symbol + 1 < symbols.end() && !(current_symbol + 1)->contains(virtual_offset) && (current_symbol + 1)->address() <= virtual_offset) {
                ++current_symbol;
                if (!is_first_symbol)
                    outln(" \n ({} ({:p}-{:p})) \n ", demangle(current_symbol->name), current_symbol->address(), current_symbol->address_end());
            }

            // Print a header for every following symbol that covers this address.
            while (current_symbol + 1 < symbols.end() && (current_symbol + 1)->contains(virtual_offset)) {
                if (!is_first_symbol && !current_instruction_is_in_symbol)
                    outln();
                ++current_symbol;
                current_instruction_is_in_symbol = true;
                outln(" {} ({:p}-{:p}): ", demangle(current_symbol->name), current_symbol->address(), current_symbol->address_end());
            }

            is_first_symbol = false;
        }

        // First output line: address, up to 7 instruction bytes (padded when shorter), then the mnemonic.
        size_t length = insn.value().length();
        StringBuilder builder;
        builder.appendff(" {:p} ", virtual_offset);
        for (size_t i = 0; i < 7; i++) {
            if (i < length)
                builder.appendff(" {:02x} ", asm_data[offset + i]);
            else
                builder.append(" "sv);
        }
        builder.append(" "sv);
        builder.append(insn.value().to_deprecated_string(virtual_offset, symbol_provider));
        outln(" {} ", builder.string_view());

        // Instructions longer than 7 bytes continue on further lines, 7 bytes per line.
        for (size_t bytes_printed = 7; bytes_printed < length; bytes_printed += 7) {
            builder.clear();
            builder.appendff(" {:p} ", virtual_offset + bytes_printed);
            for (size_t i = bytes_printed; i < bytes_printed + 7 && i < length; i++)
                builder.appendff(" {:02x} ", asm_data[offset + i]);
            outln(" {} ", builder.string_view());
        }
    }

    return 0;
}