2020-05-09 13:09:40 +01:00
/*
 * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
2021-01-17 20:28:43 +01:00
# include <AK/Debug.h>
2022-03-27 19:04:48 +02:00
# include <AK/Demangle.h>
2023-10-17 19:00:30 +02:00
# include <AK/IterationDecision.h>
2021-05-19 14:35:34 +02:00
# include <AK/OwnPtr.h>
2020-08-09 21:55:32 -04:00
# include <AK/QuickSort.h>
2023-10-17 19:00:30 +02:00
# include <AK/String.h>
# include <AK/StringBuilder.h>
2020-08-09 21:55:32 -04:00
# include <AK/Vector.h>
2020-08-05 22:35:35 +02:00
# include <LibCore/ArgsParser.h>
2021-11-23 11:32:25 +01:00
# include <LibCore/MappedFile.h>
2022-01-16 14:24:21 +01:00
# include <LibCore/System.h>
2020-12-25 02:14:56 +01:00
# include <LibELF/Image.h>
2022-01-13 20:54:35 +01:00
# include <LibMain/Main.h>
2020-04-11 13:16:17 +02:00
# include <LibX86/Disassembler.h>
2020-08-16 12:39:06 -04:00
# include <LibX86/ELFSymbolProvider.h>
2023-10-17 19:00:30 +02:00
// A named symbol taken from the ELF symbol table, described by its start
// address (`value`) and its length in bytes (`size`). A `size` of zero denotes
// a symbol that marks a single address rather than a range.
struct Symbol {
    size_t value { 0 };
    size_t size { 0 };
    StringView name;

    // First address covered by the symbol.
    size_t address() const { return value; }

    // One past the last address covered by the symbol; equals address() when size is 0.
    size_t address_end() const { return value + size; }

    // Returns true when `virtual_address` lies in [address(), address_end()).
    // A zero-size symbol covers no range, so it only "contains" its own address.
    bool contains(size_t virtual_address) const
    {
        if (size == 0)
            return address() == virtual_address;
        return address() <= virtual_address && virtual_address < address_end();
    }

    // Formats the symbol's location for display: "start-end" for ranged
    // symbols, or just "start" for zero-size symbols.
    String format_symbol_address() const
    {
        if (size > 0)
            return MUST(String::formatted("{:p}-{:p}", address(), address_end()));
        return MUST(String::formatted("{:p}", address()));
    }
};
2020-04-11 13:16:17 +02:00
2022-01-13 20:54:35 +01:00
// Entry point of the disassembler utility.
//
// Parses the command line (a positional file path and an optional
// `-s/--symbol` filter), maps the file, and prints an x86 disassembly of it.
// If the file is a valid ELF image, only its `.text` section is disassembled
// and the output is annotated with the names of the symbols that cover, or
// sit at, each instruction. Any other input is disassembled as raw bytes.
//
// Returns 0 on success; errors from stat()/mapping the file are propagated.
ErrorOr<int> serenity_main(Main::Arguments args)
{
    StringView path {};
    StringView target_symbol;

    Core::ArgsParser args_parser;
    args_parser.set_general_help(
        "Disassemble an executable, and show human-readable "
        "assembly code for each function.");
    args_parser.add_positional_argument(path, "Path to binary file", "path");
    args_parser.add_option(target_symbol, "Show disassembly only for a specific symbol", "symbol", 's', "symbol");
    args_parser.parse(args);

    OwnPtr<Core::MappedFile> file;
    u8 const* asm_data = nullptr;
    size_t asm_size = 0;
    // Only map the file when stat() reports a non-zero size; otherwise the
    // disassembler is simply fed an empty buffer.
    if ((TRY(Core::System::stat(path))).st_size > 0) {
        file = TRY(Core::MappedFile::map(path));
        asm_data = static_cast<u8 const*>(file->data());
        // Propagate a failure to the caller instead of aborting the process.
        asm_size = TRY(file->size());
    }

    // Functions and similar symbols.
    Vector<Symbol> ranged_symbols;
    // Jump labels, relocation targets, etc.
    Vector<Symbol> zero_size_symbols;

    // Virtual address corresponding to asm_data[0]; stays 0 for non-ELF input.
    size_t file_offset = 0;
    OwnPtr<X86::ELFSymbolProvider> symbol_provider; // nullptr for non-ELF disassembly.
    OwnPtr<ELF::Image> elf;
    if (asm_size >= 4 && strncmp(reinterpret_cast<char const*>(asm_data), "\u007fELF", 4) == 0) {
        elf = make<ELF::Image>(asm_data, asm_size);
        if (elf->is_valid()) {
            symbol_provider = make<X86::ELFSymbolProvider>(*elf);

            // Narrow the disassembled range down to the .text section, if there is one.
            elf->for_each_section_of_type(SHT_PROGBITS, [&](ELF::Image::Section const& section) {
                // FIXME: Disassemble all SHT_PROGBITS sections, not just .text.
                if (section.name() != ".text")
                    return IterationDecision::Continue;
                asm_data = reinterpret_cast<u8 const*>(section.raw_data());
                asm_size = section.size();
                file_offset = section.address();
                return IterationDecision::Break;
            });

            ranged_symbols.ensure_capacity(elf->symbol_count() + 1);
            zero_size_symbols.ensure_capacity(elf->symbol_count() + 1);

            // Sentinels: both lists always start with an unnamed symbol at address 0,
            // so the iterators below always have an element to rest on.
            ranged_symbols.append({ 0, 0, StringView() });
            zero_size_symbols.append({ 0, 0, StringView() });

            // Split the named symbols into ranged and zero-size ones; unnamed symbols are skipped.
            elf->for_each_symbol([&](ELF::Image::Symbol const& symbol) {
                if (symbol.name().is_empty())
                    return IterationDecision::Continue;
                if (symbol.size() == 0)
                    zero_size_symbols.append({ symbol.value(), symbol.size(), symbol.name() });
                else
                    ranged_symbols.append({ symbol.value(), symbol.size(), symbol.name() });
                return IterationDecision::Continue;
            });

            // Order by address, then by size, then by name.
            auto symbol_order = [](auto& a, auto& b) {
                if (a.value != b.value)
                    return a.value < b.value;
                if (a.size != b.size)
                    return a.size < b.size;
                return a.name < b.name;
            };
            quick_sort(ranged_symbols, symbol_order);
            quick_sort(zero_size_symbols, symbol_order);

            if constexpr (DISASM_DUMP_DEBUG) {
                for (size_t i = 0; i < ranged_symbols.size(); ++i)
                    dbgln("{}: {:p}, {}", ranged_symbols[i].name, ranged_symbols[i].value, ranged_symbols[i].size);
                for (size_t i = 0; i < zero_size_symbols.size(); ++i)
                    dbgln("{}: {:p}", zero_size_symbols[i].name, zero_size_symbols[i].value);
            }
        }
    }

    X86::SimpleInstructionStream stream(asm_data, asm_size);
    X86::Disassembler disassembler(stream);

    Vector<Symbol>::Iterator current_ranged_symbol = ranged_symbols.begin();
    Vector<Symbol>::Iterator current_zero_size_symbol = zero_size_symbols.begin();
    bool is_first_symbol = true;
    bool current_instruction_is_in_symbol = false;
    bool found_symbol = false;

    for (;;) {
        auto offset = stream.offset();
        auto insn = disassembler.next();
        if (!insn.has_value())
            break;

        size_t virtual_offset = file_offset + offset;

        // Prefix regions of instructions belonging to a symbol with the symbol's name.
        // Separate regions of instructions belonging to distinct symbols with newlines,
        // and separate regions of instructions not belonging to symbols from regions belonging to symbols with newlines.
        // Interesting cases:
        // - More than 1 symbol covering a region of instructions (ICF, D1/D2)
        // - Symbols of size 0 that don't cover any instructions but are at an address (want to print them, separated from instructions both before and after)
        // Invariant: current_ranged_symbol is the last symbol containing insn, or it is the last symbol that has an address less than the instruction's address.
        StringBuilder dangling_symbols;
        StringBuilder instruction_symbols;
        bool needs_separator = false;

        if (current_zero_size_symbol < zero_size_symbols.end()) {
            // Print "dangling" symbols preceding the current instruction.
            while (current_zero_size_symbol + 1 < zero_size_symbols.end()
                && !(current_zero_size_symbol + 1)->contains(virtual_offset)
                && (current_zero_size_symbol + 1)->address() <= virtual_offset) {
                ++current_zero_size_symbol;
                if (!is_first_symbol)
                    dangling_symbols.appendff("\n({} ({}))\n", demangle(current_zero_size_symbol->name), current_zero_size_symbol->format_symbol_address());
            }
            // Find and print all symbols covering the current instruction.
            while (current_zero_size_symbol + 1 < zero_size_symbols.end()
                && (current_zero_size_symbol + 1)->contains(virtual_offset)) {
                if (!is_first_symbol && !current_instruction_is_in_symbol)
                    needs_separator = true;
                ++current_zero_size_symbol;
                current_instruction_is_in_symbol = true;
                instruction_symbols.appendff("{} ({}):\n", demangle(current_zero_size_symbol->name), current_zero_size_symbol->format_symbol_address());
            }
        }

        // Handle ranged symbols separately.
        if (current_ranged_symbol < ranged_symbols.end() && !current_ranged_symbol->contains(virtual_offset)) {
            if (!is_first_symbol && current_instruction_is_in_symbol) {
                // The previous instruction was part of a symbol that doesn't cover the current instruction, so separate it from the current instruction with a newline.
                needs_separator = true;
                current_instruction_is_in_symbol = (current_ranged_symbol + 1 < ranged_symbols.end() && (current_ranged_symbol + 1)->contains(virtual_offset));
            }

            // Print "dangling" symbols preceding the current instruction.
            while (current_ranged_symbol + 1 < ranged_symbols.end()
                && !(current_ranged_symbol + 1)->contains(virtual_offset)
                && (current_ranged_symbol + 1)->address() <= virtual_offset) {
                ++current_ranged_symbol;
                if (!is_first_symbol)
                    dangling_symbols.appendff("\n({} ({}))\n", demangle(current_ranged_symbol->name), current_ranged_symbol->format_symbol_address());
            }

            // Find and print all symbols covering the current instruction.
            while (current_ranged_symbol + 1 < ranged_symbols.end()
                && (current_ranged_symbol + 1)->contains(virtual_offset)) {
                if (!is_first_symbol && !current_instruction_is_in_symbol)
                    needs_separator = true;
                ++current_ranged_symbol;
                current_instruction_is_in_symbol = true;
                instruction_symbols.appendff("{} ({}):\n", demangle(current_ranged_symbol->name), current_ranged_symbol->format_symbol_address());
            }

            is_first_symbol = false;
        }

        // Name of the ranged symbol the iterator currently rests on. When no symbol
        // table was loaded (non-ELF or invalid ELF input) the list is empty and the
        // iterator equals end(), so it must not be dereferenced; treat that as "no name".
        StringView current_symbol_name;
        if (current_ranged_symbol < ranged_symbols.end())
            current_symbol_name = current_ranged_symbol->name;

        // Past the target symbol now; no need to disassemble more.
        if (found_symbol && current_symbol_name != target_symbol)
            break;
        found_symbol = !target_symbol.is_empty() && current_symbol_name == target_symbol;
        // We have not found the target symbol yet; don't print anything.
        if (!target_symbol.is_empty() && current_symbol_name != target_symbol)
            continue;

        // Insert extra newline after the "dangling" symbols.
        if (needs_separator)
            outln();
        if (auto dangling_symbols_text = TRY(dangling_symbols.to_string()); !dangling_symbols_text.is_empty())
            outln("{}", dangling_symbols_text);
        if (auto instruction_symbols_text = TRY(instruction_symbols.to_string()); !instruction_symbols_text.is_empty())
            out("{}", instruction_symbols_text);

        // First output line: address, up to 7 instruction bytes (padded to a
        // fixed width), then the textual form of the instruction.
        size_t length = insn.value().length();
        StringBuilder builder;
        builder.appendff("{:p}  ", virtual_offset);
        for (size_t i = 0; i < 7; i++) {
            if (i < length)
                builder.appendff("{:02x} ", asm_data[offset + i]);
            else
                builder.append("   "sv);
        }
        builder.append(" "sv);
        builder.append(insn.value().to_byte_string(virtual_offset, symbol_provider));
        outln("{}", builder.string_view());

        // Instructions longer than 7 bytes: dump the remaining bytes on
        // continuation lines, 7 bytes per line.
        for (size_t bytes_printed = 7; bytes_printed < length; bytes_printed += 7) {
            builder.clear();
            builder.appendff("{:p} ", virtual_offset + bytes_printed);
            for (size_t i = bytes_printed; i < bytes_printed + 7 && i < length; i++)
                builder.appendff(" {:02x}", asm_data[offset + i]);
            outln("{}", builder.string_view());
        }
    }

    return 0;
}