/* * Copyright (c) 2022, Eli Youngs * Copyright (c) 2023, Rodrigo Tobar * * SPDX-License-Identifier: BSD-2-Clause */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include class SedError { public: SedError() = default; SedError(String&& message) : m_message(move(message)) { } SedError(Error const& error) { *this = formatted("Internal sed error: {}", error.string_literal()); } String const& message() const { return m_message; } template static SedError formatted(CheckedFormatString&& fmtstr, Parameters const&... parameters) { return maybe_with_string(String::formatted(move(fmtstr), parameters...)); } static SedError parsing_error(GenericLexer const& lexer, StringView message) { return parsing_error(lexer, "{}", message); } template static SedError parsing_error(GenericLexer const& lexer, CheckedFormatString&& fmtstr, Parameters const&... parameters) { StringBuilder builder; builder.appendff("Parsing error at position {}: ", lexer.tell()); builder.appendff(move(fmtstr), parameters...); return maybe_with_string(String::from_utf8(builder.string_view())); } static SedError from_error(Error const& error) { return formatted("Internal sed error: {}", error.string_literal()); } private: String m_message; static SedError maybe_with_string(ErrorOr maybe_string) { if (maybe_string.is_error()) return SedError {}; return SedError { maybe_string.release_value() }; } }; template using SedErrorOr = ErrorOr; // function, maximum addresses #define ENUMERATE_FUNCTIONS(F) \ F('a', 1) \ F('b', 2) \ F('c', 2) \ F('d', 2) \ F('D', 2) \ F('g', 2) \ F('G', 2) \ F('h', 2) \ F('H', 2) \ F('i', 1) \ F('l', 2) \ F('n', 2) \ F('N', 2) \ F('p', 2) \ F('P', 2) \ F('q', 1) \ F('r', 1) \ F('s', 2) \ F('t', 2) \ F('w', 2) \ F('x', 2) \ F('y', 2) \ F(':', 0) \ F('=', 1) enum class AddressType { Unset, Line, LastLine, ContextAddress, }; class Address { public: Address() = default; explicit Address(size_t line) : m_line_number(line) , m_address_type(AddressType::Line) { } explicit Address(AddressType address_type) : m_address_type(address_type) { VERIFY(address_type == AddressType::LastLine || address_type == AddressType::ContextAddress); } size_t line_number() const { VERIFY(m_address_type == AddressType::Line); return m_line_number; } AddressType address_type() const { return m_address_type; } bool matches([[maybe_unused]] StringView pattern_space, size_t line_number, bool is_last_line) const { switch (m_address_type) { case AddressType::Line: return line_number == m_line_number; case AddressType::LastLine: return is_last_line; default: warnln("Addressing type not implemented: {}", int(m_address_type)); return false; } } private: size_t m_line_number { 0 }; AddressType m_address_type { AddressType::Unset }; }; namespace AK { template<> class Formatter
: public StandardFormatter { public: AK::ErrorOr format(FormatBuilder& format_builder, Address address) { auto& builder = format_builder.builder(); switch (address.address_type()) { case AddressType::Line: builder.appendff("{}", address.line_number()); break; case AddressType::LastLine: builder.append('$'); break; case AddressType::ContextAddress: VERIFY_NOT_REACHED(); case AddressType::Unset: break; } return {}; } }; } static bool is_command_separator(char c) { return c == '\n' || c == ';'; } template struct TextArgument { String text; static SedErrorOr parse(GenericLexer& lexer) { auto original_text = lexer.consume_until([is_escape_sequence = false](char c) mutable { if (c == '\n' && !is_escape_sequence) return true; is_escape_sequence = c == '\\'; return false; }); if (!original_text.starts_with("\\\n"sv)) return SedError::parsing_error(lexer, "Command should be followed by \\ + \\n"sv); auto text = TRY(String::from_utf8(original_text.substring_view(2))); return ArgsT { TRY(text.replace("\\\n"sv, "\n"sv, AK::ReplaceMode::All)) }; } }; template struct OptionalLabelArgument { Optional label; static SedErrorOr parse(GenericLexer& lexer) { auto blanks = lexer.consume_while(is_ascii_blank); if (blanks.is_empty()) return SedError::parsing_error(lexer, "expected one or more blank characeters"sv); if (lexer.next_is(is_command_separator)) return ArgsT {}; return ArgsT { lexer.consume_until(is_command_separator) }; } }; template struct FilepathArgument { static SedErrorOr parse(GenericLexer& lexer) { auto blanks = lexer.consume_while(is_ascii_blank); if (blanks.is_empty()) return SedError::parsing_error(lexer, "expected one or more blank characeters"sv); auto filepath = lexer.consume_until(is_command_separator); if (filepath.is_empty()) return SedError::parsing_error(lexer, "input filename expected, none found"); return ArgsT { {}, filepath }; } }; struct AArguments : TextArgument { }; struct BArguments : OptionalLabelArgument { }; struct CArguments : TextArgument { }; struct IArguments : TextArgument { }; struct RArguments : FilepathArgument { StringView input_filepath; }; struct SArguments { Regex regex; StringView replacement; PosixOptions options; bool print; Optional output_filepath; static SedErrorOr parse(GenericLexer& lexer) { auto generic_error_message = "Incomplete substitution command"sv; if (lexer.is_eof()) return SedError::parsing_error(lexer, generic_error_message); auto delimiter = lexer.consume(); if (delimiter == '\n' || delimiter == '\\') return SedError::parsing_error(lexer, "\\n and \\ cannot be used as delimiters."sv); auto pattern = lexer.consume_until(delimiter); if (pattern.is_empty()) return SedError::parsing_error(lexer, "Substitution patterns cannot be empty."sv); if (!lexer.consume_specific(delimiter)) return SedError::parsing_error(lexer, generic_error_message); auto replacement = lexer.consume_until(delimiter); // According to Posix, "s/x/y" is an invalid substitution command. // It must have a closing delimiter: "s/x/y/" if (!lexer.consume_specific(delimiter)) return SedError::parsing_error(lexer, "The substitution command was not properly terminated."sv); PosixOptions options = PosixOptions(PosixFlags::Global | PosixFlags::SingleMatch); bool print = false; Optional output_filepath; auto flags = split_flags(lexer); for (auto const& flag : flags) { if (flag.starts_with('w')) { auto flag_filepath = flag.substring_view(1).trim_whitespace(); if (flag_filepath.is_empty()) return SedError::parsing_error(lexer, "No filepath was provided for the 'w' flag."sv); output_filepath = flag_filepath; } else if (flag == "g"sv) { // Allow multiple matches per line by un-setting the SingleMatch flag options &= ~PosixFlags::SingleMatch; } else if (flag == "i"sv || flag == "I"sv) { options |= PosixFlags::Insensitive; } else if (flag == "p"sv) { print = true; } else { return SedError::parsing_error(lexer, "Unsupported flag for s command: {}", flag); } } return SArguments { Regex { pattern }, replacement, options, print, output_filepath }; } private: static Vector split_flags(GenericLexer& lexer) { Vector flags; while (!lexer.is_eof() && !lexer.next_is(is_command_separator)) { StringView flag; if (lexer.next_is(is_ascii_digit)) { flag = lexer.consume_while(is_ascii_digit); } else if (lexer.peek() == 'w') { flag = lexer.consume_until(is_command_separator); } else { flag = lexer.consume(1); } flags.append(flag); } return flags; } }; struct TArguments : OptionalLabelArgument { }; struct WArguments : FilepathArgument { StringView output_filepath; }; struct YArguments { StringView characters; StringView replacements; static SedErrorOr parse(GenericLexer& lexer) { return SedError::parsing_error(lexer, "not implemented"sv); } }; struct ColonArguments { StringView label; static SedErrorOr parse(GenericLexer& lexer) { ColonArguments args {}; args.label = lexer.consume_until(is_command_separator); if (args.label.is_empty()) return SedError::parsing_error(lexer, "label expected, none found"); return args; } }; struct Command { Address address1; Address address2; char function = '\0'; Optional> arguments; StringView arguments_view; void enable_for(StringView pattern_space, size_t line_number, bool is_last_line) { m_is_enabled = selects(pattern_space, line_number, is_last_line); } bool is_enabled() const { return m_is_enabled; } private: bool selects(StringView pattern_space, size_t line_number, bool is_last_line) { // no address set, all patterns match if (address1.address_type() == AddressType::Unset) { VERIFY(address2.address_type() == AddressType::Unset); return true; } // single address set if (address2.address_type() == AddressType::Unset) return address1.matches(pattern_space, line_number, is_last_line); // two addresses if (!m_is_selection_active && address1.matches(pattern_space, line_number, is_last_line)) { m_is_selection_active = true; return true; } if (m_is_selection_active && address2.matches(pattern_space, line_number, is_last_line)) { m_is_selection_active = false; return true; } return false; } bool m_is_enabled { false }; bool m_is_selection_active { false }; }; namespace AK { template<> class Formatter : public StandardFormatter { public: AK::ErrorOr format(FormatBuilder& format_builder, Command const& command) { auto& builder = format_builder.builder(); builder.appendff("{}", command.address1); if (command.address2.address_type() != AddressType::Unset) { builder.appendff(",{}", command.address2); } builder.append(command.function); builder.append(command.arguments_view); return {}; } }; } struct AddressParsingResult { Optional
address; }; static Optional
parse_address(GenericLexer& lexer) { if (lexer.is_eof()) return {}; if (lexer.peek() == '$') { lexer.consume(); return Address { AddressType::LastLine }; } auto lineno = lexer.consume_while(AK::is_ascii_digit); if (lineno.is_empty()) return {}; return Address { AK::StringUtils::convert_to_uint(lineno).release_value() }; } template static SedErrorOr verify_number_of_addresses(Command const& command) { if constexpr (max_addresses == 2) { return {}; } else { static_assert(max_addresses == 0 || max_addresses == 1); auto c = command.function; if constexpr (max_addresses == 0) { if (command.address1.address_type() != AddressType::Unset) { return SedError::formatted("'{}' doesn't take any address, at least one given", c); } } else { if (command.address2.address_type() != AddressType::Unset) { return SedError::formatted("'{}' takes a single address, two given", c); } } } return {}; } static SedErrorOr parse_command(GenericLexer& lexer) { lexer.consume_while(is_ascii_blank); Command command; command.address1 = parse_address(lexer).value_or({}); if (lexer.is_eof()) return SedError::parsing_error(lexer, "Incomplete command"sv); if (lexer.peek() == ',') { lexer.consume(); command.address2 = parse_address(lexer).value_or({}); } if (lexer.is_eof()) return SedError::parsing_error(lexer, "Incomplete command"sv); char command_char = lexer.consume(); #define HANDLE_FUNCTION_CASE(c, max_addresses) \ case c: \ command.function = c; \ TRY(verify_number_of_addresses(command)); \ break; switch (command_char) { ENUMERATE_FUNCTIONS(HANDLE_FUNCTION_CASE) default: return SedError::parsing_error(lexer, "Unknown function command '{}'", command_char); } #undef HANDLE_FUNCTION_CASE auto args_start = lexer.tell(); switch (command_char) { case 'a': command.arguments = TRY(AArguments::parse(lexer)); break; case 'b': command.arguments = TRY(BArguments::parse(lexer)); break; case 'c': command.arguments = TRY(CArguments::parse(lexer)); break; case 'i': command.arguments = TRY(IArguments::parse(lexer)); break; case 'r': command.arguments = TRY(RArguments::parse(lexer)); break; case 's': command.arguments = TRY(SArguments::parse(lexer)); break; case 't': command.arguments = TRY(TArguments::parse(lexer)); break; case 'w': command.arguments = TRY(WArguments::parse(lexer)); break; case 'y': command.arguments = TRY(YArguments::parse(lexer)); break; case ':': command.arguments = TRY(ColonArguments::parse(lexer)); break; default: { auto padding = lexer.consume_until(is_command_separator); if (!padding.is_whitespace()) { warnln("Command had arguments but none were expected, ignoring: '{}'", padding); } } } auto args_end = lexer.tell(); VERIFY(args_end >= args_start); auto args_length = args_end - args_start; lexer.retreat(args_length); command.arguments_view = lexer.consume(args_length); return command; } class Script { public: [[nodiscard]] bool add_script_part(StringView data) { auto last_pos = m_script.length(); m_script.append(data); auto lexer = GenericLexer(m_script.string_view().substring_view(last_pos)); while (!lexer.is_eof()) { if (lexer.is_eof()) break; auto maybe_command = parse_command(lexer); if (maybe_command.is_error()) { warnln("Problem while parsing script part: {}", maybe_command.release_error().message()); return false; }; m_commands.append(maybe_command.release_value()); lexer.consume_until(is_command_separator); if (lexer.is_eof()) break; lexer.consume(); } return true; } Vector& commands() { return m_commands; } ErrorOr> output_filenames() const { Vector output_filenames; for (auto const& command : m_commands) { if (!command.arguments.has_value()) continue; if (command.arguments->has()) { auto const& s_arguments = command.arguments->get(); if (s_arguments.output_filepath.has_value()) { TRY(add(output_filenames, s_arguments.output_filepath.value())); } } else if (command.arguments->has()) { TRY(add(output_filenames, command.arguments->get().output_filepath)); } } return output_filenames; } ErrorOr> input_filenames() const { Vector input_filenames; for (auto const& command : m_commands) { if (!command.arguments.has_value()) { continue; } if (command.arguments->has()) { TRY(add(input_filenames, command.arguments->get().input_filepath)); } } return input_filenames; } private: StringBuilder m_script; Vector m_commands; ErrorOr add(Vector& container, StringView element_sv) const { auto element = TRY(String::from_utf8(element_sv)); TRY(container.try_append(move(element))); return {}; }; }; enum class CycleDecision { None, Next, Quit }; class InputFile { AK_MAKE_NONCOPYABLE(InputFile); InputFile(NonnullOwnPtr&& file) : m_file(move(file)) { } public: static ErrorOr create(NonnullOwnPtr&& file) { auto buffered_file = TRY(Core::BufferedFile::create(move(file))); return InputFile(move(buffered_file)); } static ErrorOr create_from_stdin() { return create(TRY(Core::File::standard_input())); } InputFile(InputFile&&) = default; InputFile& operator=(InputFile&&) = default; ErrorOr has_next() const { return m_file->can_read_line(); } ErrorOr next() { VERIFY(TRY(has_next())); m_current_line = TRY(m_file->read_line(m_buffer)); ++m_line_number; return m_current_line; } size_t line_number() const { return m_line_number; } private: NonnullOwnPtr m_file; size_t m_line_number { 0 }; DeprecatedString m_current_line; constexpr static size_t MAX_SUPPORTED_LINE_SIZE = 4096; Array m_buffer; }; static ErrorOr write_pattern_space(Core::File& output, StringBuilder& pattern_space) { TRY(output.write_until_depleted(pattern_space.string_view().bytes())); TRY(output.write_until_depleted("\n"sv.bytes())); return {}; } static void print_unambiguous(StringView pattern_space) { // TODO: find out the terminal width, folding width should be less than that // to make it clear that folding is happening constexpr size_t fold_width = 70; AK::StringBuilder unambiguous_output; auto folded_append = [&unambiguous_output, current_line_length = size_t { 0 }](auto const& value, size_t length) mutable { if (current_line_length + length < fold_width) { current_line_length += length; } else { unambiguous_output.append("\\\n"sv); current_line_length = length; } unambiguous_output.append(value); }; for (auto const c : pattern_space) { if (c == '\\') folded_append("\\\\"sv, 2); else if (c == '\a') folded_append("\\a"sv, 2); else if (c == '\b') folded_append("\\b"sv, 2); else if (c == '\f') folded_append("\\f"sv, 2); else if (c == '\r') folded_append("\\r"sv, 2); else if (c == '\t') folded_append("\\t"sv, 2); else if (c == '\v') folded_append("\\v"sv, 2); else if (c == '\n') folded_append("$\n"sv, 1); else if (AK::is_ascii_printable(c)) folded_append(c, 1); else folded_append(DeprecatedString::formatted("\\{:3o}", (unsigned char)c), 4); } outln("{}$", unambiguous_output.string_view()); } static ErrorOr apply(Command const& command, StringBuilder& pattern_space, StringBuilder& hold_space, InputFile& input, bool suppress_default_output) { auto stdout = TRY(Core::File::standard_output()); auto cycle_decision = CycleDecision::None; switch (command.function) { case 'd': pattern_space.clear(); cycle_decision = CycleDecision::Next; break; case 'g': pattern_space = hold_space; break; case 'G': pattern_space.append('\n'); pattern_space.append(hold_space.string_view()); break; case 'h': hold_space = pattern_space; break; case 'H': hold_space.append('\n'); hold_space.append(pattern_space.string_view()); break; case 'i': outln("{}", command.arguments->get().text); break; case 'l': print_unambiguous(pattern_space.string_view()); break; case 'n': if (!suppress_default_output) TRY(write_pattern_space(*stdout, pattern_space)); if (TRY(input.has_next())) { pattern_space.clear(); pattern_space.append(TRY(input.next())); } break; case 'p': TRY(write_pattern_space(*stdout, pattern_space)); break; case 'P': { auto pattern_sv = pattern_space.string_view(); auto newline_position = pattern_sv.find('\n').value_or(pattern_sv.length() - 1); TRY(stdout->write_until_depleted(pattern_sv.substring_view(0, newline_position + 1).bytes())); break; } case 'q': cycle_decision = CycleDecision::Quit; break; case 's': { auto pattern_space_sv = pattern_space.string_view(); auto const& s_args = command.arguments->get(); auto result = s_args.regex.replace(pattern_space_sv, s_args.replacement, s_args.options); auto replacement_made = result != pattern_space_sv; pattern_space.clear(); pattern_space.append(result); if (replacement_made && s_args.print) TRY(write_pattern_space(*stdout, pattern_space)); break; } case 'x': swap(pattern_space, hold_space); break; case '=': outln("{}", input.line_number()); break; case '#': break; default: warnln("Command not implemented: {}", command.function); break; } return cycle_decision; } static ErrorOr run(Vector& inputs, Script& script, bool suppress_default_output) { // TODO: verify all commands are valid StringBuilder pattern_space; StringBuilder hold_space; auto stdout = TRY(Core::File::standard_output()); // TODO: extend to multiple input files auto& input = inputs[0]; // main loop while (TRY(input.has_next())) { // Avoid potential last, empty line auto line = TRY(input.next()); auto is_last_line = !TRY(input.has_next()); // TODO: "Reading from input shall be skipped if a was in the pattern space prior to a D command ending the previous cycle" pattern_space.append(line); // Turn commands on/off depending on selection. We need for (auto& command : script.commands()) command.enable_for(pattern_space.string_view(), input.line_number(), is_last_line); // Go, go, go! CycleDecision cycle_decision = CycleDecision::None; for (auto& command : script.commands()) { if (!command.is_enabled()) continue; auto command_cycle_decision = TRY(apply(command, pattern_space, hold_space, input, suppress_default_output)); if (command_cycle_decision == CycleDecision::Next || command_cycle_decision == CycleDecision::Quit) { cycle_decision = command_cycle_decision; break; } } if (cycle_decision == CycleDecision::Next) continue; if (cycle_decision == CycleDecision::Quit) break; if (!suppress_default_output) TRY(write_pattern_space(*stdout, pattern_space)); pattern_space.clear(); } return {}; } ErrorOr serenity_main(Main::Arguments args) { TRY(Core::System::pledge("stdio cpath rpath wpath")); bool suppress_default_output = false; Core::ArgsParser arg_parser; Script script; Vector pos_args; arg_parser.set_general_help("The Stream EDitor"); arg_parser.add_option(suppress_default_output, "suppress default output", nullptr, 'n'); arg_parser.add_option(Core::ArgsParser::Option { .argument_mode = Core::ArgsParser::OptionArgumentMode::Required, .help_string = "A file containing script commands", .short_name = 'f', .value_name = "script-file", .accept_value = [&script](StringView script_file) { auto maybe_file = Core::File::open(script_file, Core::File::OpenMode::Read); if (maybe_file.is_error()) { warnln("Failed to open script file: {}", maybe_file.release_error()); return false; } auto maybe_file_contents = maybe_file.release_value()->read_until_eof(1); if (maybe_file_contents.is_error()) { warnln("Failed to read contents of script file {}: {}", script_file, maybe_file_contents.release_error()); return false; } return script.add_script_part(StringView { maybe_file_contents.release_value().bytes() }); }, }); arg_parser.add_option(Core::ArgsParser::Option { .argument_mode = Core::ArgsParser::OptionArgumentMode::Required, .help_string = "A script of commands", .short_name = 'e', .value_name = "script", .accept_value = [&script](StringView script_argument) { return script.add_script_part(script_argument); }, }); arg_parser.add_positional_argument(pos_args, "script and/or file", "...", Core::ArgsParser::Required::No); arg_parser.parse(args); if (script.commands().is_empty()) { if (pos_args.is_empty()) { warnln("No script specified, aborting"); return 1; } if (!script.add_script_part(pos_args[0])) { return 1; } pos_args.remove(0); } for (auto const& input_filename : TRY(script.input_filenames())) { TRY(Core::System::unveil(input_filename, "r"sv)); } for (auto const& output_filename : TRY(script.output_filenames())) { TRY(Core::System::unveil(output_filename, "w"sv)); } Vector inputs; for (auto const& filename : pos_args) { if (filename == "-"sv) { inputs.empend(TRY(InputFile::create_from_stdin())); } else { auto file = TRY(Core::File::open(filename, Core::File::OpenMode::Read)); inputs.empend(TRY(InputFile::create(move(file)))); } } if (inputs.is_empty()) { inputs.empend(TRY(InputFile::create_from_stdin())); } TRY(run(inputs, script, suppress_default_output)); return 0; }