2020-11-27 20:44:25 +03:30
|
|
|
/*
|
|
|
|
* Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
|
|
|
|
*
|
2021-04-22 01:24:48 -07:00
|
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
2020-11-27 20:44:25 +03:30
|
|
|
*/
|
|
|
|
|
2021-05-15 12:34:40 +02:00
|
|
|
#include <AK/Assertions.h>
|
2022-12-04 18:02:33 +00:00
|
|
|
#include <AK/DeprecatedString.h>
|
2022-03-20 18:43:08 +01:00
|
|
|
#include <AK/LexicalPath.h>
|
2021-01-23 21:58:14 -07:00
|
|
|
#include <AK/ScopeGuard.h>
|
2022-10-25 22:11:34 -06:00
|
|
|
#include <AK/StringBuilder.h>
|
2020-11-27 20:44:25 +03:30
|
|
|
#include <AK/Vector.h>
|
|
|
|
#include <LibCore/ArgsParser.h>
|
2023-02-08 21:08:01 +01:00
|
|
|
#include <LibCore/DeprecatedFile.h>
|
2020-11-27 20:44:25 +03:30
|
|
|
#include <LibCore/DirIterator.h>
|
2023-02-09 03:02:46 +01:00
|
|
|
#include <LibCore/File.h>
|
2022-01-13 21:17:16 +01:00
|
|
|
#include <LibCore/System.h>
|
|
|
|
#include <LibMain/Main.h>
|
2020-11-27 20:44:25 +03:30
|
|
|
#include <LibRegex/Regex.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
|
2020-11-28 18:10:56 +03:30
|
|
|
// How input containing NUL bytes ("binary" input) is treated while searching.
enum class BinaryFileMode {
    Binary, // Report only that the binary file matches, then stop reading it.
    Text,   // Treat binary input like ordinary text and print matching lines.
    Skip,   // Ignore binary input entirely.
};
|
|
|
|
|
|
|
|
template<typename... Ts>
|
|
|
|
void fail(StringView format, Ts... args)
|
|
|
|
{
|
2021-05-31 15:43:25 +01:00
|
|
|
warn("\x1b[31m");
|
2020-11-28 18:10:56 +03:30
|
|
|
warnln(format, forward<Ts>(args)...);
|
2021-05-31 15:43:25 +01:00
|
|
|
warn("\x1b[0m");
|
2020-11-28 18:10:56 +03:30
|
|
|
abort();
|
|
|
|
}
|
|
|
|
|
2022-10-25 22:11:34 -06:00
|
|
|
// Characters that are backslash-escaped when a fixed-string pattern is
// compiled as an extended (PosixExtended) regular expression.
constexpr StringView ere_special_characters { ".^$*+?()[{\\|"sv };
// Characters that are backslash-escaped when a fixed-string pattern is
// compiled as a basic (PosixBasic) regular expression.
constexpr StringView basic_special_characters { ".^$*[\\"sv };
|
|
|
|
|
|
|
|
// Returns a copy of `string` in which every character that also occurs in
// `characters` is preceded by a backslash. All other characters are copied
// through unchanged.
static DeprecatedString escape_characters(StringView string, StringView characters)
{
    StringBuilder escaped;
    for (auto character : string) {
        bool const needs_escape = characters.contains(character);
        if (needs_escape)
            escaped.append('\\');

        escaped.append(character);
    }
    return escaped.to_deprecated_string();
}
|
|
|
|
|
2022-01-13 21:17:16 +01:00
|
|
|
// Entry point of the grep utility.
//
// Parses the command line, compiles every pattern as either a PosixExtended
// or a PosixBasic regular expression, and searches standard input, the given
// files, or (with --recursive) directory trees for matching lines.
//
// The invocation name selects defaults: "rgrep" enables recursion, "egrep"
// enables extended regular expressions and "fgrep" enables fixed-string
// matching. The corresponding command line options can enable them as well.
//
// Returns 0 if at least one line matched. Returns 1 if nothing matched, if a
// pattern failed to parse, if a file could not be processed in non-recursive
// mode, or if binary data is read from standard input while binary input is
// being skipped. An error from pledge() is propagated through ErrorOr.
ErrorOr<int> serenity_main(Main::Arguments args)
{
    TRY(Core::System::pledge("stdio rpath"));

    DeprecatedString program_name = AK::LexicalPath::basename(args.strings[0]);

    Vector<DeprecatedString> files;

    // Defaults derived from the name the program was invoked under.
    bool recursive = (program_name == "rgrep"sv);
    bool use_ere = (program_name == "egrep"sv);
    bool fixed_strings = (program_name == "fgrep"sv);
    Vector<DeprecatedString> patterns;
    BinaryFileMode binary_mode { BinaryFileMode::Binary };
    bool case_insensitive = false;
    bool line_numbers = false;
    bool invert_match = false;
    bool quiet_mode = false;
    bool suppress_errors = false;
    // Colour is on by default only when standard output is a terminal.
    bool colored_output = isatty(STDOUT_FILENO);
    bool count_lines = false;

    // Per-input counter used by --count; reset after each file is reported.
    size_t matched_line_count = 0;

    Core::ArgsParser args_parser;
    args_parser.add_option(recursive, "Recursively scan files", "recursive", 'r');
    args_parser.add_option(use_ere, "Extended regular expressions", "extended-regexp", 'E');
    args_parser.add_option(fixed_strings, "Treat pattern as a string, not a regexp", "fixed-strings", 'F');
    args_parser.add_option(Core::ArgsParser::Option {
        .argument_mode = Core::ArgsParser::OptionArgumentMode::Required,
        .help_string = "Pattern",
        .long_name = "regexp",
        .short_name = 'e',
        .value_name = "Pattern",
        .accept_value = [&](StringView str) {
            // May be given several times; every pattern is collected.
            patterns.append(str);
            return true;
        },
    });
    args_parser.add_option(case_insensitive, "Make matches case-insensitive", nullptr, 'i');
    args_parser.add_option(line_numbers, "Output line-numbers", "line-numbers", 'n');
    args_parser.add_option(invert_match, "Select non-matching lines", "invert-match", 'v');
    args_parser.add_option(quiet_mode, "Do not write anything to standard output", "quiet", 'q');
    args_parser.add_option(suppress_errors, "Suppress error messages for nonexistent or unreadable files", "no-messages", 's');
    args_parser.add_option(Core::ArgsParser::Option {
        .argument_mode = Core::ArgsParser::OptionArgumentMode::Required,
        .help_string = "Action to take for binary files ([binary], text, skip)",
        .long_name = "binary-mode",
        .accept_value = [&](StringView str) {
            if ("text"sv == str)
                binary_mode = BinaryFileMode::Text;
            else if ("binary"sv == str)
                binary_mode = BinaryFileMode::Binary;
            else if ("skip"sv == str)
                binary_mode = BinaryFileMode::Skip;
            else
                return false; // Unknown mode: reject the option value.
            return true;
        },
    });
    args_parser.add_option(Core::ArgsParser::Option {
        .argument_mode = Core::ArgsParser::OptionArgumentMode::None,
        .help_string = "Treat binary files as text (same as --binary-mode text)",
        .long_name = "text",
        .short_name = 'a',
        .accept_value = [&](auto) {
            binary_mode = BinaryFileMode::Text;
            return true;
        },
    });
    args_parser.add_option(Core::ArgsParser::Option {
        .argument_mode = Core::ArgsParser::OptionArgumentMode::None,
        .help_string = "Ignore binary files (same as --binary-mode skip)",
        .long_name = nullptr,
        .short_name = 'I',
        .accept_value = [&](auto) {
            binary_mode = BinaryFileMode::Skip;
            return true;
        },
    });
    args_parser.add_option(Core::ArgsParser::Option {
        .argument_mode = Core::ArgsParser::OptionArgumentMode::Required,
        .help_string = "When to use colored output for the matching text ([auto], never, always)",
        .long_name = "color",
        .short_name = 0,
        .value_name = "WHEN",
        .accept_value = [&](StringView str) {
            if ("never"sv == str)
                colored_output = false;
            else if ("always"sv == str)
                colored_output = true;
            else if ("auto"sv != str)
                return false;
            // "auto" keeps the isatty()-based default chosen above.
            return true;
        },
    });
    args_parser.add_option(count_lines, "Output line count instead of line contents", "count", 'c');
    args_parser.add_positional_argument(files, "File(s) to process", "file", Core::ArgsParser::Required::No);
    args_parser.parse(args);

    // Mimic grep behavior: if -e is omitted, use the first positional argument as the pattern.
    if (patterns.size() == 0 && files.size())
        patterns.append(files.take_first());

    auto user_has_specified_files = !files.is_empty();
    auto user_specified_multiple_files = files.size() >= 2;

    PosixOptions options {};
    if (case_insensitive)
        options |= PosixFlags::Insensitive;

    // The search itself is generic over the regex flavour, so it lives in a
    // lambda that is invoked with either a vector of extended or of basic
    // regular expressions. It returns the process exit code.
    auto grep_logic = [&](auto&& regular_expressions) {
        // Refuse to run if any pattern failed to compile.
        for (auto& re : regular_expressions) {
            if (re.parser_result.error != regex::Error::NoError) {
                warnln("regex parse error: {}", regex::get_error_string(re.parser_result.error));
                return 1;
            }
        }

        // Tests a single line against the patterns in order and, unless
        // running in quiet or count mode, prints it for the first pattern
        // that selects it. Returns true if the line was selected.
        auto matches = [&](StringView str, StringView filename, size_t line_number, bool print_filename, bool is_binary) {
            size_t last_printed_char_pos { 0 };
            if (is_binary && binary_mode == BinaryFileMode::Skip)
                return false;

            for (auto& re : regular_expressions) {
                auto result = re.match(str, PosixFlags::Global);
                // Selected when the match succeeded, or when it failed and -v is set.
                if (!(result.success ^ invert_match))
                    continue;

                if (quiet_mode)
                    return true;

                if (count_lines) {
                    matched_line_count++;
                    return true;
                }

                if (is_binary && binary_mode == BinaryFileMode::Binary) {
                    outln(colored_output ? "binary file \x1B[34m{}\x1B[0m matches"sv : "binary file {} matches"sv, filename);
                } else {
                    if ((result.matches.size() || invert_match) && print_filename)
                        out(colored_output ? "\x1B[34m{}:\x1B[0m"sv : "{}:"sv, filename);
                    if ((result.matches.size() || invert_match) && line_numbers)
                        out(colored_output ? "\x1B[35m{}:\x1B[0m"sv : "{}:"sv, line_number);

                    // Print the text preceding each match followed by the
                    // (optionally highlighted) match itself.
                    for (auto& match : result.matches) {
                        auto pre_match_length = match.global_offset - last_printed_char_pos;
                        out(colored_output ? "{}\x1B[32m{}\x1B[0m"sv : "{}{}"sv,
                            pre_match_length > 0 ? StringView(&str[last_printed_char_pos], pre_match_length) : ""sv,
                            match.view.to_deprecated_string());
                        last_printed_char_pos = match.global_offset + match.view.length();
                    }
                    // Whatever follows the last match completes the line.
                    auto remaining_length = str.length() - last_printed_char_pos;
                    outln("{}", remaining_length > 0 ? StringView(&str[last_printed_char_pos], remaining_length) : ""sv);
                }

                return true;
            }

            return false;
        };

        bool did_match_something = false;

        // Searches one file line by line. In count mode it prints the number
        // of selected lines afterwards and resets the counter for the next file.
        auto handle_file = [&matches, binary_mode, count_lines, quiet_mode,
                               user_specified_multiple_files, &matched_line_count, &did_match_something](StringView filename, bool print_filename) -> ErrorOr<void> {
            auto file = TRY(Core::File::open(filename, Core::File::OpenMode::Read));
            auto buffered_file = TRY(Core::BufferedFile::create(move(file)));

            for (size_t line_number = 1; TRY(buffered_file->can_read_line()); ++line_number) {
                Array<u8, PAGE_SIZE> buffer;
                auto line = TRY(buffered_file->read_line(buffer));

                // A NUL byte in the line marks it as binary data.
                auto is_binary = line.contains('\0');

                auto matched = matches(line, filename, line_number, print_filename, is_binary);
                did_match_something = did_match_something || matched;
                // In Binary mode the file is reported once; stop reading it.
                if (matched && is_binary && binary_mode == BinaryFileMode::Binary)
                    break;
            }

            if (count_lines && !quiet_mode) {
                if (user_specified_multiple_files)
                    outln("{}:{}", filename, matched_line_count);
                else
                    outln("{}", matched_line_count);
                matched_line_count = 0;
            }

            return {};
        };

        // Walks a directory tree and searches every non-directory entry.
        // The lambda receives itself as `handle_directory` so it can recurse.
        auto add_directory = [&handle_file, user_has_specified_files, suppress_errors](DeprecatedString base, Optional<DeprecatedString> recursive, auto handle_directory) -> void {
            Core::DirIterator it(recursive.value_or(base), Core::DirIterator::Flags::SkipDots);
            while (it.has_next()) {
                auto path = it.next_full_path();
                if (!Core::DeprecatedFile::is_directory(path)) {
                    // Without user-specified files the implicit base prefix
                    // (and the separator after it) is stripped from the path.
                    auto key = user_has_specified_files ? path.view() : path.substring_view(base.length() + 1, path.length() - base.length() - 1);
                    if (auto result = handle_file(key, true); result.is_error() && !suppress_errors)
                        warnln("Failed with file {}: {}", key, result.release_error());
                } else {
                    handle_directory(base, path, handle_directory);
                }
            }
        };

        if (!files.size() && !recursive) {
            // No files given: search standard input.
            char* line = nullptr;
            size_t line_len = 0;
            ssize_t nread = 0;
            // Capture by reference: getline() (re)allocates the buffer and
            // stores the new pointer in `line`, so a by-value capture would
            // only ever free the initial nullptr and leak the real buffer.
            ScopeGuard free_line = [&line] { free(line); };
            size_t line_number = 0;
            while ((nread = getline(&line, &line_len, stdin)) != -1) {
                VERIFY(nread > 0);
                // Drop the trailing newline, if any, from the view.
                if (line[nread - 1] == '\n')
                    --nread;
                // Human-readable indexes start at 1, so it's fine to increment already.
                line_number += 1;
                StringView line_view(line, nread);
                bool is_binary = line_view.contains('\0');

                if (is_binary && binary_mode == BinaryFileMode::Skip)
                    return 1;

                auto matched = matches(line_view, "stdin"sv, line_number, false, is_binary);
                did_match_something = did_match_something || matched;
                if (matched && is_binary && binary_mode == BinaryFileMode::Binary)
                    break;
            }

            if (count_lines && !quiet_mode)
                outln("{}", matched_line_count);
        } else {
            if (recursive) {
                if (user_has_specified_files) {
                    for (auto& filename : files) {
                        add_directory(filename, {}, add_directory);
                    }
                } else {
                    // Recursive search without explicit paths starts at the
                    // current directory.
                    add_directory(".", {}, add_directory);
                }
            } else {
                // File names are only printed when more than one file is searched.
                bool print_filename { files.size() > 1 };
                for (auto& filename : files) {
                    auto result = handle_file(filename, print_filename);
                    if (result.is_error()) {
                        if (!suppress_errors)
                            warnln("Failed with file {}: {}", filename, result.release_error());
                        return 1;
                    }
                }
            }
        }

        return did_match_something ? 0 : 1;
    };

    if (use_ere) {
        Vector<Regex<PosixExtended>> regular_expressions;
        for (auto const& pattern : patterns) {
            // With --fixed-strings, regex metacharacters are escaped so the
            // pattern only matches itself literally.
            auto escaped_pattern = (fixed_strings) ? escape_characters(pattern, ere_special_characters) : pattern;
            regular_expressions.append(Regex<PosixExtended>(escaped_pattern, options));
        }
        return grep_logic(regular_expressions);
    }

    Vector<Regex<PosixBasic>> regular_expressions;
    for (auto const& pattern : patterns) {
        auto escaped_pattern = (fixed_strings) ? escape_characters(pattern, basic_special_characters) : pattern;
        regular_expressions.append(Regex<PosixBasic>(escaped_pattern, options));
    }
    return grep_logic(regular_expressions);
}
|