Shell: Make escaping more intelligent

Previously, special characters were only ever escaped with a backslash.
Each character is now handled in one of the following ways:
- Left as-is (no escape)
- Escaped with a backslash
- Escaped as "\xhh" if it is a control character that isn't easily
  represented as \X
- Escaped as "\uhhhhhhhh" if it is a Unicode character that is too big to
  represent as "\xhh".

Fixes #6986.
This commit is contained in:
Ali Mohammad Pur 2021-05-10 11:09:40 +04:30 committed by Andreas Kling
parent 22b244df45
commit 417910fd28
2 changed files with 93 additions and 21 deletions

View file

@ -1142,9 +1142,9 @@ String Shell::escape_token_for_double_quotes(const String& token)
return builder.build();
}
bool Shell::is_special(char c)
Shell::SpecialCharacterEscapeMode Shell::special_character_escape_mode(u32 code_point)
{
switch (c) {
switch (code_point) {
case '\'':
case '"':
case '$':
@ -1156,25 +1156,72 @@ bool Shell::is_special(char c)
case '{':
case '}':
case '&':
case ';':
case '\\':
case ' ':
return true;
return SpecialCharacterEscapeMode::Escaped;
case '\n':
case '\t':
case '\r':
return SpecialCharacterEscapeMode::QuotedAsEscape;
default:
return false;
// FIXME: Should instead use unicode's "graphic" property (categories L, M, N, P, S, Zs)
if (code_point < NumericLimits<i32>::max()) {
if (isascii(static_cast<i32>(code_point)))
return isprint(static_cast<i32>(code_point)) ? SpecialCharacterEscapeMode::Untouched : SpecialCharacterEscapeMode::QuotedAsHex;
}
return SpecialCharacterEscapeMode::Untouched;
}
}
// Returns a copy of `token` in which every character is rewritten according to
// special_character_escape_mode():
//   - Untouched      -> appended unchanged
//   - Escaped        -> appended with a leading backslash
//   - QuotedAsEscape -> replaced by a double-quoted "\n", "\t" or "\r"
//   - QuotedAsHex    -> replaced by a double-quoted "\xhh" (value fits in a u8)
//                       or "\uhhhhhhhh" (anything larger)
//
// If `token` validates as UTF-8 it is walked through a Utf8View (u32-sized
// elements); otherwise it is walked element by element as a plain String
// (u8-sized elements).
String Shell::escape_token(const String& token)
{
    // Generic over the two iteration flavours; the element size is the only
    // thing that differs between them, and it is checked below.
    auto do_escape = [](auto& characters) {
        StringBuilder builder;
        for (auto c : characters) {
            static_assert(sizeof(c) == sizeof(u32) || sizeof(c) == sizeof(u8));
            switch (special_character_escape_mode(c)) {
            case SpecialCharacterEscapeMode::Untouched:
                if constexpr (sizeof(c) == sizeof(u8))
                    builder.append(c);
                else
                    // A u32-sized element is appended via a one-element Utf32View.
                    builder.append(Utf32View { &c, 1 });
                break;
            case SpecialCharacterEscapeMode::Escaped:
                builder.append('\\');
                builder.append(c);
                break;
            case SpecialCharacterEscapeMode::QuotedAsEscape:
                switch (c) {
                case '\n':
                    builder.append(R"("\n")");
                    break;
                case '\t':
                    builder.append(R"("\t")");
                    break;
                case '\r':
                    builder.append(R"("\r")");
                    break;
                default:
                    // Only the three characters above are expected to be
                    // classified as QuotedAsEscape.
                    VERIFY_NOT_REACHED();
                }
                break;
            case SpecialCharacterEscapeMode::QuotedAsHex:
                if (c <= NumericLimits<u8>::max())
                    builder.appendff(R"("\x{:0>2x}")", static_cast<u8>(c));
                else
                    builder.appendff(R"("\u{:0>8x}")", static_cast<u32>(c));
                break;
            }
        }

        return builder.build();
    };

    Utf8View view { token };
    if (view.validate())
        return do_escape(view);
    return do_escape(token);
}
String Shell::unescape_token(const String& token)
@ -2057,5 +2104,4 @@ SavedFileDescriptors::~SavedFileDescriptors()
}
}
}
}

View file

@ -156,7 +156,13 @@ public:
static String escape_token_for_single_quotes(const String& token);
static String escape_token(const String& token);
static String unescape_token(const String& token);
static bool is_special(char c);
// How a single character is rewritten by escape_token(), as classified by
// special_character_escape_mode().
enum class SpecialCharacterEscapeMode {
// The character is emitted unchanged.
Untouched,
// The character is emitted with a leading backslash (X -> \X).
Escaped,
// The character is replaced by a double-quoted escape: "\n", "\t" or "\r".
QuotedAsEscape,
// The character is replaced by a double-quoted numeric escape: "\xhh" when its
// value fits in a u8, "\uhhhhhhhh" otherwise.
QuotedAsHex,
};
static SpecialCharacterEscapeMode special_character_escape_mode(u32 c);
static bool is_glob(const StringView&);
static Vector<StringView> split_path(const StringView&);
@ -352,17 +358,37 @@ inline size_t find_offset_into_node(const String& unescaped_text, size_t escaped
{
    // Walks `unescaped_text` while tracking how far the escaped form of the
    // text has advanced (`offset`). Returns the number of elements consumed
    // from the unescaped text once `offset` equals `escaped_offset`, or the
    // total number of elements if that never happens.
    size_t unescaped_offset = 0;
    size_t offset = 0;

    // Generic so the same walk can run over a Utf8View or over the raw String.
    auto do_find_offset = [&](auto& characters) {
        for (auto c : characters) {
            if (offset == escaped_offset)
                return unescaped_offset;

            // Account for the extra characters the escaped form adds in front
            // of / around this one; the character itself is counted below.
            switch (Shell::special_character_escape_mode(c)) {
            case Shell::SpecialCharacterEscapeMode::Untouched:
                break;
            case Shell::SpecialCharacterEscapeMode::Escaped:
                ++offset; // X -> \X
                break;
            case Shell::SpecialCharacterEscapeMode::QuotedAsEscape:
                offset += 3; // X -> "\Y"
                break;
            case Shell::SpecialCharacterEscapeMode::QuotedAsHex:
                if (c > NumericLimits<u8>::max())
                    offset += 11; // X -> "\uhhhhhhhh"
                else
                    offset += 5; // X -> "\xhh"
                break;
            }
            ++offset;
            ++unescaped_offset;
        }
        return unescaped_offset;
    };

    // Mirror escape_token(): use the Utf8View when the text validates as
    // UTF-8, and fall back to walking the String directly otherwise.
    Utf8View view { unescaped_text };
    if (view.validate())
        return do_find_offset(view);
    return do_find_offset(unescaped_text);
}
}