ladybird/Userland/Libraries/LibDiff/Generator.cpp
Shannon Booth b738929195 LibDiff: Fix wrong index used when prepending context lines
`i` is used as the index for 'old lines' in diff generation, not 'new
lines'. Using the wrong index would mean that for certain diffs the
prefixed context information would have wrong content, and could even
result in a crash.

Fix this, and add a test for an input which was previously crashing.
2023-09-11 12:10:50 +01:00

165 lines
5.9 KiB
C++

/*
* Copyright (c) 2021, Mustafa Quraish <mustafa@serenityos.org>
* Copyright (c) 2023, Shannon Booth <shannon@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include "Generator.h"
namespace Diff {
// Computes a line-based diff between old_text and new_text and returns it as a list of hunks.
//
// Both inputs are split into lines and a longest-common-subsequence (LCS) table over those lines
// is built. Walking that table from the start of both texts classifies every line as a removal
// (only in old_text), an addition (only in new_text) or unchanged. Each run of changes becomes a
// Hunk that is prefixed and suffixed with up to `context` unchanged lines.
//
// Any error from try_resize(), try_append() or String::from_utf8() is propagated to the caller.
//
// NOTE(review): when two changes are separated by more than `context` but fewer than
// 2 * `context` unchanged lines, the leading context of the second hunk repeats lines that were
// already emitted as trailing context of the first one. Confirm whether consumers tolerate that.
ErrorOr<Vector<Hunk>> from_text(StringView old_text, StringView new_text, size_t context)
{
    auto old_lines = old_text.lines();
    auto new_lines = new_text.lines();

    /**
     * This is a simple implementation of the Longest Common Subsequence algorithm (over
     * the lines of the text as opposed to the characters). A Dynamic programming approach
     * is used here.
     */

    enum class Direction {
        Down,     // Added a new line
        Right,    // Removed a line
        Diagonal, // Line remained the same
    };

    // A single cell in the DP-matrix. Cell (i, j) holds the length of the longest common
    // sub-sequence of lines between old_lines[i : ] and new_lines[j : ] (the table is filled
    // from the end towards the start), together with the step to take from that cell.
    struct Cell {
        size_t length;
        Direction direction;
    };

    auto dp_matrix = Vector<Cell>();
    TRY(dp_matrix.try_resize((old_lines.size() + 1) * (new_lines.size() + 1)));

    // Two-dimensional view onto the flat matrix: i indexes old lines, j indexes new lines.
    auto dp = [&dp_matrix, width = old_lines.size() + 1](size_t i, size_t j) -> Cell& {
        return dp_matrix[i + width * j];
    };

    // Initialize the boundary cells (one text fully consumed): the common sub-sequence there is empty.
    for (size_t i = 0; i <= old_lines.size(); ++i)
        dp(i, new_lines.size()) = { 0, Direction::Right };
    for (size_t j = 0; j <= new_lines.size(); ++j)
        dp(old_lines.size(), j) = { 0, Direction::Down };

    // Fill in the rest of the DP table, from the last line of each text back to the first.
    // `i-- > 0` iterates size() - 1 down to 0 without leaving the unsigned domain.
    for (size_t i = old_lines.size(); i-- > 0;) {
        for (size_t j = new_lines.size(); j-- > 0;) {
            if (old_lines[i] == new_lines[j]) {
                dp(i, j) = { dp(i + 1, j + 1).length + 1, Direction::Diagonal };
            } else {
                auto down = dp(i, j + 1).length;
                auto right = dp(i + 1, j).length;
                // On a tie prefer Right, i.e. emit the removal before the addition.
                if (down > right)
                    dp(i, j) = { down, Direction::Down };
                else
                    dp(i, j) = { right, Direction::Right };
            }
        }
    }

    Vector<Hunk> hunks;
    Hunk cur_hunk;

    // Appends the hunk under construction to the result and empties its line list so that the
    // next change starts a fresh hunk. Start lines are tracked zero-based and converted to
    // one-based here.
    auto flush_hunk = [&]() -> ErrorOr<void> {
        // A file with no content has a zero indexed start line.
        if (cur_hunk.location.new_range.start_line != 0 || cur_hunk.location.new_range.number_of_lines != 0)
            cur_hunk.location.new_range.start_line++;
        if (cur_hunk.location.old_range.start_line != 0 || cur_hunk.location.old_range.number_of_lines != 0)
            cur_hunk.location.old_range.start_line++;

        TRY(hunks.try_append(cur_hunk));
        cur_hunk.lines.clear();
        return {};
    };

    // Current position in old_lines (i) and new_lines (j).
    size_t i = 0;
    size_t j = 0;

    auto set_up_hunk_prepended_with_context = [&](Hunk& hunk) -> ErrorOr<void> {
        // Prefix the hunk with requested number context lines, and set the hunk location to where that context begins.
        size_t available_context = min(i, context);
        hunk.location.old_range = { i - available_context, available_context };
        hunk.location.new_range = { j - available_context, available_context };

        for (size_t offset = 0; offset < available_context; ++offset) {
            // The context is taken from the old text, so it is indexed relative to i.
            size_t context_line = i + offset - available_context;
            TRY(hunk.lines.try_append(Line { Line::Operation::Context, TRY(String::from_utf8(old_lines[context_line])) }));
        }

        return {};
    };

    // Number of consecutive unchanged lines seen since the last addition or removal.
    size_t current_context = 0;

    while (i < old_lines.size() && j < new_lines.size()) {
        auto& cell = dp(i, j);
        if (cell.direction == Direction::Down) {
            if (cur_hunk.lines.is_empty())
                TRY(set_up_hunk_prepended_with_context(cur_hunk));

            TRY(cur_hunk.lines.try_append(Line { Line::Operation::Addition, TRY(String::from_utf8(new_lines[j])) }));
            cur_hunk.location.new_range.number_of_lines++;
            ++j;
            current_context = 0;
        } else if (cell.direction == Direction::Right) {
            if (cur_hunk.lines.is_empty())
                TRY(set_up_hunk_prepended_with_context(cur_hunk));

            TRY(cur_hunk.lines.try_append(Line { Line::Operation::Removal, TRY(String::from_utf8(old_lines[i])) }));
            cur_hunk.location.old_range.number_of_lines++;
            ++i;
            current_context = 0;
        } else {
            if (!cur_hunk.lines.is_empty()) {
                // We're currently in the middle of generating a hunk and have found a context line. If we have already added
                // the number of context lines that were requested then we have already finished with this hunk. Otherwise we
                // need to continue looking through the hunk until we have located the requested number of context lines in a
                // row.
                if (current_context == context) {
                    TRY(flush_hunk());
                } else {
                    TRY(cur_hunk.lines.try_append(Line { Line::Operation::Context, TRY(String::from_utf8(old_lines[i])) }));
                    cur_hunk.location.new_range.number_of_lines++;
                    cur_hunk.location.old_range.number_of_lines++;
                }
                ++current_context;
            }
            ++i;
            ++j;
        }
    }

    // The new text is exhausted: whatever is left of the old text was removed.
    while (i < old_lines.size()) {
        if (cur_hunk.lines.is_empty())
            TRY(set_up_hunk_prepended_with_context(cur_hunk));

        TRY(cur_hunk.lines.try_append(Line { Line::Operation::Removal, TRY(String::from_utf8(old_lines[i])) }));
        cur_hunk.location.old_range.number_of_lines++;
        ++i;
    }

    // The old text is exhausted: whatever is left of the new text was added.
    while (j < new_lines.size()) {
        if (cur_hunk.lines.is_empty())
            TRY(set_up_hunk_prepended_with_context(cur_hunk));

        TRY(cur_hunk.lines.try_append(Line { Line::Operation::Addition, TRY(String::from_utf8(new_lines[j])) }));
        cur_hunk.location.new_range.number_of_lines++;
        ++j;
    }

    // Emit the final hunk if one is still open.
    if (!cur_hunk.lines.is_empty())
        TRY(flush_hunk());

    return hunks;
}
}