From 11440fa78f0404a377bfbaf74d35904025ae36dc Mon Sep 17 00:00:00 2001 From: Itamar Date: Sat, 12 Sep 2020 20:38:55 +0300 Subject: [PATCH] LibDiff: Add library for working with diffs LibDiff currently contains functionality for parsing diffs in the "unified format" and for generating a simple diff that contains only additions. --- Libraries/CMakeLists.txt | 1 + Libraries/LibDiff/CMakeLists.txt | 8 ++ Libraries/LibDiff/Format.cpp | 43 +++++++++ Libraries/LibDiff/Format.h | 33 +++++++ Libraries/LibDiff/Hunks.cpp | 151 +++++++++++++++++++++++++++++++ Libraries/LibDiff/Hunks.h | 57 ++++++++++++ 6 files changed, 293 insertions(+) create mode 100644 Libraries/LibDiff/CMakeLists.txt create mode 100644 Libraries/LibDiff/Format.cpp create mode 100644 Libraries/LibDiff/Format.h create mode 100644 Libraries/LibDiff/Hunks.cpp create mode 100644 Libraries/LibDiff/Hunks.h diff --git a/Libraries/CMakeLists.txt b/Libraries/CMakeLists.txt index 23aa1451c87..b254c8046bf 100644 --- a/Libraries/CMakeLists.txt +++ b/Libraries/CMakeLists.txt @@ -27,3 +27,4 @@ add_subdirectory(LibTLS) add_subdirectory(LibVT) add_subdirectory(LibWeb) add_subdirectory(LibX86) +add_subdirectory(LibDiff) diff --git a/Libraries/LibDiff/CMakeLists.txt b/Libraries/LibDiff/CMakeLists.txt new file mode 100644 index 00000000000..fe81fedda92 --- /dev/null +++ b/Libraries/LibDiff/CMakeLists.txt @@ -0,0 +1,8 @@ + +set(SOURCES + Hunks.cpp + Format.cpp +) + +serenity_lib(LibDiff diff) +target_link_libraries(LibDiff LibC) diff --git a/Libraries/LibDiff/Format.cpp b/Libraries/LibDiff/Format.cpp new file mode 100644 index 00000000000..d1a5711247c --- /dev/null +++ b/Libraries/LibDiff/Format.cpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2020, Itamar S. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "Format.h" +#include +#include +#include + +namespace Diff {
+// Renders `text` as a diff body in which every input line is an addition:
+// one "@@ ... @@" header line followed by each line of `text` prefixed
+// with '+'.
+//
+// `text` is split on '\n' with empty entries kept, so blank lines are
+// emitted as bare "+" lines and are included in the count written into the
+// header.
+//
+// NOTE(review): an addition-only diff is conventionally headed "-0,0"; the
+// "-1,N" shape of the existing format string is kept here. Confirm what the
+// consumers of this header expect before changing it.
+String generate_only_additions(const String& text)
+{
+    const auto added_lines = text.split('\n', true); // Keep empty
+
+    // The header has two "%u" conversions, so it needs two arguments, and
+    // each must really be an `unsigned` (size() is a size_t). Passing a
+    // single size_t left the second conversion reading a missing argument.
+    const auto line_count = static_cast<unsigned>(added_lines.size());
+
+    StringBuilder builder;
+    builder.appendf("@@ -1,%u +1,%u @@\n", line_count, line_count);
+    for (const auto& line : added_lines)
+        builder.appendf("+%s\n", line.characters());
+    return builder.to_string();
+}
+}; diff --git a/Libraries/LibDiff/Format.h b/Libraries/LibDiff/Format.h new file mode 100644 index 00000000000..73cbbcf4074 --- /dev/null +++ b/Libraries/LibDiff/Format.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2020, Itamar S. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include + +namespace Diff { +String generate_only_additions(const String&); +}; diff --git a/Libraries/LibDiff/Hunks.cpp b/Libraries/LibDiff/Hunks.cpp new file mode 100644 index 00000000000..c7828c2f64b --- /dev/null +++ b/Libraries/LibDiff/Hunks.cpp @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2020, Itamar S. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "Hunks.h" + +// #define DEBUG_HUNKS + +namespace Diff {
+// Extracts the changed regions from a unified-format diff.
+//
+// Everything before the first line starting with '@' is ignored. From there
+// on each line is classified by its first character:
+//   '@'  hunk header; resets the tracked location via parse_hunk_location()
+//   ' '  context line; advances the location on both sides
+//   '-'  removed line; collected and advances the original side
+//   '+'  added line; collected and advances the target side
+// Any other line is skipped. Each run of removed lines followed by added
+// lines becomes one Hunk, stamped with the location tracked so far. Collected
+// lines are stored without their leading marker character.
+//
+// Returns an empty Vector when `diff` has no lines or no hunk header.
+Vector<Hunk> parse_hunks(const String& diff)
+{
+    Vector<String> diff_lines = diff.split('\n');
+    if (diff_lines.is_empty())
+        return {};
+
+    // First character of the given line, or '\0' if the line is empty, so
+    // that classification never indexes into an empty string.
+    auto marker_at = [&](size_t index) -> char {
+        const auto& line = diff_lines[index];
+        return line.length() == 0 ? '\0' : line[0];
+    };
+
+    Vector<Hunk> hunks;
+
+    size_t line_index = 0;
+    HunkLocation current_location {};
+
+    // Skip to first hunk. The bound check matters: input without any '@'
+    // line would otherwise be indexed past its end.
+    while (line_index < diff_lines.size() && marker_at(line_index) != '@')
+        ++line_index;
+
+    while (line_index < diff_lines.size()) {
+        const char marker = marker_at(line_index);
+        if (marker == '@') {
+            current_location = parse_hunk_location(diff_lines[line_index]);
+            ++line_index;
+            continue;
+        }
+        if (marker == ' ') {
+            current_location.apply_offset(1, HunkLocation::LocationType::Both);
+            ++line_index;
+            continue;
+        }
+        if (marker != '-' && marker != '+') {
+            // Not part of a hunk body (e.g. "\ No newline at end of file").
+            // It has to be consumed here: the collectors below would take
+            // nothing from it, and the loop would append empty hunks forever.
+            ++line_index;
+            continue;
+        }
+
+        Hunk hunk {};
+        hunk.original_start_line = current_location.original_start_line;
+        hunk.target_start_line = current_location.target_start_line;
+
+        while (line_index < diff_lines.size() && marker_at(line_index) == '-') {
+            const auto& line = diff_lines[line_index];
+            hunk.removed_lines.append(line.substring(1, line.length() - 1));
+            current_location.apply_offset(1, HunkLocation::LocationType::Original);
+            ++line_index;
+        }
+        while (line_index < diff_lines.size() && marker_at(line_index) == '+') {
+            const auto& line = diff_lines[line_index];
+            hunk.added_lines.append(line.substring(1, line.length() - 1));
+            current_location.apply_offset(1, HunkLocation::LocationType::Target);
+            ++line_index;
+        }
+
+        // Context lines that follow are handled by the ' ' case at the top of
+        // the loop on the next iterations.
+        hunks.append(hunk);
+    }
+
+#ifdef DEBUG_HUNKS
+    for (const auto& hunk : hunks) {
+        dbg() << "Hunk location:";
+        dbg() << "orig: " << hunk.original_start_line;
+        dbg() << "target: " << hunk.target_start_line;
+        dbg() << "removed:";
+        for (const auto& line : hunk.removed_lines) {
+            dbg() << "- " << line;
+        }
+        dbg() << "added:";
+        for (const auto& line : hunk.added_lines) {
+            dbg() << "+ " << line;
+        }
+    }
+#endif
+
+    return hunks;
+}
+
+// Parses a hunk header of the form
+//     @@ -<start>[,<length>] +<start>[,<length>] @@
+// into a HunkLocation. The '-' range fills the original_* fields and the '+'
+// range fills the target_* fields.
+//
+// Every parsed number is stored one less than written in the header, except
+// that zero stays zero. A range written without ",<length>" is treated as
+// having length 1.
+//
+// The header layout is checked with ASSERT only, and numeric fields are
+// converted unconditionally; malformed headers are not reported to the caller.
+HunkLocation parse_hunk_location(const String& location_line)
+{
+    struct StartAndLength {
+        size_t start { 0 };
+        size_t length { 0 };
+    };
+    auto parse_start_and_length_pair = [](const String& raw) {
+        size_t start = 0;
+        // The unified format leaves out ",<length>" for a single-line range,
+        // so a missing separator means a length of one.
+        size_t length = 1;
+        auto index_of_separator = raw.index_of(",");
+        if (index_of_separator.has_value()) {
+            auto separator = index_of_separator.value();
+            start = raw.substring(0, separator).to_uint().value();
+            length = raw.substring(separator + 1, raw.length() - separator - 1).to_uint().value();
+        } else {
+            start = raw.to_uint().value();
+        }
+        // Decrement without wrapping: "0" is legal in a header (e.g. "-0,0"
+        // for a newly created file) and must not become a huge unsigned value.
+        if (start != 0)
+            --start;
+        if (length != 0)
+            --length;
+        return StartAndLength { start, length };
+    };
+
+    size_t char_index = 0;
+
+    // Advance to just past the '-' that introduces the original range.
+    while (char_index < location_line.length() && location_line[char_index++] != '-') {
+    }
+    ASSERT(char_index < location_line.length());
+
+    size_t original_location_start_index = char_index;
+
+    // Advance to just past the space that ends the original range; the next
+    // character must be the '+' introducing the target range.
+    while (char_index < location_line.length() && location_line[char_index++] != ' ') {
+    }
+    ASSERT(char_index < location_line.length() && location_line[char_index] == '+');
+    size_t original_location_end_index = char_index - 2;
+
+    size_t target_location_start_index = char_index + 1;
+
+    char_index += 1;
+    // Advance to just past the space that ends the target range.
+    while (char_index < location_line.length() && location_line[char_index++] != ' ') {
+    }
+    ASSERT(char_index < location_line.length());
+
+    size_t target_location_end_index = char_index - 2;
+
+    auto original_pair = parse_start_and_length_pair(location_line.substring(original_location_start_index, original_location_end_index - original_location_start_index + 1));
+    auto target_pair = parse_start_and_length_pair(location_line.substring(target_location_start_index, target_location_end_index - target_location_start_index + 1));
+    return { original_pair.start, original_pair.length, target_pair.start, target_pair.length };
+}
+
+// Moves the tracked position forward by `offset` lines on the side(s) chosen
+// by `type`: the start line grows by `offset` and the length shrinks by it.
+void HunkLocation::apply_offset(size_t offset, HunkLocation::LocationType type)
+{
+    // Lengths are unsigned; shrink them with a floor of zero so that
+    // consuming more lines than the stored length does not wrap around.
+    auto shrink = [offset](size_t& length) {
+        length = (length > offset) ? length - offset : 0;
+    };
+
+    if (type == LocationType::Original || type == LocationType::Both) {
+        original_start_line += offset;
+        shrink(original_length);
+    }
+    if (type == LocationType::Target || type == LocationType::Both) {
+        target_start_line += offset;
+        shrink(target_length);
+    }
+}
+
+}; diff --git a/Libraries/LibDiff/Hunks.h b/Libraries/LibDiff/Hunks.h new file mode 100644 index 00000000000..a24dd2e72c6 --- /dev/null +++ b/Libraries/LibDiff/Hunks.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2020, Itamar S. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +#include +#include + +namespace Diff {
+
+// A position inside a diff, tracked separately for the two sides of the
+// comparison ("original" and "target"). parse_hunk_location() produces one
+// from a hunk header, taking the original_* fields from the '-' range and
+// the target_* fields from the '+' range.
+struct HunkLocation {
+    size_t original_start_line { 0 };
+    size_t original_length { 0 };
+    size_t target_start_line { 0 };
+    size_t target_length { 0 };
+
+    // Selects which side(s) apply_offset() acts on.
+    enum class LocationType {
+        Original,
+        Target,
+        Both
+    };
+    // Advances the start line of the selected side(s) by `offset` and reduces
+    // the corresponding length by the same amount.
+    void apply_offset(size_t offset, LocationType);
+};
+
+// One change found by parse_hunks(): the lines removed from the original and
+// the lines added in the target, together with the location on each side at
+// which the change begins.
+struct Hunk {
+    size_t original_start_line { 0 };
+    size_t target_start_line { 0 };
+    // Line contents without the leading '-' / '+' marker.
+    Vector<String> removed_lines;
+    Vector<String> added_lines;
+};
+
+// Parses a unified-format diff into its hunks.
+Vector<Hunk> parse_hunks(const String& diff);
+// Parses a single "@@ -a,b +c,d @@" hunk header line.
+HunkLocation parse_hunk_location(const String& location_line);
+};