Userland: Refactor head(1) to not use stdio

This fixes extensive copying data around, and also makes head(1) in
bytes mode read exactly as much data as it needs.

Also, rename --characters to --bytes: that's exactly what it does
(actual character counting is way more complicated), and that's what
the option is called in GNU coreutils.

Fixes https://github.com/SerenityOS/serenity/issues/6852
This commit is contained in:
Sergey Bugaev 2021-05-04 18:41:38 +03:00 committed by Andreas Kling
parent d288f6654e
commit 72125827da

View file

@ -4,14 +4,16 @@
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/ScopeGuard.h>
#include <AK/StdLibExtras.h>
#include <LibCore/ArgsParser.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
int head(const String& filename, bool print_filename, int line_count, int char_count);
int head(const String& filename, bool print_filename, ssize_t line_count, ssize_t byte_count);
int main(int argc, char** argv)
{
@ -20,8 +22,8 @@ int main(int argc, char** argv)
return 1;
}
int line_count = 0;
int char_count = 0;
int line_count = -1;
int byte_count = -1;
bool never_print_filenames = false;
bool always_print_filenames = false;
Vector<const char*> files;
@ -29,13 +31,13 @@ int main(int argc, char** argv)
Core::ArgsParser args_parser;
args_parser.set_general_help("Print the beginning ('head') of a file.");
args_parser.add_option(line_count, "Number of lines to print (default 10)", "lines", 'n', "number");
args_parser.add_option(char_count, "Number of characters to print", "characters", 'c', "number");
args_parser.add_option(byte_count, "Number of bytes to print", "bytes", 'c', "number");
args_parser.add_option(never_print_filenames, "Never print filenames", "quiet", 'q');
args_parser.add_option(always_print_filenames, "Always print filenames", "verbose", 'v');
args_parser.add_positional_argument(files, "File to process", "file", Core::ArgsParser::Required::No);
args_parser.parse(argc, argv);
if (line_count == 0 && char_count == 0) {
if (line_count == -1 && byte_count == -1) {
line_count = 10;
}
@ -46,13 +48,13 @@ int main(int argc, char** argv)
print_filenames = false;
if (files.is_empty()) {
return head("", print_filenames, line_count, char_count);
return head("", print_filenames, line_count, byte_count);
}
int rc = 0;
for (auto& file : files) {
if (head(file, print_filenames, line_count, char_count) != 0) {
if (head(file, print_filenames, line_count, byte_count) != 0) {
rc = 1;
}
}
@ -60,17 +62,22 @@ int main(int argc, char** argv)
return rc;
}
int head(const String& filename, bool print_filename, int line_count, int char_count)
int head(const String& filename, bool print_filename, ssize_t line_count, ssize_t byte_count)
{
bool is_stdin = false;
FILE* fp = nullptr;
int fd = -1;
ScopeGuard fd_close_guard = [&fd] {
if (fd > 0)
close(fd);
};
if (filename == "" || filename == "-") {
fp = stdin;
fd = 0;
is_stdin = true;
} else {
fp = fopen(filename.characters(), "r");
if (!fp) {
fd = open(filename.characters(), O_RDONLY);
if (fd < 0) {
fprintf(stderr, "can't open %s for reading: %s\n", filename.characters(), strerror(errno));
return 1;
}
@ -84,56 +91,55 @@ int head(const String& filename, bool print_filename, int line_count, int char_c
}
}
if (line_count) {
for (int line = 0; line < line_count; ++line) {
char buffer[BUFSIZ];
auto* str = fgets(buffer, sizeof(buffer), fp);
if (!str)
break;
fflush(stdout);
// specifically use fputs rather than puts, because fputs doesn't add
// its own newline.
fputs(str, stdout);
size_t buffer_size = line_count != -1 ? BUFSIZ : min((size_t)BUFSIZ, (size_t)byte_count);
char buffer[buffer_size];
while (line_count > 0 || byte_count > 0) {
size_t ntoread = line_count != -1 ? buffer_size : min(buffer_size, (size_t)byte_count);
ssize_t nread = read(fd, buffer, ntoread);
if (nread < 0) {
perror("read");
return 1;
} else if (nread == 0) {
break;
}
} else if (char_count) {
char buffer[BUFSIZ];
while (char_count) {
int nread = fread(buffer, 1, min(BUFSIZ, char_count), fp);
if (nread > 0) {
int ncomplete = 0;
while (ncomplete < nread) {
int nwrote = fwrite(&buffer[ncomplete], 1, nread - ncomplete, stdout);
if (nwrote > 0)
ncomplete += nwrote;
if (feof(stdout)) {
fprintf(stderr, "unexpected eof writing to stdout\n");
return 1;
}
if (ferror(stdout)) {
fprintf(stderr, "error writing to stdout\n");
return 1;
}
size_t ntowrite;
if (byte_count != -1) {
// Write out everything we've read, since we have explicitly ensured
// that we wouldn't read more than we want to write.
ntowrite = nread;
byte_count -= nread;
} else {
// Count line breaks.
ntowrite = 0;
while (line_count) {
const char* newline = strchr(buffer + ntowrite, '\n');
if (newline) {
// Found another line break, include this line.
ntowrite = newline - buffer + 1;
line_count--;
} else {
// No more line breaks, write the whole thing.
ntowrite = nread;
break;
}
}
}
char_count -= nread;
if (feof(fp))
break;
if (ferror(fp)) {
fprintf(stderr, "error reading input\n");
break;
size_t ncomplete = 0;
while (ncomplete < ntowrite) {
ssize_t nwritten = write(1, buffer + ncomplete, ntowrite - ncomplete);
if (nwritten < 0) {
perror("write");
return 1;
}
ncomplete += nwritten;
}
}
fclose(fp);
if (print_filename) {
puts("");
}