使用 getline() 从文件中获取行
POSIX C 库定义了 getline()
函数。此函数分配一个缓冲区来保存行内容并返回新行,行中的字符数和缓冲区的大小。
从 example.txt
获取每一行的示例程序:
#include <stdlib.h>
#include <stdio.h>
#define FILENAME "example.txt"
int main(void)
{
/* Open the file for reading */
char *line_buf = NULL;
size_t line_buf_size = 0;
int line_count = 0;
ssize_t line_size;
FILE *fp = fopen(FILENAME, "r");
if (!fp)
{
fprintf(stderr, "Error opening file '%s'\n", FILENAME);
return EXIT_FAILURE;
}
/* Get the first line of the file. */
line_size = getline(&line_buf, &line_buf_size, fp);
/* Loop through until we are done with the file. */
while (line_size >= 0)
{
/* Increment our line count */
line_count++;
/* Show the line details */
printf("line[%06d]: chars=%06zd, buf size=%06zu, contents: %s", line_count,
line_size, line_buf_size, line_buf);
/* Get the next line */
line_size = getline(&line_buf, &line_buf_size, fp);
}
/* Free the allocated line buffer */
free(line_buf);
line_buf = NULL;
/* Close the file now that we are done with it */
fclose(fp);
return EXIT_SUCCESS;
}
输入文件 example.txt
This is a file
which has
multiple lines
with various indentation,
blank lines
a really long line to show that getline() will reallocate the line buffer if the length of a line is too long to fit in the buffer it has been given,
and punctuation at the end of the lines.
输出
line[000001]: chars=000015, buf size=000016, contents: This is a file
line[000002]: chars=000012, buf size=000016, contents: which has
line[000003]: chars=000015, buf size=000016, contents: multiple lines
line[000004]: chars=000030, buf size=000032, contents: with various indentation,
line[000005]: chars=000012, buf size=000032, contents: blank lines
line[000006]: chars=000001, buf size=000032, contents:
line[000007]: chars=000001, buf size=000032, contents:
line[000008]: chars=000001, buf size=000032, contents:
line[000009]: chars=000150, buf size=000160, contents: a really long line to show that getline() will reallocate the line buffer if the length of a line is too long to fit in the buffer it has been given,
line[000010]: chars=000042, buf size=000160, contents: and punctuation at the end of the lines.
line[000011]: chars=000001, buf size=000160, contents:
在该示例中,最初调用 getline()
时未分配缓冲区。在第一次调用期间,getline()
分配一个缓冲区,读取第一行并将该行的内容放在新缓冲区中。在后续调用中,getline()
更新相同的缓冲区,并且仅当缓冲区不再大到足以适合整条线时才重新分配缓冲区。完成文件后,将释放临时缓冲区。
另一种选择是 getdelim()
。除了指定行结束字符外,这与 getline()
相同。仅当文件类型的行的最后一个字符不是’\ n’时才需要这样做。getline()
甚至与 Windows 文本文件一起工作,因为多字节行结束("\r\n")
’\ n’`仍然是该行的最后一个字符。
getline()
的示例实现
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <stdint.h>
#if !(defined _POSIX_C_SOURCE)
typedef long int ssize_t;
#endif
/* Only include our version of getline() if the POSIX version isn't available. */
#if !(defined _POSIX_C_SOURCE) || _POSIX_C_SOURCE < 200809L
#if !(defined SSIZE_MAX)
#define SSIZE_MAX (SIZE_MAX >> 1)
#endif
ssize_t getline(char **pline_buf, size_t *pn, FILE *fin)
{
const size_t INITALLOC = 16;
const size_t ALLOCSTEP = 16;
size_t num_read = 0;
/* First check that none of our input pointers are NULL. */
if ((NULL == pline_buf) || (NULL == pn) || (NULL == fin))
{
errno = EINVAL;
return -1;
}
/* If output buffer is NULL, then allocate a buffer. */
if (NULL == *pline_buf)
{
*pline_buf = malloc(INITALLOC);
if (NULL == *pline_buf)
{
/* Can't allocate memory. */
return -1;
}
else
{
/* Note how big the buffer is at this time. */
*pn = INITALLOC;
}
}
/* Step through the file, pulling characters until either a newline or EOF. */
{
int c;
while (EOF != (c = getc(fin)))
{
/* Note we read a character. */
num_read++;
/* Reallocate the buffer if we need more room */
if (num_read >= *pn)
{
size_t n_realloc = *pn + ALLOCSTEP;
char * tmp = realloc(*pline_buf, n_realloc + 1); /* +1 for the trailing NUL. */
if (NULL != tmp)
{
/* Use the new buffer and note the new buffer size. */
*pline_buf = tmp;
*pn = n_realloc;
}
else
{
/* Exit with error and let the caller free the buffer. */
return -1;
}
/* Test for overflow. */
if (SSIZE_MAX < *pn)
{
errno = ERANGE;
return -1;
}
}
/* Add the character to the buffer. */
(*pline_buf)[num_read - 1] = (char) c;
/* Break from the loop if we hit the ending character. */
if (c == '\n')
{
break;
}
}
/* Note if we hit EOF. */
if (EOF == c)
{
errno = 0;
return -1;
}
}
/* Terminate the string by suffixing NUL. */
(*pline_buf)[num_read] = '\0';
return (ssize_t) num_read;
}
#endif