使用 getline() 從檔案中獲取行
POSIX C 庫定義了 getline()
函式。此函式分配一個緩衝區來儲存行內容並返回新行,行中的字元數和緩衝區的大小。
從 example.txt
獲取每一行的示例程式:
#include <stdlib.h>
#include <stdio.h>
#define FILENAME "example.txt"
int main(void)
{
/* Open the file for reading */
char *line_buf = NULL;
size_t line_buf_size = 0;
int line_count = 0;
ssize_t line_size;
FILE *fp = fopen(FILENAME, "r");
if (!fp)
{
fprintf(stderr, "Error opening file '%s'\n", FILENAME);
return EXIT_FAILURE;
}
/* Get the first line of the file. */
line_size = getline(&line_buf, &line_buf_size, fp);
/* Loop through until we are done with the file. */
while (line_size >= 0)
{
/* Increment our line count */
line_count++;
/* Show the line details */
printf("line[%06d]: chars=%06zd, buf size=%06zu, contents: %s", line_count,
line_size, line_buf_size, line_buf);
/* Get the next line */
line_size = getline(&line_buf, &line_buf_size, fp);
}
/* Free the allocated line buffer */
free(line_buf);
line_buf = NULL;
/* Close the file now that we are done with it */
fclose(fp);
return EXIT_SUCCESS;
}
輸入檔案 example.txt
This is a file
which has
multiple lines
with various indentation,
blank lines
a really long line to show that getline() will reallocate the line buffer if the length of a line is too long to fit in the buffer it has been given,
and punctuation at the end of the lines.
輸出
line[000001]: chars=000015, buf size=000016, contents: This is a file
line[000002]: chars=000012, buf size=000016, contents: which has
line[000003]: chars=000015, buf size=000016, contents: multiple lines
line[000004]: chars=000030, buf size=000032, contents: with various indentation,
line[000005]: chars=000012, buf size=000032, contents: blank lines
line[000006]: chars=000001, buf size=000032, contents:
line[000007]: chars=000001, buf size=000032, contents:
line[000008]: chars=000001, buf size=000032, contents:
line[000009]: chars=000150, buf size=000160, contents: a really long line to show that getline() will reallocate the line buffer if the length of a line is too long to fit in the buffer it has been given,
line[000010]: chars=000042, buf size=000160, contents: and punctuation at the end of the lines.
line[000011]: chars=000001, buf size=000160, contents:
在該示例中,最初呼叫 getline()
時未分配緩衝區。在第一次呼叫期間,getline()
分配一個緩衝區,讀取第一行並將該行的內容放在新緩衝區中。在後續呼叫中,getline()
更新相同的緩衝區,並且僅當緩衝區不再大到足以適合整條線時才重新分配緩衝區。完成檔案後,將釋放臨時緩衝區。
另一種選擇是 getdelim()
。除了指定行結束字元外,這與 getline()
相同。僅當檔案型別的行的最後一個字元不是’\ n’時才需要這樣做。getline()
甚至與 Windows 文字檔案一起工作,因為多位元組行結束("\r\n")
’\ n’`仍然是該行的最後一個字元。
getline()
的示例實現
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <stdint.h>
#if !(defined _POSIX_C_SOURCE)
typedef long int ssize_t;
#endif
/* Only include our version of getline() if the POSIX version isn't available. */
#if !(defined _POSIX_C_SOURCE) || _POSIX_C_SOURCE < 200809L
#if !(defined SSIZE_MAX)
#define SSIZE_MAX (SIZE_MAX >> 1)
#endif
ssize_t getline(char **pline_buf, size_t *pn, FILE *fin)
{
const size_t INITALLOC = 16;
const size_t ALLOCSTEP = 16;
size_t num_read = 0;
/* First check that none of our input pointers are NULL. */
if ((NULL == pline_buf) || (NULL == pn) || (NULL == fin))
{
errno = EINVAL;
return -1;
}
/* If output buffer is NULL, then allocate a buffer. */
if (NULL == *pline_buf)
{
*pline_buf = malloc(INITALLOC);
if (NULL == *pline_buf)
{
/* Can't allocate memory. */
return -1;
}
else
{
/* Note how big the buffer is at this time. */
*pn = INITALLOC;
}
}
/* Step through the file, pulling characters until either a newline or EOF. */
{
int c;
while (EOF != (c = getc(fin)))
{
/* Note we read a character. */
num_read++;
/* Reallocate the buffer if we need more room */
if (num_read >= *pn)
{
size_t n_realloc = *pn + ALLOCSTEP;
char * tmp = realloc(*pline_buf, n_realloc + 1); /* +1 for the trailing NUL. */
if (NULL != tmp)
{
/* Use the new buffer and note the new buffer size. */
*pline_buf = tmp;
*pn = n_realloc;
}
else
{
/* Exit with error and let the caller free the buffer. */
return -1;
}
/* Test for overflow. */
if (SSIZE_MAX < *pn)
{
errno = ERANGE;
return -1;
}
}
/* Add the character to the buffer. */
(*pline_buf)[num_read - 1] = (char) c;
/* Break from the loop if we hit the ending character. */
if (c == '\n')
{
break;
}
}
/* Note if we hit EOF. */
if (EOF == c)
{
errno = 0;
return -1;
}
}
/* Terminate the string by suffixing NUL. */
(*pline_buf)[num_read] = '\0';
return (ssize_t) num_read;
}
#endif