8 #include "file_operations.h"
17 #include "string_operations.h"
22 int readTokensFromFile(
const char* fileName, GArray** tokens,
const char* delimiters)
24 int fd = open(fileName, O_RDONLY);
27 printf(
"FATAL: can not open %s\n", fileName);
31 *tokens = tokens_new();
33 int needConverter = 1;
34 iconv_t converter = NULL;
36 Token* remainder = NULL;
39 char convertedBuffer[BUFFSIZE];
42 size_t leftFromLast = 0;
43 while ((n = read(fd,
buffer + leftFromLast,
sizeof(
buffer) - leftFromLast)) > 0)
45 size_t len = (size_t) n + leftFromLast;
52 converter = guessConverter(
buffer, len);
58 size_t inputLeft = len;
60 char* output = convertedBuffer;
61 size_t outputLength =
sizeof(convertedBuffer);
62 iconv(converter, &input, &inputLeft, &output, &outputLength);
64 if (outputLength !=
sizeof(convertedBuffer)) {
65 chunk = convertedBuffer;
66 len =
sizeof(convertedBuffer) - outputLength;
68 leftFromLast = inputLeft;
69 for (
size_t i = 0; i < leftFromLast; i++)
75 printf(
"WARNING: cannot re-encode '%s', going binary from now on\n", fileName);
76 iconv_close(converter);
84 int addedTokens = streamTokenize(chunk, len, delimiters, tokens, &remainder);
87 printf(
"WARNING: can not complete tokenizing of '%s'\n", fileName);
92 streamTokenize(
buffer, leftFromLast, delimiters, tokens, &remainder);
93 streamTokenize(NULL, 0, NULL, tokens, &remainder);
99 iconv_close(converter);
char buffer[2048]
The last thing received from the scheduler.