10 #include <CUnit/CUnit.h>
14 #include "string_operations.h"
18 void test_tokenize() {
19 char* test = g_strdup(
"^foo^^ba^");
21 GArray* token = tokenize(test,
"^");
23 CU_ASSERT_EQUAL(token->len, 2);
24 CU_ASSERT_EQUAL(g_array_index(token,
Token, 0).hashedContent, hash(
"foo"));
25 CU_ASSERT_EQUAL(g_array_index(token,
Token, 0).length, 3);
26 CU_ASSERT_EQUAL(g_array_index(token,
Token, 0).removedBefore, 1);
27 CU_ASSERT_EQUAL(g_array_index(token,
Token, 1).hashedContent, hash(
"ba"));
28 CU_ASSERT_EQUAL(g_array_index(token,
Token, 1).length, 2);
29 CU_ASSERT_EQUAL(g_array_index(token,
Token, 1).removedBefore, 2);
31 g_array_free(token, TRUE);
35 void test_tokenizeWithSpecialDelims() {
36 char* test = g_strdup(
"/*foo \n * bar \n *baz*/ ***booo \n:: qoo \ndnl zit ");
38 GArray* token = tokenize(test,
" \n");
39 CU_ASSERT_EQUAL(token->len, 6);
40 CU_ASSERT_EQUAL(g_array_index(token,
Token, 0).hashedContent, hash(
"foo"));
41 CU_ASSERT_EQUAL(g_array_index(token,
Token, 0).length, 3);
42 CU_ASSERT_EQUAL(g_array_index(token,
Token, 0).removedBefore, 2);
43 CU_ASSERT_EQUAL(g_array_index(token,
Token, 1).hashedContent, hash(
"bar"));
44 CU_ASSERT_EQUAL(g_array_index(token,
Token, 1).length, 3);
45 CU_ASSERT_EQUAL(g_array_index(token,
Token, 1).removedBefore, 5);
46 CU_ASSERT_EQUAL(g_array_index(token,
Token, 2).hashedContent, hash(
"baz"));
47 CU_ASSERT_EQUAL(g_array_index(token,
Token, 2).length, 3);
48 CU_ASSERT_EQUAL(g_array_index(token,
Token, 2).removedBefore, 4);
49 CU_ASSERT_EQUAL(g_array_index(token,
Token, 3).hashedContent, hash(
"booo"));
50 CU_ASSERT_EQUAL(g_array_index(token,
Token, 3).length, 4);
51 CU_ASSERT_EQUAL(g_array_index(token,
Token, 3).removedBefore, 6);
52 CU_ASSERT_EQUAL(g_array_index(token,
Token, 4).hashedContent, hash(
"qoo"));
53 CU_ASSERT_EQUAL(g_array_index(token,
Token, 4).length, 3);
54 CU_ASSERT_EQUAL(g_array_index(token,
Token, 4).removedBefore, 5);
55 CU_ASSERT_EQUAL(g_array_index(token,
Token, 5).hashedContent, hash(
"zit"));
56 CU_ASSERT_EQUAL(g_array_index(token,
Token, 5).length, 3);
57 CU_ASSERT_EQUAL(g_array_index(token,
Token, 5).removedBefore, 6);
58 g_array_free(token, TRUE);
62 void test_streamTokenize() {
63 char* test = g_strdup(
"^foo^^ba^REM^boooREM^REM^");
64 const char* delimiters =
"^";
66 GArray* token = tokens_new();
68 Token* remainder = NULL;
70 size_t len = strlen(test);
76 unsigned int tokenCount = token->len;
77 size_t thisChunkSize =
MIN(chunkSize, len - rea);
79 int addedTokens = streamTokenize(ptr, thisChunkSize, delimiters, &token, &remainder);
81 CU_ASSERT_EQUAL(addedTokens, token->len - tokenCount);
86 streamTokenize(NULL, 0, NULL, &token, &remainder);
88 CU_ASSERT_EQUAL_FATAL(token->len, 3);
89 CU_ASSERT_EQUAL(g_array_index(token,
Token, 0).hashedContent, hash(
"foo"));
90 CU_ASSERT_EQUAL(g_array_index(token,
Token, 0).length, 3);
91 CU_ASSERT_EQUAL(g_array_index(token,
Token, 0).removedBefore, 1);
92 CU_ASSERT_EQUAL(g_array_index(token,
Token, 1).hashedContent, hash(
"ba"));
93 CU_ASSERT_EQUAL(g_array_index(token,
Token, 1).length, 2);
94 CU_ASSERT_EQUAL(g_array_index(token,
Token, 1).removedBefore, 2);
95 #ifndef MONK_CASE_INSENSITIVE
96 CU_ASSERT_EQUAL(g_array_index(token,
Token, 2).hashedContent, hash(
"boooREM"));
98 CU_ASSERT_EQUAL(g_array_index(token,
Token, 2).hashedContent, hash(
"booorem"));
100 CU_ASSERT_EQUAL(g_array_index(token,
Token, 2).length, 7);
101 CU_ASSERT_EQUAL(g_array_index(token,
Token, 2).removedBefore, 5);
103 CU_ASSERT_PTR_NULL(remainder);
105 CU_ASSERT_EQUAL(token_position_of(3, token), 20);
111 void test_streamTokenizeEventuallyGivesUp() {
112 char* test = g_strdup(
"^foo^^ba");
113 const char* delimiters =
"^";
115 GArray* token = tokens_new();
117 Token* remainder = NULL;
119 char* endPtr = test + strlen(test);
121 printf(
"test: expecting a warning: ");
124 guint addedTokens = 0;
126 while ((i < 1 << 27) && (*ptr) && (ptr <= endPtr)) {
127 unsigned int tokenCount = token->len;
128 int thisChunkSize =
MIN(chunkSize, endPtr - ptr);
130 addedTokens = streamTokenize(ptr, thisChunkSize, delimiters, &token, &remainder);
132 if (addedTokens == (guint)-1) {
135 if (addedTokens != token->len - tokenCount)
136 CU_FAIL(
"wrong return value from streamTokenize()");
140 streamTokenize(NULL, 0, NULL, &token, &remainder);
142 CU_ASSERT_EQUAL(addedTokens, -1);
144 CU_ASSERT_TRUE(token->len > 0);
146 g_array_free(token, TRUE);
150 void assertTokenPosition(
char*
string, guint count, ...) {
151 char* test = g_strdup(
string);
153 GArray* tokens = tokenize(test,
"^");
155 CU_ASSERT_EQUAL(tokens->len, count);
156 if (tokens->len == count) {
159 va_start(argptr, count);
160 for (
size_t i = 0; i < tokens->len; i++) {
161 size_t expected = va_arg(argptr,
size_t);
162 size_t current = token_position_of(i, tokens);
163 if (current != expected) {
164 printf(
"ASSERT tokenizing '%s': posof(token[%ld]) == %ld != %ld\n",
string, i, current, expected);
165 CU_FAIL(
"see output");
168 CU_ASSERT_EQUAL(current, token_position_of(i, tokens));
172 printf(
"ASSERT tokenizing '%s': token count %d != %d\n",
string, tokens->len, count);
175 g_array_free(tokens, TRUE);
/**
 * token_position_of() must report the original byte offset of each token
 * for a variety of delimiter placements (leading, doubled, and runs of '^').
 */
void test_tokenPosition() {
  assertTokenPosition("foo", 1, 0);
  assertTokenPosition("^foo", 1, 1);
  assertTokenPosition("^foo^^bar", 2, 1, 6);
  assertTokenPosition("foo^^bar", 2, 0, 5);
  assertTokenPosition("^foo^^bar^^^^^baz", 3, 1, 6, 14);
}
187 void test_tokenPositionAtEnd() {
188 char* test = g_strdup(
"^^23^5^7");
189 GArray* tokens = tokenize(test,
"^");
191 CU_ASSERT_EQUAL(token_position_of(0, tokens), 2);
192 CU_ASSERT_EQUAL(token_position_of(1, tokens), 5);
193 CU_ASSERT_EQUAL(token_position_of(2, tokens), 7);
194 CU_ASSERT_EQUAL(token_position_of(3, tokens), 8);
196 g_array_free(tokens, TRUE);
200 void test_token_equal() {
201 char* text = g_strdup(
"^foo^^bar^ba^barr");
202 char*
search = g_strdup(
"bar^^foo^");
204 GArray* tokenizedText = tokenize(text,
"^");
205 GArray* tokenizedSearch = tokenize(
search,
"^");
207 Token* t0 = tokens_index(tokenizedText, 0);
208 Token* t1 = tokens_index(tokenizedText, 1);
209 Token* t2 = tokens_index(tokenizedText, 2);
210 Token* t3 = tokens_index(tokenizedText, 3);
211 Token* s0 = tokens_index(tokenizedSearch, 0);
212 Token* s1 = tokens_index(tokenizedSearch, 1);
214 CU_ASSERT_TRUE(tokenEquals(t0, s1));
215 CU_ASSERT_TRUE(tokenEquals(t1, s0));
216 CU_ASSERT_FALSE(tokenEquals(t2, s0));
217 CU_ASSERT_FALSE(tokenEquals(t3, s0));
219 g_array_free(tokenizedText, TRUE);
220 g_array_free(tokenizedSearch, TRUE);
225 CU_TestInfo string_operations_testcases[] = {
226 {
"Testing tokenize:", test_tokenize},
227 {
"Testing tokenize with special delimiters:", test_tokenizeWithSpecialDelims},
228 {
"Testing stream tokenize:", test_streamTokenize},
229 {
"Testing stream tokenize with too long stream:",test_streamTokenizeEventuallyGivesUp},
230 {
"Testing find token position in string:", test_tokenPosition},
231 {
"Testing find token position at end:", test_tokenPositionAtEnd},
232 {
"Testing token equals:", test_token_equal},
/* Min of two values.
 * NOTE(review): this trailing fragment had no body in the recovered source
 * and looks misplaced — MIN is used earlier in the file and is normally
 * provided by glib. Guarded so a prior definition wins; confirm upstream. */
#ifndef MIN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif