FOSSology  4.4.0
Open Source License Compliance by Open Source Software
test_string_operations.c
1 /*
2  Author: Daniele Fognini, Andreas Wuerl
3  SPDX-FileCopyrightText: © 2013-2014 Siemens AG
4 
5  SPDX-License-Identifier: GPL-2.0-only
6 */
7 
8 #include <stdlib.h>
9 #include <stdio.h>
10 #include <CUnit/CUnit.h>
11 #include <stdarg.h>
12 #include <stdint.h>
13 
14 #include "string_operations.h"
15 #include "hash.h"
16 #include "monk.h"
17 
18 void test_tokenize() {
19  char* test = g_strdup("^foo^^ba^");
20 
21  GArray* token = tokenize(test, "^");
22 
23  CU_ASSERT_EQUAL(token->len, 2);
24  CU_ASSERT_EQUAL(g_array_index(token, Token, 0).hashedContent, hash("foo"));
25  CU_ASSERT_EQUAL(g_array_index(token, Token, 0).length, 3);
26  CU_ASSERT_EQUAL(g_array_index(token, Token, 0).removedBefore, 1);
27  CU_ASSERT_EQUAL(g_array_index(token, Token, 1).hashedContent, hash("ba"));
28  CU_ASSERT_EQUAL(g_array_index(token, Token, 1).length, 2);
29  CU_ASSERT_EQUAL(g_array_index(token, Token, 1).removedBefore, 2);
30 
31  g_array_free(token, TRUE);
32  g_free(test);
33 }
34 
35 void test_tokenizeWithSpecialDelims() {
36  char* test = g_strdup("/*foo \n * bar \n *baz*/ ***booo \n:: qoo \ndnl zit ");
37 
38  GArray* token = tokenize(test, " \n");
39  CU_ASSERT_EQUAL(token->len, 6);
40  CU_ASSERT_EQUAL(g_array_index(token, Token, 0).hashedContent, hash("foo"));
41  CU_ASSERT_EQUAL(g_array_index(token, Token, 0).length, 3);
42  CU_ASSERT_EQUAL(g_array_index(token, Token, 0).removedBefore, 2);
43  CU_ASSERT_EQUAL(g_array_index(token, Token, 1).hashedContent, hash("bar"));
44  CU_ASSERT_EQUAL(g_array_index(token, Token, 1).length, 3);
45  CU_ASSERT_EQUAL(g_array_index(token, Token, 1).removedBefore, 5);
46  CU_ASSERT_EQUAL(g_array_index(token, Token, 2).hashedContent, hash("baz"));
47  CU_ASSERT_EQUAL(g_array_index(token, Token, 2).length, 3);
48  CU_ASSERT_EQUAL(g_array_index(token, Token, 2).removedBefore, 4);
49  CU_ASSERT_EQUAL(g_array_index(token, Token, 3).hashedContent, hash("booo"));
50  CU_ASSERT_EQUAL(g_array_index(token, Token, 3).length, 4);
51  CU_ASSERT_EQUAL(g_array_index(token, Token, 3).removedBefore, 6);
52  CU_ASSERT_EQUAL(g_array_index(token, Token, 4).hashedContent, hash("qoo"));
53  CU_ASSERT_EQUAL(g_array_index(token, Token, 4).length, 3);
54  CU_ASSERT_EQUAL(g_array_index(token, Token, 4).removedBefore, 5);
55  CU_ASSERT_EQUAL(g_array_index(token, Token, 5).hashedContent, hash("zit"));
56  CU_ASSERT_EQUAL(g_array_index(token, Token, 5).length, 3);
57  CU_ASSERT_EQUAL(g_array_index(token, Token, 5).removedBefore, 6);
58  g_array_free(token, TRUE);
59  g_free(test);
60 }
61 
62 void test_streamTokenize() {
63  char* test = g_strdup("^foo^^ba^REM^boooREM^REM^");
64  const char* delimiters = "^";
65 
66  GArray* token = tokens_new();
67 
68  Token* remainder = NULL;
69 
70  size_t len = strlen(test);
71 
72  size_t chunkSize = 2;
73  char* ptr = test;
74  size_t rea = 0;
75  while (rea < len) {
76  unsigned int tokenCount = token->len;
77  size_t thisChunkSize = MIN(chunkSize, len - rea);
78 
79  int addedTokens = streamTokenize(ptr, thisChunkSize, delimiters, &token, &remainder);
80 
81  CU_ASSERT_EQUAL(addedTokens, token->len - tokenCount);
82 
83  ptr += chunkSize;
84  rea += chunkSize;
85  }
86  streamTokenize(NULL, 0, NULL, &token, &remainder);
87 
88  CU_ASSERT_EQUAL_FATAL(token->len, 3);
89  CU_ASSERT_EQUAL(g_array_index(token, Token, 0).hashedContent, hash("foo"));
90  CU_ASSERT_EQUAL(g_array_index(token, Token, 0).length, 3);
91  CU_ASSERT_EQUAL(g_array_index(token, Token, 0).removedBefore, 1);
92  CU_ASSERT_EQUAL(g_array_index(token, Token, 1).hashedContent, hash("ba"));
93  CU_ASSERT_EQUAL(g_array_index(token, Token, 1).length, 2);
94  CU_ASSERT_EQUAL(g_array_index(token, Token, 1).removedBefore, 2);
95 #ifndef MONK_CASE_INSENSITIVE
96  CU_ASSERT_EQUAL(g_array_index(token, Token, 2).hashedContent, hash("boooREM"));
97 #else
98  CU_ASSERT_EQUAL(g_array_index(token, Token, 2).hashedContent, hash("booorem"));
99 #endif
100  CU_ASSERT_EQUAL(g_array_index(token, Token, 2).length, 7);
101  CU_ASSERT_EQUAL(g_array_index(token, Token, 2).removedBefore, 5);
102 
103  CU_ASSERT_PTR_NULL(remainder);
104 
105  CU_ASSERT_EQUAL(token_position_of(3, token), 20);
106 
107  tokens_free(token);
108  g_free(test);
109 }
110 
111 void test_streamTokenizeEventuallyGivesUp() {
112  char* test = g_strdup("^foo^^ba");
113  const char* delimiters = "^";
114 
115  GArray* token = tokens_new();
116 
117  Token* remainder = NULL;
118 
119  char* endPtr = test + strlen(test);
120 
121  printf("test: expecting a warning: ");
122  int chunkSize = 5;
123  char* ptr = test;
124  guint addedTokens = 0;
125  uint32_t i = 0;
126  while ((i < 1 << 27) && (*ptr) && (ptr <= endPtr)) {
127  unsigned int tokenCount = token->len;
128  int thisChunkSize = MIN(chunkSize, endPtr - ptr);
129 
130  addedTokens = streamTokenize(ptr, thisChunkSize, delimiters, &token, &remainder);
131 
132  if (addedTokens == (guint)-1) {
133  break;
134  } else
135  if (addedTokens != token->len - tokenCount)
136  CU_FAIL("wrong return value from streamTokenize()");
137 
138  i++;
139  }
140  streamTokenize(NULL, 0, NULL, &token, &remainder);
141 
142  CU_ASSERT_EQUAL(addedTokens, -1);
143 
144  CU_ASSERT_TRUE(token->len > 0);
145 
146  g_array_free(token, TRUE);
147  g_free(test);
148 }
149 
150 void assertTokenPosition(char* string, guint count, ...) {
151  char* test = g_strdup(string);
152 
153  GArray* tokens = tokenize(test, "^");
154 
155  CU_ASSERT_EQUAL(tokens->len, count);
156  if (tokens->len == count) {
157 
158  va_list argptr;
159  va_start(argptr, count);
160  for (size_t i = 0; i < tokens->len; i++) {
161  size_t expected = va_arg(argptr, size_t);
162  size_t current = token_position_of(i, tokens);
163  if (current != expected) {
164  printf("ASSERT tokenizing '%s': posof(token[%ld]) == %ld != %ld\n", string, i, current, expected);
165  CU_FAIL("see output");
166  break;
167  }
168  CU_ASSERT_EQUAL(current, token_position_of(i, tokens));
169  }
170  va_end(argptr);
171  } else {
172  printf("ASSERT tokenizing '%s': token count %d != %d\n", string, tokens->len, count);
173  }
174 
175  g_array_free(tokens, TRUE);
176  g_free(test);
177 }
178 
/**
 * \brief Check token positions for several '^'-delimited inputs
 *
 * Every call passes the input text, the expected number of tokens and then
 * one expected position per token.
 */
void test_tokenPosition() {
  /* single token, with and without a leading delimiter */
  assertTokenPosition("foo", 1, 0);
  assertTokenPosition("^foo", 1, 1);

  /* two tokens separated by a double delimiter */
  assertTokenPosition("^foo^^bar", 2, 1, 6);
  assertTokenPosition("foo^^bar", 2, 0, 5);

  /* three tokens with a long run of delimiters before the last one */
  assertTokenPosition("^foo^^bar^^^^^baz", 3, 1, 6, 14);
}
186 
187 void test_tokenPositionAtEnd() {
188  char* test = g_strdup("^^23^5^7");
189  GArray* tokens = tokenize(test, "^");
190 
191  CU_ASSERT_EQUAL(token_position_of(0, tokens), 2);
192  CU_ASSERT_EQUAL(token_position_of(1, tokens), 5);
193  CU_ASSERT_EQUAL(token_position_of(2, tokens), 7);
194  CU_ASSERT_EQUAL(token_position_of(3, tokens), 8);
195 
196  g_array_free(tokens, TRUE);
197  g_free(test);
198 }
199 
200 void test_token_equal() {
201  char* text = g_strdup("^foo^^bar^ba^barr");
202  char* search = g_strdup("bar^^foo^");
203 
204  GArray* tokenizedText = tokenize(text, "^");
205  GArray* tokenizedSearch = tokenize(search, "^");
206 
207  Token* t0 = tokens_index(tokenizedText, 0);
208  Token* t1 = tokens_index(tokenizedText, 1);
209  Token* t2 = tokens_index(tokenizedText, 2);
210  Token* t3 = tokens_index(tokenizedText, 3);
211  Token* s0 = tokens_index(tokenizedSearch, 0);
212  Token* s1 = tokens_index(tokenizedSearch, 1);
213 
214  CU_ASSERT_TRUE(tokenEquals(t0, s1)); // foo == foo
215  CU_ASSERT_TRUE(tokenEquals(t1, s0)); // bar == bar
216  CU_ASSERT_FALSE(tokenEquals(t2, s0)); // ba != bar
217  CU_ASSERT_FALSE(tokenEquals(t3, s0)); // barr != bar
218 
219  g_array_free(tokenizedText, TRUE);
220  g_array_free(tokenizedSearch, TRUE);
221  g_free(text);
222  g_free(search);
223 }
224 
225 CU_TestInfo string_operations_testcases[] = {
226  {"Testing tokenize:", test_tokenize},
227  {"Testing tokenize with special delimiters:", test_tokenizeWithSpecialDelims},
228  {"Testing stream tokenize:", test_streamTokenize},
229  {"Testing stream tokenize with too long stream:",test_streamTokenizeEventuallyGivesUp},
230  {"Testing find token position in string:", test_tokenPosition},
231  {"Testing find token position at end:", test_tokenPositionAtEnd},
232  {"Testing token equals:", test_token_equal},
233  CU_TEST_INFO_NULL
234 };
#define MIN(a, b)
Min of two.
Definition: licenses.c:64