FOSSology  4.4.0
Open Source License Compliance by Open Source Software
nomos_regex.c
Go to the documentation of this file.
1 /*
2  SPDX-FileCopyrightText: © 2006-2011 Hewlett-Packard Development Company, L.P.
3  SPDX-FileCopyrightText: © 2014 Siemens AG
4 
5  SPDX-License-Identifier: GPL-2.0-only
6 */
//#define DEBUG_TNG
/*
 * CALL_IF_DEBUG_MODE(x) expands to its argument when DEBUG_TNG is defined
 * (uncomment the line above to enable it) and to nothing otherwise, so debug
 * statements wrapped in it vanish from regular builds.
 */
#ifdef DEBUG_TNG
#define CALL_IF_DEBUG_MODE(x) x
#else
#define CALL_IF_DEBUG_MODE(x)
#endif
13 
14 #include "nomos_regex.h"
15 #include "nomos_gap.h"
16 #include "nomos_utils.h"
25 static char regexErrbuf[myBUFSIZ];
26 
27 regex_t idx_regc[NFOOTPRINTS];
28 regex_t regc[NFOOTPRINTS];
29 
38 void regexError(int ret, regex_t *regc, char *regex)
39 {
40 #ifdef PROC_TRACE
41  traceFunc("== regexError(%d, %p, %s)\n", ret, regc, regex);
42 #endif /* PROC_TRACE */
43 
44  (void) regerror(ret, regc, regexErrbuf, sizeof(regexErrbuf));
45  Msg("regex = \"%s\"\n", regex);
46  LOG_FATAL("regcomp failure: %s", regexErrbuf)
47  Bail(-__LINE__);
48 }
49 
/**
 * \brief Check if a string ends with given suffix
 *
 * The trailing characters of \p s are compared with \p suffix ignoring case
 * (should be faster than calling regcomp() and regexec()!).
 * \param s      String to inspect
 * \param suffix Suffix to look for
 * \return 1 if \p s ends with \p suffix, 0 otherwise. A NULL argument or a
 *         suffix longer than \p s yields 0; an empty suffix always matches.
 */
int endsIn(char *s, char *suffix)
{
  size_t slen;
  size_t sufflen;

  if (s == NULL || suffix == NULL)
  {
    return (0);
  }
#ifdef PROC_TRACE
  traceFunc("== endsIn(%s, %s)\n", s, suffix);
#endif /* PROC_TRACE */

  slen = strlen(s);
  sufflen = strlen(suffix);
  /*
   * A suffix longer than the string can never match; bail out before the
   * pointer arithmetic below would step in front of the buffer.
   */
  if (sufflen > slen)
  {
    return (0);
  }
  if (strncasecmp(s + (slen - sufflen), suffix, sufflen) == 0)
  {
    return (1);
  }
  return (0);
}
74 
81 int lineInFile(char *pathname, char *regex)
82 {
83  char buf[myBUFSIZ];
84 
85 #ifdef PROC_TRACE
86  traceFunc("== lineInFile(%s, \"%s\")\n", pathname, regex);
87 #endif /* PROC_TRACE */
88 
89  (void) sprintf(buf, "^%s$", regex);
90  return (textInFile(pathname, buf, REG_NEWLINE));
91 }
92 
100 int textInFile(char *pathname, char *regex, int flags)
101 {
102  char *textp;
103  int ret;
104 
105 #ifdef PROC_TRACE
106  traceFunc("== textInFile(%s, \"%s\", 0x%x)\n", pathname, regex, flags);
107 #endif /* PROC_TRACE */
108 
109  if ((pathname == NULL_STR ) || (regex == NULL_STR ))
110  {
111 #ifdef QA_CHECKS
112  if (pathname == NULL_STR)
113  {
114  Assert(NO, "textInFile: NULL pathname");
115  }
116  if (regex == NULL_STR)
117  {
118  Assert(NO, "textInFile: NULL regex");
119  }
120 #endif /* QA_CHECKS */
121  return (0);
122  }
123  if ((textp = mmapFile(pathname)) == NULL_STR)
124  {
125  return (0);
126  }
127  ret = strGrep(regex, textp, flags);
128  munmapFile(textp);
129  return (ret);
130 }
131 
139 int strGrep(char *regex, char *data, int flags)
140 {
141  regex_t regc;
142  int ret;
143 
144 #ifdef PHRASE_DEBUG
145  int i;
146 #endif /* PHRASE_DEBUG */
147 
148 #if defined(PROC_TRACE) || defined(PHRASE_DEBUG)
149  traceFunc("== strGrep(\"%s\", %p, 0x%x)\n", regex, data, flags);
150 #endif /* PROC_TRACE || PHRASE_DEBUG */
151 
152  if (data == NULL_STR || regex == NULL_STR)
153  {
154  return (0);
155  }
156  /* DO NOT, repeat DO NOT add REG_EXTENDED as a default flag! */
157  if ((ret = regcomp(&regc, regex, flags)) != 0)
158  {
159  regexError(ret, &regc, regex);
160  regfree(&regc);
161  return (-1); /* <0 indicates compile failure */
162  }
163  /*
164  * regexec() returns 1 on failure and 0 on success - make sure we call
165  * regfree after the regexec call, else after a million or so regex
166  * searches we'll have lost a LOT of memory. :)
167  */
168  ret = regexec(&regc, data, 1, &cur.regm, 0);
169  regfree(&regc);
170  if (ret)
171  {
172  return (0); /* >0 indicates search failure */
173  }
174 #ifdef QA_CHECKS
175  if (cur.regm.rm_so == cur.regm.rm_eo)
176  {
177  Assert(NO, "start/end offsets are identical in strGrep()");
178  }
179 #endif /* QA_CHECKS */
180 #ifdef PHRASE_DEBUG
181  printf("strGrep MATCH(%s) @ %d! = {", regex, cur.regm.rm_so);
182  for (i = cur.regm.rm_so; i < cur.regm.rm_eo; i++)
183  {
184  printf("%c", data[i]);
185  }
186  printf("}\n");
187 #endif /* PHRASE_DEBUG */
188  if (gl.flags & FL_SAVEBASE)
189  {
190  cur.matchBase = data;
191  }
192  return (1);
193 }
194 
/**
 * \brief compile a regex, and perform the search (on data?)
 * \param index Index of the licText entry to search for
 * \param data  Text to search
 * \param flags Flags handed on to idxGrep_base()
 * \return Result of idxGrep_base()
 */
int idxGrep(int index, char *data, int flags)
{
  /* mode 0: search only, neither index nor positions are recorded */
  const int mode = 0;

  return idxGrep_base(index, data, flags, mode);
}
209 
220 int idxGrep_recordPosition(int index, char *data, int flags)
221 {
222  if( optionIsSet(OPTS_NO_HIGHLIGHTINFO) ) {
223  return idxGrep_base(index, data, flags, 0);
224  }
225  else {
226  return idxGrep_base(index, data, flags, 1);
227  }
228 }
229 
241 int idxGrep_recordPositionDoctored(int index, char *data, int flags)
242 {
243 
244  if( optionIsSet(OPTS_NO_HIGHLIGHTINFO) ) {
245  return idxGrep_base(index, data, flags, 0);
246  }
247  else {
248  return idxGrep_base(index, data, flags, 2);
249  }
250 }
251 
263 int idxGrep_recordIndex(int index, char *data, int flags)
264 {
265  if( optionIsSet(OPTS_NO_HIGHLIGHTINFO) ) {
266  return idxGrep_base(index, data, flags, 0);
267  }
268  else {
269  return idxGrep_base(index, data, flags, 3);
270  }
271 }
272 
282 int matchOnce(int isPlain, char *data, char* regex, regex_t *rp,
283  regmatch_t* regmatch)
284 {
285  if(isPlain) {
286  return !strNbuf_noGlobals(data, regex, regmatch , 0 , cur.matchBase );
287  }
288 
289  return regexec(rp, data, 1, regmatch, 0);
290 }
291 
301 int storeOneMatch(regmatch_t currentRegMatch, int lastmatch, GArray* allmatches,
302  char** tmpData, char* data)
303 {
304  regmatch_t storeRegMatch;
305  storeRegMatch.rm_so = currentRegMatch.rm_so + lastmatch;
306  storeRegMatch.rm_eo = currentRegMatch.rm_eo + lastmatch;
307  g_array_append_val(allmatches, storeRegMatch);
308  lastmatch += currentRegMatch.rm_eo;
309  *tmpData = data + lastmatch;
310  return lastmatch;
311 }
312 
324 int idxGrep_base(int index, char *data, int flags, int mode)
325 {
326  int i;
327  int ret;
328 
329  int show = flags & FL_SHOWMATCH;
330  licText_t *ltp = licText + index;
335  regex_t *rp = idx_regc + index;
336 
337  CALL_IF_DEBUG_MODE(printf(" %i %i \"", index, ltp->plain);)
338 
339 #if defined(PROC_TRACE) || defined(PHRASE_DEBUG)
340  traceFunc("== idxGrep(%d, %p, 0x%x)\n... regex \"%s\"\n", index, data,
341  flags, _REGEX(index));
342 #endif /* PROC_TRACE || PHRASE_DEBUG */
343 
344  if (index > NFOOTPRINTS)
345  {
346  LOG_FATAL("idxGrep: index %d out of range", index)
347  Bail(-__LINE__);
348  }
349  if (data == NULL_STR)
350  {
351 #ifdef PHRASE_DEBUG
352  printf("idxGrep: NULL pointer to file data!\n");
353 #endif /* PHRASE_DEBUG */
354  return (0);
355  }
356 
357  if (ltp->plain )
358  {
359  ret = strNbuf(data, ltp->regex);
360  if(ret == 0) return (ret);
361  }
362  else {
363  if ((ret = regcomp(rp, ltp->regex, flags)))
364  {
365  fprintf(stderr, "Compile failed, regex #%d\n", index);
366  regexError(ret, rp, ltp->regex);
367  regfree(rp);
368  printf("Compile error \n");
369  return (-1); /* <0 indicates compile failure */
370  }
371 
372  if (regexec(rp, data, 1, &cur.regm, 0))
373  {
374  regfree(rp);
375  return (0);
376  }
377  else ret =1;
378 
379  #ifdef QA_CHECKS
380  if (cur.regm.rm_so == cur.regm.rm_eo)
381  {
382  regfree(rp);
383  Assert(NO, "start/end offsets are identical in idxGrep(%d)",
384  index);
385  }
386  #endif /* QA_CHECKS */
387  /* Set up a global match-length variable? */
388  if (show)
389  {
390  #ifdef DEBUG
391  printf("REGEX(%d) \"%s\"\n", index, ltp->regex);
392  #endif /* DEBUG */
393  printf("MATCH @ %d! = {", cur.regm.rm_so);
394  for (i = cur.regm.rm_so; i < cur.regm.rm_eo; i++)
395  {
396  printf("%c", data[i]);
397  }
398  printf("}\n");
399  }
400  if (gl.flags & FL_SAVEBASE)
401  {
402  cur.matchBase = data;
403  }
404  }
405 
407 
408  if (mode == 3 ) {
409  recordIndex(cur.indexList, index);
410  }
411  else if (mode==1 || mode == 2)
412  {
413  CALL_IF_DEBUG_MODE(printf("MATCH!\n");)
415 
416  CALL_IF_DEBUG_MODE(printf("%s", data);)
417 
418 
419  GArray* allmatches = g_array_new(FALSE, FALSE, sizeof(regmatch_t));
420  regmatch_t currentRegMatch;
421  int lastmatch = 0;
422 
423  char* tmpData = data;
424 
425  lastmatch = storeOneMatch(cur.regm, lastmatch, allmatches, &tmpData, data);
426 
427  while (!matchOnce(ltp->plain,tmpData, ltp->regex, rp, &currentRegMatch ) )
428  {
429  lastmatch = storeOneMatch(currentRegMatch, lastmatch, allmatches, &tmpData, data);
430  }
431 
432 
433  if(index >= _KW_first && index <= _KW_last ) {
434  rememberWhatWeFound(cur.keywordPositions, allmatches, index, mode);
435  }
436  else if (cur.currentLicenceIndex > -1 ) {
437  rememberWhatWeFound( getLicenceAndMatchPositions(cur.theMatches, cur.currentLicenceIndex )->matchPositions , allmatches, index, mode);
438  }
439  g_array_free(allmatches, 1);
440  CALL_IF_DEBUG_MODE(printf("Bye!\n");)
441  }
442 
443  if (!ltp->plain ) regfree(rp);
444 return (1);
445 }
446 
452 void recordIndex(GArray* indexList, int index){
453  g_array_append_val(indexList, index);
454 }
455 
462 static int getOffset(int posInDoctoredBuffer)
463 {
464  return uncollapsePosition(posInDoctoredBuffer, cur.docBufferPositionsAndOffsets);
465 }
466 
473 regmatch_t* getRegmatch_t(GArray* in, int index)
474 {
475  return & g_array_index(in, regmatch_t, index);
476 }
477 
485 void rememberWhatWeFound(GArray* highlight, GArray* regmatch_tArray, int index,
486  int mode)
487 {
488 
489  if (mode != 1 && mode != 2)
490  {
491  FOSSY_EXIT("This mode is not supported\n", 8);
492  return;
493  }
494 
495  int i = 0;
496  int nmatches = regmatch_tArray->len;
497  int alreadyFound = highlight->len;
498  g_array_set_size(highlight, alreadyFound + nmatches);
499 
500  for (i = 0; i < nmatches; ++i)
501  {
502  regmatch_t* theRegmatch = getRegmatch_t(regmatch_tArray, i);
503  if (theRegmatch->rm_eo == -1 || theRegmatch->rm_so == -1)
504  {
505  FOSSY_EXIT("Found match at negative position... this should not happen\n", 9);
506  return;
507  }
508 
509  MatchPositionAndType* ourMatchv = getMatchfromHighlightInfo(highlight, i + alreadyFound);
510  ourMatchv->start = (mode == 1) ? theRegmatch->rm_so : getOffset(theRegmatch->rm_so);
511  ourMatchv->end = (mode == 1) ? theRegmatch->rm_eo : getOffset(theRegmatch->rm_eo);
512  ourMatchv->index = index;
513 
514  CALL_IF_DEBUG_MODE(printf("here: %i - %i \n", ourMatchv->start, ourMatchv->end);)
515  }
516  CALL_IF_DEBUG_MODE(printf(" We go and now we know %d ", highlight->len);)
517 }
518 
519 #define _XC(q) ((char) xascii[q])
520 
528 int strNbuf(char *data, char *str){
529 
530  return strNbuf_noGlobals(data, str, &(cur.regm), gl.flags & FL_SAVEBASE , cur.matchBase );
531 }
532 
/**
 * \brief This is our own internal, case-insensitive version of strstr().
 *
 * Scans \p data for the first occurrence of \p str. ASCII letters match
 * regardless of case; every other byte has to be identical.
 *
 * \param data     NUL-terminated buffer to search (NULL yields 0)
 * \param str      NUL-terminated text to look for (NULL or empty yields 0)
 * \param matchPos On success rm_so receives the offset of the match in
 *                 \p data and rm_eo that offset plus strlen(str); it is left
 *                 untouched otherwise
 * \param doSave   Has no effect, see note
 * \param saveData Has no effect, see note
 * \return 1 if \p str was found, 0 otherwise
 * \note \p saveData is passed by value, so this function cannot hand a
 *       pointer back through it; \p doSave and \p saveData are accepted for
 *       interface compatibility only.
 */
int strNbuf_noGlobals(char *data, char *str, regmatch_t* matchPos, int doSave,
    char* saveData)
{
  static int firstFlag = 1;
  static char xascii[128];
  int i;
  int alph = 0;
  int save = 0;
  char *bufp;
  char *pattp;
  char *mark;
  char x;
  char firstx = 0;
  unsigned char uch;

  (void) doSave;
  (void) saveData;

#if defined(PROC_TRACE) || defined(PHRASE_DEBUG)
  traceFunc("== strNbuf(%p, %p)\n", data, str);
#endif /* PROC_TRACE || PHRASE_DEBUG */

  if (firstFlag)
  {
    firstFlag = 0;
    /*
     * Build the case-alternate table once: for a letter the entry holds the
     * same letter in the other case, for everything else it is 0.
     * 32 characters separate 'A' (65) and 'a' (97), contiguous up to 'Z'.
     * Therefore, 'Z' == 90, 'a' == 97, and 'z' == 122
     */
    for (i = 0; i < (int) sizeof(xascii); i++)
    {
      if ((i >= 65) && (i <= 90))
      { /* isupper */
        xascii[i] = (char) (i + 32); /* -> tolower */
      }
      else if ((i >= 97) && (i <= 122))
      { /* islower */
        xascii[i] = (char) (i - 32); /* -> toupper */
      }
      else
      {
        xascii[i] = (char) 0;
      }
    }
#ifdef STRSTR_DEBUG
    /*
     * Dump the table (debugging purposes only)
     */
    for (i = 0; i < (int) sizeof(xascii); i++)
    {
      if (xascii[i])
      {
        printf(" %c%c ", (unsigned) i, xascii[i]);
      }
      else
      {
        printf("\\%03d ", (int) xascii[i]);
      }
      if ((i & 15) == 15) /* line break after every 16 entries */
      {
        printf("\n");
      }
    }
#endif /* STRSTR_DEBUG */
  }
#ifdef STRSTR_DEBUG
  printf("DATA \"%s\"\nPATT \"%s\"\n", data, str);
#endif /* STRSTR_DEBUG */
  if (data == NULL || str == NULL)
  {
    return (0);
  }
  /*
   * Go through unsigned char: a plain char may be negative for bytes above
   * 127, which is invalid both for isalpha() and as a table index.
   */
  uch = (unsigned char) *str;
  alph = (uch < sizeof(xascii)) && isalpha(uch);
  if (alph)
  {
    firstx = xascii[uch];
#ifdef STRSTR_DEBUG
    printf("NOTE: first char (%c) is Alphabetic - alternate is (%c)\n",
        *str, firstx);
#endif /* STRSTR_DEBUG */
#ifdef QA_CHECKS
    if (firstx == '\0')
    {
      LOG_FATAL("Unexpected initialization")
      Bail(-__LINE__);
    }
#endif /* QA_CHECKS */
  }
  for (bufp = data; *bufp; bufp = mark)
  {
#ifdef STRSTR_DEBUG
    printf("\nDEBUG: start, buffer = \"%s\"\n", bufp);
#endif /* STRSTR_DEBUG */
    pattp = str;
    /*
     * Locate the first character of our target-pattern in the buffer...
     */
    while (*bufp)
    {
#ifdef STRSTR_DEBUG
      printf("... findfirst, *bufp is '%c' == [%c%c]?\n",
          *bufp, *str, alph ? firstx : *str);
#endif /* STRSTR_DEBUG */
      if (*bufp == *pattp)
      {
        break;
      }
      if (alph && (*bufp == firstx))
      {
        break;
      }
      bufp++;
    }
    if (*bufp == '\0')
    {
      return (0);
    }
    save = (int) (bufp - data);
    mark = ++bufp; /* next candidate start if this one does not pan out */
#ifdef STRSTR_DEBUG
    printf("GOT IT, at offset %d (*mark now is '%c')\n",
        (int) (bufp - data - 1), *mark);
#endif /* STRSTR_DEBUG */
    /* Compare the remainder of the pattern against the buffer */
    for (++pattp; *bufp && *pattp; bufp++, pattp++)
    {
#ifdef STRSTR_DEBUG
      printf("STRING-COMPARE: %c == %c ??\n", *bufp, *pattp);
#endif /* STRSTR_DEBUG */
      if (*bufp == *pattp)
      {
        continue;
      }
      uch = (unsigned char) *pattp;
      x = (uch < sizeof(xascii)) ? xascii[uch] : '\0';
#ifdef STRSTR_DEBUG
      printf("... or perhaps: %c == %c ??\n", *bufp, x);
#endif /* STRSTR_DEBUG */
      if (x && (*bufp == x))
      {
        continue;
      }
      break;
    }
    if (*pattp == '\0')
    {
      matchPos->rm_so = save;
      matchPos->rm_eo = save + (int) strlen(str);
      return (1); /* end of pattern == success */
    }
    if (*bufp == '\0')
    {
      return (0); /* end of buffer before end of pattern == failure */
    }
  }
  return (0);
}
int s
The socket that the CLI will use to communicate.
Definition: fo_cli.c:37
void munmapFile(void *ptr)
Definition: util.c:1197
void Assert(int fatalFlag, const char *fmt,...)
Raise an assert.
Definition: util.c:1395
void Msg(const char *fmt,...)
DO NOT automatically add a newline to a string passed to Msg(); in parseDistro, we sometimes want to dump a p...
Definition: util.c:1382
char * mmapFile(char *pathname)
Blarg. Files that are EXACTLY a multiple of the system pagesize do not get a NULL on the end of the b...
Definition: util.c:1082
licText_t licText[]
#define NULL_STR
NULL string.
Definition: nomos.h:235
#define _REGEX(x)
Definition: nomos.h:447
#define NO
Definition: nomos.h:171
void Bail(int exitval)
Close connections and exit.
Definition: nomos_utils.c:533
#define FL_SAVEBASE
Definition: nomos.h:155
#define NULL_CHAR
NULL character.
Definition: nomos.h:234
int optionIsSet(int val)
Check if an CLI option is set.
Definition: nomos_utils.c:560
int storeOneMatch(regmatch_t currentRegMatch, int lastmatch, GArray *allmatches, char **tmpData, char *data)
Store a single regex match to array.
Definition: nomos_regex.c:301
int idxGrep_base(int index, char *data, int flags, int mode)
compile a regex, and perform the search (on data?)
Definition: nomos_regex.c:324
regmatch_t * getRegmatch_t(GArray *in, int index)
From a given array, get regex match from a given index.
Definition: nomos_regex.c:473
int idxGrep_recordPosition(int index, char *data, int flags)
compile a regex, perform the search and record findings
Definition: nomos_regex.c:220
int idxGrep(int index, char *data, int flags)
compile a regex, and perform the search (on data?)
Definition: nomos_regex.c:205
int lineInFile(char *pathname, char *regex)
Check if a line exists in a file.
Definition: nomos_regex.c:81
static char regexErrbuf[myBUFSIZ]
Definition: nomos_regex.c:25
void recordIndex(GArray *indexList, int index)
Add a given index to index list.
Definition: nomos_regex.c:452
int strGrep(char *regex, char *data, int flags)
General-purpose grep function, used for one-time-only searches.
Definition: nomos_regex.c:139
int idxGrep_recordPositionDoctored(int index, char *data, int flags)
compile a regex, perform the search and record findings
Definition: nomos_regex.c:241
int strNbuf(char *data, char *str)
Check if a string exists in buffer (case insensitive)
Definition: nomos_regex.c:528
int strNbuf_noGlobals(char *data, char *str, regmatch_t *matchPos, int doSave, char *saveData)
This is our own internal, case-insensitive version of strstr().
Definition: nomos_regex.c:538
int matchOnce(int isPlain, char *data, char *regex, regex_t *rp, regmatch_t *regmatch)
Perform a regex match on a given data and return only first match.
Definition: nomos_regex.c:282
int endsIn(char *s, char *suffix)
Check if a string ends with given suffix.
Definition: nomos_regex.c:56
void regexError(int ret, regex_t *regc, char *regex)
Log an error caused by regex.
Definition: nomos_regex.c:38
int idxGrep_recordIndex(int index, char *data, int flags)
compile a regex, perform the search and record index
Definition: nomos_regex.c:263
int textInFile(char *pathname, char *regex, int flags)
Check if a regex passes in a file.
Definition: nomos_regex.c:100
void rememberWhatWeFound(GArray *highlight, GArray *regmatch_tArray, int index, int mode)
Store regex matches in highlight array.
Definition: nomos_regex.c:485
static int getOffset(int posInDoctoredBuffer)
Get offset from doctored buffer.
Definition: nomos_regex.c:462
FUNCTION MatchPositionAndType * getMatchfromHighlightInfo(GArray *in, int index)
Get the MatchPositionAndType for a given index in highlight array.
Definition: nomos_utils.c:901
FUNCTION LicenceAndMatchPositions * getLicenceAndMatchPositions(GArray *in, int index)
Get the LicenceAndMatchPositions for a given index in match array.
Definition: nomos_utils.c:913
GArray * matchPositions
Match positions.
Definition: nomos.h:379
int start
Start position of match.
Definition: nomos.h:370
int index
Enums from index (Entrynumber) in STRINGS.in.
Definition: nomos.h:372
int end
End position of match.
Definition: nomos.h:371
GArray * indexList
Definition: nomos.h:416
GArray * theMatches
Definition: nomos.h:417
GArray * keywordPositions
Definition: nomos.h:418
int flags
Flags.
Definition: nomos.h:348
char * regex
License regex.
Definition: nomos.h:435