FOSSology  4.4.0
Open Source License Compliance by Open Source Software
monkbulk.c
1 /*
2  Author: Daniele Fognini, Andreas Wuerl
3  SPDX-FileCopyrightText: © 2013-2015, 2018, 2021 Siemens AG
4 
5  SPDX-License-Identifier: GPL-2.0-only
6 */
7 
8 #include <stdlib.h>
9 
10 #include "libfossology.h"
11 
12 #include "monkbulk.h"
13 #include "database.h"
14 #include "license.h"
15 #include "match.h"
16 #include "common.h"
17 #include "monk.h"
18 
19 int bulk_onAllMatches(MonkState* state, const File* file, const GArray* matches);
20 
21 MatchCallbacks bulkCallbacks = {.onAll = bulk_onAllMatches};
22 
23 int setLeftAndRight(MonkState* state) {
24  BulkArguments* bulkArguments = state->ptr;
25 
26  gchar* tableName = getUploadTreeTableName(state->dbManager, bulkArguments->uploadId);
27 
28  if (!tableName)
29  return 0;
30 
31  gchar* sql = g_strdup_printf("SELECT lft, rgt FROM %s WHERE uploadtree_pk = $1", tableName);
32  gchar* stmt = g_strdup_printf("setLeftAndRight.%s", tableName);
33 
34  if ((!sql) || (!stmt))
35  return 0;
36 
37  PGresult* leftAndRightResult = fo_dbManager_ExecPrepared(
38  fo_dbManager_PrepareStamement(
39  state->dbManager,
40  stmt,
41  sql,
42  long
43  ),
44  bulkArguments->uploadTreeId
45  );
46 
47  g_free(stmt);
48  g_free(sql);
49 
50  int result = 0;
51 
52  if (leftAndRightResult) {
53  if (PQntuples(leftAndRightResult)==1) {
54  int i = 0;
55  bulkArguments->uploadTreeLeft = atol(PQgetvalue(leftAndRightResult, 0, i++));
56  bulkArguments->uploadTreeRight = atol(PQgetvalue(leftAndRightResult, 0, i));
57 
58  result = 1;
59  }
60  PQclear(leftAndRightResult);
61  }
62  return result;
63 }
64 
65 void bulkArguments_contents_free(BulkArguments* bulkArguments);
66 
67 BulkAction** queryBulkActions(MonkState* state, long bulkId);
68 
69 int queryBulkArguments(MonkState* state, long bulkId) {
70  int result = 0;
71 
72  PGresult* bulkArgumentsResult = fo_dbManager_ExecPrepared(
73  fo_dbManager_PrepareStamement(
74  state->dbManager,
75  "queryBulkArguments",
76  "SELECT ut.upload_fk, ut.uploadtree_pk, lrb.user_fk, lrb.group_fk, "
77  "lrb.rf_text, lrb.ignore_irrelevant, lrb.bulk_delimiters, lrb.scan_findings "
78  "FROM license_ref_bulk lrb INNER JOIN uploadtree ut "
79  "ON ut.uploadtree_pk = lrb.uploadtree_fk "
80  "WHERE lrb_pk = $1",
81  long
82  ),
83  bulkId
84  );
85 
86  if (bulkArgumentsResult) {
87  if (PQntuples(bulkArgumentsResult)==1) {
88  BulkArguments* bulkArguments = (BulkArguments*)malloc(sizeof(BulkArguments));
89 
90  int column = 0;
91  bulkArguments->uploadId = atoi(PQgetvalue(bulkArgumentsResult, 0, column++));
92  bulkArguments->uploadTreeId = atol(PQgetvalue(bulkArgumentsResult, 0, column++));
93  bulkArguments->userId = atoi(PQgetvalue(bulkArgumentsResult, 0, column++));
94  bulkArguments->groupId = atoi(PQgetvalue(bulkArgumentsResult, 0, column++));
95  bulkArguments->refText = g_strdup(PQgetvalue(bulkArgumentsResult, 0, column++));
96  bulkArguments->ignoreIrre = strcmp(PQgetvalue(bulkArgumentsResult, 0, column++), "t") == 0;
97  if (PQgetisnull(bulkArgumentsResult, 0, column) == 1)
98  {
99  bulkArguments->delimiters = g_strdup(DELIMITERS);
100  column++;
101  }
102  else
103  {
104  bulkArguments->delimiters = normalize_escape_string(PQgetvalue(bulkArgumentsResult, 0, column++));
105  }
106  bulkArguments->scanFindings = strcmp(PQgetvalue(bulkArgumentsResult, 0, column++), "t") == 0;
107  bulkArguments->bulkId = bulkId;
108  bulkArguments->actions = queryBulkActions(state, bulkId);
109  bulkArguments->jobId = fo_scheduler_jobId();
110 
111  state->ptr = bulkArguments;
112 
113  if (!setLeftAndRight(state)) {
114  printf("FATAL: could not retrieve left and right for bulk id=%ld\n", bulkId);
115  bulkArguments_contents_free(state->ptr);
116  } else {
117  result = 1;
118  }
119  } else {
120  printf("FATAL: could not retrieve arguments for bulk scan with id=%ld\n", bulkId);
121  }
122  PQclear(bulkArgumentsResult);
123  }
124  return result;
125 }
126 
127 BulkAction** queryBulkActions(MonkState* state, long bulkId) {
128 
129  PGresult* bulkActionsResult = fo_dbManager_ExecPrepared(
130  fo_dbManager_PrepareStamement(
131  state->dbManager,
132  "queryBulkActions",
133  "SELECT rf_fk, removing, comment, reportinfo, acknowledgement FROM license_set_bulk WHERE lrb_fk = $1",
134  long
135  ),
136  bulkId
137  );
138 
139  int numberOfRows = bulkActionsResult ? PQntuples(bulkActionsResult) : 0;
140  BulkAction** bulkActions = (BulkAction**)malloc((numberOfRows + 1) * sizeof(BulkAction*));
141 
142  int row;
143  for (row = 0; row < numberOfRows; row++) {
144  int column = 0;
145  BulkAction *action = (BulkAction *) malloc(sizeof(BulkAction));
146  action->licenseId = atoi(PQgetvalue(bulkActionsResult, row, column++));
147  action->removing = (strcmp(PQgetvalue(bulkActionsResult, row, column++), "t") == 0);
148  action->comment = g_strdup(PQgetvalue(bulkActionsResult, row, column++));
149  action->reportinfo = g_strdup(PQgetvalue(bulkActionsResult, row, column++));
150  action->acknowledgement = g_strdup(PQgetvalue(bulkActionsResult, row, column++));
151  bulkActions[row] = action;
152  }
153  bulkActions[row] = NULL;
154 
155  if (bulkActionsResult) {
156  PQclear(bulkActionsResult);
157  }
158 
159  return bulkActions;
160 }
161 
162 void bulkArguments_contents_free(BulkArguments* bulkArguments) {
163 
164  BulkAction **bulkActions = bulkArguments->actions;
165  for (int i=0; bulkActions[i] != NULL; i++) {
166  free(bulkActions[i]);
167  }
168  free(bulkActions);
169 
170  g_free(bulkArguments->refText);
171  g_free(bulkArguments->delimiters);
172 
173  free(bulkArguments);
174 }
175 
176 int bulk_identification(MonkState* state) {
177  BulkArguments* bulkArguments = state->ptr;
178 
179  License license = (License){
180  .refId = bulkArguments->licenseId,
181  };
182  license.tokens = tokenize(bulkArguments->refText, bulkArguments->delimiters);
183 
184  GArray* licenseArray = g_array_new(FALSE, FALSE, sizeof (License));
185  g_array_append_val(licenseArray, license);
186 
187  Licenses* licenses = buildLicenseIndexes(licenseArray, MIN_ADJACENT_MATCHES, 0);
188 
189  PGresult* filesResult = queryFileIdsForUploadAndLimits(
190  state->dbManager,
191  bulkArguments->uploadId,
192  bulkArguments->uploadTreeLeft,
193  bulkArguments->uploadTreeRight,
194  bulkArguments->groupId,
195  bulkArguments->ignoreIrre,
196  bulkArguments->scanFindings
197  );
198 
199  int haveError = 1;
200  if (filesResult != NULL) {
201  int resultsCount = PQntuples(filesResult);
202  haveError = 0;
203 #ifdef MONK_MULTI_THREAD
204  #pragma omp parallel
205 #endif
206  {
207  MonkState threadLocalStateStore = *state;
208  MonkState* threadLocalState = &threadLocalStateStore;
209 
210  threadLocalState->dbManager = fo_dbManager_fork(state->dbManager);
211  if (threadLocalState->dbManager) {
212 #ifdef MONK_MULTI_THREAD
213  #pragma omp for schedule(dynamic)
214 #endif
215  for (int i = 0; i<resultsCount; i++) {
216  if (haveError)
217  continue;
218 
219  long fileId = atol(PQgetvalue(filesResult, i, 0));
220 
221  if (matchPFileWithLicenses(threadLocalState, fileId, licenses,
222  &bulkCallbacks, bulkArguments->delimiters)) {
224  } else {
226  haveError = 1;
227  }
228  }
229  fo_dbManager_finish(threadLocalState->dbManager);
230  } else {
231  haveError = 1;
232  }
233  }
234  PQclear(filesResult);
235  }
236 
237  licenses_free(licenses);
238 
239  return !haveError;
240 }
241 
242 int main(int argc, char** argv) {
243  MonkState stateStore;
244  MonkState* state = &stateStore;
245 
246  fo_scheduler_connect_dbMan(&argc, argv, &(state->dbManager));
247 
248  queryAgentId(state, AGENT_BULK_NAME, AGENT_BULK_DESC);
249 
250  state->scanMode = MODE_BULK;
251 
252  while (fo_scheduler_next() != NULL) {
253  const char* schedulerCurrent = fo_scheduler_current();
254 
255  long bulkId = atol(schedulerCurrent);
256 
257  if (bulkId == 0) continue;
258 
259  if (!queryBulkArguments(state, bulkId)) {
260  bail(state, 1);
261  }
262 
263  BulkArguments* bulkArguments = state->ptr;
264 
265  int arsId = fo_WriteARS(fo_dbManager_getWrappedConnection(state->dbManager),
266  0, bulkArguments->uploadId, state->agentId, AGENT_BULK_ARS, NULL, 0);
267 
268  if (arsId<=0)
269  bail(state, 2);
270 
271  if (!bulk_identification(state))
272  bail(state, 3);
273 
274  fo_WriteARS(fo_dbManager_getWrappedConnection(state->dbManager),
275  arsId, bulkArguments->uploadId, state->agentId, AGENT_BULK_ARS, NULL, 1);
276 
277  bulkArguments_contents_free(bulkArguments);
279  }
280 
281  scheduler_disconnect(state, 0);
282  return 0;
283 }
284 
285 int bulk_onAllMatches(MonkState* state, const File* file, const GArray* matches) {
286  int haveAFullMatch = 0;
287  for (guint j=0; j<matches->len; j++) {
288  Match* match = match_array_index(matches, j);
289 
290  if (match->type == MATCH_TYPE_FULL) {
291  haveAFullMatch = 1;
292  break;
293  }
294  }
295 
296  if (!haveAFullMatch)
297  return 1;
298 
299  BulkArguments* bulkArguments = state->ptr;
300 
301  if (!fo_dbManager_begin(state->dbManager))
302  return 0;
303 
304  BulkAction **actions = bulkArguments->actions;
305  gchar* insertSql;
306  char* uploadtreeTableName = getUploadTreeTableName(state->dbManager,
307  bulkArguments->uploadId);
308  gchar* stmt = g_strdup_printf("saveBulkResult:decision.%s",
309  uploadtreeTableName);
310  if (bulkArguments->ignoreIrre)
311  {
312  insertSql = g_strdup_printf("INSERT INTO clearing_event(uploadtree_fk, user_fk, group_fk, "
313  "job_fk, type_fk, rf_fk, removed, comment, reportinfo, acknowledgement) "
314  "SELECT uploadtree_pk, $2, $3, $4, $5, $6, $7, $8, $9, $10 "
315  "FROM ("
316  "SELECT DISTINCT ON(ut.uploadtree_pk, ut.pfile_fk, scopesort) "
317  "ut.pfile_fk pfile_fk, ut.uploadtree_pk, decision_type, "
318  "CASE cd.scope WHEN 1 THEN 1 ELSE 0 END AS scopesort"
319  " FROM %s AS ut "
320  " LEFT JOIN clearing_decision cd ON "
321  " ((ut.uploadtree_pk = cd.uploadtree_fk AND scope = 0 AND cd.group_fk = $3) "
322  " OR (ut.pfile_fk = cd.pfile_fk AND scope = 1)) "
323  " WHERE upload_fk = $11 AND (lft BETWEEN $12 AND $13) AND ut.pfile_fk = $1"
324  " ORDER BY ut.uploadtree_pk, scopesort, ut.pfile_fk, clearing_decision_pk DESC"
325  ") itemView WHERE decision_type != %d OR decision_type IS NULL"
326  " RETURNING clearing_event_pk;", uploadtreeTableName, DECISION_TYPE_FOR_IRRELEVANT);
327  stmt = g_strconcat(stmt, ".ignoreirre", NULL);
328  }
329  else
330  {
331  insertSql = g_strdup_printf("INSERT INTO clearing_event(uploadtree_fk, user_fk, group_fk, "
332  "job_fk, type_fk, rf_fk, removed, comment, reportinfo, acknowledgement)"
333  "SELECT uploadtree_pk, $2, $3, $4, $5, $6, $7, $8, $9, $10 "
334  "FROM %s "
335  " WHERE upload_fk = $11 AND (lft BETWEEN $12 AND $13) AND pfile_fk = $1"
336  " RETURNING clearing_event_pk;", uploadtreeTableName);
337  }
338  for (int i = 0; actions[i] != NULL; i++) {
339  BulkAction* action = actions[i];
340 
341  PGresult* licenseDecisionIds = fo_dbManager_ExecPrepared(
342  fo_dbManager_PrepareStamement(
343  state->dbManager,
344  stmt,
345  insertSql,
346  long, int, int, int, int, long, int, char*, char*, char*,
347  int, long, long
348  ),
349  file->id,
350 
351  bulkArguments->userId,
352  bulkArguments->groupId,
353  bulkArguments->jobId,
354  BULK_DECISION_TYPE,
355  action->licenseId,
356  action->removing ? 1 : 0,
357  action->comment,
358  action->reportinfo,
359  action->acknowledgement,
360 
361  bulkArguments->uploadId,
362  bulkArguments->uploadTreeLeft,
363  bulkArguments->uploadTreeRight
364  );
365 
366  if (licenseDecisionIds) {
367  for (int i=0; i<PQntuples(licenseDecisionIds);i++) {
368  long licenseDecisionEventId = atol(PQgetvalue(licenseDecisionIds,i,0));
369 
370  for (guint j=0; j<matches->len; j++) {
371  Match* match = match_array_index(matches, j);
372 
373  if (match->type != MATCH_TYPE_FULL)
374  continue;
375 
376  DiffPoint* highlightTokens = match->ptr.full;
377  DiffPoint highlight = getFullHighlightFor(file->tokens, highlightTokens->start, highlightTokens->length);
378 
379  PGresult* highlightResult = fo_dbManager_ExecPrepared(
380  fo_dbManager_PrepareStamement(
381  state->dbManager,
382  "saveBulkResult:highlight",
383  "INSERT INTO highlight_bulk(clearing_event_fk, lrb_fk, start, len) VALUES($1,$2,$3,$4)",
384  long, long, size_t, size_t
385  ),
386  licenseDecisionEventId,
387  bulkArguments->bulkId,
388  highlight.start,
389  highlight.length
390  );
391 
392  if (highlightResult) {
393  PQclear(highlightResult);
394  } else {
395  fo_dbManager_rollback(state->dbManager);
396  return 0;
397  }
398  }
399  }
400  PQclear(licenseDecisionIds);
401  } else {
402  fo_dbManager_rollback(state->dbManager);
403  return 0;
404  }
405  }
406  g_free(stmt);
407  g_free(insertSql);
408 
409 
410  return fo_dbManager_commit(state->dbManager);
411 }
void matchPFileWithLicenses(CopyrightState const &state, int agentId, unsigned long pFileId, CopyrightDatabaseHandler &databaseHandler)
Get the file contents, scan for statements and save findings to database.
int queryAgentId(PGconn *dbConn)
Get agent id, exit if agent id is incorrect.
void bail(int exitval)
Disconnect with scheduler returning an error code and exit.
char * getUploadTreeTableName(fo_dbManager *dbManager, int uploadId)
Get the upload tree table name for a given upload.
Definition: libfossagent.c:25
FUNCTION int fo_WriteARS(PGconn *pgConn, int ars_pk, int upload_pk, int agent_pk, const char *tableName, const char *ars_status, int ars_success)
Write ars record.
Definition: libfossagent.c:214
The main FOSSology C library.
void fo_scheduler_heart(int i)
This function must be called by agents to let the scheduler know they are alive and how many items they have processed.
void fo_scheduler_connect_dbMan(int *argc, char **argv, fo_dbManager **dbManager)
Make a connection from an agent to the scheduler and create a DB manager as well.
char * fo_scheduler_current()
Get the last read string from the scheduler.
int fo_scheduler_jobId()
Gets the id of the job that the agent is running.
char * fo_scheduler_next()
Get the next data to process from the scheduler.
PGresult * fo_dbManager_ExecPrepared(fo_dbManager_PreparedStatement *preparedStatement,...)
Execute a prepared statement.
Definition: standalone.c:36
Definition: diff.h:14
Definition: monk.h:61
Definition: monk.h:55
Definition: monk.h:67
Definition: match.h:20
Definition: monk.h:44
Definition: nomos.h:426
Store the results of a regex match.
Definition: scanners.hpp:28