FOSSology  4.4.0
Open Source License Compliance by Open Source Software
wget_agent.c
Go to the documentation of this file.
1 /*
2  wget_agent: Retrieve a file and put it in the database.
3 
4  SPDX-FileCopyrightText: © 2007-2014 Hewlett-Packard Development Company, L.P.
5 
6  SPDX-License-Identifier: GPL-2.0-only
7 */
8 
13 #define _GNU_SOURCE // for asprintf
14 
15 #define ASPRINTF_MEM_ERROR 88
16 #define ASPRINTF_MEM_ERROR_LOG LOG_FATAL("Not enough memory for asprintf before line %d", __LINE__)
17 
18 #include "wget_agent.h"
19 
20 char SQL[STRMAX];               /* For DB: buffer for SQL statement text */
21 
22 PGconn *pgConn = NULL;          /* For the DB: connection handle, closed in SafeExit() */
23 long GlobalUploadKey=-1;        /* Input for this system: upload_pk, -1 when unset */
24 char GlobalTempFile[STRMAX];    /* Temp file to be used */
25 char GlobalURL[URLMAX];         /* URL to download */
26 char GlobalType[STRMAX];        /* Type of download (FILE/version control) */
27 char GlobalParam[STRMAX];       /* Additional parameters */
/* Proxy from fossology.conf. As filled by GetProxy(): [0] http_proxy,
 * [1] https_proxy, [2] ftp_proxy, [3] no_proxy, [4] host part of http_proxy,
 * [5] port part of http_proxy. */
28 char *GlobalProxy[6];
29 char GlobalHttpProxy[STRMAX];   /* HTTP proxy command to use */
/* NOTE(review): listing line 30 is absent from this view; the index at the end
 * of the page places "int GlobalImportGold" there - confirm in the real file. */
31 gid_t ForceGroup=-1;            /* Set to group id to be used for download files */
32 
38 int IsFile(char *Fname, int Link)
39 {
40  stat_t Stat;
41  int rc;
42  if (!Fname || (Fname[0]=='\0')) return(0); /* not a directory */
43  if (Link) rc = stat64(Fname,&Stat);
44  else rc = lstat64(Fname,&Stat);
45  if (rc != 0) return(0); /* bad name */
46  return(S_ISREG(Stat.st_mode));
47 } /* IsFile() */
48 
/**
 * \brief Closes the database connection (when one is open) and exits.
 * \param rc Exit status handed to exit().
 */
53 void SafeExit(int rc)
54 {
55  if (pgConn) PQfinish(pgConn);
  /* NOTE(review): the listing jumps from line 55 to 57, so one source line is
   * not visible here - confirm against the real file before changing this. */
57  exit(rc);
58 } /* SafeExit() */
59 
/**
 * \brief Get the position (ending + 1) of http|https|ftp:// of one url.
 *
 * The scheme must be at the very start of the URL: the caller uses the
 * returned value as an offset into the URL, so a scheme string that merely
 * appears somewhere inside it (e.g. in a query or path) must not count.
 *
 * \param URL The URL to inspect; NULL is treated as "no scheme".
 * \return 7 for "http://", 8 for "https://", 6 for "ftp://", 0 otherwise.
 */
int GetPosition(char *URL)
{
  static const char *const schemes[] = {"http://", "https://", "ftp://"};
  size_t i;

  if (URL == NULL) return 0;

  for (i = 0; i < sizeof(schemes) / sizeof(schemes[0]); i++)
  {
    size_t len = strlen(schemes[i]);
    if (strncmp(URL, schemes[i], len) == 0) return (int)len;
  }
  return 0;
}
73 
/**
 * \brief Insert a file into the database and repository.
 *
 * Works on the globals GlobalTempFile, GlobalUploadKey, GlobalImportGold and
 * ForceGroup. Steps, in order:
 *  -# checksum GlobalTempFile (SumComputeFile) and compute its SHA256;
 *  -# optionally chown the temp file to ForceGroup;
 *  -# when GlobalImportGold is set, import the file into the "gold"
 *     repository and continue from the gold path, otherwise continue from
 *     the temp file itself;
 *  -# import that path into the "files" repository;
 *  -# look up the pfile row by sha1/md5/size, inserting it when absent;
 *  -# inside a transaction, point upload.pfile_fk at that pfile.
 *
 * Every failure terminates the process through SafeExit().
 */
81 void DBLoadGold()
82 {
83  Cksum *Sum;
84  char *Unique=NULL;
85  char *SHA1, *MD5, *Len;
86  char SQL[STRMAX]; /* local buffer; hides the file-level SQL inside this function */
87  long PfileKey;
88  char *Path;
89  char SHA256[65]; /* sized for a 64 character digest plus terminator (see '%.64s' below) */
90  FILE *Fin;
91  int rc = -1;
92  PGresult *result;
93  memset(SHA256, '\0', sizeof(SHA256));
94 
95  LOG_VERBOSE0("Processing %s",GlobalTempFile);
96  Fin = fopen(GlobalTempFile,"rb");
97  if (!Fin)
98  {
  /* NOTE(review): the argument line of this LOG_FATAL call (listing line 100)
   * is absent from this view; the same applies to the calls at listing lines
   * 123, 130 and 144 below. */
99  LOG_FATAL("upload %ld Unable to open temp file %s from %s",
101  SafeExit(1);
102  }
103 
104  Sum = SumComputeFile(Fin);
105  fclose(Fin);
106 
107  // Calculate sha256 value
108  rc = calc_sha256sum(GlobalTempFile, SHA256);
109  if (rc != 0)
110  {
111  LOG_FATAL("Unable to calculate SHA256 of %s\n", GlobalTempFile);
112  SafeExit(56);
113  }
114 
  /* NOTE(review): this test is '> 0' while the two later ForceGroup tests are
   * '>= 0' - confirm whether group id 0 is meant to be handled here. */
115  if ((int)ForceGroup > 0)
116  {
117  rc = chown(GlobalTempFile,-1,ForceGroup);
118  if (rc) LOG_ERROR("chown failed on %s, error: %s", GlobalTempFile, strerror(errno));
119  }
120 
121  if (!Sum)
122  {
123  LOG_FATAL("upload %ld Unable to compute checksum for %s from %s",
125  SafeExit(2);
126  }
127 
128  if (Sum->DataLen <= 0)
129  {
130  LOG_FATAL("upload %ld No bytes downloaded from %s to %s.",
132  SafeExit(3);
133  }
134 
  /* SumToString() returns an allocated string; it is freed at the end */
135  Unique = SumToString(Sum);
136  LOG_VERBOSE0("Unique %s",Unique);
137 
138  if (GlobalImportGold)
139  {
140  LOG_VERBOSE0("Import Gold %s",Unique);
141  rc = fo_RepImport(GlobalTempFile,"gold",Unique,1);
142  if (rc != 0)
143  {
144  LOG_FATAL("upload %ld Failed to import %s from %s into repository gold %s",
146  SafeExit(4);
147  }
148  /* Put the file in the "files" repository too */
149  Path = fo_RepMkPath("gold",Unique);
150  if ((int)ForceGroup >= 0)
151  {
  /* NOTE(review): Path is used here before the NULL check further down */
152  rc = chown(Path,-1,ForceGroup);
153  if (rc) LOG_ERROR("chown failed on %s, error: %s", Path, strerror(errno));
154  }
155  } /* if GlobalImportGold */
156  else /* if !GlobalImportGold */
157  {
158  Path = GlobalTempFile;
159  } /* else if !GlobalImportGold */
160 
  /* NOTE(review): Path is formatted with %s before it is tested for NULL */
161  LOG_VERBOSE0("Path is %s",Path);
162 
163  if (!Path)
164  {
165  LOG_FATAL("upload %ld Failed to determine repository location for %s in gold",
166  GlobalUploadKey,Unique);
167  SafeExit(5);
168  }
169 
170  LOG_VERBOSE0("Import files %s",Path);
171 
172  if (fo_RepImport(Path,"files",Unique,1) != 0)
173  {
174  LOG_FATAL("upload %ld Failed to import %s from %s into files",
175  GlobalUploadKey,Unique,Path);
176  SafeExit(6);
177  }
178 
179  if ((int)ForceGroup >= 0)
180  {
181  rc = chown(Path,-1,ForceGroup);
182  if (rc) LOG_ERROR("chown failed on %s, error: %s", Path, strerror(errno));
183  }
184 
  /* Path is only freed when it came from fo_RepMkPath(), not when it aliases
   * the global temp file name */
185  if (Path != GlobalTempFile)
186  {
187  if(Path)
188  {
189  free(Path);
190  Path = NULL;
191  }
192  }
193 
194  /* Now update the DB */
195  /* Break out the sha1, md5, len components **/
  /* The offsets below treat Unique as "<40 chars>.<32 chars>.<length>" */
196  SHA1 = Unique;
197  MD5 = Unique+41; /* 40 for sha1 + 1 for '.' */
198  Len = Unique+41+33; /* 32 for md5 + 1 for '.' */
199  /* Set the pfile */
200  memset(SQL,'\0',STRMAX);
201  snprintf(SQL,STRMAX-1,"SELECT pfile_pk FROM pfile WHERE pfile_sha1 = '%.40s' AND pfile_md5 = '%.32s' AND pfile_size = %s;",
202  SHA1,MD5,Len);
203  result = PQexec(pgConn, SQL); /* SELECT */
204  if (fo_checkPQresult(pgConn, result, SQL, __FILE__, __LINE__)) SafeExit(7);
205 
206  /* See if pfile needs to be added */
207  if (PQntuples(result) <=0)
208  {
209  /* Insert it */
210  memset(SQL,'\0',STRMAX);
211  snprintf(SQL,STRMAX-1,"INSERT INTO pfile (pfile_sha1, pfile_md5, pfile_sha256, pfile_size) VALUES ('%.40s','%.32s','%.64s',%s)",
212  SHA1,MD5,SHA256,Len);
213  PQclear(result);
214  result = PQexec(pgConn, SQL);
215  if (fo_checkPQcommand(pgConn, result, SQL, __FILE__, __LINE__)) SafeExit(8);
216  PQclear(result);
  /* Fetch the key of the row just inserted.
   * NOTE(review): SQL still holds the INSERT text when it is handed to the
   * checker on the next line, so a diagnostic would name the wrong statement;
   * the same applies to the BEGIN and COMMIT checks below. */
217  result = PQexec(pgConn, "SELECT currval('pfile_pfile_pk_seq')");
218  if (fo_checkPQresult(pgConn, result, SQL, __FILE__, __LINE__)) SafeExit(182);
219  }
220 
  /* result is either the SELECT above or the currval query; row 0, column 0
   * is read as the pfile key in both cases */
221  PfileKey = atol(PQgetvalue(result,0,0));
222  LOG_VERBOSE0("pfile_pk = %ld",PfileKey);
223 
224  /* Update the DB so the pfile is linked to the upload record */
225  PQclear(result);
226  result = PQexec(pgConn, "BEGIN");
227  if (fo_checkPQcommand(pgConn, result, SQL, __FILE__, __LINE__)) SafeExit(-1);
228 
229  memset(SQL,0,STRMAX);
  /* FOR UPDATE: select the upload row inside the transaction before updating it */
230  snprintf(SQL,STRMAX-1,"SELECT * FROM upload WHERE upload_pk=%ld FOR UPDATE;",GlobalUploadKey);
231  PQclear(result);
232  result = PQexec(pgConn, SQL);
233  if (fo_checkPQresult(pgConn, result, SQL, __FILE__, __LINE__)) SafeExit(-1);
234 
235  memset(SQL,0,STRMAX);
236  snprintf(SQL,STRMAX-1,"UPDATE upload SET pfile_fk=%ld WHERE upload_pk=%ld",
237  PfileKey,GlobalUploadKey);
238  LOG_VERBOSE0("SQL=%s\n",SQL);
239  PQclear(result);
240  result = PQexec(pgConn, SQL);
241  if (fo_checkPQcommand(pgConn, result, SQL, __FILE__, __LINE__)) SafeExit(9);
242  PQclear(result);
243  result = PQexec(pgConn, "COMMIT;");
244  if (fo_checkPQcommand(pgConn, result, SQL, __FILE__, __LINE__)) SafeExit(92);
245  PQclear(result);
246 
247  /* Clean up */
248  if (Sum)
249  {
250  free(Sum);
251  Sum = NULL;
252  }
253 
254  if (Unique)
255  {
256  free(Unique);
257  Unique = NULL;
258  }
259 } /* DBLoadGold() */
260 
261 
/**
 * \brief Given a URL string, taint-protect it.
 *
 * Copies Sin into Sout. Single quotes, backticks and whitespace are written
 * as "%XX" (two upper-case hex digits of the byte value) so the result can
 * be placed between single quotes on a shell command line. Copying stops
 * with a failure result at the first '#'.
 *
 * \param Sin      NUL-terminated input string.
 * \param Sout     Destination buffer; zero-filled first, always terminated.
 * \param SoutSize Size of Sout in bytes.
 * \return 1 when all of Sin was copied; 0 when a '#' was met, when the
 *         output did not fit, or when an argument is NULL / the size is
 *         not positive.
 */
int TaintURL(char *Sin, char *Sout, int SoutSize)
{
  int i;
  int si;

  if (!Sin || !Sout || (SoutSize <= 0)) return(0);

  memset(Sout,'\0',SoutSize);
  SoutSize--; /* always keep the EOL */
  for(i=0,si=0; (si<SoutSize) && (Sin[i] != '\0'); i++)
  {
    /* <ctype.h> and the %02X conversion both need the byte as a
       non-negative value; a plain char may be negative */
    unsigned char c = (unsigned char)Sin[i];

    if (c == '#') return(0); /* end at the start of comment */
    if ((c != '\'') && (c != '`') && !isspace(c)) Sout[si++] = Sin[i];
    else
    {
      if (si+3 >= SoutSize) return(0); /* no room */
      snprintf(Sout+si,4,"%%%02X",c);
      si+=3;
    }
  }
  return(Sin[i]=='\0');
} /* TaintURL() */
288 
289 
/**
 * \brief Prepare directory for wget.
 *
 * Chooses where wget should store what it downloads.
 *
 * \param TempFile          Temp file name; when non-empty the file is
 *                          unlinked (result ignored) and TempFileDirectory
 *                          is chosen.
 * \param TempFileDir       Fallback directory, chosen when TempFile is
 *                          NULL or empty and this is non-empty.
 * \param TempFileDirectory Directory returned when TempFile is given.
 * \return The chosen directory pointer, or NULL when neither TempFile nor
 *         TempFileDir is usable.
 */
char *PrepareWgetDest(char *TempFile, char *TempFileDir, char *TempFileDirectory)
{
  char *chosen = NULL;

  if (TempFile != NULL && *TempFile != '\0')
  {
    /* Delete the temp file if it exists */
    unlink(TempFile);
    chosen = TempFileDirectory;
  }
  else if (TempFileDir != NULL && *TempFileDir != '\0')
  {
    chosen = TempFileDir;
  }

  return chosen;
}
315 
316 
/**
 * \brief Do the wget.
 *
 * Builds a shell command line around /usr/bin/wget, runs it with system(),
 * and - when TempFile is given - folds what was downloaded under
 * "<TempFile>.dir" into the single file TempFile (tar for a directory, mv
 * for a single file). The "<TempFile>.dir" directory is removed with
 * "rm -rf" before returning and on the failure paths that follow the
 * download.
 *
 * \param TempFile    Name of the single result file; may be NULL/empty, in
 *                    which case wget writes below TempFileDir (or to its
 *                    default location when that is empty too).
 * \param URL         URL to fetch; it is passed through TaintURL() first.
 * \param TempFileDir Download directory used when TempFile is not given.
 * \return 0 on success. Failures do not return: they end the process via
 *         SafeExit() with 10 (URL could not be tainted), 12 (wget exit code
 *         not 0), 26 (URL has no known scheme), 24 (tar/mv/find failed),
 *         15 (TempFile not created) or ASPRINTF_MEM_ERROR.
 */
328 int GetURL(char *TempFile, char *URL, char *TempFileDir)
329 {
330  char *cmd;
331  char TaintedURL[STRMAX];
332  char TempFileDirectory[STRMAX+128];
333  char *delete_tmpdir_cmd;
334  int rc;
335  int res;
336 
337  memset(TempFileDirectory,'\0',STRMAX+128);
338 
339  /* save each upload files in /srv/fossology/repository/localhost/wget/wget.xxx.dir/ */
  /* NOTE(review): TempFile is formatted with %s here although later code
   * tests it for NULL, and the sprintf is not bounded by the buffer size. */
340  sprintf(TempFileDirectory, "%s.dir", TempFile);
  /* NOTE(review): the directory name is not quoted in this command */
341  res = asprintf(&delete_tmpdir_cmd, "rm -rf %s", TempFileDirectory);
342  if (res == -1)
343  {
344  ASPRINTF_MEM_ERROR_LOG;
345  SafeExit(ASPRINTF_MEM_ERROR);
346  }
347 #if 1
348  char WgetArgs[]="--no-check-certificate --progress=dot -rc -np -e robots=off";
349 #else
350  /* wget < 1.10 does not support "--no-check-certificate" */
351  char WgetArgs[]="--progress=dot -rc -np -e robots=off";
352 #endif
353 
354  if (!TaintURL(URL,TaintedURL,STRMAX))
355  {
356  LOG_FATAL("Failed to taint the URL '%s'",URL);
357  SafeExit(10);
358  }
359 
360  /*
361  Wget options:
362  --progress=dot :: display a new line as it progresses.
363  --no-check-certificate :: download HTTPS files even if the cert cannot
364  be validated. (Neal has many issues with SSL and does not view it
365  as very secure.) Without this, some caching proxies and web sites
366  with old certs won't download. Granted, in theory a bad cert should
367  prevent downloads. In reality, 99.9% of bad certs are because the
368  admin did not notice that they expired and not because of a hijacking
369  attempt.
370  */
371 
372  struct stat sb;
373  int rc_system =0;
374  char no_proxy[STRMAX] = {0};
375  char proxy[STRMAX] = {0};
376  char proxy_temp[STRMAX] = {0};
377 
378  /* http_proxy is optional so don't error if it doesn't exist */
  /* Each configured proxy becomes an "export ..._proxy='...' ;" prefix of the
   * wget command line; no_proxy is handed to wget with "-e".
   * NOTE(review): up to three pieces of at most STRMAX-1 characters each are
   * strcat'ed into proxy[STRMAX] without a length check.
   * NOTE(review): listing line 379 is absent from this view. */
380  if (GlobalProxy[0] && GlobalProxy[0][0])
381  {
382  snprintf(proxy_temp, STRMAX-1, "export http_proxy='%s' ;", GlobalProxy[0]);
383  strcat(proxy, proxy_temp);
384  }
385  if (GlobalProxy[1] && GlobalProxy[1][0])
386  {
387  snprintf(proxy_temp, STRMAX-1, "export https_proxy='%s' ;", GlobalProxy[1]);
388  strcat(proxy, proxy_temp);
389  }
390  if (GlobalProxy[2] && GlobalProxy[2][0])
391  {
392  snprintf(proxy_temp, STRMAX-1, "export ftp_proxy='%s' ;", GlobalProxy[2]);
393  strcat(proxy, proxy_temp);
394  }
395  if (GlobalProxy[3] && GlobalProxy[3][0])
396  {
397  snprintf(no_proxy, STRMAX-1, "-e no_proxy='%s'", GlobalProxy[3]);
398  }
399 
400  char *dest;
401 
  /* also unlinks TempFile when one is given */
402  dest = PrepareWgetDest(TempFile, TempFileDir, TempFileDirectory);
403 
  /* GlobalParam is placed on the command line as-is (not quoted) */
404  if (dest) {
405  res = asprintf(&cmd," %s /usr/bin/wget -q %s -P '%s' '%s' %s %s 2>&1",
406  proxy, WgetArgs, dest, TaintedURL, GlobalParam, no_proxy);
407  }
408  else
409  {
410  res = asprintf(&cmd," %s /usr/bin/wget -q %s '%s' %s %s 2>&1",
411  proxy, WgetArgs, TaintedURL, GlobalParam, no_proxy);
412  }
413 
414  if (res == -1)
415  {
416  ASPRINTF_MEM_ERROR_LOG;
417  free(delete_tmpdir_cmd);
418  SafeExit(ASPRINTF_MEM_ERROR);
419  }
420 
421  /* the command is like
422  ". /usr/local/etc/fossology/Proxy.conf;
423  /usr/bin/wget -q --no-check-certificate --progress=dot -rc -np -e robots=off -P
424  '/srv/fossology/repository/localhost/wget/wget.xxx.dir/'
425  'http://a.org/file' -l 1 -R index.html* 2>&1"
426  */
427  LOG_VERBOSE0("CMD: %s", cmd);
428  rc = system(cmd);
429 
  /* Only a normal exit with a non-zero code is treated as a failed download
   * here; a status for which WIFEXITED() is false falls through. */
430  if (WIFEXITED(rc) && (WEXITSTATUS(rc) != 0))
431  {
432  LOG_FATAL("upload %ld Download failed; Return code %d from: %s",GlobalUploadKey,WEXITSTATUS(rc),cmd);
433  unlink(GlobalTempFile);
434  rc_system = system(delete_tmpdir_cmd);
435  if (!WIFEXITED(rc_system)) systemError(__LINE__, rc_system, delete_tmpdir_cmd)
436  free(delete_tmpdir_cmd);
437  SafeExit(12);
438  }
439 
440  /* Run from scheduler! store /srv/fossology/repository/localhost/wget/wget.xxx.dir/<files|directories> to one temp file */
441  if (TempFile && TempFile[0])
442  {
443  char* tmpfile_path;
444  /* for one url http://a.org/test.deb, TempFilePath should be /srv/fossology/repository/localhost/wget/wget.xxx.dir/a.org/test.deb */
445  int Position = GetPosition(TaintedURL);
446  if (0 == Position)
447  {
448  LOG_FATAL("path %s is not http://, https://, or ftp://", TaintedURL);
449  unlink(GlobalTempFile);
450  rc_system = system(delete_tmpdir_cmd);
451  if (!WIFEXITED(rc_system)) systemError(__LINE__, rc_system, delete_tmpdir_cmd)
452  free(delete_tmpdir_cmd);
453  SafeExit(26);
454  }
455  res = asprintf(&tmpfile_path, "%s/%s", TempFileDirectory, TaintedURL + Position);
456  if (res == -1)
457  {
458  ASPRINTF_MEM_ERROR_LOG;
459  free(delete_tmpdir_cmd);
460  SafeExit(ASPRINTF_MEM_ERROR);
461  }
462 
  /* stat() returning 0 means the expected download path exists */
463  if (!stat(tmpfile_path, &sb))
464  {
465  if (S_ISDIR(sb.st_mode))
466  {
  /* NOTE(review): cmd still holds the wget command string at this point and
   * is overwritten without a free(); the same holds for the asprintf(&cmd,
   * ...) calls at listing lines 490 and 517. */
467  res = asprintf(&cmd, "find '%s' -mindepth 1 -type d -empty -exec rmdir {} \\; > /dev/null 2>&1", tmpfile_path);
468  if (res == -1)
469  {
470  ASPRINTF_MEM_ERROR_LOG;
471  free(tmpfile_path);
472  free(delete_tmpdir_cmd);
473  SafeExit(ASPRINTF_MEM_ERROR);
474  }
475  rc_system = system(cmd); // delete all empty directories downloaded
476  if (!WIFEXITED(rc_system)) systemError(__LINE__, rc_system, cmd)
477  free(cmd);
478 
479  res = asprintf(&cmd, "tar -cf '%s' -C '%s' ./ 1>/dev/null", TempFile, tmpfile_path);
480  if (res == -1)
481  {
482  ASPRINTF_MEM_ERROR_LOG;
483  free(tmpfile_path);
484  free(delete_tmpdir_cmd);
485  SafeExit(ASPRINTF_MEM_ERROR);
486  }
487  }
488  else
489  {
490  res = asprintf(&cmd, "mv '%s' '%s' 2>&1", tmpfile_path, TempFile);
491  if (res == -1)
492  {
493  ASPRINTF_MEM_ERROR_LOG;
494  free(tmpfile_path);
495  free(delete_tmpdir_cmd);
496  SafeExit(ASPRINTF_MEM_ERROR);
497  }
498  }
499 
500  free(tmpfile_path);
501 
  /* runs either the tar or the mv command prepared above */
502  rc_system = system(cmd);
503  if (rc_system != 0)
504  {
505  systemError(__LINE__, rc_system, cmd)
506  free(cmd);
507  unlink(GlobalTempFile);
508  rc_system = system(delete_tmpdir_cmd);
509  if (!WIFEXITED(rc_system)) systemError(__LINE__, rc_system, delete_tmpdir_cmd)
510  free(delete_tmpdir_cmd);
511  SafeExit(24); // failed to store the temperary directory(one file) as one temperary file
512  }
513 
514  }
515  else
516  {
  /* The expected path does not exist: move every regular file found below the
   * download directory onto TempFile instead.
   * NOTE(review): tmpfile_path is not freed on this branch, and TempFile is
   * not quoted in the find command. */
517  res = asprintf(&cmd, "find '%s' -type f -exec mv {} %s \\; > /dev/null 2>&1", TempFileDirectory, TempFile);
518  if (res == -1)
519  {
520  ASPRINTF_MEM_ERROR_LOG;
521  free(delete_tmpdir_cmd);
522  SafeExit(ASPRINTF_MEM_ERROR);
523  }
524  rc_system = system(cmd);
525  if (rc_system != 0)
526  {
527  systemError(__LINE__, rc_system, cmd)
528  free(cmd);
529  unlink(GlobalTempFile);
530  rc_system = system(delete_tmpdir_cmd);
531  if (!WIFEXITED(rc_system)) systemError(__LINE__, rc_system, delete_tmpdir_cmd)
532  free(delete_tmpdir_cmd);
533  SafeExit(24); // failed to store the temperary directory(one file) as one temperary file
534  }
535 
536  }
537  }
538 
  /* final sanity check: the result must be a regular file (IsFile is called
   * with Link=1) */
539  if (TempFile && TempFile[0] && !IsFile(TempFile,1))
540  {
541  LOG_FATAL("upload %ld File %s not created from URL: %s, CMD: %s",GlobalUploadKey,TempFile,URL, cmd);
542  free(cmd);
543  unlink(GlobalTempFile);
544  rc_system = system(delete_tmpdir_cmd);
545  if (!WIFEXITED(rc_system)) systemError(__LINE__, rc_system, delete_tmpdir_cmd)
546  free(delete_tmpdir_cmd);
547  SafeExit(15);
548  }
549 
550  free(cmd);
551 
552  /* remove the temp dir /srv/fossology/repository/localhost/wget/wget.xxx.dir/ for this upload */
553  rc_system = system(delete_tmpdir_cmd);
554  if (!WIFEXITED(rc_system)) systemError(__LINE__, rc_system, delete_tmpdir_cmd)
555  LOG_VERBOSE0("upload %ld Downloaded %s to %s",GlobalUploadKey,URL,TempFile);
556 
557  free(delete_tmpdir_cmd);
558 
559  return(0);
560 } /* GetURL() */
561 
567 {
568  char *command = NULL;
569  char *tmp_file_directory;
570  char *delete_tmpdir_cmd;
571  char *tmp_home;
572 
573  int rc = 0;
574  int resethome = 0; // 0: default; 1: home is null before setting, should rollback
575  char *homeenv = NULL;
576  int res;
577 
578  homeenv = getenv("HOME");
579  if(NULL == homeenv) resethome = 1;
580 
581  /* We need HOME to point to where .gitconfig is installed
582  * path is the repository path and .gitconfig is installed in its parent directory
583  */
584  res = asprintf(&tmp_home, "%s/..", fo_config_get(sysconfig, "FOSSOLOGY", "path", NULL));
585  if (res == -1)
586  {
587  return ASPRINTF_MEM_ERROR;
588  }
589 
590  setenv("HOME", tmp_home, 1);
591  free(tmp_home);
592 
593  /* save each upload files in /srv/fossology/repository/localhost/wget/wget.xxx.dir/ */
594  res = asprintf(&tmp_file_directory, "%s.dir", GlobalTempFile);
595  if (res == -1)
596  {
597  ASPRINTF_MEM_ERROR_LOG;
598  return ASPRINTF_MEM_ERROR;
599  }
600 
601  res = asprintf(&delete_tmpdir_cmd, "rm -rf %s", tmp_file_directory);
602  if (res == -1)
603  {
604  ASPRINTF_MEM_ERROR_LOG;
605  free(tmp_file_directory);
606  return ASPRINTF_MEM_ERROR;
607  }
608 
609  command = GetVersionControlCommand(1);
610  if (!command)
611  {
612  free(tmp_file_directory);
613  return ASPRINTF_MEM_ERROR;
614  }
615  rc = system(command);
616  free(command);
617 
618  if (resethome) // rollback
619  unsetenv("HOME");
620  else
621  setenv("HOME", homeenv, 1);
622 
623  if (rc != 0)
624  {
625  command = GetVersionControlCommand(-1);
626  if (!command)
627  {
628  ASPRINTF_MEM_ERROR_LOG;
629  free(tmp_file_directory);
630  return ASPRINTF_MEM_ERROR;
631  }
632  systemError(__LINE__, rc, command)
637  LOG_FATAL("please make sure the URL of repo is correct, also add correct proxy for your version control system, command is:%s, GlobalTempFile is:%s, rc is:%d. \n", command, GlobalTempFile, rc);
638  /* remove the temp dir /srv/fossology/repository/localhost/wget/wget.xxx.dir/ for this upload */
639  rc = system(delete_tmpdir_cmd);
640  if (!WIFEXITED(rc)) systemError(__LINE__, rc, delete_tmpdir_cmd)
641  free(command);
642  free(tmp_file_directory);
643  free(delete_tmpdir_cmd);
644  return 1;
645  }
646 
647  res = asprintf(&command, "tar -cf '%s' -C '%s' ./ 1>/dev/null", GlobalTempFile, tmp_file_directory);
648  if (res == -1)
649  {
650  ASPRINTF_MEM_ERROR_LOG;
651  free(tmp_file_directory);
652  free(delete_tmpdir_cmd);
653  return ASPRINTF_MEM_ERROR;
654  }
655  free(tmp_file_directory);
656  rc = system(command);
657  if (rc != 0)
658  {
659  systemError(__LINE__, rc, command)
660  /* remove the temp dir /srv/fossology/repository/localhost/wget/wget.xxx.dir/ for this upload */
661  rc = system(delete_tmpdir_cmd);
662  if (!WIFEXITED(rc)) systemError(__LINE__, rc, delete_tmpdir_cmd)
663  LOG_FATAL("DeleteTempDirCmd is:%s\n", delete_tmpdir_cmd);
664  free(delete_tmpdir_cmd);
665  return 1;
666  }
667 
668  /* remove the temp dir /srv/fossology/repository/localhost/wget/wget.xxx.dir/ for this upload */
669  rc = system(delete_tmpdir_cmd);
670  if (!WIFEXITED(rc)) systemError(__LINE__, rc, delete_tmpdir_cmd)
671  free(delete_tmpdir_cmd);
672 
673  return 0; // succeed to retrieve source
674 }
675 
683 void SetEnv (char *S, char *TempFileDir)
684 {
685  int SLen,GLen; /* lengths for S and global string */
686 
687  GlobalUploadKey = -1;
688  memset(GlobalTempFile,'\0',STRMAX);
689  memset(GlobalURL,'\0',URLMAX);
690  if (!S) return;
691 
692  /* first value is the upload_pk */
693  GlobalUploadKey = atol(S);
694  while(S[0] && isdigit(S[0])) S++;
695  while(S[0] && isspace(S[0])) S++; /* skip spaces */
696 
697 #if 1
698  /* second value is the temp file location */
700  SLen=0;
701  GLen=0;
702  while((GLen < STRMAX-4) && S[SLen] && !isspace(S[SLen]))
703  {
704  if ((S[SLen] == '\'') || isspace(S[SLen]) || !isprint(S[SLen]))
705  {
706  sprintf(GlobalTempFile+GLen,"%%%02x",(unsigned char)(S[SLen]));
707  GLen += 3;
708  }
709  else GlobalTempFile[GLen++] = S[SLen];
710  SLen++;
711  }
712  S+=SLen;
713  while(S[0] && isspace(S[0])) S++; /* skip spaces */
714 #endif
715  if (TempFileDir)
716  {
717  memset(GlobalTempFile,'\0',STRMAX);
718  snprintf(GlobalTempFile,STRMAX-1,"%s/wget.%d",TempFileDir,getpid());
719  }
720 
721  /* third value is the URL location -- taint any single-quotes */
722  SLen=0;
723  GLen=0;
724  while((GLen < STRMAX-4) && S[SLen])
725  {
726  if ((S[SLen] == '\\') && isprint(S[SLen+1])) // in file path, if include '\ ', that mean this file name include spaces
727  {
728  LOG_FATAL("S[SLen] is:%c\n", S[SLen]);
729  GlobalURL[GLen++] = ' ';
730  SLen += 2;
731  continue;
732  }
733  else if ((S[SLen] != '\\') && isspace(S[SLen])) break;
734  else if ((S[SLen] == '\'') || isspace(S[SLen]) || !isprint(S[SLen]))
735  {
736  sprintf(GlobalURL+GLen,"%%%02x",(unsigned char)(S[SLen]));
737  GLen += 3;
738  }
739  else GlobalURL[GLen++] = S[SLen];
740  SLen++;
741  }
742  S+=SLen;
743 
744  while(S[0] && isspace(S[0])) S++; /* skip spaces */
745 
746  char Type[][4] = {"SVN", "Git", "CVS"};
747  int i = 0; // type index
748 
749  memset(GlobalType,'\0',STRMAX);
750  strncpy(GlobalType, S, 3);
751  if ((0 == strcmp(GlobalType, Type[i++])) || (0 == strcmp(GlobalType, Type[i++])) || (0 == strcmp(GlobalType, Type[i++])))
752  {
753  S += 3;
754  }
755  else
756  {
757  memset(GlobalType,'\0',STRMAX);
758  }
759 
760  strncpy(GlobalParam, S, sizeof(GlobalParam) - 1); // get the parameters, kind of " -A rpm -R fosso -l 1* "
761  LOG_VERBOSE0(" upload %ld wget_agent globals loaded:\n upload_pk = %ld\n tmpfile=%s URL=%s GlobalParam=%s\n",GlobalUploadKey, GlobalUploadKey,GlobalTempFile,GlobalURL,GlobalParam);
762 } /* SetEnv() */
763 
764 
772 char *PathCheck(char *DirPath)
773 {
774  char *NewPath;
775  char *subs;
776  char TmpPath[2048];
777  char HostName[2048];
778 
779  NewPath = strdup(DirPath);
780 
781  if ((subs = strstr(NewPath,"%H")) )
782  {
783  /* hostname substitution */
784  gethostname(HostName, sizeof(HostName));
785 
786  *subs = 0;
787  snprintf(TmpPath, sizeof(TmpPath), "%s%s%s", NewPath, HostName, subs+2);
788  free(NewPath);
789  NewPath = strdup(TmpPath);
790  }
791 
792  if ((subs = strstr(NewPath, "%R")) )
793  {
794  /* repo location substitution */
795  *subs = 0;
796 
797  snprintf(TmpPath, sizeof(TmpPath), "%s%s%s", NewPath, fo_config_get(sysconfig, "FOSSOLOGY", "path", NULL), subs+2);
798  free(NewPath);
799  NewPath = strdup(TmpPath);
800  }
801 
802  return(NewPath);
803 }
804 
819 int Archivefs(char *Path, char *TempFile, char *TempFileDir, struct stat Status)
820 {
821  char *cmd;
822  int rc_system = 0;
823  int res;
824 
825  res = asprintf(&cmd , "mkdir -p '%s' >/dev/null 2>&1", TempFileDir);
826  if (res == -1)
827  {
828  ASPRINTF_MEM_ERROR_LOG;
829  return 0;
830  }
831 
832  rc_system = system(cmd);
833  if (!WIFEXITED(rc_system))
834  {
835  LOG_FATAL("[%s:%d] Could not create temporary directory", __FILE__, __LINE__);
836  systemError(__LINE__, rc_system, cmd)
837  free(cmd);
838  return 0;
839  }
840  free(cmd);
841 
842  if (S_ISDIR(Status.st_mode)) /* directory? */
843  {
844  res = asprintf(&cmd, "tar %s -cf '%s' -C '%s' ./ 1>/dev/null", GlobalParam, TempFile, Path);
845  if (res == -1)
846  {
847  ASPRINTF_MEM_ERROR_LOG;
848  return 0;
849  }
850  rc_system = system(cmd);
851  if (!WIFEXITED(rc_system))
852  {
853  systemError(__LINE__, rc_system, cmd)
854  free(cmd);
855  return 0;
856  }
857  free(cmd);
858  } else if (strstr(Path, "*")) // wildcards
859  {
860  /* for the wildcards upload, keep the path */
861  /* copy * files to TempFileDir/temp primarily */
862  res = asprintf(&cmd, "mkdir -p '%s/temp' > /dev/null 2>&1 && cp -r %s '%s/temp' > /dev/null 2>&1", TempFileDir, Path, TempFileDir);
863  if (res == -1)
864  {
865  ASPRINTF_MEM_ERROR_LOG;
866  return 0;
867  }
868  rc_system = system(cmd);
869  if (rc_system != 0)
870  {
871  systemError(__LINE__, rc_system, cmd)
872  free(cmd);
873  return 0;
874  }
875  free(cmd);
876  res = asprintf(&cmd, "tar -cf '%s' -C %s/temp ./ 1> /dev/null && rm -rf %s/temp > /dev/null 2>&1", TempFile, TempFileDir, TempFileDir);
877  if (res == -1)
878  {
879  ASPRINTF_MEM_ERROR_LOG;
880  return 0;
881  }
882  rc_system = system(cmd);
883  if (rc_system != 0)
884  {
885  systemError(__LINE__, rc_system, cmd)
886  free(cmd);
887  return 0;
888  }
889  free(cmd);
890  } else if(S_ISREG(Status.st_mode)) /* regular file? */
891  {
892  res = asprintf(&cmd, "cp '%s' '%s' >/dev/null 2>&1", Path, TempFile);
893  if (res == -1)
894  {
895  ASPRINTF_MEM_ERROR_LOG;
896  return 0;
897  }
898  rc_system = system(cmd);
899  if (rc_system != 0)
900  {
901  systemError(__LINE__, rc_system, cmd)
902  free(cmd);
903  return 0;
904  }
905  free(cmd);
906  } else return 0; /* neither a directory nor a regular file */
907 
908  return 1;
909 }
910 
916 void GetProxy()
917 {
918  int i = 0;
919  int count_temp = 0;
920  char *http_proxy_host = NULL;
921  char *http_proxy_port = NULL;
922  char *http_temp = NULL;
923 
924  for (i = 0; i < 6; i++)
925  {
926  GlobalProxy[i++] = NULL;
927  }
928  GError* error1 = NULL;
929  GError* error2 = NULL;
930  GError* error3 = NULL;
931  GError* error4 = NULL;
932 
933  i = 0;
934  GlobalProxy[i] = fo_config_get(sysconfig, "FOSSOLOGY", "http_proxy", &error1);
935  trim(GlobalProxy[i++]);
936  GlobalProxy[i] = fo_config_get(sysconfig, "FOSSOLOGY", "https_proxy", &error2);
937  trim(GlobalProxy[i++]);
938  GlobalProxy[i] = fo_config_get(sysconfig, "FOSSOLOGY", "ftp_proxy", &error3);
939  trim(GlobalProxy[i++]);
940  GlobalProxy[i] = fo_config_get(sysconfig, "FOSSOLOGY", "no_proxy", &error4);
941  trim(GlobalProxy[i++]);
942 
943 
944  if (GlobalProxy[0] && GlobalProxy[0][0])
945  {
946  http_proxy_port = strrchr(GlobalProxy[0], ':');
947  strncpy(GlobalHttpProxy, GlobalProxy[0], (http_proxy_port - GlobalProxy[0]));
948  http_proxy_port++;
949 
950  if (http_proxy_port && http_proxy_port[0])
951  {
952  /* exclude '/' in http_proxy_port and 'http://' in http_proxy_host */
953  http_temp = strchr(http_proxy_port, '/');
954  if (http_temp && http_temp[0])
955  {
956  count_temp = http_temp - http_proxy_port;
957  http_proxy_port[count_temp] = 0;
958  }
960  GlobalProxy[5] = http_proxy_port;
961 
962  http_proxy_host = strrchr(GlobalHttpProxy, '/');
963  if (http_proxy_host && http_proxy_host[0])
964  {
965  http_proxy_host++;
966  GlobalProxy[4] = http_proxy_host;
967  }
968  }
969  }
970 }
971 
/**
 * \brief Here are some suggested options.
 *
 * Prints the usage line followed by the option summary to stdout.
 *
 * \param Name Program name shown in the usage line.
 */
void Usage(char *Name)
{
  /* option summary, one output line per entry */
  static const char *const helpLines[] = {
    " -h :: help (print this message), then exit.\n",
    " -i :: Initialize the DB connection then exit (nothing downloaded)\n",
    " -g group :: Set the group on processed files (e.g., -g fossy).\n",
    " -G :: Do NOT copy the file to the gold repository.\n",
    " -d dir :: directory for downloaded file storage\n",
    " -k key :: upload key identifier (number)\n",
    " -A acclist :: Specify comma-separated lists of file name suffixes or patterns to accept.\n",
    " -R rejlist :: Specify comma-separated lists of file name suffixes or patterns to reject.\n",
    " -l depth :: Specify recursion maximum depth level depth. The default maximum depth is 5.\n",
    " -c configdir :: Specify the directory for the system configuration.\n",
    " -C :: run from command line.\n",
    " -v :: verbose (-vv = more verbose).\n",
    " -V :: print the version info, then exit.\n",
    " OBJ :: if a URL is listed, then it is retrieved.\n",
    " if a file is listed, then it used.\n",
    " if OBJ and Key are provided, then it is inserted into\n",
    " the DB and repository.\n",
    " no file :: process data from the scheduler.\n"
  };
  size_t k;

  printf("Usage: %s [options] [OBJ]\n",Name);
  for (k = 0; k < sizeof(helpLines) / sizeof(helpLines[0]); k++)
  {
    fputs(helpLines[k], stdout);
  }
} /* Usage() */
998 
1007 {
1008 #define PREFIXMAX 10
1009 
1010  const char needle[] = " ";
1011  const char needle2[] = "//";
1012  int index = 0;
1013  char *username = NULL;
1014  char *password = NULL;
1015  char http[PREFIXMAX] = "";
1016  char URI[FILEPATH] = "";
1017  char *token = NULL;
1018  char *temp = NULL;
1019  char *additionalParams = NULL;
1020 
1021  if (strstr(GlobalParam, "password") && strstr(GlobalParam, "username"))
1022  {
1023  temp = strstr(GlobalURL, needle2);
1024  if (!temp || (temp - GlobalURL) < 3)
1025  {
1026  return;
1027  }
1028  strcpy(URI, temp + 2);
1029  if (strlen(GlobalURL) - strlen(URI) > PREFIXMAX - 1)
1030  {
1031  return;
1032  }
1033 
1034  strncpy(http, GlobalURL, strlen(GlobalURL) - strlen(URI));
1035  /* get the first token */
1036  token = strtok(GlobalParam, needle);
1037  /* walk through other tokens */
1038  while( token != NULL )
1039  {
1040  if (1 == index) username = token;
1041  if (3 == index) {
1042  password = token;
1043  additionalParams = token + strlen(token) + 1;
1044  break;
1045  }
1046  token = strtok(NULL, needle);
1047  index++;
1048  }
1049  snprintf(GlobalURL, URLMAX-1, "%s%s:%s@%s", http, username, password, URI);
1050 
1051  if (strlen(additionalParams) > 0) {
1052  memmove(GlobalParam, additionalParams, strlen(additionalParams) +1);
1053  }
1054  else {
1055  memset(GlobalParam,'\0',STRMAX);
1056  }
1057  }
1058 }
1059 
1064 {
1065  const char needle[] = " ";
1066  int index = 0;
1067  int secondIndex = 0;
1068  char *username = NULL;
1069  char *token = NULL;
1070  char newParam[STRMAX];
1071  char *beg = NULL;
1072  char *end = NULL;
1073 
1074  memset(newParam, '\0', STRMAX);
1075  // SVN if parameters exists
1076  if (strstr(GlobalParam, "password") && strstr(GlobalParam, "username")) {
1077  /* get the first token */
1078  token = strtok(GlobalParam, needle);
1079  /* walk through other tokens */
1080  while( token != NULL )
1081  {
1082  if (1 == index) { //username is the first parameter
1083  username = token;
1084  break;
1085  }
1086  token = strtok(NULL, needle);
1087  index++;
1088  }
1089  // Create new parameters with masked password
1090  sprintf(newParam, " --username %s --password ****", username);
1091  memset(GlobalParam, '\0', STRMAX);
1092  strcpy(GlobalParam, newParam);
1093  }
1094  // GIT
1095  else {
1096  // First : from http://
1097  index = strcspn(GlobalURL, ":");
1098  // Second after username
1099  secondIndex = strcspn(GlobalURL + index + 1, ":");
1100  index = index + secondIndex + 1;
1101  if(index < strlen(GlobalURL)) { // Contains second :
1102  beg = (char *)malloc(index + 2);
1103  memset(beg, '\0', index + 2);
1104  strncpy(beg, GlobalURL, index + 1);
1105  // Place where password ends
1106  end = strchr(GlobalURL, '@');
1107  sprintf(newParam, "%s****%s", beg, end);
1108  strcpy(GlobalURL, newParam);
1109  }
1110  }
1111 }
1112 
1118 char* GetVersionControlCommand(int withPassword)
1119 {
1120  char Type[][4] = {"SVN", "Git", "CVS"};
1121  char *command;
1122  char *tmpfile_dir;
1123  int res;
1124 
1126  res = asprintf(&tmpfile_dir, "%s.dir", GlobalTempFile);
1127  if (res == -1)
1128  {
1129  return NULL;
1130  }
1131 
1132  if(withPassword < 0) MaskPassword();
1133  if (0 == strcmp(GlobalType, Type[0]))
1134  {
1135  if (GlobalProxy[0] && GlobalProxy[0][0])
1136  {
1137  res = asprintf(&command, "svn --config-option servers:global:http-proxy-host=%s --config-option servers:global:http-proxy-port=%s export %s %s %s --no-auth-cache >/dev/null 2>&1", GlobalProxy[4], GlobalProxy[5], GlobalURL, GlobalParam, tmpfile_dir);
1138  }
1139  else
1140  {
1141  res = asprintf(&command, "svn export %s %s %s --no-auth-cache >/dev/null 2>&1", GlobalURL, GlobalParam, tmpfile_dir);
1142  }
1143  }
1144  else if (0 == strcmp(GlobalType, Type[1]))
1145  {
1147  if (GlobalProxy[0] && GlobalProxy[0][0])
1148  {
1149  res = asprintf(&command, "git config --global http.proxy %s && git clone %s %s %s && rm -rf %s/.git", GlobalProxy[0], GlobalURL, GlobalParam, tmpfile_dir, tmpfile_dir);
1150  }
1151  else
1152  {
1153  res = asprintf(&command, "git clone %s %s %s >/dev/null 2>&1 && rm -rf %s/.git", GlobalURL, GlobalParam, tmpfile_dir, tmpfile_dir);
1154  }
1155  }
1156  if (res == -1)
1157  {
1158  free(tmpfile_dir);
1159  return NULL;
1160  }
1161 
1162  return command;
1163 }
char * SumToString(Cksum *Sum)
Return string representing a Cksum. NOTE: The calling function must free() the string!
Definition: checksum.c:237
Cksum * SumComputeFile(FILE *Fin)
Compute the checksum, allocate and return a string containing the sum value.
Definition: checksum.c:115
char * trim(char *ptext)
Trimming whitespace.
Definition: fossconfig.c:690
char * fo_config_get(fo_conf *conf, const char *group, const char *key, GError **error)
Gets an element based on its group name and key name. If the group or key is not found,...
Definition: fossconfig.c:336
int fo_checkPQresult(PGconn *pgConn, PGresult *result, char *sql, char *FileID, int LineNumb)
Check the result status of a postgres SELECT.
Definition: libfossdb.c:170
int fo_checkPQcommand(PGconn *pgConn, PGresult *result, char *sql, char *FileID, int LineNumb)
Check the result status of a postgres command (not select). If an error occurred, write the error to s...
Definition: libfossdb.c:204
char * fo_RepMkPath(const char *Type, char *Filename)
Given a filename, construct the full path to the file.
Definition: libfossrepo.c:352
int fo_RepImport(char *Source, char *Type, char *Filename, int Link)
Import a file into the repository.
Definition: libfossrepo.c:812
void fo_scheduler_disconnect(int retcode)
Disconnect the scheduler connection.
fo_conf * sysconfig
Store check sum of a file.
Definition: checksum.h:33
uint64_t DataLen
Size of the file.
Definition: checksum.h:36
int IsFile(char *Fname, int Link)
Given a filename, is it a file?
Definition: wget_agent.c:38
char GlobalParam[STRMAX]
Additional parameters.
Definition: wget_agent.c:27
void Usage(char *Name)
Here are some suggested options.
Definition: wget_agent.c:976
char * GetVersionControlCommand(int withPassword)
get the command to run to get files from version control system
Definition: wget_agent.c:1118
char GlobalType[STRMAX]
Type of download (FILE/version control)
Definition: wget_agent.c:26
int GetURL(char *TempFile, char *URL, char *TempFileDir)
Do the wget.
Definition: wget_agent.c:328
char * GlobalProxy[6]
Proxy from fossology.conf.
Definition: wget_agent.c:28
int GetPosition(char *URL)
Get the position (ending + 1) of http|https|ftp:// of one url.
Definition: wget_agent.c:66
char GlobalHttpProxy[STRMAX]
HTTP proxy command to use.
Definition: wget_agent.c:29
int TaintURL(char *Sin, char *Sout, int SoutSize)
Given a URL string, taint-protect it.
Definition: wget_agent.c:269
char * PrepareWgetDest(char *TempFile, char *TempFileDir, char *TempFileDirectory)
Prepare directory for wget.
Definition: wget_agent.c:300
PGconn * pgConn
For the DB.
Definition: wget_agent.c:22
char SQL[STRMAX]
For DB.
Definition: wget_agent.c:20
char GlobalURL[URLMAX]
URL to download.
Definition: wget_agent.c:25
int GetVersionControl()
Get source code from version control system.
Definition: wget_agent.c:566
void SetEnv(char *S, char *TempFileDir)
Convert input pairs into globals.
Definition: wget_agent.c:683
long GlobalUploadKey
Input for this system.
Definition: wget_agent.c:23
gid_t ForceGroup
Set to group id to be used for download files.
Definition: wget_agent.c:31
void MaskPassword()
Get the username from GlobalParam and create new parameters without password.
Definition: wget_agent.c:1063
void SafeExit(int rc)
Closes the connection to the server, free the database connection, and exit.
Definition: wget_agent.c:53
void replace_url_with_auth()
Translate authentication of git clone.
Definition: wget_agent.c:1006
char GlobalTempFile[STRMAX]
Temp file to be used.
Definition: wget_agent.c:24
char * PathCheck(char *DirPath)
Check if path contains a "%H", "%R".
Definition: wget_agent.c:772
void DBLoadGold()
Insert a file into the database and repository.
Definition: wget_agent.c:81
void GetProxy()
Get proxy from fossology.conf.
Definition: wget_agent.c:916
int GlobalImportGold
Set to 0 to not store file in gold repository.
Definition: wget_agent.c:30
int Archivefs(char *Path, char *TempFile, char *TempFileDir, struct stat Status)
Copy downloaded files to temporary directory.
Definition: wget_agent.c:819