FOSSology  4.4.0
Open Source License Compliance by Open Source Software
mktop1k.php
1 #!/usr/bin/php
2 
3 <?php
4 /*
5  mktop1k.php
6  SPDX-FileCopyrightText: © 2007 Hewlett-Packard Development Company, L.P.
7 
8  SPDX-License-Identifier: GPL-2.0-only
9 */
10 
27 // FIXME: this should be a global from pathinclude? $LIBDIR = '/usr/local/lib';
28 require_once("FIXMETOBERELATIVE/pathinclude.php");
29 require_once("$LIBDIR/lib_projxml.h.php");
30 //require_once("./lib_projxml.h.php"); // dev copy
31 
32 
// Help text for the command line. It is printed for -h, when too few
// arguments are given, and alongside the "Unknown argument" error.
$usage = <<< USAGE
Usage: mktop1k [-h] -i <in-file> -o <out-file> [-n nnn]
 Where: -h optional help, displays this message
 <in-file> path to an uncompressed Freshmeat rdf XML file
 <out-file> path to filename where the xml output will be generated.
 -n nnn optional parameter to indicate how many projects to
 extract.

 Default is 1000.

 The projects are always extracted in priority order.
 For example, -n 10 will get the top 10 Freshmeat packages.
 A range of numbers is not supported.

USAGE;
48 
// $argc includes the script name, so anything up to 4 cannot hold both the
// mandatory "-i <in-file>" and "-o <out-file>" pairs: show the usage text
// and stop.
if ($argc <= 4) {
  // An explicit -h is a successful help request, not a usage error, so it
  // must not report a failing exit status.
  $help_requested = in_array('-h', array_slice($argv, 1), true);
  echo $usage;
  exit($help_requested ? 0 : 1);
}
53 
// Default number of projects to extract when -n is not given.
$HowMany_projects = 1000;

// Report a command-line error on stderr and stop with a failing exit status.
// (die() with a string message prints to stdout and exits with status 0,
// which makes failures look like success to calling scripts.)
$fail = function ($message) {
  fwrite(STDERR, $message . "\n");
  exit(1);
};

// Walk the arguments. Every option except -h consumes the argument that
// follows it as its value.
for ($i = 1; $i < $argc; $i++) {
  switch ($argv[$i]) {
    case '-i':
      $i++;
      if (!isset($argv[$i])) {
        $fail("ERROR: Must specify an uncompressed filename after -i");
      }
      $in_file = $argv[$i];
      break;
    case '-h':
      echo $usage;
      exit(0);
    case '-n':
      $i++;
      if (!isset($argv[$i])) {
        $fail("ERROR: Must specify a number between 1-1000 after -n");
      }
      // A bare (int) cast would silently turn garbage such as "abc" into 0
      // and produce an empty extraction, so insist on a positive integer.
      if (!preg_match('/^[0-9]+$/', $argv[$i]) || (int) $argv[$i] < 1) {
        $fail("ERROR: -n expects a positive whole number, got: $argv[$i]");
      }
      $HowMany_projects = (int) $argv[$i];
      break;
    case '-o':
      $i++;
      if (!isset($argv[$i])) {
        $fail("ERROR: Must specify an output filename after -o");
      }
      $out_file = $argv[$i];
      break;
    default:
      $fail("ERROR: Unknown argument: $argv[$i]\n$usage");
  }
}

// Both file options are mandatory; without this check a command line such as
// "-i a -n 5 -n 6" would reach the file handling with an undefined variable.
if (!isset($in_file) || !isset($out_file)) {
  $fail("ERROR: Both -i <in-file> and -o <out-file> are required\n$usage");
}
95 
// Open the input file named by -i for reading.
$F1 = fopen($in_file, 'r');
if ($F1 === false) {
  // $php_errormsg no longer exists in PHP 8 (and needed track_errors before
  // that); error_get_last() carries the warning fopen() just raised.
  $open_error = error_get_last();
  $reason = ($open_error !== null) ? $open_error['message'] : $in_file;
  fwrite(STDERR, "can't open file: $reason\n");
  exit(1);
}
97 
98 /* look for the top 1000 projects, when found, write the project
99  entry to a file.
100 
101  NOTE: I'm bothered by something here... while one gets the top
102  1000, there could be drastic differences (not likely between any two
103  days, but possible)....It doesn't really affect this code, but could
104  affect users of the output files.
105  */
106 
// Open (and truncate) the output file named by -o.
$Output = fopen($out_file, 'w');
if ($Output === false) {
  // $php_errormsg no longer exists in PHP 8 (and needed track_errors before
  // that); error_get_last() carries the warning fopen() just raised.
  $open_error = error_get_last();
  $reason = ($open_error !== null) ? $open_error['message'] : $out_file;
  fwrite(STDERR, "Can't open: $reason\n");
  exit(1);
}

// Tell the user what is about to happen.
echo "Extracting the top $HowMany_projects projects from:\n$in_file\n";
echo "\nWriting the top $HowMany_projects projects to: $out_file\n";
111 
// The output has to be a valid document, so the header and the open tag are
// written before any project entry.
write_hdr($Output);

// File offset taken right after the most recent <project> line was read;
// null until the first <project> line has been seen.
$proj_mark = null;

// Scan the input line by line.
//  - Whole lines are read (no length cap) so a tag can never be split across
//    two reads.
//  - The strict !== keeps a line whose content is just "0" from being
//    mistaken for end-of-file.
while (false !== ($line = fgets($F1))) {
  if (strpos($line, '<project>') !== false) {
    $proj_mark = ftell($F1);
  }
  elseif (preg_match('/<popularity_rank>([0-9]+)[^<]*<\//', $line, $rank_match)) {
    // Capture the rank from the tag itself rather than slicing from the
    // first '>' on the line, which is wrong when anything precedes the tag.
    $rank = (int) $rank_match[1];

    // Only hand the entry over when a <project> start has actually been
    // recorded for it and its rank is within the requested top-N.
    if ($proj_mark !== null && $rank <= $HowMany_projects) {
      write_entry($F1, $proj_mark, $Output);
    }
  }
}
134 
// Finish the document with its end tag, then release both file handles.
close_tag($Output);

foreach (array($F1, $Output) as $stream) {
  fclose($stream);
}

print "Done\n";
char A[MAXCMD]
input for this system
Definition: finder.c:26
Usage()
Print Usage statement.
Definition: fo_dbcheck.php:63