FOSSology  4.4.0
Open Source License Compliance by Open Source Software
test_scanners.cc
1 /*
2  SPDX-FileCopyrightText: © 2014-15, 2018 Siemens AG
3 
4  SPDX-License-Identifier: GPL-2.0-only
5 */
6 
7 #include <cppunit/TestFixture.h>
8 #include <cppunit/extensions/HelperMacros.h>
9 
10 #include "regex.hpp"
11 #include "regscan.hpp"
12 #include "copyrightUtils.hpp"
13 #include "cleanEntries.hpp"
14 #include <list>
15 #include <cstring>
16 #include <ostream>
17 
18 using namespace std;
19 
25 ostream& operator<<(ostream& out, const list<match>& l)
26 {
27  for (auto m = l.begin(); m != l.end(); ++m)
28  out << '[' << m->start << ':' << m->end << ':' << m->type << ']';
29  return out;
30 }
31 
/**
 * \brief Shared sample text that every scanner test in this file runs against.
 *
 * The tests locate their expected strings inside this buffer with strstr(),
 * so expected match offsets depend on the exact bytes below. Do not reflow
 * or edit the literals without re-checking every test.
 */
const char testContent[] =
  // Copyright statements
  "© 2007 Hugh Jackman\n\n"
  "Copyright 2004 my company\n\n"
  "Copyrights by any strange people\n\n"
  "(C) copyright 2007-2011, 2013 my favourite company Google\n\n"
  "(C) 2007-2011, 2013 my favourite company Google\n\n"
  // Presumably a decoy: "(c)" inside code rather than a copyright notice
  "if (c) { return -1 } \n\n"
  // Author lines, plain and tagged (the tagged ones have no newlines between them)
  "Written by: me, myself and Irene.\n\n"
  "Authors all the people at ABC\n\n"
  "<author>Author1</author>"
  "<head>All the people</head>"
  "<author>Author1 Author2 Author3</author>"
  "<author>Author4</author><b>example</b>"
  "Apache\n\n"
  // Text aimed at the ipra / ecc / url / email tests
  "This file is protected under pants 1 , 2 ,3\n\n"
  "Do not modify this document\n\n"
  "the shuttle is a space vehicle designed by NASA\n\n"
  "visit http://mysite.org/FAQ or write to info@mysite.org\n\n"
  "maintained by benjamin drieu <benj@debian.org>\n\n"
  // Copyright statement spanning two lines
  "* Copyright (c) 1989, 1993\n" // Really just one newline here!
  "* The Regents of the University of California. All rights reserved.\n\n"
  // Keyword phrases; note there is no newline between these two pieces
  "to be licensed as a whole"
  "/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */";
57 
58 class scannerTestSuite : public CPPUNIT_NS :: TestFixture {
59  CPPUNIT_TEST_SUITE (scannerTestSuite);
60  CPPUNIT_TEST (copyscannerTest);
61  CPPUNIT_TEST (regAuthorTest);
62  CPPUNIT_TEST (regIpraTest);
63  CPPUNIT_TEST (regEccTest);
64  CPPUNIT_TEST (regUrlTest);
65  CPPUNIT_TEST (regEmailTest);
66  CPPUNIT_TEST (regKeywordTest);
67  CPPUNIT_TEST (cleanEntries);
68 
69  CPPUNIT_TEST_SUITE_END ();
70 
71 private:
79  void scannerTest (const scanner& sc, const char* content, const string& type, list<const char*> expectedStrings)
80  {
81  list<match> matches;
82  list<match> expected;
83  sc.ScanString(content, matches);
84 
85  for (auto s = expectedStrings.begin(); s != expectedStrings.end(); ++s)
86  {
87  const char * p = strstr(content, *s);
88  if (p)
89  {
90  int pos = p - content;
91  expected.push_back(match(pos, pos+strlen(*s), type));
92  }
93  // else: expected string is not contained in original string
94  }
95  CPPUNIT_ASSERT_EQUAL(expected, matches);
96  }
97 
98 protected:
107  {
108  // Test copyright matcher
110 
111  scannerTest(sc, testContent, "statement", { "© 2007 Hugh Jackman",
112  "Copyright 2004 my company",
113  "Copyrights by any strange people",
114  "(C) copyright 2007-2011, 2013 my favourite company Google",
115  "(C) 2007-2011, 2013 my favourite company Google",
116  "Copyright (c) 1989, 1993\n* The Regents of the University of California. All rights reserved."
117  });
118  }
119 
128  {
129  regexScanner sc("author", "copyright");
130  scannerTest(sc, testContent, "author", {
131  "Written by: me, myself and Irene.",
132  "Authors all the people at ABC",
133  "Author1",
134  "Author1 Author2 Author3",
135  "Author4",
136  "maintained by benjamin drieu <benj@debian.org>"
137  });
138  }
139 
147  void regIpraTest () {
148  regexScanner sc("ipra", "ipra");
149  scannerTest(sc, testContent, "ipra", { "US patents 1 , 2 ,3" });
150  }
151 
159  void regEccTest () {
160  regexScanner sc("ecc", "ecc");
161  scannerTest(sc, testContent, "ecc", { "space vehicle designed by NASA" });
162  }
163 
171  void regUrlTest () {
172  regexScanner sc("url", "copyright");
173  scannerTest(sc, testContent, "url", { "http://mysite.org/FAQ" });
174  }
175 
183  void regEmailTest () {
184  regexScanner sc("email", "copyright",1);
185  scannerTest(sc, testContent, "email", { "info@mysite.org", "benj@debian.org" });
186  }
187 
195  void regKeywordTest () {
196  regexScanner sc("keyword", "keyword");
197  scannerTest(sc, testContent, "keyword", {"patent", "licensed as", "stolen from"});
198  }
199 
208  void cleanEntries () {
209  // Binary content
210  string actualFileContent;
211  ReadFileToString("../testdata/testdata142", actualFileContent);
212 
213  vector<string> binaryStrings;
214  std::stringstream *ss = new std::stringstream(actualFileContent);
215  string temp;
216 
217  while (std::getline(*ss, temp)) {
218  binaryStrings.push_back(temp);
219  }
220 
221  // Simulate matches. Each line is a match
222  vector<match> matches;
223  int pos = 0;
224  int size = binaryStrings.size();
225  for (int i = 0; i < size; i++)
226  {
227  int length = binaryStrings[i].length();
228  matches.push_back(
229  match(pos, pos + length, "statement"));
230  pos += length + 1;
231  }
232 
233  // Expected data
234  string expectedFileContent;
235  ReadFileToString("../testdata/testdata142_exp", expectedFileContent);
236 
237  delete(ss);
238  ss = new std::stringstream(expectedFileContent);
239  vector<string> expectedStrings;
240  while (std::getline(*ss, temp)) {
241  expectedStrings.push_back(temp);
242  }
243 
244  vector<string> actualStrings;
245  for (size_t i = 0; i < matches.size(); i ++)
246  {
247  actualStrings.push_back(cleanMatch(actualFileContent, matches[i]));
248  }
249 
250  CPPUNIT_ASSERT(expectedStrings == actualStrings);
251  }
252 };
253 
254 CPPUNIT_TEST_SUITE_REGISTRATION( scannerTestSuite );
Implementation of scanner class for copyright.
Definition: copyscan.hpp:18
Provides a regex scanner using predefined regexes.
Definition: regscan.hpp:21
void regIpraTest()
Test Ipra scanner.
void regKeywordTest()
Test copyright scanner for keywords.
void scannerTest(const scanner &sc, const char *content, const string &type, list< const char * > expectedStrings)
Runs the scanner on content and checks the matches against expectedStrings.
void regUrlTest()
Test copyright scanner for URL.
void regEmailTest()
Test copyright scanner for email.
void copyscannerTest()
Test copyright scanner.
void regEccTest()
Test ECC scanner.
void regAuthorTest()
Test copyright scanner for author.
void cleanEntries()
Test cleanMatch() to remove non-UTF8 text and extra spaces.
Abstract class to provide interface to scanners.
Definition: scanners.hpp:52
virtual void ScanString(const string &s, list< match > &results) const =0
Scan the given string and add matches to results.
int s
The socket that the CLI will use to communicate.
Definition: fo_cli.c:37
bool ReadFileToString(const string &fileName, string &out)
Utility: read file to string from scanners.h.
Definition: scanners.cc:21
Store the results of a regex match.
Definition: scanners.hpp:28
std::ostream & operator<<(std::ostream &os, const std::vector< int > &x)
<< operator overload that appends a vector to an ostream object.
Definition: testUtils.hpp:27