7 #include <cppunit/TestFixture.h>
8 #include <cppunit/extensions/HelperMacros.h>
11 #include "regscan.hpp"
12 #include "copyrightUtils.hpp"
13 #include "cleanEntries.hpp"
25 ostream&
operator<<(ostream& out,
const list<match>& l)
27 for (
auto m = l.begin(); m != l.end(); ++m)
28 out <<
'[' << m->start <<
':' << m->end <<
':' << m->type <<
']';
// Fixture text shared by the scanner tests: it is passed as `content` to
// scannerTest() for the "statement", "author", "ipra", "ecc", "url", "email"
// and "keyword" checks.
//
// The array is built from adjacent string literals, which the compiler joins
// into a single NUL-terminated string. Literals ending in "\n\n" are followed
// by a blank line in the resulting text; the tag-like <author>/<head> entries
// and the "to be licensed as a whole" entry carry no trailing newline, so they
// run directly into the literal that follows them.
//
// scannerTest() locates each expected string with strstr() and derives the
// expected match positions from the resulting byte offsets, so any change to
// this text (even whitespace) shifts the positions the tests compare against.
35 const char testContent[] =
"© 2007 Hugh Jackman\n\n"
36 "Copyright 2004 my company\n\n"
37 "Copyrights by any strange people\n\n"
38 "(C) copyright 2007-2011, 2013 my favourite company Google\n\n"
39 "(C) 2007-2011, 2013 my favourite company Google\n\n"
40 "if (c) { return -1 } \n\n"
41 "Written by: me, myself and Irene.\n\n"
42 "Authors all the people at ABC\n\n"
43 "<author>Author1</author>"
44 "<head>All the people</head>"
45 "<author>Author1 Author2 Author3</author>"
46 "<author>Author4</author><b>example</b>"
48 "This file is protected under pants 1 , 2 ,3\n\n"
49 "Do not modify this document\n\n"
50 "the shuttle is a space vehicle designed by NASA\n\n"
51 "visit http://mysite.org/FAQ or write to info@mysite.org\n\n"
52 "maintained by benjamin drieu <benj@debian.org>\n\n"
53 "* Copyright (c) 1989, 1993\n"
54 "* The Regents of the University of California. All rights reserved.\n\n"
55 "to be licensed as a whole"
56 "/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */";
60 CPPUNIT_TEST (copyscannerTest);
61 CPPUNIT_TEST (copyscannerDotPrefixedNameTest);
62 CPPUNIT_TEST (copyscannerBareKeywordDiscardTest);
63 CPPUNIT_TEST (copyscannerCopyrightedStatementTest);
64 CPPUNIT_TEST (copyscannerBinaryNoiseTest);
65 CPPUNIT_TEST (copyscannerSpdxFullLineTest);
66 CPPUNIT_TEST (copyscannerSpdxArrayTest);
67 CPPUNIT_TEST (copyscannerProseExceptionTest);
68 CPPUNIT_TEST (regAuthorTest);
69 CPPUNIT_TEST (regIpraTest);
70 CPPUNIT_TEST (regEccTest);
71 CPPUNIT_TEST (regUrlTest);
72 CPPUNIT_TEST (regEmailTest);
73 CPPUNIT_TEST (regKeywordTest);
74 CPPUNIT_TEST (cleanEntries);
76 CPPUNIT_TEST_SUITE_END ();
86 void scannerTest (
const scanner& sc,
const char* content,
const string& type, list<const char*> expectedStrings)
92 for (
auto s = expectedStrings.begin();
s != expectedStrings.end(); ++
s)
94 const char * p = strstr(content, *
s);
97 int pos = p - content;
98 expected.push_back(
match(pos, pos+strlen(*
s), type));
102 CPPUNIT_ASSERT_EQUAL(expected, matches);
118 scannerTest(sc, testContent,
"statement", {
"© 2007 Hugh Jackman",
119 "Copyright 2004 my company",
120 "Copyrights by any strange people",
121 "(C) copyright 2007-2011, 2013 my favourite company Google",
122 "(C) 2007-2011, 2013 my favourite company Google",
123 "Copyright (c) 1989, 1993\n* The Regents of the University of California."
135 const char* content1 =
"Copyright \xc2\xa9 .NET Foundation and contributors\n";
136 scannerTest(sc, content1,
"statement",
137 {
"Copyright \xc2\xa9 .NET Foundation and contributors"});
140 const char* content2 =
"\xc2\xa9 .NET Foundation and contributors\n";
141 scannerTest(sc, content2,
"statement",
142 {
"\xc2\xa9 .NET Foundation and contributors"});
145 const char* content3 =
"Copyright \xc2\xa9 2021 .NET Foundation\n";
146 scannerTest(sc, content3,
"statement",
147 {
"Copyright \xc2\xa9 2021 .NET Foundation"});
159 const char* bare[] = {
166 for (
int i = 0; bare[i]; ++i)
170 CPPUNIT_ASSERT_MESSAGE(
171 string(
"Expected no match for bare keyword: ") + bare[i],
176 const char*
valid[] = {
177 "Copyright 2021 .NET Foundation\n",
178 "Copyright (c) 2004 My Company\n",
179 "Copyright \xc2\xa9 .NET Foundation and contributors\n",
182 for (
int i = 0;
valid[i]; ++i)
186 CPPUNIT_ASSERT_MESSAGE(
187 string(
"Expected one match for valid copyright: ") +
valid[i],
188 matches.size() == 1);
202 const char*
valid[] = {
203 "Copyrighted (C) 1994 Normunds Saumanis (normunds@rx.tech.swh.lv)\n",
204 "Copyrighted (C) 1994, 1995, 1996 Normunds Saumanis (normunds@fi.ibm.com)\n",
205 "copyrighted (C) 1993 by Hartmut Schirmer\n",
206 "copyrighted 1992 by Mark Adler version c10p1, 10 January 1993\n",
207 "copyrighted 1990 Mark Adler\n",
208 "Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)\n",
209 "Copyright (C) Torres Martinez\n",
210 "Copyright (C) Tobias Schmidt\n",
211 "Copyright (C) Tomas Novak\n",
214 for (
int i = 0;
valid[i]; ++i)
219 CPPUNIT_ASSERT_MESSAGE(
220 string(
"Expected active match for: ") +
valid[i],
221 !matches.empty() && matches.front().is_enabled);
236 const char* noise[] = {
238 "\xc2\xa9 sjw\xc2\xa8noise\n",
239 "\xc2\xa9 OMr\xc2\xa5more\n",
240 "\xc2\xa9 tGa\xc3\x89garbage\n",
242 "\xc2\xa9 ZgU,\xc2\xb5garbage\n",
243 "\xc2\xa9 VJs0\xc3\x93noise\n",
245 "\xc2\xa9 NuXnHl{\xc2\xa4" "noise\n",
247 "\xc2\xa9 KtCtdy\x22s\xc3\xa8noise\n",
251 for (
int i = 0; noise[i]; ++i)
255 bool hasActive = !matches.empty() && matches.front().is_enabled;
256 CPPUNIT_ASSERT_MESSAGE(
257 string(
"Expected no active match for binary noise string #") + to_string(i),
273 const char content[] =
274 "SPDX-FileCopyrightText = [\n"
275 "\"2026 Fraunhofer-Institut f\xC3\xBCr Produktionstechnik und Automatisierung IPA\",\n"
276 "\"2026 Hilscher Gesellschaft f\xC3\xBCr Systemautomation mbH\",\n"
277 "\"2026 Siemens AG\",\n"
283 CPPUNIT_ASSERT_EQUAL_MESSAGE(
"Expected 4 matches total", (
size_t)4, matches.size());
285 auto it = matches.begin();
286 CPPUNIT_ASSERT_MESSAGE(
"Header must be inactive", !it->is_enabled);
288 for (
int i = 1; i <= 3; ++i, ++it)
289 CPPUNIT_ASSERT_MESSAGE(
290 string(
"Array element ") + to_string(i) +
" must be active", it->is_enabled);
296 const char content[] =
297 "SPDX-FileCopyrightText = [\n"
298 "\"2026 Company A\",\n"
299 "\"2026 Company B\",\n"
301 "Copyright 2021 Google LLC\n";
306 bool foundGoogle =
false;
307 for (
auto& m : matches) {
308 int len = m.end - m.start;
309 if (len > 0 && strncmp(content + m.start,
"Copyright 2021 Google", 21) == 0)
312 CPPUNIT_ASSERT_MESSAGE(
313 "Copyright after unclosed SPDX array must still be detected", foundGoogle);
328 const char content[] =
329 "// SPDX-FileCopyrightText: 2026 Fraunhofer-Institut f\xC3\xBCr Produktionstechnik und Automatisierung IPA\n"
330 "// SPDX-FileCopyrightText: 2026 Hilscher Gesellschaft f\xC3\xBCr Systemautomation mbH\n"
331 "// SPDX-FileCopyrightText: 2026 Siemens AG\n";
336 CPPUNIT_ASSERT_EQUAL_MESSAGE(
"Expected 3 SPDX matches", (
size_t)3, matches.size());
339 const char* lineStarts[3];
340 lineStarts[0] = strstr(content,
"SPDX-FileCopyrightText: 2026 Fraunhofer");
341 lineStarts[1] = strstr(content,
"SPDX-FileCopyrightText: 2026 Hilscher");
342 lineStarts[2] = strstr(content,
"SPDX-FileCopyrightText: 2026 Siemens");
345 for (
auto& m : matches)
347 CPPUNIT_ASSERT_MESSAGE(
348 string(
"Match ") + to_string(i) +
" must be active",
351 int expectedStart = lineStarts[i] - content;
352 int expectedEnd = (int)(strchr(lineStarts[i],
'\n') - content);
354 CPPUNIT_ASSERT_EQUAL_MESSAGE(
355 string(
"Match ") + to_string(i) +
" start",
356 expectedStart, m.start);
357 CPPUNIT_ASSERT_EQUAL_MESSAGE(
358 string(
"Match ") + to_string(i) +
" end must reach line end",
382 const char* prose[] = {
383 "copyrights appearing in this test file\n",
384 "copyrights appears in the documentation\n",
385 "COPYRIGHT TO DETECT This section uses the standard header\n",
386 "copyright work that can be distributed\n",
387 "copyright protection under the terms of this License\n",
388 "copyrighted interfaces, the original copyright holder\n",
389 "copyright in the work, if the License is applied\n",
390 "copyright of this Package, but belong to whoever generated\n",
391 "copyright on material distributed under this License\n",
394 for (
int i = 0; prose[i]; ++i)
398 bool hasActive = !matches.empty() && matches.front().is_enabled;
399 CPPUNIT_ASSERT_MESSAGE(
400 string(
"Expected no active match for prose: ") + prose[i],
405 const char*
valid[] = {
406 "Copyright (C) 2021 Toronto Inc.\n",
407 "Copyright 2021 Workday Inc.\n",
408 "Copyright (C) 2021 Interface Logic Inc.\n",
409 "Copyright (C) 2021 In-N-Out Burgers\n",
412 for (
int i = 0;
valid[i]; ++i)
416 CPPUNIT_ASSERT_MESSAGE(
417 string(
"Expected active match for: ") +
valid[i],
418 !matches.empty() && matches.front().is_enabled);
432 scannerTest(sc, testContent,
"author", {
433 "Written by: me, myself and Irene.",
434 "Authors all the people at ABC",
436 "Author1 Author2 Author3",
438 "maintained by benjamin drieu <benj@debian.org>"
451 scannerTest(sc, testContent,
"ipra", {
"US patents 1 , 2 ,3" });
463 scannerTest(sc, testContent,
"ecc", {
"space vehicle designed by NASA" });
475 scannerTest(sc, testContent,
"url", {
"http://mysite.org/FAQ" });
487 scannerTest(sc, testContent,
"email", {
"info@mysite.org",
"benj@debian.org" });
499 scannerTest(sc, testContent,
"keyword", {
"patent",
"licensed as",
"stolen from"});
512 string actualFileContent;
515 vector<string> binaryStrings;
516 std::stringstream *ss =
new std::stringstream(actualFileContent);
519 while (std::getline(*ss, temp)) {
520 binaryStrings.push_back(temp);
524 vector<match> matches;
526 int size = binaryStrings.size();
527 for (
int i = 0; i < size; i++)
529 int length = binaryStrings[i].length();
531 match(pos, pos + length,
"statement"));
536 string expectedFileContent;
540 ss =
new std::stringstream(expectedFileContent);
541 vector<string> expectedStrings;
542 while (std::getline(*ss, temp)) {
543 expectedStrings.push_back(temp);
546 vector<string> actualStrings;
547 for (
size_t i = 0; i < matches.size(); i ++)
549 actualStrings.push_back(cleanMatch(actualFileContent, matches[i]));
552 CPPUNIT_ASSERT(expectedStrings == actualStrings);
Implementation of scanner class for copyright.
void ScanString(const string &s, list< match > &results) const
Scan a given string for copyright statements.
Provides a regex scanner using predefined regexs.
void regIpraTest()
Test Ipra scanner.
void copyscannerCopyrightedStatementTest()
Regression: "copyrighted" statements and names like "Tom" must not be falsely deactivated by REG_EXCE...
void copyscannerBinaryNoiseTest()
Regression: binary-file content with a short ASCII prefix before non-ASCII bytes must not be reported...
void regKeywordTest()
Test copyright scanner for keywords.
void copyscannerSpdxFullLineTest()
Regression: SPDX-FileCopyrightText entries must be detected as individual single-line statements and ...
void scannerTest(const scanner &sc, const char *content, const string &type, list< const char * > expectedStrings)
Runs the scanner on content and checks the matches against expectedStrings.
void copyscannerBareKeywordDiscardTest()
Test that bare copyright keywords produce no matches.
void copyscannerDotPrefixedNameTest()
Test copyright scanner with dot-prefixed names like .NET Foundation.
void regUrlTest()
Test copyright scanner for URL.
void regEmailTest()
Test copyright scanner for email.
void copyscannerTest()
Test copyright scanner.
void regEccTest()
Test ECC scanner.
void copyscannerProseExceptionTest()
Regression: license-prose strings that contain "copyright" as a common noun must be deactivated,...
void regAuthorTest()
Test copyright scanner for author.
void cleanEntries()
Test cleanMatch() to remove non-UTF8 text and extra spaces.
void copyscannerSpdxArrayTest()
Regression: SPDX-FileCopyrightText = [...] TOML array format must yield one active match per quoted e...
Abstract class to provide interface to scanners.
virtual void ScanString(const string &s, list< match > &results) const =0
Scan the given string and add matches to results.
int s
The socket that the CLI will use to communicate.
int valid
If the information stored in buffer is valid.
bool ReadFileToString(const string &fileName, string &out)
Utility: read file to string from scanners.h.
Store the results of a regex match.
std::ostream & operator<<(std::ostream &os, const std::vector< int > &x)
<< operator overload that appends a vector to an ostream object.