8 #include "copyscan.hpp"
11 #include "regexConfProvider.hpp"
/** Type label attached to every match reported by the copyright scanner */
const string copyrightType = "statement";
26 rx::regex_constants::icase);
29 rx::regex_constants::icase);
33 rx::regex_constants::icase);
35 rx::regex_constants::icase);
38 regExceptionCopy = rx::regex(rcp.
getRegexValue(
"copyright",
"REG_EXCEPTION_COPY"),
39 rx::regex_constants::icase);
40 regRemoveFileStmt = rx::regex(rcp.
getRegexValue(
"copyright",
"REG_REMOVE_FILE_STATEMENT"),
41 rx::regex_constants::icase);
42 regStripLicenseTrail = rx::regex(rcp.
getRegexValue(
"copyright",
"REG_STRIP_LICENSE_TRAIL"),
43 rx::regex_constants::icase);
44 regStripTrademarkTrail = rx::regex(rcp.
getRegexValue(
"copyright",
"REG_STRIP_TRADEMARK_TRAIL"),
45 rx::regex_constants::icase);
46 regStripAllRightReserveTrail = rx::regex(rcp.
getRegexValue(
"copyright",
"REG_ALL_RIGHT_RESERVE_TRAIL"),
47 rx::regex_constants::icase);
62 string::const_iterator begin =
s.begin();
63 string::const_iterator pos = begin;
64 string::const_iterator end =
s.end();
72 string::const_iterator foundPos = results[0].first;
86 string::const_iterator j = find(foundPos, end,
'\n');
89 string::const_iterator beginOfLine = j;
91 string::const_iterator endOfLine = find(beginOfLine, end,
'\n');
97 || !rx::regex_match(beginOfLine, endOfLine,
regNonBlank))
104 string raw = string(foundPos, j);
105 string cleaned = Cleanup(raw);
107 if (cleaned.empty()) {
112 if (cleaned.size() > 300)
113 cleaned = cleaned.substr(0, 300);
115 out.push_back(
match(foundPos - begin, (foundPos - begin) + cleaned.size(), copyrightType));
121 pos = results[0].second;
126 string hCopyrightScanner::Cleanup(
const string &raw)
const {
127 if (rx::regex_search(raw, regExceptionCopy)) {
130 if (rx::regex_match(raw, regRemoveFileStmt)) {
133 string cleaned = raw;
134 cleaned = rx::regex_replace(cleaned, regStripLicenseTrail,
"");
135 cleaned = rx::regex_replace(cleaned, regStripTrademarkTrail,
"");
136 cleaned = rx::regex_replace(cleaned, regStripAllRightReserveTrail,
"");
138 RemoveNoisePatterns(cleaned);
139 TrimPunctuation(cleaned);
140 NormalizeCopyright(cleaned);
141 StripSuffixes(cleaned);
146 void hCopyrightScanner::TrimPunctuation(
string &text)
const{
147 const string trimCharsAll =
",\'\"-:;&@!";
148 const string trimStartOnly =
".>)]\\/";
149 const string trimEndOnly =
"<([\\/";
151 size_t start = text.find_first_not_of(trimCharsAll);
152 size_t end = text.find_last_not_of(trimCharsAll);
154 if (
start == string::npos) {
161 while (!text.empty() && trimStartOnly.find(text.front()) != string::npos) {
165 while (!text.empty() && trimEndOnly.find(text.back()) != string::npos) {
170 void hCopyrightScanner::RemoveNoisePatterns(
string& text)
const{
171 const vector<string> patterns = {
172 "<p>",
"<a href",
"date-of-software",
"date-of-document",
173 " $ ",
" ? ",
"</a>",
"( )",
"()"
176 for (
const auto& word : patterns) {
178 while ((pos = text.find(word)) != string::npos) {
179 text.replace(pos, word.length(),
" ");
184 void hCopyrightScanner::NormalizeCopyright(
string& text)
const {
185 const vector<pair<string, string>> replacements = {
186 {
"SPDX-FileCopyrightText",
"Copyright"},
187 {
"AssemblyCopyright",
"Copyright"},
188 {
"AppCopyright",
"Copyright"},
189 {
"JCOPYRIGHT",
"Copyright"},
190 {
"COPYRIGHT Copyright",
"Copyright"},
191 {
"Copyright Copyright",
"Copyright"},
192 {
"Copyright copyright",
"Copyright"},
193 {
"copyright copyright",
"Copyright"},
194 {
"copyright Copyright",
"Copyright"},
195 {
"copyright\"Copyright",
"Copyright"},
196 {
"copyright\" Copyright",
"Copyright"}
199 for (
const auto& pair : replacements) {
200 const string& from = pair.first;
201 const string& to = pair.second;
204 while ((pos = text.find(from)) != string::npos) {
205 text.replace(pos, from.length(), to);
210 void hCopyrightScanner::StripSuffixes(
string& text)
const{
211 const vector<string> suffixes = {
212 "copyright",
",",
"year",
"parts",
"0",
"1",
"author",
"all",
"some",
"and"
215 for (
const auto& suffix : suffixes) {
216 if (text.length() > suffix.length() + 1 &&
217 text.size() >= suffix.size() &&
218 text.compare(text.size() - suffix.size(), suffix.size(), suffix) == 0)
220 text.erase(text.size() - suffix.size());
Provide regex using conf file.
void maybeLoad(const std::string &identity)
Check if identity already loaded in RegexMap, if not load them.
const char * getRegexValue(const std::string &name, const std::string &key)
Get the regex as string from the RegexMap.
void ScanString(const string &s, list< match > &results) const
Scan a given string for copyright statements.
rx::regex regSpdxCopyright
hCopyrightScanner()
Constructor for default hCopyrightScanner.
rx::regex regSimpleCopyright
int s
The socket that the CLI will use to communicate.
start($application)
Start the application. Assumes the application is restartable via /etc/init.d/<script>.
list_t type structure used to keep various lists. (e.g. there are multiple lists).
Store the results of a regex match.