15 #include "cleanEntries.hpp"
18 using std::stringstream;
19 using std::ostream_iterator;
29 string cleanGeneral(string::const_iterator sBegin, string::const_iterator sEnd)
32 rx::regex_replace(ostream_iterator<char>(ss), sBegin, sEnd, rx::regex(
"[[:space:]\\x0-\\x1f]{2,}"),
" ");
34 string::size_type len =
s.length();
38 char cEnd =
s[len - 1];
39 if (cBegin ==
' ' && cEnd ==
' ')
40 return s.substr(1, len - 2);
41 else if (cBegin ==
' ')
44 return s.substr(0, len - 1);
47 return s ==
" " ?
"" :
s;
56 string cleanSpdxStatement(string::const_iterator sBegin, string::const_iterator sEnd)
59 rx::regex_replace(ostream_iterator<char>(ss), sBegin, sEnd, rx::regex(
"spdx-filecopyrighttext:", rx::regex_constants::icase),
" ");
71 string cleanStatement(string::const_iterator sBegin, string::const_iterator sEnd)
74 rx::regex_replace(ostream_iterator<char>(ss), sBegin, sEnd, rx::regex(
"\n[[:space:][:punct:]]*"),
" ");
76 return cleanSpdxStatement(
s.begin(),
s.end());
88 string cleanNonPrint(string::const_iterator sBegin, string::const_iterator sEnd)
90 string s(sBegin, sEnd);
91 const unsigned char *in =
reinterpret_cast<const unsigned char*
>(
s.c_str());
94 icu::UnicodeString out;
95 for (
int i = 0; i < len;)
99 U8_NEXT(in, i, len, uniChar);
107 U16_NEXT(in, i, len, uniChar);
108 if (U_IS_UNICODE_CHAR(uniChar) && uniChar > 0)
117 out.toUTF8String(ret);
129 string cleanMatch(
const string& sText,
const match& m)
131 string::const_iterator it = sText.begin();
134 string utfCompatibleText;
136 unicodeStr.toUTF8String(utfCompatibleText);
138 if (m.type ==
"statement")
139 return cleanStatement(utfCompatibleText.begin(), utfCompatibleText.end());
141 return cleanGeneral(utfCompatibleText.begin(), utfCompatibleText.end());
string cleanGeneral(string::const_iterator sBegin, string::const_iterator sEnd)
Collapse runs of whitespace and control characters into a single space, then trim the space at the beginning and end.
int s
The socket that the CLI will use to communicate.
icu::UnicodeString recodeToUnicode(const std::string &input)
Store the results of a regex match.