FOSSology  4.4.0
Open Source License Compliance by Open Source Software
FossologyUnicodeClean.cc
1 /*
2  SPDX-FileCopyrightText: © 2019 Siemens AG
3  Author: Gaurav Mishra <mishra.gaurav@siemens.com>
4 
5  SPDX-License-Identifier: GPL-2.0-only
6 */
7 
9 
10 using namespace std;
11 
16 {
17  this->flush();
18  if (this->destinationFile.is_open())
19  {
20  this->destinationFile.close();
21  }
22  if (this->sourceFile.is_open())
23  {
24  this->sourceFile.close();
25  }
26 }
27 
35  string &destination) : sourceFile(NULL), destinationFile(NULL),
36  bufferSize (0), stopRead(false)
37 {
38  if ((!source.empty() && !destination.empty()) && (source == destination))
39  {
40  cerr << "Input and Output files can not be same.\n";
41  cerr << "Input: " << source << "\nOutput: " << destination;
42  cerr << " passed" << endl;
43  exit(-3);
44  }
45  if (!source.empty())
46  {
47  sourceFile.open(source, ios::in | ios::binary);
48  if (sourceFile.fail())
49  {
50  cerr << "Unable to open " << source << endl;
51  cerr << "Error: " << strerror(errno) << endl;
52  exit(-1);
53  }
54  }
55  if (!destination.empty())
56  {
57  destinationFile.open(destination, ios::out | ios::binary | ios::trunc);
58  if (destinationFile.fail())
59  {
60  cerr << "Unable to open " << destination << endl;
61  cerr << "Error: " << strerror(errno) << endl;
62  exit(-2);
63  }
64  }
65  this->buffer.reserve(MAX_BUFFER_LEN);
66 }
67 
73 {
74  string input;
75  input = this->dirtyRead();
76  while (!this->stopRead)
77  {
78  icu::UnicodeString output = fo::recodeToUnicode(input);
79  this->write(output);
80  input = this->dirtyRead();
81  }
82  this->flush();
83 }
84 
90 {
91  string input;
92  if (sourceFile.eof() || cin.eof())
93  {
94  this->stopRead = true;
95  return "";
96  }
97  if (sourceFile && sourceFile.is_open())
98  {
99  std::getline(sourceFile, input, '\n');
100  }
101  else
102  {
103  std::getline(cin, input, '\n');
104  }
105  return input;
106 }
107 
115 void FossologyUnicodeClean::write(const icu::UnicodeString &output)
116 {
117  this->buffer.push_back(output);
118  this->bufferSize++;
119  if (this->bufferSize == MAX_BUFFER_LEN)
120  {
121  this->flush();
122  }
123 }
124 
132 {
133  if (destinationFile && destinationFile.is_open())
134  {
135  for (size_t i = 0; i < this->buffer.size(); i++)
136  {
137  string temp;
138  buffer[i].toUTF8String(temp);
139  destinationFile << temp << "\n";
140  }
141  }
142  else
143  {
144  for (size_t i = 0; i < this->buffer.size(); i++)
145  {
146  string temp;
147  buffer[i].toUTF8String(temp);
148  cout << temp << "\n";
149  }
150  }
151  buffer.clear();
152  bufferSize = 0;
153 }
154 
163 bool parseCliOptions(int argc, char **argv, string &input, string &output)
164 {
165  boost::program_options::options_description desc("fo_unicode_clean "
166  ": recognized options");
167  desc.add_options()
168  (
169  "help,h", "shows help"
170  )
171  (
172  "input,i",
173  boost::program_options::value<string>(),
174  "file to read"
175  )
176  (
177  "output,o",
178  boost::program_options::value<string>(),
179  "output file"
180  )
181  ;
182 
183  boost::program_options::variables_map vm;
184 
185  try
186  {
187  boost::program_options::store(
188  boost::program_options::command_line_parser(argc,
189  argv).options(desc).run(), vm);
190 
191  if (vm.count("help") > 0)
192  {
193  cout << desc << endl;
194  cout << "If no input passed, read from STDIN." << endl;
195  cout << "If no output passed, print to STDOUT." << endl;
196  exit(0);
197  }
198 
199  if (vm.count("input"))
200  {
201  input = vm["input"].as<string>();
202  }
203  if (vm.count("output"))
204  {
205  output = vm["output"].as<string>();
206  }
207  return true;
208  }
209  catch (boost::bad_any_cast&)
210  {
211  cout << "wrong parameter type" << endl;
212  cout << desc << endl;
213  return false;
214  }
215  catch (boost::program_options::error&)
216  {
217  cout << "wrong command line arguments" << endl;
218  cout << desc << endl;
219  return false;
220  }
221 }
222 
223 int main(int argc, char **argv)
224 {
225  string input, output;
226  if (parseCliOptions(argc, argv, input, output))
227  {
228  FossologyUnicodeClean obj(input, output);
229  obj.startConvert();
230  return 0;
231  }
232  return -4;
233 }
#define MAX_BUFFER_LEN
FossologyUnicodeClean(std::string &source, std::string &destination)
const std::string dirtyRead()
void flush()
Flush the buffers and reset the internal buffer.
void write(const icu::UnicodeString &output)
Write the string to file/stream.
bool parseCliOptions(int argc, char **argv, string &input, string &output)
Parse the options sent by CLI into the input and output file paths.
icu::UnicodeString recodeToUnicode(const std::string &input)
Definition: libfossUtils.cc:32