FOSSology  4.4.0
Open Source License Compliance by Open Source Software
encoding.c
1 /*
2  Author: Daniele Fognini
3  SPDX-FileCopyrightText: © 2015 Siemens AG
4 
5  SPDX-License-Identifier: GPL-2.0-only
6 */
7 
8 #include "encoding.h"
9 
10 #ifdef HAVE_CHARDET
11 #include <uchardet.h>
12 #else
13 #include <magic.h>
14 #endif
15 
16 #include <string.h>
17 #include <stdio.h>
18 
19 iconv_t guessConverter(const char* buffer, size_t len)
20 {
21  char* const target = "utf-8";
22 
23  iconv_t iconvCookie = NULL;
24 
25  gchar* encoding = guessEncoding(buffer, len);
26  if (encoding && (strcmp(encoding, target) != 0))
27  {
28  iconvCookie = iconv_open(target, encoding);
29  g_free(encoding);
30  }
31 
32  return iconvCookie;
33 }
34 
35 gchar* guessEncoding(const char* buffer, size_t len) {
36  gchar* result = NULL;
37 #ifdef HAVE_CHARDET
38  uchardet_t cd = uchardet_new();
39  if (!uchardet_handle_data(cd, buffer, len)) {
40  uchardet_data_end(cd);
41 
42  const char* chardet = uchardet_get_charset(cd);
43 
44  if (chardet && strcmp(chardet, "")!=0) {
45  result = g_strdup(chardet);
46  }
47  }
48 
49  uchardet_delete(cd);
50 #else
51  magic_t cookie = magic_open(MAGIC_MIME);
52  magic_load(cookie, NULL);
53 
54  const char* resp = magic_buffer(cookie, buffer, len);
55 
56  if (!resp)
57  {
58  printf("magic error: %s\n", magic_error(cookie));
59  goto done;
60  }
61 
62  char* charset = strstr(resp, "charset=");
63 
64  if (!charset)
65  {
66  goto done;
67  }
68 
69  charset += 8; // len of "charset="
70 
71  result = g_strdup(charset);
72 
73 done:
74  magic_close(cookie);
75 #endif
76  return result;
77 }
char buffer[2048]
The last thing received from the scheduler.