example_tc_encoder.cpp 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. /**
  2. * Tencent is pleased to support the open source community by making Tars available.
  3. *
  4. * Copyright (C) 2016 THL A29 Limited, a Tencent company. All rights reserved.
  5. *
  6. * Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  7. * in compliance with the License. You may obtain a copy of the License at
  8. *
  9. * https://opensource.org/licenses/BSD-3-Clause
  10. *
  11. * Unless required by applicable law or agreed to in writing, software distributed
  12. * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  13. * CONDITIONS OF ANY KIND, either express or implied. See the License for the
  14. * specific language governing permissions and limitations under the License.
  15. */
  16. #include "util/tc_encoder.h"
  17. #include "util/tc_file.h"
  18. #include "util/tc_http.h"
  19. #include <errno.h>
  20. #include <iostream>
  21. using namespace tars;
  /**
   * Looks for a `charset` declaration inside a text fragment (for example an
   * HTML <meta> tag or a Content-Type value) and reports whether the declared
   * value starts with "utf".
   *
   * Matching rules:
   *  - the keyword `charset` is matched case-insensitively and must either be
   *    at the very start of the buffer or be preceded by a space, a tab, ';',
   *    '"' or '\'';
   *  - the keyword may be followed by spaces/tabs, then must be followed by '=';
   *    an occurrence without '=' is ignored and the scan continues;
   *  - spaces, tabs and quote characters between '=' and the value are skipped;
   *  - the value is compared case-insensitively against the prefix "utf", so
   *    "utf-8", "UTF-8" and "Utf8" are all accepted. Note that only the prefix
   *    is checked, so other "utf*" values (e.g. "utf-16") are accepted as well.
   *
   * The first `charset` occurrence that is followed by '=' decides the result;
   * later occurrences are not examined.
   *
   * @param buffer text to inspect
   * @return true if the first well-formed charset declaration names a value
   *         starting with "utf" (any letter case), false otherwise, including
   *         when the buffer is no longer than "charset=utf"
   */
  bool isUTF8(const std::string &buffer)
  {
      static const char kKeyword[] = "charset";
      const size_t keywordLen = sizeof(kKeyword) - 1;
      const size_t size = buffer.length();

      // A buffer that is not longer than "charset=utf" is rejected up front.
      // This also guarantees that `size - keywordLen` below cannot underflow.
      if (size <= strlen("charset=utf"))
      {
          return false;
      }

      const char *data = buffer.c_str();

      for (size_t i = 0; i < size - keywordLen; ++i)
      {
          if (strncasecmp(data + i, kKeyword, keywordLen) != 0)
          {
              continue;
          }

          // Reject matches glued to a preceding token (e.g. "mycharset").
          if (i > 0)
          {
              const char prev = buffer[i - 1];
              const bool separated = prev == ' ' || prev == '\t' || prev == ';'
                                  || prev == '"' || prev == '\'';
              if (!separated)
              {
                  continue;
              }
          }

          // Skip blanks between the keyword and the expected '='.
          size_t j = i + keywordLen;
          while (j < size && (buffer[j] == ' ' || buffer[j] == '\t'))
          {
              ++j;
          }

          // Not a declaration (no '='): keep looking for another occurrence.
          if (j >= size || buffer[j] != '=')
          {
              continue;
          }
          ++j;

          // Skip blanks and opening quotes in front of the value.
          while (j < size && (buffer[j] == ' ' || buffer[j] == '\t'
                              || buffer[j] == '"' || buffer[j] == '\''))
          {
              ++j;
          }

          // The key is matched case-insensitively, so the value must be too:
          // charset names such as "UTF-8" are commonly written in upper case.
          // When j == size, data + j points at the terminating NUL and the
          // comparison simply fails.
          return strncasecmp(data + j, "utf", 3) == 0;
      }

      return false;
  }
  70. int main(int argc, char *argv[])
  71. {
  72. try
  73. {
  74. /*
  75. string s = "我们";
  76. // string v = gbk2utf8(s);
  77. // cout << s << ":" << v << TC_Encoder::utf82gbk(v) << endl;
  78. string n = TC_File::load2str("./content.html");
  79. cout << n << endl;
  80. */
  81. string s = "<meta content=\"text/html\" charset=\"utf-8\" http-equiv=\"content-type\"/>";
  82. cout << isUTF8(s) << endl;
  83. /*
  84. TC_HttpRequest stHttpReq;
  85. stHttpReq.setCacheControl("no-cache");
  86. stHttpReq.setUserAgent("Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; InfoPath.1; .NET CLR 1.1.4322)");
  87. stHttpReq.setGetRequest("http://cn.engadget.com/");
  88. string sSendBuffer = stHttpReq.encode();
  89. TC_HttpResponse stHttpRsp;
  90. int iRet = stHttpReq.doRequest(stHttpRsp, 60000);
  91. if(iRet != 0)
  92. {
  93. return 0;
  94. }
  95. string sRspHeader = stHttpRsp.encode();
  96. sRspHeader = sRspHeader.substr(0, sRspHeader.find("\r\n\r\n"));
  97. cout << "[preview] http response:" << sRspHeader << endl;
  98. string content = stHttpRsp.getContent();
  99. string contentType = TC_Common::lower(stHttpRsp.getContentType());
  100. cout << "[preview] contentType:" << contentType << endl;
  101. string tmp = TC_Common::lower(content).substr(0, 512);
  102. cout << "[preview] tmp:" << tmp << endl;
  103. if(contentType.find("utf8") == string::npos
  104. && contentType.find("utf-8") == string::npos
  105. && !isUTF8(tmp))
  106. {
  107. cout << "[preview] gbk to utf8" << endl;
  108. // content = gbk2utf8(content);
  109. }
  110. cout << isUTF8(tmp) << endl;
  111. */
  112. }
  113. catch(exception &ex)
  114. {
  115. cout << ex.what() << endl;
  116. }
  117. return 0;
  118. }