```
void test_pageGetUTF8(std::wstring url){
http_client client(url);
auto query = uri_builder().append_query(L"q", L"test").to_string();
client.request(methods::GET, query)
.then([](http_response response)->pplx::task<std::string>
{
std::string rv;
auto status = response.status_code();
std::cout << "Status is: " << status << std::endl;
if (status == status_codes::OK)
{
response.content_ready().get();
size_t len = response.headers().content_length();
std::cout << "Length is: " << len << std::endl;
std::string data = response.extract_utf8string(false).get();
if (data.size() == len){
rv = "OK";//data;
}
else{
rv = "Failed to retrieve the whole body";
}
}
else{
rv = "error ";
rv += status;
}
return pplx::task_from_result(rv);
})
.then([](pplx::task<std::string> data){
std::cout << data.get() << std::endl;
}).wait();
}
void test_pageGetUTF16(std::wstring url){
http_client client(url);
auto query = uri_builder().append_query(L"q", L"test").to_string();
client.request(methods::GET, query)
.then([](http_response response)->pplx::task<utility::string_t>
{
utility::string_t rv;
auto status = response.status_code();
std::wcout << "Status is: " << status << std::endl;
if (status == status_codes::OK)
{
response.content_ready().get();
size_t len = response.headers().content_length();
std::cout << "Length is: " << len <<std::endl;
utility::string_t data = response.extract_utf16string(false).get();
if (data.size() == len){
rv = U("OK");//data;
}
else{
rv = U("Failed to retrieve the whole body");
}
}
else{
rv = U("error ");
rv += status;
}
return pplx::task_from_result(rv);
})
.then([](pplx::task<utility::string_t> data){
std::wcout << data.get() << std::endl <<std::endl;
}).wait();
}
int _tmain(int argc, _TCHAR* argv[])
{
test_pageGetUTF8(U("http://www.codeproject.com"));
test_pageGetUTF16(U("http://www.codeproject.com"));
test_pageGetUTF8(U("https://duckduckgo.com/"));
test_pageGetUTF16(U("https://duckduckgo.com/"));
return 0;
}
```
It works fine when tested with the second URL (both utf8 and utf16) but when tested with codeproject.com, the utf16 version fails to retrieve the whole body (usually around 20 chars shorter)
Comments: Hi Steve, Thanks for your feedback - I understand how different encodings work and how byte lengths for strings with various encodings won't match but the number of "code units" (I hope it's the right term - or call them letters) should be the same regardless of encoding: ``` std::string data = response.extract_utf8string(false).get(); utility::string_t data16 = response.extract_utf16string(false).get(); ``` If I'm not mistaken, data.size() should match data16.size(). Regardless of the above, the last step in test_pageGetUTF16 is to output the content to the console; for codeproject.com, the output is truncated (as in it ends in the middle of some piece of content and doesn't include the closing < / html> tag.
void test_pageGetUTF8(std::wstring url){
http_client client(url);
auto query = uri_builder().append_query(L"q", L"test").to_string();
client.request(methods::GET, query)
.then([](http_response response)->pplx::task<std::string>
{
std::string rv;
auto status = response.status_code();
std::cout << "Status is: " << status << std::endl;
if (status == status_codes::OK)
{
response.content_ready().get();
size_t len = response.headers().content_length();
std::cout << "Length is: " << len << std::endl;
std::string data = response.extract_utf8string(false).get();
if (data.size() == len){
rv = "OK";//data;
}
else{
rv = "Failed to retrieve the whole body";
}
}
else{
rv = "error ";
rv += status;
}
return pplx::task_from_result(rv);
})
.then([](pplx::task<std::string> data){
std::cout << data.get() << std::endl;
}).wait();
}
void test_pageGetUTF16(std::wstring url){
http_client client(url);
auto query = uri_builder().append_query(L"q", L"test").to_string();
client.request(methods::GET, query)
.then([](http_response response)->pplx::task<utility::string_t>
{
utility::string_t rv;
auto status = response.status_code();
std::wcout << "Status is: " << status << std::endl;
if (status == status_codes::OK)
{
response.content_ready().get();
size_t len = response.headers().content_length();
std::cout << "Length is: " << len <<std::endl;
utility::string_t data = response.extract_utf16string(false).get();
if (data.size() == len){
rv = U("OK");//data;
}
else{
rv = U("Failed to retrieve the whole body");
}
}
else{
rv = U("error ");
rv += status;
}
return pplx::task_from_result(rv);
})
.then([](pplx::task<utility::string_t> data){
std::wcout << data.get() << std::endl <<std::endl;
}).wait();
}
int _tmain(int argc, _TCHAR* argv[])
{
test_pageGetUTF8(U("http://www.codeproject.com"));
test_pageGetUTF16(U("http://www.codeproject.com"));
test_pageGetUTF8(U("https://duckduckgo.com/"));
test_pageGetUTF16(U("https://duckduckgo.com/"));
return 0;
}
```
It works fine when tested with the second URL (both utf8 and utf16) but when tested with codeproject.com, the utf16 version fails to retrieve the whole body (usually around 20 chars shorter)
Comments: Hi Steve, Thanks for your feedback - I understand how different encodings work and how byte lengths for strings with various encodings won't match but the number of "code units" (I hope it's the right term - or call them letters) should be the same regardless of encoding: ``` std::string data = response.extract_utf8string(false).get(); utility::string_t data16 = response.extract_utf16string(false).get(); ``` If I'm not mistaken, data.size() should match data16.size(). Regardless of the above, the last step in test_pageGetUTF16 is to output the content to the console; for codeproject.com, the output is truncated (as in it ends in the middle of some piece of content and doesn't include the closing < / html> tag.