```
// Sends a GET request to `url` with the query parameter "q" set to "test",
// extracts the response body as a UTF-8 std::string and prints whether its
// size equals the Content-Length header. Blocks (via wait()) until the request
// and both continuations have completed.
//
// url: address handed to http_client.
void test_pageGetUTF8(std::wstring url){
    http_client client(url);
    auto query = uri_builder().append_query(L"q", L"test").to_string();
    client.request(methods::GET, query)
    .then([](http_response response)->pplx::task<std::string>
    {
        std::string rv;
        auto status = response.status_code();
        std::cout << "Status is: " << status << std::endl;
        if (status == status_codes::OK)
        {
            // Wait for the complete body before looking at it.
            response.content_ready().get();
            size_t len = response.headers().content_length();
            std::cout << "Length is: " << len << std::endl;
            std::string data = response.extract_utf8string(false).get();
            // data.size() is the number of bytes held by the std::string;
            // it is compared against the Content-Length header value.
            if (data.size() == len){
                rv = "OK";//data;
            }
            else{
                rv = "Failed to retrieve the whole body";
            }
        }
        else{
            rv = "error ";
            // Append the code as decimal text. `rv += status` would convert
            // the integral status code to a single char instead of its digits.
            rv += std::to_string(status);
        }
        return pplx::task_from_result(rv);
    })
    .then([](pplx::task<std::string> data){
        // Print the verdict produced by the previous continuation.
        std::cout << data.get() << std::endl;
    }).wait();
}
// Sends a GET request to `url` with the query parameter "q" set to "test",
// extracts the response body as a UTF-16 string and prints whether its size
// equals the Content-Length header. Blocks (via wait()) until the request and
// both continuations have completed.
//
// url: address handed to http_client.
void test_pageGetUTF16(std::wstring url){
    http_client client(url);
    auto query = uri_builder().append_query(L"q", L"test").to_string();
    client.request(methods::GET, query)
    .then([](http_response response)->pplx::task<utility::string_t>
    {
        utility::string_t rv;
        auto status = response.status_code();
        std::wcout << "Status is: " << status << std::endl;
        if (status == status_codes::OK)
        {
            // Wait for the complete body before looking at it.
            response.content_ready().get();
            size_t len = response.headers().content_length();
            // Use the wide stream here too, like the rest of this function.
            std::wcout << "Length is: " << len << std::endl;
            utility::string_t data = response.extract_utf16string(false).get();
            // NOTE: Content-Length counts the bytes of the body as sent, while
            // data.size() counts UTF-16 code units after conversion. The two
            // can differ even when the whole body was received, so a mismatch
            // here does not by itself prove that data is missing.
            if (data.size() == len){
                rv = U("OK");//data;
            }
            else{
                rv = U("Failed to retrieve the whole body");
            }
        }
        else{
            rv = U("error ");
            // Append the code as decimal text. `rv += status` would convert
            // the integral status code to a single character instead.
            rv += std::to_wstring(status);
        }
        return pplx::task_from_result(rv);
    })
    .then([](pplx::task<utility::string_t> data){
        // Print the verdict produced by the previous continuation.
        std::wcout << data.get() << std::endl <<std::endl;
    }).wait();
}
// Entry point: for each target URL, runs the UTF-8 test followed by the
// UTF-16 test. The command-line arguments are not used.
int _tmain(int argc, _TCHAR* argv[])
{
    const std::wstring targets[] = {
        U("http://www.codeproject.com"),
        U("https://duckduckgo.com/")
    };
    for (const std::wstring& target : targets)
    {
        test_pageGetUTF8(target);
        test_pageGetUTF16(target);
    }
    return 0;
}
```
It works fine when tested with the second URL (both utf8 and utf16) but when tested with codeproject.com, the utf16 version fails to retrieve the whole body (usually around 20 chars shorter)
Comments: Hi mihai_qwi, The Content-Length header indicates the number of raw bytes in the response body. The message body is coming across the wire as UTF-8, as indicated in the Content-Type header. The std::string contains the UTF-8 string and calling size() on it returns the number of bytes used to store the string, so of course these two values will be equal. However, in the second case you are asking us for the response body as a UTF-16 string, so a conversion has to be performed. When converting from UTF-8 to UTF-16, ASCII characters go from taking one byte in UTF-8 to two bytes in UTF-16; however, not all characters work out this way. This means the number of 2-byte characters in the std::wstring doesn't necessarily have to match the Content-Length header. I bet the codeproject page is returning some characters that take different space when represented in UTF-16. I modified your program to save the results to a file and it looks like all the data is present. I don't believe there is an error here. Regarding the second issue you mention, I'm unable to reproduce this on my machine when I delete the content_ready() calls. Steve
```
// Sends a GET request to `url` with the query parameter "q" set to "test",
// extracts the response body as a UTF-8 std::string and prints whether its
// size equals the Content-Length header. Blocks (via wait()) until the request
// and both continuations have completed.
//
// url: address handed to http_client.
void test_pageGetUTF8(std::wstring url){
    http_client client(url);
    auto query = uri_builder().append_query(L"q", L"test").to_string();
    client.request(methods::GET, query)
    .then([](http_response response)->pplx::task<std::string>
    {
        std::string rv;
        auto status = response.status_code();
        std::cout << "Status is: " << status << std::endl;
        if (status == status_codes::OK)
        {
            // Wait for the complete body before looking at it.
            response.content_ready().get();
            size_t len = response.headers().content_length();
            std::cout << "Length is: " << len << std::endl;
            std::string data = response.extract_utf8string(false).get();
            // data.size() is the number of bytes held by the std::string;
            // it is compared against the Content-Length header value.
            if (data.size() == len){
                rv = "OK";//data;
            }
            else{
                rv = "Failed to retrieve the whole body";
            }
        }
        else{
            rv = "error ";
            // Append the code as decimal text. `rv += status` would convert
            // the integral status code to a single char instead of its digits.
            rv += std::to_string(status);
        }
        return pplx::task_from_result(rv);
    })
    .then([](pplx::task<std::string> data){
        // Print the verdict produced by the previous continuation.
        std::cout << data.get() << std::endl;
    }).wait();
}
// Sends a GET request to `url` with the query parameter "q" set to "test",
// extracts the response body as a UTF-16 string and prints whether its size
// equals the Content-Length header. Blocks (via wait()) until the request and
// both continuations have completed.
//
// url: address handed to http_client.
void test_pageGetUTF16(std::wstring url){
    http_client client(url);
    auto query = uri_builder().append_query(L"q", L"test").to_string();
    client.request(methods::GET, query)
    .then([](http_response response)->pplx::task<utility::string_t>
    {
        utility::string_t rv;
        auto status = response.status_code();
        std::wcout << "Status is: " << status << std::endl;
        if (status == status_codes::OK)
        {
            // Wait for the complete body before looking at it.
            response.content_ready().get();
            size_t len = response.headers().content_length();
            // Use the wide stream here too, like the rest of this function.
            std::wcout << "Length is: " << len << std::endl;
            utility::string_t data = response.extract_utf16string(false).get();
            // NOTE: Content-Length counts the bytes of the body as sent, while
            // data.size() counts UTF-16 code units after conversion. The two
            // can differ even when the whole body was received, so a mismatch
            // here does not by itself prove that data is missing.
            if (data.size() == len){
                rv = U("OK");//data;
            }
            else{
                rv = U("Failed to retrieve the whole body");
            }
        }
        else{
            rv = U("error ");
            // Append the code as decimal text. `rv += status` would convert
            // the integral status code to a single character instead.
            rv += std::to_wstring(status);
        }
        return pplx::task_from_result(rv);
    })
    .then([](pplx::task<utility::string_t> data){
        // Print the verdict produced by the previous continuation.
        std::wcout << data.get() << std::endl <<std::endl;
    }).wait();
}
// Entry point: for each target URL, runs the UTF-8 test followed by the
// UTF-16 test. The command-line arguments are not used.
int _tmain(int argc, _TCHAR* argv[])
{
    const std::wstring targets[] = {
        U("http://www.codeproject.com"),
        U("https://duckduckgo.com/")
    };
    for (const std::wstring& target : targets)
    {
        test_pageGetUTF8(target);
        test_pageGetUTF16(target);
    }
    return 0;
}
```
It works fine when tested with the second URL (both utf8 and utf16) but when tested with codeproject.com, the utf16 version fails to retrieve the whole body (usually around 20 chars shorter)
Comments: Hi mihai_qwi, The Content-Length header indicates the number of raw bytes in the response body. The message body is coming across the wire as UTF-8, as indicated in the Content-Type header. The std::string contains the UTF-8 string and calling size() on it returns the number of bytes used to store the string, so of course these two values will be equal. However, in the second case you are asking us for the response body as a UTF-16 string, so a conversion has to be performed. When converting from UTF-8 to UTF-16, ASCII characters go from taking one byte in UTF-8 to two bytes in UTF-16; however, not all characters work out this way. This means the number of 2-byte characters in the std::wstring doesn't necessarily have to match the Content-Length header. I bet the codeproject page is returning some characters that take different space when represented in UTF-16. I modified your program to save the results to a file and it looks like all the data is present. I don't believe there is an error here. Regarding the second issue you mention, I'm unable to reproduce this on my machine when I delete the content_ready() calls. Steve