I am having trouble with a C# proxy page I am writing which allows me to do cross domain AJAX calls with Javascript.
The problem is with certain pages that contain pound signs £ that are not HTML encoded in the source that I am trying
to extract with the WebRequest and WebResponse objects.
The page is using a charset of iso-8859-1 which I think is the problem as my object is using UTF-8. I have created
two test pages one using UTF-8 the other iso-8859-1
http://www.strictly-software.com/test_pound_iso.htm
http://www.strictly-software.com/test_pound_utf8.htm
I can extract the unencoded pound signs successfully from the UTF-8 page but not the ISO page. Obviously I have no control
over the source content that I want to extract so is there a way of getting the pound signs back instead of ? or squares?
I have tried numerous methods e.g byte arrays, memory streams, changing the encoding object to ASCII, supplying content-types
and charsets etc but nothing seems to work.
The code is below
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;
namespace HattrickHeaven
{
public class HTTPRequest
{
private int _status = 0;
private string _statusDesc = "";
private string _responseContent = "";
private string _errorType = "";
private bool _retry = false;
// only here for testing
private bool _debug = true;
private string _debugFile = @"d:\inetpub\wwwroot\hattrickheaven.com\LogFiles\r ipperDebug.txt";
private void ShowDebug(string msg)
{
if(!_debug) return;
if(!String.IsNullOrEmpty(msg))
{
msg += " ";
System.IO.File.AppendAllText(_debugFile,msg,Encodi ng.UTF8);
// System.IO.File.AppendAllText(_debugFile, msg, Encoding.ASCII);
}
}
public HTTPRequest(string URL, WebProxy proxy, string robotAgent)
{
HttpWebRequest client = (HttpWebRequest)WebRequest.Create(URL);
client.Method = "GET";
client.ContentLength = 0;
client.ContentType = "application/x-www-form-urlencoded;charset=charset=UTF-8";
if (proxy != null)
{
client.Proxy = proxy;
}
client.Timeout = 40000;
client.UserAgent = robotAgent;
try
{
// get the response
HttpWebResponse response = (HttpWebResponse)client.GetResponse();
_statusDesc = response.StatusDescription;
_status = Convert.ToInt32(response.StatusCode);
StreamReader ResponseStream = new StreamReader(response.GetResponseStream(), Encoding.UTF8);
_responseContent = ResponseStream.ReadToEnd();
ResponseStream.Close();
response.Close();
}
catch (Exception err)
{
_errorType = err.Message.ToString();
}
}
public int StatusCode
{
get { return _status; }
}
public string StatusDesc
{
get { return _statusDesc; }
}
public string Response
{
get { return _responseContent; }
}
public string ErrorType
{
get { return _errorType; }
}
public bool Retry
{
get { return _retry; }
}
}
}
Thanks for any help