I am having trouble with a C# proxy page I am writing which allows me to do cross domain AJAX calls with Javascript.
The problem is with certain pages that contain pound signs that are not HTML encoded in the source that I am trying
to extract with the WebRequest and WebResponse objects.
The page is using a charset of iso-8859-1 which I think is the problem as my object is using UTF-8. I have created
two test pages one using UTF-8 the other iso-8859-1
http://www.strictly-software.com/test_pound_iso. htm
http://www.strictly-software.com/test_pound_utf8 .htm
I can extract the unencoded pound signs successfully from the UTF-8 page but not the ISO page. Obviously I have no control
over the source content that I want to extract so is there a way of getting the pound signs back instead of ? or squares?
I have tried numerous methods e.g byte arrays, memory streams, changing the encoding object to ASCII, supplying content-types
and charsets etc but nothing seems to work.
The code is below
using System;
using System.Collecti ons.Generic;
using System.Text;
using System.Net;
using System.IO;
namespace HattrickHeaven
{
public class HTTPRequest
{
private int _status = 0;
private string _statusDesc = "";
private string _responseConten t = "";
private string _errorType = "";
private bool _retry = false;
// only here for testing
private bool _debug = true;
private string _debugFile = @"d:\inetpub\ww wroot\hattrickh eaven.com\LogFi les\ripperDebug .txt";
private void ShowDebug(strin g msg)
{
if(!_debug) return;
if(!String.IsNu llOrEmpty(msg))
{
msg += " ";
System.IO.File. AppendAllText(_ debugFile,msg,E ncoding.UTF8);
// System.IO.File. AppendAllText(_ debugFile, msg, Encoding.ASCII) ;
}
}
public HTTPRequest(str ing URL, WebProxy proxy, string robotAgent)
{
HttpWebRequest client = (HttpWebRequest )WebRequest.Cre ate(URL);
client.Method = "GET";
client.ContentL ength = 0;
client.ContentT ype = "applicatio n/x-www-form-urlencoded;char set=charset=UTF-8";
if (proxy != null)
{
client.Proxy = proxy;
}
client.Timeout = 40000;
client.UserAgen t = robotAgent;
try
{
// get the response
HttpWebResponse response = (HttpWebRespons e)client.GetRes ponse();
_statusDesc = response.Status Description;
_status = Convert.ToInt32 (response.Statu sCode);
StreamReader ResponseStream = new StreamReader(re sponse.GetRespo nseStream(), Encoding.UTF8);
_responseConten t = ResponseStream. ReadToEnd();
ResponseStream. Close();
response.Close( );
}
catch (Exception err)
{
_errorType = err.Message.ToS tring();
}
}
public int StatusCode
{
get { return _status; }
}
public string StatusDesc
{
get { return _statusDesc; }
}
public string Response
{
get { return _responseConten t; }
}
public string ErrorType
{
get { return _errorType; }
}
public bool Retry
{
get { return _retry; }
}
}
}
Thanks for any help