Problem with unencoded pound signs in WebRequest

Asked By Rob Reid
18-Nov-09 04:45 PM
Earn up to 10 extra points for answering this tough question.
I am having trouble with a C# proxy page I am writing which allows me to do cross domain AJAX calls with Javascript.
The problem is with certain pages that contain pound signs £ that are not HTML encoded in the source that I am trying
to extract with the WebRequest and WebResponse objects.
The page is using a charset of iso-8859-1 which I think is the problem as my object is using UTF-8. I have created
two test pages one using UTF-8 the other iso-8859-1

http://www.strictly-software.com/test_pound_iso.htm
http://www.strictly-software.com/test_pound_utf8.htm

I can extract the unencoded pound signs successfully from the UTF-8 page but not the ISO page. Obviously I have no control
over the source content that I want to extract so is there a way of getting the pound signs back instead of ? or squares?

I have tried numerous methods e.g byte arrays, memory streams, changing the encoding object to ASCII, supplying content-types
and charsets etc but nothing seems to work.

The code is below

using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;

namespace HattrickHeaven
{
    public class HTTPRequest
    {

        private int _status = 0;
        private string _statusDesc = "";
        private string _responseContent = "";
        private string _errorType = "";
        private bool _retry = false;

        // only here for testing
        private bool _debug = true;
        private string _debugFile = @"d:\inetpub\wwwroot\hattrickheaven.com\LogFiles\ripperDebug.txt";

        private void ShowDebug(string msg)
        {
            if(!_debug) return;

            if(!String.IsNullOrEmpty(msg))
            {
                msg += " ";
                System.IO.File.AppendAllText(_debugFile,msg,Encoding.UTF8);                   
              //  System.IO.File.AppendAllText(_debugFile, msg, Encoding.ASCII);                   
            }
        }

        public HTTPRequest(string URL, WebProxy proxy, string robotAgent)
        {
                        
            HttpWebRequest client = (HttpWebRequest)WebRequest.Create(URL);
            client.Method = "GET";
            client.ContentLength = 0;
            client.ContentType = "application/x-www-form-urlencoded;charset=charset=UTF-8";
            
            if (proxy != null)
            {
                client.Proxy = proxy;
            }

            client.Timeout = 40000;
        client.UserAgent = robotAgent;

            try
            {
                // get the response
                HttpWebResponse response = (HttpWebResponse)client.GetResponse();
                
                _statusDesc = response.StatusDescription;
                _status = Convert.ToInt32(response.StatusCode);                         
               
               StreamReader ResponseStream = new StreamReader(response.GetResponseStream(), Encoding.UTF8);
                              
               _responseContent = ResponseStream.ReadToEnd();
               
                ResponseStream.Close();
                response.Close();

            }            
            catch (Exception err)
            {
                _errorType = err.Message.ToString();

            }
            
        }

        public int StatusCode
        {
            get { return _status; }
        }
        public string StatusDesc
        {
            get { return _statusDesc; }
        }
        public string Response
        {
            get { return _responseContent; }
        }
        public string ErrorType
        {
            get { return _errorType; }
        }
        public bool Retry
        {
            get { return _retry; }
        }
    }
}

Thanks for any help
Create New Account