February 15, 2011

Method to Retrieve a Web Page as a String

Here is some sample code for this:
/// <summary>
/// Synchronously reads the contents of a url and returns it as a string.
/// </summary>
/// <remarks>
/// Failures that stem from the request itself (null or malformed url,
/// unsupported scheme, transport or protocol errors, I/O errors while reading
/// the body) are passed to HandleException and reported through
/// <paramref name="requestOK"/> and <paramref name="msg"/> instead of being
/// thrown. Any other exception is rethrown unchanged.
/// </remarks>
/// <param name="url">Url to read</param>
/// <param name="requestOK">Whether the request succeeded or failed</param>
/// <param name="msg">Error message when something went wrong; empty on success</param>
/// <returns>url contents as a string, or an empty string when the request failed</returns>
public string TryGetUrlContents(string url, out bool requestOK, out string msg)
{
    string webPage = string.Empty;

    // Assume failure until the whole body has been read successfully.
    requestOK = false;
    msg = "No response to the web request";
    try
    {
        // The request method is left at its default (GET).
        WebRequest webRequest = WebRequest.Create(url);

        if (webRequest != null)
        {
            using (WebResponse response = webRequest.GetResponse())
            {
                // The 'true' argument turns on byte-order-mark detection.
                // When the stream carries no byte-order mark the reader
                // decodes the content as UTF-8.
                using (StreamReader responseStream = new
                       StreamReader(response.GetResponseStream(), true))
                {
                    webPage = responseStream.ReadToEnd();
                    msg = "";
                    requestOK = true;
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Only swallow exceptions that describe a failed request; anything
        // else is a programming or environment error and must propagate.
        if (!IsRequestFailure(ex))
        {
            throw;
        }

        HandleException(ex);
        msg = "Exception caught tring to make the request:" + ex.ToString();
        requestOK = false;
        webPage = string.Empty;
    }

    return webPage;
}

/// <summary>
/// Decides whether an exception represents a failed web request that
/// TryGetUrlContents should report to its caller rather than rethrow.
/// </summary>
/// <param name="ex">Exception raised while creating, sending or reading the request</param>
/// <returns>true when the exception is one of the expected request failures</returns>
private static bool IsRequestFailure(Exception ex)
{
    return ex is WebException                  // transport / HTTP status errors
        || ex is ProtocolViolationException    // malformed protocol exchange
        || ex is UriFormatException            // url could not be parsed
        || ex is ArgumentException             // null url or null response stream
        || ex is NotSupportedException         // unregistered url scheme
        || ex is IOException;                  // failure while reading the body
}

/// <summary>
/// Reports an exception by writing its full text to the debug output.
/// </summary>
/// <param name="ex">Exception to report</param>
void HandleException(Exception ex)
{
    // The message is built inside the call so it is only evaluated when
    // the Debug.WriteLine call itself is.
    Debug.WriteLine(string.Concat("Exception caught: ", ex.ToString()));
}
The method has been changed to attempt to decode the returned contents correctly. If you do not care whether the retrieval succeeded, use this overload instead:
/// <summary>
/// Synchronously reads the contents of a url and returns it as a string,
/// discarding the success flag and the error message.
/// </summary>
/// <param name="url">Url to read</param>
/// <returns>Whatever the three-argument overload returns for the url</returns>
public string TryGetUrlContents(string url)
{
    // The out arguments are required by the overload but deliberately ignored.
    string ignoredMessage;
    bool ignoredStatus;

    string contents = TryGetUrlContents(url, out ignoredStatus, out ignoredMessage);
    return contents;
}

No comments: