jeudi 26 février 2015

Why, when downloading HTML content and saving it as an HTML file on my hard disk, doesn't the file contain all the content?

I made a simple test project that downloads the page once with HttpWebRequest and once with WebClient. In both cases the HTML file on my hard disk is 77KB, while the file size should be about 363KB.


When I open the website in Chrome, right-click, choose "Save as", and save it as an HTML file, the file size is 363KB.


But in my program the file size is only 77KB.



using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.IO;
using System.Net;

namespace WindowsFormsApplication2
{
public partial class Form1 : Form
{
// Base directory under which the target folders are combined.
// NOTE(review): Path.GetDirectoryName drops the last path segment, so this
// evaluates to C:\Users\chocolade1972 rather than ...\test — confirm that is intended.
string appDir = Path.GetDirectoryName(@"C:\Users\chocolade1972\test");
// Name of the sub-folder (under appDir) that receives the downloaded HTML files.
string htmlsTargetDirectory = "Tapuz Htmls";
// Name of the sub-folder (under appDir) for images; only referenced by commented-out code.
string imagesTargetDirectory = "Tapuz Images";
// Full path of the HTML target folder; assigned in getsource().
string combinedHtmlsDir;
// Full path of the images target folder; only assigned in commented-out code.
string combinedImagesDir;
// Full path of the HTML file being written; only assigned in commented-out code.
String targetHtmls;

/// <summary>
/// Initializes the form's designer-generated components and then immediately
/// runs <see cref="getsource"/>.
/// </summary>
public Form1()
{
InitializeComponent();

// Disabled earlier experiment: created both target directories and downloaded
// 48 pages (i = 1..48) with a plain WebClient, one file per page.
/*combinedImagesDir = Path.Combine(appDir, imagesTargetDirectory);
if (!Directory.Exists(combinedImagesDir))
{
Directory.CreateDirectory(combinedImagesDir);
}
combinedHtmlsDir = Path.Combine(appDir, htmlsTargetDirectory);
if (!Directory.Exists(combinedHtmlsDir))
{
Directory.CreateDirectory(combinedHtmlsDir);
}

for (int i = 1; i < 49; i++)
{
WebClient wc = new WebClient();
targetHtmls = (combinedHtmlsDir + "\\Html" + i + ".html");
wc.DownloadFile("http://ift.tt/1whXSS6" + i,
targetHtmls);
wc.Dispose();
}*/
// getsource() issues a synchronous web request, so the constructor does not
// return until the download and file write have finished.
getsource();
}

/// <summary>
/// Downloads the page at the hard-coded URL with <see cref="HttpWebRequest"/>
/// and writes the response body to "Html1.html" inside the HTML target
/// directory (<c>appDir</c> combined with <c>htmlsTargetDirectory</c>).
/// </summary>
/// <remarks>
/// Nothing is written when the response status is not 200 OK. The response,
/// its stream and the reader are always disposed, even if reading throws.
/// The target directory is created if it does not exist yet.
/// NOTE(review): only the raw response body is saved. Content that the page
/// adds later through scripts, or that the server only sends to
/// browser-like clients, would not be part of it — TODO confirm whether that
/// explains the size difference compared with the browser's "Save as".
/// </remarks>
private void getsource()
{
    /*combinedHtmlsDir = Path.Combine(appDir, htmlsTargetDirectory);
    using (var client = new CookiesAwareWebClient())
    {
    for (int i = 1; i < 49; i++)
    {
    targetHtmls = (combinedHtmlsDir + "\\Html" + i + ".html");
    client.DownloadFile("http://ift.tt/1whXSS6" + i,
    targetHtmls);
    }
    }*/

    string urlAddress = "http://ift.tt/1ByXoy3";

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(urlAddress);

    string data;

    // 'using' guarantees the connection is released on every path, including
    // the non-OK early return and any exception thrown while reading.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        if (response.StatusCode != HttpStatusCode.OK)
        {
            return;
        }

        // CharacterSet can be an empty string (not only null) when the server
        // sends no usable charset; Encoding.GetEncoding("") would throw.
        string charset = response.CharacterSet;

        using (Stream receiveStream = response.GetResponseStream())
        using (StreamReader readStream = string.IsNullOrEmpty(charset)
            ? new StreamReader(receiveStream)
            : new StreamReader(receiveStream, Encoding.GetEncoding(charset)))
        {
            data = readStream.ReadToEnd();
        }
    }

    combinedHtmlsDir = Path.Combine(appDir, htmlsTargetDirectory);

    // The directory-creating code elsewhere in this form is commented out, so
    // make sure the folder exists before writing (no-op if it already does).
    Directory.CreateDirectory(combinedHtmlsDir);
    File.WriteAllText(Path.Combine(combinedHtmlsDir, "Html1.html"), data);
}




/// <summary>
/// A <see cref="WebClient"/> that attaches one shared
/// <see cref="System.Net.CookieContainer"/> to every HTTP request it creates,
/// so cookies set by one response are sent with later requests made through
/// the same client instance.
/// </summary>
public class CookiesAwareWebClient : WebClient
{
    /// <summary>Cookie store shared by all HTTP requests of this client.</summary>
    public CookieContainer CookieContainer { get; private set; }

    /// <summary>Creates the client with an empty cookie container.</summary>
    public CookiesAwareWebClient()
    {
        CookieContainer = new CookieContainer();
    }

    /// <summary>
    /// Builds the request as the base class does and, when it is an HTTP(S)
    /// request, assigns this client's cookie container to it.
    /// </summary>
    /// <param name="address">The URI the request is created for.</param>
    /// <returns>The request created by the base class.</returns>
    protected override WebRequest GetWebRequest(Uri address)
    {
        var request = base.GetWebRequest(address);

        // WebClient also serves non-HTTP schemes (file://, ftp://); those
        // requests are not HttpWebRequest, so a hard cast would throw
        // InvalidCastException. Only HTTP requests carry cookies.
        var httpRequest = request as HttpWebRequest;
        if (httpRequest != null)
        {
            httpRequest.CookieContainer = CookieContainer;
        }

        return request;
    }
}


/// <summary>
/// Handler with the standard event signature; intentionally left empty.
/// NOTE(review): presumably wired to the form's Load event in the designer file — confirm.
/// </summary>
private void Form1_Load(object sender, EventArgs e)
{

}
}
}


The code is a bit long because it includes all the tests I did: HttpWebRequest, WebClient, and also a cookie container. All of them produce a file that is 77KB in size.


In this case I'm now using HttpWebRequest, and the URL is: http://ift.tt/1ByXoy3


Url


I can't figure out why, when I right-click and choose "Save as" or "View source", I see the whole source of the HTML, but when my program saves it to the hard disk it's only 77KB. It seems that what gets saved to the hard disk is only part of the HTML source, or different source content altogether.


I also posted earlier about trying the WebBrowser control; there, for some reason, it sometimes saves the complete 363KB HTML file and sometimes only 77KB or 90KB.


What could be the problem? Is there any way to solve it?


Aucun commentaire:

Enregistrer un commentaire