Hoppa till innehållet

Wikipedia:Projekt DotNetWikiBot Framework/GameOnBot/Citeweb

Från Wikipedia
//Originally made by sv:Användare:GameOn in 2011
using System;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections;
using System.Xml;
using DotNetWikiBot;
using System.Threading;
using System.Net;
using System.Web;

/// <summary>
/// Bot that walks the articles of a maintenance category, looks for web
/// citation templates lacking an archive URL and either adds an archive
/// URL obtained from webcitation.org or flags the link as dead.
/// </summary>
class MyBot : Bot
{
	// Endpoint that accepts archiving requests as a form POST.
	private const string ArchiveServiceUrl    = "http://www.webcitation.org/archive.php";
	// Text in the service reply that precedes the address of the new archive copy.
	private const string ArchiveMarker        = "An archive of this page should shortly be available at </p><br /><p>";
	// Distance from the start of the marker to the first character of the archive address.
	private const int    ArchiveHostOffset    = 83;
	// Number of characters of the archive address (host plus identifier, without scheme).
	private const int    ArchiveAddressLength = 29;
	// Parameter prefix that introduces the cited URL inside a template.
	private const string UrlParameter         = "|url=";

	// Template names handled; webref is common on svnews, webbref on other sv-projects.
	private static readonly string[] CitationTemplateNames = { "webbref", "webref", "citeweb", "cite web" };

	/// <summary>
	/// Sends <paramref name="parameters"/> as an
	/// application/x-www-form-urlencoded POST body to <paramref name="uri"/>.
	/// </summary>
	/// <param name="uri">Address to post to.</param>
	/// <param name="parameters">Already form-encoded request body.</param>
	/// <returns>
	/// The trimmed response body, or null when sending the request or
	/// reading the response failed (the error is written to the console).
	/// </returns>
	private static string getURLPost(string uri, string parameters)
	{
		WebRequest webRequest = WebRequest.Create(uri);
		byte[]     bytes      = Encoding.UTF8.GetBytes(parameters);

		webRequest.Method        = "POST";
		webRequest.ContentType   = "application/x-www-form-urlencoded";
		webRequest.ContentLength = bytes.Length;

		try
		{
			using (Stream outputStream = webRequest.GetRequestStream())
			{
				outputStream.Write(bytes, 0, bytes.Length);
			}
		}
		catch (WebException ex)
		{
			Console.WriteLine(ex.Message + "Error with request!");
			// The body never reached the server, so asking for a response is pointless.
			return null;
		}
		catch (IOException ex)
		{
			Console.WriteLine(ex.Message + "Error with request!");
			return null;
		}

		try
		{
			// Dispose response and reader so the underlying connection is released.
			using (WebResponse webResponse = webRequest.GetResponse())
			{
				if (webResponse == null)
				{
					return null;
				}
				using (StreamReader reader = new StreamReader(webResponse.GetResponseStream()))
				{
					return reader.ReadToEnd().Trim();
				}
			}
		}
		catch (WebException ex)
		{
			Console.WriteLine(ex.Message + "Error with response!");
		}
		catch (IOException ex)
		{
			Console.WriteLine(ex.Message + "Error with response!");
		}
		return null;
	}

	/// <summary>
	/// Checks whether <paramref name="url"/> can be fetched within ten seconds.
	/// </summary>
	/// <param name="url">Absolute URL to test.</param>
	/// <returns>
	/// False when the URL cannot be parsed, the request fails, or the request
	/// was redirected to an address containing 404.php, 404.htm, 500.php or
	/// 500.htm; true otherwise.
	/// </returns>
	private static bool isUrlWorking(string url)
	{
		Uri urlToCheck;
		if (!Uri.TryCreate(url, UriKind.Absolute, out urlToCheck))
		{
			return false;
		}

		String responseUrl;
		try
		{
			WebRequest request = WebRequest.Create(urlToCheck);
			request.Timeout    = 10000;

			// Close the response right away; only the final address is needed.
			using (WebResponse response = request.GetResponse())
			{
				responseUrl = response.ResponseUri.ToString();
			}
		}
		catch (Exception)
		{
			return false;
		}

		// Same address as requested: no redirect took place, the page is there.
		if (String.Compare(responseUrl, urlToCheck.ToString(), StringComparison.OrdinalIgnoreCase) == 0)
		{
			return true;
		}

		// Redirected: treat well-known error page names as a dead link.
		bool redirectedToErrorPage =
			responseUrl.IndexOf("404.php") > -1 ||
			responseUrl.IndexOf("404.htm") > -1 ||
			responseUrl.IndexOf("500.php") > -1 ||
			responseUrl.IndexOf("500.htm") > -1;
		return !redirectedToErrorPage;
	}

	/// <summary>
	/// Tells whether the template text starts with one of the handled
	/// citation template names, ignoring case.
	/// </summary>
	private static bool isCitationTemplate(string template)
	{
		foreach (string name in CitationTemplateNames)
		{
			if (template.StartsWith(name, StringComparison.OrdinalIgnoreCase))
			{
				return true;
			}
		}
		return false;
	}

	/// <summary>
	/// Asks the archive service to archive <paramref name="url"/> and extracts
	/// the address of the archive copy from the reply.
	/// </summary>
	/// <returns>
	/// The archive address without scheme, or null when the request failed
	/// or the reply does not contain the expected marker text.
	/// </returns>
	private static string requestArchiveAddress(string url, string emailAddress)
	{
		// Form values must be percent-encoded, otherwise '&', '+' or '#' in the
		// cited URL would corrupt the request body.
		string body  = "url=" + HttpUtility.UrlEncode(url) + "&email=" + HttpUtility.UrlEncode(emailAddress);
		string reply = getURLPost(ArchiveServiceUrl, body);
		if (reply == null)
		{
			return null;
		}

		//Todo: Improve this later on so it's more robust for changes on webcitation.org
		int markerPos = reply.IndexOf(ArchiveMarker, StringComparison.Ordinal);
		if (markerPos < 0 || markerPos + ArchiveHostOffset + ArchiveAddressLength > reply.Length)
		{
			return null;
		}
		return reply.Substring(markerPos + ArchiveHostOffset, ArchiveAddressLength);
	}

	/// <summary>
	/// Entry point: processes every main-namespace article in the category
	/// and saves the pages whose text was modified.
	/// </summary>
	public static void Main()
	{
		Site     site                = new Site("http://sv.wikipedia.org", user, password);
		String   emailStr            = email;
		// NOTE(review): the two date values below are hard-coded literals and
		// presumably have to be updated before each run.
		String   todaysDateStr       = "26 maj 2011";
		String   deadlinkTemplateStr = "{{död länk|datum=2011-05}}";
		String   editCommentStr      = "Lägger in arkiveringsurl i webbref alt. dödmarkerar länk";
		String   archiveUrlStr       = "arkivurl";
		String   archiveDateStr      = "arkivdatum";
		String   categoryStr         = "Artiklar med Webbref som saknar arkiveringsurl";
		PageList pl                  = new PageList(site);

		// Running count of archived links; starts at 18, presumably carried
		// over from earlier runs — TODO confirm.
		long     antal               = 18;

		//Find articles among the recentchanges
//		pl.FillFromRecentChanges(false, false, false, false, true, 1000, 7);
		//Find articles from a category
		pl.FillFromCategory(categoryStr);
		//Find articles from all the links to a template, mostly useful on very small wikis
//		pl.FillFromLinksToPage("Mall:Webbref");
		//Remove namespaces, changes required (or uncomment) for some sisterprojects, like wikisource
		pl.FilterNamespaces(new int[] {0});

		foreach (Page myPage in pl)
		{
			bool      changed = false;
			// Template texts already handled on this page. String.Replace below
			// rewrites every occurrence at once, so identical templates must
			// not be processed a second time.
			ArrayList handled = new ArrayList();

			myPage.Load();
			foreach (string template in myPage.GetTemplatesWithParams())
			{
				if (!isCitationTemplate(template))
				{
					continue;
				}
				//If we already have an archive added skip to next template
				if (template.IndexOf(archiveUrlStr, StringComparison.Ordinal) > 0)
				{
					continue;
				}
				if (handled.Contains(template))
				{
					continue;
				}
				handled.Add(template);

				int paramPos = template.IndexOf(UrlParameter, StringComparison.Ordinal);
				if (paramPos < 0)
				{
					// No url parameter at all: nothing to archive or to check.
					continue;
				}

				//Cut off the rest, unless it's the last argument
				int valueStart = paramPos + UrlParameter.Length;
				int valueEnd   = template.IndexOf("|", valueStart, StringComparison.Ordinal);
				if (valueEnd < 0)
				{
					valueEnd = template.Length;
				}
				string rawUrl = template.Substring(valueStart, valueEnd - valueStart);
				Console.WriteLine(rawUrl);

				//Fix some specialcases regarding URLs
				string url = HttpUtility.UrlDecode(rawUrl).Trim();
				Console.WriteLine(url);

				Uri parsedUrl;
				if (!Uri.TryCreate(url, UriKind.Absolute, out parsedUrl))
				{
					// Empty or malformed value: leave the template untouched
					// rather than flagging something that was never checked.
					Console.WriteLine("Skipping template without a usable URL.");
					continue;
				}

				//verify that the page isn't dead first, if it is mark it as such
				if (isUrlWorking(url))
				{
					string archiveAddress = requestArchiveAddress(url, emailStr);
					if (archiveAddress == null)
					{
						Console.WriteLine("Could not obtain an archive URL, skipping.");
						continue;
					}

					// Insert right after the URL as written in the article (before
					// any trailing whitespace), so the edit also works when the
					// decoded URL differs from the raw template text.
					int    insertAt    = valueStart + rawUrl.TrimEnd().Length;
					string newTemplate = template.Insert(insertAt, "|" + archiveUrlStr + "=http://" + archiveAddress + "|" + archiveDateStr + "=" + todaysDateStr);
					myPage.text = myPage.text.Replace(template, newTemplate);
					changed     = true;
					antal++;
				}
				else
				{
					//Add {{dead link}}
					// NOTE(review): the marker is appended to the template text as
					// returned by GetTemplatesWithParams; if that text excludes the
					// closing braces the marker ends up inside the citation —
					// confirm against DotNetWikiBot.
					Console.WriteLine("Dead link found!");
					myPage.text = myPage.text.Replace(template, template + deadlinkTemplateStr);
					changed     = true;
				}
			}

			if (changed)
			{
				myPage.Save(myPage.text, editCommentStr, true);
				Console.WriteLine("Nytt antal = " + antal.ToString());
			}
		}
	}
}