Wikipedia:Projekt DotNetWikiBot Framework/GameOnBot/Citeweb
Utseende
//Originally made by sv:Användare:GameOn in 2011
using System;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections;
using System.Xml;
using DotNetWikiBot;
using System.Threading;
using System.Net;
using System.Web;

/// <summary>
/// Bot that walks the pages of a maintenance category, looks for web citation
/// templates without an archive URL and either requests an archive copy from
/// webcitation.org (live link) or appends a dead-link marker (dead link).
/// </summary>
class MyBot : Bot
{
    /// <summary>Endpoint that receives the archive request form.</summary>
    private const string ArchiveServiceUrl = "http://www.webcitation.org/archive.php";

    /// <summary>Text in the archive service response that precedes the archive link.</summary>
    private const string ArchiveResponseMarker = "An archive of this page should shortly be available at </p><br /><p>";

    // Distance from the start of the marker to the archive host name, and the
    // number of characters taken from there. Presumably these skip the marker
    // plus the anchor prefix and cover "www.webcitation.org/<id>" -- confirm
    // against a current response before relying on them.
    private const int ArchiveUrlOffset = 83;
    private const int ArchiveUrlLength = 29;

    /// <summary>Name of the dead-link template as written into the page.</summary>
    private const string DeadLinkTemplateName = "död länk";

    /// <summary>Citation template names handled by the bot (webref on svnews, webbref elsewhere).</summary>
    private static readonly string[] CitationTemplateNames = { "webbref", "webref", "citeweb", "cite web" };

    /// <summary>Swedish month names, kept local so the output does not depend on installed cultures.</summary>
    private static readonly string[] SwedishMonthNames =
    {
        "januari", "februari", "mars", "april", "maj", "juni",
        "juli", "augusti", "september", "oktober", "november", "december"
    };

    /// <summary>Matches the url parameter of a template, tolerating whitespace around the name.</summary>
    private static readonly Regex UrlParameterPattern = new Regex(@"\|\s*url\s*=([^|]*)");

    /// <summary>
    /// Sends a form-encoded POST request and returns the trimmed response body.
    /// </summary>
    /// <param name="uri">Address to post to.</param>
    /// <param name="parameters">Already form-encoded request body.</param>
    /// <returns>The response text, or null when the request or the response failed.</returns>
    private static string getURLPost(string uri, string parameters)
    {
        WebRequest webRequest = WebRequest.Create(uri);
        byte[] bytes = Encoding.UTF8.GetBytes(parameters);
        webRequest.Method = "POST";
        webRequest.ContentType = "application/x-www-form-urlencoded";
        webRequest.ContentLength = bytes.Length;

        try
        {
            using (Stream outputStream = webRequest.GetRequestStream())
            {
                outputStream.Write(bytes, 0, bytes.Length);
            }
        }
        catch (WebException ex)
        {
            Console.WriteLine(ex.Message + "Error with request!");
            // The body was never sent, so asking for a response is pointless.
            return null;
        }

        try
        {
            using (WebResponse webResponse = webRequest.GetResponse())
            {
                if (webResponse == null)
                {
                    return null;
                }

                using (StreamReader reader = new StreamReader(webResponse.GetResponseStream()))
                {
                    return reader.ReadToEnd().Trim();
                }
            }
        }
        catch (WebException ex)
        {
            Console.WriteLine(ex.Message + "Error with response!");
            return null;
        }
    }

    /// <summary>
    /// Checks whether a URL can be fetched and does not redirect to an error page.
    /// </summary>
    /// <param name="url">Absolute URL taken from a citation template.</param>
    /// <returns>
    /// False when the URL is malformed, the request fails, or the request is
    /// redirected to an address containing 404/500 page names; true otherwise.
    /// </returns>
    private static bool isUrlWorking(string url)
    {
        Uri urlToCheck;
        if (!Uri.TryCreate(url, UriKind.Absolute, out urlToCheck))
        {
            // Malformed input from the wiki text must not abort the whole run.
            return false;
        }

        string responseUrl;
        try
        {
            WebRequest request = WebRequest.Create(urlToCheck);
            request.Timeout = 10000;

            // Dispose the response so the underlying connection is released.
            using (WebResponse response = request.GetResponse())
            {
                responseUrl = response.ResponseUri.ToString();
            }
        }
        catch (Exception)
        {
            // Deliberately broad: any failure to fetch counts as a dead link.
            return false;
        }

        if (String.Compare(responseUrl, urlToCheck.ToString(), StringComparison.OrdinalIgnoreCase) == 0)
        {
            return true;
        }

        // We were redirected; treat well-known error page names as dead.
        bool redirectedToErrorPage =
            responseUrl.IndexOf("404.php", StringComparison.Ordinal) > -1 ||
            responseUrl.IndexOf("404.htm", StringComparison.Ordinal) > -1 ||
            responseUrl.IndexOf("500.php", StringComparison.Ordinal) > -1 ||
            responseUrl.IndexOf("500.htm", StringComparison.Ordinal) > -1;
        return !redirectedToErrorPage;
    }

    /// <summary>
    /// Tells whether the template text starts with one of the handled citation
    /// template names. The first letter is compared case-insensitively.
    /// </summary>
    private static bool IsCitationTemplate(string templateText)
    {
        string candidate = templateText.TrimStart();
        foreach (string name in CitationTemplateNames)
        {
            if (candidate.Length < name.Length)
            {
                continue;
            }

            if (char.ToLowerInvariant(candidate[0]) == name[0] &&
                String.CompareOrdinal(candidate, 1, name, 1, name.Length - 1) == 0)
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Asks the archive service to archive a URL and extracts the archive
    /// address (without scheme) from the response.
    /// </summary>
    /// <returns>The archive address, or null when it could not be obtained.</returns>
    private static string RequestArchiveUrl(string url, string emailAddress)
    {
        // Encode the values so that '&', '=' or spaces inside the URL do not break the form body.
        string parameters = "url=" + HttpUtility.UrlEncode(url) + "&email=" + HttpUtility.UrlEncode(emailAddress);
        string response = getURLPost(ArchiveServiceUrl, parameters);
        if (response == null)
        {
            return null;
        }

        //Todo: Improve this later on so it's more robust for changes on webcitation.org
        int markerPos = response.IndexOf(ArchiveResponseMarker, StringComparison.Ordinal);
        if (markerPos < 0)
        {
            return null;
        }

        int start = markerPos + ArchiveUrlOffset;
        if (start + ArchiveUrlLength > response.Length)
        {
            return null;
        }

        return response.Substring(start, ArchiveUrlLength);
    }

    /// <summary>
    /// Entry point: logs in, collects the pages of the maintenance category and
    /// updates every citation template that lacks an archive URL.
    /// </summary>
    public static void Main()
    {
        // NOTE(review): user, password and email are not declared in this listing;
        // they have to be defined before this builds.
        Site site = new Site("http://sv.wikipedia.org", user, password);
        String emailStr = email;

        // Derive the dates from the clock instead of hard-coding the day of one run.
        DateTime today = DateTime.Now;
        String todaysDateStr = today.Day + " " + SwedishMonthNames[today.Month - 1] + " " + today.Year;
        String deadlinkTemplateStr = "{{" + DeadLinkTemplateName + "|datum=" + today.Year + "-" + today.Month.ToString("00") + "}}";

        String editCommentStr = "Lägger in arkiveringsurl i webbref alt. dödmarkerar länk";
        String archiveUrlStr = "arkivurl";
        String archiveDateStr = "arkivdatum";
        String categoryStr = "Artiklar med Webbref som saknar arkiveringsurl";

        PageList pl = new PageList(site);
        // Counter of archived links; the start value is hard-coded in the original
        // (presumably carried over from earlier runs -- confirm).
        long antal = 18;

        //Find articles among the recentchanges
        // pl.FillFromRecentChanges(false, false, false, false, true, 1000, 7);
        //Find articles from a category
        pl.FillFromCategory(categoryStr);
        //Find articles from all the links to a template, mostly useful on very small wikis
        // pl.FillFromLinksToPage("Mall:Webbref");
        //Remove namespaces, changes required (or uncomment) for some sisterprojects, like wikisource
        pl.FilterNamespaces(new int[] { 0 });

        foreach (Page myPage in pl)
        {
            bool changed = false;
            myPage.Load();

            // Identical template text is replaced page-wide in one go, so each
            // distinct text must only be processed once per page.
            Hashtable handledTemplates = new Hashtable();

            foreach (string templateText in myPage.GetTemplatesWithParams())
            {
                //handle several types, webref common on svnews, webbref on other sv-projects
                if (!IsCitationTemplate(templateText))
                {
                    continue;
                }

                //If we already have an archive added skip to next template
                if (templateText.IndexOf(archiveUrlStr, StringComparison.Ordinal) > 0)
                {
                    continue;
                }

                if (handledTemplates.ContainsKey(templateText))
                {
                    continue;
                }
                handledTemplates[templateText] = true;

                Match urlMatch = UrlParameterPattern.Match(templateText);
                if (!urlMatch.Success)
                {
                    // No url parameter: nothing to check or archive.
                    continue;
                }

                Group urlValue = urlMatch.Groups[1];
                string rawUrl = urlValue.Value.Trim();
                Console.WriteLine(rawUrl);

                //Fix some specialcases regarding URLs
                string url = HttpUtility.UrlDecode(rawUrl).Trim();
                Console.WriteLine(url);
                if (url.Length == 0)
                {
                    continue;
                }

                string newTemplate;
                bool archived = false;

                //verify that the page isn't dead first, if it is mark it as such
                if (isUrlWorking(url))
                {
                    string archiveAddress = RequestArchiveUrl(url, emailStr);
                    if (archiveAddress == null)
                    {
                        Console.WriteLine("Could not obtain archive URL for " + url);
                        continue;
                    }

                    // Insert right after the URL as it is written in the page, so the
                    // edit also works when decoding/trimming changed the checked URL.
                    int insertAt = urlValue.Index + urlValue.Value.TrimEnd().Length;
                    newTemplate = templateText.Insert(
                        insertAt,
                        "|" + archiveUrlStr + "=http://" + archiveAddress + "|" + archiveDateStr + "=" + todaysDateStr);
                    archived = true;
                }
                else
                {
                    // Do not stack another marker when one is already inside the template
                    // (assumes nested templates are part of the returned text -- TODO confirm).
                    if (templateText.IndexOf("{{" + DeadLinkTemplateName, StringComparison.OrdinalIgnoreCase) >= 0)
                    {
                        continue;
                    }

                    //Add {{dead link}}
                    Console.WriteLine("Dead link found!");
                    newTemplate = templateText + deadlinkTemplateStr;
                }

                string updatedText = myPage.text.Replace(templateText, newTemplate);
                if (updatedText != myPage.text)
                {
                    myPage.text = updatedText;
                    changed = true;
                    if (archived)
                    {
                        antal++;
                    }
                }
            }

            if (changed)
            {
                myPage.Save(myPage.text, editCommentStr, true);
                Console.WriteLine("Nytt antal = " + antal.ToString());
            }
        }
    }
}