Wikipedia:Projekt DotNetWikiBot Framework/Lsjbot/Lsj-get-images-from-iw
Utseende
//Bot code by Lsj to locate images used on other languages versions

using System;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections;
using System.Collections.Generic;
using System.Xml;
using System.Threading;
using System.Web;
using System.Net;
using DotNetWikiBot;

/// <summary>
/// Bot that walks a list of articles on one Wikipedia language version, looks up
/// the images used by the same article on other language versions (via the
/// inter-language links), and proposes the ones not yet used locally: on stdout,
/// in the article, on its talk page, or on a bot work page, depending on nchoice.
/// If no interwiki image is found, falls back to the files in the Commons
/// category named by a {{commonscat|...}} template in the article.
/// </summary>
class MyBot : Bot
{
    /// <summary>
    /// Loads a page, retrying when a WebException is thrown.
    /// </summary>
    /// <param name="p">Page to load.</param>
    /// <param name="iattempt">Maximum number of attempts; at least one attempt is always made.</param>
    /// <returns>true if the page was loaded, false if every attempt threw a WebException.</returns>
    public static bool tryload(Page p, int iattempt)
    {
        int itry = 1;

        while (true)
        {
            try
            {
                p.Load();
                return true;
            }
            catch (WebException e)
            {
                Console.Error.WriteLine(e.Message);
                itry++;
                if (itry > iattempt)
                {
                    return false;
                }
            }
        }
    }

    /// <summary>
    /// Saves a page, retrying when a WebException is thrown.
    /// </summary>
    /// <param name="p">Page to save.</param>
    /// <param name="iattempt">Maximum number of attempts; at least one attempt is always made.</param>
    /// <returns>true if the page was saved, false if every attempt threw a WebException.</returns>
    public static bool trysave(Page p, int iattempt)
    {
        int itry = 1;

        while (true)
        {
            try
            {
                p.Save();
                return true;
            }
            catch (WebException e)
            {
                Console.Error.WriteLine(e.Message);
                itry++;
                if (itry > iattempt)
                {
                    return false;
                }
            }
        }
    }

    // Disabled alternative: fetch the inter-language links from Wikidata instead of
    // from the article itself. Kept for reference; see the commented-out wdsite in Main.
    //
    //public static List<string> Interwiki(Site site, string title)
    ////Borrowed from http://sv.wikipedia.org/wiki/Wikipedia:Projekt_DotNetWikiBot_Framework/Innocent_bot/Addbotkopia
    //{
    //    List<string> r = new List<string>();
    //    XmlDocument doc = new XmlDocument();
    //    string url = "action=wbgetentities&sites=svwiki&titles=" + HttpUtility.UrlEncode(title) + "&languages=sv&format=xml";
    //    //string tmpStr = site.PostDataAndGetResultHTM(site.site+"/w/api.php", url);
    //    try
    //    {
    //        string tmpStr = site.PostDataAndGetResultHTM(site.site + "/w/api.php", url);
    //        doc.LoadXml(tmpStr);
    //        for (int i = 0; i < doc.GetElementsByTagName("sitelink").Count; i++)
    //        {
    //            string s = doc.GetElementsByTagName("sitelink")[i].Attributes.GetNamedItem("site").Value;
    //            string t = doc.GetElementsByTagName("sitelink")[i].Attributes.GetNamedItem("title").Value;
    //            s = s.Replace("_", "-");
    //            string t2 = s.Substring(0, s.Length - 4) + ":" + t;
    //            //Console.WriteLine(t2);
    //            r.Add(t2);
    //        }
    //    }
    //    catch (WebException e)
    //    {
    //        string message = e.Message;
    //        Console.Error.WriteLine(message);
    //    }
    //    return r;
    //}

    /// <summary>
    /// Extracts the category name from the first {{commonscat|...}} (or
    /// {{Commonscat|...}}) template in the given wikitext.
    /// </summary>
    /// <param name="text">Wikitext of the article.</param>
    /// <returns>
    /// The first template argument (text up to the next "|" or the closing "}}"),
    /// or an empty string if there is no such template or it is not closed.
    /// </returns>
    private static string ExtractCommonscatCategory(string text)
    {
        // Lower-case spelling is tried first, as in the original lookup order.
        string[] openers = { "{{commonscat|", "{{Commonscat|" };

        foreach (string opener in openers)
        {
            // >= 0, not > 0: the template may be the very first thing in the article.
            int start = text.IndexOf(opener, StringComparison.Ordinal);
            if (start < 0)
            {
                continue;
            }

            string rest = text.Substring(start + opener.Length);
            int end = rest.IndexOf("}}", StringComparison.Ordinal);
            if (end < 0)
            {
                // Unclosed template: treat as "no category" instead of throwing.
                return "";
            }

            string name = rest.Substring(0, end);
            int pipe = name.IndexOf('|');
            if (pipe >= 0)
            {
                name = name.Substring(0, pipe);
            }
            return name;
        }

        return "";
    }

    /// <summary>
    /// Follows the inter-language links of an article and collects the images
    /// used by the linked articles.
    /// </summary>
    /// <param name="p">Already-loaded article on the target wiki.</param>
    /// <param name="sitedict">Cache of logged-in sites keyed by language code; new sites are added on demand.</param>
    /// <param name="botaccount">Account name used to log in to newly contacted wikis.</param>
    /// <param name="password">Password used to log in to newly contacted wikis.</param>
    /// <returns>
    /// Map from image name (namespace prefix stripped) to a colon-joined list of
    /// the language codes whose article uses that image.
    /// </returns>
    private static Dictionary<string, string> CollectInterwikiImages(
        Page p, Dictionary<string, Site> sitedict, string botaccount, string password)
    {
        Dictionary<string, string> newpix = new Dictionary<string, string>();

        //string[] iw = p.GetInterWikiLinks();
        List<string> iwlist = p.GetInterLanguageLinks();
        Console.WriteLine("iwlist.Count " + iwlist.Count);
        //if (iw.Length == 0)
        //    iwlist = Interwiki(wdsite, p.title);
        //else
        //{
        //    foreach (string iws in iw)
        //        iwlist.Add(iws);
        //}

        foreach (string iws in iwlist)
        {
            // Split on the FIRST colon only, so titles that themselves contain a
            // colon are kept whole.
            string[] ss = iws.Split(new char[] { ':' }, 2);
            if (ss.Length < 2)
            {
                Console.WriteLine("Skipping malformed iw link: " + iws);
                continue;
            }
            string iwcode = ss[0];
            string iwtitle = ss[1];
            Console.WriteLine("iw - " + iwcode + ":" + iwtitle);

            if (iwcode == "nah")
            {
                continue;
            }

            if (!sitedict.ContainsKey(iwcode))
            {
                string iwurl = "https://" + iwcode + ".wikipedia.org";
                try
                {
                    sitedict.Add(iwcode, new Site(iwurl, botaccount, password));
                }
                catch (WebException e)
                {
                    Console.WriteLine(e.Message);
                    continue;
                }
                catch (WikiBotException e)
                {
                    Console.WriteLine(e.Message);
                    continue;
                }
            }

            Page piw = new Page(sitedict[iwcode], iwtitle);
            try
            {
                piw.Load();
            }
            catch (WebException e)
            {
                Console.WriteLine(e.Message);
                continue;
            }

            if (!piw.Exists())
            {
                Console.WriteLine("Not found despite iw");
                continue;
            }

            foreach (string iwpicture in piw.GetImages())
            {
                //Remove file prefix:
                string iwpic = iwpicture;
                int colon = iwpic.IndexOf(':');
                if (colon >= 0)
                {
                    iwpic = iwpic.Substring(colon + 1);
                }

                string usedby;
                if (newpix.TryGetValue(iwpic, out usedby))
                {
                    newpix[iwpic] = usedby + ":" + iwcode;
                }
                else
                {
                    newpix.Add(iwpic, iwcode);
                }
            }
        }

        return newpix;
    }

    /// <summary>
    /// Flags candidate images that must not be proposed by overwriting their
    /// dictionary value with a marker string starting with "///".
    /// </summary>
    /// <param name="newpix">Candidate images; values are modified in place.</param>
    /// <param name="pagetext">Wikitext of the target article.</param>
    /// <param name="blacklist">Exact image names to reject.</param>
    /// <param name="blacktype">Substrings (file extensions) to reject.</param>
    /// <param name="cmsite">Commons site, used to verify that the file exists there.</param>
    /// <param name="fromcommons">true if the candidates were read from Commons, in which case the existence check is skipped.</param>
    private static void MarkUnusableImages(
        Dictionary<string, string> newpix, string pagetext,
        List<string> blacklist, List<string> blacktype,
        Site cmsite, bool fromcommons)
    {
        //Workaround because a Dictionary can't be modified while iterating over its keys:
        List<string> keys = new List<string>(newpix.Keys);

        foreach (string newpic in keys)
        {
            //Check if pix from iw is already used in target article:
            if (pagetext.Contains(newpic))
            {
                newpix[newpic] = "/// ALREADY USED";
            }
            else if (newpic.Contains(":"))
            {
                if (pagetext.Contains(newpic.Remove(0, newpic.IndexOf(':') + 1)))
                {
                    newpix[newpic] = "/// ALREADY USED";
                }
            }

            // Only names ending in a dot plus exactly three characters count as files.
            if ((!newpic.Contains(".")) || (newpic.LastIndexOf('.') != newpic.Length - 4))
            {
                newpix[newpic] = "/// NOT A FILE";
            }

            //Check if pic in blacklist:
            if (blacklist.Contains(newpic))
            {
                newpix[newpic] = "/// BLACKLISTED IMAGE";
            }

            foreach (string filetype in blacktype)
            {
                if (newpic.Contains(filetype))
                {
                    newpix[newpic] = "/// BLACKLISTED FILETYPE";
                }
            }

            if (newpix[newpic].Contains("///") || fromcommons)
            {
                continue;
            }

            //Check if pic really exists on Commons:
            string res = cmsite.indexPath + "?title=" + HttpUtility.UrlEncode("File:" + newpic);
            //Console.WriteLine("commonsres = " + res);
            try
            {
                // Only the success/failure of the request matters; the page body is not used.
                cmsite.GetWebPage(res); // cmsite.GetPageHTM(res);
            }
            catch (WebException e)
            {
                newpix[newpic] = "/// NOT FOUND ON COMMONS";
                string message = e.Message;
                if (message.Contains(": (404) "))
                {
                    // Not Found
                    Console.Error.WriteLine(Bot.Msg("Page \"{0}\" doesn't exist."), newpic);
                    Console.WriteLine("Image not found " + newpic);
                }
                else
                {
                    Console.Error.WriteLine(message);
                }
            }
        }
    }

    /// <summary>
    /// Picks the images to propose from the filtered candidates.
    /// </summary>
    /// <param name="newpix">Candidates; flagged ("///") entries are blanked and picked entries are blanked as they are taken.</param>
    /// <param name="ntop">If positive and smaller than nnew, only this many images are returned.</param>
    /// <param name="nnew">Number of unflagged candidates in newpix.</param>
    /// <returns>
    /// All unflagged images, or the ntop images whose language-code string is
    /// longest (used as a measure of how many wikis use the image).
    /// </returns>
    private static List<string> SelectImagesToUse(Dictionary<string, string> newpix, int ntop, int nnew)
    {
        List<string> keys = new List<string>(newpix.Keys);

        //First get rid of the ones we don't want:
        foreach (string newpic in keys)
        {
            if (newpix[newpic].Contains("///"))
            {
                newpix[newpic] = "";
            }
        }

        //Then figure out which new pix have the most interwiki use:
        List<string> pixtouse = new List<string>();
        if ((ntop > 0) && (ntop < nnew))
        {
            int nused = 0;
            while (nused < ntop)
            {
                string longest = "";
                int maxlength = -1;
                foreach (string newpic in keys)
                {
                    if (newpix[newpic].Length > maxlength)
                    {
                        longest = newpic;
                        maxlength = newpix[newpic].Length;
                    }
                }
                pixtouse.Add(longest);
                newpix[longest] = ""; // so the next pass finds the runner-up
                nused++;
            }
        }
        else
        {
            foreach (string newpic in keys)
            {
                if (newpix[newpic] != "")
                {
                    pixtouse.Add(newpic);
                }
            }
        }

        return pixtouse;
    }

    /// <summary>
    /// Blocks the current thread until the given local time has been reached.
    /// Sleeps instead of spinning, so no CPU is burned while waiting.
    /// </summary>
    /// <param name="deadline">Local time (DateTime.Now based) to wait for.</param>
    private static void WaitUntil(DateTime deadline)
    {
        TimeSpan remaining = deadline - DateTime.Now;
        if (remaining > TimeSpan.Zero)
        {
            Thread.Sleep(remaining);
        }
    }

    /// <summary>
    /// Entry point. Prompts for the bot password, builds the page list and
    /// processes each article, allowing at least 7 seconds between articles
    /// that were processed to the end.
    /// </summary>
    public static void Main()
    {
        string makelang = "nl";
        string botaccount = "Lsjbot";

        Console.Write("Password: ");
        string password = Console.ReadLine();

        Site svsite = new Site("https://" + makelang + ".wikipedia.org", botaccount, password);
        Site cmsite = new Site("https://commons.wikimedia.org", botaccount, password);
        //Site wdsite = new Site("http://wikidata.org", botaccount, password);

        string editcomment = "Fixar bilder från iw";
        switch (makelang)
        {
            case "sv":
                editcomment = "Fixar bilder från iw";
                break;
            case "ceb":
                editcomment = "Galeriya sa hulagway";
                break;
            case "nl":
                editcomment = "Fotogalerij van interwiki";
                break;
            default:
                editcomment = "Image gallery from interwiki";
                break;
        }
        svsite.defaultEditComment = editcomment;
        svsite.minorEditByDefault = false;

        Console.WriteLine("apipath = " + svsite.apiPath);

        //Skip images in blacklist:
        List<string> blacklist = new List<string>();
        blacklist.Add("Gatunek niejadalny.svg");
        blacklist.Add("Foodlogo3.svg");

        List<string> blacktype = new List<string>();
        //blacktype.Add(".svg");
        //blacktype.Add(".png");

        PageList pl = new PageList(svsite);

        ////////////////////////////////////
        //Select how to get pages. Uncomment as needed.
        ////////////////////////////////////

        //Find articles from a category
        pl.FillFromCategory("Koning van Zweden");
        //pl.FillFromCategoryTree("Personer i Nederländernas historia");

        //Find articles from all the links to a template, mostly useful on very small wikis
        // pl.FillFromLinksToPage("Mall:Taxobox");

        //Set specific article:
        //Page pp = new Page(svsite, "Amsterdam");pl.Add(pp);

        //Skip all namespaces except regular articles:
        pl.RemoveNamespaces(new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 100, 101 });

        ///////////////////////////////////////
        //Choose what to do with the pix that are found:
        // nchoice = 0: do nothing, except list on standard output
        // nchoice = 1: add as gallery in target article
        // nchoice = 2: add as separate pix in target article
        // nchoice = 3: add in article discussion
        // nchoice = 4: list in separate workpage "Användare:Botaccount/Gallery"
        ////////////////////////////////////////
        int nchoice = 3;

        // If ntop is non-zero, don't use all pix but only the ntop most used ones.
        int ntop = 6;

        Page pwork = new Page(svsite, "Användare:" + botaccount + "/Gallery");
        if (nchoice == 4)
        {
            pwork.Load();
        }

        Dictionary<string, Site> sitedict = new Dictionary<string, Site>();

        //int nfound = 0;
        DateTime oldtime = DateTime.Now;
        int nedit = 0; // number of successful page saves
        int iremain = pl.Count();

        foreach (Page p in pl)
        {
            // Earliest moment the next article may be started (rate limiting).
            DateTime nexttime = oldtime.AddSeconds(7);

            //Skip start of alphabet:
            //if (String.Compare(p.title,"Acacia tortilis") < 0 )
            //    continue;

            iremain--;
            Console.WriteLine(iremain.ToString() + " remaining.");

            if (!tryload(p, 1))
            {
                continue;
            }

            string origtext = p.text;

            //find images already in page:
            List<string> oldpix = p.GetImages();
            Console.WriteLine("Oldpix:");
            foreach (string oldpic in oldpix)
            {
                Console.WriteLine(oldpic);
            }
            Console.WriteLine("npix = " + oldpix.Count.ToString());

            //if it already has a gallery, skip it:
            if (p.text.Contains("<gallery>"))
            {
                continue;
            }

            //if it doesn't contain "Lsjbot", skip it:
            //if (!p.text.Contains("Lsjbot"))
            //    continue;

            //find iw:
            Dictionary<string, string> newpix = CollectInterwikiImages(p, sitedict, botaccount, password);

            // Fallback: nothing found via interwiki, try the linked Commons category.
            bool fromcommons = false;
            if ((newpix.Count == 0) && p.text.Contains("ommonscat|"))
            {
                fromcommons = true;

                string category = ExtractCommonscatCategory(p.text);
                if (String.IsNullOrEmpty(category))
                {
                    continue;
                }
                category = "Category:" + category;
                //Console.WriteLine(category);
                //Console.ReadLine();

                PageList plc = new PageList(cmsite);
                try
                {
                    plc.FillFromCategoryTree(category);
                }
                catch (WebException e)
                {
                    Console.WriteLine(e.Message);
                    continue;
                }

                foreach (Page pc in plc)
                {
                    Console.WriteLine("pc = " + pc.title);
                    // Indexer rather than Add: a title listed twice must not throw.
                    newpix[pc.title] = "cm";
                }
            }

            MarkUnusableImages(newpix, p.text, blacklist, blacktype, cmsite, fromcommons);

            int nnew = 0;
            foreach (string newpic in newpix.Keys)
            {
                Console.WriteLine(newpic + " ! " + newpix[newpic]);
                if (!newpix[newpic].Contains("///"))
                {
                    nnew++;
                }
            }
            Console.WriteLine("# new pix = " + nnew.ToString());

            if (nnew == 0)
            {
                continue;
            }

            //OK, so we found some pix. Now what do we do with them?
            List<string> pixtouse = SelectImagesToUse(newpix, ntop, nnew);

            //Then actually use them, according to nchoice value:
            string gallerylabel = "Bildgalleri";
            string talkpage = "Diskussion";
            string disktext = "\n\n==Bilder från interwiki==\nBoten " + botaccount + " har identifierat följande bilder som används på andra språkversioner av den här artikeln:\n\n";
            switch (makelang)
            {
                case "sv":
                    gallerylabel = "Bildgalleri";
                    talkpage = "Diskussion";
                    disktext = "\n\n==Bilder från interwiki==\nBoten " + botaccount + " har identifierat följande bilder som används på andra språkversioner av den här artikeln:\n\n";
                    break;
                case "ceb":
                    gallerylabel = "Galeriya sa hulagway";
                    talkpage = "Hisgot";
                    break;
                case "war":
                    gallerylabel = "Image gallery";
                    talkpage = "Hiruhimangraw";
                    break;
                case "nl":
                    gallerylabel = "Image gallery";
                    talkpage = "Overleg";
                    disktext = "\n\n==Foto's van interwiki==\nDe bot " + botaccount + " heeft de volgende beelden gebruikt op andere versies van dit artikel geïdentificeerd:\n\n";
                    break;
                default:
                    gallerylabel = "Image gallery";
                    break;
            }

            string gallery = "\n\n== " + gallerylabel + " ==\n\n<gallery>\n";

            switch (nchoice)
            {
                case 1:
                {
                    foreach (string newpic in pixtouse)
                    {
                        gallery = gallery + newpic + "\n";
                    }
                    gallery = gallery + "</gallery>\n\n";

                    // Insert before the categories, or before the sources heading if present.
                    int ipos = p.text.IndexOf("[[Kategori");
                    if ((ipos < 0) && (makelang == "war"))
                    {
                        ipos = p.text.IndexOf("[[Kaarangay");
                    }
                    string botendtext = "== Källor ==";
                    if (p.text.Contains(botendtext))
                    {
                        ipos = p.text.IndexOf(botendtext);
                    }
                    if (ipos > 0)
                    {
                        p.text = p.text.Insert(ipos, gallery);
                    }
                    else
                    {
                        p.text += gallery;
                    }
                    break;
                }
                case 2:
                {
                    foreach (string newpic in pixtouse)
                    {
                        p.text = p.text.Replace("[[Kategori", "[[Fil:" + newpic + "|thumb|right|]]\n\n" + "[[Kategori");
                    }
                    break;
                }
                case 3:
                {
                    Page pdisk = new Page(svsite, talkpage + ":" + p.title);
                    if (!tryload(pdisk, 2))
                    {
                        continue;
                    }
                    pdisk.text = pdisk.text + disktext;

                    // On the talk page the gallery is a subsection of the bot's heading.
                    gallery = gallery.Replace("== " + gallerylabel + " ==", "=== " + gallerylabel + " ===");
                    foreach (string newpic in pixtouse)
                    {
                        gallery = gallery + newpic + "\n";
                    }
                    gallery = gallery + "</gallery>\n\n";
                    pdisk.text = pdisk.text + gallery;

                    //Bot.editComment = "Fixar bildförslag från iw";
                    //isMinorEdit = false;
                    if (trysave(pdisk, 2))
                    {
                        nedit++;
                    }
                    //Thread.Sleep(15000);//milliseconds

                    // Manual checkpoint: wait for the operator to press Enter.
                    Console.WriteLine("<ret>");
                    Console.ReadLine();
                    break;
                }
                case 4:
                {
                    pwork.text = pwork.text + "===" + p.title + "===\n";
                    foreach (string newpic in pixtouse)
                    {
                        gallery = gallery + newpic + "\n";
                    }
                    gallery = gallery + "</gallery>\n\n";
                    pwork.text = pwork.text + gallery;
                    break;
                }
            }

            //DONE! Now save if needed.
            //Bot.editComment = editcomment;
            //isMinorEdit = false;
            if (p.text != origtext)
            {
                if (trysave(p, 3))
                {
                    nedit++;
                }
            }

            if (nchoice == 4)
            {
                if (trysave(pwork, 3))
                {
                    nedit++;
                }
            }

            //Thread.Sleep(4000);//milliseconds
            Console.WriteLine("nexttime = " + nexttime.ToLongTimeString());
            Console.WriteLine("Now = " + DateTime.Now.ToLongTimeString());
            WaitUntil(nexttime);

            oldtime = DateTime.Now;
        }

        Console.WriteLine("Total #edits = " + nedit.ToString());
    }
}