// Source: Wikipedia:Projekt DotNetWikiBot Framework/Lsjbot/Lsj-copybox
// Reads an infobox from one wiki, and copies the data into an infobox in the
// corresponding article on another wiki.
// Images are checked against Commons to see that they really exist there.
// Originally from [[Wikipedia:Projekt DotNetWikiBot Framework/Innocent bot/Ny parameter i Mall Ishockeyspelare]]
// Extensively modified by Lsj
using System;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections;
using System.Collections.Generic;
using System.Xml;
using System.Threading;
using System.Web;
using System.Net;
using DotNetWikiBot;

/// <summary>
/// Bot that reads the "Infobox settlement" template of English Wikipedia articles
/// and writes a translated "Stadsfakta" template into the Swedish article that the
/// English page links to through an "sv:" interwiki link.
/// </summary>
class MyBot : Bot
{
    /// <summary>Characters trimmed from both ends of a raw wiki-link value.</summary>
    private static readonly char[] LinkDecoration = "[]'† ?".ToCharArray();

    /// <summary>Name of the template read from the source wiki.</summary>
    private const string SourceTemplateTitle = "Infobox settlement";

    /// <summary>Name of the template written to the target wiki.</summary>
    private const string TargetTemplateTitle = "Stadsfakta";

    /// <summary>
    /// Strips link decoration characters from both ends of <paramref name="origlink"/>
    /// and drops everything from the first pipe character onwards.
    /// </summary>
    /// <param name="origlink">Raw parameter value, e.g. "[[Cebu|Cebu province]]".</param>
    /// <returns>The text before the first "|", without surrounding decoration.</returns>
    private static string CleanLink(string origlink)
    {
        string s = origlink.Trim(LinkDecoration);
        int pipe = s.IndexOf('|');
        if (pipe >= 0)
        {
            s = s.Remove(pipe);
        }
        return s;
    }

    /// <summary>
    /// Entry point: logs in, collects the source articles, and for each article that
    /// contains the source template copies the translated parameters to the Swedish article.
    /// </summary>
    public static void Main()
    {
        Console.Write("Password: ");
        string password = Console.ReadLine();

        // "***" is the (redacted) account name; the password typed above is used for the login.
        Site svsite = new Site("http://sv.wikipedia.org", "***", password);
        Site ensite = new Site("http://en.wikipedia.org", "***", password);
        Site cmsite = new Site("http://commons.wikimedia.org", "***", password);

        PageList pl = new PageList(ensite);

        // Select how to get pages. Uncomment as needed.

        // Find articles from a category
        pl.FillFromCategoryTree("Cities in the Philippines");

        // Find articles from all the links to a template, mostly useful on very small wikis
        // pl.FillFromLinksToPage("Mall:Taxobox");

        // Set specific article:
        // Page pp = new Page(ensite, "Davao City"); pl.Add(pp);

        // Skip all namespaces except regular articles:
        pl.RemoveNamespaces(new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 100, 101 });

        Dictionary<string, string> paramdict = BuildParameterMap();
        Dictionary<string, string> provinsdict = BuildProvinceMap();

        // The title pattern does not depend on the page, so it is built once.
        Regex templateTitleRegex = new Regex("^\\s*(" +
            Bot.Capitalize(Regex.Escape(SourceTemplateTitle)) + "|" +
            Bot.Uncapitalize(Regex.Escape(SourceTemplateTitle)) + ")\\s*\\|");

        // And here we start going through the articles:
        foreach (Page p in pl)
        {
            p.Load();

            // Remove comments, otherwise the template finding doesn't work properly:
            p.text = StripComments(p.text);

            // Only the first matching template is used; a second one on the same page
            // would otherwise produce duplicate target parameters.
            Dictionary<string, string> newparamdict = null;
            foreach (string template in p.GetTemplatesWithParams())
            {
                if (templateTitleRegex.IsMatch(template))
                {
                    newparamdict = TranslateParameters(
                        ensite.ParseTemplate(template), paramdict, provinsdict, cmsite);
                    break;
                }
            }

            if (newparamdict == null)
            {
                continue;
            }

            // Find the Swedish match:
            foreach (string iws in p.GetInterWikiLinks())
            {
                if (iws.StartsWith("sv:", StringComparison.Ordinal))
                {
                    UpdateSwedishArticle(svsite, iws.Substring(3), newparamdict);
                }
            }
        }
    }

    /// <summary>
    /// Removes every span that starts with "&lt;!" and ends with the next "-&gt;",
    /// working from the end of the text towards the start. A "&lt;!" with no
    /// terminator after it loses only those two characters.
    /// </summary>
    /// <param name="text">Wiki text to clean; may be null or empty.</param>
    /// <returns>The text with the comment spans removed.</returns>
    private static string StripComments(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return text;
        }

        int startPos;
        while ((startPos = text.LastIndexOf("<!")) != -1)
        {
            int endPos = text.IndexOf("->", startPos);
            int len = (endPos != -1) ? endPos - startPos + 2 : 2;
            text = text.Remove(startPos, len);
        }
        return text;
    }

    /// <summary>
    /// Converts the parameters of one source template into target-template parameters.
    /// Source parameters without an entry in <paramref name="paramdict"/> are ignored.
    /// </summary>
    /// <param name="oldparameters">Parameters parsed from the source template.</param>
    /// <param name="paramdict">Source parameter name to target parameter name.</param>
    /// <param name="provinsdict">English province/region name to Swedish article title.</param>
    /// <param name="cmsite">Commons site used to verify that images exist.</param>
    /// <returns>Target parameter names and values, plus the fixed "land" and "tidszon" entries.</returns>
    private static Dictionary<string, string> TranslateParameters(
        Dictionary<string, string> oldparameters,
        Dictionary<string, string> paramdict,
        Dictionary<string, string> provinsdict,
        Site cmsite)
    {
        Dictionary<string, string> result = new Dictionary<string, string>();

        foreach (KeyValuePair<string, string> source in oldparameters)
        {
            string target;
            if (!paramdict.TryGetValue(source.Key, out target))
            {
                continue;
            }

            string value = source.Value;
            switch (target)
            {
                // NOTE(review): no entry in the parameter map currently yields
                // "region_type" or "district_type", so these two cases are not reached.
                case "region_type":
                    result[target] = "[[Filippinernas regioner|Region]]";
                    break;

                case "district_type":
                    result[target] = "[[Filippinernas provinser|Provins]]";
                    break;

                case "delstat":
                case "indelning":
                    result[target] = TranslateProvince(value, provinsdict);
                    break;

                case "form":
                    if (value.Contains("city") || value.Contains("City"))
                    {
                        result[target] = "[[Stad]]";
                    }
                    else
                    {
                        result[target] = value;
                    }
                    break;

                case "url":
                    if (value.Contains("{{URL"))
                    {
                        result[target] = ConvertUrlTemplate(value);
                    }
                    else
                    {
                        result[target] = value;
                    }
                    break;

                // All image parameters are copied only when the file exists on Commons.
                case "sigill":
                case "bild":
                case "fana":
                    if (value.Trim().Length > 0 && ImageExistsOnCommons(cmsite, value))
                    {
                        result[target] = value;
                    }
                    break;

                case "folkmängd_år":
                    // NOTE(review): a value without {{asof is dropped, as before —
                    // confirm whether it should be copied unchanged instead.
                    if (value.Contains("{{asof"))
                    {
                        result[target] = value.Replace("{{asof|", "").Replace("}}", "");
                    }
                    break;

                default:
                    result[target] = value;
                    break;
            }
        }

        result["land"] = "Filippinerna";
        result["tidszon"] = "[[UTC+8]]";
        return result;
    }

    /// <summary>
    /// Turns a raw province/region value into a Swedish wiki link when the name is known.
    /// The name is looked up first with any parenthetical qualifier intact (so that keys
    /// such as "Abra (province)" can match) and then with the qualifier removed.
    /// </summary>
    /// <param name="raw">Raw parameter value from the source template.</param>
    /// <param name="provinsdict">English name to Swedish article title.</param>
    /// <returns>"[[Swedish title]]" when a translation exists, otherwise the cleaned name.</returns>
    private static string TranslateProvince(string raw, Dictionary<string, string> provinsdict)
    {
        string name = CleanLink(raw);
        int bracket = name.IndexOf(']');
        if (bracket >= 0)
        {
            name = name.Remove(bracket);
        }
        name = name.Trim();

        string swedish;
        if (provinsdict.TryGetValue(name, out swedish))
        {
            return "[[" + swedish + "]]";
        }

        int paren = name.IndexOf('(');
        if (paren >= 0)
        {
            // Trim so that "Cebu (province)" becomes "Cebu", not "Cebu ".
            name = name.Remove(paren).TrimEnd();
            if (provinsdict.TryGetValue(name, out swedish))
            {
                return "[[" + swedish + "]]";
            }
        }

        return name;
    }

    /// <summary>
    /// Rewrites a {{URL|...}} template call as a bracketed external link.
    /// </summary>
    /// <param name="value">Parameter value containing "{{URL".</param>
    /// <returns>The value with "{{URL|" replaced by "[http://" and "}}" by "]".</returns>
    private static string ConvertUrlTemplate(string value)
    {
        string link = value.Replace("{{URL|", "[http://").Replace("}}", "]");

        // The template argument may already carry a scheme; avoid doubling it.
        link = link.Replace("[http://http://", "[http://");
        link = link.Replace("[http://https://", "[https://");
        return link;
    }

    /// <summary>
    /// Requests the "File:" page for <paramref name="fileName"/> from Commons and reports
    /// whether the request succeeded. Failures are written to the console.
    /// </summary>
    /// <param name="cmsite">Commons site.</param>
    /// <param name="fileName">File name without the "File:" prefix.</param>
    /// <returns>True when the page could be fetched; false on any <see cref="WebException"/>.</returns>
    private static bool ImageExistsOnCommons(Site cmsite, string fileName)
    {
        string res = cmsite.site + cmsite.indexPath + "index.php?title=" +
            HttpUtility.UrlEncode("File:" + fileName);
        try
        {
            cmsite.GetPageHTM(res);
            return true;
        }
        catch (WebException e)
        {
            // Prefer the status code; the message text depends on the runtime's language.
            HttpWebResponse response = e.Response as HttpWebResponse;
            bool notFound =
                (response != null && response.StatusCode == HttpStatusCode.NotFound) ||
                e.Message.Contains(": (404) ");

            if (notFound)
            {
                Console.Error.WriteLine(Bot.Msg("Page \"{0}\" doesn't exist."), fileName);
                Console.WriteLine("Image not found " + fileName);
            }
            else
            {
                Console.Error.WriteLine(e.Message);
            }
            return false;
        }
    }

    /// <summary>
    /// Loads the Swedish article and, unless it already contains an infobox, prepends an
    /// empty target template and fills it with <paramref name="newparamdict"/>. The page is
    /// saved only when its text changed.
    /// </summary>
    /// <param name="svsite">Swedish Wikipedia site.</param>
    /// <param name="svtit">Title of the Swedish article.</param>
    /// <param name="newparamdict">Target parameter names and values.</param>
    private static void UpdateSwedishArticle(
        Site svsite, string svtit, Dictionary<string, string> newparamdict)
    {
        Page psv = new Page(svsite, svtit);
        psv.Load();

        if (string.IsNullOrEmpty(psv.text))
        {
            // Nothing was loaded; saving would create an article made of an infobox only.
            Console.WriteLine("Empty or missing page " + svtit);
            return;
        }

        string origtext = psv.text;

        // "eobox" / "tadsfakta" match both capitalisations of Geobox and Stadsfakta.
        if (!psv.text.Contains("eobox") && !psv.text.Contains("tadsfakta"))
        {
            psv.text = "{{Stadsfakta|}}\n" + psv.text;
            foreach (KeyValuePair<string, string> np in newparamdict)
            {
                psv.SetTemplateParameter(TargetTemplateTitle, np.Key, np.Value, true);
            }
        }
        else
        {
            Console.WriteLine("Has box already");
        }

        // DONE! Now save if needed.
        Bot.editComment = "Fixar faktamall";
        isMinorEdit = false;
        if (psv.text != origtext)
        {
            psv.Save();
        }

        Thread.Sleep(4000); // milliseconds
    }

    /// <summary>
    /// Builds the map that tells which parameter in the source template corresponds to
    /// which parameter in the target template.
    /// </summary>
    /// <returns>Source parameter name to target parameter name.</returns>
    private static Dictionary<string, string> BuildParameterMap()
    {
        Dictionary<string, string> map = new Dictionary<string, string>();
        map.Add("area_water_km2", "yta_vatten");
        map.Add("area_land_km2", "yta_land");
        map.Add("area_total_km2", "yta");
        map.Add("website", "url");
        map.Add("leader_title", "styre_titel");
        map.Add("leader_name", "styre");
        map.Add("image_seal", "sigill");
        map.Add("official_name", "namn");
        map.Add("longs", "long_s");
        map.Add("longm", "long_m");
        map.Add("longd", "long_g");
        map.Add("longEW", "long_EW");
        map.Add("lats", "lat_s");
        map.Add("latNS", "lat_NS");
        map.Add("latm", "lat_m");
        map.Add("latd", "lat_g");
        map.Add("subdivision_name2", "indelning");
        map.Add("settlement_type", "form");
        map.Add("population_as_of", "folkmängd_år");
        map.Add("population_footnotes", "folkmängd_not");
        map.Add("population_total", "folkmängd");
        map.Add("image_flag", "fana");
        map.Add("subdivision_name1", "delstat");
        map.Add("image_caption", "bildtext");
        map.Add("image_skyline", "bild");
        map.Add("native_name", "annat_namn");
        return map;
    }

    /// <summary>
    /// Builds the translations of province and region names from the source language
    /// (English article title) to the target language (Swedish article title).
    /// </summary>
    /// <returns>English name to Swedish name.</returns>
    private static Dictionary<string, string> BuildProvinceMap()
    {
        Dictionary<string, string> map = new Dictionary<string, string>();

        // Provinces
        map.Add("Abra (province)", "Abra (provins)");
        map.Add("Agusan del Norte", "Agusan del Norte");
        map.Add("Agusan del Sur", "Agusan del Sur");
        map.Add("Aklan", "Aklan");
        map.Add("Albay", "Albay");
        map.Add("Antique (province)", "Antique (provins)");
        map.Add("Apayao", "Apayao");
        map.Add("Aurora (province)", "Aurora (provins)");
        map.Add("Basilan", "Basilan (provins)");
        map.Add("Bataan", "Bataan");
        map.Add("Batanes", "Batanes");
        map.Add("Batangas", "Batangas");
        map.Add("Benguet", "Benguet");
        map.Add("Mountain Province", "Bergsprovinsen");
        map.Add("Biliran", "Biliran (provins)");
        map.Add("Bohol", "Bohol");
        map.Add("Bukidnon", "Bukidnon");
        map.Add("Bulacan", "Bulacan");
        map.Add("Cagayan", "Cagayan");
        map.Add("Camarines Norte", "Camarines Norte");
        map.Add("Camarines Sur", "Camarines Sur");
        map.Add("Camiguin", "Camiguin");
        map.Add("Capiz", "Capiz");
        map.Add("Catanduanes", "Catanduanes");
        map.Add("Cavite", "Cavite");
        map.Add("Cebu", "Cebu");
        map.Add("Compostela Valley", "Composteladalen");
        map.Add("Cotabato", "Cotabato");
        map.Add("Davao del Norte", "Davao del Norte");
        map.Add("Davao del Sur", "Davao del Sur");
        map.Add("Davao Oriental", "Davao Oriental");
        map.Add("Dinagat Islands", "Dinagatöarna (provins)");
        map.Add("Guimaras", "Guimaras");
        map.Add("Ifugao", "Ifugao");
        map.Add("Iloilo", "Iloilo (provins)");
        map.Add("Isabela (province)", "Isabela (provins)");
        map.Add("Kalinga", "Kalinga");
        map.Add("La Union", "La Union (provins)");
        map.Add("Laguna (province)", "Laguna (provins)");
        map.Add("Lanao del Norte", "Lanao del Norte");
        map.Add("Lanao del Sur", "Lanao del Sur");
        map.Add("Leyte", "Leyte (provins)");
        map.Add("Maguindanao", "Maguindanao");
        map.Add("Marinduque", "Marinduque");
        map.Add("Masbate", "Masbate (provins)");
        map.Add("Misamis Occidental", "Misamis Occidental");
        map.Add("Misamis Oriental", "Misamis Oriental");
        map.Add("Negros Occidental", "Negros Occidental");
        map.Add("Negros Oriental", "Negros Oriental");
        map.Add("Ilocos Norte", "Norra Ilocos");
        map.Add("Northern Samar", "Norra Samar");
        map.Add("Nueva Ecija", "Nueva Ecija");
        map.Add("Nueva Vizcaya", "Nueva Vizcaya");
        map.Add("Occidental Mindoro", "Occidental Mindoro");
        map.Add("Oriental Mindoro", "Oriental Mindoro");
        map.Add("Palawan", "Palawan");
        map.Add("Pampanga", "Pampanga");
        map.Add("Pangasinan", "Pangasinan (provins)");
        map.Add("Quezon", "Quezon");
        map.Add("Quirino", "Quirino");
        map.Add("Rizal", "Rizal");
        map.Add("Romblon", "Romblon");
        map.Add("Samar", "Samar (provins)");
        map.Add("Sarangani", "Sarangani (provins)");
        map.Add("Siquijor", "Siquijor (provins)");
        map.Add("Sorsogon", "Sorsogon (provins)");
        map.Add("Sultan Kudarat", "Sultan Kudarat");
        map.Add("Sulu", "Sulu (provins)");
        map.Add("Surigao del Norte", "Surigao del Norte");
        map.Add("Surigao del Sur", "Surigao del Sur");
        map.Add("South Cotabato", "Södra Cotabato");
        map.Add("Ilocos Sur", "Södra Ilocos");
        map.Add("Southern Leyte", "Södra Leyte");
        map.Add("Tarlac", "Tarlac");
        map.Add("Tawi-Tawi", "Tawi-Tawi");
        map.Add("Zambales", "Zambales");
        map.Add("Zamboanga del Norte", "Zamboanga del Norte");
        map.Add("Zamboanga del Sur", "Zamboanga del Sur");
        map.Add("Zamboanga Sibugay", "Zamboanga Sibugay");
        map.Add("Eastern Samar", "Östra Samar");

        // Regions
        map.Add("Bicol Region", "Bikolregionen");
        map.Add("Cagayan Valley", "Cagayandalen");
        map.Add("CALABARZON", "CALABARZON");
        map.Add("Caraga", "Caraga");
        map.Add("Central Luzon", "Centrala Luzon");
        map.Add("Central Visayas", "Centrala Visayas");
        map.Add("Davao Region", "Davaoregionen");
        map.Add("Ilocos Region", "Ilocosregionen");
        map.Add("Cordillera Administrative Region", "Kordiljärernas administrativa region");
        map.Add("Metro Manila", "Metro Manila");
        map.Add("MIMAROPA", "MIMAROPA");
        map.Add("Autonomous Region in Muslim Mindanao", "Muslimska Mindanao");
        map.Add("Northern Mindanao", "Norra Mindanao");
        map.Add("SOCCSKSARGEN", "SOCCSKSARGEN");
        map.Add("Western Visayas", "Västra Visayas");
        map.Add("Zamboanga Peninsula", "Zamboangahalvön");
        map.Add("Eastern Visayas", "Östra Visayas");
        return map;
    }
}