Hoppa till innehållet

Wikipedia:Projekt DotNetWikiBot Framework/Lsjbot/Lsj-copybox

Från Wikipedia
//Reads an infobox from one wiki, and copies the data into an infobox in the corresponding article on another wiki.
//Images are checked against Commons to see that they really exist there.
//Originally from [[Wikipedia:Projekt DotNetWikiBot Framework/Innocent bot/Ny parameter i Mall Ishockeyspelare]]
//Extensively modified by Lsj


using System;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections;
using System.Collections.Generic;
using System.Xml;
using System.Threading;
using System.Web;
using System.Net;
using DotNetWikiBot;

/// <summary>
/// Bot that reads the "Infobox settlement" template from articles on the English
/// Wikipedia, translates selected parameters, and prepends a "Stadsfakta" infobox
/// to the interwiki-linked article on the Swedish Wikipedia. Image parameters are
/// only copied when the file page can be fetched from Commons.
/// </summary>
class MyBot : Bot
{
    // Characters stripped from both ends of a wiki-link value.
    private static readonly char[] LinkTrimChars = "[]'† ?".ToCharArray();

    // Template read on the source wiki.
    private const string SourceTemplate = "Infobox settlement";

    // Template written on the target wiki.
    private const string TargetTemplate = "Stadsfakta";

    /// <summary>
    /// Strips surrounding link markup from a value and drops everything from the
    /// first pipe onwards, so "[[Target|label]]" yields "Target".
    /// </summary>
    /// <param name="origlink">Raw parameter value; null is treated as empty.</param>
    /// <returns>The cleaned link target.</returns>
    private static string CleanLink(string origlink)
    {
        if (origlink == null)
        {
            return "";
        }

        string s = origlink.Trim(LinkTrimChars);
        int pipe = s.IndexOf('|');
        if (pipe >= 0)
        {
            s = s.Remove(pipe);
        }
        return s;
    }

    /// <summary>
    /// Builds the map from source-template parameter names to target-template
    /// parameter names.
    /// </summary>
    private static Dictionary<string, string> BuildParameterMap()
    {
        Dictionary<string, string> map = new Dictionary<string, string>();
        map.Add("area_water_km2", "yta_vatten");
        map.Add("area_land_km2", "yta_land");
        map.Add("area_total_km2", "yta");
        map.Add("website", "url");
        map.Add("leader_title", "styre_titel");
        map.Add("leader_name", "styre");
        map.Add("image_seal", "sigill");
        map.Add("official_name", "namn");
        map.Add("longs", "long_s");
        map.Add("longm", "long_m");
        map.Add("longd", "long_g");
        map.Add("longEW", "long_EW");
        map.Add("lats", "lat_s");
        map.Add("latNS", "lat_NS");
        map.Add("latm", "lat_m");
        map.Add("latd", "lat_g");
        map.Add("subdivision_name2", "indelning");
        map.Add("settlement_type", "form");
        map.Add("population_as_of", "folkmängd_år");
        map.Add("population_footnotes", "folkmängd_not");
        map.Add("population_total", "folkmängd");
        map.Add("image_flag", "fana");
        map.Add("subdivision_name1", "delstat");
        map.Add("image_caption", "bildtext");
        map.Add("image_skyline", "bild");
        map.Add("native_name", "annat_namn");
        return map;
    }

    /// <summary>
    /// Builds the map from English province/region article names (key) to the
    /// Swedish article names (value).
    /// </summary>
    private static Dictionary<string, string> BuildProvinceMap()
    {
        Dictionary<string, string> map = new Dictionary<string, string>();

        // Provinces
        map.Add("Abra (province)", "Abra (provins)");
        map.Add("Agusan del Norte", "Agusan del Norte");
        map.Add("Agusan del Sur", "Agusan del Sur");
        map.Add("Aklan", "Aklan");
        map.Add("Albay", "Albay");
        map.Add("Antique (province)", "Antique (provins)");
        map.Add("Apayao", "Apayao");
        map.Add("Aurora (province)", "Aurora (provins)");
        map.Add("Basilan", "Basilan (provins)");
        map.Add("Bataan", "Bataan");
        map.Add("Batanes", "Batanes");
        map.Add("Batangas", "Batangas");
        map.Add("Benguet", "Benguet");
        map.Add("Mountain Province", "Bergsprovinsen");
        map.Add("Biliran", "Biliran (provins)");
        map.Add("Bohol", "Bohol");
        map.Add("Bukidnon", "Bukidnon");
        map.Add("Bulacan", "Bulacan");
        map.Add("Cagayan", "Cagayan");
        map.Add("Camarines Norte", "Camarines Norte");
        map.Add("Camarines Sur", "Camarines Sur");
        map.Add("Camiguin", "Camiguin");
        map.Add("Capiz", "Capiz");
        map.Add("Catanduanes", "Catanduanes");
        map.Add("Cavite", "Cavite");
        map.Add("Cebu", "Cebu");
        map.Add("Compostela Valley", "Composteladalen");
        map.Add("Cotabato", "Cotabato");
        map.Add("Davao del Norte", "Davao del Norte");
        map.Add("Davao del Sur", "Davao del Sur");
        map.Add("Davao Oriental", "Davao Oriental");
        map.Add("Dinagat Islands", "Dinagatöarna (provins)");
        map.Add("Guimaras", "Guimaras");
        map.Add("Ifugao", "Ifugao");
        map.Add("Iloilo", "Iloilo (provins)");
        map.Add("Isabela (province)", "Isabela (provins)");
        map.Add("Kalinga", "Kalinga");
        map.Add("La Union", "La Union (provins)");
        map.Add("Laguna (province)", "Laguna (provins)");
        map.Add("Lanao del Norte", "Lanao del Norte");
        map.Add("Lanao del Sur", "Lanao del Sur");
        map.Add("Leyte", "Leyte (provins)");
        map.Add("Maguindanao", "Maguindanao");
        map.Add("Marinduque", "Marinduque");
        map.Add("Masbate", "Masbate (provins)");
        map.Add("Misamis Occidental", "Misamis Occidental");
        map.Add("Misamis Oriental", "Misamis Oriental");
        map.Add("Negros Occidental", "Negros Occidental");
        map.Add("Negros Oriental", "Negros Oriental");
        map.Add("Ilocos Norte", "Norra Ilocos");
        map.Add("Northern Samar", "Norra Samar");
        map.Add("Nueva Ecija", "Nueva Ecija");
        map.Add("Nueva Vizcaya", "Nueva Vizcaya");
        map.Add("Occidental Mindoro", "Occidental Mindoro");
        map.Add("Oriental Mindoro", "Oriental Mindoro");
        map.Add("Palawan", "Palawan");
        map.Add("Pampanga", "Pampanga");
        map.Add("Pangasinan", "Pangasinan (provins)");
        map.Add("Quezon", "Quezon");
        map.Add("Quirino", "Quirino");
        map.Add("Rizal", "Rizal");
        map.Add("Romblon", "Romblon");
        map.Add("Samar", "Samar (provins)");
        map.Add("Sarangani", "Sarangani (provins)");
        map.Add("Siquijor", "Siquijor (provins)");
        map.Add("Sorsogon", "Sorsogon (provins)");
        map.Add("Sultan Kudarat", "Sultan Kudarat");
        map.Add("Sulu", "Sulu (provins)");
        map.Add("Surigao del Norte", "Surigao del Norte");
        map.Add("Surigao del Sur", "Surigao del Sur");
        map.Add("South Cotabato", "Södra Cotabato");
        map.Add("Ilocos Sur", "Södra Ilocos");
        map.Add("Southern Leyte", "Södra Leyte");
        map.Add("Tarlac", "Tarlac");
        map.Add("Tawi-Tawi", "Tawi-Tawi");
        map.Add("Zambales", "Zambales");
        map.Add("Zamboanga del Norte", "Zamboanga del Norte");
        map.Add("Zamboanga del Sur", "Zamboanga del Sur");
        map.Add("Zamboanga Sibugay", "Zamboanga Sibugay");
        map.Add("Eastern Samar", "Östra Samar");

        // Regions
        map.Add("Bicol Region", "Bikolregionen");
        map.Add("Cagayan Valley", "Cagayandalen");
        map.Add("CALABARZON", "CALABARZON");
        map.Add("Caraga", "Caraga");
        map.Add("Central Luzon", "Centrala Luzon");
        map.Add("Central Visayas", "Centrala Visayas");
        map.Add("Davao Region", "Davaoregionen");
        map.Add("Ilocos Region", "Ilocosregionen");
        map.Add("Cordillera Administrative Region", "Kordiljärernas administrativa region");
        map.Add("Metro Manila", "Metro Manila");
        map.Add("MIMAROPA", "MIMAROPA");
        map.Add("Autonomous Region in Muslim Mindanao", "Muslimska Mindanao");
        map.Add("Northern Mindanao", "Norra Mindanao");
        map.Add("SOCCSKSARGEN", "SOCCSKSARGEN");
        map.Add("Western Visayas", "Västra Visayas");
        map.Add("Zamboanga Peninsula", "Zamboangahalvön");
        map.Add("Eastern Visayas", "Östra Visayas");
        return map;
    }

    /// <summary>
    /// Removes HTML comments from wikitext, working from the last opener backwards.
    /// An opener without a matching "-->" has only the opener itself removed.
    /// </summary>
    /// <param name="text">Wikitext; null or empty is returned unchanged.</param>
    private static string StripComments(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return text;
        }

        const string open = "<!--";
        const string close = "-->";
        int startPos;
        while ((startPos = text.LastIndexOf(open, StringComparison.Ordinal)) != -1)
        {
            int endPos = text.IndexOf(close, startPos + open.Length, StringComparison.Ordinal);
            // Every iteration removes at least the opener, so the loop terminates.
            int len = (endPos != -1) ? endPos - startPos + close.Length : open.Length;
            text = text.Remove(startPos, len);
        }
        return text;
    }

    /// <summary>
    /// Translates a province/region value into a Swedish wiki link. The full link
    /// target (including any parenthesised disambiguator) is looked up first; if
    /// that fails the disambiguator is dropped and the lookup is retried.
    /// </summary>
    /// <returns>"[[Swedish name]]" when a translation exists, otherwise the cleaned,
    /// unlinked name without its disambiguator.</returns>
    private static string TranslateDivision(string raw, Dictionary<string, string> provinsdict)
    {
        string name = CleanLink(raw);
        int cut = name.IndexOf(']');
        if (cut >= 0)
        {
            name = name.Remove(cut);
        }
        name = name.Trim();

        string translated;
        if (provinsdict.TryGetValue(name, out translated))
        {
            return "[[" + translated + "]]";
        }

        cut = name.IndexOf('(');
        if (cut >= 0)
        {
            // Trim so that "Cebu (province)" becomes "Cebu", not "Cebu ".
            name = name.Remove(cut).Trim();
            if (provinsdict.TryGetValue(name, out translated))
            {
                return "[[" + translated + "]]";
            }
        }
        return name;
    }

    /// <summary>
    /// Rewrites a "{{URL|...}}" template call as a bracketed external link. The
    /// "http://" scheme is only added when the value carries no scheme of its own.
    /// Values without "{{URL" are returned unchanged.
    /// </summary>
    private static string ConvertUrl(string raw)
    {
        if (!raw.Contains("{{URL"))
        {
            return raw;
        }

        bool hasScheme = raw.IndexOf("://", StringComparison.Ordinal) >= 0;
        string opener = hasScheme ? "[" : "[http://";
        return raw.Replace("{{URL|", opener).Replace("}}", "]");
    }

    /// <summary>
    /// Checks whether the file page "File:&lt;fileName&gt;" can be fetched from the
    /// given site. Any WebException is reported on the console and treated as
    /// "not available".
    /// </summary>
    private static bool ExistsOnCommons(Site cmsite, string fileName)
    {
        if (fileName == null || fileName.Trim().Length == 0)
        {
            return false;   // nothing to look up
        }

        string res = cmsite.site + cmsite.indexPath + "index.php?title=" +
            HttpUtility.UrlEncode("File:" + fileName);
        try
        {
            cmsite.GetPageHTM(res);
            return true;
        }
        catch (WebException e)
        {
            // Prefer the status code; the message text is kept as a fallback.
            HttpWebResponse response = e.Response as HttpWebResponse;
            bool notFound =
                (response != null && response.StatusCode == HttpStatusCode.NotFound) ||
                e.Message.Contains(": (404) ");
            if (notFound)
            {
                Console.Error.WriteLine(Bot.Msg("Page \"{0}\" doesn't exist."), fileName);
                Console.WriteLine("Image not found " + fileName);
            }
            else
            {
                Console.Error.WriteLine(e.Message);
            }
            return false;
        }
    }

    /// <summary>
    /// Converts one source parameter value into the value to store under the given
    /// target parameter name.
    /// </summary>
    /// <returns>The translated value, or null when the parameter should be skipped.</returns>
    private static string TranslateValue(string target, string value,
        Dictionary<string, string> provinsdict, Site cmsite)
    {
        switch (target)
        {
            // Not produced by the current parameter map; kept for when those
            // mappings are added.
            case "region_type":
                return "[[Filippinernas regioner|Region]]";
            case "district_type":
                return "[[Filippinernas provinser|Provins]]";

            case "delstat":
            case "indelning":
                return TranslateDivision(value, provinsdict);

            case "form":
                if (value.Contains("city") || value.Contains("City"))
                {
                    return "[[Stad]]";
                }
                return value;

            case "url":
                return ConvertUrl(value);

            case "sigill":
            case "bild":
                return ExistsOnCommons(cmsite, value) ? value : null;

            case "folkmängd_år":
                // NOTE(review): values without {{asof}} are dropped, as before —
                // confirm whether plain years should be copied too.
                if (value.Contains("{{asof"))
                {
                    return value.Replace("{{asof|", "").Replace("}}", "");
                }
                return null;

            default:
                return value;
        }
    }

    /// <summary>
    /// Entry point: prompts for the bot password, collects the source articles and
    /// copies their infobox data to the matching Swedish articles.
    /// </summary>
    public static void Main()
    {
        Console.Write("Password: ");
        string password = Console.ReadLine();

        // "***" is a placeholder for the bot account name; the password typed
        // above is used for the login.
        Site svsite = new Site("http://sv.wikipedia.org", "***", password);
        Site ensite = new Site("http://en.wikipedia.org", "***", password);
        Site cmsite = new Site("http://commons.wikimedia.org", "***", password);
        PageList pl = new PageList(ensite);

        //Select how to get pages. Uncomment as needed.

        //Find articles from a category
        pl.FillFromCategoryTree("Cities in the Philippines");

        //Find articles from all the links to a template, mostly useful on very small wikis
        //        pl.FillFromLinksToPage("Mall:Taxobox");

        //Set specific article:
        //Page pp = new Page(ensite, "Davao City");pl.Add(pp);

        //Skip all namespaces except regular articles:
        pl.RemoveNamespaces(new int[] {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,100,101});

        // Source parameter name -> target parameter name.
        Dictionary<string, string> paramdict = BuildParameterMap();
        // English province/region name -> Swedish name.
        Dictionary<string, string> provinsdict = BuildProvinceMap();

        // The same for every page, so built once.
        Regex templateTitleRegex = new Regex("^\\s*(" +
            Bot.Capitalize(Regex.Escape(SourceTemplate)) + "|" +
            Bot.Uncapitalize(Regex.Escape(SourceTemplate)) +
            ")\\s*\\|");

        const string geobox = "{{Stadsfakta|}}\n";

        foreach (Page p in pl)
        {
            p.Load();

            //Remove comments, otherwise the template finding doesn't work properly:
            p.text = StripComments(p.text);

            Dictionary<string, string> newparamdict = new Dictionary<string, string>();
            bool foundtemplate = false;

            foreach (string template in p.GetTemplatesWithParams())
            {
                if (!templateTitleRegex.IsMatch(template))
                {
                    continue;
                }

                //Ok, found the right template. Now get params:
                foundtemplate = true;
                Dictionary<string, string> oldparameters = ensite.ParseTemplate(template);
                foreach (KeyValuePair<string, string> old in oldparameters)
                {
                    string target;
                    if (!paramdict.TryGetValue(old.Key, out target))
                    {
                        continue;
                    }

                    string value = TranslateValue(target, old.Value, provinsdict, cmsite);
                    if (value != null)
                    {
                        // Indexer instead of Add: a second matching infobox on the
                        // same page must not throw on duplicate keys.
                        newparamdict[target] = value;
                    }
                }

                newparamdict["land"] = "Filippinerna";
                newparamdict["tidszon"] = "[[UTC+8]]";
            }

            if (!foundtemplate)
            {
                continue;
            }

            //find swedish match:
            foreach (string iws in p.GetInterWikiLinks())
            {
                string link = iws.Trim();
                if (!link.StartsWith("sv:", StringComparison.Ordinal))
                {
                    continue;
                }

                string svtit = link.Substring(3);
                Page psv = new Page(svsite, svtit);
                psv.Load();

                if (string.IsNullOrEmpty(psv.text))
                {
                    // Do not create an article that consists of an infobox only.
                    Console.WriteLine("Swedish page missing or empty: " + svtit);
                }
                else
                {
                    string origtext = psv.text;
                    if (!psv.text.Contains("eobox") && !psv.text.Contains("tadsfakta"))
                    {
                        psv.text = geobox + psv.text;
                        foreach (KeyValuePair<string, string> np in newparamdict)
                        {
                            psv.SetTemplateParameter(TargetTemplate, np.Key, np.Value, true);
                        }
                    }
                    else
                    {
                        Console.WriteLine("Has box already");
                    }

                    //DONE!  Now save if needed.
                    Bot.editComment = "Fixar faktamall";
                    isMinorEdit = false;
                    if (psv.text != origtext)
                    {
                        psv.Save();
                    }
                }

                Thread.Sleep(4000);//milliseconds
            }
        }
    }
}