أرابيكا:بوت/نقل الإحداثيات

من أرابيكا، الموسوعة الحرة
اذهب إلى التنقل اذهب إلى البحث
بيانات كود البوت
الاسم: نقل الإحداثيات
اللغة: بايثون
قدرات: إضافة الإحداثيات إلى المقالات في ويكيبيديا العربية
المستخدم: عباس
النتيجة: ستضاف الإحداثيات في حال وجودها بالمقالة الإنجليزية إلى الصفحة المعادلة في العربية
نموذج تحرير: نموذج
التحميل: [من هنا]

الکود

# -*- coding: utf-8 -*-
#solving "{{coord|LAT|" problem
"""
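This bot copies geographic coordinates from the English Wikipedia to the
matching page on the local wiki. For each page it follows the interwiki link
to the English article, looks there for a coordinate template or for infobox
coordinate fields, and prepends a {{Coord|...|display=title}} line to the
local page. The pages to work on are selected with the parameters below
(a text file, a category, a single page, ...).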
 
You can run the bot with the following commandline parameters:
 
-file        - Work on all pages given in a local text file.
               Will read any [[wiki link]] and use these articles.
               Argument can also be given as "-file:filename".
-cat         - Work on all pages which are in a specific category.
               Argument can also be given as "-cat:categoryname".
-page        - Only edit a specific page.
               Argument can also be given as "-page:pagetitle". You can give this
               parameter multiple times to edit multiple pages.
-ref         - Work on all pages that link to a certain page.
               Argument can also be given as "-ref:referredpagetitle".
-filelinks   - Works on all pages that link to a certain image.
               Argument can also be given as "-filelinks:ImageName".
-links       - Work on all pages that are linked to from a certain page.
               Argument can also be given as "-links:linkingpagetitle".
-start       - Work on all pages in the wiki, starting at a given page. Choose
               "-start:!" to start at the beginning.
               NOTE: You are advised to use -xml instead of this option; this is
               meant for cases where there is no recent XML dump.
-except:XYZ  - Ignore pages which contain XYZ. If the -regex argument is given,
               XYZ will be regarded as a regular expression.
-summary:XYZ - Set the summary message text for the edit to XYZ, bypassing the
               predefined message texts with original and replacements inserted.
-template:XYZ- 
-namespace:n - Number of namespace to process. The parameter can be used
               multiple times. It works in combination with all other
               parameters, except for the -start parameter. If you e.g. want to
               iterate over all user pages starting at User:M, use
               -start:User:M.
-always      - Don't prompt you for each replacement
other:       - 
 
NOTE: Only use either -xml or -file or -page, but don't mix them.
 
Examples:
 
"""
#
# [[Utente:Wiso]] 2007
#
# Distributed under the terms of the GPL licence
#
 
from __future__ import generators
import sys,re,pprint
import wikipedia,pagegenerators,catlib,config
 
__version__ = '$Id: coordbot.py,v 0.1 $'
 
# Edit-summary template (Arabic; roughly "Bot: transferring coordinates
# from %s"). The single %s placeholder is filled with a link to the English
# source page when a page is saved; main() also installs the raw template as
# the default summary.
msg = u'بوت نقل الإحداثیات من %s'
 
# Rewrite rules used to extract coordinates from the source article.
# Each entry is a (pattern, replacement) pair: the pattern is searched in the
# English wikitext and the replacement turns the matched span into a
# normalised "{{Coord|...|display=title}}" line. CoordRobot.compileregex()
# replaces every pattern string in this table with its compiled regex.
# Only the 'safe' list is applied by CoordRobot.run(); 'notsafe' is compiled
# but not referenced anywhere else in the visible code.
templates = {
    'safe': [
# Templates found on every wiki:
# {{Coord ...}} -> same arguments with display=title appended.
    ( r'\{\{ ?[Cc]oord(.*?)\}\}',r"{{Coord\1|display=title}}\n" ),
# {{coor title d|dm|dms}}: degrees / degrees+minutes / degrees+minutes+seconds.
    ( r'{{coor[_ ]title[_ ]d\|([0-9\.-]+)\|([NS])\|([0-9\.-]+)\|([EW])\|?([^}]*?)}}',r"{{coord|\1|\2|\3|\4|\5|display=title}}\n" ),
    ( r'{{coor[_ ]title[_ ]dm\|([0-9\.-]+)\|([0-9\.-]+)\|([NS])\|([0-9\.-]+)\|([0-9\.-]+)\|([EW])\|?([^\}]*?)\}\}',r"{{coord|\1|\2|\3|\4|\5|\6|\7|display=title}}\n" ),
    ( r'{{coor[_ ]title[_ ]dms\|([0-9\.-]+)\|([0-9\.-]+)\|([0-9\.-]+)\|([NS])\|([0-9\.-]+)\|([0-9\.-]+)\|([0-9\.-]+)\|([EW])\|?([^}]*?)}}',r"{{coord|\1|\2|\3|\4|\5|\6|\7|\8|\9|display=title}}\n" ),
# {{coor d|lat|lon}}.
# NOTE(review): the second group ([0-9\.+-]) has no quantifier, so it only
# matches a single character -- looks like a missing '+'; confirm.
    ( r'\{\{ ?[Cc]oor[ _]d\|([0-9\.+-]+)\|([0-9\.+-])(\|?[^\|]*)\}\}',r"{{Coord|\1|\2\3|display=title}}\n" ),
# Infobox fields latd= / longd= on one line -> decimal {{Coord}}.
    ( r'\{\{.*latd *= *([0-9\.]+).*longd ?= ?([0-9\.]+)',r"{{Coord|\1|\2|display=title}}\n" ),
# lat_deg/lat_min + lon_deg/lon_min fields; the replacement hard-codes the
# hemispheres N and E.
    ( r'.*\|lat_deg *= *([0-9\.]+).*\|lat_min *= *([0-9\.]+).*\n.*\|lon_deg *= *([0-9\.]+).*\|lon_min *= *([0-9\.]+).*',r"{{Coord|\1|\2|N|\3|\4|E|display=title}}\n" ),
# English wiki infobox fields:
# lat_deg/min/sec + lon_deg/min/sec (hemispheres again hard-coded to N and E).
    ( r'.*\|lat_deg *= *([0-9\.]+).*\|lat_min *= *([0-9\.]+).*\|lat_sec *= *([0-9\.]+).*\n.*\|lon_deg *= *([0-9\.]+).*\|lon_min *= *([0-9\.]+).*\|lon_sec *= *([0-9\.]+).*',r"{{Coord|\1|\2|\3|N|\4|\5|\6|E|display=title}}\n" ),
# latd/latm/lats/latNS + longd/longm/longs/longEW (hemisphere taken from the text).
( r'.*\|latd *= *([0-9\.]+).*\|*latm *= *([0-9\.]+).*\|*lats *= *([0-9\.]+).*\|*latNS *= (.*?[NS])\n.*\|longd *= *([0-9\.]+).*\|*longm *= *([0-9\.]+).*\|*longs *= *([0-9\.]+).*\| longEW = (.*?[EW])*',r"{{Coord|\1|\2|\3|\4|\5|\6|\7|\8|display=title}}\n" ),
# source_lat_* / source_long_* fields.
( r'.*\|*source_lat_d *= *([0-9\.]+).*\|*source_lat_m *= *([0-9\.]+).*\|*source_lat_s *= *([0-9\.]+).*\|*source_lat_NS *=*(.*?[NS])\n.*\| source_long_d *= *([0-9\.]+).*\|*source_long_m *= *([0-9\.]+).*\|*source_long_s *= *([0-9\.]+).*\| source_long_EW =*(.*?[EW])*',r"{{Coord|\1|\2|\3|\4|\5|\6|\7|\8|display=title}}\n" ),
# Italian wiki infobox fields (latitudineGradi, latitudinePrimi, ...):
# degrees+minutes only, hemispheres hard-coded to N and E.
( r'.*\|latitudineGradi *= *([0-9\.]+).*\n.*\|latitudinePrimi *= *([0-9\.]+).*\n.*\|*\n.*\|longitudineGradi *= *([0-9\.]+).*\n.*\|longitudinePrimi *= *([0-9\.]+).*\n.*\|*',r"{{Coord|\1|\2|N|\3|\4|E|display=title}}\n" ),
# degrees+minutes+seconds, hemispheres hard-coded to N and E.
    ( r'.*\|latitudineGradi *= *([0-9\.]+).*\n.*\|latitudinePrimi *= *([0-9\.]+).*\n.*\|latSecondi *= *([0-9\.]+).*\n.*\|longitudineGradi *= *([0-9\.]+).*\n.*\|longitudinePrimi *= *([0-9\.]+).*\n.*\|longSecondi *= *([0-9\.]+).*',r"{{Coord|\1|\2|\3|N|\4|\5|\6|E|display=title}}\n" ),
# degrees+minutes+seconds with explicit latitudineNS / longitudineEW fields.
    ( r'..*\|latitudineGradi *= *([0-9\.]+).*\n.*\|latitudinePrimi *= *([0-9\.]+).*\n.*\|latSecondi *= *([0-9\.]+).*\n.*\|latitudineNS *=(.*?[NS])\n.*\|longitudineGradi *= *([0-9\.]+).*\n.*\|longitudinePrimi *= *([0-9\.]+).*\n.*\|longSecondi *= *([0-9\.]+).*\n.*\|longitudineEW *=(.*?[EW])*',r"{{Coord|\1|\2|\3|\4|\5|\6|\7|\8|display=title}}\n" ),
    ],
# Rules for {{coord dm}} / {{coor dms}}; kept separate from the 'safe' list.
    'notsafe': [
    ( r'\{\{ ?[Cc]oord[ _]dm\|([0-9]+)\|([0-9\.]+)\|([NS])\|([0-9\.]+)\|([0-9\.]+)\|([EW])(\|?[^\|]*)\}\}',r"{{Coord|\1|\2|\3|\4|\5|\6\7|display=title}}\n" ),
    ( r'\{\{ ?[Cc]oor[ _]dms\|([0-9]+)\|([0-9\.]+)\|([0-9\.]+)\|([NS])\|([0-9\.]+)\|([0-9\.]+)\|([0-9\.]+)\|([EW])(\|?[^\|]*)\}\}',r"{{Coord|\1|\2|\3|\4|\5|\6|\7|\8\9|display=title}}\n" ),
    ]
    }
# Exceptions: pages that need no coordinates or that already have a Coord template.
# Regex sources matched against the local page text; a page containing any
# of them is skipped. CoordRobot.compileregex() replaces each string in this
# list with its compiled pattern.
#
# Patterns holding non-ASCII text are unicode literals so they can match the
# unicode page text. They are written as plain u'' strings with doubled
# backslashes (rather than ur''), which parses identically on Python 2 and 3.
# The two Italian names ending in "a-grave" had been corrupted to mojibake by
# a bad re-encoding and could never match; they are restored with \u00e0
# escapes so that another re-encoding cannot damage them again.
exceptions = [ r'\{\{ *?Geobox',
               r'\{\{ ?[Cc]oord',
               u'\\{\\{ ?إحداثيات',
               r'\{\{ ?Template:[Cc]oord',
               r'\{\{ ?[mM]ontagna',
               r'\{\{ ?(Template:)?[cC]omune',
               u'\\{\\{ ?[cC]itt\u00e0',
               u'\\{\\{ ?[mM]unicipalit\u00e0',
               r'\{\{ ?[aA]eroporto\|',
               r'\{\{ ?[Mm]unicipi',
               r'\{\{ ?[iI]nfobox[ _]Azienda\|',
               r'\{\{ ?[Ss]\|aziende',
               r'\{\{ ?[Dd]isambigua\|',
               r'\{\{ ?[Ff]razione',
               r'\{\{ ?[Ss]quadra',
               r'\{\{ ?[Pp]asso ?(\||\n)',
               r'\{\{ ?[Bb]undesland[ _]tedesco'
               ]
 
 
class CoordRobot:
    """
    A bot that imports coordinates from another Wikipedia.

    For every page yielded by the generator the bot follows the interwiki
    link to the English article, searches the English wikitext with the
    'safe' rules of the module-level ``templates`` table and, when a rule
    matches, prepends the resulting ``{{Coord|...|display=title}}`` line to
    the local page and saves it.
    """

    # Placeholder coordinates copied from template documentation; a page
    # whose new text contains one of these is never saved.
    PLACEHOLDERS = ( '{{coord|LAT|','{{Coord|LAT|','{{coord|lat|' )

    def __init__( self,generator,autoTitle = False,autoText = False ):
        """
        generator -- iterable of page objects to process.
        autoTitle, autoText -- accepted so existing callers keep working;
                               neither value is used.
        """
        self.generator = generator
        self.compileregex()

    def compileregex( self ):
        """
        Compile the module-level ``templates`` and ``exceptions`` in place.

        Entries that are already compiled are left untouched, so creating a
        second robot in the same process is safe (re.compile() raises
        ValueError when given a compiled pattern together with flags).
        """
        for rules in templates.values():
            for i,( old,new ) in enumerate( rules ):
                if not hasattr( old,'search' ):
                    rules[i] = re.compile( old,re.UNICODE ),new
        for i,pattern in enumerate( exceptions ):
            if not hasattr( pattern,'search' ):
                exceptions[i] = re.compile( pattern )

    def checkExceptions( self,text ):
        """
        Return the first piece of ``text`` matched by an exception pattern,
        or None when no exception pattern matches.
        """
        for exception in exceptions:
            hit = exception.search( text )
            if hit:
                return hit.group( 0 )
        return None

    def change( self,page,new_text ):
        """
        Save ``new_text`` to ``page``.

        Save errors are reported through wikipedia.output() and never
        propagate, so one failing page does not stop the run.
        KeyboardInterrupt and SystemExit are not intercepted.
        """
        # sys.exc_info() is used instead of the "except X, name" form so the
        # code parses on both Python 2 and Python 3.
        try:
            page.put( new_text )
        except wikipedia.EditConflict:
            wikipedia.output( u'Skipping %s because of edit conflict' % ( page.title() ) )
        except wikipedia.SpamfilterError:
            url = sys.exc_info()[1]
            wikipedia.output( u'Cannot change %s because of blacklist entry %s' % ( page.title(),url ) )
        except Exception:
            wikipedia.output( u'Could not save %s: %r' % ( page.title(),sys.exc_info()[1] ) )

    def run( self ):
        """
        Process every page of the generator.

        An unexpected error on one page is reported and the run continues
        with the next page.
        """
        # The source wiki is hard-coded to English.
        sen = wikipedia.Site( 'en' )
        for page in self.generator:
            try:
                self._process( page,sen )
            except Exception:
                wikipedia.output( u'Skipping %s after unexpected error: %r'
                                  % ( page.title(),sys.exc_info()[1] ) )

    def _process( self,page,sen ):
        """Handle one local page: find its English counterpart and copy the coordinates."""
        try:
            if not page.canBeEdited():
                wikipedia.output( u'Skipping locked page %s' % page.title() )
                return
            text_it = page.get()
            # skip all pages that contain certain texts
            match = self.checkExceptions( text_it )
            if match:
                wikipedia.output( u'Skipping %s because it contains %s' % ( page.title(),match ) )
                return
            interwiki_list = page.interwiki()
        except wikipedia.NoPage:
            wikipedia.output( u'Page %s not found' % page.title() )
            return
        except wikipedia.IsRedirectPage:
            wikipedia.output( u'Page %s is a redirect, skip' % page.title() )
            return

        page_en = self._english_page( interwiki_list,sen )
        if page_en is None:
            return
        wikipedia.output( page.title() )
        wikipedia.output( u'en: %s' % page_en.title() )

        try:
            text_en = page_en.get()
        except wikipedia.NoPage:
            wikipedia.output( u'Page %s not found' % page_en.title() )
            return
        except wikipedia.IsRedirectPage:
            wikipedia.output( u'Page %s is a redirect, follow redirect' % page_en.title() )
            # NOTE(review): presumably this returns the redirect page's own
            # wikitext rather than the target's -- confirm against the
            # framework before relying on "follow".
            text_en = page_en.get( get_redirect = True )

        new_text_it = self._with_coord( text_it,text_en )
        if new_text_it == text_it:
            # No coordinate was found: saving would only be a no-op edit.
            return
        for placeholder in self.PLACEHOLDERS:
            if placeholder in new_text_it:
                return
        if '{{Coord missing' in new_text_it:
            return
        wikipedia.setAction( msg % page_en.aslink() )
        self.change( page,new_text_it )

    def _english_page( self,interwiki_list,sen ):
        """Return the first interwiki page that lives on site ``sen``, or None."""
        for candidate in interwiki_list:
            if candidate.site() == sen:
                return candidate
        return None

    def _with_coord( self,text_it,text_en ):
        """
        Return ``text_it`` with a Coord line built from ``text_en`` prepended,
        or ``text_it`` unchanged when no 'safe' rule matches.

        When several rules match, the result of the last matching rule wins.
        """
        # Clean the English text once, before trying the rules: drop
        # "Coord missing" markers and any display= argument, because every
        # replacement adds its own display=title.
        text_en = text_en.replace( '{{Coord missing','' )
        text_en = re.sub( r"\|\s*display\s*\=\s*(inline,)?title(,inline)?","",text_en )
        text_en = text_en.replace( '|display=inline','' )

        new_text_it = text_it
        for old,new in templates['safe']:
            match = old.search( text_en )
            if not match:
                continue
            found = match.group( 0 )
            wikipedia.output( u'Coord %s: ' % found )
            template_new = old.sub( new,found ).replace( u'||','|' )
            wikipedia.output( template_new )
            if '{{Coord missing' in template_new:
                new_text_it = text_it
            else:
                new_text_it = template_new + text_it
        return new_text_it
 
def main():
    """
    Parse the command line, build the page generator and run CoordRobot.

    Arguments handled here: -autotitle, -autotext, -page[:title],
    -except:text, -template:name (accepted but unused), -namespace:n and
    -summary:text. Every other argument is offered to the
    pagegenerators.GeneratorFactory.

    Errors are reported through wikipedia.output() rather than silently
    discarded, and wikipedia.stopme() is always called on the way out.
    """
    try:
        gen = None
        # Which namespaces should be processed?
        # default to [] which means all namespaces will be processed
        namespaces = []
        PageTitles = []
        autoText = False
        autoTitle = False
        # This factory is responsible for processing command line arguments
        # that are also used by other scripts and that determine on which
        # pages to work on.
        genFactory = pagegenerators.GeneratorFactory()
        # Load default summary message.
        # BUG WARNING: This is probably incompatible with the -lang parameter.
        wikipedia.setAction( msg )

        # Read commandline parameters.
        for arg in wikipedia.handleArgs():
            if arg == '-autotitle':
                autoTitle = True
            elif arg == '-autotext':
                autoText = True
            elif arg.startswith( '-page' ):
                if len( arg ) == 5:
                    PageTitles.append( wikipedia.input( u'Which page do you want to chage?' ) )
                else:
                    PageTitles.append( arg[6:] )
            elif arg.startswith( '-except:' ):
                # Add the text to the module-level skip list so that pages
                # containing it are really skipped. The text is escaped
                # because it is meant literally; an empty value is ignored
                # since it would match every page.
                if arg[8:]:
                    exceptions.append( re.escape( arg[8:] ) )
            elif arg.startswith( '-template:' ):
                # Accepted for compatibility; the value is not used.
                pass
            elif arg.startswith( '-namespace:' ):
                namespaces.append( int( arg[11:] ) )
            elif arg.startswith( '-summary:' ):
                # NOTE(review): CoordRobot.run() sets its own summary for
                # every saved page, so this value is presumably overridden.
                wikipedia.setAction( arg[9:] )
            else:
                generator = genFactory.handleArg( arg )
                if generator:
                    gen = generator

        if PageTitles:
            pages = [wikipedia.Page( wikipedia.getSite(),PageTitle ) for PageTitle in PageTitles]
            gen = iter( pages )
        if not gen:
            # syntax error, show help text from the top of this file
            wikipedia.showHelp( 'coordbot' )
            return
        if namespaces:
            wikipedia.output( u'Namespaces: %s' % namespaces )
            gen = pagegenerators.NamespaceFilterPageGenerator( gen,namespaces )
        preloadingGen = pagegenerators.PreloadingGenerator( gen,pageNumber = 120 )
        bot = CoordRobot( preloadingGen,autoTitle,autoText )
        bot.run()
    except Exception:
        # Report the failure instead of hiding it; KeyboardInterrupt and
        # SystemExit are deliberately left to the caller.
        wikipedia.output( u'coordbot stopped because of an error: %r' % ( sys.exc_info()[1], ) )
    finally:
        wikipedia.stopme()
 
# Script entry point. Ctrl-C ends the run quietly; any other error is shown
# with its traceback and reported through a non-zero exit status instead of
# being silently discarded.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        pass
    except Exception:
        import traceback
        traceback.print_exc()
        sys.exit( 1 )