Ticket #3600: dsage_process_pool.patch

File dsage_process_pool.patch, 168.1 KB (added by yi, 13 years ago)
  • sage/dsage/database/client.py

    # HG changeset patch
    # User Yi Qiang <yqiang@gmail.com>
    # Date 1215477042 25200
    # Node ID 00c15d56813e4783a5cbedad933a13769dadd6c7
    # Parent  9fe444b1d4ac3c714604409462eae7421ee44013
    [mq]: dsage_process_pool.patch
    
    diff --git a/sage/dsage/database/client.py b/sage/dsage/database/client.py
    --- a/sage/dsage/database/client.py
    +++ b/sage/dsage/database/client.py
    11class Client(object):
    22    """
    33    A client of the dsage server.
    4    
     4
    55    """
    6    
     6
    77    def __init__(self, username, public_key):
    88        """
    99        :type username: string
    1010        :param username: username
    11        
     11
    1212        :type public_key: string
    1313        :param public_key: public key of user
    1414
    1515        """
    16        
     16
    1717        self.username = username
    1818        self.public_key = public_key
    1919        self.creation_time = None
     
    2121        self.last_login = None
    2222        self.connected = None
    2323        self.enabled = None
    24        
     24
    2525    def get_username(self):
    2626        return self.username
    27    
     27
    2828    def get_public_key(self):
    2929        return self.public_key
    30    
     30
    3131    def is_connected(self):
    3232        return self.connected
    33    
     33
    3434    def is_enabled(self):
    35         return self.enabled
    36  No newline at end of file
     35        return self.enabled
  • sage/dsage/database/job.py

    diff --git a/sage/dsage/database/job.py b/sage/dsage/database/job.py
    --- a/sage/dsage/database/job.py
    +++ b/sage/dsage/database/job.py
    11############################################################################
    2 #                                                                     
    3 #   DSAGE: Distributed SAGE                     
    4 #                                                                             
    5 #       Copyright (C) 2006, 2007 Yi Qiang <yqiang@gmail.com>               
    6 #                                                                           
    7 #  Distributed under the terms of the GNU General Public License (GPL)       
     2#
     3#   DSAGE: Distributed SAGE
     4#
     5#       Copyright (C) 2006, 2007 Yi Qiang <yqiang@gmail.com>
     6#
     7#  Distributed under the terms of the GNU General Public License (GPL)
    88#
    99#    This code is distributed in the hope that it will be useful,
    1010#    but WITHOUT ANY WARRANTY; without even the implied warranty of
     
    2929class Job(object):
    3030    """
    3131    Defines a Job that gets distributed to clients.
    32    
     32
    3333    """
    3434
    35     def __init__(self, job_id=None, name='Unamed', username=getuser(),
     35    def __init__(self, job_id=None, name=None, username=getuser(),
    3636                 code='', timeout=0, kind='sage', priority=5):
    3737        """
    3838        Represents a job.
    39        
     39
    4040        :type job_id: string
    4141        :param job_id: unique identifier for a job
    42        
     42
    4343        :type name: string
    4444        :param name: name given to a job, not unique
    45        
     45
    4646        :type code: string
    4747        :param code: the code that needs to be executed
    48        
     48
    4949        :type parent: string
    50         :param parent: the job_id of another job 
    51        
     50        :param parent: the job_id of another job
     51
    5252        :type username: string
    5353        :param username: username of person who created job
    54        
     54
    5555        :type timeout: integer
    5656        :param timeout: upper bound for number of seconds this job takes
    57        
     57
    5858        :type priority: integer
    5959        :param priority: a jobs priority from 1-5, 1 being the highest
    60        
     60
    6161        :type kind: string
    62         :param kind: kind of the job (file, string, generator) 
     62        :param kind: kind of the job (file, string, generator)
    6363
    6464        """
    65        
     65
    6666        self.job_id = job_id
    67         self.name = name
     67        if name is None:
     68            self.name = 'Unamed'
     69        else:
     70            self.name = name
    6871        self.username = username
    6972        self.code = code
    7073        self.timeout = timeout
     
    8588
    8689    def __str__(self):
    8790        return "<Job('%s', %s)>" % (self.job_id, self.username)
    88    
     91
    8992    def __repr__(self):
    9093        return self.__str__()
    91    
     94
    9295    def attach(self, var, obj, file_name=None):
    9396        """
    9497        Attaches an object to a job.
    95        
     98
    9699        Parameters:
    97100        var -- the variable name you'd like the worker to use
    98101        obj -- the object you want to attach
    99102        filename -- optional, if your object is a saved sobj
    100        
     103
    101104        """
    102        
     105
    103106        if file_name is not None:
    104107            try:
    105108                s = open(file_name, 'rb').read()
     
    114117            except cPickle.PicklingError:
    115118                print 'Unable to attach your object.'
    116119        self.data.append((var, s, 'object'))
    117        
     120
    118121    def attach_file(self, file_name):
    119122        """
    120123        Attach a file to a job.
    121        
     124
    122125        Parameters:
    123126        file_name -- obvious
    124        
     127
    125128        """
    126        
     129
    127130        f = open(file_name, 'rb').read()
    128131        f = zlib.compress(f)
    129        
     132
    130133        # Strip out any hard coded path in the file name
    131134        file_name = os.path.split(file_name)[1]
    132135        self.data.append((file_name, f, 'file'))
    133    
     136
    134137    def _reduce(self):
    135138        """
    136139        Returns a _reduced form of Job.jdict to be sent over the network.
    137        
     140
    138141        """
    139        
     142
    140143        # dump and compress the data of the job
    141144        jdict = copy.deepcopy(self.__dict__)
    142145        jdict['data'] = cPickle.dumps(self.data, 2)
    143146        jdict['data'] = zlib.compress(jdict['data'])
    144147        # We do not compress jdict['result'] since it's already compressed
    145         jdict['result'] = self.result 
    146        
     148        jdict['result'] = self.result
     149
    147150        # Remove attributes that sqlalchemy put there for us
    148151        for key in jdict.keys():
    149152            if key.startswith('_'):
    150153                del jdict[key]
    151                
     154
    152155        return jdict
     156
    153157
    154158def expand_job(jdict):
    155159    """
    156160    This method recreates a Job object given a jdict.
    157    
     161
    158162    :type jdict: dictionary
    159163    :param jdict: the job dictionary
    160    
    161164    """
    162    
     165
    163166    if jdict is None:
    164167        return None
    165    
     168
    166169    job_id = jdict['job_id']
    167170    job = Job(job_id=job_id)
    168    
     171
    169172    # decompress and load data
    170173    try:
    171174        jdict['data'] = zlib.decompress(jdict['data'])
    172175        jdict['data'] = cPickle.loads(jdict['data'])
    173     except (KeyError, TypeError):
     176    except Exception, msg:
     177        print Exception, msg
    174178        jdict['data'] = None
    175179
    176180    try:
    177181        jdict['result'] = zlib.decompress(jdict['result'])
    178182        jdict['result'] = cPickle.loads(jdict['result'])
    179     except (KeyError, TypeError):
     183    # except (KeyError, TypeError):
     184    except:
    180185        jdict['result'] = None
    181        
     186
    182187    for k, v in jdict.iteritems():
    183188        setattr(job, k, v)
    184189
    185     return job
    186  No newline at end of file
     190    return job
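
    The `_reduce`/`expand_job` pair above serializes a job's `data` with cPickle (protocol 2) and compresses it with zlib before it crosses the wire, then reverses the two steps on the receiving side. A minimal, self-contained sketch of that round trip (the `reduce_payload`/`expand_payload` helpers are illustrative names, not part of the dsage API):

        import zlib

        try:
            import cPickle as pickle   # Python 2, as used in the patch
        except ImportError:
            import pickle              # fallback so the sketch also runs on Python 3

        def reduce_payload(payload):
            # pickle with protocol 2 (as Job._reduce does), then compress
            return zlib.compress(pickle.dumps(payload, 2))

        def expand_payload(blob):
            # reverse order: decompress first, then unpickle
            if blob is None:
                return None
            return pickle.loads(zlib.decompress(blob))

        data = [('x', 'attached object bytes', 'object')]
        assert expand_payload(reduce_payload(data)) == data

    Note that the bare `except` this hunk introduces in `expand_job` swallows every decompression and unpickling error; the commented-out `(KeyError, TypeError)` tuple is the narrower alternative the old code used.
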
  • sage/dsage/database/jobdb.py

    diff --git a/sage/dsage/database/jobdb.py b/sage/dsage/database/jobdb.py
    --- a/sage/dsage/database/jobdb.py
    +++ b/sage/dsage/database/jobdb.py
    9393        """
    9494       
    9595
     96class JobDatabaseSA(object):
     97    """
     98    I implement the JobDatabase using SQLAlchemy.
     99    """
    96100
    97 class JobDatabaseSA(object):
    98101    implements(IJobDatabase)
    99102   
    100103    def __init__(self, Session):
    101104        self.sess = Session()
    102         self.failure_threshold = 5
     105        self.failure_threshold = 3
    103106       
    104107    def _shutdown(self):
    105108        self.sess.close()
     
    137140    def update_job(self, job):
    138141        """
    139142        Takes a job object and updates it in the database.
    140        
    141143        """
    142144       
    143145        self.sess.save_or_update(job)
    144146        self.sess.commit()
    145147       
    146148    def store_jdict(self, jdict):
     149        """
     150        I store the jdict to the database.
     151        """
     152
    147153        try:
    148154            job_id = jdict['job_id']
    149             assert job_id != None
    150155            job = self.sess.query(Job).filter_by(job_id=job_id).first()
    151156            for k,v in jdict.iteritems():
    152157                setattr(job, k, v)
     
    328333       
    329334        Parameters:
    330335        jdict -- sage.dsage.database.Job.jdict
    331        
    332336        """
    333        
     337
    334338        try:
    335339            job_id = jdict['job_id']
    336340        except KeyError, msg:
     
    475479       
    476480        """
    477481       
    478         return self._get_jobs_by_parameter('status', 'processing')
    479  No newline at end of file
     482        return self._get_jobs_by_parameter('status', 'processing')
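
    `store_jdict` looks the row up by `job_id`, copies every key of the dictionary onto the mapped object, and commits. A sketch of that pattern in plain SQLAlchemy terms (here `session.add` stands in for the 0.4-era `save_or_update` call the patch uses, and `Job` is the mapped class):

        def store_jdict(session, Job, jdict):
            # fetch the existing row for this job_id, if any
            job = session.query(Job).filter_by(job_id=jdict['job_id']).first()
            if job is None:
                job = Job()                    # hypothetical: create a fresh row
            for key, value in jdict.items():
                setattr(job, key, value)       # copy the dictionary onto the object
            session.add(job)                   # modern spelling of save_or_update()
            session.commit()
            return job
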
  • sage/dsage/database/worker.py

    diff --git a/sage/dsage/database/worker.py b/sage/dsage/database/worker.py
    --- a/sage/dsage/database/worker.py
    +++ b/sage/dsage/database/worker.py
     1"""
     2Worker
     3"""
     4
     5
    16class Worker(object):
     7
    28    def __init__(self, host_info):
    39        for k, v in host_info.iteritems():
    410            setattr(self, k, v)
    5        
    6  No newline at end of file
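
    `Worker` simply copies every key of the `host_info` dictionary onto the instance, so whatever the monitor reports becomes an attribute (and, through the SQLAlchemy mapping, a column value). For illustration, restated without the mapping so it runs standalone, with a made-up subset of the keys the workerdb queries below rely on:

        class Worker(object):
            def __init__(self, host_info):
                # every reported key becomes an attribute on the instance
                for k, v in host_info.items():
                    setattr(self, k, v)

        w = Worker({'uuid': 'abc-123', 'hostname': 'node1', 'cpus': 4,
                    'cpu_speed': 2000, 'workers': 2, 'connected': True,
                    'busy': False})
        print(w.hostname)    # 'node1'
        print(w.busy)        # False
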
  • sage/dsage/database/workerdb.py

    diff --git a/sage/dsage/database/workerdb.py b/sage/dsage/database/workerdb.py
    --- a/sage/dsage/database/workerdb.py
    +++ b/sage/dsage/database/workerdb.py
    2222from sage.dsage.misc.constants import SERVER_LOG
    2323from sage.dsage.database.worker import Worker
    2424
     25
    2526class WorkerDatabaseSA(object):
     27    """
     28    I implement the WorkerDatabase using SQLAlchemy.
     29    """
     30
    2631    def __init__(self, Session):
    2732        self.sess = Session()
    2833        self._set_initial_state()
    29        
     34
    3035    def _set_initial_state(self):
    3136        w = self.sess.query(Worker).all()
    3237        for _w in w:
    3338            _w.connected = False
    3439            self.sess.save_or_update(_w)
    3540        self.sess.commit()
    36        
     41
    3742    def set_authenticated(self, uuid, authenticated):
    3843        w = self.sess.query(Worker).filter_by(uuid=uuid).first()
    3944        w.authenticated = authenticated
    4045        self.sess.save_or_update(w)
    4146        self.sess.commit()
    42    
     47
    4348    def set_busy(self, uuid, busy):
    4449        w = self.sess.query(Worker).filter_by(uuid=uuid).first()
    4550        w.busy = busy
    4651        self.sess.save_or_update(w)
    4752        self.sess.commit()
    48    
     53
    4954    def add_worker(self, host_info):
    5055        w = Worker(host_info)
    5156        self.sess.save(w)
    5257        self.sess.commit()
    53    
     58
    5459    def update_worker(self, host_info):
    5560        uuid = host_info['uuid']
    5661        w = self.sess.query(Worker).filter_by(uuid=uuid).first()
     
    5863            setattr(w, k, v)
    5964        self.sess.save_or_update(w)
    6065        self.sess.commit()
    61    
     66
    6267    def get_worker(self, uuid):
    6368        w = self.sess.query(Worker).filter_by(uuid=uuid).first()
    64        
     69
    6570        return w
    66    
    67     def get_worker_list(self):
    68         w = self.sess.query(Worker).all()
    69        
     71
     72    def get_worker_list(self, filter={}):
     73        w = self.sess.query(Worker).filter_by(**filter).all()
     74
    7075        return w
    71    
     76
    7277    def get_worker_by_job_id(self, job_id):
    7378        w = self.sess.query(Worker).filter_by(job_id=job_id)
    7479
    7580    def get_online_workers(self):
    7681        w = self.sess.query(Worker).filter_by(connected=True).all()
     82
     83        return w
     84   
     85    def get_avail_workers(self):
     86        w = self.sess.query(Worker).filter_by(busy=False, connected=True).all()
    7787       
    7888        return w
    79    
     89       
    8090    def get_worker_count(self, connected, busy):
    81         q = self.sess.query(Worker).filter_by(connected=connected,busy=busy)
     91        q = self.sess.query(Worker).filter_by(connected=connected, busy=busy)
    8292        workers = q.all()
    83        
     93
    8494        count = sum([w.workers for w in workers])
    85        
     95
    8696        return count
    87    
     97
    8898    def get_cpu_speed(self, connected, busy):
    8999        w = self.sess.query(Worker).all()
    90        
     100
    91101        return sum([_w.cpu_speed * _w.cpus for _w in w])
    92        
     102
    93103    def set_connected(self, uuid, connected):
    94104        w = self.sess.query(Worker).filter_by(uuid=uuid).first()
    95105        w.connected = connected
    96106        self.sess.save_or_update(w)
    97107        self.sess.commit()
    98    
     108
    99109
    100110class WorkerDatabase(object):
    101111    """
    102     This table keeps track of workers.
    103    
     112    I implement the WorkerDatabase using raw sqlite.
    104113    """
    105    
     114
    106115    def __init__(self, db_conn, log_file=SERVER_LOG, log_level=0):
    107116        self.log_file = log_file
    108117        self.log_level = log_level
    109118        self.con = db_conn
    110119        self.tablename = 'monitors'
    111    
     120
    112121    def _set_parameter(self, uuid, key, value):
    113122        query = """UPDATE monitors
    114123        SET %s=?
     
    116125        cur = self.con.cursor()
    117126        cur.execute(query, (value, uuid))
    118127        self.con.commit()
    119    
     128
    120129    def set_authenticated(self, uuid, authenticated):
    121130        return self._set_parameter(uuid, 'authenticated', authenticated)
    122    
     131
    123132    def add_worker(self, host_info):
    124133        query = """INSERT INTO monitors
    125134        (uuid,
     
    137146         mem_free)
    138147        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    139148        """
    140        
     149
    141150        uuid = host_info['uuid']
    142151        username = host_info['username']
    143152        hostname = host_info['hostname']
     
    151160        cpu_model = host_info['cpu_model']
    152161        mem_total = host_info['mem_total']
    153162        mem_free = host_info['mem_free']
    154        
     163
    155164        cur = self.con.cursor()
    156165        cur.execute(query, (uuid, username, hostname, ip, workers,
    157166                            sage_version, os_, kernel_version, cpus,
    158167                            cpu_speed, cpu_model, mem_total, mem_free))
    159168        self.con.commit()
    160    
     169
    161170    def update_worker(self, host_info):
    162171        query = """UPDATE monitors
    163172        SET hostname = ?, username = ?, ip = ?, workers = ?, sage_version = ?,
    164173        os = ?, kernel_version = ?, cpus = ?, cpu_speed = ?, cpu_model = ?,
    165174        mem_total = ?, mem_free = ? WHERE uuid = ?
    166175        """
    167        
     176
    168177        uuid = host_info['uuid']
    169178        username = host_info['username']
    170179        hostname = host_info['hostname']
     
    178187        cpu_model = host_info['cpu_model']
    179188        mem_total = host_info['mem_total']
    180189        mem_free = host_info['mem_free']
    181        
     190
    182191        cur = self.con.cursor()
    183192        cur.execute(query, (hostname, username, ip, workers, sage_version,
    184193                            os_, kernel_version, cpus, cpu_speed, cpu_model,
    185194                            mem_total, mem_free, uuid))
    186    
     195
    187196    def get_worker(self, uuid):
    188197        query = """SELECT
    189198        uuid,
     
    195204        os
    196205        FROM monitors
    197206        WHERE uuid = ?"""
    198        
     207
    199208        cur = self.con.cursor()
    200         cur.execute(query, (uuid,))
     209        cur.execute(query, (uuid, ))
    201210        result = cur.fetchone()
    202211        if result is None:
    203212            return result
     
    206215        for k, v in monitor.iteritems():
    207216            if k == 'authenticated':
    208217                monitor[k] = bool(v)
    209        
     218
    210219        return monitor
    211    
     220
    212221    def get_worker_list(self):
    213222        """
    214223        Returns a list of connected monitors.
    215        
     224
    216225        """
    217        
     226
    218227        query = """SELECT * FROM monitors"""
    219228        cur = self.con.cursor()
    220229        cur.execute(query)
     
    226235                 # Convert from 1/0 to python bool
    227236                if k in ('authenticated', 'connected', 'busy'):
    228237                    monitor[k] = bool(v)
    229        
     238
    230239        return monitors
    231    
     240
    232241    def set_connected(self, uuid, connected=True):
    233242        """
    234243        Sets the connected status of a monitor.
    235        
     244
    236245        Parameters:
    237246        uuid -- string
    238247        connected -- bool
    239        
     248
    240249        """
    241        
     250
    242251        cur = self.con.cursor()
    243252        if connected:
    244253            query = """UPDATE monitors SET connected=1, last_connection=?
     
    246255            cur.execute(query, (datetime.datetime.now(), uuid))
    247256        else:
    248257            query = """UPDATE monitors SET connected=0 WHERE uuid=?"""
    249             cur.execute(query, (uuid,))
    250        
     258            cur.execute(query, (uuid, ))
     259
    251260        self.con.commit()
    252    
     261
    253262    def is_connected(self, uuid):
    254263        """
    255264        Returns whether the monitor is connected.
    256        
     265
    257266        """
    258        
     267
    259268        query = """SELECT connected FROM monitors WHERE uuid = ?"""
    260269        cur = self.con.cursor()
    261         cur.execute(query, (uuid,))
     270        cur.execute(query, (uuid, ))
    262271        result = cur.fetchone()[0]
    263        
     272
    264273        return result
    265    
     274
    266275    def set_busy(self, uuid, busy):
    267276        """
    268277        Sets whether or not a worker is doing a job.
    269        
     278
    270279        """
    271        
     280
    272281        if busy:
    273282            query = """UPDATE monitors SET busy=1 WHERE uuid=?"""
    274283        else:
    275284            query = """UPDATE monitors SET busy=0 WHERE uuid=?"""
    276        
     285
    277286        cur = self.con.cursor()
    278         cur.execute(query, (uuid,))
     287        cur.execute(query, (uuid, ))
    279288        self.con.commit()
    280    
     289
    281290    def get_worker_count(self, connected, busy=False):
    282291        """
    283292        Returns the number of workers.
    284        
     293
    285294        Parameters:
    286295        connected -- bool
    287296        busy -- bool
    288        
     297
    289298        """
    290        
     299
    291300        if connected and not busy:
    292301            query = """
    293302            SELECT workers FROM monitors WHERE connected AND NOT busy
     
    304313            query = """
    305314            SELECT workers FROM monitors WHERE NOT connected AND busy
    306315            """
    307        
     316
    308317        cur = self.con.cursor()
    309318        cur.execute(query)
    310        
     319
    311320        result = cur.fetchall()
    312        
     321
    313322        return sum(w[0] for w in result)
    314    
     323
    315324    def get_cpu_speed(self, connected=True, busy=False):
    316325        """
    317326        Returns the aggregate cpu speed in Mhz.
    318        
     327
    319328        Parameters:
    320329        connected -- bool
    321        
     330
    322331        """
    323        
     332
    324333        if connected and busy:
    325334            query = """SELECT cpu_speed, workers FROM monitors
    326335            WHERE connected AND busy"""
     
    329338            WHERE connected"""
    330339        else:
    331340            query = """SELECT cpu_speed, workers FROM monitors"""
    332        
     341
    333342        cur = self.con.cursor()
    334343        cur.execute(query)
    335        
     344
    336345        result = cur.fetchall()
    337        
     346
    338347        cpu_speed = sum([s[0]*s[1] for s in result])
    339        
     348
    340349        return cpu_speed
    341    
     350
    342351    def get_cpu_count(self, connected=True):
    343352        """
    344353        Returns the number of cpus that are available.
    345        
     354
    346355        Parameters:
    347356        connected -- bool
    348        
     357
    349358        """
    350        
     359
    351360        if connected:
    352361            query = """SELECT workers, cpus FROM monitors WHERE connected"""
    353362        else:
    354363            query = """SELECT workers, cpus FROM monitors"""
    355        
     364
    356365        cur = self.con.cursor()
    357366        cur.execute(query)
    358        
     367
    359368        result = cur.fetchall()
    360        
     369
    361370        cpu_count = sum(min(s[0:2]) for s in result)
    362        
    363         return cpu_count
    364  No newline at end of file
     371
     372        return cpu_count
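
    The raw-sqlite `WorkerDatabase` builds its `UPDATE` statements by interpolating only the column name into the SQL text and binding the value and uuid as `?` parameters, and the aggregate getters sum over the fetched rows in Python. A standalone sketch of the `_set_parameter` idiom with the standard-library `sqlite3` module (the table here is a stand-in, not the real `monitors` schema):

        import sqlite3

        con = sqlite3.connect(':memory:')
        con.execute("CREATE TABLE monitors (uuid TEXT PRIMARY KEY, busy INTEGER)")
        con.execute("INSERT INTO monitors VALUES (?, ?)", ('abc-123', 0))

        def set_parameter(con, uuid, key, value):
            # the column name comes from trusted calling code, so it is
            # interpolated; the value and uuid are bound as parameters
            query = "UPDATE monitors SET %s=? WHERE uuid=?" % key
            cur = con.cursor()
            cur.execute(query, (value, uuid))
            con.commit()

        set_parameter(con, 'abc-123', 'busy', 1)
        print(con.execute("SELECT busy FROM monitors").fetchone()[0])   # 1
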
  • sage/dsage/dist_functions/dist_factor.py

    diff --git a/sage/dsage/dist_functions/dist_factor.py b/sage/dsage/dist_functions/dist_factor.py
    --- a/sage/dsage/dist_functions/dist_factor.py
    +++ b/sage/dsage/dist_functions/dist_factor.py
    1616           Robert Bradshaw
    1717           Yi Qiang
    1818   
    19    
    20     sage: d = dsage.start_all(verbose=False, workers=4) # long time
     19    sage: from sage.dsage.misc.misc import test_dsage
     20    sage: d = test_dsage(dsage)
    2121    Going into testing mode...
    22     sage: sleep(5) # long time
    23     sage: f = DistributedFactor(d, 2^125-1) # long time
    24     sage: print f # long time
     22    sage: f = DistributedFactor(d, 2^125-1)
     23    sage: print f 
    2524    Factoring "42535295865117307932921825928971026431"
    2625    Prime factors found so far: [31, 601, 1801]
    27     sage: f.done # long time
     26    sage: f.done
    2827    False
    29     sage: f.wait(timeout=60) # long time
    30     sage: f.done # long time
     28    sage: f.wait(timeout=60)
     29    sage: f.done
    3130    True
    32     sage: print f # long time
     31    sage: print f
    3332    Factoring "42535295865117307932921825928971026431"
    3433    Prime factors found so far: [31, 601, 1801, 269089806001, 4710883168879506001]
    35    
    3634    """
    3735   
    3836    def __init__(self, dsage, n, concurrent=10, B1=2000, curves=50,
  • sage/dsage/dist_functions/dist_function.py

    diff --git a/sage/dsage/dist_functions/dist_function.py b/sage/dsage/dist_functions/dist_function.py
    --- a/sage/dsage/dist_functions/dist_function.py
    +++ b/sage/dsage/dist_functions/dist_function.py
    2323
    2424from sage.dsage.database.job import Job
    2525from sage.dsage.interface.dsage_interface import (JobWrapper,
    26                                                   BlockingJobWrapper,
    27                                                   blockingCallFromThread)
     26                                                  BlockingJobWrapper)
     27                                                 
     28from twisted.internet import reactor
     29from twisted.internet.threads import blockingCallFromThread
    2830
    2931class DistributedFunction(object):
    3032    """
     
    112114        Reloads a distributed job from disk.
    113115       
    114116        """
    115         from twisted.internet import reactor
     117       
    116118        from twisted.internet import task
    117119        if dsage.remoteobj is None:
    118120            # XXX This is a hack because dsage.remoteobj is not set yet
     
    164166               self.submit_job(job, job_name, async)
    165167        self.outstanding_jobs = []
    166168
    167     def wait(self, timeout=None):
     169    def wait(self, t=0.5, timeout=None):
    168170        """
    169171        Blocks until the job is completed.
    170172       
     173        t -- the time to wait before polling again.
    171174        """
    172175       
    173176        import signal
    174177        if timeout == None:
    175178            while not self.done:
    176                     time.sleep(0.5)
     179                    time.sleep(t)
    177180        else:
    178181            def handler(signum, frame):
    179182                raise RuntimeError('Maximum wait time exceeded.')
    180183            signal.signal(signal.SIGALRM, handler)
    181184            signal.alarm(timeout)
    182185            while not self.done:
    183                 time.sleep(0.5)
     186                time.sleep(t)
    184187            signal.alarm(0)
    185188       
    186     def start(self):
     189    def start(self, ctime=1.0):
    187190        """
    188191        Starts the Distributed Function. It will submit all jobs in the
    189192        outstanding_jobs queue and also start a checker tasks that polls for
     
    201204        self.checker_task = blockingCallFromThread(self.reactor,
    202205                                                   task.LoopingCall,
    203206                                                   self.check_waiting_jobs)
    204         self.reactor.callFromThread(self.checker_task.start, 5.0, now=True)
    205    
     207        self.reactor.callFromThread(self.checker_task.start, ctime, now=True)
    206208   
    207209    def process_result(self):
    208210        """
     
    216218   
    217219    def check_waiting_jobs(self):
    218220        """
    219         Checks the status of jobs in the waiting queue.
    220        
     221        I check the status of jobs in the waiting queue.
    221222        """
    222223       
    223224        from twisted.internet import reactor
    224225        from twisted.spread import pb
     226
    225227        for wrapped_job in self.waiting_jobs:
    226228            if wrapped_job.killed == True:
    227229                self.waiting_jobs.remove(wrapped_job)
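
    The reworked `wait()` polls `self.done` every `t` seconds and, when a `timeout` is given, arms `signal.alarm` with a SIGALRM handler that raises once the deadline passes. A self-contained sketch of that idiom (Unix-only, main thread only; restoring the previous handler is an addition here, the patch leaves its handler installed):

        import signal
        import time

        def wait_until(condition, t=0.5, timeout=None):
            """Poll condition() every t seconds; raise after timeout seconds."""
            if timeout is None:
                while not condition():
                    time.sleep(t)
                return
            def handler(signum, frame):
                raise RuntimeError('Maximum wait time exceeded.')
            old = signal.signal(signal.SIGALRM, handler)
            signal.alarm(timeout)
            try:
                while not condition():
                    time.sleep(t)
            finally:
                signal.alarm(0)                     # cancel the pending alarm
                signal.signal(signal.SIGALRM, old)  # restore the old handler

        # e.g. wait_until(lambda: f.done, timeout=60)
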
  • sage/dsage/dsage.py

    diff --git a/sage/dsage/dsage.py b/sage/dsage/dsage.py
    a b  
    3535                                       SERVER_TAC, DSAGE_DB)
    3636from sage.dsage.misc.config import check_dsage_dir
    3737from sage.dsage.misc.misc import find_open_port
     38from sage.dsage.misc.misc import write_tac
    3839import sage.plot.plot
     40
    3941
    4042def spawn(cmd, verbose=True, stdout=None, stdin=None):
    4143    """
    4244    Spawns a process and registers it with the SAGE.
    4345    """
    44    
     46
    4547    null = open('/dev/null', 'a')
    4648    if stdout is None:
    4749        stdout = null
    4850    if stdin is None:
    4951        stdin = null
    5052    cmdl = cmd.split(' ')
    51     process = subprocess.Popen(cmdl, shell=False, stdout=stdout, stdin=null)
     53    process = subprocess.Popen(cmdl, shell=False, stdout=stdout, stdin=stdin)
    5254    sage.interfaces.cleaner.cleaner(process.pid, cmd)
    5355    if verbose:
    5456        print 'Spawned %s (pid = %s)\n' % (' '.join(cmdl), process.pid)
    55    
     57
    5658    return process
    5759
    5860
    5961class DistributedSage(object):
    6062    r"""
    6163    Distributed SAGE allows you to do distributed computing in SAGE.
    62    
     64
    6365    To get up and running quickly, run dsage.setup() to run the
    6466    configuration utility.
    65    
     67
    6668    Note that configuration files will be stored in the
    6769    directory \code{\$DOT\_SAGE/dsage}.
    68    
     70
    6971    QUICK-START
    7072
    7173    1.  Launch sage
     
    9294        \code{sage: j}
    9395        \code{4}
    9496    """
    95        
    96     def start_all(self, port=None, workers=2, log_level=0, poll=1.0,
    97                   authenticate=False, failure_threshold=3,
    98                   verbose=True, testing=False):
     97
     98    def start_all(self, port=None, workers=2, log_level=0, authenticate=False,
     99                  failure_threshold=3, verbose=True, testing=False):
    99100        """
    100101        Start the server and worker and returns a connection to the server.
    101        
     102
    102103        """
    103        
     104
    104105        from sage.dsage.interface.dsage_interface import BlockingDSage
    105106        from sage.dsage.misc.misc import find_open_port
    106        
     107
    107108        if port is None:
    108109            port = find_open_port().next()
    109        
     110
    110111        if testing or sage.plot.plot.DOCTEST_MODE:
    111112            test_db = tempfile.NamedTemporaryFile()
    112113            testing = True
     
    122123                        log_level=5,
    123124                        ssl=False,
    124125                        blocking=False,
    125                         poll=0.1,
    126126                        authenticate=authenticate,
    127127                        verbose=False)
    128128        else:
     
    131131                        blocking=False,
    132132                        failure_threshold=failure_threshold,
    133133                        verbose=verbose)
    134        
     134
    135135            self.worker(port=port,
    136136                        workers=workers,
    137137                        log_level=log_level,
    138138                        blocking=False,
    139                         poll=poll,
    140139                        authenticate=authenticate,
    141140                        verbose=verbose)
    142        
     141
    143142        # We want to establish a connection to the server
    144143        tries = 10
    145144        while(tries > 0):
     
    158157            print 'Could not connect to the server.'
    159158            print 'Error msg from last attempt: %s' % (msg)
    160159            return
    161        
     160
    162161        if testing or sage.plot.plot.DOCTEST_MODE:
    163162            d = BlockingDSage(server='localhost', port=port, testing=testing,
    164163                              ssl=False)
    165164        else:
    166165            d = BlockingDSage(server='localhost', port=port)
    167                
     166
    168167        return d
    169    
     168
    170169    def kill_all(self):
    171170        """
    172171        Kills the server and worker.
    173        
     172
    174173        """
    175        
     174
    176175        self.kill_worker()
    177176        self.kill_server()
    178    
     177
    179178    def kill_worker(self):
    180179        try:
    181180            os.kill(self.worker_proc.pid, 9)
     
    183182            del self.worker_proc
    184183        except OSError, msg:
    185184            print 'Error killing worker: %s' % msg
    186    
     185
    187186    def kill_server(self):
    188187        try:
    189188            os.kill(self.server_proc.pid, 9)
     
    191190            del self.server_proc
    192191        except OSError, msg:
    193192            print 'Error killing server: %s' % msg
    194    
     193
     194
    195195    def server(self, blocking=True, port=None, log_level=0, ssl=True,
    196196               db_file=DSAGE_DB,
    197197               log_file=SERVER_LOG,
     
    201201               verbose=True, testing=False, profile=False):
    202202        r"""
    203203        Run the Distributed SAGE server.
    204        
     204
    205205        Doing \code{dsage.server()} will spawn a server process which
    206206        listens by default on port 8081.
    207207        """
     208
    208209        open_ports = find_open_port()
    209210        check_dsage_dir()
    210211        cwd = os.getcwd()
    211212        pid_file = 'server.pid'
    212        
    213         def write_tac(tac):
    214             os.chdir(DSAGE_DIR)
    215             f = open('dsage_server.tac', 'w')
    216             f.writelines(tac)
    217             f.close()
    218213
    219214        if testing or sage.plot.plot.DOCTEST_MODE:
    220215            test_db = tempfile.NamedTemporaryFile()
     
    227222            except:
    228223                pass
    229224            db_file = test_db.name
    230            
     225
    231226        if port != None:
    232227            server_port = port
    233228        else:
    234229            server_port = open_ports.next()
    235230            open_ports.next()
    236            
     231
    237232        tac = SERVER_TAC % (db_file, failure_threshold, ssl, log_level,
    238233                            log_file, privkey, cert, server_port, testing)
    239         write_tac(tac)
    240        
     234        write_tac(tac, 'dsage_server.tac')
     235
    241236        cmd = 'twistd -d %s --pidfile=%s ' % (DSAGE_DIR, pid_file)
    242237        if profile:
    243238            if verbose:
    244239                print 'Launched with profiling enabled...'
    245             cmd += '--nothotshot --profile=dsage_server.profile --savestats '
     240            cmd += '--profile=dsage_server.profile --savestats '
    246241        if blocking:
    247242            cmd += '--nodaemon -y dsage_server.tac'
    248243            cmd += ' | tee -a %s' % (log_file)
     
    265260                    time.sleep(0.1)
    266261                    continue
    267262        os.chdir(cwd)
    268        
    269    
    270     def worker(self, server='localhost', port=8081, workers=2, poll=1.0,
     263
     264    def worker(self, server='localhost', port=8081, workers=2,
    271265               username=getuser(), blocking=True, ssl=True, log_level=0,
    272266               authenticate=True, priority=20,
    273267               privkey=os.path.join(DSAGE_DIR, 'dsage_key'),
    274268               pubkey=os.path.join(DSAGE_DIR, 'dsage_key.pub'),
    275                log_file=WORKER_LOG,
    276                verbose=True):
     269               log_file=WORKER_LOG, verbose=True, profile=False):
    277270        r"""
    278         Run the Distributed SAGE worker.
    279        
     271        Run the Distributed Sage worker.
     272
    280273        Typing \code{sage.worker()} will launch a worker which by
    281274        default connects to localhost on port 8081 to fetch jobs.
    282275        """
    283        
     276
     277        from sage.dsage.worker.monitor import MONITOR_TAC as tac
    284278        check_dsage_dir()
    285         cmd = ('dsage_worker.py -s %s -p %s -u %s -w %s --poll %s -l %s -f %s '
    286                + '--privkey=%s --pubkey=%s --priority=%s')
    287         cmd = cmd % (server, port, username, workers, poll, log_level,
    288                      log_file, privkey, pubkey, priority)
    289         if ssl:
    290             cmd += ' --ssl'
    291         if authenticate:
    292             cmd += ' -a'
    293         if not blocking:
    294             cmd += ' --noblock'
    295             cmd = 'python ' + SAGE_ROOT + '/local/bin/' + cmd
     279        cwd = os.getcwd()
     280        pid_file = 'worker.pid'
     281        tac = tac % (server, port, workers, username, ssl, authenticate,
     282                     priority, log_level, log_file, privkey, pubkey)
     283        write_tac(tac, 'dsage_worker.tac')
     284        cmd = 'twistd -d %s --pidfile=%s ' % (DSAGE_DIR, pid_file)
     285        if profile:
     286            cmd += '--profile=dsage_worker.profile --savestats '
     287        if blocking:
     288            cmd += '--nodaemon -y dsage_worker.tac'
     289            cmd += ' | tee -a %s' % (log_file)
     290            os.system(cmd)
     291        else:
     292            try:
     293                os.remove(pid_file)
     294            except:
     295                pass
     296            cmd += '--logfile=%s -y dsage_worker.tac' % (log_file)
    296297            self.worker_proc = spawn(cmd, verbose=verbose)
    297         else:
    298             cmd = 'python ' + SAGE_ROOT + '/local/bin/' + cmd
    299             os.system(cmd)
    300    
     298            while True:
     299                try:
     300                    pid = int(open(pid_file).read())
     301                    sage.interfaces.cleaner.cleaner(pid, cmd)
     302                    break
     303                except:
     304                    time.sleep(0.1)
     305                    continue
     306        os.chdir(cwd)
    301307   
    302308    def setup(self, template=None):
    303309        r"""
    304310        This is the setup utility which helps you configure dsage.
    305        
     311
    306312        Type \code{dsage.setup()} to run the configuration for the server,
    307313        worker and client.  Alternatively, if you want to run the
    308314        configuration for just one parts, you can launch
    309315        \code{dsage.setup_server()}, \code{dsage.setup\_worker()}
    310316        or \code{dsage.setup()}.
    311        
     317
    312318        """
    313        
     319
    314320        from sage.dsage.scripts.dsage_setup import setup
    315321        setup(template=template)
    316    
    317    
     322
    318323    def setup_server(self, *args):
    319324        """
    320325        This method runs the configuration utility for the server.
    321        
     326
    322327        """
    323        
     328
    324329        from sage.dsage.scripts.dsage_setup import setup_server
    325330        setup_server(*args)
    326    
    327    
     331
    328332    def setup_worker(self):
    329333        """
    330334        This method runs the configuration utility for the worker.
    331        
     335
    332336        """
    333        
     337
    334338        from sage.dsage.scripts.dsage_setup import setup_worker
    335339        setup_worker()
    336    
    337    
     340
    338341    def setup_client(self):
    339342        """
    340343        This method runs the configuration utility for the client.
    341        
     344
    342345        """
    343        
     346
    344347        from sage.dsage.scripts.dsage_setup import setup_client
    345348        setup_client()
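
    `spawn()` splits the command line, launches it with `subprocess.Popen`, and registers the child pid with Sage's process cleaner; the small fix in this file is that `stdin` is now actually passed through instead of always being `/dev/null`. A stripped-down sketch of the launch-and-register pattern (`register_pid` is a stand-in for `sage.interfaces.cleaner.cleaner`):

        import subprocess

        def spawn(cmd, register_pid, stdout=None, stdin=None, verbose=True):
            null = open('/dev/null', 'a')
            if stdout is None:
                stdout = null
            if stdin is None:
                stdin = null
            process = subprocess.Popen(cmd.split(' '), shell=False,
                                       stdout=stdout, stdin=stdin)
            register_pid(process.pid, cmd)   # e.g. sage.interfaces.cleaner.cleaner
            if verbose:
                print('Spawned %s (pid = %s)' % (cmd, process.pid))
            return process
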
  • sage/dsage/interface/dsage_interface.py

    diff --git a/sage/dsage/interface/dsage_interface.py b/sage/dsage/interface/dsage_interface.py
    --- a/sage/dsage/interface/dsage_interface.py
    +++ b/sage/dsage/interface/dsage_interface.py
    2626import time
    2727from getpass import getuser
    2828
    29 # This is a version of blockingCallFromThread that delays importing
    30 # twisted.internet until it is first used.
    31 def blockingCallFromThread(*args, **kwds):
    32     from twisted.internet.threads import blockingCallFromThread
    33     return blockingCallFromThread(*args, **kwds)
     29from twisted.cred.credentials import Anonymous
     30from twisted.internet.threads import blockingCallFromThread
     31from twisted.internet import reactor
    3432
    3533from sage.dsage.database.job import Job, expand_job
    3634from sage.dsage.misc.misc import random_str
    3735from sage.dsage.misc.constants import DSAGE_DIR
    3836
     37
    3938class DSageThread(threading.Thread):
    4039    """
    4140    DSage thread
    42    
     41
    4342    """
    44    
     43
    4544    def run(self):
    46         from twisted.internet import reactor
    4745        if not reactor.running:
    4846            try:
    4947                reactor.run(installSignalHandlers=0)
     
    6664    log_level -- int (Default: 0)
    6765    ssl -- int (Default: 1)
    6866    """
    69    
     67
    7068    def __init__(self, server='localhost', port=8081,
    7169                 username=getuser(),
    7270                 pubkey_file=os.path.join(DSAGE_DIR, 'dsage_key.pub'),
    7371                 privkey_file=os.path.join(DSAGE_DIR, 'dsage_key'),
    7472                 log_level=0, ssl=True, testing=False):
    75        
     73
    7674        from twisted.cred import credentials
    7775        from twisted.conch.ssh import keys
    7876        from twisted.spread import banana
    7977        banana.SIZE_LIMIT = 100*1024*1024 # 100 MegaBytes
    80        
     78
    8179        self.server = server
    8280        self.port = port
    8381        self.username = username
     
    9088        self.result = None
    9189        self.info_str = 'Connected to: %s:%s'
    9290        self._testing = testing
    93        
     91
    9492        if not self._testing:
    9593            self._pubkey = keys.Key.fromFile(self._pubkey_file)
    9694            try:
     
    110108        else:
    111109            self.username = 'tester'
    112110        self.connect()
    113    
    114    
     111
    115112    def __repr__(self):
    116113        return self.__str__()
    117    
     114
    118115    def __str__(self):
    119116        if self.is_connected():
    120117            return self.info_str % (self.server, self.port)
    121118        else:
    122119            return 'Not connected.'
    123    
     120
    124121    def __call__(self, cmd, user_vars=None, load_files=[], job_name=None):
    125122        cmd = ['ans = %s\n' % (cmd),
    126123               'print ans\n',
    127124               "DSAGE_RESULT = ans\n"]
    128        
     125
    129126        return self.eval(''.join(cmd), user_vars=user_vars,
    130127                                       load_files=load_files,
    131128                                       job_name=job_name)
    132    
     129
    133130    def __getstate__(self):
    134131        d = copy.copy(self.__dict__)
    135132        d['remoteobj'] = None
    136        
     133
    137134        return d
    138    
     135
    139136    def _getpassphrase(self):
    140137        import getpass
    141138        passphrase = getpass.getpass('Passphrase (Hit enter for None): ')
    142        
     139
    143140        return passphrase
    144    
     141
    145142    def _catch_failure(self, failure):
    146143        print "Error connecting: %s" % failure.getErrorMessage()
    147    
     144
    148145    def _connected(self, remoteobj):
    149146        if self._log_level > 0:
    150147            print 'Connected to remote server.\r'
    151148        self._remoteobj = remoteobj
    152149        self._remoteobj.notifyOnDisconnect(self._disconnected)
    153    
     150
    154151    def _disconnected(self, remoteobj):
    155152        print '[DSage] Closed connection to %s' % (self.server)
    156153        self.info_str = 'Not connected.'
    157    
     154
    158155    def _got_my_jobs(self, jobs, job_name):
    159156        from sage.dsage.errors.exceptions import NoJobException
    160157        if jobs == None:
     
    162159        if job_name:
    163160            return [JobWrapper(self._remoteobj, job)
    164161                    for job in jobs if job.name == job_name]
    165    
     162
    166163    def _killed_job(self, job_id):
    167164        pass
    168    
     165
    169166    def restore(self, remoteobj):
    170167        """
    171168        This method restores a connection to the server.
    172        
     169
    173170        """
    174        
     171
    175172        self._remoteobj = remoteobj
    176    
     173
    177174    def connect(self):
    178175        """
    179176        This methods establishes the conection to the remote server.
    180        
     177
    181178        """
    182        
     179
    183180        from twisted.internet import reactor
    184181        from sage.dsage.twisted.pb import ClientFactory
    185182        factory = ClientFactory(self._login, (), {})
    186183        factory.continueTrying = False # Do not attempt to reconnect
    187        
     184
    188185        if self.ssl == 1:
    189186            # Old, uses OpenSSL, SAGE uses GNUTLS now
    190187            # from twisted.internet import ssl
     
    198195            reactor.connectTLS(self.server, self.port, factory, cred)
    199196        else:
    200197            reactor.connectTCP(self.server, self.port, factory)
    201    
     198
    202199    def _login(self, *args, **kwargs):
    203         from twisted.cred.credentials import Anonymous
    204200        if self._testing:
    205201            d = self.factory.login(Anonymous(), None)
    206202        else:
    207203            d = self.factory.login(self._creds, None)
    208204        d.addCallback(self._connected)
    209205        d.addErrback(self._catch_failure)
    210        
     206
    211207        return d
    212    
     208
    213209    def disconnect(self):
    214210        print 'Disconnecting from server.'
    215         self._remoteobj = None
    216    
     211        t = self._remoteobj.broker.transport
     212        d = blockingCallFromThread(reactor, t.loseConnection)
     213
    217214    def eval(self, cmd, timeout=0, user_vars=None, job_name=None):
    218215        """
    219216        eval evaluates a command
    220        
     217
    221218        Parameters:
    222219        cmd -- the sage command to be evaluated (str)
    223220        globals -- a dict (see help for python's eval method)
    224221        job_name -- an alphanumeric job name
    225        
     222
    226223        """
    227        
     224
    228225        self.is_connected()
    229226        if not job_name or not isinstance(job_name, str):
    230227            job_name = 'default job'
    231        
     228
    232229        kind = 'sage'
    233        
     230
    234231        # We have to convert timeout to a python int so it will not cause
    235232        # security exceptions with twisted.
    236        
    237         job = Job(job_id=None, code=cmd, name=job_name,     
     233
     234        job = Job(job_id=None, code=cmd, name=job_name,
    238235                  username=self.username, timeout=timeout, kind=kind)
    239        
     236
    240237        wrapped_job = JobWrapper(self._remoteobj, job)
    241238        if user_vars is not None:
    242239            for k, v in user_vars.iteritems():
    243240                job.attach(k, v)
    244        
     241
    245242        return wrapped_job
    246    
     243
    247244    def eval_file(self, fname, job_name, async=False):
    248245        """
    249246        eval_file allows you to evaluate the contents of an entire file.
    250        
     247
    251248        Parameters:
    252249            fname -- file name of the file you wish to evaluate
    253        
     250
    254251        """
    255        
     252
    256253        self.is_connected()
    257        
     254
    258255        kind = 'file'
    259256        cmd = open(fname).read()
    260257        job = Job(job_id=None, code=cmd, name=job_name,
    261258                  username=self.username, kind=kind)
    262        
     259
    263260        if async:
    264261            wrapped_job = JobWrapper(self._remoteobj, job)
    265262        else:
    266263            wrapped_job = BlockingJobWrapper(self._remoteobj, job)
    267        
     264
    268265        return wrapped_job
    269    
     266
    270267    def send_job(self, job):
    271268        """
    272269        Sends a Job object to the server.
    273        
     270
    274271        """
    275        
     272
    276273        if not isinstance(job, Job):
    277274            raise TypeError
    278275        wrapped_job = JobWrapper(self._remoteobj, job)
    279276        return wrapped_job
    280    
     277
    281278    def _got_job_id(self, id_, job):
    282279        job.job_id = id_
    283280        job.username = self.username
     
    285282        d = self._remoteobj.callRemote('submit_job', pickled_job)
    286283        d.addErrback(self._catch_failure)
    287284        # d.addCallback(self._submitted, job)
    288        
     285
    289286        return JobWrapper(self._remoteobj, job)
    290    
     287
    291288    def eval_dir(self, dir_, job_name):
    292289        from twisted.internet import defer
    293290        self.is_connected()
     
    303300            deferreds.append(d)
    304301        d_list = defer.DeferredList(deferreds)
    305302        return d_list
    306    
     303
    307304    def kill(self, job_id, async=False):
    308305        """
    309306        Kills a job given the job id.
    310        
     307
    311308        Parameters:
    312309        job_id -- job id
    313        
     310
    314311        """
    315        
     312
    316313        if async:
    317314            d = self._remoteobj.callRemote('kill_job', job_id)
    318315            d.addCallback(self._killed_job)
     
    321318            job_id = blockingCallFromThread(self._remoteobj.callRemote,
    322319                                               'kill_job',
    323320                                               job_id)
    324                                                
    325    
     321
    326322    def get_my_jobs(self, is_active=False, job_name=None):
    327323        """
    328324        This method returns a list of jobs that belong to you.
    329        
     325
    330326        Parameters:
    331327        is_active -- set to true to get only active jobs (bool)
    332        
     328
    333329        Use this method if you get disconnected from the server and wish to
    334330        retrieve your old jobs back.
    335        
     331
    336332        """
    337        
     333
    338334        self.is_connected()
    339        
     335
    340336        d = self._remoteobj.callRemote('get_jobs_by_username',
    341337                                      self.username,
    342338                                      is_active,
    343339                                      job_name)
    344340        d.addCallback(self._got_my_jobs, job_name)
    345341        d.addErrback(self._catch_failure)
    346        
     342
    347343        return d
    348    
     344
    349345    def cluster_speed(self):
    350346        """
    351347        Returns the speed of the cluster.
    352        
     348
    353349        """
    354        
     350
    355351        self.is_connected()
    356        
     352
    357353        return self._remoteobj.callRemote('get_cluster_speed')
    358    
    359     def is_connected(self):       
    360         if self._remoteobj == None:
    361             return False
    362         if self._remoteobj.broker.disconnected:
    363             raise False
    364         return True
     354
     355    def is_connected(self):
     356        return self._remoteobj or self._remoteobj.broker.disconnected
    365357
    366358
    367359class BlockingDSage(DSage):
    368360    """
    369361    This is the blocking version of the DSage interface.
    370    
    371362    """
    372     def __init__(self, server='localhost', port=8081,
    373                 username=getuser(),
     363
     364    def __init__(self, server='localhost', port=8081, username=getuser(),
    374365                 pubkey_file=os.path.join(DSAGE_DIR, 'dsage_key.pub'),
    375366                 privkey_file=os.path.join(DSAGE_DIR, 'dsage_key'),
    376367                 log_level=0, ssl=True, testing=False):
     
    380371        DSage.__init__(self, server=server, port=port, username=username,
    381372                       pubkey_file=pubkey_file, privkey_file=privkey_file,
    382373                       log_level=log_level, ssl=ssl, testing=testing)
    383    
    384    
     374
    385375    def connect(self):
    386376        """
    387377        This methods establishes the conection to the remote server.
    388        
     378
    389379        """
    390        
     380
    391381        from twisted.internet import reactor
    392382        from sage.dsage.twisted.pb import ClientFactory
    393        
     383
    394384        self.factory = ClientFactory(self._login, (), {})
    395385        self.factory.continueTrying = False
    396        
     386
    397387        if self.ssl:
    398388            from gnutls.interfaces.twisted import X509Credentials
    399389            cred = X509Credentials()
    400             blockingCallFromThread(reactor, reactor.connectTLS, self.server, 
     390            blockingCallFromThread(reactor, reactor.connectTLS, self.server,
    401391                                   self.port, self.factory, cred)
    402392        else:
    403393            blockingCallFromThread(reactor, reactor.connectTCP, self.server,
    404394                                   self.port, self.factory)
    405    
     395
    406396    def _login(self, *args, **kwargs):
    407         from twisted.cred.credentials import Anonymous
    408397        if self._testing:
    409398            d = self.factory.login(Anonymous(), None)
    410399        else:
    411400            d = self.factory.login(self._creds, None)
    412401        d.addCallback(self._connected)
    413402        d.addErrback(self._catch_failure)
    414        
     403
    415404        return d
    416405
    417406    def job_results_iter(self, jobs):
    418407        """
    419408        Returns an iterator that yields results of jobs as they come in.
    420        
    421         INPUT: 
     409
     410        INPUT:
    422411            jobs -- a list of tuples (x, j) where x is (args, kwds) and j is
    423412                    a job object
    424        
     413
    425414        OUTPUT:
    426415            (x, job)
    427416
    428417        """
    429        
     418
    430419        import time
    431420        out_list = []
    432        
     421
    433422        while len(out_list) != len(jobs):
    434             for x,j in jobs:
     423            for x, j in jobs:
    435424                if j not in out_list:
    436425                    j.get_job()
    437426                    if j.status in ('completed', 'killed'):
    438427                        out_list.append(j)
    439428                        yield (x, j)
    440429                    time.sleep(0.2)
    441    
     430
    442431    def block_on_jobs(self, jobs):
    443432        """
    444433        Blocks on a list of jobs until all the jobs are completed.
    445        
     434
    446435        INPUT:
    447436            jobs -- a list of jobs which are not completed
    448        
     437
    449438        OUTPUT:
    450439            jobs -- a list of completed jobs
    451            
     440
    452441        EXAMPLE:
    453442            sage: from sage.dsage.misc.misc import find_open_port
    454443            sage: port = find_open_port().next()
     
    462451            True
    463452            sage: def f(n):
    464453            ...     return n*n
    465             ... 
     454            ...
    466455            sage: j = d.block_on_jobs(d.map(f, [25,12,25,32,12]))
    467             sage: j # random
     456            sage: j
    468457            [625, 144, 625, 1024, 144]
    469458        """
    470        
     459
    471460        out_list = []
    472        
     461
    473462        while len(out_list) != len(jobs):
    474463            for j in jobs:
    475464                if j not in out_list:
     
    477466                    if j.status in ('completed', 'killed'):
    478467                        out_list.append(j)
    479468        return out_list
    480        
     469
    481470    def map(self, f, *args):
    482471        """
    483472        Apply function to every item of iterable and return a list of the
    484473        results. If additional iterable arguments are passed, function must
    485474        take that many arguments and is applied to the items from all
    486475        iterables in parallel.
    487        
     476
    488477        INPUT:
    489478            f -- a function
    490479            *args -- iterables containing the parameters to the function
    491            
     480
    492481        EXAMPLE:
    493482            sage: from sage.dsage.misc.misc import find_open_port
    494483            sage: port = find_open_port().next()
     
    502491            True
    503492            sage: def f(n):
    504493            ...     return n*n
    505             ... 
     494            ...
    506495            sage: j = d.map(f, [25,12,25,32,12])
    507496            sage: j
    508497            [No output yet.,
     
    513502        """
    514503
    515504        from itertools import izip
    516        
    517         jobs = [self.eval_function(f, (a, {}), job_name=f.__name__) 
     505
     506        jobs = [self.eval_function(f, (a, {}), job_name=f.__name__)
    518507                for a in izip(*args)]
    519                    
     508
    520509        return jobs
    521    
    522     def parallel_iter(self, f, inputs):       
     510
     511    def parallel_iter(self, f, inputs):
    523512        """
    524513        dsage parallel iterator implementation.
    525514
     
    532521        OUTPUT:
    533522            iterator over 2-tuples (inputs[i], f(inputs[i])),
    534523            where the order may be completely random
    535        
     524
    536525        EXAMPLE:
    537526            sage: from sage.dsage.misc.misc import find_open_port
    538527            sage: port = find_open_port().next()
     
    549538            ... def f(n,m):
    550539            ...     return n+m
    551540            ...
    552             sage: list(f([(1,2), (5, 10/3)])) # random
    553             [(((5, 10/3), {}), 25/3), (((1, 2), {}), 3)]
     541            sage: f([(1,2), (5, 10/3)]) # random
     542            [((1, 2), 3), ((5, 10/3), 25/3)]
    554543        """
    555        
     544
    556545        jobs = []
    557546        for x in inputs:
    558547            job = self.eval_function(f, x, job_name=f.__name__)
    559548            jobs.append((x, job))
    560        
    561         return self.job_results_iter(jobs) 
    562    
     549
     550        return self.job_results_iter(jobs)
     551
    563552    def eval_function(self, f, arguments, job_name=None):
    564553        """
    565554        Takes a function and its arguments, pickles it, and creates a job
    566555        which executes the function with the arguments.
    567        
     556
    568557        INPUT:
    569558            f -- function
    570559            arguments -- tuple(tuple, dict) --> *args, **kwds
    571            
     560
    572561        OUTPUT:
    573562            job wrapper representing the function evaluated at input.
    574            
     563
    575564        EXAMPLE:
    576565            sage: from sage.dsage.misc.misc import find_open_port
    577566            sage: port = find_open_port().next()
     
    590579            sage: j.wait()
    591580            sage: j
    592581            625
    593            
     582
    594583        """
    595584
    596585        from sage.misc.fpickle import pickle_function
     
    607596        job.attach('args', arguments[0])
    608597        job.attach('kwds', arguments[1])
    609598        wrapped_job = BlockingJobWrapper(self._remoteobj, job)
    610        
     599
    611600        return wrapped_job
    612    
     601
    613602    def eval(self, cmd, user_vars=None, job_name=None, timeout=600,
    614603             load_files=[], priority=5, async=False):
    615604        """
    616605        Evaluates a command on the remote server and returns a job wrapper.
    617        
     606
    618607        Parameters:
    619608        cmd -- the sage command to be evaluated (str)
    620609        user_vars -- a dict of predefined variables you want to use.
     
    624613        load_files -- list of files to load before executing the job
    625614        priority -- priority of the job created (0-5)
    626615        async -- whether to use the async implementation of the method
    627        
     616
    628617        """
    629        
    630         self.is_connected()       
     618
     619        self.is_connected()
    631620        kind = 'sage'
    632        
    633         job = Job(job_id=None, code=cmd, name=job_name,     
     621
     622        job = Job(job_id=None, code=cmd, name=job_name,
    634623                  username=self.username, timeout=timeout, priority=priority,
    635624                  kind=kind)
    636        
     625
    637626        for fname in load_files:
    638627            if os.path.exists(fname):
    639628                job.attach_file(fname)
    640        
     629
    641630        if user_vars is not None:
    642631            for k, v in user_vars.iteritems():
    643632                job.attach(k, v)
    644        
     633
    645634        if async:
    646635            wrapped_job = JobWrapper(self._remoteobj, job)
    647636        else:
    648637            wrapped_job = BlockingJobWrapper(self._remoteobj, job)
    649        
     638
    650639        return wrapped_job
    651    
     640
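        # Editor's usage sketch (not part of the patch): blocking use of eval()
        # with a connected client `d`, as set up in the doctests above.  The
        # user_vars dict is attached to the job and re-created in the worker's
        # namespace; load_files are attached and loaded before the code runs.
        #
        #     job = d.eval('print a * b', user_vars={'a': 6, 'b': 7},
        #                  job_name='multiply', timeout=60)
        #     job.wait()
        #     print job    # a finished job prints its captured output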
    652641    def send_job(self, job, async=False):
    653642        """
    654643        Sends a Job object to the server.
    655        
     644
    656645        Parameters:
    657646        job -- a Job object to send to the remote server
    658647        async -- if True, use async method of doing remote task
    659        
     648
    660649        """
    661        
     650
    662651        if not isinstance(job, Job):
    663652            raise TypeError
    664653        if async:
    665654            wrapped_job = JobWrapper(self._remoteobj, job)
    666655        else:
    667656            wrapped_job = BlockingJobWrapper(self._remoteobj, job)
    668        
     657
    669658        return wrapped_job
    670    
     659
    671660    def get_my_jobs(self, status='new'):
    672661        """
    673662        This method returns a list of jobs that belong to you.
    674        
     663
    675664        Parameters:
    676665        status -- only return jobs with this status (str, default 'new')
    677        
     666
    678667        Use this method if you get disconnected from the server and wish to
    679668        retrieve your old jobs back.
    680        
     669
    681670        """
    682        
     671
    683672        self.is_connected()
    684         from twisted.internet import reactor       
    685673        jdicts = blockingCallFromThread(reactor, self._remoteobj.callRemote,
    686674                                        'get_jobs_by_username',
    687675                                        self.username, status)
    688        
     676
    689677        return [expand_job(jdict) for jdict in jdicts]
    690    
    691    
     678
    692679    def kill_all(self):
    693680        """
    694681        Kills all of your active jobs.
    695        
     682
    696683        """
    697        
     684
    698685        active_jobs = self.get_my_jobs(active=True)
    699        
     686
    700687        for job in active_jobs:
    701688            self.kill(job.job_id)
    702    
     689
    703690    def cluster_speed(self):
    704691        """
    705692        Returns the speed of the cluster.
    706        
     693
    707694        """
    708        
     695
    709696        self.is_connected()
    710         from twisted.internet import reactor       
     697
    711698        return blockingCallFromThread(reactor, self._remoteobj.callRemote,
    712699                                         'get_cluster_speed')
    713    
     700
    714701    def get_workers_list(self):
    715702        """Returns a list of monitors connected to the server.
    716        
     703
    717704        """
    718        
     705
    719706        self.is_connected()
    720         from twisted.internet import reactor       
     707
    721708        return blockingCallFromThread(reactor, self._remoteobj.callRemote,
    722709                                         'get_worker_list')
    723    
     710
    724711    def get_client_list(self):
    725712        """
    726713        Returns a list of clients connected to the server.
    727714        """
    728        
     715
    729716        self.is_connected()
    730         from twisted.internet import reactor       
     717
    731718        return blockingCallFromThread(reactor, self._remoteobj.callRemote,
    732719                                         'get_client_list')
    733    
     720
    734721    def get_worker_count(self):
    735722        """
    736723        Returns the number of busy and free workers.
    737        
     724
    738725        """
    739        
     726
    740727        self.is_connected()
    741         from twisted.internet import reactor       
     728
    742729        return blockingCallFromThread(reactor, self._remoteobj.callRemote,
    743730                                         'get_worker_count')
    744    
     731
    745732    def web_server_url(self):
    746733        """
    747734        Returns the web server url.
    748735        """
    749        
     736
    750737        self.is_connected()
    751         from twisted.internet import reactor       
     738
    752739        return blockingCallFromThread(reactor, self._remoteobj.callRemote,
    753740                                      'web_server_url')
    754    
     741
    755742    def web_view(self):
    756743        """
    757744        Opens the dsage server's web interface in a browser.
    758        
     745
    759746        """
    760        
     747
    761748        from sage.server.misc import open_page
    762749        url = self.web_server_url()
    763750        address = url.split(':')[1].strip('/')
    764751        port = int(url.split(':')[2].strip('/'))
    765752        open_page(address, port, False)
    766        
     753
    767754    def server_log(self, n=50):
    768         from twisted.internet import reactor
    769755        return blockingCallFromThread(reactor, self._remoteobj.callRemote,
    770756                                      'read_log', n, 'server')
    771    
     757
    772758    def worker_log(self, n=50):
    773         from twisted.internet import reactor
    774759        return blockingCallFromThread(reactor, self._remoteobj.callRemote,
    775760                                      'read_log', n, 'worker')
    776                                      
    777                                      
     761
     762
    778763class JobWrapper(object):
    779764    """
    780765    Represents a remote job.
    781    
     766
    782767    Parameters:
    783768        remoteobj -- the PB server's remoteobj
    784769        job -- a Job object (job)
    785    
     770
    786771    """
    787    
     772
    788773    def __init__(self, remoteobj, job):
    789774        self._remoteobj = remoteobj
    790775        self._update_job(job._reduce())
     
    796781        d.addCallback(self._got_job_id)
    797782        d.addCallback(self._got_jdict)
    798783        d.addErrback(self._catch_failure)
    799    
     784
    800785    def __repr__(self):
    801786        return self.job_id
    802        
     787
    803788    def __str__(self):
    804789        if self.status == 'completed' and not self.output:
    805790            return 'No output. (Done)'
    806791        elif not self.output:
    807792            return 'No output yet. (Not done)'
    808        
     793
    809794        return self.output
    810    
     795
    811796    def __getstate__(self):
    812797        d = copy.copy(self.__dict__)
    813798        d['remoteobj'] = None
    814799        d['sync_job_task'] = None
    815        
     800
    816801        return d
    817    
     802
    818803    def _update_job(self, jdict):
    819804        self._jdict = jdict
    820805        job = expand_job(jdict)
     
    826811        timeout = 0.5
    827812        while self._job.result is None:
    828813            reactor.iterate(timeout)
    829    
     814
    830815    def save(self, filename=None):
    831816        if filename is None:
    832817            filename = str(self._job.name)
    833818        filename += '.sobj'
    834819        f = open(filename, 'w')
    835820        cPickle.dump(self, f, 2)
    836        
     821
    837822        return filename
    838    
     823
    839824    def restore(self, dsage):
    840825        self._remoteobj = dsage.remoteobj
    841    
     826
    842827    def _catch_failure(self, failure):
    843828        from twisted.internet import error
    844829        from twisted.spread import pb
     
    848833            pass
    849834            # print "Error: ", failure.getErrorMessage()
    850835            # print "Traceback: ", failure.printTraceback()
    851    
     836
    852837    def _got_job_id(self, job_id):
    853838        self.job_id = job_id
    854839        try:
    855840            d = self._remoteobj.callRemote('get_job_by_id', job_id)
    856841        except Exception, msg:
    857842            raise
    858        
     843
    859844        return d
    860    
     845
    861846    def _got_jdict(self, jdict):
    862847        self.job_id = jdict['job_id']
    863848        self._update_job(jdict)
    864    
     849
    865850    def get_job(self):
    866851        from sage.dsage.errors.exceptions import NotConnectedException
    867        
     852
    868853        if self._remoteobj is None:
    869854            raise NotConnectedException
    870855        if self.job_id is None:
     
    873858            d = self._remoteobj.callRemote('get_job_by_id', self.job_id)
    874859        except Exception, msg:
    875860            raise
    876        
     861
    877862        d.addCallback(self._got_jdict)
    878863        d.addErrback(self._catch_failure)
    879        
     864
    880865        return d
    881    
     866
    882867    def get_job_output(self):
    883868        if self._remoteobj == None:
    884869            return
     
    887872                                           self.job_id)
    888873        except Exception, msg:
    889874            raise
    890        
     875
    891876        d.addCallback(self._got_job_output)
    892877        d.addErrback(self._catch_failure)
    893        
     878
    894879        return d
    895    
     880
    896881    def _got_job_output(self, output):
    897882        self.output = output
    898    
     883
    899884    def get_job_result(self):
    900885        if self._remoteobj == None:
    901886            return
     
    904889                                           self.job_id)
    905890        except Exception, msg:
    906891            raise
    907        
     892
    908893        d.addCallback(self._got_job_result)
    909894        d.addErrback(self._catch_failure)
    910        
     895
    911896        return d
    912    
     897
    913898    def _got_job_result(self, result):
    914899        self.result = result
    915    
     900
    916901    def sync_job(self):
    917902        from twisted.spread import pb
    918903        if self._remoteobj == None:
     
    926911                if self.sync_job_task.running:
    927912                    self.sync_job_task.stop()
    928913            return
    929        
     914
    930915        try:
    931916            d = self._remoteobj.callRemote('sync_job', self.job_id)
    932917        except pb.DeadReferenceError:
     
    934919                if self.sync_job_task.running:
    935920                    self.sync_job_task.stop()
    936921            return
    937        
     922
    938923        d.addCallback(self._got_jdict)
    939924        d.addErrback(self._catch_failure)
    940    
     925
    941926    def write_result(self, filename):
    942927        result_file = open(filename, 'w')
    943        
     928
    944929        # write each element of the result to the file
    945930        for line in self.result:
    946931            line = str(line)
    947932            result_file.write(line)
    948933        result_file.close()
    949    
     934
    950935    def kill(self):
    951936        """
    952937        Kills the current job.
    953        
     938
    954939        """
    955        
     940
    956941        if self.job_id is not None:
    957942            try:
    958943                d = self._remoteobj.callRemote('kill_job', self.job_id)
    959944            except Exception, msg:
    960                 print 'Unable to kill %s because %s'  % (self.job_id, msg)
     945                print 'Unable to kill %s because %s' % (self.job_id, msg)
    961946                return
    962947            d.addCallback(self._killed_job)
    963948            d.addErrback(self._catch_failure)
    964949            return d
    965950        else:
    966951            return
    967        
     952
    968953    def _killed_job(self, job_id):
    969         return
     954        self.status = 'killed'
     955
    970956
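# Editor's note (not part of the patch): the blocking client above and the
# BlockingJobWrapper below turn each Perspective Broker call into a synchronous
# one via twisted.internet.threads.blockingCallFromThread, which schedules the
# call on the reactor thread and blocks the caller until the Deferred fires.
# A minimal sketch of that pattern, assuming the reactor is already running in
# another thread (as dsage arranges):
#
#     from twisted.internet import reactor
#     from twisted.internet.threads import blockingCallFromThread
#
#     def remote_call(remoteobj, method, *args):
#         # hypothetical helper mirroring the calls made in this file
#         return blockingCallFromThread(reactor, remoteobj.callRemote,
#                                       method, *args)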
    971957class BlockingJobWrapper(JobWrapper):
    972958    """
    973959    Blocking version of the JobWrapper object.  This is to be used
    974960    interactively.
    975    
     961
    976962    """
    977    
     963
    978964    def __init__(self, remoteobj, job):
    979965        self._update_job(job._reduce())
    980966        self._remoteobj = remoteobj
    981         from twisted.internet import reactor
    982967        self.job_id = blockingCallFromThread(reactor, self._remoteobj.callRemote,
    983968                                           'submit_job', job._reduce())
    984    
     969
    985970    def __repr__(self):
    986971        if self.killed:
    987972            return 'Job %s was killed' % (self.job_id)
     
    989974            self.get_job()
    990975        if self.status == 'completed' and not self.output:
    991976            return 'No output.'
     977        if self.result:
     978            return str(self.result)
    992979        if not self.output:
    993980            return 'No output yet.'
    994981        else:
    995982            return self.output
    996    
     983
    997984    def get_job(self):
    998985        from sage.dsage.errors.exceptions import NotConnectedException
    999        
     986
    1000987        if self._remoteobj == None:
    1001988            raise NotConnectedException
    1002989        if self.status == 'completed':
    1003990            return
    1004        
    1005         from twisted.internet import reactor       
     991
    1006992        jdict = blockingCallFromThread(reactor, self._remoteobj.callRemote,
    1007993                                        'get_job_by_id', self.job_id)
    1008        
     994
    1009995        self._update_job(jdict)
    1010    
     996
    1011997    def async_get_job(self):
    1012998        return JobWrapper.get_job(self)
    1013    
     999
    10141000    def rerun(self):
    10151001        """
    10161002        Resubmits the current job.
     1003
    10171004        """
    1018         from twisted.internet import reactor
    10191005        self.job_id = blockingCallFromThread(reactor,
    10201006                                             self._remoteobj.callRemote,
    10211007                                             'submit_job', self._jdict)
     1008
    10221009    def kill(self):
    10231010        """
    10241011        Kills the current job.
    1025        
     1012
    10261013        """
    1027         from twisted.internet import reactor       
     1014
    10281015        job_id = blockingCallFromThread(reactor, self._remoteobj.callRemote,
    10291016                                           'kill_job', self.job_id)
    10301017        self.job_id = job_id
    10311018        self.killed = True
    1032        
     1019
    10331020        return job_id
    1034    
    1035    
     1021
    10361022    def async_kill(self):
    10371023        """
    10381024        Async version of kill().
    1039        
     1025
    10401026        """
    1041        
     1027
    10421028        d = self._remoteobj.callRemote('kill_job', self.job_id)
    10431029        d.addCallback(self._killed_job)
    10441030        d.addErrback(self._catch_failure)
    1045        
     1031
    10461032        return d
    1047    
    1048    
     1033
    10491034    def wait(self, timeout=None):
    10501035        """
    10511036        Waits on a job until it is completed.
    1052        
     1037
    10531038        Parameters:
    10541039        timeout -- number of seconds to wait; if the job has not completed
    10551040                   by then, a RuntimeError is raised.  If set to None, wait
    10561041                   indefinitely until the job is completed.
    10571042        """
    1058        
     1043
    10591044        import signal
    1060        
     1045
    10611046        if timeout is None:
    10621047            while self.status != 'completed':
    10631048                # print 'Waiting...'
    1064                 time.sleep(1.0)
     1049                time.sleep(0.5)
    10651050                self.get_job()
    10661051        else:
    10671052            def handler(signum, frame):
    10681053                raise RuntimeError('Maximum wait time exceeded.')
     1054
    10691055            signal.signal(signal.SIGALRM, handler)
    10701056            signal.alarm(timeout)
    10711057            while self.status != 'completed':
    1072                 time.sleep(1.0)
     1058                time.sleep(0.5)
    10731059                self.get_job()
    10741060            signal.alarm(0)
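Editor's note (not part of the patch): BlockingJobWrapper.wait() above builds its
timeout on SIGALRM, so it only works in the main thread and timeout must be an
integer number of seconds.  A minimal standalone sketch of the same pattern, with
a try/finally added so the alarm is always cancelled:

    import signal
    import time

    def wait_until(poll, timeout):
        # Block until poll() returns True; raise RuntimeError after `timeout` seconds.
        def handler(signum, frame):
            raise RuntimeError('Maximum wait time exceeded.')
        signal.signal(signal.SIGALRM, handler)
        signal.alarm(timeout)        # SIGALRM is delivered after `timeout` seconds
        try:
            while not poll():
                time.sleep(0.5)
        finally:
            signal.alarm(0)          # cancel any pending alarm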
  • sage/dsage/misc/config.py

    diff --git a/sage/dsage/interface/nodoctest.py b/sage/dsage/interface/nodoctest.py
    new file mode 100644
    diff --git a/sage/dsage/misc/config.py b/sage/dsage/misc/config.py
    a b  
    2424
    2525import os
    2626import ConfigParser
     27import uuid
    2728
    2829from sage.dsage.misc.constants import DSAGE_DIR
    2930   
     
    8889        elif type == 'monitor':
    8990            conf_file = os.path.join(DSAGE_DIR, 'worker.conf')
    9091            config.read(conf_file)
    91             import uuid
    9292            if len(config.get('uuid', 'id')) != 36:
    9393                config.set('uuid', 'id', str(uuid.uuid1()))
    9494                f = open(conf_file, 'w')
     
    121121    if value.lower() not in boolean_states:
    122122        raise ValueError('Not a boolean: %s' % value)
    123123   
    124     return boolean_states[value.lower()]
     124    return boolean_states[value.lower()]
     125 No newline at end of file
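Editor's note (not part of the patch): the change above hoists `import uuid` to
module level; get_config('monitor') regenerates the worker id whenever the stored
value is not a 36-character UUID string.  The same check as a standalone sketch
(the helper name and details are illustrative only):

    import uuid
    import ConfigParser

    def ensure_worker_uuid(conf_file):
        # Make sure the config file carries a well-formed uuid; regenerate it otherwise.
        config = ConfigParser.ConfigParser()
        config.read(conf_file)
        if not config.has_section('uuid'):
            config.add_section('uuid')
        try:
            current = config.get('uuid', 'id')
        except ConfigParser.NoOptionError:
            current = ''
        if len(current) != 36:                 # canonical length of str(uuid.uuid1())
            config.set('uuid', 'id', str(uuid.uuid1()))
            f = open(conf_file, 'w')
            config.write(f)
            f.close()
        return config.get('uuid', 'id')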
  • sage/dsage/misc/constants.py

    diff --git a/sage/dsage/misc/constants.py b/sage/dsage/misc/constants.py
    a b  
    88DSAGE_LOCAL = os.path.join(os.getenv('SAGE_ROOT'), 'local/dsage')
    99DSAGE_DB_DIR = os.path.join(DSAGE_DIR, 'db')
    1010DSAGE_DB = os.path.join(DSAGE_DB_DIR, 'dsage.db')
     11SAGE_BIN = os.path.join(os.getenv('SAGE_ROOT'), 'sage')
    1112
    1213# These are the twisted tac files to be used with twistd
    1314SERVER_TAC = """import sys
     
    144145dsage_service.setServiceParent(application)
    145146
    146147print_info(dsage_server)"""
    147 
    148 WORKER_TAC = """"""
    149  No newline at end of file
  • sage/dsage/misc/misc.py

    diff --git a/sage/dsage/misc/misc.py b/sage/dsage/misc/misc.py
    a b  
    2828
    2929from sage.dsage.misc.constants import DSAGE_DIR
    3030from sage.dsage.misc.config import check_dsage_dir
     31
     32
     33def test_dsage(dsage):
     34    from time import sleep
     35    port = find_open_port().next()
     36    dsage.server(blocking=False, port=port, verbose=False, ssl=False,
     37                 log_level=5, testing=True)
     38    dsage.worker(blocking=False, port=port, verbose=False, ssl=False,
     39                 log_level=5, authenticate=False)
     40    sleep(0.5)
     41    d = dsage.connect(username='tester', port=port, ssl=False, testing=True)
     42    sleep(0.5)
     43
     44    return d
     45
     46def write_tac(tac, fname):
     47    os.chdir(DSAGE_DIR)
     48    f = open(fname, 'w')
     49    f.writelines(tac)
     50    f.close()
    3151
    3252def exec_wrs(script):
    3353    """
     
    135155                yield port
    136156                port += 1
    137157            else:
    138                 port += 1
    139  No newline at end of file
     158                port += 1
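Editor's note (not part of the patch): only the tail of find_open_port is visible
in the hunk above; the doctests call it as find_open_port().next(), so it is a
generator.  A rough sketch of such a generator, assuming it checks availability by
trying to bind a socket (an assumption, not the patch's actual code):

    import socket

    def find_open_port(port=8000):
        # Yield successive port numbers that can currently be bound on localhost.
        while True:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                s.bind(('localhost', port))
                free = True
            except socket.error:
                free = False
            s.close()
            if free:
                yield port
            port += 1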
  • sage/dsage/scripts/dsage_setup.py

    diff --git a/sage/dsage/scripts/dsage_setup.py b/sage/dsage/scripts/dsage_setup.py
    a b  
    11############################################################################
    2 #                                                                     
    3 #   DSAGE: Distributed SAGE                     
    4 #                                                                             
    5 #       Copyright (C) 2006, 2007 Yi Qiang <yqiang@gmail.com>               
    6 #                                                                           
    7 #  Distributed under the terms of the GNU General Public License (GPL)       
     2#
     3#   DSAGE: Distributed SAGE
     4#
     5#       Copyright (C) 2006, 2007 Yi Qiang <yqiang@gmail.com>
     6#
     7#  Distributed under the terms of the GNU General Public License (GPL)
    88#
    99#    This code is distributed in the hope that it will be useful,
    1010#    but WITHOUT ANY WARRANTY; without even the implied warranty of
     
    3737SAGE_ROOT = os.getenv('SAGE_ROOT')
    3838DSAGE_VERSION = version
    3939
     40
    4041def get_config(type):
    4142    config = ConfigParser.ConfigParser()
    4243    config.add_section('general')
     
    5455        config.add_section('server_log')
    5556        config.add_section('db')
    5657        config.add_section('db_log')
     58
    5759    return config
     60
    5861
    5962def add_default_client(Session):
    6063    """
    6164    Adds the default client.
    62    
     65
    6366    """
    64    
     67
    6568    from twisted.conch.ssh import keys
    6669    from getpass import getuser
    67    
     70
    6871    clientdb = ClientDatabase(Session)
    69    
     72
    7073    username = getuser()
    7174    pubkey_file = os.path.join(DSAGE_DIR, 'dsage_key.pub')
    7275    pubkey = keys.Key.fromFile(pubkey_file)
     
    8386        else:
    8487            print 'User %s already exists.' % (username)
    8588
     89
    8690def setup_client(testing=False):
    8791    check_dsage_dir()
    8892    key_file = os.path.join(DSAGE_DIR, 'dsage_key')
    8993    if testing:
    9094        cmd = ["ssh-keygen", "-q", "-trsa", "-P ''", "-f%s" % key_file]
    9195        return
    92    
     96
    9397    if not cmd_exists('ssh-keygen'):
    9498        print DELIMITER
    9599        print "Could NOT find ssh-keygen."
    96100        print "Aborting."
    97101        return
    98        
     102
    99103    print DELIMITER
    100104    print "Generating public/private key pair for authentication..."
    101105    print "Your key will be stored in %s/dsage_key" % DSAGE_DIR
    102106    print "Just hit enter when prompted for a passphrase"
    103107    print DELIMITER
    104    
    105     cmd = ["ssh-keygen", "-q", "-trsa", "-f%s" % key_file]   
     108
     109    cmd = ["ssh-keygen", "-q", "-trsa", "-f%s" % key_file]
    106110    ld = os.environ['LD_LIBRARY_PATH']
    107111    try:
    108112        del os.environ['LD_LIBRARY_PATH']
    109113        p = subprocess.call(cmd)
    110114    finally:
    111115        os.environ['LD_LIBRARY_PATH'] = ld
    112        
     116
    113117    print "\n"
    114118    print "Client configuration finished.\n"
     119
    115120
    116121def setup_worker():
    117122    check_dsage_dir()
    118123    print "Worker configuration finished.\n"
     124
    119125
    120126def setup_server(template=None):
    121127    check_dsage_dir()
     
    125131    if dn == '':
    126132        print "Using default localhost"
    127133        dn = 'localhost'
    128    
     134
    129135    template_dict = {'organization': 'SAGE (at %s)' % (dn),
    130136                'unit': '389',
    131137                'locality': None,
     
    134140                'cn': dn,
    135141                'uid': 'sage_user',
    136142                'dn_oid': None,
    137                 'serial': str(random.randint(1,2**31)),
     143                'serial': str(random.randint(1, 2**31)),
    138144                'dns_name': None,
    139145                'crl_dist_points': None,
    140146                'ip_address': None,
     
    146152                'signing_key': True,
    147153                'encryption_key': True,
    148154                }
    149                
     155
    150156    if isinstance(template, dict):
    151157        template_dict.update(template)
    152    
     158
    153159    s = ""
    154160    for key, val in template_dict.iteritems():
    155161        if val is None:
     
    160166            w = ' '.join(['"%s"' % x for x in val])
    161167        else:
    162168            w = '"%s"' % val
    163         s += '%s = %s \n' % (key, w) 
    164    
     169        s += '%s = %s \n' % (key, w)
     170
    165171    template_file = os.path.join(DSAGE_DIR, 'cert.cfg')
    166172    f = open(template_file, 'w')
    167173    f.write(s)
    168174    f.close()
    169    
     175
    170176    # Disable certificate generation -- not used right now anyways
    171177    privkey_file = os.path.join(DSAGE_DIR, 'cacert.pem')
    172178    pubkey_file = os.path.join(DSAGE_DIR, 'pubcert.pem')
    173    
     179
    174180    print DELIMITER
    175181    print "Generating SSL certificate for server..."
    176    
     182
    177183    if False and os.uname()[0] != 'Darwin' and cmd_exists('openssl'):
    178184        # We use openssl by default if it exists, since it is *vastly*
    179185        # faster on Linux.
     
    187193        print cmd[0]
    188194        # cmd = ['openssl genrsa > %s' % privkey_file]
    189195        subprocess.call(cmd, shell=True)
    190        
     196
    191197    cmd = ['certtool --generate-self-signed --template %s --load-privkey %s \
    192198           --outfile %s' % (template_file, privkey_file, pubkey_file)]
    193199    subprocess.call(cmd, shell=True)
    194200    print DELIMITER
    195    
     201
    196202    # Set read only permissions on cert
    197203    os.chmod(os.path.join(DSAGE_DIR, 'cacert.pem'), 0600)
    198    
     204
    199205    # create database schemas
    200206    from sage.dsage.database.db_config import init_db_sa as init_db
    201207    Session = init_db(DSAGE_DB)
    202    
     208
    203209    # add default user
    204210    add_default_client(Session)
    205            
     211
    206212    print "Server configuration finished.\n\n"
    207    
     213
     214
    208215def setup(template=None):
    209216    setup_client()
    210217    setup_worker()
     
    221228            setup_worker()
    222229        elif sys.argv[1] == 'client':
    223230            setup_client()
    224 
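Editor's note (not part of the patch): setup_client above temporarily removes
LD_LIBRARY_PATH around the ssh-keygen call, presumably so the system binary does
not pick up Sage's bundled libraries, and restores it in a finally block.  The
same pattern as a small hypothetical helper:

    import os
    import subprocess

    def call_without_ld_library_path(cmd):
        # Run cmd with LD_LIBRARY_PATH unset, then restore the previous value.
        saved = os.environ.pop('LD_LIBRARY_PATH', None)
        try:
            return subprocess.call(cmd)
        finally:
            if saved is not None:
                os.environ['LD_LIBRARY_PATH'] = saved

    # e.g. call_without_ld_library_path(['ssh-keygen', '-q', '-trsa', '-f%s' % key_file])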
  • deleted file sage/dsage/scripts/dsage_worker.py

    diff --git a/sage/dsage/scripts/dsage_worker.py b/sage/dsage/scripts/dsage_worker.py
    deleted file mode 100755
    + -  
    1 #!/usr/bin/env python
    2 ############################################################################
    3 #                                                                     
    4 #   DSAGE: Distributed SAGE                     
    5 #                                                                             
    6 #       Copyright (C) 2006, 2007 Yi Qiang <yqiang@gmail.com>               
    7 #                                                                           
    8 #  Distributed under the terms of the GNU General Public License (GPL)       
    9 #
    10 #    This code is distributed in the hope that it will be useful,
    11 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
    12 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13 #    General Public License for more details.
    14 #
    15 #  The full text of the GPL is available at:
    16 #
    17 #                  http://www.gnu.org/licenses/
    18 #
    19 ############################################################################
    20 __docformat__ = "restructuredtext en"
    21 
    22 import sys
    23 import os
    24 import cPickle
    25 import zlib
    26 import pexpect
    27 import datetime
    28 from math import ceil
    29 from getpass import getuser
    30 
    31 from twisted.spread import pb
    32 from twisted.internet import reactor, defer, error, task
    33 from twisted.python import log
    34 from twisted.spread import banana
    35 banana.SIZE_LIMIT = 100*1024*1024 # 100 MegaBytes
    36 
    37 from gnutls.constants import *
    38 from gnutls.crypto import *
    39 from gnutls.errors import *
    40 from gnutls.interfaces.twisted import X509Credentials
    41 
    42 from sage.interfaces.sage0 import Sage
    43 from sage.misc.preparser import preparse_file
    44 
    45 from sage.dsage.database.job import Job, expand_job
    46 from sage.dsage.misc.hostinfo import HostInfo
    47 from sage.dsage.errors.exceptions import NoJobException
    48 from sage.dsage.twisted.pb import ClientFactory
    49 from sage.dsage.misc.constants import DELIMITER
    50 from sage.dsage.misc.constants import DSAGE_DIR
    51 from sage.dsage.misc.constants import TMP_WORKER_FILES
    52 from sage.dsage.misc.misc import random_str, get_uuid
    53 
    54 START_MARKER = '\x01r\x01e'
    55 END_MARKER = '\x01r\x01b'
    56 LOG_PREFIX = "[Worker %s] "
    57 
    58 class Worker(object):
    59     """
    60     Workers perform the computation of dsage jobs.
    61    
    62     """
    63    
    64     def __init__(self, remoteobj, id, log_level=0, poll=1.0):
    65         """
    66         :type remoteobj: remoteobj
    67         :param remoteobj: Reference to the remote dsage server
    68        
    69         :type id: integer
    70         :param id: numerical identifier of worker
    71        
    72         :type log_level: integer
    73         :param log_level: log level, higher means more verbose
    74        
    75         :type poll: integer
    76         :param poll: rate (in seconds) a worker talks to the server
    77        
    78         """
    79        
    80         self.remoteobj = remoteobj
    81         self.id = id
    82         self.free = True
    83         self.job = None
    84         self.log_level = log_level
    85         self.poll_rate = poll
    86         self.checker_task = task.LoopingCall(self.check_work)
    87         self.checker_timeout = 0.5
    88         self.got_output = False
    89         self.job_start_time = None
    90         self.orig_poll = poll
    91         self.start()
    92        
    93     def _catch_failure(self, failure):
    94         log.msg("Error: ", failure.getErrorMessage())
    95         log.msg("Traceback: ", failure.printTraceback())
    96    
    97     def _increase_poll_rate(self):
    98         if self.poll_rate >= 15: # Cap the polling interval to 15 seconds
    99             self.poll_rate = 15
    100             if self.log_level > 3:
    101                 log.msg('[Worker %s] Capping poll rate to %s'
    102                          % (self.id, self.poll_rate))
    103         else:
    104             self.poll_rate = ceil(self.poll_rate * 1.5)
    105             if self.log_level > 3:
    106                 log.msg('[Worker %s] Increased polling rate to %s'
    107                         % (self.id, self.poll_rate))
    108    
    109     def get_job(self):
    110         try:
    111             if self.log_level > 3:
    112                 log.msg(LOG_PREFIX % self.id +  'Getting job...')
    113             d = self.remoteobj.callRemote('get_job')
    114         except Exception, msg:
    115             log.msg(msg)
    116             log.msg(LOG_PREFIX % self.id +  'Disconnected...')
    117             self._increase_poll_rate()
    118             reactor.callLater(self.poll_rate, self.get_job)
    119             return
    120         d.addCallback(self.gotJob)
    121         d.addErrback(self.noJob)
    122        
    123         return d
    124    
    125     def gotJob(self, jdict):
    126         """
    127         callback for the remoteobj's get_job method.
    128        
    129         :type jdict: dict
    130         :param jdict: job dictionary
    131 
    132         """
    133        
    134         if self.log_level > 1:
    135             if jdict is None:
    136                 log.msg(LOG_PREFIX % self.id + 'No new job.')
    137         if self.log_level > 3:
    138             if jdict is not None:
    139                 log.msg(LOG_PREFIX % self.id + 'Got Job: %s' % jdict)
    140         self.job = expand_job(jdict)
    141         if not isinstance(self.job, Job):
    142             raise NoJobException
    143         try:
    144             self.poll_rate = self.orig_poll
    145             self.doJob(self.job)
    146         except Exception, msg:
    147             log.msg(msg)
    148             self.report_failure(msg)
    149             self.restart()
    150    
    151     def job_done(self, output, result, completed, cpu_time):
    152         """
    153         Reports to the server that a job has finished. It also reports partial
    154         completeness by presenting the server with new output.
    155        
    156         Parameters:
    157         :type output: string
    158         :param output: output of command (to sys.stdout)
    159        
    160         :type result: python pickle
    161         :param result: result of the job
    162        
    163         :type completed: bool
    164         :param completed: whether or not the job is finished
    165        
    166         :type cpu_time: string
    167         :param cpu_time: how long the job took
    168        
    169         """
    170        
    171         job_id = self.job.job_id
    172         wait = 5.0
    173         try:
    174             d = self.remoteobj.callRemote('job_done', job_id, output, result,
    175                                           completed, cpu_time)
    176         except Exception, msg:
    177             log.msg('Error trying to submit job status...')
    178             log.msg('Retrying to submit again in %s seconds...' % wait)
    179             log.err(msg)
    180             reactor.callLater(wait, self.job_done, output, result,
    181                               completed, cpu_time)
    182             d = defer.Deferred()
    183             d.errback(error.ConnectionLost())     
    184             return d
    185        
    186         if completed:
    187             log.msg('[Worker %s] Finished job %s' % (self.id, job_id))
    188             self.restart()
    189    
    190         return d
    191        
    192        
    193     def noJob(self, failure):
    194         """
    195         Errback that catches the NoJobException.
    196        
    197         :type failure: twisted.python.failure
    198         :param failure: a twisted failure object
    199        
    200         """
    201        
    202         if failure.check(NoJobException):
    203             if self.log_level > 1:
    204                 msg = 'Sleeping for %s seconds' % self.poll_rate
    205                 log.msg(LOG_PREFIX % self.id + msg)
    206             self._increase_poll_rate()
    207             reactor.callLater(self.poll_rate, self.get_job)
    208         else:
    209             log.msg("Error: ", failure.getErrorMessage())
    210             log.msg("Traceback: ", failure.printTraceback())
    211    
    212     def setup_tmp_dir(self, job):
    213         """
    214         Creates the temporary directory for the worker.
    215        
    216         :type job: sage.dsage.database.job.Job
    217         :param job: a Job object
    218        
    219         """
    220        
    221         cur_dir = os.getcwd() # keep a reference to the current directory
    222         tmp_job_dir = os.path.join(TMP_WORKER_FILES, job.job_id)
    223         if not os.path.isdir(TMP_WORKER_FILES):
    224             os.mkdir(TMP_WORKER_FILES)
    225         if not os.path.isdir(tmp_job_dir):
    226             os.mkdir(tmp_job_dir)
    227         os.chdir(tmp_job_dir)
    228         self.sage.eval("os.chdir('%s')" % tmp_job_dir)
    229        
    230         return tmp_job_dir
    231 
    232     def extract_and_load_job_data(self, job):
    233         """
    234         Extracts all the data that is in a job object.
    235        
    236         :type job: sage.dsage.database.job.Job
    237         :param job: a Job object
    238        
    239         """
    240        
    241         if isinstance(job.data, list):
    242             if self.log_level > 2:
    243                 msg = 'Extracting job data...'
    244                 log.msg(LOG_PREFIX % self.id + msg)
    245             try:
    246                 for var, data, kind in job.data:
    247                     try:
    248                         data = zlib.decompress(data)
    249                     except Exception, msg:
    250                         log.msg(msg)
    251                         continue
    252                     if kind == 'file':
    253                         data = preparse_file(data, magic=True, do_time=False,
    254                                              ignore_prompts=False)
    255                         f = open(var, 'wb')
    256                         f.write(data)
    257                         f.close()
    258                         if self.log_level > 2:
    259                             msg = 'Extracted %s' % f
    260                             log.msg(LOG_PREFIX % self.id + msg)
    261                         self.sage.eval("execfile('%s')" % var)
    262                     if kind == 'object':
    263                         fname = var + '.sobj'
    264                         if self.log_level > 2:
    265                             log.msg('Object to be loaded: %s' % fname)
    266                         f = open(fname, 'wb')
    267                         f.write(data)
    268                         f.close()
    269                         self.sage.eval("%s = load('%s')" % (var, fname))
    270                         if self.log_level > 2:
    271                             msg = 'Loaded %s' % fname
    272                             log.msg(LOG_PREFIX % self.id + msg)
    273             except Exception, msg:
    274                 log.msg(LOG_PREFIX % self.id + msg)
    275 
    276     def write_job_file(self, job):
    277         """
    278         Writes out the job file to be executed to disk.
    279        
    280         :type job: sage.dsage.database.job.Job
    281         :param job: A Job object
    282        
    283         """
    284        
    285         parsed_file = preparse_file(job.code, magic=True,
    286                                     do_time=False, ignore_prompts=False)
    287 
    288         job_filename = str(job.name) + '.py'
    289         job_file = open(job_filename, 'w')
    290         BEGIN = "print '%s'\n\n" % (START_MARKER)
    291         END = "print '%s'\n\n" % (END_MARKER)
    292         GO_TO_TMP_DIR = """os.chdir('%s')\n""" % self.tmp_job_dir
    293         SAVE_TIME = """save((time.time()-dsage_start_time), 'cpu_time.sobj', compress=False)\n"""
    294         SAVE_RESULT = """try:
    295     save(DSAGE_RESULT, 'result.sobj', compress=True)
    296 except:
    297     save('No DSAGE_RESULT', 'result.sobj', compress=True)
    298 """
    299         job_file.write("alarm(%s)\n\n" % (job.timeout))
    300         job_file.write("import time\n\n")
    301         job_file.write(BEGIN)
    302         job_file.write('dsage_start_time = time.time()\n')
    303         job_file.write(parsed_file)
    304         job_file.write("\n\n")
    305         job_file.write(END)
    306         job_file.write("\n")
    307         job_file.write(GO_TO_TMP_DIR)
    308         job_file.write(SAVE_RESULT)
    309         job_file.write(SAVE_TIME)
    310         job_file.close()
    311         if self.log_level > 2:
    312             log.msg('[Worker: %s] Wrote job file. ' % (self.id))
    313            
    314         return job_filename
    315        
    316     def doJob(self, job):
    317         """
    318         Executes a job
    319        
    320         :type job: sage.dsage.database.job.Job
    321         :param job: A Job object
    322 
    323         """
    324        
    325         log.msg(LOG_PREFIX % self.id + 'Starting job %s ' % job.job_id)
    326            
    327         self.free = False
    328         self.got_output = False
    329         d = defer.Deferred()
    330        
    331         try:
    332             self.checker_task.start(self.checker_timeout, now=False)
    333         except AssertionError:
    334             self.checker_task.stop()
    335             self.checker_task.start(self.checker_timeout, now=False)
    336         if self.log_level > 2:
    337             log.msg(LOG_PREFIX % self.id + 'Starting checker task...')
    338        
    339         self.tmp_job_dir = self.setup_tmp_dir(job)
    340         self.extract_and_load_job_data(job)
    341        
    342         job_filename = self.write_job_file(job)
    343 
    344         f = os.path.join(self.tmp_job_dir, job_filename)
    345         self.sage._send("execfile('%s')" % (f))
    346         self.job_start_time = datetime.datetime.now()
    347         if self.log_level > 2:
    348             msg = 'File to execute: %s' % f
    349             log.msg(LOG_PREFIX % self.id + msg)
    350        
    351         d.callback(True)
    352 
    353     def reset_checker(self):
    354         """
    355         Resets the output/result checker for the worker.
    356        
    357         """
    358        
    359         if self.checker_task.running:
    360             self.checker_task.stop()
    361         self.checker_timeout = 1.0
    362         self.checker_task = task.LoopingCall(self.check_work)
    363 
    364     def check_work(self):
    365         """
    366         check_work periodically polls workers for new output. The period is
    367         determined by an exponential back off algorithm.
    368        
    369         This figures out whether or not there is anything new output that we
    370         should submit to the server.
    371        
    372         """
    373        
    374         if self.sage == None:
    375             return
    376         if self.job == None or self.free == True:
    377             if self.checker_task.running:
    378                 self.checker_task.stop()
    379             return
    380         if self.log_level > 1:
    381             msg = 'Checking job %s' % self.job.job_id
    382             log.msg(LOG_PREFIX % self.id + msg)
    383         os.chdir(self.tmp_job_dir)
    384         try:
    385             # foo, output, new = self.sage._so_far()
    386             # This sucks and is a very bad way to tell when a calculation is
    387             # finished           
    388             done, new = self.sage._get()
    389             # If result.sobj exists, our calculation is done
    390             result = open('result.sobj', 'rb').read()
    391             done = True
    392         except RuntimeError, msg: # Error in calling worker.sage._so_far()
    393             done = False
    394             if self.log_level > 1:
    395                 log.msg(LOG_PREFIX % self.id + 'RuntimeError: %s' % msg)
    396                 log.msg("Don't worry, the RuntimeError above " +
    397                         "is a non-fatal SAGE failure")
    398             self.increase_checker_task_timeout()
    399             return
    400         except IOError, msg: # File does not exist yet
    401             done = False
    402            
    403         if done:
    404             try:
    405                 cpu_time = cPickle.loads(open('cpu_time.sobj', 'rb').read())
    406             except IOError:
    407                 cpu_time = -1
    408             self.free = True
    409             self.reset_checker()
    410         else:
    411             result = cPickle.dumps('Job not done yet.', 2)
    412             cpu_time = None
    413            
    414         if self.check_failure(new):
    415             self.report_failure(new)
    416             self.restart()
    417             return
    418        
    419         sanitized_output = self.clean_output(new)   
    420         if self.log_level > 3:
    421             print 'Output before sanitizing: \n' , sanitized_output
    422         if self.log_level > 3:
    423             print 'Output after sanitizing: \n', sanitized_output
    424         if sanitized_output == '' and not done:
    425             self.increase_checker_task_timeout()
    426         else:
    427             d = self.job_done(sanitized_output, result, done, cpu_time)
    428             d.addErrback(self._catch_failure)
    429 
    430     def report_failure(self, failure):
    431         """
    432         Reports failure of a job.
    433        
    434         :type failure: twisted.python.failure
    435         :param failure: A twisted failure object
    436        
    437         """
    438        
    439         msg = 'Job %s failed!' % (self.job.job_id)
    440         import shutil
    441         failed_dir = self.tmp_job_dir + '_failed'
    442         if os.path.exists(failed_dir):
    443             shutil.rmtree(failed_dir)
    444         shutil.move(self.tmp_job_dir, failed_dir)
    445         log.msg(LOG_PREFIX % self.id + msg)
    446         log.msg('Traceback: \n%s' % failure)
    447         d = self.remoteobj.callRemote('job_failed', self.job.job_id, failure)
    448         d.addErrback(self._catch_failure)
    449        
    450         return d
    451        
    452     def increase_checker_task_timeout(self):
    453         """
    454         Quickly decreases the number of times a worker checks for output
    455        
    456         """
    457        
    458         if self.checker_task.running:
    459             self.checker_task.stop()
    460        
    461         self.checker_timeout = self.checker_timeout * 1.5
    462         if self.checker_timeout > 300.0:
    463             self.checker_timeout = 300.0
    464         self.checker_task = task.LoopingCall(self.check_work)
    465         self.checker_task.start(self.checker_timeout, now=False)
    466         if self.log_level > 0:
    467             msg = 'Checking output again in %s' % self.checker_timeout
    468             log.msg(LOG_PREFIX % self.id + msg)
    469        
    470     def clean_output(self, sage_output):
    471         """
    472         clean_output attempts to clean up the output string from sage.
    473 
    474         :type sage_output: string
    475         :param sage_output: sys.stdout output from the child sage instance
    476        
    477         """
    478        
    479         begin = sage_output.find(START_MARKER)
    480         if begin != -1:
    481             self.got_output = True
    482             begin += len(START_MARKER)
    483         else:
    484             begin = 0
    485         end = sage_output.find(END_MARKER)
    486         if end != -1:
    487             end -= 1
    488         else:
    489             if not self.got_output:
    490                 end = 0
    491             else:
    492                 end = len(sage_output)
    493         output = sage_output[begin:end]
    494         output = output.strip()
    495         output = output.replace('\r', '')
    496        
    497         if ('execfile' in output or 'load' in output) and self.got_output:
    498             output = ''           
    499            
    500         return output
    501  
    502     def check_failure(self, sage_output):
    503         """
    504         Checks for signs of exceptions or errors in the output.
    505        
    506         :type sage_output: string
    507         :param sage_output: output from the sage instance
    508        
    509         """
    510 
    511         if sage_output == None:
    512             return False
    513         else:
    514             sage_output = ''.join(sage_output)
    515 
    516         if 'Traceback' in sage_output:
    517             return True
    518         elif 'Error' in sage_output:
    519             return True
    520         else:
    521             return False
    522 
    523     def kill_sage(self):
    524         """
    525         Try to hard kill the SAGE instance.
    526        
    527         """
    528        
    529         try:
    530             self.sage.quit()
    531             del self.sage
    532         except Exception, msg:
    533             pid = self.sage.pid()
    534             cmd = 'kill -9 %s' % pid
    535             os.system(cmd)
    536             log.msg(msg)
    537            
    538     def stop(self, hard_reset=False):
    539         """
    540         Stops the current worker and resets it's internal state.
    541        
    542         :type hard_reset: boolean
    543         :param hard_reset: Specifies whether to kill -9 the sage instances
    544            
    545         """
    546        
    547         # Set status to free and delete any current jobs we have
    548         self.free = True
    549         self.job = None
    550        
    551         if hard_reset:
    552             log.msg(LOG_PREFIX % self.id + 'Performing hard reset.')
    553             self.kill_sage()
    554         else: # try for a soft reset
    555             INTERRUPT_TRIES = 20
    556             timeout = 0.3
    557             e = self.sage._expect
    558             try:
    559                 for i in range(INTERRUPT_TRIES):   
    560                     self.sage._expect.sendline('q')
    561                     self.sage._expect.sendline(chr(3))  # send ctrl-c
    562                     try:
    563                         e.expect(self.sage._prompt, timeout=timeout)           
    564                         success = True
    565                         break
    566                     except (pexpect.TIMEOUT, pexpect.EOF), msg:
    567                         success = False
    568                         if self.log_level > 3:
    569                             msg = 'Interrupting SAGE (try %s)' % i
    570                             log.msg(LOG_PREFIX % self.id + msg)
    571             except Exception, msg:
    572                 success = False
    573                 log.msg(msg)
    574                 log.msg(LOG_PREFIX % self.id + "Performing hard reset.")
    575        
    576             if not success:
    577                 self.kill_sage()
    578             else:
    579                 self.sage.reset()
    580    
    581     def start(self):
    582         """
    583         Starts a new worker if it does not exist already.
    584        
    585         """
    586        
    587         log.msg('[Worker %s] Started...' % (self.id))
    588         if not hasattr(self, 'sage'):
    589             if self.log_level > 3:
    590                 logfile = DSAGE_DIR + '/%s-pexpect.log' % self.id
    591                 self.sage = Sage(maxread=1, logfile=logfile, python=True)
    592             else:
    593                 self.sage = Sage(maxread=1, python=True)
    594             try:
    595                 self.sage._start(block_during_init=True)
    596             except RuntimeError, msg: # Could not start SAGE
    597                 print msg
    598                 print 'Failed to start a worker, probably Expect issues.'
    599                 reactor.stop()
    600                 sys.exit(-1)
    601         E = self.sage.expect()
    602         E.sendline('\n')
    603         E.expect('>>>')
    604         cmd = 'from sage.all import *;'
    605         cmd += 'from sage.all_notebook import *;'
    606         cmd += 'import sage.server.support as _support_; '
    607         cmd += 'import time;'
    608         cmd += 'import os;'
    609         E.sendline(cmd)
    610        
    611         if os.uname()[0].lower() == 'linux':
    612             try:
    613                 self.base_mem = int(self.sage.get_memory_usage())
    614             except:
    615                 pass
    616    
    617         self.get_job()
    618    
    619     def restart(self):
    620         """
    621         Restarts the current worker.
    622        
    623         """
    624        
    625         log.msg('[Worker: %s] Restarting...' % (self.id))
    626        
    627         if hasattr(self, 'base_mem'):
    628             try:
    629                 cur_mem = int(self.sage.get_memory_usage())
    630             except:
    631                 cur_mem = 0
    632         try:
    633             if hasattr(self, 'base_mem'):
    634                 if cur_mem >= (2 * self.base_mem):
    635                     self.stop(hard_reset=True)
    636             else:
    637                 from sage.dsage.misc.misc import timedelta_to_seconds
    638                 delta = datetime.datetime.now() - self.job_start_time
    639                 secs = timedelta_to_seconds(delta)
    640                 if secs >= (3*60): # more than 3 minutes, do a hard reset
    641                     self.stop(hard_reset=True)
    642                 else:
    643                     self.stop(hard_reset=False)
    644         except TypeError:
    645             self.stop(hard_reset=True)
    646         self.job_start_time = None
    647         self.start()
    648         self.reset_checker()
    649    
    650    
    651 class Monitor(pb.Referenceable):
    652     """
    653     Monitors control workers.
    654     They are able to shut down workers and spawn them, as well as check on
    655     their status.
    656    
    657     """
    658    
    659     def __init__(self, server='localhost', port=8081, username=getuser(),
    660                  ssl=True, workers=2, authenticate=False, priority=20,
    661                  poll=1.0, log_level=0,
    662                  log_file=os.path.join(DSAGE_DIR, 'worker.log'),
    663                  pubkey_file=None, privkey_file=None):
    664         """
    665         :type server: string
    666         :param server: hostname of remote server
    667        
    668         :type port: integer
    669         :param port: port of remote server
    670        
    671         :type username: string
    672         :param username: username to use for authentication
    673        
    674         :type ssl: boolean
    675         :param ssl: specify whether or not to use SSL for the connection
    676        
    677         :type workers: integer
    678         :param workers: specifies how many workers to launch
    679        
    680         :type authenticate: boolean
    681         :param authenticate: specifies whether or not to authenticate
    682        
    683         :type priority: integer
    684         :param priority: specifies the UNIX priority of the workers
    685        
    686         :type poll: float
    687         :param poll: how often workers poll the server for jobs, in seconds
    688        
    689         :type log_level: integer
    690         :param log_level: verbosity of logging; a higher value is more verbose
    691        
    692         :type log_file: string
    693         :param log_file: specifies the location of the log_file
    694            
    695         """
    696        
    697         self.server = server
    698         self.port = port
    699         self.username = username
    700         self.ssl = ssl
    701         self.workers = workers
    702         self.authenticate = authenticate
    703         self.priority = priority
    704         self.poll_rate = poll
    705         self.log_level = log_level
    706         self.log_file = log_file
    707         self.pubkey_file = pubkey_file
    708         self.privkey_file = privkey_file
    709        
    710         self.remoteobj = None
    711         self.connected = False
    712         self.reconnecting = False
    713         self.worker_pool = None
    714         self.sleep_time = 1.0
    715        
    716         self.host_info = HostInfo().host_info
    717        
    718         self.host_info['uuid'] = get_uuid()
    719         self.host_info['workers'] = self.workers
    720         self.host_info['username'] = self.username
    721        
    722         self._startLogging(self.log_file)
    723        
    724         try:
    725             os.nice(self.priority)
    726         except OSError, msg:
    727             log.msg('Error setting priority: %s' % (self.priority))
    728             pass       
    729         if self.authenticate:
    730             from twisted.cred import credentials
    731             from twisted.conch.ssh import keys
    732             self.DATA =  random_str(500)
    733             # public key authentication information
    734             self.pubkey = keys.Key.fromFile(self.pubkey_file)
    735             # try getting the private key object without a passphrase first
    736             try:
    737                 self.privkey = keys.Key.fromFile(self.privkey_file)
    738             except keys.BadKeyError:
    739                 pphrase = self._getpassphrase()
    740                 self.privkey = keys.Key.fromFile(self.privkey_file,
    741                                                   passphrase=pphrase)
    742             self.algorithm = 'rsa'
    743             self.blob = self.pubkey.blob()
    744             self.data = self.DATA
    745             self.signature = self.privkey.sign(self.data)
    746             self.creds = credentials.SSHPrivateKey(self.username,
    747                                                    self.algorithm,
    748                                                    self.blob,
    749                                                    self.data,
    750                                                    self.signature)
    751    
    752     def _startLogging(self, log_file):
    753         """
    754         :type log_file: string
    755         :param log_file: file name to log to
    756        
    757         """
    758        
    759         if log_file == 'stdout':
    760             log.startLogging(sys.stdout)
    761             log.msg('WARNING: Only logging to stdout!')
    762         else:
    763             worker_log = open(log_file, 'a')
    764             log.startLogging(sys.stdout)
    765             log.startLogging(worker_log)
    766             log.msg("Logging to file: ", log_file)
    767            
    768     def _getpassphrase(self):
    769         import getpass
    770         passphrase = getpass.getpass('Passphrase (Hit enter for None): ')
    771        
    772         return passphrase
    773        
    774     def _connected(self, remoteobj):
    775         """
    776         Callback for connect.
    777        
    778         :type remoteobj: remote object
    779         :param remoteobj: reference to the remote server object
    780        
    781         """
    782        
    783         self.remoteobj = remoteobj
    784         self.remoteobj.notifyOnDisconnect(self._disconnected)
    785         self.connected = True
    786        
    787         if self.worker_pool == None: # Only pool workers the first time
    788             self.pool_workers(self.remoteobj)
    789         else:
    790             for worker in self.worker_pool:
    791                 worker.remoteobj = self.remoteobj # Update workers
    792                 if worker.job == None:
    793                     worker.restart()
    794    
    795     def _disconnected(self, remoteobj):
    796         """
    797         :type remoteobj: remote object
    798         :param remoteobj: reference to the remote server object
    799        
    800         """
    801        
    802         log.msg('Closed connection to the server.')
    803         self.connected = False
    804    
    805     def _got_killed_jobs(self, killed_jobs):
    806         """
    807         Callback for check_killed_jobs.
    808        
    809         :type killed_jobs: dict
    810         :param killed_jobs: dict of job jdicts which were killed
    811        
    812         """
    813        
    814         if killed_jobs == None:
    815             return
    816         killed_jobs = [expand_job(jdict) for jdict in killed_jobs]
    817         for worker in self.worker_pool:
    818             if worker.job is None:
    819                 continue
    820             if worker.free:
    821                 continue
    822             for job in killed_jobs:
    823                 if job is None or worker.job is None:
    824                     continue
    825                 if worker.job.job_id == job.job_id:
    826                     msg = 'Processing killed job, restarting...'
    827                     log.msg(LOG_PREFIX % worker.id + msg)
    828                     worker.restart()
    829    
    830     def _retryConnect(self):
    831         log.msg('[Monitor] Disconnected, reconnecting in %s' % (5.0))
    832         if not self.connected:
    833             reactor.callLater(5.0, self.connect)
    834    
    835     def _catchConnectionFailure(self, failure):               
    836         log.msg("Error: ", failure.getErrorMessage())
    837         log.msg("Traceback: ", failure.printTraceback())
    838         self._disconnected(None)
    839    
    840     def _catch_failure(self, failure):
    841         log.msg("Error: ", failure.getErrorMessage())
    842         log.msg("Traceback: ", failure.printTraceback())
    843        
    844     def connect(self):
    845         """
    846         This method connects the monitor to a remote PB server.
    847        
    848         """
    849        
    850         if self.connected: # Don't connect multiple times
    851             return
    852        
    853         self.factory = ClientFactory(self._login, (), {})
    854         cred = None
    855         if self.ssl:
    856             cred = X509Credentials()
    857             reactor.connectTLS(self.server, self.port, self.factory, cred)
    858         else:
    859             reactor.connectTCP(self.server, self.port, self.factory)
    860        
    861         log.msg(DELIMITER)
    862         log.msg('DSAGE Worker')
    863         log.msg('Started with PID: %s' % (os.getpid()))
    864         log.msg('Connecting to %s:%s' % (self.server, self.port))
    865         if cred is not None:
    866             log.msg('Using SSL: True')
    867         else:
    868             log.msg('Using SSL: False')
    869         log.msg(DELIMITER)
    870    
    871     def _login(self, *args, **kwargs):
    872         if self.authenticate:
    873             log.msg('Connecting as authenticated worker...\n')
    874             d = self.factory.login(self.creds, (self, self.host_info))
    875         else:
    876             from twisted.cred.credentials import Anonymous
    877             log.msg('Connecting as unauthenticated worker...\n')
    878             d = self.factory.login(Anonymous(), (self, self.host_info))
    879         d.addCallback(self._connected)
    880         d.addErrback(self._catchConnectionFailure)
    881            
    882         return d
    883        
    884     def pool_workers(self, remoteobj):
    885         """
    886         Creates the worker pool.
    887        
    888         """
    889 
    890         log.msg('[Monitor] Starting %s workers...' % (self.workers))
    891         self.worker_pool = [Worker(remoteobj, x, self.log_level,
    892                             self.poll_rate)
    893                             for x in range(self.workers)]
    894 
    895        
    896     def remote_set_uuid(self, uuid):
    897         """
    898         Sets the worker's uuid.
    899         This is called by the server.
    900        
    901         """
    902        
    903         from sage.dsage.misc.misc import set_uuid
    904         set_uuid(uuid)
    905    
    906 
    907     def remote_calc_score(self, script):
    908         """
    909         Calculates the worker score.
    910        
    911         :type script: string
    912         :param script: script to score the worker
    913        
    914         """
    915        
    916         from sage.dsage.misc.misc import exec_wrs
    917        
    918         return exec_wrs(script)
    919 
    920    
    921     def remote_kill_job(self, job_id):
    922         """
    923         Kills the job given the job id.
    924        
    925         :type job_id: string
    926         :param job_id: the unique job identifier.
    927        
    928         """
    929        
    930         print 'Killing %s' % (job_id)
    931         for worker in self.worker_pool:
    932             if worker.job != None:
    933                 if worker.job.job_id == job_id:
    934                     worker.restart()
    935        
    936        
    937 def usage():
    938     """
    939     Parses the command line options and returns them.
    940 
    941     """
    942    
    943     from optparse import OptionParser
    944    
    945     usage = ['usage: %prog [options]\n',
    946               'Bug reports to <yqiang@gmail.com>']
    947     parser = OptionParser(usage=''.join(usage))
    948     parser.add_option('-s', '--server',
    949                       dest='server',
    950                       default='localhost',
    951                       help='hostname. Default is localhost')
    952     parser.add_option('-p', '--port',
    953                       dest='port',
    954                       type='int',
    955                       default=8081,
    956                       help='port to connect to. default=8081')
    957     parser.add_option('--poll',
    958                       dest='poll',
    959                       type='float',
    960                       default=5.0,
    961                       help='seconds between polls for a new job. default=5')
    962     parser.add_option('-a', '--authenticate',
    963                       dest='authenticate',
    964                       default=False,
    965                       action='store_true',
    966                       help='Connect as an authenticated worker. default=False')
    967     parser.add_option('-f', '--logfile',
    968                       dest='logfile',
    969                       default=os.path.join(DSAGE_DIR, 'worker.log'),
    970                       help='log file')
    971     parser.add_option('-l', '--loglevel',
    972                       dest='loglevel',
    973                       type='int',
    974                       default=0,
    975                       help='log level. default=0')
    976     parser.add_option('--ssl',
    977                       dest='ssl',
    978                       action='store_true',
    979                       default=False,
    980                       help='enable ssl (disabled by default)')
    981     parser.add_option('--privkey',
    982                       dest='privkey_file',
    983                       default=os.path.join(DSAGE_DIR, 'dsage_key'),
    984                       help='private key file. default = ' +
    985                            '~/.sage/dsage/dsage_key')
    986     parser.add_option('--pubkey',
    987                       dest='pubkey_file',
    988                       default=os.path.join(DSAGE_DIR, 'dsage_key.pub'),
    989                       help='public key file. default = ' +
    990                            '~/.sage/dsage/dsage_key.pub')
    991     parser.add_option('-w', '--workers',
    992                       dest='workers',
    993                       type='int',
    994                       default=2,
    995                       help='number of workers. default=2')
    996     parser.add_option('--priority',
    997                       dest='priority',
    998                       type='int',
    999                       default=20,
    1000                       help='priority of workers. default=20')
    1001     parser.add_option('-u', '--username',
    1002                       dest='username',
    1003                       default=getuser(),
    1004                       help='username')
    1005     parser.add_option('--noblock',
    1006                       dest='noblock',
    1007                       action='store_true',
    1008                       default=False,
    1009                       help='indicates that the server was ' +
    1010                            'started in blocking mode')
    1011     (options, args) = parser.parse_args()
    1012 
    1013     return options
    1014        
    1015 def main():
    1016     options = usage()
    1017     SSL = options.ssl
    1018     monitor = Monitor(server=options.server, port=options.port,
    1019                       username=options.username, ssl=SSL,
    1020                       workers=options.workers,
    1021                       authenticate=options.authenticate,
    1022                       priority=options.priority, poll=options.poll,
    1023                       log_file=options.logfile,
    1024                       log_level=options.loglevel,
    1025                       pubkey_file=options.pubkey_file,
    1026                       privkey_file=options.privkey_file)
    1027     monitor.connect()
    1028     try:
    1029         if options.noblock:
    1030             reactor.run(installSignalHandlers=0)
    1031         else:
    1032             reactor.run(installSignalHandlers=1)
    1033     except:
    1034         log.msg('Error starting the twisted reactor, exiting...')
    1035         sys.exit()
    1036 
    1037 if __name__ == '__main__':
    1038     usage()
    1039     main()
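
For orientation, here is a minimal sketch of driving the worker pool programmatically rather than through main(); it assumes Monitor (defined above) has already been imported from wherever this module lives, and the connection settings are placeholders:

    # Illustrative sketch only: start a small monitor against a local server.
    # 'Monitor' is the class defined above; its import path is not shown in
    # this hunk, so the import is left to the reader.
    from twisted.internet import reactor

    monitor = Monitor(server='localhost', port=8081, workers=2,
                      ssl=False, authenticate=False, poll=5.0)
    monitor.connect()   # schedules the TCP/TLS connection to the server
    reactor.run()       # the workers then poll for jobs until the reactor stops
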
  • sage/dsage/server/server.py

    diff --git a/sage/dsage/server/server.py b/sage/dsage/server/server.py
    a b  
    11##############################################################################
    2 #                                                                     
    3 #  DSAGE: Distributed SAGE                     
    4 #                                                                             
    5 #       Copyright (C) 2006, 2007 Yi Qiang <yqiang@gmail.com>               
    6 #                                                                           
    7 #  Distributed under the terms of the GNU General Public License (GPL)       
     2#
     3#  DSAGE: Distributed SAGE
     4#
     5#       Copyright (C) 2006, 2007 Yi Qiang <yqiang@gmail.com>
     6#
     7#  Distributed under the terms of the GNU General Public License (GPL)
    88#
    99#    This code is distributed in the hope that it will be useful,
    1010#    but WITHOUT ANY WARRANTY; without even the implied warranty of
     
    2020
    2121from twisted.spread import pb
    2222from twisted.python import log
     23from twisted.internet import defer, task
     24
    2325from subprocess import Popen
    2426
    2527from sage.dsage.database.job import expand_job
    2628from sage.dsage.misc.misc import timedelta_to_seconds
    2729from sage.dsage.misc.constants import SERVER_LOG, WORKER_LOG
    2830
     31
    2932class DSageServer(pb.Root):
    3033    """
    3134    Distributed Sage server which does all the coordination of distributing
    3235    jobs, creating new jobs and accepting job submissions.
    33        
     36
    3437    """
    35    
     38
    3639    def __init__(self, jobdb, workerdb, clientdb, log_level=0):
    3740        """
    3841        Initializes the Distributed Sage PB Server.
    3942
    4043        :type jobdb: sage.dsage.database.jobdb.JobDatabaseSQLite
    4144    :param jobdb: an instance of the job database
    42        
     45
    4346        :type workerdb: sage.dsage.database.workerdb.WorkerDatabase
    4447        :param workerdb: instance of the monitor database
    45        
     48
    4649        :type log_level: integer
    4750        :param log_level: level of logging verbosity, higher is more verbose
    48        
     51
    4952        """
    5053
    5154        self.jobdb = jobdb
     
    5457        self.log_level = log_level
    5558        self.clients = []
    5659        self.workers = {}
     60        # Setting this to true results in NO authentication being made.
     61        self._testing = False
     62
     63    @defer.inlineCallbacks
     64    def push_jobs(self):
     65        """
     66        I get a list of free workers and try to push jobs to them.
     67        """
    5768       
    58         # Setting this to true results in NO authentication being made.
    59         self._testing = False
     69        log.msg("DEBUG: Trying to push jobs to workers...")
     70       
     71        for uuid, remoteobj in self.workers.iteritems():
     72            now = datetime.datetime.now()
     73            jdict = self.get_job()
     74            if jdict is None:
     75                return
     76            ready = yield remoteobj.callRemote('do_job', jdict)
     77            if not ready:
     78                jdict['status'] = 'new' 
     79                self.submit_job(jdict)
     80            jdict['uuid'] = uuid
     81            self.submit_job(jdict)
     82            if self.log_level > 3:
     83                log.msg('%s is ready: %s' % (uuid, ready))
     84                log.msg('time to call push_jobs: %s' % (datetime.datetime.now() - now))
     85               
     86    @defer.inlineCallbacks
     87    def push_job(self, monitor):
     88        jdict = self.get_job()
     89        if jdict is None:
     90            return
     91        else: # TODO: Need to add error handling
     92            result = yield monitor.callRemote('do_job', jdict)
     93       
     94        defer.returnValue(result)
    6095       
    6196    def get_job(self):
    6297        """
    6398        Returns a job to the client.
    64        
     99
    65100        This method returns the first job that has not been completed
    66101        in our job database.
    67        
     102
    68103        :type authenticated: boolean
    69104        :param authenticated: whether or not the requester is authenticated
    70        
     105
    71106        """
    72        
     107
    73108        job = self.jobdb.get_job()
    74109        if job == None:
    75110            if self.log_level > 3:
     
    82117            job.status = 'processing'
    83118            job.start_time = datetime.datetime.now()
    84119            self.jobdb.update_job(job)
    85            
     120
    86121        return job._reduce()
    87    
     122
    88123    def set_job_uuid(self, job_id, uuid):
    89124        """
    90125        Sets the job's universal unique identifier, which identifies the worker
    91126        that processed the job.
    92        
     127
    93128        :type job_id: string
    94129        :param job_id: unique job identifier
    95        
     130
    96131        :type uuid: string
    97132        :param uuid: universal unique identifier for the worker
    98        
     133
    99134        """
    100        
     135
    101136        return self.jobdb.set_job_uuid(job_id, uuid)
    102    
     137
    103138    def set_busy(self, uuid, busy):
    104139        """
    105140        Sets whether or not a particular worker is busy.
    106        
     141
    107142        :type uuid: string
    108143        :param uuid: universal unique identifier
    109        
     144
    110145        :type busy: boolean
    111146        :param busy: Whether or not the worker is busy
    112        
     147
    113148        """
    114        
     149
    115150        return self.workerdb.set_busy(uuid, busy=busy)
    116        
     151
    117152    def get_job_by_id(self, job_id):
    118153        """
    119         Returns a job by the job id. 
    120        
     154        Returns a job by the job id.
     155
    121156        :type job_id: string
    122157        :param job_id: unique job identifier
    123        
     158
    124159        """
    125        
     160
    126161        jdict = self.jobdb.get_job_by_id(job_id)._reduce()
    127        
     162
    128163        return jdict
    129164
    130165    def get_job_result_by_id(self, job_id):
     
    134169        :param job_id: unique job identifier
    135170
    136171        """
    137        
     172
    138173        job = self.jobdb.get_job_by_id(job_id)
    139        
     174
    140175        return job.result
    141176
    142177    def get_job_output_by_id(self, job_id):
     
    153188
    154189    def sync_job(self, job_id):
    155190        raise NotImplementedError
    156        
     191
    157192    def get_jobs_by_username(self, username, status):
    158193        """
    159         Returns jobs created by username. 
     194        Returns jobs created by username.
    160195
    161196        Parameters:
    162197        username -- the username (str)
     
    164199        """
    165200
    166201        jobs = self.jobdb.get_jobs_by_username(username, status)
    167            
     202
    168203        return [job._reduce() for job in jobs]
    169204
    170205    def submit_job(self, jdict):
    171206        """
    172207        Submits a job to the job database.
    173        
     208
    174209        Parameters:
    175210        jdict -- the internal dictionary of a Job object
    176        
    177         """
    178        
     211        """
     212
    179213        if self.log_level > 3:
    180214            log.msg('[submit_job] %s' % (jdict))
    181215        if jdict['code'] is None:
    182216            return False
    183217        if jdict['name'] is None:
    184218            jdict['name'] = 'Default'
    185        
     219
    186220        jdict['update_time'] = datetime.datetime.now()
    187        
     221
    188222        job_id = self.jobdb.store_jdict(jdict)
    189223        log.msg('Received job %s' % job_id)
    190        
     224
    191225        return job_id
    192        
     226
    193227    def get_all_jobs(self):
    194228        """
    195         Returns a list of all jobs in the database. 
    196        
     229        Returns a list of all jobs in the database.
     230
    197231        """
    198        
     232
    199233        return [job._reduce() for job in self.jobdb.get_all_jobs()]
    200        
     234
    201235    def get_active_jobs(self):
    202236        """
    203237        Returns a list of active jobs"""
     
    207241    def get_active_clients_list(self):
    208242        """
    209243        Returns a list of active clients.
    210        
     244
    211245        """
    212        
     246
    213247        raise NotImplementedError
    214248
    215249    def get_killed_jobs_list(self):
    216250        """
    217         Returns a list of killed job jdicts. 
    218        
     251        Returns a list of killed job jdicts.
     252
    219253        """
    220        
     254
    221255        killed_jobs = self.jobdb.get_killed_jobs_list()
    222        
     256
    223257        return [job._reduce() for job in killed_jobs]
    224258
    225259    def get_next_job_id(self):
    226260        """
    227261        Returns the next job id.
    228        
     262
    229263        """
    230        
     264
    231265        if self.log_level > 0:
    232266            log.msg('[DSage, get_next_job_id] Returning next job ID')
    233            
     267
    234268        return self.jobdb.get_next_job_id()
    235269
    236     def job_done(self, job_id, output, result, completed, cpu_time):
     270    def job_done(self, job_id, output, result, cpu_time):
    237271        """
    238272        job_done is called by the worker's check_output method.
    239273
     
    242276        output -- the stdout from the worker (string)
    243277        result -- the result from the client (compressed pickle string)
    244278                  result could be 'None'
    245         completed -- whether or not the job is completed (bool)
     279        """
    246280
    247         """
    248        
    249281        if self.log_level > 0:
    250282            log.msg('[DSage, job_done] %s called back' % (job_id))
    251283        if self.log_level > 3:
    252284            log.msg('[DSage, job_done] output: %s ' % output)
    253             log.msg('[DSage, job_done] completed: %s ' % completed)
    254        
     285
    255286        output = str(output)
    256287        job = self.jobdb.get_job_by_id(job_id)
    257288        job.output += output
    258289        job.wall_time = datetime.datetime.now() - job.start_time
    259290        job.update_time = datetime.datetime.now()
    260         if completed:
    261             job.result = result
    262             job.cpu_time = cpu_time
    263             job.status = 'completed'
    264             job.finish_time = datetime.datetime.now()
     291        job.result = result
     292        job.cpu_time = cpu_time
     293        job.status = 'completed'
     294        job.finish_time = datetime.datetime.now()
    265295        self.jobdb.sess.save_or_update(job)
    266296        self.jobdb.sess.commit()
    267        
     297
    268298        return job_id
    269299
    270300    def job_failed(self, job_id, traceback):
     
    273303
    274304        Parameters:
    275305        job_id -- the job id (str)
    276        
     306
    277307        """
    278    
     308
    279309        job = self.jobdb.get_job_by_id(job_id)
    280310        job.failures += 1
    281311        job.output = traceback
    282        
     312
    283313        if job.failures > self.jobdb.failure_threshold:
    284314            job.status = 'failed'
    285315        else:
    286316            job.status = 'new' # Put job back in the queue
    287        
     317
    288318        if self.log_level > 1:
    289319            s = ['[DSage, job_failed] Job %s failed ' % (job_id),
    290320                 '%s times. ' % (job.failures)]
     
    292322            if job.status == 'failed':
    293323                msg = '%s failed, removing from queue.' % (job_id)
    294324                log.msg(msg)
     325
     326        job.update_time = datetime.datetime.now()
    295327       
    296         job.update_time = datetime.datetime.now()
    297            
    298328        return self.jobdb.store_jdict(job._reduce())
    299329
    300330    def kill_job(self, job_id):
    301331        """
    302         Kills a job. 
     332        Kills a job.
    303333
    304334        Marks as job as killed and moves it to the killed jobs database.
    305        
     335
    306336        """
    307        
     337
    308338        try:
    309339            job = self.jobdb.set_killed(job_id, killed=True)
    310340            if self.log_level > 0:
     
    313343            log.err(msg)
    314344            log.msg('Failed to kill job %s' % job_id)
    315345            return None
     346       
     347        if job.uuid:
     348            self.workers[job.uuid].callRemote('kill_job', job_id)
    316349
    317         try:
    318             self.workers[job.uuid].callRemote('kill_job', job_id)
    319         except KeyError:
    320             pass
    321            
    322350        return job_id
    323351
    324352    def get_worker_list(self):
    325353        """
    326354        Returns a list of workers as a 3 tuple.
    327355
    328         tuple[0] = broker object 
     356        tuple[0] = broker object
    329357        tuple[1] = ip
    330358        tuple[2] = port
    331359
    332360        """
    333        
     361
    334362        return self.workerdb.get_worker_list()
    335    
     363
    336364    def get_client_list(self):
    337365        """
    338366        Returns a list of clients.
    339        
     367
    340368        """
    341        
     369
    342370        return [c.username for c in self.clientdb.get_client_list()]
    343371
    344372    def get_cluster_speed(self):
     
    347375
    348376        """
    349377        raise NotImplementedError
    350        
    351    
     378
    352379    def get_worker_count(self):
    353380        """
    354381        Returns a list of busy and free workers.
    355        
     382
    356383        """
    357        
     384
    358385        count = {}
    359386        free_workers = self.workerdb.get_worker_count(connected=True,
    360387                                                       busy=False)
    361388        working_workers = self.workerdb.get_worker_count(connected=True,
    362389                                                          busy=True)
    363        
     390
    364391        count['free'] = free_workers
    365392        count['working'] = working_workers
    366        
     393
    367394        return count
    368    
     395
    369396    def upgrade_workers(self):
    370397        """
    371398        Upgrades the connected workers to the latest SAGE version.
    372        
     399
    373400        """
    374        
     401
    375402        raise NotImplementedError
    376    
     403
    377404    def read_log(self, n, kind):
    378405        """
    379406        Returns the last n lines of the server log.
    380407        Defaults to returning the last 50 lines of the server log.
    381408        """
    382        
     409
    383410        if kind == 'server':
    384411            log_file = SERVER_LOG
    385412        elif kind == 'worker':
     
    388415            log = os.popen('tail -n %s %s' % (n, log_file)).read()
    389416        except:
    390417            log = "Error reading %s" % log_file
    391            
    392         return log
    393  No newline at end of file
     418
     419        return log
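
Since most of the server methods above simply shuttle job dictionaries back and forth (get_job hands out job._reduce(), and the monitor side rebuilds a Job with expand_job), a short sketch of that round trip may help; the job name and code string are arbitrary examples:

    # Illustrative sketch only: the jdict round trip the server relies on.
    # Job and expand_job come from sage.dsage.database.job, as imported above.
    from sage.dsage.database.job import Job, expand_job

    job = Job(name='example', code='print 2 + 2', timeout=60)
    jdict = job._reduce()         # what submit_job()/get_job() pass over the wire
    same_job = expand_job(jdict)  # what the worker reconstructs before running it
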
  • sage/dsage/server/tests/test_server.py

    diff --git a/sage/dsage/server/tests/test_server.py b/sage/dsage/server/tests/test_server.py
    a b  
    161161        completed = True
    162162        job_id = self.dsage_server.job_done(job.job_id,
    163163                                            output, result,
    164                                             completed,
    165164                                            time.time() - time.time())
    166165        job = expand_job(self.dsage_server.get_job_by_id(job_id))
    167166        self.assertEquals(job.output, output)
     
    173172        output = 'testing'
    174173        completed = False
    175174        job_id = self.dsage_server.job_done(job.job_id, output, result,
    176                                             completed,
    177175                                            time.time() - time.time())
    178176        job = expand_job(self.dsage_server.get_job_by_id(job_id))
    179177        self.assert_(isinstance(job.output, str))
    180         self.assert_(job.status != 'completed')
     178        self.assert_(job.status == 'completed')
    181179       
    182180    def testjob_failed(self):
    183181        job = expand_job(self.dsage_server.get_job())
  • sage/dsage/tests/testdoc.py

    diff --git a/sage/dsage/tests/testdoc.py b/sage/dsage/tests/testdoc.py
    a b  
    11r"""
    22WARNING:
    33The following examples will not work if you have not run \code{dsage.setup()}.
    4     sage: from sage.dsage.misc.misc import find_open_port
    5     sage: port = find_open_port().next()
    6     sage: dsage.server(blocking=False, port=port, verbose=False, ssl=False, log_level=3)
     4    sage: from sage.dsage.misc.misc import test_dsage
     5    sage: d = test_dsage(dsage)
    76    Going into testing mode...
    8     sage: dsage.worker(blocking=False, port=port, verbose=False, ssl=False, log_level=3, poll=0.1, authenticate=False)
    9     sage: sleep(2.0)
    10     sage: d = dsage.connect(port=port, ssl=False)
    11     sage: sleep(2.0)
    127    sage: a = d('2 + 3')
    138    sage: a.wait(timeout=30)
    149    sage: a
     
    2116   
    2217Set timeout to 30 seconds so it will not hang the doctests indefinitely.
    2318
    24     sage: _ = [x.wait(timeout=30) for x in v]    # long time
    25     sage: print v                                # long time
     19    sage: _ = [x.wait(timeout=30) for x in v]   
     20    sage: print v                               
    2621    [10000, 10201, 10404]
    2722   
    2823    sage: _ = [x.kill() for x in v]
     
    3227    sage: j.wait()
    3328    sage: j
    3429    10
    35     sage: t = DistributedFunctionTest(d, 5) # long time
    36     sage: t.wait(timeout=60) # long time
    37     sage: t.done # long time
     30    sage: t = DistributedFunctionTest(d, 5)
     31    sage: t.wait(timeout=10)
     32    sage: t.done
    3833    True
    39     sage: t.result # long time
     34    sage: t.result
    4035    15
    4136   
    4237    The following code block makes sure that things exit cleanly
  • sage/dsage/twisted/pb.py

    diff --git a/sage/dsage/twisted/pb.py b/sage/dsage/twisted/pb.py
    a b  
    4040    """
    4141    Custom implementation of the ClientFactory that supports logging in
    4242    with public key as well as unauthenticated credentials.
    43    
    4443    """
    45    
     44
     45    factor = 1.6180339887498948
     46
    4647    def __init__(self, cb, args, kwargs):
    4748        PBClientFactory.__init__(self)
    4849        self._observer = (cb, args, kwargs)
    49    
     50
     51    # I am overriding this method from ReconnectingClientFactory so that
     52    # resetDelay does not actually reset the factory's retryConnection
     53    # ivar
     54    def resetDelay(self):
     55        """Call me after a successful connection to reset.
     56
     57        I reset the delay and the retry counter.
     58        """
     59
     60        self.delay = self.initialDelay
     61        self.retries = 0
     62        self._callID = None
     63
    5064    def login(self, creds, mind=None):
    5165        if ISSHPrivateKey.providedBy(creds):
    5266            d = self.getRootObject()
     
    6478            return d
    6579        else:
    6680            raise TypeError('Invalid credentials.')
    67    
     81
    6882    def _cbSendUsername(self, root, username, algorithm, blob, sig_data,
    6983                        signature, mind):
    7084        d = root.callRemote("login", username, algorithm, blob, sig_data,
    7185                                signature, mind)
    7286        return d
    73    
     87
    7488    def _cbAnonymousLogin(self, root, mind):
    7589        d = root.callRemote("login_authenticate", mind)
    76        
     90
    7791        return d
    78    
     92
    7993    def startConnecting(self, server, port, cred=None):
    8094        if cred:
    8195            reactor.connectTLS(self.server, self.port, self.factory,
    8296                               cred)
    8397        else:
    8498            reactor.connectTCP(self.server, self.port, self.factory)
    85    
     99
    86100    def clientConnectionMade(self, broker):
    87101        PBClientFactory.clientConnectionMade(self, broker)
     102        self.resetDelay()
    88103        cb, args, kwargs = self._observer
    89104        cb(self._root, *args, **kwargs)
    90    
     105
    91106    def clientConnectionLost(self, connector, reason, reconnecting=0):
    92107        PBClientFactory.clientConnectionLost(self, connector, reason,
    93108                                             reconnecting=1)
    94109        ReconnectingClientFactory.clientConnectionLost(self, connector,
    95110                                                       reason)
    96    
     111
    97112    def clientConnectionFailed(self, connector, reason):
    98113        PBClientFactory.clientConnectionFailed(self, connector, reason)
    99114        ReconnectingClientFactory.clientConnectionFailed(self, connector,
     
    110125                                                blob,
    111126                                                data,
    112127                                                signature)
    113        
     128
    114129        d = self.portal.login(pubkey_cred, mind, IPerspective)
    115130        d.addCallback(self._loggedIn)
    116        
     131
    117132        return d
    118    
     133
    119134    def remote_login_authenticate(self, mind):
    120135        d = self.portal.login(Anonymous(), mind, IPerspective)
    121136        d.addCallback(self._loggedIn)
    122        
     137
    123138        return d
    124    
     139
    125140    def _loggedIn(self, (interface, perspective, logout)):
    126141        if not IJellyable.providedBy(perspective):
    127142            perspective = AsReferenceable(perspective, "perspective")
    128143        self.broker.notifyOnDisconnect(logout)
    129        
     144
    130145        return perspective
    131146
    132147class DefaultPerspective(pb.Avatar):
    133148    """
    134149    Custom implementation of pb.Avatar.
    135    
     150
    136151    """
    137    
     152
    138153    def __init__(self, dsage_server, avatarID):
    139154        self.dsage_server = dsage_server
    140155        self.avatarID = avatarID
     
    143158        self.clientdb = self.dsage_server.clientdb
    144159        self.kind = ''
    145160        self.host_info = []
    146    
     161
    147162    def __repr__(self):
    148163        return "<%s:'%s'>" % (self.kind, self.avatarID)
    149    
     164
    150165    def perspectiveMessageReceived(self, broker, message, args, kw):
    151166        self.broker = broker
    152        
     167
    153168        return pb.Avatar.perspectiveMessageReceived(self, broker,
    154169                                                    message, args, kw)
    155    
    156    
     170
     171
    157172    def attached(self, avatar, mind):
    158173        self.connections += 1
    159174        log.msg('%s connected' % avatar)
    160    
    161    
     175
     176
    162177    def detached(self, avatar, mind):
    163178        self.connections -= 1
    164179        log.msg('%s disconnected' % avatar)
    165180
    166181
    167 
    168182class AnonymousWorker(DefaultPerspective):
    169183    """
    170184    Defines the perspective of an unauthenticated worker.
    171    
     185
    172186    """
    173    
     187
    174188    def __init__(self, dsage_server, avatarID):
    175189        DefaultPerspective.__init__(self, dsage_server, avatarID)
     190        self.uuid = None
    176191       
    177192    def attached(self, avatar, mind):
    178193        DefaultPerspective.attached(self, avatar, mind)
    179         self.monitor = mind[0]
    180         self.host_info = mind[1]
    181         self.host_info['ip'] = mind[0].broker.transport.getPeer().host
    182         self.host_info['port'] = mind[0].broker.transport.getPeer().port
     194        if isinstance(mind[0], pb.RemoteReference):
     195            self.remoteobj = mind[0]
     196        else:
     197            raise ValueError("Worker must supply a pb.RemoteReference.")
     198        if isinstance(mind[1], dict):
     199            self.host_info = mind[1]
     200            self.host_info['ip'] = mind[0].broker.transport.getPeer().host
     201            self.host_info['port'] = mind[0].broker.transport.getPeer().port
     202            log.msg('Worker connected from %s:%s' % (self.host_info['ip'],
     203                                                     self.host_info['port']))
     204        else:
     205            log.msg('Worker did not supply a hostinfo, disconnecting')
     206            self.remoteobj.broker.transport.loseConnection()
     207            return
    183208        if check_uuid(self.host_info['uuid']):
    184209            uuid = self.host_info['uuid']
    185210            if self.workerdb.get_worker(uuid) is None:
     
    188213                self.workerdb.update_worker(self.host_info)
    189214        else:
    190215            uuid = gen_uuid()
    191             if isinstance(self.monitor, pb.RemoteReference):
    192                 d = self.monitor.callRemote('set_uuid', uuid)
    193                 self.host_info['uuid'] = uuid
    194                 self.workerdb.add_worker(self.host_info)
     216            d = self.remoteobj.callRemote('set_uuid', uuid)
     217            self.host_info['uuid'] = uuid
     218            self.workerdb.add_worker(self.host_info)
    195219        self.uuid = uuid
    196220        self.workerdb.set_connected(uuid, connected=True)
    197221        self.workerdb.set_authenticated(uuid, False)
    198         self.dsage_server.workers[uuid] = mind[0]
    199        
     222        self.dsage_server.workers[uuid] = self.remoteobj
     223
    200224        return uuid
    201    
     225
    202226    def detached(self, avatar, mind):
    203227        DefaultPerspective.detached(self, avatar, mind)
    204         self.workerdb.set_connected(self.uuid, connected=False)
    205         del self.dsage_server.workers[self.uuid]
    206    
     228        if self.uuid:
     229            self.workerdb.set_connected(self.uuid, connected=False)
     230        try:
     231            del self.dsage_server.workers[self.uuid]
     232        except KeyError:
     233            log.msg('Could not find %s in list of workers' % self.uuid)
     234        del self.remoteobj
     235
    207236    def perspective_get_job(self):
    208237        """
    209238        Returns jobs only marked as doable by unauthenticated workers.
    210        
     239
    211240        """
    212        
     241
    213242        uuid = self.host_info['uuid']
    214243        jdict = self.dsage_server.get_job()
    215         if jdict is not None:
    216             self.dsage_server.set_job_uuid(jdict['job_id'], uuid)
    217             self.dsage_server.set_busy(uuid, True)
    218         else:
    219             self.dsage_server.set_busy(uuid, False)
     244
     245        if jdict is None:
     246            return
    220247       
     248        self.dsage_server.set_job_uuid(jdict['job_id'], uuid)
     249        self.dsage_server.set_busy(uuid, True)
     250
    221251        return jdict
    222    
     252
    223253    def perspective_get_killed_jobs_list(self):
    224254        return self.dsage_server.get_killed_jobs_list()
    225    
     255
    226256    def perspective_job_failed(self, job_id, traceback):
    227257        if not isinstance(job_id, str):
    228258            log.msg('Bad job_id %s' % (job_id))
    229259            raise BadTypeError()
    230        
     260
    231261        uuid = self.host_info['uuid']
    232262        self.dsage_server.set_busy(uuid, False)
    233        
     263
    234264        return self.dsage_server.job_failed(job_id, traceback)
    235    
    236     def perspective_job_done(self, job_id, output, result, completed,
    237                              cpu_time):
    238         if not (isinstance(job_id, str) or isinstance(completed, bool)):
     265
     266    def perspective_job_done(self, job_id, output, result, cpu_time):
     267        if not isinstance(job_id, str):
    239268            log.msg('Bad job_id passed to perspective_job_done')
    240269            log.msg('job_id: %s' % (job_id))
    241270            log.msg('output: %s' % (output))
    242             log.msg('completed: %s' % (completed))
    243271            raise BadTypeError()
    244         if completed:
    245             uuid = self.host_info['uuid']
    246             self.dsage_server.set_busy(uuid, False)
    247        
    248         return self.dsage_server.job_done(job_id, output, result, completed,
    249                                          cpu_time)
     272        uuid = self.host_info['uuid']
     273        self.dsage_server.set_busy(uuid, False)
     274
     275        return self.dsage_server.job_done(job_id, output, result, cpu_time)
     276
    250277
    251278class Worker(AnonymousWorker):
    252279    """
    253280    Defines the perspective of an authenticated worker to the server.
    254    
     281
    255282    """
    256283    def __init__(self, dsage_server, avatarID):
    257284        DefaultPerspective.__init__(self, dsage_server, avatarID)
    258    
     285
    259286    def attached(self, avatar, mind):
    260287        uuid = AnonymousWorker.attached(self, avatar, mind)
    261288        self.workerdb.set_authenticated(uuid, True)
    262289        return uuid
    263    
    264     def perspective_get_job(self):
    265         """
    266         Returns jobs to authenticated workers.
    267        
    268         """
    269        
    270         try:
    271             uuid = self.host_info['uuid']
    272         except Exception, msg:
    273             raise ValueError("Could not match a uuid to the monitor.")
    274        
    275         jdict = self.dsage_server.get_job()
    276         if jdict is not None:
    277             self.dsage_server.set_job_uuid(jdict['job_id'], uuid)
    278             self.dsage_server.set_busy(uuid, True)
    279        
    280         return jdict
     290
    281291
    282292class Client(DefaultPerspective):
    283293    """
    284294    Defines the perspective of a regular user to the server.
    285    
     295
    286296    """
    287    
     297
    288298    def __init__(self, dsage_server, avatarID):
    289299        DefaultPerspective.__init__(self, dsage_server, avatarID)
    290    
     300
    291301    def attached(self, avatar, mind):
    292302        DefaultPerspective.attached(self, avatar, mind)
    293303        self.clientdb.set_connected(self.avatarID, connected=True)
    294304        self.clientdb.update_login_time(self.avatarID)
    295305        self.dsage_server.clients.append(self)
    296    
     306
    297307    def detached(self, avatar, mind):
    298308        DefaultPerspective.detached(self, avatar, mind)
    299309        self.clientdb.set_connected(self.avatarID, connected=False)
    300310        self.dsage_server.clients.remove(self)
    301    
     311
    302312    def perspective_get_job_by_id(self, job_id):
    303313        if not isinstance(job_id, str):
    304314            log.msg('Bad job_id [%s] passed to get_job_by_id' % (job_id))
    305315            raise BadTypeError()
    306316        job = self.dsage_server.get_job_by_id(job_id)
    307        
     317
    308318        return job
    309    
     319
    310320    def perspective_get_jobs_by_username(self, username, status):
    311321        if not (isinstance(username, str)):
    312322            log.msg('Bad username [%s] passed to ' +
    313323                    'perspective_get_jobs_by_username' % (username))
    314324            raise BadTypeError()
    315        
     325
    316326        jobs = self.dsage_server.get_jobs_by_username(username, status)
    317        
     327
    318328        return jobs
    319    
     329
    320330    def perspective_get_job_result_by_id(self, job_id):
    321331        if not isinstance(job_id, str):
    322332            log.msg('Bad job_id [%s] passed to' +
    323333                    'perspective_get_job_result_by_id' % (job_id))
    324334            raise BadTypeError()
    325        
     335
    326336        return self.dsage_server.get_job_result_by_id(job_id)
    327    
     337
    328338    def perspective_get_job_output_by_id(self, job_id):
    329339        if not isinstance(job_id, str):
    330340            log.msg('Bad job_id [%s] passed to ' +
    331341                    'get_job_output_by_id' % (job_id))
    332342            raise BadTypeError()
    333        
     343
    334344        return self.dsage_server.get_job_output_by_id(job_id)
    335    
     345
    336346    def perspective_sync_job(self, job_id):
    337347        if not isinstance(job_id, str):
    338348            return None
    339        
     349
    340350        return self.dsage_server.sync_job(job_id)
    341    
     351
    342352    def perspective_submit_job(self, jdict):
    343353        if jdict is None:
    344354            raise BadJobError()
    345            
    346         return self.dsage_server.submit_job(jdict)
    347    
     355
     356        job_id = self.dsage_server.submit_job(jdict)
     357        self.dsage_server.push_jobs()
     358       
     359        return job_id
     360
    348361    def perspective_web_server_url(self):
    349362        return "http://localhost:%s" % self.dsage_server.web_port
    350        
     363
    351364    def perspective_kill_job(self, job_id):
    352365        if not isinstance(job_id, str):
    353366            log.msg('Bad job_id [%s] passed to perspective_kill_job' % job_id)
    354367            raise BadTypeError()
    355        
     368
    356369        return self.dsage_server.kill_job(job_id)
    357    
     370
    358371    def perspective_get_cluster_speed(self):
    359372        return self.dsage_server.get_cluster_speed()
    360    
     373
    361374    def perspective_get_worker_list(self):
    362375        # return [x[1] for x in self.dsage_server.get_worker_list()]
    363376        return self.dsage_server.get_worker_list()
    364    
     377
    365378    def perspective_get_client_list(self):
    366379        return self.dsage_server.get_client_list()
    367    
     380
    368381    def perspective_get_worker_count(self):
    369382        return self.dsage_server.get_worker_count()
    370    
     383
    371384    def perspective_get_killed_jobs_list(self):
    372385        return self.dsage_server.get_killed_jobs_list()
    373    
     386
    374387    def perspective_read_log(self, n, kind):
    375388        return self.dsage_server.read_log(n, kind)
    376389
     
    378391class Admin(Client, Worker):
    379392    """
    380393    Defines the perspective of the admin.
    381    
     394
    382395    """
    383    
     396
    384397    def __init__(self, dsage_server, avatarID):
    385398        Client.__init__(self, dsage_server, avatarID)
     399
    386400
    387401class Tester(Client, Worker):
    388402    def __init__(self, dsage_server, avatarID):
    389403        DefaultPerspective.__init__(self, dsage_server, avatarID)
    390    
     404
    391405    def attached(self, avatar, mind):
    392406        try:
    393407            Worker.attached(self, avatar, mind)
    394408        except:
    395409            Client.attached(self, avatar, mind)
    396    
     410
    397411    def detached(self, avatar, mind):
    398412        try:
    399413            Worker.detached(self, avatar, mind)
    400414        except:
    401415            Client.detached(self, avatar, mind)
    402            
     416
     417
    403418class Realm(object):
    404419    implements(portal.IRealm)
    405    
     420
    406421    def __init__(self, dsage_server):
    407422        self.dsage_server = dsage_server
    408         self.max_connections = 1000
    409423        self.client_avatars = {}
    410        
     424        self.connections = 0
     425
    411426    def requestAvatar(self, avatarID, mind, *interfaces):
    412427        if not pb.IPerspective in interfaces:
    413428            raise NotImplementedError("No supported avatar interface.")
    414429        else:
     430            # testing mode, this should only be set for Sage doctests and
     431            # debugging purposes.
    415432            if self.dsage_server._testing:
    416433                kind = 'tester'
    417434                avatarID = 'tester'
     
    439456                    avatar = Worker(self.dsage_server, avatarID)
    440457        avatar.kind = kind
    441458        avatar.attached(avatar, mind)
    442         self.max_connections += 1
    443         if avatar.connections >= self.max_connections:
    444             raise ValueError('Too many connections.')
    445                
     459        self.connections += 1
     460
    446461        return pb.IPerspective, avatar, lambda a = avatar:a.detached(avatar,
    447462                                                                     mind)
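
    For reference, a minimal client-side sketch of the new submit flow. It assumes remoteobj is a pb.RemoteReference to a Client avatar obtained from the usual portal login, and that Perspective Broker maps callRemote('submit_job') onto perspective_submit_job above; the helper name submit_and_wait is illustrative only, not part of the patch.

        # Sketch only: client-side use of the perspective methods above.
        from twisted.internet import defer

        @defer.inlineCallbacks
        def submit_and_wait(remoteobj, jdict):
            # perspective_submit_job now returns the job_id and calls
            # push_jobs(), so idle monitors are offered the job right away.
            job_id = yield remoteobj.callRemote('submit_job', jdict)
            result = yield remoteobj.callRemote('get_job_result_by_id', job_id)
            defer.returnValue((job_id, result))
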
  • sage/dsage/twisted/pubkeyauth.py

    diff --git a/sage/dsage/twisted/pubkeyauth.py b/sage/dsage/twisted/pubkeyauth.py
    a b  
    5757        if client == None:
    5858            log.msg("Invalid username: '%s'" % credentials.username)
    5959            return defer.fail(AuthenticationError('Login failed.'))
     60        if client.username == 'tester': # Testing case.
     61            return credentials.username
    6062        try:
    6163            pubkey = keys.Key.fromString(credentials.blob, type='blob')
    6264        except:
     
    7678        else:
    7779            log.msg('Invalid signature for user %s' % (credentials.username))
    7880            return defer.fail(AuthenticationError('Login failed.'))
    79    
    80  No newline at end of file
     81   
  • sage/dsage/twisted/tests/test_pubkeyauth.py

    diff --git a/sage/dsage/twisted/tests/test_pubkeyauth.py b/sage/dsage/twisted/tests/test_pubkeyauth.py
    a b  
    7777                range(500)]])
    7878
    7979class PublicKeyCredentialsCheckerTest(unittest.TestCase):
    80     username = 'tester'
     80    username = 'testing123'
    8181    test_db = tempfile.NamedTemporaryFile()
    8282   
    8383    def setUp(self):
  • sage/dsage/twisted/tests/test_remote.py

    diff --git a/sage/dsage/twisted/tests/test_remote.py b/sage/dsage/twisted/tests/test_remote.py
    a b  
    208208        import time
    209209        job_id = jdict['job_id']
    210210        result = jdict['result']
    211         d = remoteobj.callRemote('job_done', job_id,
    212                                  'Nothing.', result, True,
     211        d = remoteobj.callRemote('job_done', job_id, 'Nothing.', result,
    213212                                 time.time() - time.time())
    214213        d.addCallback(self._done_job)
    215214       
  • sage/dsage/web/web_server.py

    diff --git a/sage/dsage/web/web_server.py b/sage/dsage/web/web_server.py
    a b  
    283283        <tbody>
    284284        """
    285285       
    286         for worker in self.workerdb.get_worker_list():
     286        for worker in self.workerdb.get_worker_list(filter={'connected': True}):
    287287            html += """
    288288            <tr>
    289289                <td>%s</td>
     
    478478            except Exception, msg:
    479479                return http.Response(stream=msg)
    480480            return http.Response(stream="User %s deleted!" % username)
    481         return http.Response(stream=self.gen_html())
    482  No newline at end of file
     481        return http.Response(stream=self.gen_html())
  • new file sage/dsage/worker/monitor.py

    diff --git a/sage/dsage/worker/__init__.py b/sage/dsage/worker/__init__.py
    new file mode 100644
    diff --git a/sage/dsage/worker/monitor.py b/sage/dsage/worker/monitor.py
    new file mode 100644
    - +  
     1"""
     2This file contains the DSage Monitor. It is responsible for constructing a
     3process pool for Sage instances and dispatching jobs to them.
     4"""
     5
     6import os
     7import sys
     8from getpass import getuser, getpass
     9import itertools
     10gen = itertools.count()
     11
     12from twisted.internet import reactor, defer, error
     13from twisted.python import log
     14from twisted.spread import pb
     15
     16from gnutls.constants import *
     17from gnutls.crypto import *
     18from gnutls.errors import *
     19from gnutls.interfaces.twisted import X509Credentials
     20
     21
     22from sage.dsage.twisted.pb import ClientFactory
     23from sage.dsage.misc.constants import DSAGE_DIR, DELIMITER
     24from sage.dsage.misc.hostinfo import HostInfo
     25from sage.dsage.misc.misc import gen_uuid, random_str
     26from sage.dsage.worker.process import SageProcessPool
     27
     28class DSageMonitor(pb.Referenceable):
     29    """
     30    I am the monitor responsible for controlling a Sage process pool. I also
     31    handle the remote connection to the DSage server.
     32    """
     33
     34    LOG_PREFIX = 'Monitor: '
     35    LOG_FILE = os.path.join(DSAGE_DIR, 'worker.log')
     36
     37    def __init__(self, server='localhost', port=8081, username=getuser(),
     38                 ssl=True, workers=2, authenticate=False, priority=20,
     39                 log_level=0, log_file=LOG_FILE,
     40                 pubkey_file=None, privkey_file=None, **kwds):
     41        """
     42        :type server: string
     43        :param server: hostname of remote server
     44
     45        :type port: integer
     46        :param port: port of remote server
     47
     48        :type username: string
     49        :param username: username to use for authentication
     50
     51        :type ssl: boolean
     52        :param ssl: specify whether or not to use SSL for the connection
     53
     54        :type workers: integer
     55        :param workers: specifies how many workers to launch
     56
     57        :type authenticate: boolean
     58        :param authenticate: specifies whether or not to authenticate
     59
     60        :type priority: integer
     61        :param priority: specifies the UNIX priority of the workers
     62
     63        :type log_level: integer
     64        :param log_level: specifies verbosity of logging, higher equals more
     65
     66        :type log_file: string
     67        :param log_file: specifies the location of the log_file
     68        """
     69
     70        self.server = server
     71        self.port = port
     72        self.username = username
     73        if username is None:
     74            self.username = 'Anonymous'
     75        self.ssl = ssl
     76        self.workers = workers
     77        self.authenticate = authenticate
     78        self.priority = priority
     79        self.log_level = log_level
     80        self.log_file = log_file
     81        self.pubkey_file = pubkey_file
     82        self.privkey_file = privkey_file
     83        self.host_info = HostInfo().host_info
     84        self.host_info['uuid'] = gen_uuid()
     85        self.host_info['workers'] = self.workers
     86        self.process_pool = SageProcessPool(self, workers)
     87        self.process_pool.start()
     88
     89        if self.authenticate:
     90            from twisted.cred import credentials
     91            from twisted.conch.ssh import keys
      92            self.data = random_str(500)
     93            self.pubkey = keys.Key.fromFile(self.pubkey_file)
     94            try:
     95                self.privkey = keys.Key.fromFile(self.privkey_file)
     96            except keys.BadKeyError:
     97                pphrase = self._getpassphrase()
     98                self.privkey = keys.Key.fromFile(self.privkey_file,
     99                                                  passphrase=pphrase)
     100            self.algorithm = 'rsa'
     101            self.blob = self.pubkey.blob()
     102            self.signature = self.privkey.sign(self.data)
     103            self.creds = credentials.SSHPrivateKey(self.username,
     104                                                   self.algorithm,
     105                                                   self.blob,
     106                                                   self.data,
     107                                                   self.signature)
     108
     109    def _getpassphrase(self):
     110        """
     111        Prompt the user for a passphrase.
     112        """
     113
     114        passphrase = getpass('Passphrase (Hit enter for None): ')
     115       
     116        return passphrase
     117
     118    @defer.inlineCallbacks
     119    def _login(self, *args, **kwargs):
     120        """
     121        I am called by the ClientFactory to login to the remote server.
     122        """
     123
     124        if self.authenticate:
     125            log.msg('Connecting as authenticated worker...\n')
     126            remoteobj = yield self.factory.login(self.creds,
     127                                                 (self, self.host_info))
     128        else:
     129            from twisted.cred.credentials import Anonymous
     130            log.msg('Connecting as unauthenticated worker...\n')
     131            remoteobj = yield self.factory.login(Anonymous(),
     132                                                 (self, self.host_info))
     133        if remoteobj:
     134            self.remoteobj = remoteobj
     135            self.remoteobj.notifyOnDisconnect(self._disconnected)
     136
     137        defer.returnValue(self.remoteobj)
     138
     139    def _disconnected(self, remoteobj):
     140        """
     141        :type remoteobj: remote object
     142        :param remoteobj: remote obj
     143        """
     144
     145        log.msg('Lost connection to the server.')
     146
     147    def connect(self):
     148        """
     149        This method connects the monitor to a remote PB server.
     150        """
     151       
     152        self.factory = ClientFactory(self._login, (), {})
     153        self.factory.continueTrying = True
     154        cred = None
     155        if self.ssl:
     156            cred = X509Credentials()
     157            reactor.connectTLS(self.server, self.port, self.factory, cred)
     158        else:
     159            reactor.connectTCP(self.server, self.port, self.factory)
     160
     161        log.msg(DELIMITER)
     162        log.msg('DSAGE Monitor')
     163        log.msg('Started with PID: %s' % (os.getpid()))
     164        log.msg('Connecting to %s:%s' % (self.server, self.port))
     165        if cred is not None:
     166            log.msg('Using SSL: True')
     167        else:
     168            log.msg('Using SSL: False')
     169        log.msg(DELIMITER)
     170
     171    def get_job(self):
     172        """
     173        I retrieve a job from the server.
     174        """
     175
     176        log.msg('Getting job...')
     177
     178        return self.remoteobj.callRemote('get_job')
     179
     180    @defer.inlineCallbacks
     181    def job_done(self, job_id, output, result, cpu_time=None):
     182        """
     183        Reports to the server that a job has finished. It also reports partial
     184        completeness by presenting the server with new output.
     185        Parameters:
     186
     187        :type output: string
     188        :param output: output of command (to sys.stdout)
     189        :type result: python pickle
     190        :param result: result of the job
     191        :type cpu_time: string
     192        :param cpu_time: how long the job took
     193        """
     194
     195        wait = 1.0
     196        self.busy = False
     197        try:
     198            result = yield self.remoteobj.callRemote('job_done',
     199                                                     job_id, output,
     200                                                     result, cpu_time)
     201        except:
     202            log.msg('Error trying to submit job status...')
     203            log.msg('Retrying to submit again in %s seconds...' % wait)
      204            reactor.callLater(wait, self.job_done, job_id, output, result, cpu_time)
     205            result = defer.Deferred()
     206            result.errback(error.ConnectionLost())
     207            yield result
     208
     209
     210    def do_job(self, jdict):
     211        """
     212        I try to execute a jdict if my ProcessPool is ready.
     213        Otherwise I return False.
     214        """
     215
     216        if self.process_pool.ready:
     217            log.msg("[Monitor] do_job, size of ready: ",
     218                    len(self.process_pool.ready))
     219            self.process_pool.do_job(jdict)
     220            return True
     221
     222        return False
     223
     224    def _job_failed(self, result):
     225        """
     226        I report failure of a job.
     227       
      228        :type result: tuple
      229        :param result: a (proto, job, failure) tuple; failure is a Twisted Failure
     230        """
     231       
     232        proto, job, failure = result
     233        log.msg('Job %s failed!' % (job.job_id))
     234        log.msg(self.LOG_PREFIX + 'Traceback: \n%s' % failure)
     235        self.remoteobj.callRemote('job_failed', job.job_id, failure)
     236        self.process_pool._recycle_process(proto)
     237
     238        return proto
     239
     240
     241    def remote_do_job(self, jdict):
     242        """
     243        I am called by the dsage server in order to execute a job.
     244        """
     245
     246        if jdict is None:
     247            return False
     248        return self.do_job(jdict)
     249
     250    def remote_is_ready(self):
     251        """
     252        I am called by the dsage server to see if there are any Sage processes
     253        ready to execute a job.
     254        """
     255
     256        if self.process_pool.ready:
     257            return True
     258        return False
     259
     260    def remote_kill_job(self, job_id):
     261        """
     262        I get called when a job needs to be killed.
     263        """
     264
     265        log.msg(self.LOG_PREFIX + "Trying to kill %s" % job_id)
     266        for p in self.process_pool.busy:
     267            if p.job.job_id == job_id:
     268                p.kill_job()
     269
     270
     271def usage():
     272    """
      273    Parses the command line options and returns them.
     274    """
     275   
     276    from optparse import OptionParser
     277   
     278    usage = ['usage: %prog [options]\n',
     279              'Bug reports to <yqiang@gmail.com>']
     280    parser = OptionParser(usage=''.join(usage))
     281    parser.add_option('-s', '--server',
     282                      dest='server',
     283                      default='localhost',
     284                      help='hostname. Default is localhost')
     285    parser.add_option('-p', '--port',
     286                      dest='port',
     287                      type='int',
     288                      default=8081,
     289                      help='port to connect to. default=8081')
     290    parser.add_option('-a', '--authenticate',
     291                      dest='authenticate',
     292                      default=False,
     293                      action='store_true',
      294                      help='connect as authenticated worker. default=False')
     295    parser.add_option('-f', '--logfile',
     296                      dest='log_file',
     297                      default=os.path.join(DSAGE_DIR, 'worker.log'),
     298                      help='log file')
     299    parser.add_option('-l', '--loglevel',
     300                      dest='log_level',
     301                      type='int',
     302                      default=0,
     303                      help='log level. default=0')
     304    parser.add_option('--ssl',
     305                      dest='ssl',
     306                      action='store_true',
     307                      default=True,
      308                      help='use SSL for the connection. default=True')
     309    parser.add_option('--privkey',
     310                      dest='privkey_file',
     311                      default=os.path.join(DSAGE_DIR, 'dsage_key'),
     312                      help='private key file. default = ' +
     313                           '~/.sage/dsage/dsage_key')
     314    parser.add_option('--pubkey',
     315                      dest='pubkey_file',
     316                      default=os.path.join(DSAGE_DIR, 'dsage_key.pub'),
     317                      help='public key file. default = ' +
     318                           '~/.sage/dsage/dsage_key.pub')
     319    parser.add_option('-w', '--workers',
     320                      dest='workers',
     321                      type='int',
     322                      default=2,
     323                      help='number of workers. default=2')
     324    parser.add_option('--priority',
     325                      dest='priority',
     326                      type='int',
     327                      default=20,
     328                      help='priority of workers. default=20')
     329    parser.add_option('-u', '--username',
     330                      dest='username',
     331                      default=getuser(),
     332                      help='username')
     333    parser.add_option('--noblock',
     334                      dest='noblock',
     335                      action='store_true',
     336                      default=False,
      337                      help='indicates that the server was ' +
     338                           'started in blocking mode')
     339    (options, args) = parser.parse_args()
     340   
     341    return options
     342
     343
     344if __name__ == '__main__':
     345    options = usage()
     346    monitor = DSageMonitor(**options.__dict__)
     347    monitor.connect()
     348    log.startLogging(sys.stdout)
     349    reactor.run()
     350
     351MONITOR_TAC = """from twisted.application import internet, service
     352
     353from sage.dsage.worker.monitor import DSageMonitor
     354
     355SERVER = '%s'
     356PORT = %s
     357WORKERS = %s
     358USERNAME = '%s'
     359SSL = %s
     360AUTHENTICATE = %s
      361PRIORITY = %s
      362LOG_LEVEL = %s
     363LOG_FILE = '%s'
     364PRIVKEY = '%s'
     365PUBKEY = '%s'
     366
     367def start_dsage_monitor():
     368    monitor = DSageMonitor(server=SERVER, port=PORT, username=USERNAME,
     369    ssl=SSL, workers=WORKERS, authenticate=AUTHENTICATE, priority=PRIORITY,
     370    log_level=LOG_LEVEL, log_file=LOG_FILE, pubkey_file=PUBKEY,
     371    privkey_file=PRIVKEY)
     372
     373    monitor.connect()
     374    service = internet.TCPClient(monitor.server, monitor.port,
     375            monitor.factory)
     376
     377    return service
     378   
     379application = service.Application('DSage Worker')
     380service = start_dsage_monitor()
     381service.setServiceParent(application)"""
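
    The __main__ block and the MONITOR_TAC template above show the two intended entry points. As a minimal sketch of embedding the monitor directly (hostname, port and worker count below are illustrative values only):

        import sys
        from twisted.internet import reactor
        from twisted.python import log
        from sage.dsage.worker.monitor import DSageMonitor

        log.startLogging(sys.stdout)
        # Constructing the monitor also starts its SageProcessPool.
        monitor = DSageMonitor(server='localhost', port=8081, workers=2,
                               ssl=True, authenticate=False)
        monitor.connect()  # sets up the ClientFactory and connectTLS/connectTCP
        reactor.run()
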
  • new file sage/dsage/worker/process.py

    diff --git a/sage/dsage/worker/process.py b/sage/dsage/worker/process.py
    new file mode 100644
    - +  
     1from twisted.spread import pb
     2from twisted.internet import defer
     3from twisted.internet import protocol
     4from twisted.internet import reactor
     5from twisted.python import log
     6
     7import os
     8import itertools
     9gen = itertools.count()
     10import datetime
     11import cPickle
     12import zlib
     13
     14from sage.misc.preparser import preparse_file
     15from sage.dsage.misc.constants import SAGE_BIN, TMP_WORKER_FILES
     16from sage.dsage.database.job import expand_job
     17
     18class SageProcessPool(pb.Referenceable):
     19    """
     20    I am a process pool that holds onto Sage processes and dispatches jobs to
     21    them.
     22    """
     23
     24    sage = SAGE_BIN
     25    args = ['sage', '-q']
     26    LOG_PREFIX = "ProcessPool: "
     27
     28    def __init__(self, monitor, num_workers=None, proto=None):
     29        """
     30        INPUT:
     31            monitor     -- a DSageMonitor instance
     32            num_workers -- number of workers to spawn
     33            proto       -- a ProcessProtocol implementation (defaults to
     34                           SageProcessProtocol)
     35        """
     36
     37        self.monitor = monitor
     38        self.num_workers = num_workers
     39        if num_workers is None:
     40            self.num_workers = 5 # TODO: autodetect cpus
     41        self.proto = proto
     42        if proto is None:
     43            self.proto = SageProcessProtocol
     44
     45        self.processes = set()
     46        self.ready = set()
     47        self.busy = set()
     48
     49    def start(self):
     50        """
     51        Call me to start the processes.
     52        """
     53
     54        self.started = True
     55        self._start_processes()
     56
     57    def _err_handler(self, failure):
     58        """
     59        Generic error handler for Deferreds.
     60        """
     61
     62        log.err(failure)
     63       
     64        return failure
     65
     66    @defer.inlineCallbacks
     67    def _process_ready(self, proto):
     68        """
     69        I am called when a process is ready.
     70
     71        INPUT:
     72            proto   -- a ProcessProtocol
     73        """
     74
     75        log.msg(self.LOG_PREFIX + "_process_ready")
     76        self.processes.add(proto)
     77        self.ready.add(proto)
     78        jdict = yield self.monitor.get_job()
     79        if jdict:
     80            self.do_job(jdict)
     81        else:
     82            log.msg(self.LOG_PREFIX + 'jdict is None')
     83
     84    def _job_finished(self, result):
     85        """
     86        I am called when a job is finished as handled by outReceived.
     87
     88        INPUT:
     89            result  -- a tuple (proto, job)
     90        """
     91
     92        proto, job = result
     93        log.msg(self.LOG_PREFIX + "Job %s finished" % job.job_id)
     94        self.processes.discard(proto)
     95        self.busy.discard(proto)
     96        d = self.monitor.job_done(job.job_id, proto.data, job.result,
      97                                  job.cpu_time)
     98        self._recycle_process(proto)
     99
     100    def _process_finished(self, proto):
     101        """
     102        I am called when a process has exited. This will be called after
     103        processEnded gets called.
     104
     105        INPUT:
     106            proto -- a ProcessProtocol
     107        """
     108
     109        self.processes.discard(proto)
     110        self.busy.discard(proto)
     111        p = SageProcessProtocol()
     112        ready, finished, failed = self._start_process(p)
     113
     114    def _recycle_process(self, proto):
     115        """
     116        I try to reuse an existing process without restarting it.
     117
     118        INPUT:
     119            proto - a ProcessProtocol
     120        """
     121
     122        log.msg(self.LOG_PREFIX + "Recycling %s" % proto.name)
     123        proto._soft_reset()
     124        proto.ready.addCallback(self._process_ready)
     125        proto.finished.addCallback(self._job_finished)
     126        proto.failed.addCallback(self.monitor._job_failed)
     127        self.busy.discard(proto)
     128
     129        return proto.ready, proto.finished
     130
     131    def _start_process(self, proto):
     132        """
     133        Starts a sage worker process.
     134
     135        INPUT:
     136            proto - a ProcessProtocol
     137        """
     138
     139        reactor.spawnProcess(proto, self.sage, self.args,
     140                             env=os.environ, usePTY=True)
     141        self.processes.add(proto)
     142        proto.ready.addCallback(self._process_ready)
     143        proto.finished.addCallback(self._job_finished)
     144        proto.failed.addCallback(self.monitor._job_failed)
     145        proto.process_ended.addCallback(self._process_finished)
     146
     147        return proto.ready, proto.finished, proto.failed
     148
     149    def _start_processes(self):
     150        """
     151        I start a number of processes as indicated by num_workers.
     152        """
     153
     154        for i in range(self.num_workers):
     155            log.msg(self.LOG_PREFIX + "Starting worker %s..." % (i))
     156            p = SageProcessProtocol()
     157            ready, finished, failed = self._start_process(p)
     158
     159    def do_job(self, jdict):
     160        """
     161        I take a process from the ready set and have it run the job.
     162       
     163        INPUT:
     164            jdict -- a dict which will be expanded to a Job object
     165        """
     166
     167        child = self.ready.pop()
     168        log.msg('Popped child: %s' % child.name)
     169        self.busy.add(child)
     170        d = child.run(jdict)
     171
     172
     173class SageProcessProtocol(protocol.ProcessProtocol):
     174    """
     175    I implement a protocol for communicating with a Sage instance.
     176    """
     177
     178    job_id = None
     179    LOG_PREFIX = "[Worker %s] "
     180    START_MARKER = '\x01r\x01e'
     181    END_MARKER = '\x01r\x01b'
     182
     183    def __init__(self, name=None):
     184        self.name = name
      185        if name is None:
     186            self.name = gen.next()
     187        self.data = ""
     188        self.ready = defer.Deferred()
     189        self.finished = defer.Deferred()
     190        self.failed = defer.Deferred()
     191        self.process_ended = defer.Deferred()
     192        self.job = None
     193        self.log_level = 5 # TODO: REMOVE
     194        self.started = False
     195        self.reset = False
     196
     197    def _soft_reset(self):
     198        """
     199        I try to reset the Sage interpreter by issuing the reset() command.
     200        """
     201
     202        log.msg(self.LOG_PREFIX % self.name + "Doing a soft reset...")
     203        self.reset = True
     204        self.ready = defer.Deferred()
     205        self.finished = defer.Deferred()
     206        self.failed = defer.Deferred()
     207        self.data = ""
     208        self.job = None
     209        self.transport.writeSequence("\r\r\r")
     210        self.transport.writeSequence("reset()\r")
     211       
     212        return self.ready, self.finished, self.failed
     213
     214    def extract_and_load_job_data(self, job):
     215        """
     216        Extracts all the data that is in a job object.
     217
     218        :type job: sage.dsage.database.job.Job
     219        :param job: a Job object
     220        """
     221
     222        if isinstance(job.data, list):
     223            if self.log_level > 2:
     224                msg = 'Extracting job data...'
     225                log.msg(self.LOG_PREFIX % self.name + msg)
     226            try:
     227                for var, data, kind in job.data:
     228                    try:
     229                        data = zlib.decompress(data)
     230                    except Exception, msg:
     231                        log.msg(data)
     232                        log.msg(msg)
     233                        continue
     234                    if kind == 'file':
     235                        data = preparse_file(data, magic=True, do_time=False,
     236                                             ignore_prompts=False)
     237                        var = os.path.join(self.tmp_job_dir, var)
     238                        f = open(var, 'wb')
     239                        f.write(data)
     240                        f.close()
     241                        if self.log_level > 2:
     242                            msg = 'Extracted %s' % f
     243                            log.msg(self.LOG_PREFIX % self.name + msg)
     244                        self.transport.writeSequence("execfile('%s')\r" % var)
     245                        self.transport.writeSequence("exit()\r")
     246                    if kind == 'object':
     247                        fname = os.path.join(self.tmp_job_dir, var + '.sobj')
     248                        if self.log_level > 2:
     249                            log.msg('Object to be loaded: %s' % fname)
     250                        f = open(fname, 'wb')
     251                        f.write(data)
     252                        f.close()
     253                        self.transport.writeSequence("%s = load('%s')\r"
     254                                                     % (var, fname))
     255                        if self.log_level > 2:
     256                            msg = 'Loaded %s' % fname
     257                            log.msg(self.LOG_PREFIX % self.name + msg)
     258            except Exception, msg:
     259                log.msg(self.LOG_PREFIX % self.name + str(msg))
     260
     261    def setup_tmp_dir(self, job):
     262        """
     263        Creates the temporary directory for the worker.
     264
     265        :type job: sage.dsage.database.job.Job
     266        :param job: a Job object
     267
     268        """
     269
     270        cur_dir = os.getcwd() # keep a reference to the current directory
     271        tmp_job_dir = os.path.join(TMP_WORKER_FILES, job.job_id)
     272        if not os.path.isdir(TMP_WORKER_FILES):
     273            os.mkdir(TMP_WORKER_FILES)
     274        if not os.path.isdir(tmp_job_dir):
     275            os.mkdir(tmp_job_dir)
     276        self.transport.writeSequence("os.chdir('%s')\r" % tmp_job_dir)
     277
     278        return tmp_job_dir
     279
     280    def write_job_file(self, job):
     281        """
     282        Writes out the job file to be executed to disk.
     283
     284        :type job: sage.dsage.database.job.Job
     285        :param job: A Job object
     286
     287        """
     288
     289        parsed_file = preparse_file(job.code, magic=True, do_time=False,
     290                                    ignore_prompts=False)
     291        job_filename = os.path.join(self.tmp_job_dir, str(job.name) + '.py')
     292        job_file = open(job_filename, 'w')
     293        BEGIN = "print '%s'\n\n" % (self.START_MARKER)
     294        END = "print '%s'\n\n" % (self.END_MARKER)
     295        GO_TO_TMP_DIR = """os.chdir('%s')\n""" % self.tmp_job_dir
     296        SAVE_TIME = """save((time.time()-dsage_start_time), 'cpu_time.sobj', compress=False)\n"""
     297        SAVE_RESULT = """try:
     298    save(DSAGE_RESULT, 'result.sobj', compress=True)
     299except:
     300    save('No DSAGE_RESULT', 'result.sobj', compress=True)
     301"""
     302        # job_file.write("alarm(%s)\n\n" % (job.timeout))
     303        job_file.write("import time\n\n")
     304        job_file.write(BEGIN)
     305        job_file.write('dsage_start_time = time.time()\n')
     306        job_file.write(parsed_file)
     307        job_file.write("\n\n")
     308        job_file.write(GO_TO_TMP_DIR)
     309        job_file.write(SAVE_RESULT)
     310        job_file.write(SAVE_TIME)
     311        job_file.write(END)
     312        job_file.write("\n")
     313        job_file.close()
     314        log.msg(self.LOG_PREFIX % self.name + 'Wrote file %s' % (job_filename))
     315
     316        return job_filename
     317
     318    def check_failure(self, sage_output):
     319        """
     320        Checks for signs of exceptions or errors in the output.
     321       
     322        :type sage_output: string
     323        :param sage_output: output from the sage instance
     324       
     325        """
     326       
      327        if sage_output is None:
     328            return False
     329        else:
     330            sage_output = ''.join(sage_output)
     331       
     332        if 'Traceback' in sage_output:
     333            return True
     334        elif 'Error' in sage_output:
     335            return True
     336        else:
     337            return False
     338
     339    def run(self, jdict):
     340        """
     341        Executes a job.
     342
     343        INPUT:
     344            jdict -- a dict which gets expanded to a Job object
     345        """
     346
     347        #if jdict is None:
     348        #    log.msg(self.LOG_PREFIX % self.name + "jdict is None!")
     349        #    return False
     350
     351        self.job = expand_job(jdict)
     352
     353        log.msg(self.LOG_PREFIX % self.name +
     354                "Running job %s" % self.job.job_id)
     355        self.busy = True
     356        self.job_start_time = datetime.datetime.now()
     357        self.tmp_job_dir = self.setup_tmp_dir(self.job)
     358        self.extract_and_load_job_data(self.job)
     359        job_filename = self.write_job_file(self.job)
     360        f = os.path.join(self.tmp_job_dir, job_filename)
     361        log.msg(self.LOG_PREFIX % self.name + "Executing %s" % f)
     362        self.transport.writeSequence("execfile('%s')\r" % f)
     363
     364        return True
     365
     366    def connectionMade(self):
     367        log.msg(self.LOG_PREFIX % self.name + "Connection made...")
     368
     369    def outReceived(self, data):
     370        self.data += data
     371        if self.check_failure(data):
     372            log.msg(self.LOG_PREFIX % self.name +
     373                    "Found failure in output...")
     374            self.failed.callback((self, self.job, self.data))
     375            return
     376        if self.END_MARKER in data:
     377            result = os.path.join(self.tmp_job_dir, 'result.sobj')
     378            cpu_time = os.path.join(self.tmp_job_dir, 'cpu_time.sobj')
     379            self.job.result = open(result, 'rb').read()
     380            self.job.cpu_time = cPickle.loads(open(cpu_time, 'rb').read())
     381            self.finished.callback((self, self.job))
     382            return
     383        if self.reset:
     384            self.started = True
     385            self.reset = False
     386            self.ready.callback(self)
     387            return
     388        if 'sage: ' in data and self.started is False:
     389            self.started = True
     390            log.msg(self.LOG_PREFIX % self.name + "is ready...")
     391            self.ready.callback(self)
     392            return
     393
     394    def errReceived(self, data):
     395        log.msg("FROM %s: %s" % (self.name, repr(data.strip())))
     396
     397    def outConnectionLost(self):
     398        log.msg(self.LOG_PREFIX % self.name + "outConnectionLost...")
     399
     400    def processEnded(self, status):
     401        log.msg(self.LOG_PREFIX % self.name + "ended: %s" %
     402                status.value.exitCode)
     403        self.process_ended.callback(self)
     404
     405    def kill_job(self):
     406        """
     407        I get called to kill a job. I will actually kill the process and
     408        restart it.
     409        """
     410
     411        pgid = os.getpgid(self.transport.pid)
     412        os.killpg(pgid, 9)
     413
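
    The pool relies on only three hooks of its monitor: get_job, job_done and _job_failed. A rough sketch of driving SageProcessPool with a stub monitor (the stub class and its printed messages are illustrative, not part of the patch):

        import sys
        from twisted.internet import defer, reactor
        from twisted.python import log
        from sage.dsage.worker.process import SageProcessPool

        class StubMonitor(object):
            def get_job(self):
                # Returning None makes _process_ready log 'jdict is None'
                # and leave the process idle in the ready set.
                return defer.succeed(None)

            def job_done(self, job_id, output, result, cpu_time=None):
                print 'done:', job_id, cpu_time
                return defer.succeed(None)

            def _job_failed(self, result):
                proto, job, failure = result
                print 'failed:', job.job_id
                return proto

        log.startLogging(sys.stdout)
        pool = SageProcessPool(StubMonitor(), num_workers=1)
        pool.start()  # spawns 'sage -q'; proto.ready fires at the first prompt
        reactor.run()
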
  • new file sage/dsage/worker/tests/test_process.py

    diff --git a/sage/dsage/worker/tests/__init__.py b/sage/dsage/worker/tests/__init__.py
    new file mode 100644
    diff --git a/sage/dsage/worker/tests/test_process.py b/sage/dsage/worker/tests/test_process.py
    new file mode 100644
    - +  
     1import unittest
     2import tempfile
     3
     4from sage.dsage.worker.process import SageProcessProtocol
     5from sage.dsage.worker.process import SageProcessPool
     6
     7class SageProcessPoolTest(unittest.TestCase):
     8    """
     9    I test the implementation of the SageProcessPool
     10    """
     11   
     12    def setUp(self):
     13        self.pool = SageProcessPool(None)
     14
     15
     16class SageProcessProtocolTest(unittest.TestCase):
     17    """
     18    I test the implementation of the SageProcessProtocol.
     19    """
     20
     21    def setUp(self):
     22        self.proto = SageProcessProtocol()
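
    These stubs only construct the objects. One behavioural check that could be added, using the defaults documented in process.py (a sketch, not part of the patch):

        import unittest

        from sage.dsage.worker.process import SageProcessPool
        from sage.dsage.worker.process import SageProcessProtocol

        class SageProcessPoolDefaultsTest(unittest.TestCase):
            def test_defaults(self):
                # No worker count given: the pool falls back to 5 workers
                # and to SageProcessProtocol, with empty ready/busy sets.
                pool = SageProcessPool(None)
                self.assertEqual(pool.num_workers, 5)
                self.assertTrue(pool.proto is SageProcessProtocol)
                self.assertEqual(len(pool.ready), 0)
                self.assertEqual(len(pool.busy), 0)

        if __name__ == '__main__':
            unittest.main()
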
  • setup.py

    diff --git a/setup.py b/setup.py
    a b  
    14531453                     'sage.dsage.database.tests',
    14541454                     'sage.dsage.server',
    14551455                     'sage.dsage.server.tests',
     1456                     'sage.dsage.worker',
     1457                     'sage.dsage.worker.tests',
    14561458                     'sage.dsage.interface',
    14571459                     'sage.dsage.interface.tests',
    14581460                     'sage.dsage.errors',
     
    14661468                     'sage.dsage.scripts',
    14671469                     ],
    14681470
    1469       scripts = ['sage/dsage/scripts/dsage_worker.py',
    1470                  'sage/dsage/scripts/dsage_setup.py',
     1471      scripts = ['sage/dsage/scripts/dsage_setup.py',
    14711472                 'spkg-debian-maybe',
    14721473                ],
    14731474