Ticket #12720: 12720.patch

File 12720.patch, 56.6 KB (added by roed, 7 years ago)

Still very much in progress

  • new file sage/doctest/analyse.py

    # HG changeset patch
    # User David Roe <roed.math@gmail.com>
    # Date 1332375397 25200
    # Node ID b45c4d9048cdc1b8ab788b1d3083cd367c20196f
    # Parent  9af2f539cd33041f7e439b99923d67f6144c61c0
    Adds a speed-regression testing database and visualization tools to the doctesting framework.
    
    diff --git a/sage/doctest/analyse.py b/sage/doctest/analyse.py
    new file mode 100644
    - +  
     1# Example
     2# Running Example
     3# Doctest
     4# File
     5
     6import os, sys
     7import math, re, sqlite3
     8from collections import defaultdict
     9from colorsys import hsv_to_rgb
     10from optparse import OptionParser
     11
     12from util import open_db, parse_clause
     13from sources import get_basename
     14
     15DEBUG=False
     16
     17bad_chars = re.compile('[^0-9a-zA-Z_]')
     18def identifier(s):
     19    return not bad_chars.search(s)
     20
     21def analyse(conn, before_clause, after_clause, after_table='timings', type='example'):
     22    """
     23    This function compares individual statements (examples) against all other
     24    executions of that same statement accross other files and runs.  The
     25    tightness of the comparison depends on the history (state) whose
     26    possibilities are as follows:
     27   
     28        running - the sequence of commands affecting globals used in this
     29                  command is identical
     30        doctest - the sequence of commands from the beginning of this doctest
     31                  until now is identical
     32        total   - the sequence of commands from sage startup time until now
     33                  is identical
     34   
     35    In particular, this allows for timings to be correalated accross file
     36    changes and larger sample sizes for common examples.
     37    """
     38    c = conn.cursor()
     39    c.execute("select run_id, count(*) as c from %s left join runs using (run_id) where %s group by run_id order by c limit 1" % (after_table, after_clause))
     40    first = list(c)
     41    if not first:
     42        print after_clause
     43        raise ValueError("No matching doctests.")
     44    else:
     45        run_id = first[0]['run_id']
     46    c.execute("create temporary table _backdrop as " +
     47              "select basename, doctest, source, want, sequence_number, running_state, doctest_state, total_state " +
     48              "from %s left join runs using (run_id) where %s and run_id='%s' and type='%s'" % (after_table, after_clause, run_id, type))
     49    target_query = (
     50        "select _backdrop.*, " +
     51        "sum(AFTER_TABLE.count_) as after_count, sum(AFTER_TABLE.cputime * AFTER_TABLE.count_) / sum(AFTER_TABLE.count_) as after_cputime, sum(AFTER_TABLE.walltime * AFTER_TABLE.count_) / sum(AFTER_TABLE.count_) as after_walltime ".replace("AFTER_TABLE", after_table) +
     52        "from _backdrop join %s using (total_state) left join runs using (run_id) where %s and type='%s'" % (after_table, after_clause, type) +
     53        "group by total_state")
     54    c.execute("create temporary table _target as %s" % target_query)
     55   
     56    aggregate_fields = ("(sum(walltime * count_) / sum(count_))",
     57                        "sum(walltime_ss)",
     58                        "(sum(cputime * count_) / sum(count_))",
     59                        "sum(cputime_ss)",
     60                        "sum(count_)")
     61    aggregate_expression = " || ',' || ".join(aggregate_fields)
     62   
     63    # Here we do a complicated join to group things according to the various running checksums.
     64    target_fields = 'basename', 'doctest', 'sequence_number', 'source', 'want', 'after_count', 'after_cputime', 'after_walltime'
     65    all_fields = ['_target.%s as %s' % (field, field) for field in target_fields]
     66    join_clauses = []
     67    order_by = ('basename', 'doctest', 'sequence_number')
     68    sub_query = ("(SELECT %s from timings join runs using (run_id) " +
     69                 "where %s AND type='%s' AND STATE_state=_target.STATE_state GROUP BY STATE_state) as STATE_data") % (aggregate_expression, before_clause, type)
     70    for state in ('running', 'doctest', 'total'):
     71        all_fields.append(sub_query.replace("STATE", state))
     72    query = "select %s from _target order by %s" % (", ".join(all_fields), ", ".join(order_by))
     73    if DEBUG or True:
     74        print query
     75        c.execute("EXPLAIN QUERY PLAN " + query)
     76        print
     77        for row in c:
     78            print row
     79        print
     80    c.execute(query)
     81    return c
     82
def analyse_simple(conn, before_clause, after_clause, join_field='doctest', type='package', packages=None):
    """
    Compares doctest timings between runs.

    Builds two temporary aggregate tables, ``_before`` and ``_after`` (one per
    selection clause), then returns a cursor over their join on ``doctest``.

    Note that full files and packages as a whole are also stored as "doctests"
    with the times of their sub-components summed.

    .. NOTE::

        ``join_field`` is currently unused; the join is always on ``doctest``.
    """
    c = conn.cursor()
    package_clause = create_package_clause(packages)
    # Query template: the literal tokens "SIDE" and "CLAUSE" are placeholders
    # substituted below -- once for the "before" runs and once for "after".
    query = ("create temporary table _SIDE as " +
            "select doctest, " +
                    "sum(walltime * count_) / sum(count_) as SIDE_walltime, " +
                    "sum(walltime_ss) as SIDE_walltime_ss, "+
                    "sum(cputime * count_) / sum(count_) as SIDE_cputime, " +
                    "sum(cputime_ss) as SIDE_cputime_ss, "
                    "sum(count_) as SIDE_count " +
            "from timings join runs using (run_id) " +
            "where CLAUSE AND type='%s' AND %s " % (type, package_clause) +
            "group by (doctest)")
    c.execute(query.replace("SIDE", "before").replace("CLAUSE", before_clause))
    c.execute(query.replace("SIDE", "after").replace("CLAUSE", after_clause))
    # The index speeds up the join below (doctest is unique per aggregate row).
    c.execute("create unique index _after_index on _after (doctest)")
    if DEBUG:
        print "-" * 72
        for row in c.execute("select * from _before"):
            print row
        print "-" * 72
        for row in c.execute("select * from _after"):
            print row
        print "-" * 72
    c.execute("select * from _before join _after using (doctest)")
    return c
     115
     116def generate_stats(data, state=None, type="mixed", cpu_or_wall="cpu", always=True):
     117    if state is None:
     118        return (
     119            generate_stats(data, 'total', type, cpu_or_wall, always=False) or
     120            generate_stats(data, 'doctest', type, cpu_or_wall, always=False) or
     121            generate_stats(data, 'running', type, cpu_or_wall))
     122    if not data.get(state + '_walltime'):
     123        if always:
     124            return [0, "white", "No previous data."]
     125        else:
     126            return None
     127    after = data["after_" + cpu_or_wall + "time"]
     128    before = mean = data[state + "_" + cpu_or_wall + "time"]
     129    count = data[state + "_" + "count"]
     130    sum = count * mean
     131    sum_squares = data[state + "_" + cpu_or_wall + "time_ss"]
     132    if count == 1:
     133        std_dev = before / 4 # just a guess
     134    else:
     135        sum_diff = sum_squares - mean * sum
     136        if -1e-10 < sum_diff <= 0:
     137            std_dev = 1e-100 # rounding error
     138        else:
     139            std_dev = math.sqrt(sum_diff / (count - 1))
     140    diff = after - before
     141    if type == "mixed":
     142        type = "raw" if abs(diff) > 2 * std_dev else "normalized"
     143    if type == "normalized":
     144        diff /= 2 * std_dev
     145    else:
     146        diff = math.log(after / before) / math.log(2)
     147    if abs(diff) > 1:
     148        sgn = diff / abs(diff)
     149        # map (1,oo) onto (1,2)
     150        diff = sgn * (2 - 1/abs(diff))
     151    hue = (2 - diff) / 6
     152    r, g, b = hsv_to_rgb(hue, 1, 1)
     153    return [diff,
     154            "#%02x%02x%02x" % (int(r*255), int(g*255), int(b*255)),
     155            "Current %.2g Previous %.2g &plusmn; %d%% (%s runs)" % (after, before, int(std_dev/before * 200),
     156            int(count))]
     157
     158def de_concatinate(d):
     159    """
     160    This undoes the hack of concatinating all the stats becase sqlite can't handle subselects with multiple values in an expression.
     161    """
     162    for state in ('running', 'doctest', 'total'):
     163        if d[state + '_data']:
     164            for id, data in zip(('walltime', 'walltime_ss', 'cputime', 'cputime_ss', 'count'), d[state + '_data'].split(',')):
     165                d[state + '_' + id] = float(data)
     166   
     167
     168def analyse_single_file(*args):
     169    stat_permutations = [(cpu_or_wall, type, state)
     170                            for cpu_or_wall in ('cpu', 'wall')
     171                            for type in ("normalized", "mixed", "raw")
     172                            for state in (None, 'running', 'doctest', 'total')]
     173    from jinja2 import Template
     174    template = Template(open("report.html").read())
     175    stats = []
     176    def doctest_iter():
     177        last_doctest = None
     178        doctest_data = None
     179        for row in analyse(*args):
     180            if last_doctest != row['doctest']:
     181                last_doctest = row['doctest']
     182                if doctest_data is not None:
     183                    yield doctest_data
     184                examples = []
     185                doctest_data = dict(
     186                    id = "doc",
     187                    doctest = row['doctest'],
     188                    examples = examples,
     189                )
     190            example = dict(row)
     191            example['id'] = len(stats)
     192            de_concatinate(row)
     193            stats.append([generate_stats(row, state=state, cpu_or_wall=cpu_or_wall, type=type)
     194                            for cpu_or_wall, type, state in stat_permutations])
     195            examples.append(example)
     196        if doctest_data is not None:
     197            yield doctest_data
     198               
     199    return template.render(stats=stats, doctests=doctest_iter(), enumerate=enumerate)
     200
     201cpu_or_wall_values = ('cpu', 'wall')
     202type_values = ('normalized', 'mixed', 'raw')
     203
     204def cross_product_iter(*args):
     205    if len(args) == 0:
     206        yield ()
     207    else:
     208        for value in args[0]:
     209            for rest in cross_product_iter(*args[1:]):
     210                yield (value,) + rest
     211
     212def cross_product(*args):
     213    return list(cross_product_iter(*args))
     214
     215package_stat_permutations = cross_product(cpu_or_wall_values, type_values)
     216
     217
     218def analyse_packages(conn, before_clause, after_clause, depth=None, order='doctest', packages=None):
     219    stat_permutations = [(cpu_or_wall, type)
     220                            for cpu_or_wall in ('cpu', 'wall')
     221                            for type in ("normalized", "mixed", "raw")]
     222    all = []
     223    for row in analyse_simple(conn, before_clause, after_clause, packages=packages):
     224        if depth and row['doctest'].count('.') >= depth:
     225            continue
     226        data = dict(row)
     227        stats = {}
     228        for stat in package_stat_permutations:
     229            cpu_or_wall, type = stat
     230            stats[stat] = generate_stats(row, state='before', cpu_or_wall=cpu_or_wall, type=type)
     231        data['stats'] = stats
     232        all.append(data)
     233    if order:
     234        cmp_fn = lambda a, b: cmp(a['doctest'], b['doctest'])
     235    else:
     236        # default
     237        cpu_or_wall = 'cpu'
     238        type = 'normalized'
     239        # override
     240        if 'wall' in order:
     241            cpu_or_wall = 'wall'
     242        if 'mixed' in order:
     243            type = 'mixed'
     244        elif 'raw' in order:
     245            type = 'raw'
     246        extract = lambda x: x['stats'][cpu_or_wall, type][0]
     247        cmp_fn = lambda(a, b): cmp(extract(a), extract(b))
     248    all.sort(cmp_fn)
     249    return all
     250
     251def render_packages(*args, **kwds):
     252    from jinja2 import Template
     253    template = Template(open("packages.html").read())
     254    return template.render(packages=analyse_packages(*args, **kwds), stat_dims=(cpu_or_wall_values, type_values), cross_product=cross_product, enumerate=enumerate)
     255
     256def create_package_clause(package):
     257    if package is None:
     258        return "1"
     259    elif isinstance(package, list):
     260        return "(%s)" % " OR ".join(map(create_package_clause, package))
     261    assert "'" not in package
     262    return "substr(doctest, 1, %s) == '%s'" % (len(package), package)
     263
     264
     265if __name__ == "__main__":
     266    parser = OptionParser()
     267    parser.add_option("-b", "--before", dest="before", default="all", metavar="SQL_CLAUSE", help="version, run_id, date")
     268    parser.add_option("-a", "--after", dest="after", default="last", metavar="SQL_CLAUSE")
     269    parser.add_option("-d", "--database", dest="database", default="timings.db", metavar="/path/to/db.sqlite")
     270    parser.add_option("--package", dest="package_list", metavar="pkg.subpkg", action="append")
     271    parser.add_option("--html", dest="html_output", metavar="/path/to/output.html")
     272   
     273    parser.add_option("--timing", dest="cpu", default="both", metavar="[cpu|wall|both]")
     274    parser.add_option("--match", dest="match", default="best", metavar="[total|doctest|running|best]")
     275    parser.add_option("--list", dest="list", default=None, metavar="SQL_CLAUSE or N")
     276    parser.add_option("--list_count", dest="list_count", type=int, default=10, metavar="N")
     277    parser.add_option("--sql", dest="sql", metavar="SQL")
     278   
     279    (options, args) = parser.parse_args()
     280
     281    conn = open_db(options.database)
     282    if options.list:
     283        try:
     284            list_count = int(options.list)
     285            list_clause = "1"
     286        except ValueError:
     287            list_count = options.list_count
     288            list_clause = parse_clause(options.list, conn, time_column='run_id')
     289        sql = ("select run_id, version, summarize(basename) as summary, count(*) as c, '' as extra from timings join runs using (run_id) " +
     290               "where type='file' and (%s) and (%s)" % (create_package_clause(options.package_list), list_clause) +
     291               "group by run_id order by run_id desc limit %s" % list_count)
     292        print sql
     293        for row in conn.execute(sql):
     294            print row['run_id'], '\t', row['summary'], '\t', "(%s)" % row['c'], row['version'], row['extra']
     295        sys.exit(0)
     296   
     297    if options.sql:
     298        for row in conn.execute(options.sql):
     299            print row
     300        sys.exit(0)
     301           
     302   
     303    before_clause = parse_clause(options.before, conn)
     304    after_clause = parse_clause(options.after, conn)
     305
     306    if args:
     307        for filename in args:
     308            basename = get_basename(filename)
     309            basename_clause = " AND timings.basename='%s'" % basename
     310            open("%s.html" % basename, "w").write(analyse_single_file(conn, before_clause, after_clause + basename_clause))
     311    else:
     312        open("all.html", "w").write(render_packages(conn, before_clause, after_clause, packages=options.package_list))
  • sage/doctest/control.py

    diff --git a/sage/doctest/control.py b/sage/doctest/control.py
    a b  
    2222from sage.structure.sage_object import SageObject
    2323from sage.misc.misc import DOT_SAGE
    2424
    25 from sources import FileDocTestSource, DictAsObject
     25from sources import FileDocTestSource, DatabaseDocTestSource, DictAsObject
    2626from forker import DocTestDispatcher
    2727from reporting import DocTestReporter
    2828from util import NestedName, Timer, count_noun, dict_difference
     
    9595        if not os.path.exists(self.stats_path):
    9696            with open(self.stats_path, "w") as stats_file:
    9797                json.dump({},stats_file)
     98
     99        # Database stuff
     100        self.database = None
     101        self.label = None
     102        self.rerun = None
    98103        self.__dict__.update(kwds)
    99104
    100105    def _repr_(self):
     
    200205                self.logfile = None
    201206        else:
    202207            self.logfile = None
     208        if options.database:
     209            self.db = None
     210            #self.db = DoctestDatabase(options.database)
     211        else:
     212            self.db = None
    203213        self.stats = {}
    204214        self.load_stats(options.stats_path)
    205215
     
    343353            sage: DC = DocTestController(DocTestDefaults(), [])
    344354            sage: DC.create_run_id()
    345355            Running doctests with ID ...
     356
     357        If a timings database is specified, the run is added to the database::
     358
     359            sage: raise NotImplementedError
    346360        """
    347         self.run_id = time.strftime('%Y-%m-%d-%H-%M-%S-') + "%08x" % random.getrandbits(32)
     361        if self.options.label is None:
     362            self.options.label = "%08x" % random.getrandbits(32)
     363        self.run_id = time.strftime('%Y-%m-%d-%H-%M-%S-') + self.options.label
    348364        from sage.version import version
    349365        self.log("Running doctests with ID %s."%self.run_id)
     366        if self.db is not None:
     367            run_data = dict(
     368                run_id=self.run_id,
     369                version=version,
     370                time=time.strftime('%Y-%m-%d %H:%M:%S'),
     371                label=self.options.label,
     372            )
     373            # Insert the current run into the database
     374            self.db.runs.insert(run_data)
     375            self.db.runs.ensure_index(time=True)
     376            self.db.runs.ensure_index(version=True)
     377            self.db.runs.ensure_index(run_id=True)
    350378
    351379    def add_files(self):
    352380        """
     
    485513                    yield path
    486514        self.sources = [FileDocTestSource(path, self.options) for path in expand()]
    487515
     516    def add_database_sources(self):
     517        """
     518        Add doctests from the most recent run that are stored in the database.
     519
     520        These doctests will be the ones that match the SQL clause given in the `rerun` options field.
     521
     522        EXAMPLES::
     523
     524            sage: from sage.doctest.control import DoctestDefaults, DoctestController
     525            sage: from sage.doctest.database import make_test_db
     526            sage: DD = DoctestDefaults(); DD.rerun = 'qwerty'
     527            sage: DC = DoctestController(DD, [])
     528            sage: DC.db = make_test_db()
     529            sage: DC.add_database_sources()
     530            Retrieving sources from database....
     531            sage: len(DC.sources)
     532        """
     533        # Add in sources from a previous run if the --rerun option is passed
     534        if self.options.rerun:
     535            if self.db is None:
     536                raise ValueError("In order to rerun doctests you need to specify a database to draw from")
     537            self.log("Retrieving sources from database....")
     538            conn = self.db.conn
     539            clause = parse_clause(self.options.rerun, conn)
     540            old_run_id = list(conn.execute("select run_id from timings join runs using (run_id) where %s order by run_id desc limit 1" % clause))[0]['run_id']
     541            for row in conn.execute("select basename, null as path from timings where run_id=? and type='file' and %s" %(old_run_id, clause)):
     542                self.sources.append(DatabaseDoctestSource(self.options.database, row['path'], row['basename'], old_run_id, randorder=self.options.randorder))
     543
     544
     545
    488546    def filter_sources(self):
    489547        """
    490548       
     
    530588            sage: DC.sort_sources()
    531589            Sorting sources by runtime so that slower doctests are run first....
    532590            sage: print "\n".join([source.basename for source in DC.sources])
     591            sage.doctest.visualize
    533592            sage.doctest.util
    534593            sage.doctest.test
    535594            sage.doctest.sources
    536595            sage.doctest.reporting
    537596            sage.doctest.parsing
    538597            sage.doctest.forker
     598            sage.doctest.database
    539599            sage.doctest.control
     600            sage.doctest.analyze
    540601            sage.doctest.all
    541602            sage.doctest
    542603        """
     
    574635                cumulative wall time: ... seconds
    575636        """
    576637        nfiles = 0
     638        ndbsources = 0
    577639        nother = 0
    578640        for F in self.sources:
    579641            if isinstance(F, FileDocTestSource):
    580642                nfiles += 1
     643            elif isinstance(F, DatabaseDocTestSource):
     644                ndbsources += 1
    581645            else:
    582646                nother += 1
    583647        if self.sources:
    584648            filestr = ", ".join(([count_noun(nfiles, "file")] if nfiles else []) +
     649                                ([count_noun(ndbsources, "database source")] if ndbsources else []) +
    585650                                ([count_noun(nother, "other source")] if nother else []))
    586651            threads = " using %s threads"%(self.options.nthreads) if self.options.nthreads > 1 else ""
    587652            iterations = []
     
    615680        """
    616681        Runs cleanup activities after actually running doctests.
    617682
    618         In particular, saves the stats to disk and closes the logfile.
     683        In particular, saves the stats and database to disk and closes the logfile.
    619684
    620685        INPUT:
    621686
     
    654719        self.save_stats(self.options.stats_path)
    655720        # Close the logfile
    656721        if final and self.logfile is not None:
     722            if self.db is not None:
     723                self.log("Saving timings to database...")
     724                # Insert package timings by summing more fine grained timings and commit the database.
     725                self.db.create_package_timings(self.run_id)
     726                self.log("Done!")
    657727            self.logfile.close()
    658728            self.logfile = None
    659729
     
    673743        for o in ("all", "sagenb"):
    674744            if o in opt:
    675745                raise ValueError("You cannot run gdb/valgrind on the whole sage%s library"%("" if o == "all" else "nb"))
     746        if opt["rerun"]:
     747            raise ValueError("You cannot run gdb/valgrind with the rerun option")
    676748        for o in ("all", "sagenb", "long", "force_lib", "verbose", "failed", "new"):
    677749            if o in opt:
    678750                cmd += "--%s "%o
     
    820892            self.create_run_id()
    821893            self.add_files()
    822894            self.expand_files_into_sources()
     895            self.add_database_sources()
    823896            self.filter_sources()
    824897            self.sort_sources()
    825898            self.run_doctests()
  • new file sage/doctest/database.py

    diff --git a/sage/doctest/database.py b/sage/doctest/database.py
    new file mode 100644
    - +  
     1
import json
import os
from collections import defaultdict

from sage.databases.sql_db import SQLDatabase
from sage.structure.sage_object import loads, dumps
     6
     7# class TableWrapper(SageObject):
     8#     def __init__(self, db, name):
     9#         self.db = db
     10#         self.name = name
     11#     def __repr__(self):
     12#         s = 'table ' + self.name + ':\n'
     13#         for column in self.db.__skeleton__[self.name]:
     14#             s += '    column ' + column + ': '
     15#             for data in self.db.__skeleton__[self.name][column]:
     16#                 s += data + ': ' + str(self.db.__skeleton__[self.name][column][data]) + '; '
     17#             s += '\n'
     18#         return s
     19#     def show(self, **kwds):
     20#         self.db.show(self.name, **kwds)
     21#     def add_column(self, col_name, col_dict, default='NULL'):
     22#         self.db.add_column(self.name, col_name, col_dict, default)
     23#     def drop_column(self, col_name):
     24#         self.db.drop_column(self.name, col_name)
     25#     def drop_data(self):
     26#         self.db.drop_data_from_table(self.name)
     27#     def make_index(self, col_name, unique=False):
     28#         self.db.make_index(col_name, self.name, unique)
     29#     def drop_index(self, index_name):
     30#         self.db.drop_index(self.name, index_name)
     31#     def make_unique(self, col_name):
     32#         self.db.make_unique(self.name, col_name)
     33#     def drop_unique(self, col_name):
     34#         self.db.drop_unique(self.name, col_name)
     35#     def make_primary_key(self, col_name):
     36#         self.db.make_primary_key(self.name, col_name)
     37#     def drop_primary_key(self, col_name):
     38#         self.db.drop_primary_key(self.name, col_name)
     39#     def add_row(self, values, entry_order=None):
     40#         self.db.add_rows(self.name, [values], entry_order)
     41#     def add_rows(self, rows, entry_order=None):
     42#         self.db.add_rows(self.name, rows, entry_order)
     43
     44def format(value):
     45    if value is None or isinstance(value, (int, long, float, str, unicode)):
     46        return value
     47    else:
     48        return "json:" + json.dumps(value, separators=(',',':'))
     49
     50def unformat(value):
     51    if isinstance(value, (str, unicode)) and value.startswith("json:"):
     52        value = json.loads(value[5:])
     53        if isinstance(value, dict):
     54            # json returns unicode keys which are invalid for kwds
     55            value = dict((str(key), value) for key, value in value.iteritems())
     56    return value
     57
     58def dict_factory(cursor, row):
     59    d = {}
     60    for idx, col in enumerate(cursor.description):
     61        d[col[0]] = unformat(row[idx])
     62    return d
     63
     64class TableWrapper(ConnWrapper):
     65    def __init__(self, conn, name):
     66        conn.row_factory = dict_factory
     67        self.conn = conn
     68        self.name = name
     69    def execute(self, sql, *args):
     70        return self.conn.execute(sql, args)
     71    def executemany(self, sql, args_list):
     72        return self.conn.executemany(sql, args_list)
     73    def commit(self):
     74        self.conn.commit()
     75    def print_sql(self, sql, *args):
     76        for row in self.execute(sql, *args):
     77            print row
     78    def insert(self, data):
     79        if isinstance(data, dict):
     80            data = [data]
     81        cols = set()
     82        for datum in data:
     83            cols.update(datum.keys())
     84        if not cols:
     85            return
     86        self.ensure_columns(cols)
     87        cols = tuple(cols)
     88        def to_insert():
     89            for datum in data:
     90                yield tuple(format(datum.get(col, None)) for col in cols)
     91        sql = "insert into %s (%s) values (%s)" % (self.name, ",".join(cols), ",".join(["?"] * len(cols)))
     92        self.executemany(sql, to_insert())
     93   
     94    def find(self, limit=None, *args, **kwds):
     95        clauses = list(args)
     96        for key, value in kwds.items():
     97            clauses.append("%s = '%s'" % (key, value))
     98        sql = "select * from %s where %s" % (self.name, " and ".join(clauses))
     99        if limit is not None:
     100            sql += " limit %s" % limit
     101        return self.execute(sql)
     102   
     103    def find_one(self, *args, **kwds):
     104        return self.find(limit=1, *args, **kwds)
     105       
     106    def ensure_columns(self, cols):
     107        existing_cols = set(row['name'] for row in self.execute("pragma table_info (%s)" % self.name))
     108        for col in set(cols).difference(existing_cols):
     109            self.execute("alter table %s add column %s" % (self.name, col))
     110   
     111    def ensure_index(self, col=None, **kwds):
     112        if col is None:
     113            assert len(kwds) == 1
     114            col = kwds.popitem()[0]
     115        self.ensure_columns([col])
     116        self.execute("create index if not exists __index_%s on %s (%s)" % (col, self.name, col))
     117
     118class DoctestDatabase(SQLDatabase):
     119    # aggregate types
     120    example_type = 0
     121    doctest_type = 1
     122    file_type = 2
     123    package_type = 3
     124
     125
     126    def __init__(self, filename):
     127        SQLDatabase.__init__(self, filename, False)
     128        import copy
     129        floattype = {'primary_key':False, 'index':False, 'sql':'REAL'}
     130        inttype = {'primary_key':False, 'index':False, 'sql':'INTEGER'}
     131        texttype = {'primary_key':False, 'index':False, 'sql':'TEXT'}
     132        def common_skel():
     133            tskel = {}
     134            tskel['id'] = {'primary_key':True, 'index':True, 'sql':'INTEGER'}
     135            tskel['run_id'] = copy.copy(texttype)
     136            tskel['basename'] = copy.copy(texttype) # we don't deduplicate since we want to do analysis on substrings
     137            tskel['lineno'] = copy.copy(inttype)
     138            tskel['cputime'] = copy.copy(floattype)
     139            tskel['walltime'] = copy.copy(floattype)
     140            tskel['total_state'] = copy.copy(texttype)
     141            tskel['cputime_ss'] = copy.copy(floattype)
     142            tskel['walltime_ss'] = copy.copy(floattype)
     143            tskel['count_'] = copy.copy(inttype)
     144            return tskel
     145        # Should do error checking if the tables already exist
     146        if 'example_timings' not in self.__skeleton__:
     147            tskel = common_skel()
     148            tskel['source_id'] = copy.copy(inttype)
     149            tskel['want_id'] = copy.copy(inttype)
     150            tskel['predecessors_id'] = copy.copy(inttype)
     151            tskel['exc_msg_id'] = copy.copy(inttype)
     152            tskel['want_markings_id'] = copy.copy(inttype)
     153            tskel['sequence_number'] = copy.copy(inttype)
     154            tskel['doctest_state'] = copy.copy(texttype)
     155            tskel['running_state'] = copy.copy(texttype)
     156            tskel['options_id'] = copy.copy(inttype)
     157            self.create_table('example_timings', tskel)
     158        if 'aggregate_timings' not in self.__skeleton__:
     159            tskel = common_skel()
     160            tskel['type'] = copy.copy(inttype)
     161            tskel['path_id'] = copy.copy(inttype)
     162            tskel['failures'] = copy.copy(inttype)
     163            # count?
     164            self.create_table('aggregate_timings', tskel)
     165        if 'dedup' not in self.__skeleton__:
     166            dskel = {}
     167            dskel['id'] = {'primary_key':True, 'index':True, 'sql':'INTEGER'}
     168            dskel['value'] = {'primary_key':False, 'index':True, 'unique':True, 'sql':'TEXT'}
     169            self.create_table('dedup', dskel)
     170#         if 'ex_timings' not in self.__skeleton__:
     171#             ex_timing_skel = {'run_id','basename','doctest','lineno','snippet_id','output_id','predecessors',
     172#                               'exc_msg','options','sequence_number','cputime','walltime','total_state','doctest_state','running_state'
     173#                               'count_','walltime_ss','cputime_ss'}
     174#         if 'dt_timings' not in self.__skeleton__:
     175#             dt_timing_skel = {'run_id','basename','doctest','lineno','total_state','cputime','walltime','count_','walltime_ss','cputime_ss'}
     176#         if 'f_timings' not in self.__skeleton__:
     177#             f_timing_skel = {'run_id','path','basename','cputime','walltime','failures','count_','walltime_ss','cputime_ss'}
     178#         if 'p_timings' not in self.__skeleton__:
     179#             p_timing_skel = {'run_id','basename','count','cputime','walltime','count_','walltime_ss','cputime_ss'}
     180#         if 'snippets' not in self.__skeleton__:
     181#             # a snippet is a bit of code, like '1 + 1'.  Each such snippet occurs once in this table.
     182#             snippets_skel = {'snippet_id','source'}
     183#         if 'outputs' not in self.__skeleton__:
     184#             outputs_skel = {'output_id','output_with_marking','slice_location'}
     185#         if 'files' not in self.__skeleton__:
     186#             files_skel = {'file_id','package_id','basename','path'}
     187#         if 'packages' not in self.__skeleton__:
     188#             packages_skel = {'package_id','basename'}
     189#         if 'runs' not in self.__skeleton__:
     190#             runs_skel = {'run_id','run_label'}
     191#         if 'examples' not in self.__skeleton__:
     192#             # an example is one line of a doctest, and is aware of its location and predecessors
     193#             # in particular, each example tracks which doctest it's part of and what its predecessors are.
     194#             examples_skel = {'example_id','sequence_no','snippet_id','doctest_id','predecessors','lineno'}
     195#         if 'doctests' not in self.__skeleton__:
     196#             # a doctest is a sequence of snippet ids attached to a basename
     197#             doctests_skel = {'doctest_id','file_id','basename','lineno'}
     198
     199    def deduplicate(self, dupdict, insert=True):
     200        pass
     201
     202    def record_doctests(self, run_id, path, basename, runner, doctests):
     203        dupdict = {path:None}
     204        for test in doctests:
     205            dupdict[test.name] = None
     206            for example in test.examples:
     207                try:
     208                    src = 'sage: ' + example.sage_source
     209                except AttributeError:
     210                    src = example.source
     211                dupdict[src] = None
     212                dupdict[str(example.want)] = None
     213                if example.predecessors is not None:
     214                    dupdict[",".join([str(e.sequence_number) for e in example.predecessors])] = None
     215                if example.exc_msg:
     216                    dupdict[str(example.exc_msg)] = None
     217                if isinstance(example.want, MarkedOutput):
     218                    dupdict[dumps(example.want.__dict__)] = None
     219                if example.options:
     220                    dupdict[dumps(example.options)] = None
     221        self.deduplicate(dupdict)
     222        dpath = dupdict[path]
     223        ex_col_order = ('run_id','basename','lineno','cputime','walltime','total_state',
     224                        'source_id','want_id','predecessors_id','exc_msg_id','want_markings_id',
     225                        'sequence_number','doctest_state','running_state','options_id')
     226        aggro_col_order = ('run_id','basename','lineno','cputime','walltime','total_state',
     227                           'type','path_id','failures')
     228        aggro = [(run_id, basename, None, runner.cputime, runner.walltime, None, self.file_type, dpath, runner.failures)]
     229        examples = []
     230        for test in doctests:
     231            test_data = dict(basename=basename, lineno=test.lineno,
     232                             doctest=dupdict[test.name], run_id=run_id, type=self.doctest_type, path_id=dpath)
     233            doctest_cputime = doctest_walltime = 0
     234            doctest_total_state = None
     235            for example in test.examples:
     236                # Update doctest info.
     237                if doctest_total_state is None:
     238                    doctest_total_state = example.total_state
     239                doctest_cputime += example.cputime
     240                doctest_walltime += example.walltime
     241                # Update Example info.
     242                data = dict(test_data)
     243                try:
     244                    data['source_id'] = dupdict['sage: ' + example.sage_source]
     245                except AttributeError:
     246                    data['source_id'] = dupdict[example.source]
     247                data['want_id'] = dupdict[str(example.want)]
     248                if example.predecessors is not None:
     249                    data['predecessors_id'] = dupdict[",".join([str(e.sequence_number) for e in example.predecessors])]
     250                else:
     251                    data['predecessors_id'] = None
     252                if isinstance(example.want, MarkedOutput):
     253                    data['want_markings_id'] = dupdict[dumps(example.want.__dict__)]
     254                else:
     255                    data['want_markings_id'] = None
     256                if example.exc_msg:
     257                    data['exc_msg'] = dupdict[str(example.exc_msg)]
     258                else:
     259                    data['exc_msg'] = None
     260                if example.options:
     261                    data['options_id'] = dupdict[dumps(example.options)]
     262                else:
     263                    data['options_id'] = None
     264                for attr in ('lineno', 'sequence_number', 'cputime', 'walltime', 'total_state', 'doctest_state'):
     265                    data[attr] = getattr(example, attr)
     266                try:
     267                    data['running_state'] = example.running_state
     268                except AttributeError:
     269                    # Optional timing.
     270                    data['running_state'] = None
     271                examples.append(tuple(data[a] for a in ex_col_order))
     272            doctest_data = dict(total_state=doctest_total_state,
     273                                cputime=doctest_cputime,
     274                                walltime=doctest_walltime,
     275                                **test_data)
     276            aggro.append(tuple(doctest_data[a] for a in aggro_col_order))
     277        self.add_rows('example_timings', examples, ex_col_order)
     278        self.add_rows('aggregate_timings', aggro, aggro_col_order)
     279        self.commit()
     280
     281    def create_package_timings(self, run_id):
     282        def empty_data():
     283            return dict(count_=0, cputime=0.0, walltime=0.0, type=self.package_type, lineno=None, total_state=None, path_id=None, failures=None, run_id=run_id)
     284        summary_data = defaultdict(empty_data)
     285        for file in self.aggregate_timings.find(run_id=run_id, type=self.file_type):
     286            package_list = file['basename'].split('.')
     287            # Note that we store each file as a "package" as well for easier analysis.
     288            for k in range(1,len(package_list) + 1):
     289                package = '.'.join(package_list[:k])
     290                data = summary_data[package]
     291                data['basename'] = package
     292                data['count_'] += 1
     293                data['cputime'] += file['cputime']
     294                data['walltime'] += file['walltime']
     295
     296   
# FIXME(review): stray editing remnant -- this is half of the
# aggro_col_order tuple and is a syntax error where it stands; delete it:
# 'run_id','basename','lineno','cputime','walltime','total_state',
#                        'type','path_id','failures')
     299def insert_timing_data(db, data, indices=[]):
     300    if isinstance(data, dict):
     301        data = [data]
     302    db.timings.insert(data)
     303    for index in indices:
     304        db.timings.ensure_index(col=index)
     305
     306def create_package_timings(db, run_id):
     307    def empty_data():
     308        return dict(count=0, cputime=0.0, walltime=0.0, type='package', run_id=run_id)
     309    summary_data = defaultdict(empty_data)
     310    print "A"
     311    for file in db.timings.find(run_id=run_id, type='file'):
     312        print file['doctest']
     313        package_list = file['doctest'].split('.')
     314        # Note that we store each file as a "package" as well for easier analysis.
     315        for k in range(1, len(package_list) + 1):
     316            package = '.'.join(package_list[:k])
     317            data = summary_data[package]
     318            data['doctest'] = package
     319            data['count'] += 1
     320            data['cputime'] += file['cputime']
     321            data['walltime'] += file['walltime']
     322    print "B"
     323    insert_timing_data(db, summary_data.values())
     324    print "C"
     325    db.timings.ensure_columns(('count_', 'walltime_ss', 'cputime_ss'))
     326    print "D"
     327    db.execute("update timings set count_=1, walltime_ss=walltime*walltime, cputime_ss=cputime*cputime where run_id=?", run_id)
     328#    for row in db.timings.find(run_id=run_id, type='package'):
     329#        log(row)
     330    print "E"
     331    db.commit()
     332    print "F"
     333
  • new file sage/doctest/packages.html

    diff --git a/sage/doctest/packages.html b/sage/doctest/packages.html
    new file mode 100644
    - +  
<html>
<!-- Per-package speed-regression overview template (see report.html for
     the per-example version).  Radio selectors are generated from
     stat_dims; each package div is recolored by updateAll. -->
<body onload="updateAll(-1)">

<br>
<a href="javascript:updateAll(0)">first</a>
<a href="javascript:updateAll(1)">second</a>
<a href="javascript:updateAll(2)">third</a>
<br>
<br>
<form name="selectors">
{% for ix, dim in enumerate(stat_dims) %}
{% for value in dim %}
<input type="radio" name="dim{{ix}}" value="{{value}}" onclick="updateAll(-1)" checked> {{value}}
{% endfor %}
<br>
{% endfor %}
</form>
<hr>
{% for ix, package in enumerate(packages) %}
<div id="{{ix}}" style="padding: 0px">
<div id="{{ix}}-blurb" style="position: absolute; right: 10px;">blurb</div>
<b>{{package.doctest}}</b>
</div>
{% endfor %}
<script>

// Return the value of the checked button in a radio group.
function valueOf(radio) {
    for(var i = 0; i < radio.length; i++) {
        if(radio[i].checked) {
            return radio[i].value;
        }
    }
}


// Recolor every package div for the selected statistic.  signal == -1
// means "read the radio buttons and build the signal key from them":
// the chosen dim values are joined with underscores into a string key.
function updateAll(signal) {
    if (signal == -1) {
        elements = document.forms['selectors'].elements;
        signal = "";
{% for ix, dim in enumerate(stat_dims) %}
        signal += "_" + valueOf(elements["dim{{ix}}"]);
{% endfor %}
        signal = signal.substring(1);
    }
    for (var i = 0; i < data.length; i++) {
        id = "" + i;
        document.getElementById(id).style.background = data[i][signal][1];
        document.getElementById(id + "-blurb").innerHTML = data[i][signal][2];
    }
}

var data = new Array();
// Placeholder so the raw template can be previewed in a browser.
// NOTE(review): the placeholder is an array but the radio-built signal is
// a string key -- confirm preview mode is expected to work at all.
data[0] = [[0, "#0F0", "good"], [0, "#FF0", "OK"], [0, "#F00", "bad"]];
var template = true;

/* {{"*" + "/"}}

template = false;

{% for ix, datum in enumerate(packages) %}
data[{{ix}}] = {
{% for perm in cross_product(*stat_dims) %}
  {{'_'.join(perm)}} : {{datum['stats'][perm]}},
{% endfor %}
};
{% endfor %}

{{"/" + "*"}} */

</script>
</body>
</html>
     73 No newline at end of file
  • sage/doctest/parsing.py

    diff --git a/sage/doctest/parsing.py b/sage/doctest/parsing.py
    a b  
    148148            want.update(require_failure=True)
    149149    return want
    150150
     151version_regex = re.compile(r"\d+(\.\d+)*((alpha|rc)\d)?$")
     152basename_regex = re.compile(r"[a-zA-Z]+(\.[a-zA-Z0-9]+)*$")
     153filename_regex = re.compile(r"\S+(/\S+)+$")
     154def parse_clause(clause, conn, time_column='time'):
     155    if clause == '':
     156        return "1"
     157    if ';' in clause:
     158        raise ValueError
     159    if "'" in clause or '"' in clause:
     160        # Assume it's a full SQL clause.
     161        pass
     162    else:
     163        conditions = []
     164        for condition in clause.split():
     165            if condition == 'all':
     166                pass
     167            elif condition == 'last':
     168                c = conn.cursor()
     169                last = c.execute("select run_id from runs order by time desc limit 1").fetchone()['run_id']
     170                print "last", last
     171                conditions.append("run_id='%s'" % last)
     172            elif condition == 'penultimate':
     173                c = conn.cursor()
     174                penultimate = list(c.execute("select run_id from runs order by time desc limit 2"))[-1]['run_id']
     175                conditions.append("run_id='%s'" % penultimate)
     176            elif version_regex.match(condition):
     177                conditions.append("runs.version='%s'" % condition)
     178            elif basename_regex.match(condition):
     179                conditions.append("substr(file, 1, %s)='%s'" % (len(condition)+1, condition))
     180            elif filename_regex.match(condition):
     181                conditions.append("path='%s'" % condition)
     182            elif condition[0] in '<=>':
     183                op = condition[0]
     184                date_string = condition[1:]
     185                if date_string[0] == '=':
     186                    date_string = date_string[1:]
     187                    op += '='
     188                conditions.append("substr(%s, 1, %s) %s '%s'" % (time_column, len(date_string), op, date_string))
     189            elif ':' in condition:
     190                ix = condition.index(':')
     191                conditions.append(condition[:ix] + "='" + condition[1+ix:] + "'");
     192            else:
     193                conditions.append("run_id='%s'" % condition)
     194        clause = " AND ".join(conditions)
     195        if not clause:
     196            clause = "1"
     197    c = conn.cursor()
     198    sql = "select count(*) from timings left join runs using (run_id) where %s and type='package'" % clause
     199    count = c.execute(sql).fetchone()['count(*)']
     200    if not count:
     201        raise ValueError, "No matching records for %s" % clause
     202    return clause
     203
    151204def pre_hash(s):
    152205    """
    153206    Prepends a string with its length.
  • new file sage/doctest/report.html

    diff --git a/sage/doctest/report.html b/sage/doctest/report.html
    new file mode 100644
    - +  
     1<html>
     2<body onload="updateAll(-1)">
     3
     4<br>
     5<a href="javascript:updateAll(0)">first</a>
     6<a href="javascript:updateAll(1)">second</a>
     7<a href="javascript:updateAll(2)">third</a>
     8<br>
     9<br>
     10<form name="selectors">
     11<input type="radio" name="cpu" value="0" onclick="updateAll(-1)" checked> cpu
     12<input type="radio" name="cpu" value="1" onclick="updateAll(-1)"> wall
     13<br>
     14<input type="radio" name="normalization" value="0" onclick="updateAll(-1)" checked> normalized
     15<input type="radio" name="normalization" value="1" onclick="updateAll(-1)"> mixed
     16<input type="radio" name="normalization" value="2" onclick="updateAll(-1)"> raw
     17<br>
     18<input type="radio" name="match" value="0" onclick="updateAll(-1)" checked> best
     19<input type="radio" name="match" value="1" onclick="updateAll(-1)"> running
     20<input type="radio" name="match" value="2" onclick="updateAll(-1)"> doctest
     21<input type="radio" name="match" value="3" onclick="updateAll(-1)"> total
     22</form>
     23<hr>
     24{% for doctest in doctests %}
     25<div id="{{doctest.id}}">
     26<h3>{{doctest.doctest}}</h3>
     27{% for example in doctest.examples %}
     28<div id="{{example.id}}" style="padding: 0px">
     29<div id="{{example.id}}-blurb" style="position: absolute; right: 10px;">blurb</div>
     30<pre style="margin: 0px">{{example.source.strip()|escape}}
     31{{example.want|escape}}</pre>
     32</div>
     33{% endfor %}
     34</div>
     35{% endfor %}
     36<script>
     37
     38function valueOf(radio) {
     39    for(var i = 0; i < radio.length; i++) {
     40        if(radio[i].checked) {
     41            return radio[i].value;
     42        }
     43    }
     44}
     45
     46
     47function updateAll(signal) {
     48    if (signal == -1) {
     49        elements = document.forms['selectors'].elements;
     50        signal = 12 * valueOf(elements['cpu']) + 4 * valueOf(elements['normalization']) + 1 * valueOf(elements['match']);
     51    }
     52    for (var i = 0; i < data.length; i++) {
     53        if (template) {
     54            id = "{{example.id}}";
     55        } else {
     56            id = "" + i;
     57        }
     58        document.getElementById(id).style.background = data[i][signal][1];
     59        document.getElementById(id + "-blurb").innerHTML = data[i][signal][2];
     60    }
     61}
     62
     63var data = new Array();
     64data[0] = [["#0F0", "good"], ["#FF0", "OK"], ["#F00", "bad"]];
     65var template = true;
     66
     67/* {{"*" + "/"}}
     68
     69template = false;
     70
     71{% for ix, stat in enumerate(stats) %}
     72data[{{ix}}] = {{stat}};
     73{% endfor %}
     74
     75{{"/" + "*"}} */
     76
     77</script>
     78</body>
     79</html>
     80 No newline at end of file
  • sage/doctest/reporting.py

    diff --git a/sage/doctest/reporting.py b/sage/doctest/reporting.py
    a b  
    3131#                  http://www.gnu.org/licenses/
    3232#*****************************************************************************
    3333
    34 
    3534import sys, signal
    3635from sage.structure.sage_object import SageObject
    3736from sage.doctest.util import count_noun
    3837from sage.doctest.sources import DictAsObject
     38from sage.doctest.parsing import MarkedOutput
    3939
    4040def signal_name(sig):
    4141    """
     
    421421                                            log("    %s not run"%(count_noun(nskipped, tag + " test")))
    422422                                if untested:
    423423                                    log ("    %s skipped"%(count_noun(untested, "%stest"%("other " if seen_other else ""))))
     424
     425                    db = self.controller.db
     426                    if db:
     427                        # File-level profile data.
     428                        data = dict(path=source.path, basename=runner.basename, doctest=runner.basename, run_id=run_id, type='file')
     429                        for attr in ('cputime', 'walltime', 'failures'):
     430                            data[attr] = getattr(runner, attr)
     431                        insert_timing_data(db, data, ['doctest', 'basename'])
     432
     433                        all = []
     434                        for test in doctests:
     435                            test_data = {
     436                                'basename': runner.basename,
     437                                'lineno': test.lineno,
     438                                'doctest': test.name,
     439                                'run_id': run_id,
     440                                'type': 'example',
     441                            }
     442                            doctest_cputime = doctest_walltime = 0
     443                            doctest_total_state = None
     444                            for example in test.examples:
     445                                # Update doctest info.
     446                                if doctest_total_state is None:
     447                                    doctest_total_state = example.total_state
     448                                doctest_cputime += example.cputime
     449                                doctest_walltime += example.walltime
     450                                # Update Example info.
     451                                data = dict(test_data)
     452                                try:
     453                                    data['source'] = 'sage: ' + example.sage_source
     454                                except AttributeError:
     455                                    data['source'] = example.source
     456                                if example.predecessors is not None:
     457                                    data['predecessors'] = sorted(e.sequence_number for e in example.predecessors)
     458                                data['want'] = str(example.want)
     459                                if isinstance(example.want, MarkedOutput):
     460                                    data['want_markings'] = example.want.__dict__
     461                                for attr in ('exc_msg', 'lineno', 'options',
     462                                    'sequence_number', 'cputime', 'walltime', 'total_state', 'doctest_state'):
     463                                    data[attr] = getattr(example, attr)
     464                                try:
     465                                    data['running_state'] = example.running_state
     466                                except AttributeError:
     467                                    # Optional timing.
     468                                    pass
     469                                all.append(data)
     470                            doctest_data = dict(total_state=doctest_total_state,
     471                                                cputime=doctest_cputime,
     472                                                walltime=doctest_walltime,
     473                                                **test_data)
     474                            doctest_data['type'] = 'doctest'
     475                            all.append(doctest_data)
     476                        insert_timing_data(db, all, ['total_state','doctest_state','running_state','run_id'])
     477                        db.commit()
     478
    424479                    log("    [%s, %s%.1f s]" % (count_noun(ntests, "test"), "%s, "%(count_noun(f, "failure")) if f else "", wall))
    425480            self.sources_completed += 1
    426481
  • sage/doctest/sources.py

    diff --git a/sage/doctest/sources.py b/sage/doctest/sources.py
    a b  
    300300        else:
    301301            return doctests, extras
    302302
    def load_file(self, namespace=None):
        """
        Load this source's file so its doctests can see the objects it
        defines.

        - ``namespace`` -- the globals dict to load into.  BUGFIX: the
          original body referenced a global ``namespace`` that does not
          exist in this module (a guaranteed ``NameError``); callers
          should pass the namespace used for doctest creation.

        Only files outside the Sage library with a ``.py``/``.pyx``/
        ``.sage``/``.spyx`` extension are loaded.  We chdir into the
        file's directory for the load and restore the cwd afterwards
        (now also on error, via try/finally).
        """
        if self.path:
            base, filename = os.path.split(self.path)
            _, ext = os.path.splitext(filename)
            if not self.in_lib and ext in ('.py', '.pyx', '.sage', '.spyx'):
                cwd = os.getcwd()
                if base:
                    os.chdir(base)
                try:
                    if os.path.exists(self.path):
                        # Errors raised here will be caught in DocTestTask.
                        load(filename, namespace)
                    else:
                        log("Unable to locate %s"%(self.path))
                finally:
                    if base:
                        os.chdir(cwd)
        else:
            log("Unable to locate %s"%(self.basename))
     319
    303320class StringDocTestSource(DocTestSource):
    304321    """
    305322    This class creates doctests from a string.
     
    615632        if not os.path.exists(self.path):
    616633            import errno
    617634            raise IOError(errno.ENOENT, "File does not exist", self.path)
    618         base, filename = os.path.split(self.path)
    619         _, ext = os.path.splitext(filename)
    620         if not self.in_lib and ext in ('.py', '.pyx', '.sage', '.spyx'):
    621             cwd = os.getcwd()
    622             if base:
    623                 os.chdir(base)
    624             load(filename, namespace) # errors raised here will be caught in DocTestTask
    625             if base:
    626                 os.chdir(cwd)
     635        self.load_file()
    627636        self.qualified_name = NestedName(self.basename)
    628637        return self._create_doctests(namespace)
    629638
     
    14141423                                                self.printpath, start + 1)
    14151424        return [outer_doctest] + inner_doctests
    14161425
     1426class DatabaseDocTestSource(DocTestSource):
     1427    """
     1428    This class creates doctests from the timings database.
     1429
     1430    EXAMPLES::
     1431
     1432        sage: from sage.doctest.sources import DatabaseDoctestSource
     1433        sage: import os
     1434        sage: filename = os.path.join(os.environ['SAGE_ROOT'],'devel','sage','sage','doctest','sources.py')
     1435        sage: FDS = FileDoctestSource(filename,True,False,set(['sage']),None)
     1436        sage: FDS.basename
     1437        'sage.doctest.sources'
     1438    """
     1439    def __init__(self, db, path, basename, run_id, randorder):
     1440        self.db = db
     1441        self.given_path = path
     1442        self.basename = basename
     1443        self.run_id = run_id
     1444        self.randorder = randorder
     1445
     1446    @lazy_attribute
     1447    def path(self):
     1448        if self.given_path is None or not os.path.exists(self.given_path):
     1449            ext_list = (os.path.splitext(self.given_path)[1],) if self.given_path else ('.py', '.pyx', '.sage', '.spyx')
     1450            new_path = None
     1451            basename = self.basename
     1452            if '.' in basename:
     1453                try:
     1454                    module_name = basename[basename.rindex('.') + 1:]
     1455                    pkg = __import__(basename[:basename.rindex('.')]).__path__[0]
     1456                    for ext in ext_list:
     1457                        candidate = os.path.join(pkg, module_name + ext)
     1458                        if os.path.exists(candidate):
     1459                            new_path = candidate
     1460                except ImportError:
     1461                    pass
     1462            else:
     1463                for lib_path in sys.path:
     1464                    for ext in ext_list:
     1465                        candidate = os.path.join(lib_path, basename + ext)
     1466                        if os.path.exists(candidate):
     1467                            new_path = candidate
     1468                            break
     1469                    if new_path is not None:
     1470                        break
     1471            if new_path is not None:
     1472                return new_path
     1473        return self.given_path
     1474
     1475    def create_doctests(self, namespace):
     1476        self.load_file()
     1477        conn = dbwrap.DBWrapper(self.db)
     1478        doctest_name = None
     1479        tests = []
     1480        for example in conn.execute("select * from timings where type='example' and run_id=? and basename=?", self.run_id, self.basename):
     1481            example = DictAsObject(example)
     1482            if doctest_name != example.doctest:
     1483                doctest_name = example.doctest
     1484                test = doctest.DocTest([], namespace, example.doctest, example.basename, example.lineno, None)
     1485                tests.append(test)
     1486            if 'want_markings' in example and example.want_markings is not None:
     1487                want = MarkedOutput(example.want).update(**example.want_markings)
     1488            else:
     1489                want = example.want
     1490            example = doctest.Example(example.source, want, example.exc_msg, example.lineno, 0, example.options)
     1491            if example.source.startswith('sage: '):
     1492                example.sage_source = example.source[6:]
     1493                example.source = preparse(example.sage_source)
     1494            tests[-1].examples.append(example)
     1495        return tests
     1496
    14171497class DictAsObject(dict):
    14181498    """
    14191499    A simple subclass of dict that inserts the items from the initializing dictionary into attributes.
  • new file sage/doctest/visualize.py

    diff --git a/sage/doctest/visualize.py b/sage/doctest/visualize.py
    new file mode 100644
    - +  
     1import optparse, os
     2from collections import defaultdict
     3from dbwrap import *
     4
     5from sage.all import DiGraph
     6
     7def doctest_graph(db, source):
     8    latest = list(db.execute("select run_id from timings where source = ? order by run_id limit 1", source))
     9    if not latest:
     10        raise ValueError, "No such doctest: '%s'" % source
     11    run_id = latest[0]['run_id']
     12    filter_clause = "type='example' and run_id=? and doctest in (select doctest from timings where source=? and run_id=?)"
     13    filter_args = run_id, source, run_id
     14    counts = db.execute("select count(*) from timings where source=? and " + filter_clause, source, *filter_args).next()["count(*)"]
     15    print counts, "occurrences"
     16    states = len(list(db.execute("select running_state from timings where source=? and " + filter_clause + " group by running_state", source, *filter_args)))
     17    print states, "initial states"
     18    query = "select doctest, sequence_number, source, predecessors from timings where " + filter_clause
     19    examples = dict()
     20    successors = defaultdict(list)
     21    predecessors = defaultdict(list)
     22    all = db.execute(query, *filter_args)
     23    for example in all:
     24        if example['source'] == source and False:
     25            print example
     26        doctest = example['doctest']
     27        num = example['sequence_number']
     28        examples[doctest, num] = example['source']
     29        for predecessor in example['predecessors']:
     30            predecessors[doctest, num].append((doctest, predecessor))
     31            successors[doctest, predecessor].append((doctest, num))
     32    G = DiGraph()
     33    def extend_forward(node):
     34        for s in successors[node]:
     35            G.add_edge(examples[node], examples[s])
     36            extend_forward(s)
     37    def extend_backward(node):
     38        for s in predecessors[node]:
     39            G.add_edge(examples[s], examples[node])
     40            extend_backward(s)
     41    for example in db.execute("select doctest, sequence_number from timings where run_id=? and source=?", run_id, source):
     42        node = (example['doctest'], example['sequence_number'])
     43        extend_forward(node)
     44        extend_backward(node)
     45    return G
     46   
     47   
     48def print_graph(G, root, indent="", inverted=None):
     49    if inverted is None:
     50        # Print both
     51        print_graph(G.reverse(), root, indent, inverted=True)
     52        print
     53        print_graph(G, root, indent, inverted=False)
     54        return
     55    if not inverted:
     56        print indent, root.strip()
     57    for node in G.neighbor_out_iterator(root):
     58        print_graph(G, node, "    " + indent, inverted)
     59    if inverted:
     60        print indent, root.strip()
     61
     62
     63if __name__ == "__main__":
     64
     65    parser = optparse.OptionParser()
     66    parser.add_option("-d", "--database", dest="database", default="timings.db", metavar="/path/to/db.sqlite")
     67    parser.add_option("-r", "--run_id", dest="run_id")
     68    parser.add_option("-o", "--output", dest="output", default="doctest")
     69    (options, args) = parser.parse_args()
     70
     71    assert len(args) == 1
     72    source = args[0]
     73    db = DBWrapper(options.database)
     74    G = doctest_graph(db, source + "\n")
     75    print_graph(G, source + "\n")
     76    G.save(options.output + ".sobj")
     77    G.plot().plot(figsize=50).save(options.output + ".png")
     78    G.graphviz_to_file_named(options.output + ".dot")
     79    os.system("unset LIBRARY_PATH; unset LD_LIBRARY_PATH; unset DYLD_LIBRARY_PATH; dot -Tpng -O " + options.output + ".dot")
     80