Ticket #5079: 5079-preparse-3.patch

File 5079-preparse-3.patch, 9.9 KB (added by robertwb, 12 years ago)
  • sage/misc/preparser.py

    # HG changeset patch
    # User Robert Bradshaw <robertwb@math.washington.edu>
    # Date 1232792490 28800
    # Node ID ccd5c3191de06204331bde8c09a287c3bec1fda8
    # Parent  7b34a9ff87bff438a5f607a2d85680c84aa478cf
    Preparser simplification
    
    diff -r 7b34a9ff87bf -r ccd5c3191de0 sage/misc/preparser.py
    a b  
    450450        '[_sage_const_1 .sqrt(), _sage_const_1p2 .sqrt(), 1 , 1.2 , R.1, R0.1, (_sage_const_1 .._sage_const_5 )]'
    451451    """
    452452    return preparse_numeric_literals(code, True)
    453    
     453
     454all_num_regex = None
     455
    454456def preparse_numeric_literals(code, extract=False):
    455457    """
    456458    This preparses numerical literals into their sage counterparts,
     
    463465        sage: from sage.misc.preparser import preparse_numeric_literals
    464466        sage: preparse_numeric_literals("5")
    465467        'Integer(5)'
    466         sage: preparse_numeric_literals("0x05f")
    467         'Integer(0x05f)'
    468468        sage: preparse_numeric_literals("5j")
    469469        "ComplexNumber(0, '5')"
    470470        sage: preparse_numeric_literals("5jr")
     
    497497        'Integer(1).exp()'
    498498        sage: preparse_numeric_literals("1e+10")
    499499        "RealNumber('1e+10')"
     500        sage: preparse_numeric_literals("0x0af")
     501        'Integer(0x0af)'
     502        sage: preparse_numeric_literals("0x10.sqrt()")
     503        'Integer(0x10).sqrt()'
     504        sage: preparse_numeric_literals('0o100')
     505        "Integer('100', 8)"
     506        sage: preparse_numeric_literals('0b111001')
     507        "Integer('111001', 2)"
    500508    """
    501509    literals = {}
    502510    last = 0
    503511    new_code = []
    504     dec_num = r"\b\d+"
    505     hex_num = r"\b0x[0-9a-f]+"
    506     # This is slightly annoying as floating point numbers may start
    507     # with a decimal point, but if they do the \b will not match.
    508     float_num = r"((\b\d+([.]\d*)?)|([.]\d+))(e[-+]?\d+)?"
    509     all_num = r"((%s)|(%s)|(%s))(rj|rL|jr|Lr|j|L|r|)\b" % (float_num, dec_num, hex_num)
    510     for m in re.finditer(all_num, code, re.I):
     512
     513    global all_num_regex
     514    if all_num_regex is None:
     515        dec_num = r"\b\d+"
     516        hex_num = r"\b0x[0-9a-f]+"
     517        oct_num = r"\b0o[0-7]+"
     518        bin_num = r"\b0b[01]+"
     519        # This is slightly annoying as floating point numbers may start
     520        # with a decimal point, but if they do the \b will not match.
     521        float_num = r"((\b\d+([.]\d*)?)|([.]\d+))(e[-+]?\d+)?"
     522        all_num = r"((%s)|(%s)|(%s)|(%s)|(%s))(rj|rL|jr|Lr|j|L|r|)\b" % (float_num, dec_num, hex_num, oct_num, bin_num)
     523        all_num_regex = re.compile(all_num, re.I)
     524       
     525    for m in all_num_regex.finditer(code):
    511526        start, end = m.start(), m.end()
    512527        num = m.group(1)
    513528        postfix = m.groups()[-1].upper()
     
    533548                        # handle 4.sqrt()
    534549                        end -= 1
    535550                        num = num[:-1]
    536             elif end < len(code) and code[end] == '.' and not postfix:
    537                 # \b does not match after the .
     551            elif end < len(code) and code[end] == '.' and not postfix and re.match(r'\d+$', num):
     552                # \b does not match after the . for floating point
    538553                # two dots in a row would be an ellipsis
    539554                if end+1 == len(code) or code[end+1] != '.':
    540555                    end += 1
     
    549564                    num_make = "RealNumber('%s')" % num
    550565            else:
    551566                num_name = numeric_literal_prefix + num
    552                 num_make = "Integer(%s)" % num
     567                if len(num) > 3:
     568                    # Py3 oct and bin support
     569                    if num[1] in 'bB':
     570                        num_make = "Integer('%s', 2)" % num[2:]
     571                    elif num[1] in 'oO':
     572                        num_make = "Integer('%s', 8)" % num[2:]
     573                    else:
     574                        num_make = "Integer(%s)" % num
     575                else:
     576                    num_make = "Integer(%s)" % num
    553577       
    554578            literals[num_name] = num_make
    555579           
     
    772796        # Then can also handle multiple lines more efficiently, but
    773797        # that optimization can be done later.
    774798        L, literals, quote_state = strip_string_literals(line, quote_state)
     799       
     800        # Ellipsis Range
     801        # [1..n]
    775802        try:
    776803            L = parse_ellipsis(L, preparse_step=False)
    777804        except SyntaxError:
    778805            pass
     806           
     807        # Implicit Multiplication
     808        # 2x -> 2*x
    779809        if implicit_mul_level:
    780810            L = implicit_mul(L, level = implicit_mul_level)
     811           
     812        # Wrapping
     813        # 1 + 0.5 -> Integer(1) + RealNumber('0.5')
    781814        L = preparse_numeric_literals(L)
     815       
     816        # Generators
     817        # R.0 -> R.gen(0)
     818        L = re.sub(r'([_a-zA-Z]\w*|[)\]])\.(\d+)', r'\1.gen(\2)', L)
     819
     820        # Use ^ for exponentiation and ^^ for xor
     821        # (A side effect is that **** becomes xor as well.)
     822        L = L.replace('^', '**').replace('****', '^')
     823
    782824        line = L % literals
    783825
    784826
     
    800842        i = line.find('...')
    801843        return line[:i+3] + preparse(line[i+3:], reset=reset, do_time=do_time, ignore_prompts=ignore_prompts)
    802844
    803     # Wrap integers with ZZ() and reals with RR().
    804     def wrap_num(i, line, is_real, num_start):
    805         zz = line[num_start:i]
    806         if is_real or '.' in zz:
    807             if zz[-1] == '.' and i < len(line) and line[i].isalpha():
    808                 # by popular demand -- this allows, e.g., 173.sqrt().
    809                 if '.' in zz[:-1]:
    810                     O = "RealNumber('"; C="')."
    811                 else:
    812                     O = "Integer("; C = ")."
    813                 zz = zz[:-1]
    814             else:
    815                 O = "RealNumber('"; C="')"
    816         else:
    817             O = "Integer("; C = ")"
    818         # Number wrapping handled earlier
    819         if False:
    820             line = line[:num_start] + O + zz + C + line[i:]
    821             return line, len(O+C)
    822         else:
    823             return line, 0
    824    
    825845    i = 0
    826     num_start = -1
    827     in_number = False
    828     is_real = False
    829     is_hex = False
    830 
    831846    in_args = False
    832847
    833848    if reset:
     
    876891                    i += 1
    877892                    continue
    878893
    879         # Decide if we should wrap a particular integer or real literal
    880         if in_number:
    881             if line[i] == ".":
    882                 is_real = True
    883             elif not is_real and i == num_start+1 and line[num_start:i+1].lower() == '0x':
    884                 is_hex = True
    885             elif not (line[i].isdigit() or (is_hex and line[i].lower() in 'abcdef')):
    886                 # end of a number
    887                 # Do we wrap?
    888                 if in_quote():
    889                     # do not wrap
    890                     pass
    891                 elif i < len(line) and line[i] == 'x' and line[i-1] == '0' and num_start==i-1:
    892                     # Yes, hex constant.
    893                     i += 1
    894                     continue
    895                 elif i < len(line) and line[i] in 'eE':
    896                     # Yes, in scientific notation, so will wrap later
    897                     is_real = True
    898                     i += 1   
    899                     if i < len(line) and line[i] == '-':
    900                         i += 2
    901                     continue
    902                 elif i < len(line) and line[i] in 'rR':
    903                     # Raw number so do not wrap; but have to get rid of the "r".
    904                     line = line[:i] + line[i+1:]
    905                 else:
    906                     line, n = wrap_num(i, line, is_real, num_start)
    907                     i += n
    908                 in_number = False
    909                 is_real = False
    910                 continue
    911 
    912         elif line[i] == ";" and not in_quote():
     894        if line[i] == ";" and not in_quote():
    913895            line = line[:i+1] + preparse(line[i+1:], reset, do_time, ignore_prompts, after_semicolon=True)
    914896            i = len(line)
    915897            continue
     
    10281010        #   
    10291011        ####### END CALCULUS ########
    10301012
    1031         # Since we use ^ for exponentiation (see below), we
    1032         # rewrite ^^ to ^ so that XOR is still accessible
    1033         elif line[i:i+2] == "^^" and not in_quote():
    1034             line = line[:i] + "^" + line[i+2:]
    1035             i += 1
    1036             continue
    1037 
    1038         # exponents can be either ^ or **
    1039         elif line[i] == "^" and not in_quote():
    1040             line = line[:i] + "**" + line[i+1:]
    1041             i += 2
    1042             continue
    1043 
    1044         elif line[i] == "." and i > 0 and i < len(line)-1 and not in_quote() and \
    1045                  (isalphadigit_(line[i-1]) or line[i-1] == ")" or line[i-1] == ']') and line[i+1].isdigit():
    1046             # Generators: replace all ".<number>" by ".gen(<number>)"
    1047             # If . is preceded by \, then replace "\." by ".".
    1048             j = i+1
    1049             while j < len(line) and line[j].isdigit():
    1050                 j += 1
    1051             line = line[:i] + ".gen(" + line[i+1:j] + ")" + line[j:]
    1052             i = j+4
    1053 
    1054         if     not in_number and \
    1055                not in_quote():
     1013        if not in_quote():
    10561014
    10571015            if i < len(line)-1 and line[i] == '\\':
    10581016                j = i+1
     
    10631021                    j += 1
    10641022                line = line[:i] + "._backslash_(" + line[i+1:j] + ')' + line[j:]
    10651023               
    1066             elif (line[i].isdigit() or \
    1067                    (len(line)>i+1 and line[i] == '.' and line[i+1].isdigit())) and \
    1068                (i == 0 or (i > 0 and not (isalphadigit_(line[i-1]) \
    1069                                           or line[i-1] == ')'))):
    1070                 in_number = True
    1071                 num_start = i
    1072            
     1024
    10731025        # Decide if we hit a comment, so we're done.
    10741026        if line[i] == '#' and not (in_single_quote or in_double_quote or in_triple_quote):
    10751027            i = len(line)
    10761028            break
    10771029
    10781030        i += 1
    1079 
    1080     if in_number:
    1081         line, _ = wrap_num(i, line, is_real, num_start)
    10821031
    10831032    # Time command like in MAGMA: (commented out, since it's standard in IPython now)
    10841033    L = line.lstrip()