Ticket #5079: 5079-preparse-2.patch

File 5079-preparse-2.patch, 11.9 KB (added by robertwb, 12 years ago)
  • sage/misc/preparser.py

    # HG changeset patch
    # User Robert Bradshaw <robertwb@math.washington.edu>
    # Date 1232786722 28800
    # Node ID 796a97819c4cf4e1d669072a9c5eaf9cb25b4a87
    # Parent  c2c72812a6267570c06b20e74954fefc638cdf05
    [mq]: preparse2
    
    diff -r c2c72812a626 -r 796a97819c4c sage/misc/preparser.py
    a b  
    194194      'Integer(2)*x'
    195195      sage: implicit_multiplication(False)
    196196      sage: preparse('2x')
    197       'Integer(2)x'
     197      '2x'
    198198    """
    199199    from sage.plot.plot import EMBEDDED_MODE
    200200    if EMBEDDED_MODE:
     
    228228    return in_single_quote or in_double_quote or in_triple_quote
    229229   
    230230
    231 def strip_string_literals(code):
     231def strip_string_literals(code, state=None):
    232232    r"""
    233233    Returns a string with all literal quotes replaced with
    234234    labels and a dict of labels for re-substitution. This makes
     
    236236   
    237237    EXAMPLES:
    238238        sage: from sage.misc.preparser import strip_string_literals
    239         sage: s, literals = strip_string_literals(r'''['a', "b", 'c', "d\""]''')
     239        sage: s, literals, state = strip_string_literals(r'''['a', "b", 'c', "d\""]''')
    240240        sage: s
    241241        '[%(L1)s, %(L2)s, %(L3)s, %(L4)s]'
    242242        sage: literals
     
    247247        -%(L1)s-%(L2)s-
    248248       
    249249    Triple-quotes are handled as well.
    250         sage: s, literals = strip_string_literals("[a, '''b''', c, '']")
     250        sage: s, literals, state = strip_string_literals("[a, '''b''', c, '']")
    251251        sage: s
    252252        '[a, %(L1)s, c, %(L2)s]'
    253253        sage: print s % literals
    254254        [a, '''b''', c, '']
    255255
    256256    Comments are substituted too:
    257         sage: s, literals = strip_string_literals("code '#' # ccc 't'"); s
     257        sage: s, literals, state = strip_string_literals("code '#' # ccc 't'"); s
    258258        'code %(L1)s #%(L2)s'
    259259        sage: s % literals
    260260        "code '#' # ccc 't'"
     261       
     262    A state is returned so one can break strings across multiple calls to
     263    this function:
     264        sage: s, literals, state = strip_string_literals('s = "some'); s
     265        's = %(L1)s'
     266        sage: s, literals, state = strip_string_literals('thing" * 5', state); s
     267        '%(L1)s * 5'
    261268    """
    262269    new_code = []
    263270    literals = {}
    264271    counter = 0
    265272    start = q = 0
    266     in_quote = False
    267     raw = False
     273    if state is None:
     274        in_quote = False
     275        raw = False
     276    else:
     277        in_quote, raw = state
    268278    while True:
    269279        sig_q = code.find("'", q)
    270280        dbl_q = code.find('"', q)
     
    282292            new_code.append("#%%(%s)s" % label)
    283293            start = q = newline
    284294        elif q == -1:
    285             new_code.append(code[start:].replace('%','%%'))
     295            if in_quote:
     296                counter += 1
     297                label = "L%s" % counter
     298                literals[label] = code[start:]
     299                new_code.append("%%(%s)s" % label)
     300            else:
     301                new_code.append(code[start:].replace('%','%%'))
    286302            break
    287303        elif in_quote:
    288304            if not raw and code[q-1] == '\\':
     
    302318            else:
    303319                q += 1
    304320        else:
    305             raw = q>0 and code[q-1] in ['r', 'R']
     321            raw = q>0 and code[q-1] in 'rR'
    306322            if len(code) >= q+3 and (code[q+1] == code[q] == code[q+2]):
    307323                in_quote = code[q]*3
    308324            else:
     
    310326            new_code.append(code[start:q].replace('%', '%%'))
    311327            start = q
    312328            q += len(in_quote)
    313            
    314     return "".join(new_code), literals
     329   
     330    return "".join(new_code), literals, (in_quote, raw)
    315331
    316332
    317333def containing_block(code, ix, delimiters=['()','[]','{}'], require_delim=True):
     
    447463        sage: from sage.misc.preparser import preparse_numeric_literals
    448464        sage: preparse_numeric_literals("5")
    449465        'Integer(5)'
     466        sage: preparse_numeric_literals("0x05f")
     467        'Integer(0x05f)'
    450468        sage: preparse_numeric_literals("5j")
    451469        "ComplexNumber(0, '5')"
    452470        sage: preparse_numeric_literals("5jr")
     
    479497        'Integer(1).exp()'
    480498        sage: preparse_numeric_literals("1e+10")
    481499        "RealNumber('1e+10')"
    482        
    483     It may be overly forgiving in some cases:
    484         sage: preparse_numeric_literals("0xArrrrr")
    485         '0xA'
    486500    """
    487501    literals = {}
    488502    last = 0
    489503    new_code = []
    490504    dec_num = r"\b\d+"
    491     hex_num = r"\b0x[0-9a-fA-F]"
     505    hex_num = r"\b0x[0-9a-f]+"
    492506    # This is slightly annoying as floating point numbers may start
    493507    # with a decimal point, but if they do the \b will not match.
    494     float_num = r"((\b\d+([.]\d*)?)|([.]\d+))([eE][-+]?\d+)?"
    495     all_num = r"((%s)|(%s)|(%s))([RrLlJj]*)\b" % (float_num, dec_num, hex_num)
    496     for m in re.finditer(all_num, code):
     508    float_num = r"((\b\d+([.]\d*)?)|([.]\d+))(e[-+]?\d+)?"
     509    all_num = r"((%s)|(%s)|(%s))(rj|rL|jr|Lr|j|L|r|)\b" % (float_num, dec_num, hex_num)
     510    for m in re.finditer(all_num, code, re.I):
    497511        start, end = m.start(), m.end()
    498512        num = m.group(1)
    499513        postfix = m.groups()[-1].upper()
     
    515529                        # handle R.0
    516530                        continue
    517531                elif end < len(code) and num[-1] == '.':
    518                     if re.match('[a-zA-Z]', code[end]):
     532                    if re.match('[a-zA-Z_]', code[end]):
    519533                        # handle 4.sqrt()
    520534                        end -= 1
    521535                        num = num[:-1]
     
    602616        Vanilla:
    603617       
    604618        sage: preparse("R.<x> = ZZ['x']")
    605         "R = ZZ['x']; (x,) = R._first_ngens(Integer(1))"
     619        "R = ZZ['x']; (x,) = R._first_ngens(1)"
    606620        sage: preparse("R.<x,y> = ZZ['x,y']")
    607         "R = ZZ['x,y']; (x, y,) = R._first_ngens(Integer(2))"
     621        "R = ZZ['x,y']; (x, y,) = R._first_ngens(2)"
    608622
    609623        No square brackets:
    610624
    611625        sage: preparse("R.<x> = PolynomialRing(ZZ, 'x')")
    612         "R = PolynomialRing(ZZ, 'x',names=('x',)); (x,) = R._first_ngens(Integer(1))"
     626        "R = PolynomialRing(ZZ, 'x',names=('x',)); (x,) = R._first_ngens(1)"
    613627        sage: preparse("R.<x,y> = PolynomialRing(ZZ, 'x,y')")
    614         "R = PolynomialRing(ZZ, 'x,y',names=('x', 'y')); (x, y,) = R._first_ngens(Integer(2))"
     628        "R = PolynomialRing(ZZ, 'x,y',names=('x', 'y')); (x, y,) = R._first_ngens(2)"
    615629
    616630        Names filled in:
    617631
    618632        sage: preparse("R.<x> = ZZ[]")
    619         "R = ZZ['x']; (x,) = R._first_ngens(Integer(1))"
     633        "R = ZZ['x']; (x,) = R._first_ngens(1)"
    620634        sage: preparse("R.<x,y> = ZZ[]")
    621         "R = ZZ['x, y']; (x, y,) = R._first_ngens(Integer(2))"
     635        "R = ZZ['x, y']; (x, y,) = R._first_ngens(2)"
    622636
    623637        Names given not the same as generator names:
    624638
    625639        sage: preparse("R.<x> = ZZ['y']")
    626         "R = ZZ['y']; (x,) = R._first_ngens(Integer(1))"
     640        "R = ZZ['y']; (x,) = R._first_ngens(1)"
    627641        sage: preparse("R.<x,y> = ZZ['u,v']")
    628         "R = ZZ['u,v']; (x, y,) = R._first_ngens(Integer(2))"
     642        "R = ZZ['u,v']; (x, y,) = R._first_ngens(2)"
    629643
    630644        Number fields:
    631645
    632646        sage: preparse("K.<a> = QQ[2^(1/3)]")
    633         'K = QQ[Integer(2)**(Integer(1)/Integer(3))]; (a,) = K._first_ngens(Integer(1))'
     647        'K = QQ[Integer(2)**(Integer(1)/Integer(3))]; (a,) = K._first_ngens(1)'
    634648        sage: preparse("K.<a, b> = QQ[2^(1/3), 2^(1/2)]")
    635         'K = QQ[Integer(2)**(Integer(1)/Integer(3)), Integer(2)**(Integer(1)/Integer(2))]; (a, b,) = K._first_ngens(Integer(2))'
     649        'K = QQ[Integer(2)**(Integer(1)/Integer(3)), Integer(2)**(Integer(1)/Integer(2))]; (a, b,) = K._first_ngens(2)'
    636650
    637651        Just the .<> notation:
    638652
    639653        sage: preparse("R.<x> = ZZx")
    640         'R = ZZx; (x,) = R._first_ngens(Integer(1))'
     654        'R = ZZx; (x,) = R._first_ngens(1)'
    641655        sage: preparse("R.<x, y> = a+b")
    642         'R = a+b; (x, y,) = R._first_ngens(Integer(2))'
     656        'R = a+b; (x, y,) = R._first_ngens(2)'
    643657
    644658        Ensure we don't eat too much:
    645659
    646660        sage: preparse("R.<x, y> = ZZ;2")
    647         'R = ZZ; (x, y,) = R._first_ngens(Integer(2));Integer(2)'
     661        'R = ZZ; (x, y,) = R._first_ngens(2);Integer(2)'
    648662        sage: preparse("R.<x, y> = ZZ['x,y'];2")
    649         "R = ZZ['x,y']; (x, y,) = R._first_ngens(Integer(2));Integer(2)"
     663        "R = ZZ['x,y']; (x, y,) = R._first_ngens(2);Integer(2)"
    650664    """
    651665    i = start_index
    652666    if not line.startswith(".<", i):
     
    721735    return (line, i)
    722736
    723737eq_chars_pre = ["=", "!", ">", "<", "+", "-", "*", "/", "^"]
     738quote_state = None
    724739
    725 def preparse(line, reset=True, do_time=False, ignore_prompts=False):
     740def preparse(line, reset=True, do_time=False, ignore_prompts=False, after_semicolon=False):
    726741    r"""
    727742    sage: preparse("ZZ.<x> = ZZ['x']")
    728     "ZZ = ZZ['x']; (x,) = ZZ._first_ngens(Integer(1))"
     743    "ZZ = ZZ['x']; (x,) = ZZ._first_ngens(1)"
    729744    sage: preparse("ZZ.<x> = ZZ['y']")
    730     "ZZ = ZZ['y']; (x,) = ZZ._first_ngens(Integer(1))"
     745    "ZZ = ZZ['y']; (x,) = ZZ._first_ngens(1)"
    731746    sage: preparse("ZZ.<x,y> = ZZ[]")
    732     "ZZ = ZZ['x, y']; (x, y,) = ZZ._first_ngens(Integer(2))"
     747    "ZZ = ZZ['x, y']; (x, y,) = ZZ._first_ngens(2)"
    733748    sage: preparse("ZZ.<x,y> = ZZ['u,v']")
    734     "ZZ = ZZ['u,v']; (x, y,) = ZZ._first_ngens(Integer(2))"
     749    "ZZ = ZZ['u,v']; (x, y,) = ZZ._first_ngens(2)"
    735750    sage: preparse("ZZ.<x> = QQ[2^(1/3)]")
    736     'ZZ = QQ[Integer(2)**(Integer(1)/Integer(3))]; (x,) = ZZ._first_ngens(Integer(1))'
     751    'ZZ = QQ[Integer(2)**(Integer(1)/Integer(3))]; (x,) = ZZ._first_ngens(1)'
    737752    sage: QQ[2^(1/3)]
    738753    Number Field in a with defining polynomial x^3 - 2
    739754
     
    748763    sage: 9^^1
    749764    8
    750765    """
    751     try:
    752         # [1,2,..,n] notation
    753         L, literals = strip_string_literals(line)
    754         L = parse_ellipsis(L, preparse_step=False)
     766    global quote_state
     767    if reset:
     768        quote_state = None
     769
     770    if not after_semicolon:
     771        # This part handles lines with semi-colons all at once.
     772        # It could then also handle multiple lines more efficiently, but
     773        # that optimization can be done later.
     774        L, literals, quote_state = strip_string_literals(line, quote_state)
     775        try:
     776            L = parse_ellipsis(L, preparse_step=False)
     777        except SyntaxError:
     778            pass
     779        if implicit_mul_level:
     780            L = implicit_mul(L, level = implicit_mul_level)
     781        L = preparse_numeric_literals(L)
    755782        line = L % literals
    756     except SyntaxError:
    757         pass
    758783
    759     if implicit_mul_level:
    760         line = implicit_mul(line, level = implicit_mul_level)
    761784
    762785    # find where the parens are for function assignment notation
    763786    oparen_index = -1
     
    792815                O = "RealNumber('"; C="')"
    793816        else:
    794817            O = "Integer("; C = ")"
    795         line = line[:num_start] + O + zz + C + line[i:]
    796         return line, len(O+C)
     818        # Number wrapping handled earlier
     819        if False:
     820            line = line[:num_start] + O + zz + C + line[i:]
     821            return line, len(O+C)
     822        else:
     823            return line, 0
    797824   
    798825    i = 0
    799826    num_start = -1
     
    883910                continue
    884911
    885912        elif line[i] == ";" and not in_quote():
    886             line = line[:i+1] + preparse(line[i+1:], reset, do_time, ignore_prompts)
     913            line = line[:i+1] + preparse(line[i+1:], reset, do_time, ignore_prompts, after_semicolon=True)
    887914            i = len(line)
    888915            continue
    889916
     
    10991126        numeric_literals = False
    11001127   
    11011128    if numeric_literals:
    1102         contents, literals = strip_string_literals(contents)
     1129        contents, literals, state = strip_string_literals(contents)
    11031130        contents, nums = extract_numeric_literals(contents)
    11041131        contents = contents % literals
    11051132        if nums:
     
    12091236                                          code[m.end():])
    12101237        return code
    12111238       
    1212     code, literals = strip_string_literals(code)
     1239    code, literals, state = strip_string_literals(code)
    12131240    if level >= 1:
    12141241        no_mul_token = " '''_no_mult_token_''' "
    12151242        code = re.sub(r'\b0x', r'0%sx' % no_mul_token, code)  # hex digits