# HG changeset patch
# User Robert Bradshaw <robertwb@math.washington.edu>
# Date 1232792490 28800
# Node ID ccd5c3191de06204331bde8c09a287c3bec1fda8
# Parent 7b34a9ff87bff438a5f607a2d85680c84aa478cf
Preparser simplification
diff -r 7b34a9ff87bf -r ccd5c3191de0 sage/misc/preparser.py
| a | b | |
| 450 | 450 | '[_sage_const_1 .sqrt(), _sage_const_1p2 .sqrt(), 1 , 1.2 , R.1, R0.1, (_sage_const_1 .._sage_const_5 )]' |
| 451 | 451 | """ |
| 452 | 452 | return preparse_numeric_literals(code, True) |
| 453 | | |
| | 453 | |
| | 454 | all_num_regex = None |
| | 455 | |
| 454 | 456 | def preparse_numeric_literals(code, extract=False): |
| 455 | 457 | """ |
| 456 | 458 | This preparses numerical literals into their sage counterparts, |
| … | … | |
| 463 | 465 | sage: from sage.misc.preparser import preparse_numeric_literals |
| 464 | 466 | sage: preparse_numeric_literals("5") |
| 465 | 467 | 'Integer(5)' |
| 466 | | sage: preparse_numeric_literals("0x05f") |
| 467 | | 'Integer(0x05f)' |
| 468 | 468 | sage: preparse_numeric_literals("5j") |
| 469 | 469 | "ComplexNumber(0, '5')" |
| 470 | 470 | sage: preparse_numeric_literals("5jr") |
| … | … | |
| 497 | 497 | 'Integer(1).exp()' |
| 498 | 498 | sage: preparse_numeric_literals("1e+10") |
| 499 | 499 | "RealNumber('1e+10')" |
| | 500 | sage: preparse_numeric_literals("0x0af") |
| | 501 | 'Integer(0x0af)' |
| | 502 | sage: preparse_numeric_literals("0x10.sqrt()") |
| | 503 | 'Integer(0x10).sqrt()' |
| | 504 | sage: preparse_numeric_literals('0o100') |
| | 505 | "Integer('100', 8)" |
| | 506 | sage: preparse_numeric_literals('0b111001') |
| | 507 | "Integer('111001', 2)" |
| 500 | 508 | """ |
| 501 | 509 | literals = {} |
| 502 | 510 | last = 0 |
| 503 | 511 | new_code = [] |
| 504 | | dec_num = r"\b\d+" |
| 505 | | hex_num = r"\b0x[0-9a-f]+" |
| 506 | | # This is slightly annoying as floating point numbers may start |
| 507 | | # with a decimal point, but if they do the \b will not match. |
| 508 | | float_num = r"((\b\d+([.]\d*)?)|([.]\d+))(e[-+]?\d+)?" |
| 509 | | all_num = r"((%s)|(%s)|(%s))(rj|rL|jr|Lr|j|L|r|)\b" % (float_num, dec_num, hex_num) |
| 510 | | for m in re.finditer(all_num, code, re.I): |
| | 512 | |
| | 513 | global all_num_regex |
| | 514 | if all_num_regex is None: |
| | 515 | dec_num = r"\b\d+" |
| | 516 | hex_num = r"\b0x[0-9a-f]+" |
| | 517 | oct_num = r"\b0o[0-7]+" |
| | 518 | bin_num = r"\b0b[01]+" |
| | 519 | # This is slightly annoying as floating point numbers may start |
| | 520 | # with a decimal point, but if they do the \b will not match. |
| | 521 | float_num = r"((\b\d+([.]\d*)?)|([.]\d+))(e[-+]?\d+)?" |
| | 522 | all_num = r"((%s)|(%s)|(%s)|(%s)|(%s))(rj|rL|jr|Lr|j|L|r|)\b" % (float_num, dec_num, hex_num, oct_num, bin_num) |
| | 523 | all_num_regex = re.compile(all_num, re.I) |
| | 524 | |
| | 525 | for m in all_num_regex.finditer(code): |
| 511 | 526 | start, end = m.start(), m.end() |
| 512 | 527 | num = m.group(1) |
| 513 | 528 | postfix = m.groups()[-1].upper() |
| … | … | |
| 533 | 548 | # handle 4.sqrt() |
| 534 | 549 | end -= 1 |
| 535 | 550 | num = num[:-1] |
| 536 | | elif end < len(code) and code[end] == '.' and not postfix: |
| 537 | | # \b does not match after the . |
| | 551 | elif end < len(code) and code[end] == '.' and not postfix and re.match(r'\d+$', num): |
| | 552 | # \b does not match after the . for floating point |
| 538 | 553 | # two dots in a row would be an ellipsis |
| 539 | 554 | if end+1 == len(code) or code[end+1] != '.': |
| 540 | 555 | end += 1 |
| … | … | |
| 549 | 564 | num_make = "RealNumber('%s')" % num |
| 550 | 565 | else: |
| 551 | 566 | num_name = numeric_literal_prefix + num |
| 552 | | num_make = "Integer(%s)" % num |
| | 567 | if len(num) > 3: |
| | 568 | # Py3 oct and bin support |
| | 569 | if num[1] in 'bB': |
| | 570 | num_make = "Integer('%s', 2)" % num[2:] |
| | 571 | elif num[1] in 'oO': |
| | 572 | num_make = "Integer('%s', 8)" % num[2:] |
| | 573 | else: |
| | 574 | num_make = "Integer(%s)" % num |
| | 575 | else: |
| | 576 | num_make = "Integer(%s)" % num |
| 553 | 577 | |
| 554 | 578 | literals[num_name] = num_make |
| 555 | 579 | |
| … | … | |
| 772 | 796 | # Then can also handle multiple lines more efficiently, but |
| 773 | 797 | # that optimization can be done later. |
| 774 | 798 | L, literals, quote_state = strip_string_literals(line, quote_state) |
| | 799 | |
| | 800 | # Ellipsis Range |
| | 801 | # [1..n] |
| 775 | 802 | try: |
| 776 | 803 | L = parse_ellipsis(L, preparse_step=False) |
| 777 | 804 | except SyntaxError: |
| 778 | 805 | pass |
| | 806 | |
| | 807 | # Implicit Multiplication |
| | 808 | # 2x -> 2*x |
| 779 | 809 | if implicit_mul_level: |
| 780 | 810 | L = implicit_mul(L, level = implicit_mul_level) |
| | 811 | |
| | 812 | # Wrapping |
| | 813 | # 1 + 0.5 -> Integer(1) + RealNumber('0.5') |
| 781 | 814 | L = preparse_numeric_literals(L) |
| | 815 | |
| | 816 | # Generators |
| | 817 | # R.0 -> R.gen(0) |
| | 818 | L = re.sub(r'([_a-zA-Z]\w*|[)\]])\.(\d+)', r'\1.gen(\2)', L) |
| | 819 | |
| | 820 | # Use ^ for exponentiation and ^^ for xor |
| | 821 | # (A side effect is that **** becomes xor as well.) |
| | 822 | L = L.replace('^', '**').replace('****', '^') |
| | 823 | |
| 782 | 824 | line = L % literals |
| 783 | 825 | |
| 784 | 826 | |
| … | … | |
| 800 | 842 | i = line.find('...') |
| 801 | 843 | return line[:i+3] + preparse(line[i+3:], reset=reset, do_time=do_time, ignore_prompts=ignore_prompts) |
| 802 | 844 | |
| 803 | | # Wrap integers with ZZ() and reals with RR(). |
| 804 | | def wrap_num(i, line, is_real, num_start): |
| 805 | | zz = line[num_start:i] |
| 806 | | if is_real or '.' in zz: |
| 807 | | if zz[-1] == '.' and i < len(line) and line[i].isalpha(): |
| 808 | | # by popular demand -- this allows, e.g., 173.sqrt(). |
| 809 | | if '.' in zz[:-1]: |
| 810 | | O = "RealNumber('"; C="')." |
| 811 | | else: |
| 812 | | O = "Integer("; C = ")." |
| 813 | | zz = zz[:-1] |
| 814 | | else: |
| 815 | | O = "RealNumber('"; C="')" |
| 816 | | else: |
| 817 | | O = "Integer("; C = ")" |
| 818 | | # Number wrapping handled earlier |
| 819 | | if False: |
| 820 | | line = line[:num_start] + O + zz + C + line[i:] |
| 821 | | return line, len(O+C) |
| 822 | | else: |
| 823 | | return line, 0 |
| 824 | | |
| 825 | 845 | i = 0 |
| 826 | | num_start = -1 |
| 827 | | in_number = False |
| 828 | | is_real = False |
| 829 | | is_hex = False |
| 830 | | |
| 831 | 846 | in_args = False |
| 832 | 847 | |
| 833 | 848 | if reset: |
| … | … | |
| 876 | 891 | i += 1 |
| 877 | 892 | continue |
| 878 | 893 | |
| 879 | | # Decide if we should wrap a particular integer or real literal |
| 880 | | if in_number: |
| 881 | | if line[i] == ".": |
| 882 | | is_real = True |
| 883 | | elif not is_real and i == num_start+1 and line[num_start:i+1].lower() == '0x': |
| 884 | | is_hex = True |
| 885 | | elif not (line[i].isdigit() or (is_hex and line[i].lower() in 'abcdef')): |
| 886 | | # end of a number |
| 887 | | # Do we wrap? |
| 888 | | if in_quote(): |
| 889 | | # do not wrap |
| 890 | | pass |
| 891 | | elif i < len(line) and line[i] == 'x' and line[i-1] == '0' and num_start==i-1: |
| 892 | | # Yes, hex constant. |
| 893 | | i += 1 |
| 894 | | continue |
| 895 | | elif i < len(line) and line[i] in 'eE': |
| 896 | | # Yes, in scientific notation, so will wrap later |
| 897 | | is_real = True |
| 898 | | i += 1 |
| 899 | | if i < len(line) and line[i] == '-': |
| 900 | | i += 2 |
| 901 | | continue |
| 902 | | elif i < len(line) and line[i] in 'rR': |
| 903 | | # Raw number so do not wrap; but have to get rid of the "r". |
| 904 | | line = line[:i] + line[i+1:] |
| 905 | | else: |
| 906 | | line, n = wrap_num(i, line, is_real, num_start) |
| 907 | | i += n |
| 908 | | in_number = False |
| 909 | | is_real = False |
| 910 | | continue |
| 911 | | |
| 912 | | elif line[i] == ";" and not in_quote(): |
| | 894 | if line[i] == ";" and not in_quote(): |
| 913 | 895 | line = line[:i+1] + preparse(line[i+1:], reset, do_time, ignore_prompts, after_semicolon=True) |
| 914 | 896 | i = len(line) |
| 915 | 897 | continue |
| … | … | |
| 1028 | 1010 | # |
| 1029 | 1011 | ####### END CALCULUS ######## |
| 1030 | 1012 | |
| 1031 | | # Since we use ^ for exponentiation (see below), we |
| 1032 | | # rewrite ^^ to ^ so that XOR is still accessible |
| 1033 | | elif line[i:i+2] == "^^" and not in_quote(): |
| 1034 | | line = line[:i] + "^" + line[i+2:] |
| 1035 | | i += 1 |
| 1036 | | continue |
| 1037 | | |
| 1038 | | # exponents can be either ^ or ** |
| 1039 | | elif line[i] == "^" and not in_quote(): |
| 1040 | | line = line[:i] + "**" + line[i+1:] |
| 1041 | | i += 2 |
| 1042 | | continue |
| 1043 | | |
| 1044 | | elif line[i] == "." and i > 0 and i < len(line)-1 and not in_quote() and \ |
| 1045 | | (isalphadigit_(line[i-1]) or line[i-1] == ")" or line[i-1] == ']') and line[i+1].isdigit(): |
| 1046 | | # Generators: replace all ".<number>" by ".gen(<number>)" |
| 1047 | | # If . is preceeded by \, then replace "\." by ".". |
| 1048 | | j = i+1 |
| 1049 | | while j < len(line) and line[j].isdigit(): |
| 1050 | | j += 1 |
| 1051 | | line = line[:i] + ".gen(" + line[i+1:j] + ")" + line[j:] |
| 1052 | | i = j+4 |
| 1053 | | |
| 1054 | | if not in_number and \ |
| 1055 | | not in_quote(): |
| | 1013 | if not in_quote(): |
| 1056 | 1014 | |
| 1057 | 1015 | if i < len(line)-1 and line[i] == '\\': |
| 1058 | 1016 | j = i+1 |
| … | … | |
| 1063 | 1021 | j += 1 |
| 1064 | 1022 | line = line[:i] + "._backslash_(" + line[i+1:j] + ')' + line[j:] |
| 1065 | 1023 | |
| 1066 | | elif (line[i].isdigit() or \ |
| 1067 | | (len(line)>i+1 and line[i] == '.' and line[i+1].isdigit())) and \ |
| 1068 | | (i == 0 or (i > 0 and not (isalphadigit_(line[i-1]) \ |
| 1069 | | or line[i-1] == ')'))): |
| 1070 | | in_number = True |
| 1071 | | num_start = i |
| 1072 | | |
| | 1024 | |
| 1073 | 1025 | # Decide if we hit a comment, so we're done. |
| 1074 | 1026 | if line[i] == '#' and not (in_single_quote or in_double_quote or in_triple_quote): |
| 1075 | 1027 | i = len(line) |
| 1076 | 1028 | break |
| 1077 | 1029 | |
| 1078 | 1030 | i += 1 |
| 1079 | | |
| 1080 | | if in_number: |
| 1081 | | line, _ = wrap_num(i, line, is_real, num_start) |
| 1082 | 1031 | |
| 1083 | 1032 | # Time command like in MAGMA: (commented out, since it's standard in IPython now) |
| 1084 | 1033 | L = line.lstrip() |