# HG changeset patch
# User Robert Bradshaw <robertwb@math.washington.edu>
# Date 1232792490 28800
# Node ID ccd5c3191de06204331bde8c09a287c3bec1fda8
# Parent 7b34a9ff87bff438a5f607a2d85680c84aa478cf
Preparser simplification
diff -r 7b34a9ff87bf -r ccd5c3191de0 sage/misc/preparser.py
a

b


450  450  '[_sage_const_1 .sqrt(), _sage_const_1p2 .sqrt(), 1 , 1.2 , R.1, R0.1, (_sage_const_1 .._sage_const_5 )]' 
451  451  """ 
452  452  return preparse_numeric_literals(code, True) 
453   
 453  
 454  all_num_regex = None 
 455  
454  456  def preparse_numeric_literals(code, extract=False): 
455  457  """ 
456  458  This preparses numerical literals into their sage counterparts, 
… 
… 

463  465  sage: from sage.misc.preparser import preparse_numeric_literals 
464  466  sage: preparse_numeric_literals("5") 
465  467  'Integer(5)' 
466   sage: preparse_numeric_literals("0x05f") 
467   'Integer(0x05f)' 
468  468  sage: preparse_numeric_literals("5j") 
469  469  "ComplexNumber(0, '5')" 
470  470  sage: preparse_numeric_literals("5jr") 
… 
… 

497  497  'Integer(1).exp()' 
498  498  sage: preparse_numeric_literals("1e+10") 
499  499  "RealNumber('1e+10')" 
 500  sage: preparse_numeric_literals("0x0af") 
 501  'Integer(0x0af)' 
 502  sage: preparse_numeric_literals("0x10.sqrt()") 
 503  'Integer(0x10).sqrt()' 
 504  sage: preparse_numeric_literals('0o100') 
 505  "Integer('100', 8)" 
 506  sage: preparse_numeric_literals('0b111001') 
 507  "Integer('111001', 2)" 
500  508  """ 
501  509  literals = {} 
502  510  last = 0 
503  511  new_code = [] 
504   dec_num = r"\b\d+" 
505   hex_num = r"\b0x[09af]+" 
506   # This is slightly annoying as floating point numbers may start 
507   # with a decimal point, but if they do the \b will not match. 
508   float_num = r"((\b\d+([.]\d*)?)([.]\d+))(e[+]?\d+)?" 
509   all_num = r"((%s)(%s)(%s))(rjrLjrLrjLr)\b" % (float_num, dec_num, hex_num) 
510   for m in re.finditer(all_num, code, re.I): 
 512  
 513  global all_num_regex 
 514  if all_num_regex is None: 
 515  dec_num = r"\b\d+" 
 516  hex_num = r"\b0x[09af]+" 
 517  oct_num = r"\b0o[07]+" 
 518  bin_num = r"\b0b[01]+" 
 519  # This is slightly annoying as floating point numbers may start 
 520  # with a decimal point, but if they do the \b will not match. 
 521  float_num = r"((\b\d+([.]\d*)?)([.]\d+))(e[+]?\d+)?" 
 522  all_num = r"((%s)(%s)(%s)(%s)(%s))(rjrLjrLrjLr)\b" % (float_num, dec_num, hex_num, oct_num, bin_num) 
 523  all_num_regex = re.compile(all_num, re.I) 
 524  
 525  for m in all_num_regex.finditer(code): 
511  526  start, end = m.start(), m.end() 
512  527  num = m.group(1) 
513  528  postfix = m.groups()[1].upper() 
… 
… 

533  548  # handle 4.sqrt() 
534  549  end = 1 
535  550  num = num[:1] 
536   elif end < len(code) and code[end] == '.' and not postfix: 
537   # \b does not match after the . 
 551  elif end < len(code) and code[end] == '.' and not postfix and re.match(r'\d+$', num): 
 552  # \b does not match after the . for floating point 
538  553  # two dots in a row would be an ellipsis 
539  554  if end+1 == len(code) or code[end+1] != '.': 
540  555  end += 1 
… 
… 

549  564  num_make = "RealNumber('%s')" % num 
550  565  else: 
551  566  num_name = numeric_literal_prefix + num 
552   num_make = "Integer(%s)" % num 
 567  if len(num) > 3: 
 568  # Py3 oct and bin support 
 569  if num[1] in 'bB': 
 570  num_make = "Integer('%s', 2)" % num[2:] 
 571  elif num[1] in 'oO': 
 572  num_make = "Integer('%s', 8)" % num[2:] 
 573  else: 
 574  num_make = "Integer(%s)" % num 
 575  else: 
 576  num_make = "Integer(%s)" % num 
553  577  
554  578  literals[num_name] = num_make 
555  579  
… 
… 

772  796  # Then can also handle multiple lines more efficiently, but 
773  797  # that optimization can be done later. 
774  798  L, literals, quote_state = strip_string_literals(line, quote_state) 
 799  
 800  # Ellipsis Range 
 801  # [1..n] 
775  802  try: 
776  803  L = parse_ellipsis(L, preparse_step=False) 
777  804  except SyntaxError: 
778  805  pass 
 806  
 807  # Implicit Multiplication 
 808  # 2x -> 2*x 
779  809  if implicit_mul_level: 
780  810  L = implicit_mul(L, level = implicit_mul_level) 
 811  
 812  # Wrapping 
 813  # 1 + 0.5 -> Integer(1) + RealNumber('0.5') 
781  814  L = preparse_numeric_literals(L) 
 815  
 816  # Generators 
 817  # R.0 -> R.gen(0) 
 818  L = re.sub(r'([_azAZ]\w*[)\]])\.(\d+)', r'\1.gen(\2)', L) 
 819  
 820  # Use ^ for exponentiation and ^^ for xor 
 821  # (A side effect is that **** becomes xor as well.) 
 822  L = L.replace('^', '**').replace('****', '^') 
 823  
782  824  line = L % literals 
783  825  
784  826  
… 
… 

800  842  i = line.find('...') 
801  843  return line[:i+3] + preparse(line[i+3:], reset=reset, do_time=do_time, ignore_prompts=ignore_prompts) 
802  844  
803   # Wrap integers with ZZ() and reals with RR(). 
804   def wrap_num(i, line, is_real, num_start): 
805   zz = line[num_start:i] 
806   if is_real or '.' in zz: 
807   if zz[1] == '.' and i < len(line) and line[i].isalpha(): 
808   # by popular demand -- this allows, e.g., 173.sqrt(). 
809   if '.' in zz[:1]: 
810   O = "RealNumber('"; C="')." 
811   else: 
812   O = "Integer("; C = ")." 
813   zz = zz[:1] 
814   else: 
815   O = "RealNumber('"; C="')" 
816   else: 
817   O = "Integer("; C = ")" 
818   # Number wrapping handled earlier 
819   if False: 
820   line = line[:num_start] + O + zz + C + line[i:] 
821   return line, len(O+C) 
822   else: 
823   return line, 0 
824   
825  845  i = 0 
826   num_start = 1 
827   in_number = False 
828   is_real = False 
829   is_hex = False 
830   
831  846  in_args = False 
832  847  
833  848  if reset: 
… 
… 

876  891  i += 1 
877  892  continue 
878  893  
879   # Decide if we should wrap a particular integer or real literal 
880   if in_number: 
881   if line[i] == ".": 
882   is_real = True 
883   elif not is_real and i == num_start+1 and line[num_start:i+1].lower() == '0x': 
884   is_hex = True 
885   elif not (line[i].isdigit() or (is_hex and line[i].lower() in 'abcdef')): 
886   # end of a number 
887   # Do we wrap? 
888   if in_quote(): 
889   # do not wrap 
890   pass 
891   elif i < len(line) and line[i] == 'x' and line[i1] == '0' and num_start==i1: 
892   # Yes, hex constant. 
893   i += 1 
894   continue 
895   elif i < len(line) and line[i] in 'eE': 
896   # Yes, in scientific notation, so will wrap later 
897   is_real = True 
898   i += 1 
899   if i < len(line) and line[i] == '': 
900   i += 2 
901   continue 
902   elif i < len(line) and line[i] in 'rR': 
903   # Raw number so do not wrap; but have to get rid of the "r". 
904   line = line[:i] + line[i+1:] 
905   else: 
906   line, n = wrap_num(i, line, is_real, num_start) 
907   i += n 
908   in_number = False 
909   is_real = False 
910   continue 
911   
912   elif line[i] == ";" and not in_quote(): 
 894  if line[i] == ";" and not in_quote(): 
913  895  line = line[:i+1] + preparse(line[i+1:], reset, do_time, ignore_prompts, after_semicolon=True) 
914  896  i = len(line) 
915  897  continue 
… 
… 

1028  1010  # 
1029  1011  ####### END CALCULUS ######## 
1030  1012  
1031   # Since we use ^ for exponentiation (see below), we 
1032   # rewrite ^^ to ^ so that XOR is still accessible 
1033   elif line[i:i+2] == "^^" and not in_quote(): 
1034   line = line[:i] + "^" + line[i+2:] 
1035   i += 1 
1036   continue 
1037   
1038   # exponents can be either ^ or ** 
1039   elif line[i] == "^" and not in_quote(): 
1040   line = line[:i] + "**" + line[i+1:] 
1041   i += 2 
1042   continue 
1043   
1044   elif line[i] == "." and i > 0 and i < len(line)1 and not in_quote() and \ 
1045   (isalphadigit_(line[i1]) or line[i1] == ")" or line[i1] == ']') and line[i+1].isdigit(): 
1046   # Generators: replace all ".<number>" by ".gen(<number>)" 
1047   # If . is preceeded by \, then replace "\." by ".". 
1048   j = i+1 
1049   while j < len(line) and line[j].isdigit(): 
1050   j += 1 
1051   line = line[:i] + ".gen(" + line[i+1:j] + ")" + line[j:] 
1052   i = j+4 
1053   
1054   if not in_number and \ 
1055   not in_quote(): 
 1013  if not in_quote(): 
1056  1014  
1057  1015  if i < len(line)1 and line[i] == '\\': 
1058  1016  j = i+1 
… 
… 

1063  1021  j += 1 
1064  1022  line = line[:i] + "._backslash_(" + line[i+1:j] + ')' + line[j:] 
1065  1023  
1066   elif (line[i].isdigit() or \ 
1067   (len(line)>i+1 and line[i] == '.' and line[i+1].isdigit())) and \ 
1068   (i == 0 or (i > 0 and not (isalphadigit_(line[i1]) \ 
1069   or line[i1] == ')'))): 
1070   in_number = True 
1071   num_start = i 
1072   
 1024  
1073  1025  # Decide if we hit a comment, so we're done. 
1074  1026  if line[i] == '#' and not (in_single_quote or in_double_quote or in_triple_quote): 
1075  1027  i = len(line) 
1076  1028  break 
1077  1029  
1078  1030  i += 1 
1079   
1080   if in_number: 
1081   line, _ = wrap_num(i, line, is_real, num_start) 
1082  1031  
1083  1032  # Time command like in MAGMA: (commented out, since it's standard in IPython now) 
1084  1033  L = line.lstrip() 