Ticket #12342: hg_json

File hg_json, 9.8 KB (added by William Stein, 11 years ago)
Line 
1#!/usr/bin/env python
2"""
3Convert between a Mercurial 1.0 repository and a JSON file.
4
5AUTHORS:
6 
7    - Keshav Kini (Dec 2011)
8    - William Stein (Jan 2012)
9"""
10
11import os, sys
12
# Refuse to run outside the Sage shell.  (Presumably the mercurial import
# further down relies on the Python environment that Sage sets up -- confirm.)
# ``sys.exit`` with a string prints it to stderr and exits with status 1, so
# the diagnostic no longer ends up on stdout.
if 'SAGE_ROOT' not in os.environ:
    sys.exit("%s: run this using the Sage environment, e.g., type 'sage -sh'" % sys.argv[0])
16
17import struct, json, cStringIO, shutil
18from mercurial.changegroup import readexactly, readbundle, writebundle
19
20#
21# bundle --> JSON
22#
23
def unpack_groups(fh):
    """
    Yield the parsed groups of an HG10 changegroup read from ``fh``.

    ``fh`` must deliver the decompressed bundle contents with the magic
    header already consumed (i.e. a "changegroup" stream in the bundle
    format introduced in Mercurial 1.0).

    The first item yielded is the changelog group and the second is the
    manifest group, each as a list of chunk dictionaries.  Every item after
    that is a ``(filename, chunks)`` pair for one file group, with the
    filename stored in ``string_escape`` form.
    """
    # The changelog group and the manifest group come first, in that order.
    for _ in range(2):
        yield list(unpack_chunks(fh))

    # File groups follow; each starts with a length-prefixed filename.  The
    # 4-byte big-endian length counts itself, hence the ``- 4`` below.
    while True:
        (length,) = struct.unpack('>l', readexactly(fh, 4))
        if length <= 4:
            # a "null meta chunk" terminates the changegroup
            return
        escaped_name = readexactly(fh, length - 4).encode('string_escape')
        file_chunks = list(unpack_chunks(fh))
        yield (escaped_name, file_chunks)
40
def unpack_chunks(fh):
    """
    Yield the parsed chunks of one "group" of a bundle file.

    ``fh`` must be positioned at the start of a group.  Chunks are read and
    yielded until the terminating "null chunk" has been consumed.

    Each chunk is yielded as a dictionary with the keys ``node``, ``p1``,
    ``p2`` and ``cs`` (the four 20-byte header fields, hex-encoded) and
    ``data`` (the list of patches produced by ``unpack_patches``).

    Raises ``Exception`` if a chunk is too short to hold its own header.
    """
    while True:
        # 4-byte big-endian length that counts itself and the 80-byte header
        (length,) = struct.unpack('>l', readexactly(fh, 4))
        if length <= 4:
            # a "null chunk" terminates the group
            return
        if length < 84:
            raise Exception("negative data length")

        header = readexactly(fh, 80)
        node, p1, p2, cs = struct.unpack('20s20s20s20s', header)
        # whatever is left after length field + header is the patch stream
        patches = list(unpack_patches(fh, length - 84))

        parsed = {}
        parsed['node'] = node.encode('hex')
        parsed['p1'] = p1.encode('hex')
        parsed['p2'] = p2.encode('hex')
        parsed['cs'] = cs.encode('hex')
        parsed['data'] = patches
        yield parsed
61
def unpack_patches(fh, remaining):
    """
    Yield the patches stored in the data field of a chunk.

    The data field has no delimiter, so the caller must pass its size in
    bytes as ``remaining``.  Each patch consists of a 12-byte header (three
    big-endian signed 32-bit integers: ``start``, ``end``, ``blocklen``)
    followed by ``blocklen`` bytes of payload.

    Each patch is yielded as a dictionary with the keys ``start``, ``end``,
    ``blocklen`` and ``block`` (the payload in ``string_escape`` form).

    Raises ``Exception`` if a block length is negative, if a block claims
    more bytes than are left in the data field, or if bytes are left over
    that are too few to form another patch header.
    """
    while remaining >= 12:
        start, end, blocklen = struct.unpack('>lll', readexactly(fh, 12))
        remaining -= 12
        if blocklen < 0:
            # A negative length would be passed straight to the reader and
            # would also make ``remaining`` grow; reject it explicitly.
            raise Exception("negative patch block length (%d)" % blocklen)
        if blocklen > remaining:
            raise Exception("unexpected end of patch stream")
        block = readexactly(fh, blocklen)
        remaining -= blocklen
        yield { 'start': start
              , 'end': end
              , 'blocklen': blocklen
              , 'block': block.encode('string_escape')
              }

    if remaining > 0:
        # Report the leftover byte count in the exception itself rather than
        # printing it to stdout, which may be the JSON output stream.
        raise Exception("unexpected end of patch stream (%d trailing bytes)"
                        % remaining)
83
def to_json(ifilename, ofilename):
    """
    Given an HG10xx file (Mercurial 1.0 bundle) ``ifilename``, convert
    it to JSON and dump it to ``ofilename``.

    A false ``ifilename`` means "read the bundle from standard input"; a
    false ``ofilename`` means "write the JSON to standard output".  Files
    opened here are always closed again, while the standard streams are
    left open for the caller.
    """
    ifile = open(ifilename, 'rb') if ifilename else sys.stdin
    try:
        fh = readbundle(ifile, ifilename)
        # Consume the whole bundle before the input file is closed.  The
        # reader ``fh`` itself is not closed: the original author noted that
        # close() is unimplemented on it in Mercurial 1.8.4.
        oobj = list(unpack_groups(fh))
    finally:
        if ifile is not sys.stdin:
            ifile.close()

    ofile = open(ofilename, 'w') if ofilename else sys.stdout
    try:
        json.dump(oobj, ofile, indent=4)
    finally:
        # never close sys.stdout on behalf of the caller
        if ofile is not sys.stdout:
            ofile.close()
105
106#
107# JSON --> bundle
108#
109
def pack_groups(cg_obj):
    """
    Serialize the JSON object ``cg_obj`` back into a binary changegroup.

    ``cg_obj[0]`` is the changelog group, ``cg_obj[1]`` the manifest group,
    and every later entry is a ``(filename, chunks)`` pair describing one
    file group, with the filename in ``string_escape`` form.  The packed
    changegroup is returned as a single string.
    """
    pieces = [pack_chunks(cg_obj[0]),   # the changelog group
              pack_chunks(cg_obj[1])]   # the manifest group

    for pair in cg_obj[2:]:             # over all files
        filename = pair[0].decode('string_escape')
        # the length prefix counts its own four bytes as well as the name
        pieces.append(struct.pack('>l', len(filename) + 4))
        pieces.append(filename)
        pieces.append(pack_chunks(pair[1]))

    # a null meta chunk ends the list of file groups
    pieces.append(struct.pack('>l', 0))
    return ''.join(pieces)
122
def pack_chunks(group_obj):
    """
    Given a JSON object ``group_obj`` representing a group (a list of chunk
    dictionaries), return the packed binary group as a string.

    Each chunk is written as a 4-byte big-endian length (which counts itself
    and the 80-byte header), the four hex-decoded header fields ``node``,
    ``p1``, ``p2`` and ``cs``, and the packed patch data.  The group ends
    with a null chunk.

    ``group_obj`` is not modified, so the same object can be packed more
    than once.
    """
    header_fields = ('node', 'p1', 'p2', 'cs')
    packed = []
    for chunk in group_obj:
        # Keep the packed patch stream in a local; writing it back into
        # chunk['data'] would corrupt the caller's object.
        data = pack_patches(chunk['data'])
        packed.append(struct.pack('>l', len(data) + 84))
        for field in header_fields:
            packed.append(chunk[field].decode('string_escape').decode('hex'))
        packed.append(data)
    packed.append(struct.pack('>l', 0))  # a null chunk to end the group
    # b'' is the same object type as '' on Python 2; it just makes explicit
    # that binary data is being joined.
    return b''.join(packed)
139
def pack_patches(data_obj):
    """
    Pack a list of patch dictionaries into a binary patch stream.

    Every patch contributes a 12-byte header built from its ``start``,
    ``end`` and ``blocklen`` entries (three big-endian signed 32-bit
    integers) followed by its ``block`` entry decoded from
    ``string_escape`` form.  The concatenation is returned as a string.
    """
    pieces = []
    for patch in data_obj:
        header = struct.pack('>lll',
                             patch['start'],
                             patch['end'],
                             patch['blocklen'])
        pieces.append(header)
        pieces.append(patch['block'].decode('string_escape'))
    return ''.join(pieces)
150
def to_hg(ifilename, ofilename):
    """
    Given a JSON file produced by this script, at ``ifilename``, convert
    it into an HG10UN file (Mercurial 1.0 uncompressed bundle) at
    ``ofilename``.

    A false ``ifilename`` means "read the JSON from standard input".  A
    file opened here is closed again once the JSON has been parsed;
    standard input is left open.
    """
    ifile = open(ifilename, 'r') if ifilename else sys.stdin
    try:
        iobj = json.load(ifile, encoding='ascii')
    finally:
        if ifile is not sys.stdin:
            ifile.close()

    # writebundle reads the changegroup from a file-like object
    writebundle(cStringIO.StringIO(pack_groups(iobj)), ofilename, 'HG10UN')
164
def system(cmd):
    """
    Echo the command line ``cmd`` and then run it under the shell.

    Raises ``RuntimeError`` when ``os.system`` reports a nonzero status,
    i.e. when the command fails or is killed.
    """
    print(cmd)
    status = os.system(cmd)
    if status != 0:
        raise RuntimeError("error running command '%s'" % cmd)
173
def convert_hg_to_json(path, destructive=False):
    """
    Convert path/.hg to path/.hg.json

    Do no error checking that the path contains .hg and not .hg.json;
    that was assumed done by the convert function, which calls this.

    If ``destructive`` is true, the .hg directory is removed after the
    JSON file has been created.

    Raises ``RuntimeError`` if ``hg bundle`` fails or if no .hg.json exists
    afterwards.  If writing the JSON fails, a .hg.json that this call
    created is removed again, so the directory is not left holding both a
    repository and a truncated dump (a state ``convert`` refuses to handle).
    """
    print("Converting .hg repo to .hg.json plaintext file...")
    bundle = os.path.join(path, '.hg_json.bundle')
    out_json = os.path.join(path, '.hg.json')
    # Only ever delete a JSON file that this call itself produced.
    json_preexisting = os.path.exists(out_json)
    try:
        # 1. convert repository to a bundle
        cmd = 'hg bundle -a -R "%s" "%s"' % (path, bundle)
        system(cmd)
        # 2. convert bundle to json dump
        try:
            to_json(bundle, out_json)
        except BaseException:
            # roll back a partially written dump, then re-raise unchanged
            if not json_preexisting and os.path.exists(out_json):
                os.remove(out_json)
            raise
    finally:
        # 3. Clean up no matter what
        if os.path.exists(bundle):
            os.remove(bundle)
    if not os.path.exists(out_json):
        raise RuntimeError("something went wrong creating .hg.json")
    print("created '%s'" % out_json)
    # 4. Destroy the .hg repository
    hg_repo = os.path.join(path, '.hg')
    if destructive and os.path.exists(hg_repo):
        shutil.rmtree(hg_repo)
201
def convert_json_to_hg(path, destructive=False):
    """
    Convert path/.hg.json to path/.hg

    Do no error checking that the path contains .hg.json and not .hg;
    that was assumed done by the convert function, which calls this.

    If ``destructive`` is true, the .hg.json file is removed after the
    repository has been created.

    Raises ``RuntimeError`` if the intermediate bundle cannot be created or
    if one of the ``hg`` commands fails.  In the latter case a .hg directory
    that this call created is removed again, so the directory is not left
    holding both a JSON dump and a partial repository (a state ``convert``
    refuses to handle).
    """
    print("Converting .hg.json plaintext file to .hg repo...")
    json_file = os.path.join(path, '.hg.json')
    bundle_file = os.path.join(path, '.hg.json.bundle')
    hg_repo = os.path.join(path, '.hg')
    # Only ever roll back a repository that this call itself created.
    repo_preexisting = os.path.exists(hg_repo)
    try:
        # 1. convert json to bundle
        to_hg(json_file, bundle_file)
        if not os.path.exists(bundle_file):
            raise RuntimeError("something went wrong creating '%s'"
                               % bundle_file)
        # 2. convert bundle to .hg repo
        try:
            system('hg init "%s"' % path)
            system('hg pull -R "%s" "%s"' % (path, bundle_file))
            system('hg update -R "%s"' % path)
        except BaseException:
            # remove the half-built repository, then re-raise unchanged
            if not repo_preexisting and os.path.exists(hg_repo):
                shutil.rmtree(hg_repo)
            raise
    finally:
        # 3. Clean up
        if os.path.exists(bundle_file):
            os.remove(bundle_file)
    print("created '%s'" % hg_repo)
    # 4. Destroy plain text file
    if destructive and os.path.exists(json_file):
        os.remove(json_file)
229
def convert(path, destructive=False):
    """
    Validate ``path`` and run the conversion that applies to it.

    ``path`` must be an existing directory that contains exactly one of
    an ``.hg`` repository or an ``.hg.json`` file.  A directory with only
    ``.hg`` is converted to ``.hg.json``; a directory with only ``.hg.json``
    is converted to ``.hg``.  ``destructive`` is handed through to the
    conversion function.

    Raises ``RuntimeError`` if ``path`` is missing, is not a directory, or
    contains both or neither of the two.
    """
    if not os.path.exists(path):
        raise RuntimeError("path ('%s') does not exist" % path)
    if not os.path.isdir(path):
        raise RuntimeError("path ('%s') must be a directory" % path)

    has_hg = os.path.exists(os.path.join(path, '.hg'))
    has_json = os.path.exists(os.path.join(path, '.hg.json'))

    if has_hg and has_json:
        raise RuntimeError("path ('%s') must contain either an .hg repo or .hg.json, but not both (right now it contains both)" % path)
    if not (has_hg or has_json):
        raise RuntimeError("path ('%s') must contain an .hg repo or .hg.json file, but contains neither" % path)

    # Exactly one of the two is present at this point.
    if has_hg:
        convert_hg_to_json(path, destructive)
    else:
        convert_json_to_hg(path, destructive)
252   
253
254#
255# Command line usage
256#
257
if __name__ == '__main__':
    import optparse        # so works with python 2.6

    # NOTE(review): the epilog says the path defaults to the current
    # directory, but exactly one positional argument is required below.
    epilog_text = """If path contain only an .hg repository, then .hg.json is created.
If path contains only a file .hg.json, then the .hg repository is created.
Any other situation is an error.  The default path is the current directory.
 """

    parser = optparse.OptionParser(
        usage="Usage: %prog path",
        description="Convert Mercurial repo (.hg directory) to .hg.json file or convert back.",
        epilog=epilog_text)

    parser.add_option(
        '--destructive',
        dest="destructive",
        default="yes",
        help="(default: 'yes') if 'yes' deletes either .hg or .hg.json after creating .hg.json or .hg")

    options, args = parser.parse_args()
    if len(args) != 1:
        # anything other than exactly one path: show the help and fail
        parser.print_help()
        sys.exit(1)

    # Only the literal string 'yes' enables destructive mode; every other
    # value is treated as "no".
    destructive = (options.destructive == 'yes')
    convert(args[0], destructive)