# HG changeset patch
# User hthomas@unb.ca
# Date 1375992301 10800
# Node ID b0c1206052e9e7b47ec6a0a30bd3d7a8c197dc46
# Parent 99d19ec6945f98dbeba083aafd2d5faf9c98acfd
Trac 14969: implement longest_common_subword (reviewer patch)
diff --git a/sage/combinat/words/finite_word.py b/sage/combinat/words/finite_word.py
a
|
b
|
|
3306 | 3306 | r""" |
3307 | 3307 | Returns a longest common subword of ``self`` and ``other``. |
3308 | 3308 | |
| 3309 | A subword of a word is a subsequence of its letters: the chosen letters |
| 3310 | need not be adjacent, but are read in the order in which they appear in the word. |
| 3311 | |
3309 | 3312 | For more information, see |
3310 | 3313 | :wikipedia:`Longest_common_subsequence_problem`. |
3311 | 3314 | |
… |
… |
|
3315 | 3318 | |
3316 | 3319 | ALGORITHM: |
3317 | 3320 | |
3318 | | For any indices `i,j'`, we compute the longest common subword ``lcs[i,j]`` of |
3319 | | `self[:i]` and `other[:j]`. This can be easily obtained as the maximum |
| 3321 | For any indices `i,j`, we compute the longest common subword ``lcs[i,j]`` of |
| 3322 | `self[:i]` and `other[:j]`. This can be easily obtained as the longest |
3320 | 3323 | of |
3321 | 3324 | |
3322 | 3325 | - ``lcs[i-1,j]`` |
3323 | 3326 | |
3324 | 3327 | - ``lcs[i,j-1]`` |
3325 | 3328 | |
3326 | | - ``lcs[i-1,j-1]+1`` if ``self[i]==other[j]`` |
3327 | | |
3328 | | EXAMPLE:: |
| 3329 | - ``lcs[i-1,j-1]+self[i]`` if ``self[i]==other[j]`` |
| 3330 | |
| 3331 | EXAMPLES:: |
| 3332 | |
| 3333 | sage: v1 = Word("abc") |
| 3334 | sage: v2 = Word("ace") |
| 3335 | sage: v1.longest_common_subword(v2) |
| 3336 | word: ac |
3329 | 3337 | |
3330 | 3338 | sage: w1 = Word("1010101010101010101010101010101010101010") |
3331 | 3339 | sage: w2 = Word("0011001100110011001100110011001100110011") |
… |
… |
|
3349 | 3357 | |
3350 | 3358 | # In order to avoid storing lcs[i,j] for each pair i,j of indices, we |
3351 | 3359 | # only store the lcs[i,j] for two consecutive values of i. At any step |
3352 | | # of the algorithm, lcs[i,j] is stored at lcs[0][j] and lcs[-1,j] is |
| 3360 | # of the algorithm, lcs[i,j] is stored at lcs[0][j] and lcs[i-1,j] is |
3353 | 3361 | # stored at lcs[1][j] |
3354 | | l1 = self[0] |
3355 | 3362 | |
3356 | 3363 | # The weird +1 that follows exists to make sure that lcs[i,-1] returns |
3357 | | # the empty word |
| 3364 | # the empty word. |
3358 | 3365 | lcs = [[[] for i in range(len(w2)+1)] for j in range(2)] |
3359 | 3366 | |
3360 | 3367 | for i,l1 in enumerate(self): |
3361 | 3368 | for j,l2 in enumerate(other): |
3362 | | lcs[0][j] = max(lcs[0][j-1],lcs[1][j],lcs[1][j-1] + ([l1] if l1==l2 else []),key=len) |
| 3369 | lcs[0][j] = max(lcs[0][j-1], lcs[1][j], |
| 3370 | lcs[1][j-1] + ([l1] if l1==l2 else []),key=len) |
3363 | 3371 | |
3364 | 3372 | # Maintaining the meaning of lcs for the next loop |
3365 | 3373 | lcs.pop(1) |