Wagner Fischer algorithm + display steps - java
I made an implementation of the Wagner-Fischer algorithm in Java with input costs, but I want to display all the steps.
I searched but couldn't find any ideas. After a long time I tried keeping each transformation in the matrix alongside its cost, then walking back from the final cell to the first one and reversing the result... Is this a good idea, and if it is, how should I set the conditions?
kitten -> sitting
1. replace k with s
2. keep i
3. keep t
4. keep t
5. replace e with i
6. keep n
7. add g
I tried to write a function to display the steps but can't figure out how to make it work.
import java.io.File;
import java.io.FileNotFoundException;
import java.util.Scanner;
/**
 * Weighted Levenshtein distance computed with the Wagner-Fischer dynamic
 * program, plus a printed edit script that turns the first word into the
 * second one.
 *
 * <p>Input is read from the file {@code data.in}: two words followed by the
 * insert, delete and replace costs.
 *
 * <p>Operation codes stored in {@code Solution.operation} by this class:
 * 0 = diagonal move (keep or replace), 1 = insert, 2 = delete.
 *
 * <p>NOTE(review): {@code Solution} is declared outside this class. Only its
 * int fields {@code solution} (cost) and {@code operation} (code) and its
 * {@code ecqualTo(Solution)} method are used here; {@code ecqualTo} presumably
 * copies both fields from its argument - confirm against its declaration.
 */
public class Principal {
    static int c1, c2, c3;
    static String word1, word2;

    /**
     * Reads the two words and the three costs from {@code data.in} and prints
     * the costs, the edit script and the resulting distance.
     *
     * @param args unused
     * @throws FileNotFoundException if {@code data.in} cannot be opened
     */
    public static void main(String[] args) throws FileNotFoundException {
        // try-with-resources so the file handle is released even if parsing fails.
        try (Scanner data_in = new Scanner(new File("data.in"))) {
            word1 = data_in.next();
            word2 = data_in.next();
            c1 = data_in.nextInt();
            c2 = data_in.nextInt();
            c3 = data_in.nextInt();
        }
        System.out.printf("\nInsert: %s, Delete: %s, Replace: %s\n", c1, c2, c3);
        System.out.printf("\nLevenstheinDistance: %s", LevenshteinDistance(word1, word2, c1, c2, c3));
    }

    /**
     * Computes the weighted edit distance from {@code str1} to {@code str2},
     * printing debug information, the edit script and the full matrix.
     *
     * @param str1     source word
     * @param str2     target word
     * @param InsCost  cost of inserting one character of {@code str2}
     * @param DelCost  cost of deleting one character of {@code str1}
     * @param ReplCost cost of replacing a character by a different one
     * @return minimal total cost of turning {@code str1} into {@code str2}
     */
    private static int LevenshteinDistance(String str1, String str2, int InsCost, int DelCost, int ReplCost)
    {
        int rows = str1.length();
        int cols = str2.length();

        // Turning "" into str2 means inserting all of str2; turning str1 into ""
        // means deleting all of str1.
        if (rows == 0)
            return InsCost * cols;
        if (cols == 0)
            return DelCost * rows;

        Solution[][] ManageSol = new Solution[rows + 1][cols + 1];
        for (int i = 0; i <= rows; i++)
        {
            for (int j = 0; j <= cols; j++)
            {
                ManageSol[i][j] = new Solution();
            }
        }

        System.out.printf("\nLungime str1: %s", rows);
        System.out.printf("\nLungime str2: %s", cols);

        for (int i = 0; i <= rows; i++)
        {
            for (int j = 0; j <= cols; j++)
            {
                if (i == 0)
                {
                    // First row: build the j-char prefix of str2 from nothing.
                    ManageSol[i][j].solution = j * InsCost;
                }
                else if (j == 0)
                {
                    // First column: erase the i-char prefix of str1.
                    ManageSol[i][j].solution = i * DelCost;
                }
                else
                {
                    // No clamping of ReplCost to InsCost + DelCost is needed: the
                    // insert transition already covers "delete then insert",
                    // because cell (i, j-1) is at most cell (i-1, j-1) + DelCost.
                    int substitutionCost = (str1.charAt(i - 1) == str2.charAt(j - 1)) ? 0 : ReplCost;
                    ManageSol[i][j].ecqualTo(minimum(
                            ManageSol[i][j - 1].solution + InsCost,
                            ManageSol[i - 1][j].solution + DelCost,
                            ManageSol[i - 1][j - 1].solution + substitutionCost));
                    System.out.printf("\nManagerSol[%s, %s]: ch1: %s, ch2: %s", i, j, str1.charAt(i - 1), str2.charAt(j - 1));
                }
            }
        }

        System.out.printf("\n");
        path(ManageSol, rows, cols, str1, str2);
        System.out.printf("\n");

        for (int i = 0; i <= rows; i++)
        {
            for (int j = 0; j <= cols; j++)
            {
                System.out.printf("[%s,%s]:(%s,%s) ", i, j, ManageSol[i][j].solution, ManageSol[i][j].operation);
            }
            System.out.printf("\n");
        }
        return ManageSol[rows][cols].solution;
    }

    /**
     * Returns the smaller of the two values.
     *
     * @param x first value
     * @param y second value
     * @return {@code x} if it is not greater than {@code y}, otherwise {@code y}
     */
    static int minimum(int x, int y)
    {
        return (x <= y) ? x : y;
    }

    /**
     * Picks the cheapest of the three candidate costs and records which
     * operation produced it. Ties are resolved in the order insert, delete,
     * replace.
     *
     * @param Ins     cost of reaching the cell by an insertion
     * @param Del     cost of reaching the cell by a deletion
     * @param Replace cost of reaching the cell by the diagonal (keep/replace)
     * @return a new {@code Solution} holding the minimal cost and its
     *         operation code (1 = insert, 2 = delete, 0 = diagonal)
     */
    static Solution minimum(int Ins, int Del, int Replace)
    {
        Solution best = new Solution();
        if (Ins <= Del && Ins <= Replace)
        {
            best.operation = 1;
            best.solution = Ins;
        }
        else if (Del <= Ins && Del <= Replace)
        {
            best.operation = 2;
            best.solution = Del;
        }
        else
        {
            best.operation = 0;
            best.solution = Replace;
        }
        return best;
    }

    /**
     * Prints the edit script, first step first, by walking the matrix back
     * from cell {@code (i, j)} to {@code (0, 0)}.
     *
     * <p>On the first row only insertions are possible and on the first column
     * only deletions, so those cells do not need a stored operation code.
     *
     * @param ManagerSol filled cost/operation matrix
     * @param i          row to start from (number of {@code str1} chars consumed)
     * @param j          column to start from (number of {@code str2} chars produced)
     * @param str1       source word
     * @param str2       target word
     * @throws IllegalStateException if an interior cell holds an operation
     *                               code other than 0, 1 or 2
     */
    static void path(Solution[][] ManagerSol, int i, int j, String str1, String str2)
    {
        // Every step decreases i + j by at least one, so i + j slots suffice.
        String[] steps = new String[i + j];
        int count = 0;

        while (i > 0 || j > 0)
        {
            int operation;
            if (i == 0)
                operation = 1;
            else if (j == 0)
                operation = 2;
            else
                operation = ManagerSol[i][j].operation;

            switch (operation)
            {
                case 0:
                {
                    char from = str1.charAt(i - 1);
                    char to = str2.charAt(j - 1);
                    // A diagonal move over equal characters costs nothing: report it as "Keep".
                    steps[count++] = (from == to)
                            ? String.format("Keep %s", from)
                            : String.format("Replace %s -> %s", from, to);
                    i--;
                    j--;
                    break;
                }
                case 1:
                    // Position is the 0-based index of the inserted char in str2.
                    steps[count++] = String.format("Add %s on position %s", str2.charAt(j - 1), j - 1);
                    j--;
                    break;
                case 2:
                    steps[count++] = String.format("Delete %s", str1.charAt(i - 1));
                    i--;
                    break;
                default:
                    throw new IllegalStateException(
                            "Unknown operation code " + operation + " at [" + i + "," + j + "]");
            }
        }

        // Steps were collected from the end of the words backwards; print them in forward order.
        for (int k = count - 1; k >= 0; k--)
        {
            System.out.printf("\n%s", steps[k]);
        }
    }
}
Output for kitten to sitting:
[0,0]:(0,3) [0,1]:(1,3) [0,2]:(2,3) [0,3]:(3,3) [0,4]:(4,3) [0,5]:(5,3) [0,6]:(6,3) [0,7]:(7,3)
[1,0]:(1,3) [1,1]:(1,0) [1,2]:(2,1) [1,3]:(3,1) [1,4]:(4,1) [1,5]:(5,1) [1,6]:(6,1) [1,7]:(7,1)
[2,0]:(2,3) [2,1]:(2,2) [2,2]:(1,0) [2,3]:(2,1) [2,4]:(3,1) [2,5]:(4,1) [2,6]:(5,1) [2,7]:(6,1)
[3,0]:(3,3) [3,1]:(3,2) [3,2]:(2,2) [3,3]:(1,0) [3,4]:(2,1) [3,5]:(3,1) [3,6]:(4,1) [3,7]:(5,1)
[4,0]:(4,3) [4,1]:(4,2) [4,2]:(3,2) [4,3]:(2,2) [4,4]:(1,0) [4,5]:(2,1) [4,6]:(3,1) [4,7]:(4,1)
[5,0]:(5,3) [5,1]:(5,2) [5,2]:(4,2) [5,3]:(3,2) [5,4]:(2,2) [5,5]:(2,0) [5,6]:(3,1) [5,7]:(4,1)
[6,0]:(6,3) [6,1]:(6,2) [6,2]:(5,2) [6,3]:(4,2) [6,4]:(3,2) [6,5]:(3,2) [6,6]:(2,0) [6,7]:(3,1)
In general your idea is correct. It's even simpler than that. You don't need to store any additional information.
You can go backwards (starting from the end of the given strings) and use your dynamic programming values in the following way:
If one of the indices is 0, there is only one way to go.
Otherwise, you can look at 3 possible transitions "backwards" (from (i, j) to (i - 1, j - 1), (i - 1, j) and (i, j - 1)) and choose the one which yields the actual value for (i, j). If there are several possible options, you can choose any of them.
Once you know where to go from the given pair of positions, the operation is uniquely determined.
I'm not versed in Java but here is an illustration in JavaScript:
// Computes the unit-cost edit distance between `a` and `b` and prints one
// optimal edit script followed by the full distance matrix.
var a = 'kitten',
    b = 'sitting';

// m[i][j] = edit distance between the first i chars of a and the first j chars of b.
var m = new Array(a.length + 1);

for (var i = 0; i < m.length; i++) {
  m[i] = new Array(b.length + 1);

  for (var j = 0; j < m[i].length; j++) {
    // Borders: transforming to/from the empty prefix costs one step per char.
    if (i === 0) m[i][j] = j;
    if (j === 0) m[i][j] = i;
  }
}

for (var i = 1; i <= a.length; i++) {
  for (var j = 1; j <= b.length; j++) {
    if (a[i - 1] === b[j - 1]) {
      // no change needed
      m[i][j] = m[i - 1][j - 1];
    } else {
      // cheapest of deletion, insertion and replacement, plus one step
      m[i][j] = Math.min(m[i - 1][j], m[i][j - 1], m[i - 1][j - 1]) + 1;
    }
  }
}

console.log('a: ' + JSON.stringify(a));
console.log('b: ' + JSON.stringify(b));

// Trace back from the bottom-right cell. At each cell, follow a predecessor
// that actually produces m[i][j]; comparing only the two neighbours with each
// other can pick a non-optimal step (e.g. 'aba' -> 'bab').
var i = a.length,
    j = b.length,
    steps = '';

while (i !== 0 && j !== 0) {
  if (a[i - 1] === b[j - 1]) {
    steps = 'no change; ' + steps;
    i--;
    j--;
  } else if (m[i][j] === m[i - 1][j - 1] + 1) {
    steps = 'replace \'' + a[i - 1] + '\' with \'' + b[j - 1] + '\'; ' + steps;
    i--;
    j--;
  } else if (m[i][j] === m[i - 1][j] + 1) {
    steps = 'delete \'' + a[i - 1] + '\'; ' + steps;
    i--;
  } else {
    steps = 'insert \'' + b[j - 1] + '\'; ' + steps;
    j--;
  }
}

// One of the strings is exhausted: the rest is pure insertion or deletion.
if (i === 0 && j > 0) {
  steps = 'insert first ' + j + ' elements from b; ' + steps;
} else if (j === 0 && i > 0) {
  steps = 'delete first ' + i + ' elements from a; ' + steps;
}

// charAt(0) yields '' for an empty script (both inputs empty) instead of throwing.
console.log('\n' + steps.charAt(0).toUpperCase() + steps.slice(1));

console.log('\nMatrix:\n');

for (var row = 0; row < m.length; row++) {
  console.log(JSON.stringify(m[row]));
}
Related
Longest Palindromic Substring on Java (leetcode)
In leetcode I tried to solve the "Longest Palindromic Substring" task. Here is the code: public String longestPalindrome(String s) { String str = ""; for(int i = 0; i < s.length(); i++) { for(int j = 1 + i; j < s.length() + 1; j++) { String sub = s.substring(i, j); if(isPalindrome(sub) && sub.length() > str.length()) { str = s.substring(i, j); } } } return str; } public static boolean isPalindrome(String s) { if(s.length() < 2) return true; else for(int i = 0; i < s.length() / 2; i++) { if(!(s.charAt(i) == s.charAt(s.length() - 1 - i))) return false; } return true; } It works when I run it in Eclipse, but when I want to submit the solution to leetcode, it shows me an error: Submission Result: Time Limit Exceeded Last executed input: "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee... Can you tell me, what's my problem?
Your code takes too long for LeetCode's time limit: for(int j = 1 + i; j < s.length() + 1; j++){ String sub = s.substring(i, j); if(isPalindrome(sub) && sub.length() > str.length()){ str = s.substring(i, j); } } In this loop you call s.substring(i, j) twice; you can start by calling it only once: for(int j = 1 + i; j < s.length() + 1; j++){ String sub = s.substring(i, j); if(isPalindrome(sub) && sub.length() > str.length()){ str = sub; } } Then look at https://www.geeksforgeeks.org/longest-palindrome-substring-set-1/, which describes two approaches: brute force and an optimized one.
Your answer is taking an insane amount of time to run. Instead of the brute force approach try to optimize it. This is a better solution if you have trouble dealing with the dynamic approach: class Solution { public String longestPalindrome(String s) { if(s == null || s.length() < 1) return ""; int start = 0; int end = 0; for(int i=0; i<s.length(); i++) { int l1 = fromMiddle(s,i,i); int l2 = fromMiddle(s,i,i+1); int l = Math.max(l1,l2); if(l > end - start) { start = i - ((l-1)/2); end = i + (l/2); } } return s.substring(start, end+1); } public int fromMiddle(String s, int left, int right) { if(s == null || left > right) return 0; while(left >= 0 && right < s.length() && s.charAt(left) == s.charAt(right)) { left--; right++; } return right-left-1; } }
I need a mutable version of BigInteger, but I can't access mutablebiginteger
I'm doing an article right now about code I wrote. I'm calculating values that go past the limit of the variable long and I also need to compare this values, so I'm using BigInteger instead. The thing is, BigInteger is immutable so every time a new calculation happen, my program runs slower and slower cause a new BigInteger is being added in an array or something. After some research I learned about MutableBigInteger and tried to use it, but I can't import this class because its private from java.util.Math. I found a topic where some one was using MutableBigInteger Performance of MutableBigInteger The problem is, I couldn't understand much of his code, thus I don't know how to use an mutable version of BigInteger. I read a little about BitSet as well but I don't think it would help me in this scenario. Imagine a clock with n numbers >0. You need to "cut" this clock in 2 (imagine a clock with 15 numbers, I'll put a risk in the number 1 and another on the number 7) now I need to check if the sum of numbers between 1 and 7 are equal to the sum between 7 and 1 (7 till n then this result 'till cut 1) Here's my code: public class LinePuzzle { private static int tam; private static int cont = 0; private static int t1parte, t2parte, t3parte, t4parte, ajuda, ajuda2; static int geraCortes(int tam) { Thread tt1 = new Thread(t1); Thread tt2 = new Thread(t2); Thread tt3 = new Thread(t3); Thread tt4 = new Thread(t4); if(tam % 2 == 0){ ajuda = tam/4; t1parte = ajuda; t2parte = ajuda*2; t3parte = ajuda*3; t4parte = ajuda*4; }else{ ajuda2 = tam%4; ajuda = (tam-ajuda2)/4; t1parte = ajuda; t2parte = (ajuda*2); t3parte = (ajuda*3); t4parte = (ajuda*4) + ajuda2; } // "Starts" tt1.start(); tt2.start(); tt3.start(); tt4.start(); try { // tt1.join(); tt2.join(); tt3.join(); tt4.join(); } catch (Exception ex) { System.out.println("Finalizado"); } return cont; } private static Runnable t1 = new Runnable() { #Override public void run() { long soma1, soma2; for (int i = 0; i<= t1parte; i++) { 
// 1º cut for (int j = i + 1; j <= tam; j++) { // 2º cut if (i == j || i == j - 1) { // tests continue; } soma1 = (((i + 1) + (j - 1)) * (j - i - 1)) / 2; soma2 = (((j + 1) + tam) * (tam - j) + (1 + (i - 1)) * (i - 1)) / 2; if (soma1 == soma2 && soma1 != 0) { BigInteger bi, bi2; bi = BigInteger.valueOf(soma1); bi2 = BigInteger.valueOf(soma2); if(bi.equals(bi2)){ System.out.printf("Equals: cut1 = %d and cut2 = %d -> result: %s / %s\n", i, j, bi, bi2); cont++; break; } } } } } }; private static Runnable t2 = new Runnable() { #Override public void run() { long soma1, soma2; for (int i = t1parte; i<= t2parte; i++) { for (int j = i + 1; j <= tam; j++) { if (i == j || i == j - 1) { continue; } soma1 = (((i + 1) + (j - 1)) * (j - i - 1)) / 2; soma2 = (((j + 1) + tam) * (tam - j) + (1 + (i - 1)) * (i - 1)) / 2; if (soma1 == soma2 && soma1 != 0) { BigInteger bi, bi2; bi = BigInteger.valueOf(soma1); bi2 = BigInteger.valueOf(soma2); if(bi.equals(bi2)){ System.out.printf("Equals: cut1 = %d and cut2 = %d -> result: %s / %s\n", i, j, bi, bi2); cont++; break; } } } } } }; private static Runnable t3 = new Runnable() { #Override public void run() { long soma1, soma2; for (int i = t2parte; i<= t3parte; i++) { // 1º corte for (int j = i + 1; j <= tam; j++) { // 2º corte if (i == j || i == j - 1) { continue; } soma1 = (((i + 1) + (j - 1)) * (j - i - 1)) / 2; soma2 = (((j + 1) + tam) * (tam - j) + (1 + (i - 1)) * (i - 1)) / 2; if (soma1 == soma2 && soma1 != 0) { BigInteger bi, bi2; bi = BigInteger.valueOf(soma1); bi2 = BigInteger.valueOf(soma2); if(bi.equals(bi2)){ System.out.printf("Equals: cut1 = %d and cut2 = %d -> result: %s / %s\n", i, j, bi, bi2); cont++; break; } } } } } }; private static Runnable t4 = new Runnable() { #Override public void run() { long soma1, soma2; for (int i = t3parte; i<= t4parte; i++) { // 1º corte for (int j = i + 1; j <= tam; j++) { // 2º corte if (i == j || i == j - 1) { continue; } soma1 = (((i + 1) + (j - 1)) * (j - i - 1)) / 2; soma2 = (((j + 1) + 
tam) * (tam - j) + (1 + (i - 1)) * (i - 1)) / 2; if (soma1 == soma2 && soma1 != 0) { BigInteger bi, bi2; bi = BigInteger.valueOf(soma1); bi2 = BigInteger.valueOf(soma2); if(bi.equals(bi2)){ System.out.printf("Equals: cut1 = %d and cut2 = %d -> result: %s / %s\n", i, j, bi, bi2); cont++; break; } } } } } }; public static void main(String[] args) { Scanner scan = new Scanner(System.in); do{ System.out.println("Clock length: "); tam = scan.nextInt(); }while(tam < 2); System.out.println("Numbers of equals: " + geraCortes(tam)); // executar o geracortes e os threads } }
You need to understand how to use BigInteger to make calculations. Below are examples with two of your calculations (assuming i, j, tam are longs): //(((i + 1) + (j - 1)) * (j - i - 1)) / 2; BigInteger bSoma1 = bigI.add(BigInteger.ONE) .add(bigJ.subtract(BigInteger.ONE)) .multiply(bigJ.subtract(bigI).subtract(BigInteger.ONE)) .divide(BigInteger.valueOf(2)); //(((j + 1) + tam) * (tam - j) + (1 + (i - 1)) * (i - 1)) / 2; BigInteger bSoma2 = bigJ.add(BigInteger.ONE).add(bigTam) .multiply(bigTam.subtract(bigJ)) .add(BigInteger.ONE.add(bigI.subtract(BigInteger.ONE))) .multiply(bigI.subtract(BigInteger.ONE)) .divide(BigInteger.valueOf(2));
problems trying to convert c sharp code into java
i'm try find most similar string in a array, and i found a code in c sharp that is this one public static int LevenshteinDistance(string s, string t) { int n = s.Length; int m = t.Length; int[,] d = new int[n + 1, m + 1]; if (n == 0) { return m; } if (m == 0) { return n; } for (int i = 1; i <= n; i++) { for (int j = 1; j <= m; j++) { int cost = (t[j - 1] == s[i - 1]) ? 0 : 1; Console.WriteLine(cost); d[i, j] = Math.Min( Math.Min(d[i - 1, j] + 1, d[i, j - 1] + 1), d[i - 1, j - 1] + cost); } } return d[n, m]; } and i'm trying to convert it into java but i get 1 error this is my code in java public static int LevenshteinDistance(String s, String t) { int n = s.length(); int m = t.length(); int[][] d = new int[n + 1][ m + 1]; if (n == 0) { return m; } if (m == 0) { return n; } for (int i = 1; i <= n; i++) { for (int j = 1; j <= m; j++) { int cost = (t[j - 1] == s[i - 1])? 0 : 1; d[i][ j] = Math.min( Math.min(d[i - 1][ j] + 1, d[i][ j - 1] + 1), d[i - 1][ j - 1]+cost ); } } return d[n] [m]; } i get the error in this line of code int cost = (t[j - 1] == s[i - 1]) ? 0 : 1; the error that i have is "Array is required,but string found" this is what i have in my main String []ray ={"food","drinks","stuffs"}; String fa="drink"; for (int i = 0; i < ray.length; i++) { System.out.print(LevenshteinDistance(fa, ray[i])); } i would appreciate any help
Use t.charAt(j-1) == s.charAt(i-1) to access the characters (letters) of a string. You cannot access them directly by index with brackets []. int cost = (t.charAt(j - 1) == s.charAt(i - 1))? 0 : 1;
You are accessing the strings as arrays here with the [] array operator: t[j - 1] == s[i - 1]. To get the nth char of a string, use .charAt(n) instead, so in this case change it to: t.charAt(j - 1) == s.charAt(i - 1). The same applies to the rest of the code.
longest common subsequence printdDiff
Just a quick question about the longest Common subsequence algorithm. I have done the part where you need to generate the subsequence as follow: public int[][] lcsLength(char[] input1, char[] input2) { int[][] opt = new int[M][N]; for (int i = 1; i < input1.length; i++) { for (int j = 1; j < input2.length; j++) { if (input1[i] == input2[j]) { opt[i][j] = opt[i - 1][j - 1] + 1; } else { opt[i][j] = Math.max(opt[i][j - 1], opt[i - 1][j]); } } } return opt; } and the printDiff function as follow: private static void printDiff(int[][] opt,String x,String y,int i, int j) { if(i>0 &&j>0 && x.charAt(i-1)==y.charAt(j-1)){ printDiff(i-1,j-1); System.out.println(x.charAt(i-1)); } else{ if(j>0&&(i==0||opt[i][j-1]>=opt[i-1][j])){ printDiff(i-1,j-1); System.out.println("-"+y.charAt(j-1)); } else if(i>0&&(j==0|| opt[i][j-1]<=opt[i-1][j])){ printDiff(i-1,j-1); System.out.println(x.charAt(i-1)); } } } And then if I use this as parameters: String input1="ABCDE" String input2="ACDC" int i=input1.length() int j=input2.length() after generating the opt matrix with lcsLength() I wish that printdiff woul give me : ABCDE- A-CD-C but instead I get: ABCDE- ABCD-C any ideas on what I did wrong would help me a lot Thanks Laurent
From wiki: function printDiff(C[0..m,0..n], X[1..m], Y[1..n], i, j) if i > 0 and j > 0 and X[i] = Y[j] printDiff(C, X, Y, i-1, j-1) print " " + X[i] else if j > 0 and (i = 0 or C[i,j-1] ≥ C[i-1,j]) printDiff(C, X, Y, i, j-1) print "+ " + Y[j] else if i > 0 and (j = 0 or C[i,j-1] < C[i-1,j]) printDiff(C, X, Y, i-1, j) print "- " + X[i] else print "" This line: else if(i>0&&(j==0|| opt[i][j-1]<=opt[i-1][j])){ Should be: else if(i>0&&(j==0|| opt[i][j-1]<opt[i-1][j])){ (change <= to just <)
Don't know if it's a related issue, but I think your LCS code should be: public int[][] lcsLength(char[] input1, char[] input2) { int[][] opt = new int[input1.length+1][input2.length+1]; for (int i = 1; i <= input1.length; i++) { for (int j = 1; j <= input2.length; j++) { if (input1[i-1] == input2[j-1]) { opt[i][j] = opt[i - 1][j - 1] + 1; } else { opt[i][j] = Math.max(opt[i][j - 1], opt[i - 1][j]); } } } return opt; }
Longest common subsequence difference
I have a program that I am writing in Java and have to do 2 things, find the longest common sub-sequence and align the common characters. The LCS works just fine but the align part just loops away or do nothing. I try to do this algorithm which I found on Wikipedia function printDiff(C[0..m,0..n], X[1..m], Y[1..n], i, j) if i > 0 and j > 0 and X[i] = Y[j] printDiff(C, X, Y, i-1, j-1) print " " + X[i] else if j > 0 and (i = 0 or C[i,j-1] ≥ C[i-1,j]) printDiff(C, X, Y, i, j-1) print "+ " + Y[j] else if i > 0 and (j = 0 or C[i,j-1] < C[i-1,j]) printDiff(C, X, Y, i-1, j) print "- " + X[i] else print "" Here is the code I wrote (I removed the LCS part) static char[] input1 = "ABCDE".toCharArray(); static char[] input2 = "ACDC".toCharArray(); static int M = input1.length; static int N = input2.length; static int[][] opt = new int[M + 1][N + 1]; public static void printDiff(int opt[][], char input1[], char input2[]) { int i = 0, j = 0; while (i < input1.length && j < input2.length) { if (i > 0 && j > 0 && input1[i] == input2[j]) { System.out.print(" " + input1[i]); i++; j++; } else if (j > 0 && (i == 0 || opt[i][j - 1] >= opt[i - 1][j])) { System.out.print("+ " + input2[j]); j++; } else if (i > 0 && (j == 0 || opt[i][j - 1] < opt[i - 1][j])) { System.out.print("- " + input1[i]); i++; } else { System.out.print(""); } } }
I rewrote your code to use the Wikipedia algorithm. In other words, I used recursion rather than a where clause. I had to change one of the if conditions because Java is zero index based and the Wikipedia algorithm is one index based. I had to add the LCS function back in so that I could calculate the int[][]opt. I added parenthesis to the if statements to make sure that the operations were done in the order I wanted them done. I also fixed the output. The Wikipedia algorithm had "+ " and "- " as output. That appears to be a typo. The output should be " +" and " -", respectively. Here's my version of the code. public class PrintDiff { char[] input1 = "ABCDE".toCharArray(); char[] input2 = "ACDC".toCharArray(); int M = input1.length; int N = input2.length; public void run() { int[][] opt = lcsLength(input1, input2); printDiff(opt, input1, input2, M - 1, N - 1); } public int[][] lcsLength(char[] input1, char[] input2) { int[][] opt = new int[M][N]; for (int i = 1; i < input1.length; i++) { for (int j = 1; j < input2.length; j++) { if (input1[i] == input2[j]) { opt[i][j] = opt[i - 1][j - 1] + 1; } else { opt[i][j] = Math.max(opt[i][j - 1], opt[i - 1][j]); } } } return opt; } public void printDiff(int opt[][], char input1[], char input2[], int i, int j) { if ((i >= 0) && (j >= 0) && (input1[i] == input2[j])) { printDiff(opt, input1, input2, i - 1, j - 1); System.out.print(" " + input1[i]); } else if ((j > 0) && ((i == 0) || (opt[i][j - 1] >= opt[i - 1][j]))) { printDiff(opt, input1, input2, i, j - 1); System.out.print(" +" + input2[j]); } else if ((i > 0) && ((j == 0) || (opt[i][j - 1] < opt[i - 1][j]))) { printDiff(opt, input1, input2, i - 1, j); System.out.print(" -" + input1[i]); } else { System.out.print(""); } } public static void main(String[] args) { new PrintDiff().run(); } } And here's my output. A -B C D -E +C
Here is a version which returns the diffs of all the longest common subsequences (basically backtracks using the cached table - similar to the approach taken to get to all longest common subsequences in All Longest Common Subsequences) (or, you may refer to my blog#: http://codingworkout.blogspot.com/2014/07/longest-common-subsequence.html) for ex, for GAC and AGCAT, it returns => { { "[G][A]C", "[G]A[C]", "G[A][C]" }, {"A[G]C[A]T", "A[G][C]AT", "[A]G[C]AT"} where GA, GC and AC are longest common subsequences... string[][] GetDiffs(string A, string B, int aIndex, int bIndex, int[][] DP_LCS_AllPrefixes_Cache) { if((aIndex == 0) && (bIndex ==0)) { return null; } if (DP_LCS_AllPrefixes_Cache[aIndex][bIndex] == 0) { var r = new string[2][]; r[0] = new string[] { A.Substring(0, aIndex) }; r[1] = new string[] { B.Substring(0, bIndex) }; return r; } if (A[aIndex - 1] == B[bIndex - 1]) { var r = this.GetDiffs(A, B, aIndex - 1, bIndex - 1, DP_LCS_AllPrefixes_Cache); string ch = string.Format("[{0}]", A[aIndex - 1]); if (r == null) { r = new string[2][]; r[0] = new string[] { ch }; r[1] = new string[] { ch }; } else { r[0] = r[0].Select(s => s + ch).ToArray(); r[1] = r[1].Select(s => s + ch).ToArray(); } return r; } int lcs_up_direction = DP_LCS_AllPrefixes_Cache[aIndex - 1][bIndex]; int lcs_left_direction = DP_LCS_AllPrefixes_Cache[aIndex][bIndex - 1]; string[][] lcs_up = null, lcs_left = null; if (lcs_up_direction == lcs_left_direction) { lcs_up = this.GetDiffs(A, B, aIndex - 1, bIndex, DP_LCS_AllPrefixes_Cache); lcs_left = this.GetDiffs(A, B, aIndex, bIndex - 1, DP_LCS_AllPrefixes_Cache); } else if (lcs_up_direction > lcs_left_direction) { lcs_up = this.GetDiffs(A, B, aIndex - 1, bIndex, DP_LCS_AllPrefixes_Cache); } else { lcs_left = this.GetDiffs(A, B, aIndex, bIndex - 1, DP_LCS_AllPrefixes_Cache); } char a = A[aIndex - 1], b = B[bIndex - 1]; string[][] rl = new string[2][]; rl[0] = new string[0]; rl[1] = new string[0]; if(lcs_up != null) { //we moved upward, that is we 
accepted that they differ with 'A' at aIndex-1 (a) rl[0] = lcs_up[0].Select(s => s + a.ToString()).ToArray(); rl[1] = lcs_up[1]; } if (lcs_left != null) { //we moved left, that is we accepted that they differ with 'B' at bIndex-1 (b) rl[0] = rl[0].Union(lcs_left[0]).ToArray(); ; rl[1] = rl[1].Union(lcs_left[1].Select(s => s + b.ToString())).ToArray(); } return rl.ToArray(); } where the caller is string[][] GetDiffs(string A, string B, int[][] DP_LCS_AllPrefixes_Cache) { var r = this.GetDiffs(A, B, A.Length, B.Length, DP_LCS_AllPrefixes_Cache); return r; } And the DP method which captures LCS lengths to backtrack public int[][] LCS_OfAllPrefixes_Length(string A, string B) { A.ThrowIfNullOrWhiteSpace("a"); B.ThrowIfNullOrWhiteSpace("b"); int[][] DP_LCS_AllPrefixes_Cache = new int[A.Length+1][]; for(int i = 0;i<DP_LCS_AllPrefixes_Cache.Length; i++) { DP_LCS_AllPrefixes_Cache[i] = new int[B.Length + 1]; } for (int rowIndexOfCache = 1; rowIndexOfCache <= A.Length; rowIndexOfCache++) { for (int columnIndexOfCache = 1; columnIndexOfCache <= B.Length; columnIndexOfCache++) { //LCS(Ai, Bj) = 0 if i <=0, or j <= 0 // LCS(Ai, Bj) + 1 if Ai == Bj // Max(LCS(Ai-1, Bj), LCS(Ai, Bj-1)) if(A[rowIndexOfCache-1] == B[columnIndexOfCache-1]) { DP_LCS_AllPrefixes_Cache[rowIndexOfCache][columnIndexOfCache] = DP_LCS_AllPrefixes_Cache[rowIndexOfCache - 1][columnIndexOfCache - 1] + 1; } else { DP_LCS_AllPrefixes_Cache[rowIndexOfCache][columnIndexOfCache] = Utilities.Max(DP_LCS_AllPrefixes_Cache[rowIndexOfCache - 1][columnIndexOfCache], DP_LCS_AllPrefixes_Cache[rowIndexOfCache][columnIndexOfCache - 1]); } } } return DP_LCS_AllPrefixes_Cache; } TestMethod [TestMethod] public void LCS_Tests() { string A = "GAC", B = "AGCAT"; var DP_LCS_AllPrefixes_Cache = this.LCS_OfAllPrefixes_Length(A, B); Assert.IsTrue(DP_LCS_AllPrefixes_Cache[A.Length][B.Length] == 2); var lcs_sequences = this.GetLongestCommonSubsequences(A, B, DP_LCS_AllPrefixes_Cache); Assert.IsNotNull(lcs_sequences); var diffs = 
this.GetDiffs(A, B, DP_LCS_AllPrefixes_Cache); Assert.IsNotNull(diffs); Assert.IsTrue(diffs.Length == 2); Assert.IsTrue(diffs[0].Length == lcs_sequences.Length); Assert.IsTrue(diffs[1].Length == lcs_sequences.Length); Assert.IsTrue(lcs_sequences.Any(s => "AC".Equals(s))); Assert.IsTrue(lcs_sequences.Any(s => "GC".Equals(s))); Assert.IsTrue(lcs_sequences.Any(s => "GA".Equals(s))); var DP_LCS_AllPrefixes_Subsequences_Cache = this.LCS_OfAllPrefixes_Subsequences(A, B); Assert.IsTrue(DP_LCS_AllPrefixes_Subsequences_Cache[A.Length][B.Length].Length == 2); Assert.IsTrue(DP_LCS_AllPrefixes_Subsequences_Cache[A.Length][B.Length].Subsequences .Any(s => "AC".Equals(s))); Assert.IsTrue(DP_LCS_AllPrefixes_Subsequences_Cache[A.Length][B.Length].Subsequences .Any(s => "GC".Equals(s))); Assert.IsTrue(DP_LCS_AllPrefixes_Subsequences_Cache[A.Length][B.Length].Subsequences .Any(s => "GA".Equals(s))); A = "ABCDGH"; B = "AEDFHR"; DP_LCS_AllPrefixes_Cache = this.LCS_OfAllPrefixes_Length(A, B); Assert.IsTrue(DP_LCS_AllPrefixes_Cache[A.Length][B.Length] == 3); lcs_sequences = this.GetLongestCommonSubsequences(A, B, DP_LCS_AllPrefixes_Cache); Assert.IsNotNull(lcs_sequences); diffs = this.GetDiffs(A, B, DP_LCS_AllPrefixes_Cache); Assert.IsNotNull(diffs); Assert.IsTrue(diffs.Length == 2); Assert.IsTrue(diffs[0].Length == lcs_sequences.Length); Assert.IsTrue(diffs[1].Length == lcs_sequences.Length); Assert.IsTrue(lcs_sequences.Any(s => "ADH".Equals(s))); DP_LCS_AllPrefixes_Subsequences_Cache = this.LCS_OfAllPrefixes_Subsequences(A, B); Assert.IsTrue(DP_LCS_AllPrefixes_Subsequences_Cache[A.Length][B.Length].Length == 3); Assert.IsTrue(DP_LCS_AllPrefixes_Subsequences_Cache[A.Length][B.Length].Subsequences .Any(s => "ADH".Equals(s))); A = "AGGTAB"; B = "GXTXAYB"; DP_LCS_AllPrefixes_Cache = this.LCS_OfAllPrefixes_Length(A, B); Assert.IsTrue(DP_LCS_AllPrefixes_Cache[A.Length][B.Length] == 4); lcs_sequences = this.GetLongestCommonSubsequences(A, B, DP_LCS_AllPrefixes_Cache); 
Assert.IsNotNull(lcs_sequences); diffs = this.GetDiffs(A, B, DP_LCS_AllPrefixes_Cache); Assert.IsNotNull(diffs); Assert.IsTrue(diffs.Length == 2); Assert.IsTrue(diffs[0].Length == 2); Assert.IsTrue(diffs[1].Length == lcs_sequences.Length); Assert.IsTrue(lcs_sequences.Any(s => "GTAB".Equals(s))); DP_LCS_AllPrefixes_Subsequences_Cache = this.LCS_OfAllPrefixes_Subsequences(A, B); Assert.IsTrue(DP_LCS_AllPrefixes_Subsequences_Cache[A.Length][B.Length].Length == 4); Assert.IsTrue(DP_LCS_AllPrefixes_Subsequences_Cache[A.Length][B.Length].Subsequences .Any(s => "GTAB".Equals(s))); A = "ABCDEF"; B = "UVWXYZ"; DP_LCS_AllPrefixes_Cache = this.LCS_OfAllPrefixes_Length(A, B); Assert.IsTrue(DP_LCS_AllPrefixes_Cache[A.Length][B.Length] == 0); lcs_sequences = this.GetLongestCommonSubsequences(A, B, DP_LCS_AllPrefixes_Cache); diffs = this.GetDiffs(A, B, DP_LCS_AllPrefixes_Cache); Assert.IsNotNull(diffs); Assert.IsTrue(diffs.Length == 2); Assert.IsTrue(diffs[0].Length == 1); Assert.IsTrue(diffs[1].Length == 1); }