// mydiff.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <BaseTsd.h>
#include <iostream>
#include <fstream>
#include <string>
#include <map>
#include <utility>
#include <vector>
#include <functional>
#include <algorithm>
using namespace std;
bool MyPairCompFirst( pair<int, int> elem1, pair<int, int> elem2 )
{
return elem1.first < elem2.first;
}
bool MyPairCompSecond( pair<int, int> elem1, pair<int, int> elem2 )
{
return elem1.second < elem2.second;
}
int BiSearch(vector< pair<int, int> > &valPairs, int value)
{
int left = 0;
int right = valPairs.size() - 1;
int mid = 0;
while (left <= right)
{
mid = (left + right) / 2;
if (valPairs[mid].first == value)
return mid;
if (valPairs[mid].first < value)
left = mid + 1;
else
right = mid - 1;
}
return -1;
}
int _tmain(int argc, _TCHAR* argv[])
{
vector<int> valsNew;
vector<pair<int, int>> valsOld;
vector<pair<int, int>> valsOldBackup;
if (argc != 3)
{
printf(" Usage:\n");
printf(" mydiff fileNew fileOld\n");
return 0;
} else
{
//read the two input file build the hash table, turn string into values for compare
string line;
ifstream finNew(argv[1]);
if (!finNew)
{
cout << "failed to open file:" << argv[1] << endl;
exit(0);
}
map<string, int> diffLines;
char ch[10001];
while (finNew.getline(ch, 10000))
{
line = ch;
const char* p = line.c_str();
//chop off the leading and ending blank chars
int pos=0;
while (pos < (int)line.size() && (line[pos] == ' ' || line[pos] == '\t')) pos++;
int posLeft = pos;
pos = (int)line.size() - 1;
while (pos >= 0 && (line[pos] == ' ' || line[pos] == '\t')) pos--;
string temp = line.substr(posLeft, pos-posLeft);
//@@@@ if (temp.empty()) continue; //blank lines is not counted for this special case
if (diffLines.find(line) == diffLines.end())
diffLines[line] = diffLines.size(); //use size() as the unique value
valsNew.push_back(diffLines[line]);
}
finNew.close();
ifstream finOld(argv[2]);
if (!finOld)
{
cout << "failed to open file:" << argv[2] << endl;
exit(0);
}
while (finOld.getline(ch, 10000))
{
line = ch;
//chop off the leading and ending blank chars
int pos=0;
while (pos < line.size() && (line[pos] == ' ' || line[pos] == '\t')) pos++;
int posLeft = pos;
pos = line.size() - 1;
while (pos >= 0 && (line[pos] == ' ' || line[pos] == '\t')) pos--;
string temp = line.substr(posLeft, pos-posLeft);
//@@@@ if (temp.empty()) continue; //blank lines is not counted for this special case
if (diffLines.find(line) == diffLines.end())
diffLines[line] = diffLines.size(); //use size() as the unique value
valsOld.push_back(make_pair(diffLines[line], valsOld.size()));
}
finOld.close();
diffLines.clear();
} //Here diffLines and file handles should be released
valsOldBackup = valsOld;
//use the greedy method, each step we should find the most "RECENT" equal lines
//Sort the old file values, so we can use bi-search against it when we want to compare
sort(valsOld.begin(), valsOld.end(), MyPairCompFirst);
int posNew = 0;
//Search for the next
vector<int> genLinesNew;
vector<int> genLinesOld;
int searchedOldLineCount = 0;
while (posNew < valsNew.size() && valsOld.size() > 0)
{
// get the next equal one
// Calculate the "distance", (leftHand^2 + rightHand^2)
int leftHand = 0;
int rightHand = 0;
INT64 minValueBar = _I64_MAX;
bool bFound = false;
int lastFoundLeftHand = 0;
int lastFoundRightHand = 0;
while (leftHand + posNew < valsNew.size())
{
if (leftHand * leftHand > minValueBar) break; //Found! We can stop now!
int target = valsNew[posNew + leftHand];
int pos = BiSearch(valsOld, target);
if (pos < 0)
{
leftHand++;
continue;
}
//found a match
//get the most "recent" match
int posMin = INT_MAX;
int minPos = pos;
while (pos >= 0 && valsOld[pos].first == target)
{
if (valsOld[pos].second < posMin)
{
posMin = valsOld[pos].second;
minPos = pos;
}
pos--;
}
pos = minPos;
//process the current match
rightHand = valsOld[pos].second - searchedOldLineCount;
if (minValueBar > ((INT64)(leftHand)) * leftHand + rightHand * rightHand)
{
bFound = true;
lastFoundLeftHand = leftHand;
lastFoundRightHand = rightHand;
minValueBar = ((INT64)(leftHand)) * leftHand + rightHand * rightHand;
}
leftHand++;
}
if (bFound)
{
//@@@@@@@
leftHand = lastFoundLeftHand;
rightHand = lastFoundRightHand;
int left1 = posNew;
int left2 = left1 + leftHand;
int right1 = searchedOldLineCount;
int right2 = right1 + rightHand;
//@@@@Add?
//@@@@Delete?
//@@@@Change?
// printf("%d,%d,c,%d,%d,\n", left1, left2, right1, right2);
posNew += leftHand + 1;
//delete the searched old lines from the rest old lines
for (int i=searchedOldLineCount; i<searchedOldLineCount+rightHand+1; i++)
{
int target = valsOldBackup[i].first;
int pos = BiSearch(valsOld, target);
if (pos < 0)
{
printf ("ERROR! this should not happen!, something may be wrong in previous analysis!\n");
printf ("i=[%d], searchedOldLineCount=[%d]\n", i, searchedOldLineCount);
exit(1);
}
int posMin = INT_MAX;
int minPos = pos;
while (pos >= 0 && valsOld[pos].first == target)
{
if (valsOld[pos].second < posMin)
{
posMin = valsOld[pos].second;
minPos = pos;
}
pos--;
}
pos = minPos;
valsOld.erase(valsOld.begin() + pos);
}
searchedOldLineCount += rightHand + 1;
} else
{
//@@@@ should output the rest RightHand and LeftHand values as changed from L->R
printf("%d,%d,c,%d,%d,\n", posNew, valsNew.size() - 1, searchedOldLineCount, valsOldBackup.size() - 1);
break;
}
}
system("PAUSE");
//@@@@@@@@
//print out in batch file format to support console
}